'use strict'

const cfg = require('./config')
const { log, json, parseBody } = require('./helpers')

/**
 * Build the Gemini `generateContent` endpoint URL.
 * A function rather than a constant, so `cfg.AI_MODEL` and `cfg.AI_API_KEY`
 * are read at call time instead of module load time.
 * @returns {string}
 */
const GEMINI_URL = () => `https://generativelanguage.googleapis.com/v1beta/models/${cfg.AI_MODEL}:generateContent?key=${cfg.AI_API_KEY}`

/**
 * Send one image plus a text prompt to Gemini and return the parsed JSON reply.
 *
 * @param {string} base64Image - base64 image data (no data-URL prefix); always sent as image/jpeg.
 * @param {string} prompt - instruction text sent alongside the image.
 * @param {object} schema - response schema passed as `generationConfig.responseSchema`.
 * @returns {Promise<object|Array|undefined>} the parsed JSON object/array, or
 *   `undefined` when the reply is empty, unparseable, or not an object/array.
 * @throws {Error} when the HTTP response is not OK (message carries status + first 200 chars of body).
 */
async function geminiVision (base64Image, prompt, schema) {
  const resp = await fetch(GEMINI_URL(), {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      contents: [{ parts: [{ text: prompt }, { inline_data: { mime_type: 'image/jpeg', data: base64Image } }] }],
      generationConfig: { temperature: 0.1, maxOutputTokens: 1024, responseMimeType: 'application/json', responseSchema: schema },
    }),
  })
  if (!resp.ok) {
    const errText = await resp.text()
    throw new Error(`Gemini API ${resp.status}: ${errText.slice(0, 200)}`)
  }

  const data = await resp.json()
  const text = (data.candidates?.[0]?.content?.parts?.[0]?.text || '').trim()
  log(`Vision response: ${text.slice(0, 300)}`)

  let parsed
  try {
    parsed = JSON.parse(text)
  } catch {
    // Fallback: the reply may wrap the JSON in extra text, so try the outermost {...} span.
    const match = text.match(/\{[\s\S]*\}/)
    if (match) {
      try {
        parsed = JSON.parse(match[0])
      } catch {
        // Deliberate best-effort: an unparseable reply is reported as "nothing extracted".
      }
    }
  }

  // Callers read properties off the result; a bare JSON string/number/boolean is not usable.
  return parsed !== null && typeof parsed === 'object' ? parsed : undefined
}

/**
 * Validate a scan request body and pull out the raw base64 image data.
 *
 * @param {object} req - incoming request (unused here; kept for the shared call signature).
 * @param {object} body - parsed request body; `body.image` must be a base64 string,
 *   optionally prefixed with `data:image/...;base64,`.
 * @param {string} label - short tag used in the log line.
 * @returns {{base64: string}|{error: string, status: number}}
 */
function extractBase64 (req, body, label) {
  if (!cfg.AI_API_KEY) return { error: 'AI_API_KEY not configured', status: 500 }
  const image = body?.image
  // A missing body or a non-string image is a client error, not a crash.
  if (typeof image !== 'string' || !image) return { error: 'Missing image field (base64)', status: 400 }
  const base64 = image.replace(/^data:image\/[^;]+;base64,/, '')
  // base64 encodes 3 bytes per 4 chars, hence the * 3 / 4 size estimate.
  log(`Vision ${label}: received image ${Math.round(base64.length * 3 / 4 / 1024)}KB`)
  return { base64 }
}

const BARCODE_PROMPT = `Read ALL identifiers on this equipment label photo (may be blurry/tilted). Extract: barcode text, serial numbers (S/N, SN), MAC addresses (12 hex chars), model numbers (M/N, Model, P/N), IMEI, GPON SN. Examples: 1608K44D9E79FAFF5, TPLG-A1B2C3D4, 04:18:D6:A1:B2:C3, HWTC87654321. Try your BEST on every character. \nReturn max 3 most important (serial/MAC first).`

const BARCODE_SCHEMA = {
  type: 'object',
  properties: {
    barcodes: { type: 'array', items: { type: 'string' }, maxItems: 3 },
  },
  required: ['barcodes'],
}

/**
 * HTTP handler: extract up to 3 identifiers from a label photo.
 * Responds 200 with `{ barcodes }`, or 4xx/500 with `{ error }`.
 * @param {object} req
 * @param {object} res
 */
async function handleBarcodes (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'barcode')
  if (check.error) return json(res, check.status, { error: check.error })
  try {
    const result = await extractBarcodes(check.base64)
    return json(res, 200, result)
  } catch (e) {
    log('Vision barcode error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}

/**
 * Run barcode/identifier extraction on an image.
 *
 * @param {string} base64Image - base64 image data.
 * @returns {Promise<{barcodes: string[]}>} at most 3 identifiers, whitespace removed;
 *   entries of 3 characters or fewer (after trimming) are dropped.
 */
async function extractBarcodes (base64Image) {
  const parsed = await geminiVision(base64Image, BARCODE_PROMPT, BARCODE_SCHEMA)
  if (!parsed) return { barcodes: [] }
  // Accept either a bare array or the schema's { barcodes: [...] } shape.
  const arr = Array.isArray(parsed) ? parsed : Array.isArray(parsed.barcodes) ? parsed.barcodes : []
  const barcodes = arr
    .filter(v => typeof v === 'string' && v.trim().length > 3)
    .map(v => v.trim().replace(/\s+/g, ''))
    .slice(0, 3)
  log(`Vision: extracted ${barcodes.length} barcode(s): ${barcodes.join(', ')}`)
  return { barcodes }
}

const EQUIP_PROMPT = `Read this ISP equipment label (ONT/ONU/router/modem). Return structured JSON. Extract: brand/manufacturer, model (M/N, P/N), serial (S/N, SN, under barcode), MAC address (12 hex, no separators), GPON SN, HW version, barcodes. Try your BEST on blurry/angled text. \nSet missing fields to null.`

const EQUIP_SCHEMA = {
  type: 'object',
  properties: {
    brand: { type: 'string', nullable: true },
    model: { type: 'string', nullable: true },
    serial_number: { type: 'string', nullable: true },
    mac_address: { type: 'string', nullable: true },
    gpon_sn: { type: 'string', nullable: true },
    hw_version: { type: 'string', nullable: true },
    equipment_type: { type: 'string', nullable: true },
    barcodes: { type: 'array', items: { type: 'string' }, maxItems: 5 },
  },
  required: ['serial_number'],
}

/**
 * HTTP handler: extract structured equipment details from a label photo.
 * Responds 200 with the parsed fields (MAC upper-cased without separators,
 * serial without whitespace), or 4xx/500 with `{ error }`.
 * @param {object} req
 * @param {object} res
 */
async function handleEquipment (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'equipment')
  if (check.error) return json(res, check.status, { error: check.error })
  try {
    const parsed = await geminiVision(check.base64, EQUIP_PROMPT, EQUIP_SCHEMA)
    if (!parsed) return json(res, 200, { serial_number: null, barcodes: [] })
    // Only normalize real strings; anything else is passed through untouched.
    if (typeof parsed.mac_address === 'string') parsed.mac_address = parsed.mac_address.replace(/[:\-.\s]/g, '').toUpperCase()
    if (typeof parsed.serial_number === 'string') parsed.serial_number = parsed.serial_number.replace(/\s+/g, '')
    log(`Vision equipment: brand=${parsed.brand} model=${parsed.model} sn=${parsed.serial_number} mac=${parsed.mac_address}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision equipment error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}

// ─── Invoice / bill OCR ────────────────────────────────────────────────
// We run this on Gemini (not on Ollama) because the ops VM has no GPU —
// ops must not depend on a local vision model. The schema matches what
// the ops InvoiceScanPage expects so switching away from Ollama is a
// drop-in replacement on the frontend.
const INVOICE_PROMPT = `You are an invoice/bill OCR assistant. Extract structured data from this photo of a vendor invoice or bill. Return ONLY valid JSON that matches the provided schema. No prose, no markdown. Rules: - "date" / "due_date" must be ISO YYYY-MM-DD. \nIf the date is MM/DD/YYYY or DD/MM/YYYY and ambiguous, prefer YYYY-MM-DD with the most likely interpretation for Canadian/Québec invoices. - "currency" is a 3-letter code (CAD, USD, EUR). Default to CAD if not visible. - "tax_gst" = GST/TPS/HST (Canadian federal tax); "tax_qst" = QST/TVQ (Québec provincial tax). - "items" is a line-by-line list; keep description as printed, collapse whitespace. - Missing fields → null for strings, 0 for numbers, [] for items.`

const INVOICE_SCHEMA = {
  type: 'object',
  properties: {
    vendor: { type: 'string', nullable: true },
    vendor_address: { type: 'string', nullable: true },
    invoice_number: { type: 'string', nullable: true },
    date: { type: 'string', nullable: true },
    due_date: { type: 'string', nullable: true },
    subtotal: { type: 'number', nullable: true },
    tax_gst: { type: 'number', nullable: true },
    tax_qst: { type: 'number', nullable: true },
    total: { type: 'number', nullable: true },
    currency: { type: 'string', nullable: true },
    items: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          description: { type: 'string', nullable: true },
          qty: { type: 'number', nullable: true },
          rate: { type: 'number', nullable: true },
          amount: { type: 'number', nullable: true },
        },
      },
    },
    notes: { type: 'string', nullable: true },
  },
  required: ['vendor', 'total'],
}

/**
 * Coerce string-valued numeric fields of `target` to numbers, in place.
 * Everything except digits, '.' and '-' is stripped first (so "1,234.56"
 * becomes 1234.56); a string that still is not a number becomes 0.
 * Non-string values are left as they are.
 * @param {object} target
 * @param {string[]} keys
 */
function coerceNumericFields (target, keys) {
  for (const key of keys) {
    if (typeof target[key] === 'string') target[key] = Number(target[key].replace(/[^0-9.\-]/g, '')) || 0
  }
}

/**
 * HTTP handler: OCR a vendor invoice/bill photo into structured data.
 * Responds 200 with the parsed invoice (amount fields coerced to numbers),
 * or 4xx/500 with `{ error }`.
 * @param {object} req
 * @param {object} res
 */
async function handleInvoice (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'invoice')
  if (check.error) return json(res, check.status, { error: check.error })
  try {
    const parsed = await geminiVision(check.base64, INVOICE_PROMPT, INVOICE_SCHEMA)
    if (!parsed) return json(res, 200, { vendor: null, total: null, items: [] })
    // Normalize: coerce numbers (model sometimes returns "1,234.56" as string)
    coerceNumericFields(parsed, ['subtotal', 'tax_gst', 'tax_qst', 'total'])
    if (Array.isArray(parsed.items)) {
      for (const item of parsed.items) {
        // Skip null / primitive entries instead of throwing on property access.
        if (item !== null && typeof item === 'object') coerceNumericFields(item, ['qty', 'rate', 'amount'])
      }
    }
    log(`Vision invoice: vendor=${parsed.vendor} total=${parsed.total} items=${(parsed.items || []).length}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision invoice error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}

// ─── Field-targeted extraction (for tech mobile form auto-fill) ─────────
// Instead of "read everything on the label", this pulls ONE specific value.
// Used when a tech has selected e.g. "Wi-Fi password" and wants Gemini to
// find only that field on the sticker. Returns {value, confidence}.
// Each entry: `desc` is spliced into the prompt, `clean` normalizes the reply.
const FIELD_CONFIG = Object.freeze({
  serial_number: {
    desc: 'the device SERIAL NUMBER (labeled S/N, SN, Serial, N/S). Usually 8-20 alphanumeric chars, frequently printed under a Code128 barcode.',
    clean: v => v.replace(/\s+/g, '').toUpperCase(),
  },
  mac_address: {
    desc: 'the MAC ADDRESS (12 hexadecimal chars, may be separated by colons, dashes or nothing). Labeled MAC, WAN MAC, LAN MAC, Ethernet, Wi-Fi MAC.',
    clean: v => v.replace(/[^0-9A-F]/gi, '').toUpperCase(),
  },
  gpon_sn: {
    desc: 'the GPON SN — a 4-letter manufacturer code followed by 8 hex characters (e.g. HWTC12345678, ZTEG87654321, CIGG1A2B3C4D). Labeled GPON SN, GPON-SN, ONU SN.',
    clean: v => v.replace(/\s+/g, '').toUpperCase(),
  },
  model: {
    desc: 'the MODEL number/name (labeled M/N, Model, P/N, Product, Type). Usually short, e.g. "HG8245H", "TL-WR841N", "HS8145V".',
    clean: v => v.trim(),
  },
  wifi_ssid: {
    desc: 'the Wi-Fi NETWORK NAME (SSID). Labeled SSID, Wi-Fi name, WLAN SSID, Nom Wi-Fi, Nom du réseau.',
    clean: v => v.trim(),
  },
  wifi_password: {
    desc: 'the Wi-Fi PASSWORD / KEY. Labeled WPA, WPA2, WPA Key, Wi-Fi Password, Wireless Password, Clé Wi-Fi, Mot de passe Wi-Fi, Password, Passphrase. Usually 8-20 chars, mixed case with numbers and sometimes symbols.',
    clean: v => v.trim(),
  },
  imei: {
    desc: 'the IMEI (15 digits, exactly). Labeled IMEI.',
    clean: v => v.replace(/\D/g, ''),
  },
  generic: {
    desc: 'the requested value (see context hint below)',
    clean: v => v.trim(),
  },
})

const FIELD_SCHEMA = {
  type: 'object',
  properties: {
    value: { type: 'string', nullable: true },
    confidence: { type: 'number' },
  },
  required: ['value', 'confidence'],
}

/**
 * Clamp a model-reported confidence into [0, 1].
 * Falls back to 0.5 only when the value is missing or not numeric — a
 * reported 0 stays 0.
 * @param {*} raw
 * @returns {number}
 */
function normalizeConfidence (raw) {
  let n = NaN
  if (typeof raw === 'number') n = raw
  else if (typeof raw === 'string' && raw.trim() !== '') n = Number(raw)
  return Number.isNaN(n) ? 0.5 : Math.max(0, Math.min(1, n))
}

/**
 * Extract ONE specific value from an equipment label photo.
 *
 * @param {string} base64Image - base64 image data.
 * @param {string} field - a key of FIELD_CONFIG; anything else uses the `generic` entry.
 * @param {object} [context] - optional hints: `equipment_type`, `brand`, `model`,
 *   and `hint` (free text, used with the generic entry).
 * @returns {Promise<{value: string|null, confidence: number}>} cleaned value and a
 *   confidence in [0, 1]; `{ value: null, confidence: 0 }` when nothing usable was found.
 * @throws {Error} propagated from the Gemini request.
 */
async function extractField (base64Image, field, context = {}) {
  const ctx = context ?? {}
  // Own-property check: `field` comes from the request body, and a name such as
  // "constructor" must not resolve to an Object.prototype member.
  const config = Object.hasOwn(FIELD_CONFIG, field) ? FIELD_CONFIG[field] : FIELD_CONFIG.generic
  const eq = ctx.equipment_type ? `Equipment type hint: ${ctx.equipment_type}.` : ''
  const brand = ctx.brand ? `Brand hint: ${ctx.brand}.` : ''
  const model = ctx.model ? `Model hint: ${ctx.model}.` : ''
  // The generic description refers to a hint, so supply it whenever the generic
  // entry is in use — including when an unknown field name fell back to it.
  const custom = (config === FIELD_CONFIG.generic && ctx.hint) ? `Look for: ${ctx.hint}.` : ''
  const prompt = `You are reading an ISP equipment label (ONT, router, modem). Extract ${config.desc} ${eq} ${brand} ${model} ${custom} Return ONLY JSON matching the schema: {"value": "", "confidence": <0.0-1.0>}. If you cannot find it with confidence above 0.5, return {"value": null, "confidence": 0.0}. Do NOT invent data. \nPrefer returning null over guessing.`

  const parsed = await geminiVision(base64Image, prompt, FIELD_SCHEMA)
  if (!parsed) return { value: null, confidence: 0 }
  // The schema asks for a string, but tolerate an all-digit value sent as a number.
  const raw = typeof parsed.value === 'number' ? String(parsed.value) : parsed.value
  if (typeof raw !== 'string' || !raw) return { value: null, confidence: 0 }
  const cleaned = config.clean(raw)
  if (!cleaned) return { value: null, confidence: 0 }
  return { value: cleaned, confidence: normalizeConfidence(parsed.confidence) }
}

/**
 * HTTP handler: field-targeted scan. Reads `image`, `field`, `hint`,
 * `equipment_type`, `brand` and `model` from the request body.
 * Responds 200 with `{ ok: true, value, confidence }`, or 4xx/500 with `{ error }`.
 * @param {object} req
 * @param {object} res
 */
async function handleFieldScan (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'field-scan')
  if (check.error) return json(res, check.status, { error: check.error })
  try {
    const out = await extractField(check.base64, body.field || 'generic', {
      hint: body.hint,
      equipment_type: body.equipment_type,
      brand: body.brand,
      model: body.model,
    })
    return json(res, 200, { ok: true, ...out })
  } catch (e) {
    log('Vision field-scan error:', e.message)
    return json(res, 500, { error: 'Vision field extraction failed: ' + e.message })
  }
}

module.exports = { handleBarcodes, extractBarcodes, handleEquipment, handleInvoice, extractField, handleFieldScan }