- Invoice OCR migrated from Ollama (GPU-bound, local) to Gemini 2.5 Flash via new targo-hub /vision/invoice endpoint with responseSchema enforcement. Ops VM no longer needs a GPU. - Ops /j/* now has full camera scanner (TechScanPage) ported from apps/field with 8s timeout + offline queue + auto-link to Dispatch Job context on serial/barcode/MAC 3-tier lookup. - New TechDevicePage reached via /j/device/:serial showing every ERPNext entity related to a scanned device: Service Equipment, Customer, Service Location, active Subscription, open Issues, upcoming Dispatch Jobs, OLT info. - New docs/VISION_AND_OCR.md (full pipeline + §10 relationship graph + §8.1 secrets/rotation policy). Cross-linked from ARCHITECTURE, ROADMAP, HANDOFF, README. - Nginx /ollama/ proxy blocks removed from both ops + field. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
171 lines
8.0 KiB
JavaScript
171 lines
8.0 KiB
JavaScript
'use strict'
|
|
const cfg = require('./config')
|
|
const { log, json, parseBody } = require('./helpers')
|
|
|
|
// Builds the Gemini `generateContent` endpoint URL for the configured model,
// carrying the API key in the `key` query parameter. `cfg` is read on every
// call rather than once at module load.
const GEMINI_URL = () => {
  const { AI_MODEL: model, AI_API_KEY: apiKey } = cfg
  return 'https://generativelanguage.googleapis.com/v1beta/models/' + model + ':generateContent?key=' + apiKey
}
|
|
|
|
/**
 * Send one image plus a text prompt to Gemini and return the parsed JSON reply.
 *
 * The request asks for `application/json` output constrained by `schema`
 * (temperature 0.1, at most 1024 output tokens). The image is always declared
 * as `image/jpeg`.
 *
 * @param {string} base64Image - Base64 image payload placed in `inline_data.data`.
 * @param {string} prompt - Instruction text sent alongside the image.
 * @param {object} schema - Passed through as `generationConfig.responseSchema`.
 * @returns {Promise<*>} The parsed JSON value, or `undefined` when the reply
 *   contains no parsable JSON (the reason is logged).
 * @throws {Error} When the HTTP response is not OK; the message carries the
 *   status code and the first 200 characters of the response body.
 */
async function geminiVision (base64Image, prompt, schema) {
  const resp = await fetch(GEMINI_URL(), {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      contents: [{ parts: [{ text: prompt }, { inline_data: { mime_type: 'image/jpeg', data: base64Image } }] }],
      generationConfig: { temperature: 0.1, maxOutputTokens: 1024, responseMimeType: 'application/json', responseSchema: schema },
    }),
  })
  if (!resp.ok) {
    const errBody = await resp.text()
    throw new Error(`Gemini API ${resp.status}: ${errBody.slice(0, 200)}`)
  }

  const data = await resp.json()
  const candidate = data?.candidates?.[0]
  const parts = Array.isArray(candidate?.content?.parts) ? candidate.content.parts : []
  // The reply text may be split over several parts: join every text part
  // (ignoring parts flagged as thoughts) instead of reading only the first.
  const text = parts
    .filter(p => p && typeof p.text === 'string' && !p.thought)
    .map(p => p.text)
    .join('')
    .trim()
  log(`Vision response: ${text.slice(0, 300)}`)

  if (!text) {
    // Nothing to parse: record why so an empty result is diagnosable.
    log(`Vision: Gemini returned no text (finishReason=${candidate?.finishReason}, blockReason=${data?.promptFeedback?.blockReason})`)
    return undefined
  }

  try { return JSON.parse(text) } catch { /* not bare JSON, try to salvage an embedded object below */ }

  // Fallback: pull the outermost {...} span out of surrounding text (e.g. markdown fences).
  const braced = text.match(/\{[\s\S]*\}/)
  if (!braced) {
    log(`Vision: Gemini reply contained no JSON object (finishReason=${candidate?.finishReason})`)
    return undefined
  }
  try {
    return JSON.parse(braced[0])
  } catch (e) {
    log(`Vision: unparsable JSON in Gemini reply (finishReason=${candidate?.finishReason}): ${e.message}`)
    return undefined
  }
}
|
|
|
|
/**
 * Validate a vision request body and extract the raw base64 image payload.
 *
 * @param {*} req - Unused here; kept so existing callers' argument order still works.
 * @param {*} body - Parsed request body; expected to carry a string `image`
 *   field, either bare base64 or a `data:image/...;base64,` URL.
 * @param {string} label - Short tag included in the log line.
 * @returns {{base64: string}|{error: string, status: number}} The payload with
 *   any data-URL prefix stripped, or an error descriptor: 500 when
 *   `cfg.AI_API_KEY` is not set, 400 when the image is absent, not a string,
 *   or empty once the prefix is removed.
 */
function extractBase64 (req, body, label) {
  if (!cfg.AI_API_KEY) return { error: 'AI_API_KEY not configured', status: 500 }

  // A missing body or a non-string image is a client error, not a crash.
  const image = body?.image
  if (typeof image !== 'string') return { error: 'Missing image field (base64)', status: 400 }

  const base64 = image.trim().replace(/^data:image\/[^;]+;base64,/, '')
  // Covers '' as well as a data URL that has a prefix but no payload.
  if (!base64) return { error: 'Missing image field (base64)', status: 400 }

  // Decoded size estimate: every 4 base64 characters encode 3 bytes.
  log(`Vision ${label}: received image ${Math.round(base64.length * 3 / 4 / 1024)}KB`)
  return { base64 }
}
|
|
|
|
// Prompt for the barcode scan: asks the model to read every identifier on an
// equipment label and return at most three, serial/MAC first.
const BARCODE_PROMPT = `Read ALL identifiers on this equipment label photo (may be blurry/tilted).
Extract: barcode text, serial numbers (S/N, SN), MAC addresses (12 hex chars), model numbers (M/N, Model, P/N), IMEI, GPON SN.
Examples: 1608K44D9E79FAFF5, TPLG-A1B2C3D4, 04:18:D6:A1:B2:C3, HWTC87654321.
Try your BEST on every character. Return max 3 most important (serial/MAC first).`
|
|
|
|
// Response schema for the barcode scan: an object whose required `barcodes`
// property is an array of at most 3 strings.
const BARCODE_SCHEMA = {
  type: 'object',
  properties: { barcodes: { type: 'array', items: { type: 'string' }, maxItems: 3 } },
  required: ['barcodes'],
}
|
|
|
|
/**
 * HTTP handler for the barcode scan: parses the request body, validates the
 * image with `extractBase64`, runs `extractBarcodes` and replies through `json`.
 *
 * Replies with the validation status and `{ error }` when the image check
 * fails, 200 with the extraction result on success, and 500 with
 * `{ error }` when extraction throws.
 *
 * @param {*} req - Incoming request, handed to `parseBody` (presumably a Node HTTP request; confirm against the router).
 * @param {*} res - Response object, handed to `json`.
 * @returns {Promise<*>} Whatever `json` returns.
 */
async function handleBarcodes (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'barcode')
  if (check.error) return json(res, check.status, { error: check.error })

  try {
    const result = await extractBarcodes(check.base64)
    return json(res, 200, result)
  } catch (e) {
    log('Vision barcode error:', e.message)
    return json(res, 500, { error: `Vision extraction failed: ${e.message}` })
  }
}
|
|
|
|
/**
 * Ask Gemini for the identifiers on an equipment label and normalise them.
 *
 * Accepts either `{ barcodes: [...] }` or a bare array from the model. Each
 * string has all whitespace removed; values of 3 characters or fewer and
 * duplicates are dropped; at most 3 values are returned, in model order.
 *
 * @param {string} base64Image - Base64 image payload forwarded to `geminiVision`.
 * @returns {Promise<{barcodes: string[]}>} Up to three cleaned identifiers
 *   (empty when the model returned nothing usable).
 */
async function extractBarcodes (base64Image) {
  const parsed = await geminiVision(base64Image, BARCODE_PROMPT, BARCODE_SCHEMA)
  if (!parsed) return { barcodes: [] }

  const candidates = Array.isArray(parsed) ? parsed : Array.isArray(parsed.barcodes) ? parsed.barcodes : []
  // Strip whitespace BEFORE the length check so "A B C" is judged as "ABC".
  const cleaned = candidates
    .filter(v => typeof v === 'string')
    .map(v => v.replace(/\s+/g, ''))
    .filter(v => v.length > 3)
  // De-duplicate (Set keeps first-seen order) so repeats don't use up the 3 slots.
  const barcodes = [...new Set(cleaned)].slice(0, 3)

  log(`Vision: extracted ${barcodes.length} barcode(s): ${barcodes.join(', ')}`)
  return { barcodes }
}
|
|
|
|
// Prompt for the equipment-label scan: asks for brand, model, serial, MAC,
// GPON SN, hardware version and barcodes, with missing fields set to null.
const EQUIP_PROMPT = `Read this ISP equipment label (ONT/ONU/router/modem). Return structured JSON.
Extract: brand/manufacturer, model (M/N, P/N), serial (S/N, SN, under barcode), MAC address (12 hex, no separators), GPON SN, HW version, barcodes.
Try your BEST on blurry/angled text. Set missing fields to null.`
|
|
|
|
// Response schema for the equipment-label scan. Every scalar field is a
// nullable string; `barcodes` is an array of at most 5 strings. Only
// `serial_number` is required.
const EQUIP_SCHEMA = {
  type: 'object',
  properties: {
    brand: { type: 'string', nullable: true },
    model: { type: 'string', nullable: true },
    serial_number: { type: 'string', nullable: true },
    mac_address: { type: 'string', nullable: true },
    gpon_sn: { type: 'string', nullable: true },
    hw_version: { type: 'string', nullable: true },
    equipment_type: { type: 'string', nullable: true },
    barcodes: { type: 'array', items: { type: 'string' }, maxItems: 5 },
  },
  required: ['serial_number'],
}
|
|
|
|
/**
 * HTTP handler for the equipment-label scan: parses the request body,
 * validates the image with `extractBase64`, sends it to Gemini with the
 * equipment prompt/schema, normalises the result and replies through `json`.
 *
 * Normalisation: `mac_address` loses `:`, `-`, `.` and whitespace and is
 * upper-cased; `serial_number` loses all whitespace; `barcodes` is always an
 * array. When the model yields nothing usable the reply is
 * `{ serial_number: null, barcodes: [] }`.
 *
 * @param {*} req - Incoming request, handed to `parseBody` (presumably a Node HTTP request; confirm against the router).
 * @param {*} res - Response object, handed to `json`.
 * @returns {Promise<*>} Whatever `json` returns (status 200, the validation
 *   status, or 500 when extraction throws).
 */
async function handleEquipment (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'equipment')
  if (check.error) return json(res, check.status, { error: check.error })

  try {
    const parsed = await geminiVision(check.base64, EQUIP_PROMPT, EQUIP_SCHEMA)
    // Anything that is not a plain object (nothing parsed, array, primitive) is "no result".
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return json(res, 200, { serial_number: null, barcodes: [] })
    }

    // Only strings can be normalised; a stray non-string value is passed through untouched.
    if (typeof parsed.mac_address === 'string') parsed.mac_address = parsed.mac_address.replace(/[:\-.\s]/g, '').toUpperCase()
    if (typeof parsed.serial_number === 'string') parsed.serial_number = parsed.serial_number.replace(/\s+/g, '')
    // `barcodes` is optional in the schema; keep the reply shape aligned with the empty fallback.
    if (!Array.isArray(parsed.barcodes)) parsed.barcodes = []

    log(`Vision equipment: brand=${parsed.brand} model=${parsed.model} sn=${parsed.serial_number} mac=${parsed.mac_address}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision equipment error:', e.message)
    return json(res, 500, { error: `Vision extraction failed: ${e.message}` })
  }
}
|
|
|
|
// ─── Invoice / bill OCR ────────────────────────────────────────────────
// We run this on Gemini (not on Ollama) because the ops VM has no GPU —
// ops must not depend on a local vision model. The schema matches what
// the ops InvoiceScanPage expects so switching away from Ollama is a
// drop-in replacement on the frontend.
|
|
|
|
// Prompt for invoice/bill OCR: fixes the date format, currency default,
// GST/QST mapping, line-item handling and the values to use for missing fields.
const INVOICE_PROMPT = `You are an invoice/bill OCR assistant. Extract structured data from this photo of a vendor invoice or bill.
Return ONLY valid JSON that matches the provided schema. No prose, no markdown.
Rules:
- "date" / "due_date" must be ISO YYYY-MM-DD. If the date is MM/DD/YYYY or DD/MM/YYYY and ambiguous, prefer YYYY-MM-DD with the most likely interpretation for Canadian/Québec invoices.
- "currency" is a 3-letter code (CAD, USD, EUR). Default to CAD if not visible.
- "tax_gst" = GST/TPS/HST (Canadian federal tax); "tax_qst" = QST/TVQ (Québec provincial tax).
- "items" is a line-by-line list; keep description as printed, collapse whitespace.
- Missing fields → null for strings, 0 for numbers, [] for items.`
|
|
|
|
// Response schema for invoice OCR. Header fields are nullable strings/numbers,
// `items` is an array of line objects (description, qty, rate, amount), and
// only `vendor` and `total` are required.
const INVOICE_SCHEMA = {
  type: 'object',
  properties: {
    vendor: { type: 'string', nullable: true },
    vendor_address: { type: 'string', nullable: true },
    invoice_number: { type: 'string', nullable: true },
    date: { type: 'string', nullable: true },
    due_date: { type: 'string', nullable: true },
    subtotal: { type: 'number', nullable: true },
    tax_gst: { type: 'number', nullable: true },
    tax_qst: { type: 'number', nullable: true },
    total: { type: 'number', nullable: true },
    currency: { type: 'string', nullable: true },
    items: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          description: { type: 'string', nullable: true },
          qty: { type: 'number', nullable: true },
          rate: { type: 'number', nullable: true },
          amount: { type: 'number', nullable: true },
        },
      },
    },
    notes: { type: 'string', nullable: true },
  },
  required: ['vendor', 'total'],
}
|
|
|
|
// Convert a printed amount ("1,234.56", "1 234,56 $", "12,5") to a number; 0 when it is not numeric.
function parseAmount (raw) {
  let digits = raw.replace(/[^0-9.,\-]/g, '')
  const lastComma = digits.lastIndexOf(',')
  const lastDot = digits.lastIndexOf('.')
  const decimals = digits.length - lastComma - 1
  if (lastComma > lastDot && decimals >= 1 && decimals <= 2) {
    // The comma is the final separator with 1-2 digits after it: decimal comma
    // (French/Québec style). Everything before it is grouping.
    digits = `${digits.slice(0, lastComma).replace(/[.,]/g, '')}.${digits.slice(lastComma + 1)}`
  } else {
    // Otherwise commas are thousands separators.
    digits = digits.replace(/,/g, '')
  }
  const value = Number(digits)
  return Number.isFinite(value) ? value : 0
}

/**
 * HTTP handler for invoice OCR: parses the request body, validates the image
 * with `extractBase64`, sends it to Gemini with the invoice prompt/schema,
 * coerces string amounts to numbers and replies through `json`.
 *
 * Coercion covers `subtotal`, `tax_gst`, `tax_qst`, `total` and each line
 * item's `qty`, `rate`, `amount`; values that are already numbers (or null)
 * are left alone. `items` is always an array in the reply. When the model
 * yields nothing parsable the reply is `{ vendor: null, total: null, items: [] }`.
 *
 * @param {*} req - Incoming request, handed to `parseBody` (presumably a Node HTTP request; confirm against the router).
 * @param {*} res - Response object, handed to `json`.
 * @returns {Promise<*>} Whatever `json` returns (status 200, the validation
 *   status, or 500 when extraction throws).
 */
async function handleInvoice (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'invoice')
  if (check.error) return json(res, check.status, { error: check.error })

  try {
    const parsed = await geminiVision(check.base64, INVOICE_PROMPT, INVOICE_SCHEMA)
    if (!parsed) return json(res, 200, { vendor: null, total: null, items: [] })

    // Normalize: coerce numbers (model sometimes returns "1,234.56" as string)
    for (const key of ['subtotal', 'tax_gst', 'tax_qst', 'total']) {
      if (typeof parsed[key] === 'string') parsed[key] = parseAmount(parsed[key])
    }

    // `items` is optional in the schema; keep the reply shape aligned with the empty fallback.
    if (!Array.isArray(parsed.items)) parsed.items = []
    for (const item of parsed.items) {
      // A null / non-object entry must not turn the whole invoice into a 500.
      if (!item || typeof item !== 'object') continue
      for (const key of ['qty', 'rate', 'amount']) {
        if (typeof item[key] === 'string') item[key] = parseAmount(item[key])
      }
    }

    log(`Vision invoice: vendor=${parsed.vendor} total=${parsed.total} items=${parsed.items.length}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision invoice error:', e.message)
    return json(res, 500, { error: `Vision extraction failed: ${e.message}` })
  }
}
|
|
|
|
module.exports = { handleBarcodes, extractBarcodes, handleEquipment, handleInvoice }
|