gigafibre-fsm/apps/ops/src/api/ocr.js
louispaulb e50ea88c08 feat: unify vision on Gemini + port field tech scan/device into /j
- Invoice OCR migrated from Ollama (GPU-bound, local) to Gemini 2.5
  Flash via new targo-hub /vision/invoice endpoint with responseSchema
  enforcement. Ops VM no longer needs a GPU.
- Ops /j/* now has full camera scanner (TechScanPage) ported from
  apps/field with 8s timeout + offline queue + auto-link to Dispatch
  Job context on serial/barcode/MAC 3-tier lookup.
- New TechDevicePage reached via /j/device/:serial showing every
  ERPNext entity related to a scanned device: Service Equipment,
  Customer, Service Location, active Subscription, open Issues,
  upcoming Dispatch Jobs, OLT info.
- New docs/VISION_AND_OCR.md (full pipeline + §10 relationship graph
  + §8.1 secrets/rotation policy). Cross-linked from ARCHITECTURE,
  ROADMAP, HANDOFF, README.
- Nginx /ollama/ proxy blocks removed from both ops + field.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-22 11:26:01 -04:00

105 lines
4.0 KiB
JavaScript

/**
* OCR / Vision client — all calls go through targo-hub, which runs Gemini
* 2.5 Flash. We deliberately do NOT call Ollama from the ops SPA because the
* ops/ERPNext VM has no GPU; invoice OCR used to hit a local Ollama vision
* model (llama3.2-vision), but that's now centralized in the hub so every
* app (ops, field-as-ops `/j`, future client portal) gets the same model,
* same prompt, same normalization.
*
* Endpoints used:
* POST {HUB_URL}/vision/barcodes → { barcodes: string[] }
* POST {HUB_URL}/vision/equipment → { brand, model, serial_number, mac_address, gpon_sn, hw_version, equipment_type, barcodes }
* POST {HUB_URL}/vision/invoice → { vendor, vendor_address, invoice_number, date, due_date, subtotal, tax_gst, tax_qst, total, currency, items[], notes }
* GET {HUB_URL}/health → reachability probe only (see checkOllamaStatus)
*
* The three /vision endpoints are public (no Authentik header) — the hub rate-limits and logs.
*/
import { HUB_URL } from 'src/config/hub'
// Hub vision endpoints. Every URL is rooted at HUB_URL and lives under the
// `/vision/` path; the helper keeps the three definitions in lockstep.
const visionEndpoint = (name) => `${HUB_URL}/vision/${name}`
const VISION_BARCODES = visionEndpoint('barcodes')
const VISION_EQUIPMENT = visionEndpoint('equipment')
const VISION_INVOICE = visionEndpoint('invoice')
/**
 * Reduce an image payload to bare base64.
 *
 * A leading `data:image/<subtype>;base64,` header is removed when present;
 * anything else is returned unchanged. The hub accepts either form, but
 * normalizing here keeps error messages + logs consistent.
 *
 * @param {string} base64Image — base64 or data URI; falsy input yields ''
 * @returns {string} the payload without its data-URI header
 */
function stripDataUri (base64Image) {
  const DATA_URI_HEADER = /^data:image\/[^;]+;base64,/
  const raw = base64Image ? String(base64Image) : ''
  return raw.replace(DATA_URI_HEADER, '')
}
/**
 * Bill/invoice OCR: POST a photo to the hub's /vision/invoice endpoint
 * (Gemini-backed) and hand back the parsed JSON response.
 *
 * The image is passed through stripDataUri first, so the request body
 * always carries bare base64 under the `image` key.
 *
 * @param {string} base64Image — base64 or data URI
 * @returns {Promise<object>} Parsed invoice data (schema in targo-hub/lib/vision.js)
 * @throws {Error} on network/API failure — caller decides whether to retry.
 *   For a non-2xx response the message is `Invoice OCR failed: ` followed by
 *   the response body, or by the HTTP status when the body is empty/unreadable.
 */
export async function ocrBill (base64Image) {
  const payload = JSON.stringify({ image: stripDataUri(base64Image) })
  const response = await fetch(VISION_INVOICE, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  })
  if (response.ok) return response.json()

  // Prefer the hub's own error text; a failed body read degrades to the status code
  const detail = await response.text().catch(() => '')
  throw new Error(`Invoice OCR failed: ${detail || response.status}`)
}
/**
 * Send a photo to Gemini (via hub) for generic barcode / serial extraction.
 *
 * The image is normalized with stripDataUri before sending, the same way
 * ocrBill does, so the hub always receives bare base64 regardless of whether
 * the caller passed a data URI.
 *
 * @param {string} base64Image — base64 or data URI
 * @returns {Promise<{ barcodes: string[] }>} `barcodes` is always an array;
 *   a missing, null or non-array value in the hub response becomes `[]`.
 * @throws {Error} on network/API failure — `useScanner` uses this signature
 * to decide whether to queue the photo for retry (see isRetryable()), so the
 * `Vision scan failed: <body|status>` message format is left untouched.
 */
export async function scanBarcodes (base64Image) {
  const res = await fetch(VISION_BARCODES, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ image: stripDataUri(base64Image) }),
  })
  if (!res.ok) {
    // Prefer the hub's error text; fall back to the HTTP status if the body is empty/unreadable
    const text = await res.text().catch(() => '')
    throw new Error('Vision scan failed: ' + (text || res.status))
  }
  const data = await res.json()
  // Guard against a JSON `null` body or a malformed `barcodes` field so the
  // declared return shape holds for every successful response.
  const barcodes = data && Array.isArray(data.barcodes) ? data.barcodes : []
  return { barcodes }
}
/**
 * Structured equipment label scan — richer schema than scanBarcodes for
 * ONT/ONU/router labels. Unique to ops (was not in the old field client).
 *
 * The image is normalized with stripDataUri before sending, the same way
 * ocrBill does, so the hub always receives bare base64 regardless of whether
 * the caller passed a data URI.
 *
 * @param {string} base64Image — base64 or data URI
 * @returns {Promise<object>} See EQUIP_SCHEMA in targo-hub/lib/vision.js
 * @throws {Error} on network/API failure. For a non-2xx response the message
 *   is `Equipment scan failed: ` followed by the response body, or by the
 *   HTTP status when the body is empty/unreadable.
 */
export async function scanEquipmentLabel (base64Image) {
  const res = await fetch(VISION_EQUIPMENT, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ image: stripDataUri(base64Image) }),
  })
  if (!res.ok) {
    // Prefer the hub's error text; fall back to the HTTP status if the body is empty/unreadable
    const text = await res.text().catch(() => '')
    throw new Error('Equipment scan failed: ' + (text || res.status))
  }
  return res.json()
}
/**
 * Vision service health probe.
 *
 * Despite the name (a holdover from when this pinged `/ollama/api/tags` to
 * confirm the local vision model was warm), this only issues
 * `GET {HUB_URL}/health`. Everything runs on Gemini via the hub now, and the
 * hub validates AI_API_KEY on startup, so a reachable hub is treated as a
 * working vision backend. The reported model list is a fixed value, not
 * something discovered from the hub.
 *
 * Never rejects: failures are reported through the returned object.
 *
 * @returns {Promise<{ online: boolean, models?: string[], hasVision?: boolean, error?: string }>}
 *   `{ online: true, models, hasVision }` on a 2xx response, otherwise
 *   `{ online: false, error }` where `error` is `HTTP <status>` or the
 *   message of the error thrown while fetching.
 */
export async function checkOllamaStatus () {
  const offline = (error) => ({ online: false, error })
  try {
    const response = await fetch(`${HUB_URL}/health`, { method: 'GET' })
    if (response.ok) {
      return { online: true, models: ['gemini-2.5-flash'], hasVision: true }
    }
    return offline('HTTP ' + response.status)
  } catch (err) {
    return offline(err.message)
  }
}