From e50ea88c0876af94a472f084bdfbb31ad3c3189b Mon Sep 17 00:00:00 2001 From: louispaulb Date: Wed, 22 Apr 2026 11:26:01 -0400 Subject: [PATCH] feat: unify vision on Gemini + port field tech scan/device into /j MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Invoice OCR migrated from Ollama (GPU-bound, local) to Gemini 2.5 Flash via new targo-hub /vision/invoice endpoint with responseSchema enforcement. Ops VM no longer needs a GPU. - Ops /j/* now has full camera scanner (TechScanPage) ported from apps/field with 8s timeout + offline queue + auto-link to Dispatch Job context on serial/barcode/MAC 3-tier lookup. - New TechDevicePage reached via /j/device/:serial showing every ERPNext entity related to a scanned device: Service Equipment, Customer, Service Location, active Subscription, open Issues, upcoming Dispatch Jobs, OLT info. - New docs/VISION_AND_OCR.md (full pipeline + §10 relationship graph + §8.1 secrets/rotation policy). Cross-linked from ARCHITECTURE, ROADMAP, HANDOFF, README. - Nginx /ollama/ proxy blocks removed from both ops + field. 
Co-Authored-By: Claude Opus 4.7 --- README.md | 2 +- apps/field/infra/nginx.conf | 13 +- apps/field/src/api/ocr.js | 102 ++- apps/ops/infra/nginx.conf | 13 +- apps/ops/package.json | 1 + apps/ops/src/api/ocr.js | 142 +++-- apps/ops/src/composables/useScanner.js | 223 +++++-- .../src/modules/tech/pages/TechDevicePage.vue | 401 ++++++++++++ .../src/modules/tech/pages/TechScanPage.vue | 594 +++++++++++++++--- apps/ops/src/pages/OcrPage.vue | 6 +- apps/ops/src/router/index.js | 1 + apps/ops/src/stores/offline.js | 236 +++++++ docs/ARCHITECTURE.md | 6 + docs/HANDOFF.md | 3 + docs/ROADMAP.md | 2 +- docs/VISION_AND_OCR.md | 482 ++++++++++++++ services/targo-hub/lib/vision.js | 73 ++- services/targo-hub/server.js | 1 + 18 files changed, 2017 insertions(+), 284 deletions(-) create mode 100644 apps/ops/src/modules/tech/pages/TechDevicePage.vue create mode 100644 apps/ops/src/stores/offline.js create mode 100644 docs/VISION_AND_OCR.md diff --git a/README.md b/README.md index 792f544..201e37a 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ GenieACS Twilio Traccar modem-bridge **Frontend:** Vue 3, Quasar v2, Pinia, Vite, Mapbox GL JS **Backend:** ERPNext v16 / Frappe (Python), PostgreSQL, Node.js (targo-hub) **Infra:** Docker, Traefik v2.11, Authentik SSO, Proxmox -**Integrations:** Twilio (SMS), Mailjet (email), Stripe (payments), Traccar (GPS), GenieACS (TR-069), Ollama (OCR) +**Integrations:** Twilio (SMS), Mailjet (email), Stripe (payments), Traccar (GPS), GenieACS (TR-069), Gemini 2.5 Flash via targo-hub (vision/OCR — see [docs/VISION_AND_OCR.md](docs/VISION_AND_OCR.md)) ## Data Volumes (migrated from legacy) diff --git a/apps/field/infra/nginx.conf b/apps/field/infra/nginx.conf index 63398f7..e7d17d7 100644 --- a/apps/field/infra/nginx.conf +++ b/apps/field/infra/nginx.conf @@ -16,16 +16,9 @@ server { proxy_set_header X-Forwarded-Proto https; } - # Ollama Vision API proxy — for bill/invoice OCR (legacy, optional) - location /ollama/ { - resolver 127.0.0.11 
valid=10s; - set $ollama_upstream http://ollama:11434; - proxy_pass $ollama_upstream/; - proxy_set_header Host $host; - proxy_read_timeout 300s; - proxy_send_timeout 300s; - client_max_body_size 20m; - } + # NOTE: Ollama Vision proxy removed 2026-04-22 — all invoice OCR and + # barcode/equipment scans now go directly to targo-hub (Gemini 2.5 Flash). + # See docs/VISION_AND_OCR.md. # Targo Hub API proxy — vision, devices, etc. location /hub/ { diff --git a/apps/field/src/api/ocr.js b/apps/field/src/api/ocr.js index 3f66110..5f90e7a 100644 --- a/apps/field/src/api/ocr.js +++ b/apps/field/src/api/ocr.js @@ -1,105 +1,73 @@ -import { authFetch } from './auth' +/** + * OCR / Vision client (field app). + * + * All calls go through targo-hub, which runs Gemini 2.5 Flash. We used to + * hit a local Ollama (llama3.2-vision) for invoice OCR, but that required + * a GPU on the serving VM — ops doesn't have one, so we centralized every + * vision model behind the hub. + * + * NOTE: apps/field is being folded into apps/ops under /j (see + * docs/ARCHITECTURE.md §"Legacy Retirement Plan"). During the transition + * we keep this file in sync with apps/ops/src/api/ocr.js so no surprises + * when code moves over. + */ -const OLLAMA_URL = '/ollama/api/generate' -const HUB_VISION_URL = 'https://msg.gigafibre.ca/vision/barcodes' +const HUB_URL = 'https://msg.gigafibre.ca' -const OCR_PROMPT = `You are an invoice/bill OCR assistant. Extract the following fields from this image of a bill or invoice. Return ONLY valid JSON, no markdown, no explanation. 
+const VISION_BARCODES = `${HUB_URL}/vision/barcodes` +const VISION_INVOICE = `${HUB_URL}/vision/invoice` -{ - "vendor": "company name on the bill", - "vendor_address": "full address if visible", - "invoice_number": "invoice/bill number", - "date": "YYYY-MM-DD format", - "due_date": "YYYY-MM-DD if visible, null otherwise", - "subtotal": 0.00, - "tax_gst": 0.00, - "tax_qst": 0.00, - "total": 0.00, - "currency": "CAD", - "items": [ - { "description": "line item description", "qty": 1, "rate": 0.00, "amount": 0.00 } - ], - "notes": "any other relevant text (account number, payment terms, etc.)" +function stripDataUri (base64Image) { + return String(base64Image || '').replace(/^data:image\/[^;]+;base64,/, '') } -If a field is not visible, set it to null. Always return valid JSON.` - /** - * Send an image to Ollama Vision for bill/invoice OCR. - * @param {string} base64Image — base64 encoded image (no data: prefix) - * @returns {object} Parsed invoice data + * Send a photo to Gemini (via hub) for bill/invoice OCR. 
+ * @param {string} base64Image — base64 or data URI + * @returns {Promise} Parsed invoice (see targo-hub/lib/vision.js INVOICE_SCHEMA) */ export async function ocrBill (base64Image) { - // Strip data:image/...;base64, prefix if present - const clean = base64Image.replace(/^data:image\/[^;]+;base64,/, '') - - const res = await authFetch(OLLAMA_URL, { + const res = await fetch(VISION_INVOICE, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: 'llama3.2-vision:11b', - prompt: OCR_PROMPT, - images: [clean], - stream: false, - options: { - temperature: 0.1, - num_predict: 2048, - }, - }), + body: JSON.stringify({ image: stripDataUri(base64Image) }), }) - if (!res.ok) { - const text = await res.text() - throw new Error('OCR failed: ' + (text || res.status)) - } - - const data = await res.json() - const raw = data.response || '' - - // Extract JSON from response (model might wrap it in markdown) - const jsonMatch = raw.match(/\{[\s\S]*\}/) - if (!jsonMatch) throw new Error('No JSON in OCR response') - - try { - return JSON.parse(jsonMatch[0]) - } catch (e) { - throw new Error('Invalid JSON from OCR: ' + e.message) + const text = await res.text().catch(() => '') + throw new Error('Invoice OCR failed: ' + (text || res.status)) } + return res.json() } /** - * Send image to Gemini Vision (via targo-hub) for barcode/serial extraction. + * Send a photo to Gemini (via hub) for barcode / serial extraction. 
* @param {string} base64Image — base64 or data URI - * @returns {{ barcodes: string[] }} + * @returns {Promise<{ barcodes: string[] }>} */ export async function scanBarcodes (base64Image) { - // Direct call to targo-hub (cross-origin, no auth needed) - const res = await fetch(HUB_VISION_URL, { + const res = await fetch(VISION_BARCODES, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ image: base64Image }), }) - if (!res.ok) { - const text = await res.text() + const text = await res.text().catch(() => '') throw new Error('Vision scan failed: ' + (text || res.status)) } - const data = await res.json() return { barcodes: data.barcodes || [] } } /** - * Check if Ollama is running and the vision model is available. + * Vision service health probe. Pings the hub's /health endpoint. + * Kept under the legacy name `checkOllamaStatus` for backward compat with + * any caller still referencing it — ops uses the same name. */ export async function checkOllamaStatus () { try { - const res = await authFetch('/ollama/api/tags') + const res = await fetch(`${HUB_URL}/health`, { method: 'GET' }) if (!res.ok) return { online: false, error: 'HTTP ' + res.status } - const data = await res.json() - const models = (data.models || []).map(m => m.name) - const hasVision = models.some(m => m.includes('llama3.2-vision')) - return { online: true, models, hasVision } + return { online: true, models: ['gemini-2.5-flash'], hasVision: true } } catch (e) { return { online: false, error: e.message } } diff --git a/apps/ops/infra/nginx.conf b/apps/ops/infra/nginx.conf index 9151dfc..52b04e3 100644 --- a/apps/ops/infra/nginx.conf +++ b/apps/ops/infra/nginx.conf @@ -19,15 +19,10 @@ server { proxy_set_header X-Forwarded-Proto https; } - # Ollama Vision API proxy — for bill/invoice OCR (dynamic resolve, won't crash if ollama is down) - location /ollama/ { - set $ollama_upstream http://ollama:11434; - proxy_pass $ollama_upstream/; - proxy_set_header Host $host; - 
proxy_read_timeout 300s; - proxy_send_timeout 300s; - client_max_body_size 20m; - } + # NOTE: Ollama Vision proxy removed 2026-04-22 — invoice OCR and all + # barcode/equipment scans now go directly to targo-hub (Gemini 2.5 Flash). + # See docs/VISION_AND_OCR.md. The hub handles CORS + rate-limit, so no + # nginx pass-through is needed here. # SPA fallback — all routes serve index.html location / { diff --git a/apps/ops/package.json b/apps/ops/package.json index 4ddf60a..a246eb1 100644 --- a/apps/ops/package.json +++ b/apps/ops/package.json @@ -14,6 +14,7 @@ "@twilio/voice-sdk": "^2.18.1", "chart.js": "^4.5.1", "cytoscape": "^3.33.2", + "idb-keyval": "^6.2.1", "lucide-vue-next": "^1.0.0", "pinia": "^2.1.7", "quasar": "^2.16.10", diff --git a/apps/ops/src/api/ocr.js b/apps/ops/src/api/ocr.js index 77c7ee2..e14f076 100644 --- a/apps/ops/src/api/ocr.js +++ b/apps/ops/src/api/ocr.js @@ -1,85 +1,103 @@ -import { authFetch } from './auth' +/** + * OCR / Vision client — all calls go through targo-hub, which runs Gemini + * 2.5 Flash. We deliberately do NOT call Ollama from the ops SPA because the + * ops/ERPNext VM has no GPU; invoice OCR used to hit a local Ollama vision + * model (llama3.2-vision), but that's now centralized in the hub so every + * app (ops, field-as-ops `/j`, future client portal) gets the same model, + * same prompt, same normalization. + * + * Endpoints used: + * POST {HUB_URL}/vision/barcodes → { barcodes: string[] } + * POST {HUB_URL}/vision/equipment → { brand, model, serial_number, mac_address, gpon_sn, hw_version, equipment_type, barcodes } + * POST {HUB_URL}/vision/invoice → { vendor, vendor_address, invoice_number, date, due_date, subtotal, tax_gst, tax_qst, total, currency, items[], notes } + * + * All three are public (no Authentik header) — the hub rate-limits and logs. + */ -// Use the Vite base path so requests route through ops-frontend nginx -// In production: /ops/ollama/... 
→ Traefik strips /ops → nginx /ollama/ → Ollama -const BASE = import.meta.env.BASE_URL || '/' -const OLLAMA_URL = BASE + 'ollama/api/generate' +import { HUB_URL } from 'src/config/hub' -const OCR_PROMPT = `You are an invoice/bill OCR assistant. Extract the following fields from this image of a bill or invoice. Return ONLY valid JSON, no markdown, no explanation. +const VISION_BARCODES = `${HUB_URL}/vision/barcodes` +const VISION_EQUIPMENT = `${HUB_URL}/vision/equipment` +const VISION_INVOICE = `${HUB_URL}/vision/invoice` -{ - "vendor": "company name on the bill", - "vendor_address": "full address if visible", - "invoice_number": "invoice/bill number", - "date": "YYYY-MM-DD format", - "due_date": "YYYY-MM-DD if visible, null otherwise", - "subtotal": 0.00, - "tax_gst": 0.00, - "tax_qst": 0.00, - "total": 0.00, - "currency": "CAD", - "items": [ - { "description": "line item description", "qty": 1, "rate": 0.00, "amount": 0.00 } - ], - "notes": "any other relevant text (account number, payment terms, etc.)" +/** Strip any `data:image/...;base64,` prefix — hub accepts either form but + * we normalize here so error messages + logs stay consistent. */ +function stripDataUri (base64Image) { + return String(base64Image || '').replace(/^data:image\/[^;]+;base64,/, '') } -If a field is not visible, set it to null. Always return valid JSON.` - /** - * Send an image to Ollama Vision for bill/invoice OCR. - * @param {string} base64Image — base64 encoded image (no data: prefix) - * @returns {object} Parsed invoice data + * Send a photo to Gemini (via hub) for bill/invoice OCR. 
+ * @param {string} base64Image — base64 or data URI + * @returns {Promise} Parsed invoice data (schema in targo-hub/lib/vision.js) + * @throws {Error} on network/API failure — caller decides whether to retry */ export async function ocrBill (base64Image) { - // Strip data:image/...;base64, prefix if present - const clean = base64Image.replace(/^data:image\/[^;]+;base64,/, '') - - const res = await authFetch(OLLAMA_URL, { + const res = await fetch(VISION_INVOICE, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: 'llama3.2-vision:11b', - prompt: OCR_PROMPT, - images: [clean], - stream: false, - options: { - temperature: 0.1, - num_predict: 2048, - }, - }), + body: JSON.stringify({ image: stripDataUri(base64Image) }), }) - if (!res.ok) { - const text = await res.text() - throw new Error('OCR failed: ' + (text || res.status)) - } - - const data = await res.json() - const raw = data.response || '' - - // Extract JSON from response (model might wrap it in markdown) - const jsonMatch = raw.match(/\{[\s\S]*\}/) - if (!jsonMatch) throw new Error('No JSON in OCR response') - - try { - return JSON.parse(jsonMatch[0]) - } catch (e) { - throw new Error('Invalid JSON from OCR: ' + e.message) + const text = await res.text().catch(() => '') + throw new Error('Invoice OCR failed: ' + (text || res.status)) } + return res.json() } /** - * Check if Ollama is running and the vision model is available. + * Send a photo to Gemini (via hub) for generic barcode / serial extraction. + * @param {string} base64Image — base64 or data URI + * @returns {Promise<{ barcodes: string[] }>} + * @throws {Error} on network/API failure — `useScanner` uses this signature + * to decide whether to queue the photo for retry (see isRetryable()). 
+ */ +export async function scanBarcodes (base64Image) { + const res = await fetch(VISION_BARCODES, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ image: base64Image }), + }) + if (!res.ok) { + const text = await res.text().catch(() => '') + throw new Error('Vision scan failed: ' + (text || res.status)) + } + const data = await res.json() + return { barcodes: data.barcodes || [] } +} + +/** + * Structured equipment label scan — richer schema than scanBarcodes for + * ONT/ONU/router labels. Unique to ops (was not in the old field client). + * @param {string} base64Image — base64 or data URI + * @returns {Promise} See EQUIP_SCHEMA in targo-hub/lib/vision.js + */ +export async function scanEquipmentLabel (base64Image) { + const res = await fetch(VISION_EQUIPMENT, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ image: base64Image }), + }) + if (!res.ok) { + const text = await res.text().catch(() => '') + throw new Error('Equipment scan failed: ' + (text || res.status)) + } + return res.json() +} + +/** + * Vision service health probe. + * + * Historically this pinged `/ollama/api/tags` to confirm the local vision + * model was warm. Now that everything is on Gemini via the hub, we just + * check the hub is reachable — the hub itself validates AI_API_KEY on + * startup, so if it's up, Gemini works. 
*/ export async function checkOllamaStatus () { try { - const res = await authFetch(BASE + 'ollama/api/tags') + const res = await fetch(`${HUB_URL}/health`, { method: 'GET' }) if (!res.ok) return { online: false, error: 'HTTP ' + res.status } - const data = await res.json() - const models = (data.models || []).map(m => m.name) - const hasVision = models.some(m => m.includes('llama3.2-vision')) - return { online: true, models, hasVision } + return { online: true, models: ['gemini-2.5-flash'], hasVision: true } } catch (e) { return { online: false, error: e.message } } diff --git a/apps/ops/src/composables/useScanner.js b/apps/ops/src/composables/useScanner.js index 3774e34..e1beb6b 100644 --- a/apps/ops/src/composables/useScanner.js +++ b/apps/ops/src/composables/useScanner.js @@ -1,50 +1,154 @@ -import { ref } from 'vue' +/** + * useScanner — camera-capture + Gemini Vision composable. + * + * Two capture modes, one pipeline: + * - processPhoto(file) → barcode/serial extraction (ScanPage, /j) + * - scanEquipmentLabel(file) → structured ONT/ONU label (equipment + * linking, ClientDetailPage photos) + * + * Both resize the photo twice: + * - 400px for the on-screen thumbnail + * - 1600px @ q=0.92 for Gemini (text readability > filesize) + * + * Resilience (barcode mode only): + * If Gemini doesn't answer within SCAN_TIMEOUT_MS (weak LTE, basement, + * service cold-start), the photo is queued in IndexedDB via the offline + * store and retried in the background. The tech sees a "scan en attente" + * chip, keeps scanning the next equipment, and the late result is pushed + * back into `barcodes` via a reactive watcher on `offline.scanResults`. + * + * Equipment-label mode does NOT queue — it's typically invoked on a desktop + * or strong wifi (indoor install, office) where the extra complexity of + * background retry isn't worth it, and callers want a synchronous answer + * (to pre-fill an equipment form). 
+ * + * Merged from apps/ops/src/composables/useScanner.js (which had the + * equipment-label branch) and apps/field/src/composables/useScanner.js + * (which had the resilient timeout + offline queue). See + * docs/ARCHITECTURE.md §"Legacy Retirement Plan" — field is being folded + * into ops at /j and must not lose offline capability in the process. + * + * @param {object} options + * @param {(code: string) => void} [options.onNewCode] — fires for each + * newly detected code, whether the scan was synchronous OR delivered + * later from the offline queue. Typical use: trigger an ERPNext lookup + * and Quasar notify. + */ -import { HUB_URL as HUB_BASE } from 'src/config/hub' +import { ref, watch } from 'vue' +import { scanBarcodes, scanEquipmentLabel as apiScanEquipmentLabel } from 'src/api/ocr' +import { useOfflineStore } from 'src/stores/offline' -export function useScanner () { - const barcodes = ref([]) - const scanning = ref(false) +const SCAN_TIMEOUT_MS = 8000 + +export function useScanner (options = {}) { + const onNewCode = options.onNewCode || (() => {}) + + const barcodes = ref([]) // { value, region }[] — max MAX_BARCODES + const scanning = ref(false) // true while a Gemini call is in flight const error = ref(null) - const lastPhoto = ref(null) + const lastPhoto = ref(null) // data URI of last thumbnail (400px) + const photos = ref([]) // { url, ts, codes, queued }[] — full history + // Field's default cap was 3 (phone screen estate); ops historically + // allowed 5 (equipment labels have more identifiers). Keep 5 here + // since equipment-label mode is an ops-only feature. + const MAX_BARCODES = 5 + + const offline = useOfflineStore() + + // Pick up any scans that completed while the composable was unmounted + // (e.g. tech queued a photo in the basement, phone locked, signal + // returned while the page was gone, now they reopen ScanPage). 
+ for (const result of offline.scanResults) { + mergeCodes(result.barcodes || [], 'queued') + offline.consumeScanResult(result.id) + } + + // Watch for sync completions during the lifetime of this scanner. + // Vue auto-disposes the watcher when the host component unmounts. + watch( + () => offline.scanResults.length, + () => { + for (const result of [...offline.scanResults]) { + mergeCodes(result.barcodes || [], 'queued') + offline.consumeScanResult(result.id) + } + } + ) + + function addCode (code, region) { + if (barcodes.value.length >= MAX_BARCODES) return false + if (barcodes.value.find(b => b.value === code)) return false + barcodes.value.push({ value: code, region }) + onNewCode(code) + return true + } + + function mergeCodes (codes, region) { + const added = [] + for (const code of codes) { + if (addCode(code, region)) added.push(code) + } + return added + } + + /** + * Process a photo for generic barcode/serial extraction. + * Resilient: on timeout/network error the photo is queued for retry. 
+ */ async function processPhoto (file) { if (!file) return [] error.value = null scanning.value = true - const found = [] + + let aiImage = null + const photoIdx = photos.value.length + let found = [] + try { const thumbUrl = await resizeImage(file, 400) lastPhoto.value = thumbUrl - const aiImage = await resizeImage(file, 1600, 0.92) - const res = await fetch(`${HUB_BASE}/vision/barcodes`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ image: aiImage }), - }) - if (!res.ok) throw new Error('Vision scan failed: ' + res.status) - const data = await res.json() - const existing = new Set(barcodes.value.map(b => b.value)) - for (const code of (data.barcodes || [])) { - if (barcodes.value.length >= 5) break - if (!existing.has(code)) { - existing.add(code) - barcodes.value.push({ value: code }) - found.push(code) - } + photos.value.push({ url: thumbUrl, ts: Date.now(), codes: [], queued: false }) + + // Keep high-res for text readability (small serial fonts). + aiImage = await resizeImage(file, 1600, 0.92) + + const result = await scanBarcodesWithTimeout(aiImage, SCAN_TIMEOUT_MS) + found = mergeCodes(result.barcodes || [], 'photo') + photos.value[photoIdx].codes = found + + if (found.length === 0) { + error.value = 'Aucun code détecté — rapprochez-vous ou améliorez la mise au point' } - if (!found.length) error.value = 'Aucun code detecte — rapprochez-vous ou ameliorez la mise au point' } catch (e) { - error.value = e.message || 'Erreur' + if (aiImage && isRetryable(e)) { + await offline.enqueueVisionScan({ image: aiImage }) + if (photos.value[photoIdx]) photos.value[photoIdx].queued = true + error.value = 'Réseau faible — scan en attente. Reprise automatique au retour du signal.' 
+ } else { + error.value = e.message || 'Erreur' + } } finally { scanning.value = false } + return found } /** - * Smart equipment label scan — returns structured fields - * { brand, model, serial_number, mac_address, gpon_sn, hw_version, equipment_type, barcodes } + * Process a photo for structured equipment-label extraction. + * + * Returns the Gemini response directly: + * { brand, model, serial_number, mac_address, gpon_sn, hw_version, + * equipment_type, barcodes: string[] } + * + * Side-effect: pushes `serial_number` + any `barcodes` into the same + * `barcodes` ref as processPhoto(), so a UI that uses both modes shares + * one list. + * + * Intentionally NOT resilient (no timeout, no queue) — equipment + * linking is a desktop/wifi flow, and callers want a sync answer. */ async function scanEquipmentLabel (file) { if (!file) return null @@ -54,32 +158,13 @@ export function useScanner () { const thumbUrl = await resizeImage(file, 400) lastPhoto.value = thumbUrl const aiImage = await resizeImage(file, 1600, 0.92) - const res = await fetch(`${HUB_BASE}/vision/equipment`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ image: aiImage }), - }) - if (!res.ok) throw new Error('Vision scan failed: ' + res.status) - const data = await res.json() - // Also populate barcodes list for display - if (data.barcodes?.length) { - const existing = new Set(barcodes.value.map(b => b.value)) - for (const code of data.barcodes) { - if (barcodes.value.length >= 5) break - if (!existing.has(code)) { - existing.add(code) - barcodes.value.push({ value: code }) - } - } - } - if (data.serial_number) { - const existing = new Set(barcodes.value.map(b => b.value)) - if (!existing.has(data.serial_number)) { - barcodes.value.push({ value: data.serial_number }) - } - } - if (!data.serial_number && !data.barcodes?.length) { - error.value = 'Aucun identifiant detecte — rapprochez-vous ou ameliorez la mise au point' + const data = await 
apiScanEquipmentLabel(aiImage) + + if (data?.barcodes?.length) mergeCodes(data.barcodes, 'equipment') + if (data?.serial_number) addCode(data.serial_number, 'equipment') + + if (!data?.serial_number && !data?.barcodes?.length) { + error.value = 'Aucun identifiant détecté — rapprochez-vous ou améliorez la mise au point' } return data } catch (e) { @@ -90,6 +175,28 @@ export function useScanner () { } } + /** Race scanBarcodes against a timeout. Used only for barcode mode. */ + async function scanBarcodesWithTimeout (image, ms) { + return await Promise.race([ + scanBarcodes(image), + new Promise((_, reject) => setTimeout( + () => reject(new Error('ScanTimeout')), + ms, + )), + ]) + } + + /** Retryable = worth queueing in IndexedDB for later. */ + function isRetryable (e) { + const msg = (e?.message || '').toLowerCase() + return msg.includes('scantimeout') + || msg.includes('failed to fetch') + || msg.includes('networkerror') + || msg.includes('load failed') + || e?.name === 'TypeError' // fetch throws TypeError on network error + } + + /** Resize a File to a max dimension, return as base64 data URI. 
*/ function resizeImage (file, maxDim, quality = 0.85) { return new Promise((resolve, reject) => { const img = new Image() @@ -111,11 +218,19 @@ export function useScanner () { }) } + function removeBarcode (value) { + barcodes.value = barcodes.value.filter(b => b.value !== value) + } + function clearBarcodes () { barcodes.value = [] error.value = null lastPhoto.value = null + photos.value = [] } - return { barcodes, scanning, error, lastPhoto, processPhoto, scanEquipmentLabel, clearBarcodes } + return { + barcodes, scanning, error, lastPhoto, photos, + processPhoto, scanEquipmentLabel, removeBarcode, clearBarcodes, + } } diff --git a/apps/ops/src/modules/tech/pages/TechDevicePage.vue b/apps/ops/src/modules/tech/pages/TechDevicePage.vue new file mode 100644 index 0000000..0287bc1 --- /dev/null +++ b/apps/ops/src/modules/tech/pages/TechDevicePage.vue @@ -0,0 +1,401 @@ + + + + + + diff --git a/apps/ops/src/modules/tech/pages/TechScanPage.vue b/apps/ops/src/modules/tech/pages/TechScanPage.vue index f7fa517..a81ac92 100644 --- a/apps/ops/src/modules/tech/pages/TechScanPage.vue +++ b/apps/ops/src/modules/tech/pages/TechScanPage.vue @@ -1,6 +1,35 @@ + + + diff --git a/apps/ops/src/pages/OcrPage.vue b/apps/ops/src/pages/OcrPage.vue index 2aed480..5cfa516 100644 --- a/apps/ops/src/pages/OcrPage.vue +++ b/apps/ops/src/pages/OcrPage.vue @@ -5,7 +5,7 @@
Scanner une facture
+ :label="ollamaStatus.online ? 'Gemini en ligne' : 'Vision hors ligne'" /> @@ -23,10 +23,10 @@
- +
-
Analyse en cours... (peut prendre 30-60s sur CPU)
+
Analyse en cours... (habituellement 2-5s)
diff --git a/apps/ops/src/router/index.js b/apps/ops/src/router/index.js index 2ab61fd..9956b24 100644 --- a/apps/ops/src/router/index.js +++ b/apps/ops/src/router/index.js @@ -10,6 +10,7 @@ const routes = [ { path: '', name: 'tech-tasks', component: () => import('src/modules/tech/pages/TechTasksPage.vue') }, { path: 'job/:name', name: 'tech-job', component: () => import('src/modules/tech/pages/TechJobDetailPage.vue'), props: true }, { path: 'scan', name: 'tech-scan', component: () => import('src/modules/tech/pages/TechScanPage.vue') }, + { path: 'device/:serial', name: 'tech-device', component: () => import('src/modules/tech/pages/TechDevicePage.vue'), props: true }, { path: 'diagnostic', name: 'tech-diag', component: () => import('src/modules/tech/pages/TechDiagnosticPage.vue') }, { path: 'more', name: 'tech-more', component: () => import('src/modules/tech/pages/TechMorePage.vue') }, // Magic link: /j/{jwt-token} — must be LAST to not capture static paths above diff --git a/apps/ops/src/stores/offline.js b/apps/ops/src/stores/offline.js new file mode 100644 index 0000000..ba874a9 --- /dev/null +++ b/apps/ops/src/stores/offline.js @@ -0,0 +1,236 @@ +/** + * Offline store — mutation queue + vision (Gemini) retry queue. + * + * This store is the backbone of the tech `/j` (mobile) workflow: techs work + * in basements, elevators, and under couches where LTE drops for seconds to + * minutes. We can't afford to lose a scan or a "job completed" tap, so both + * mutations AND vision photos are persisted to IndexedDB and retried in the + * background when connectivity returns. + * + * Two queues, different retry strategies: + * + * ┌─ queue (ERPNext mutations) ──────────────────────────────────────┐ + * │ { type: 'create'|'update', doctype, name?, data, ts, id } │ + * │ flush on `online` event → replay createDoc/updateDoc. │ + * │ Failed items stay queued until next online flip. 
/**
 * Offline store: a persisted ERPNext mutation queue, a persisted Gemini
 * photo-scan retry queue, and a small read cache, all backed by IndexedDB
 * through idb-keyval (`offline-queue`, `vision-queue`, `vision-results`,
 * `cache-{key}`).
 */
export const useOfflineStore = defineStore('offline', () => {
  // ─── Mutation queue ──────────────────────────────────────────────
  const queue = ref([])
  const syncing = ref(false)
  const online = ref(navigator.onLine)
  const pendingCount = computed(() => queue.value.length)

  // ─── Vision queue ────────────────────────────────────────────────
  const visionQueue = ref([])   // { id, image (base64), ts, status }
  const scanResults = ref([])   // { id, barcodes: string[], ts }
  const pendingVisionCount = computed(() => visionQueue.value.length)
  let retryTimer = null
  let visionSyncing = false

  // Pinia refs hold reactive proxies, which IndexedDB cannot
  // structured-clone; a JSON round-trip yields a plain copy.
  const toPlain = (value) => JSON.parse(JSON.stringify(value))

  // Runs an async task without awaiting it, but never lets its rejection
  // turn into an unhandled promise rejection.
  function runInBackground (task, label) {
    task().catch((err) => console.warn(`[offline] ${label} failed:`, err))
  }

  // Listen to connectivity changes. We kick off BOTH queues on `online`
  // because a reconnect is the cheapest signal we have that things might
  // work now — worst case the retries fail again and we stay queued.
  window.addEventListener('online', () => {
    online.value = true
    runInBackground(syncQueue, 'mutation sync')
    runInBackground(syncVisionQueue, 'vision sync')
  })
  window.addEventListener('offline', () => { online.value = false })

  /** Restore the mutation queue from IndexedDB (empty on any read error). */
  async function loadQueue () {
    try {
      const stored = await get('offline-queue')
      queue.value = stored || []
    } catch { queue.value = [] }
  }

  /** Persist the mutation queue. */
  async function saveQueue () {
    await set('offline-queue', toPlain(queue.value))
  }

  /** Restore pending photos and unconsumed scan results from IndexedDB. */
  async function loadVisionQueue () {
    try {
      visionQueue.value = (await get('vision-queue')) || []
      scanResults.value = (await get('vision-results')) || []
    } catch {
      visionQueue.value = []
      scanResults.value = []
    }
    // If we're restoring a non-empty queue (app was closed with pending
    // scans), give the network 5s to settle before the first retry.
    if (visionQueue.value.length) scheduleVisionRetry(5000)
  }

  /** Persist the pending-photo queue. */
  async function saveVisionQueue () {
    await set('vision-queue', toPlain(visionQueue.value))
  }

  /** Persist completed scans that the UI has not consumed yet. */
  async function saveScanResults () {
    await set('vision-results', toPlain(scanResults.value))
  }

  /**
   * Enqueue a mutation to be synced later.
   *
   * The passed object is stamped in place with `ts` and `id` and returned,
   * so callers can keep a handle on the queued action.
   *
   * @param {{ type: 'create'|'update', doctype: string, name?: string, data: object }} action
   * @returns {Promise<object>} the same action, now carrying `ts` and `id`
   */
  async function enqueue (action) {
    // Wait for the initial restore: otherwise a late-resolving load would
    // overwrite this action in memory, and our save would overwrite the
    // persisted backlog on disk.
    await queueReady
    action.ts = Date.now()
    action.id = action.ts + '-' + Math.random().toString(36).slice(2, 8)
    queue.value.push(action)
    await saveQueue()
    if (online.value) runInBackground(syncQueue, 'mutation sync')
    return action
  }

  /**
   * Replay every queued mutation against ERPNext, in order. Actions that
   * throw stay queued; actions with an unrecognised `type` are dropped.
   * Re-entrant calls return immediately while a sync is in flight.
   */
  async function syncQueue () {
    await queueReady
    if (syncing.value || queue.value.length === 0) return
    syncing.value = true
    let arrivedDuringSync = false
    try {
      const batch = [...queue.value]
      const failed = []
      for (const action of batch) {
        try {
          if (action.type === 'create') {
            await createDoc(action.doctype, action.data)
          } else if (action.type === 'update') {
            await updateDoc(action.doctype, action.name, action.data)
          }
        } catch (err) {
          console.warn(`[offline] ${action.type} ${action.doctype} failed, keeping it queued:`, err)
          failed.push(action)
        }
      }
      // enqueue() may have pushed new actions while we were awaiting the
      // network; keep them instead of overwriting the queue with `failed`.
      const attempted = new Set(batch.map((action) => action.id))
      const late = queue.value.filter((action) => !attempted.has(action.id))
      arrivedDuringSync = late.length > 0
      queue.value = [...failed, ...late]
      await saveQueue()
    } finally {
      // Always release the flag, even if persisting failed, so the queue
      // is not blocked until the next page reload.
      syncing.value = false
    }
    // Late arrivals had their own syncQueue() call rejected by the
    // `syncing` guard, so give them a pass now.
    if (arrivedDuringSync && online.value) runInBackground(syncQueue, 'mutation sync')
  }

  /**
   * Enqueue a photo whose Gemini scan couldn't complete (timeout / offline).
   * Called by useScanner when scanBarcodes exceeds SCAN_TIMEOUT_MS or throws
   * a network error. Returns the queued entry so the caller can display a
   * "scan en attente" chip in the UI.
   *
   * @param {{ image: string }} opts — base64 (data URI) of the optimized image
   * @returns {Promise<{ id: string, image: string, ts: number, status: string }>}
   */
  async function enqueueVisionScan ({ image }) {
    await visionReady
    const entry = {
      id: Date.now() + '-' + Math.random().toString(36).slice(2, 8),
      image,
      ts: Date.now(),
      status: 'queued',
    }
    visionQueue.value.push(entry)
    await saveVisionQueue()
    scheduleVisionRetry(5000)
    return entry
  }

  /**
   * Retry each queued photo. Success → move to scanResults, fail → stay
   * queued with a bumped retry schedule. We drive retries off the queue
   * itself, not off `online`, because navigator.onLine can report true
   * even on weak LTE that can't reach the hub.
   */
  async function syncVisionQueue () {
    await visionReady
    if (visionSyncing) return
    if (retryTimer) { clearTimeout(retryTimer); retryTimer = null }
    if (visionQueue.value.length === 0) return
    visionSyncing = true
    let retryDelay = 5000
    try {
      const batch = [...visionQueue.value]
      const remaining = []
      for (const entry of batch) {
        try {
          entry.status = 'syncing'
          const result = await scanBarcodes(entry.image)
          scanResults.value.push({
            id: entry.id,
            barcodes: result.barcodes || [],
            ts: Date.now(),
          })
        } catch {
          entry.status = 'queued'
          remaining.push(entry)
        }
      }
      // Photos queued while this pass was running must survive it.
      const attempted = new Set(batch.map((entry) => entry.id))
      const late = visionQueue.value.filter((entry) => !attempted.has(entry.id))
      visionQueue.value = [...remaining, ...late]
      // Back off only when something actually failed; brand-new photos
      // get the normal short first-retry delay.
      if (remaining.length) retryDelay = 30000
      await Promise.all([saveVisionQueue(), saveScanResults()])
    } finally {
      visionSyncing = false
      // Scheduled here so a persistence error cannot leave pending photos
      // without any future retry.
      if (visionQueue.value.length) scheduleVisionRetry(retryDelay)
    }
  }

  /** Arm the single retry timer; a no-op while one is already pending. */
  function scheduleVisionRetry (delay) {
    if (retryTimer) return
    retryTimer = setTimeout(() => {
      retryTimer = null
      runInBackground(syncVisionQueue, 'vision sync')
    }, delay)
  }

  /**
   * Consumer (ScanPage / TechScanPage) calls this after merging a result
   * into the UI so the same serial doesn't reappear next time the page
   * mounts from persisted state.
   */
  async function consumeScanResult (id) {
    scanResults.value = scanResults.value.filter(r => r.id !== id)
    await saveScanResults()
  }

  // ─── Generic read cache (used by list pages for offline browse) ──
  /** Store `data` under `cache-{key}` together with a write timestamp. */
  async function cacheData (key, data) {
    await set('cache-' + key, { data, ts: Date.now() })
  }

  /**
   * Read back a cached payload.
   * @returns {Promise<*>} the cached data, or null when absent/unreadable
   */
  async function getCached (key) {
    try {
      const entry = await get('cache-' + key)
      // `??` rather than `||`: a cached 0, '' or false is a real value.
      return entry?.data ?? null
    } catch { return null }
  }

  // Kick off initial loads (refs start empty and fill in once IndexedDB
  // resolves, which is fine for the UI). The promises are kept so that
  // enqueue/sync can wait for the restore instead of racing it.
  const queueReady = loadQueue()
  const visionReady = loadVisionQueue()
  // Mutations restored from a previous session would otherwise sit idle
  // until the next `online` event or the next enqueue().
  queueReady.then(() => {
    if (online.value) runInBackground(syncQueue, 'mutation sync')
  })

  return {
    // mutation queue
    queue, syncing, online, pendingCount, enqueue, syncQueue,
    // vision queue
    visionQueue, scanResults, pendingVisionCount,
    enqueueVisionScan, syncVisionQueue, consumeScanResult,
    // read cache
    cacheData, getCached, loadQueue,
  }
})
[ARCHITECTURE.md](ARCHITECTURE.md) — network + container map 2. [STATUS_2026-04-18.md](STATUS_2026-04-18.md) §"Integrations" — external services and credentials location 3. [CPE_MANAGEMENT.md](CPE_MANAGEMENT.md) — GenieACS + OLT + SNMP +4. [VISION_AND_OCR.md](VISION_AND_OCR.md) — Gemini pipeline, AI_API_KEY config, hub `/vision/*` endpoints --- @@ -60,6 +62,7 @@ If you only have 15 minutes, read those three. | [DATA_AND_FLOWS.md](DATA_AND_FLOWS.md) | Data model and user/workflow flows | Building features that touch ERPNext | | [BILLING_AND_PAYMENTS.md](BILLING_AND_PAYMENTS.md) | Subscription lifecycle, invoice generation, Stripe, payment reconciliation | Billing work | | [CPE_MANAGEMENT.md](CPE_MANAGEMENT.md) | CPE database, GenieACS, provisioning, diagnostics | CPE or network work | +| [VISION_AND_OCR.md](VISION_AND_OCR.md) | Gemini-via-hub pipeline: barcode/equipment/invoice endpoints, scanner composable, offline retry queue | Camera/scan/OCR work, onboarding anyone who'll touch `/vision/*` | | [APP_DESIGN_GUIDELINES.md](APP_DESIGN_GUIDELINES.md) | UI tokens, theming, component conventions | Frontend work | | [Gigafibre-FSM-Features.pptx](Gigafibre-FSM-Features.pptx) | Feature deck for demo / training | Sharing with non-engineers | | [Gigafibre-Billing-Handoff.pptx](Gigafibre-Billing-Handoff.pptx) | Billing deck for finance handoff | Sharing with finance team | diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 44e2fd4..bbf946d 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -19,7 +19,7 @@ - [x] Dispatch module + ticket management - [x] Equipment tracking with OLT/SNMP diagnostics - [x] SMS/Email notifications (Twilio + Mailjet) -- [x] Invoice OCR (Ollama Vision) +- [x] Invoice OCR — originally Ollama Vision, migrated to Gemini 2.5 Flash via targo-hub (2026-04-22, no GPU on ops VM). See [VISION_AND_OCR.md](VISION_AND_OCR.md). 
- [x] Field tech mobile (/t/{token}) - [x] Authentik federation (staff → client SSO) - [x] Modem-bridge (Playwright headless for TP-Link ONU diagnostics) diff --git a/docs/VISION_AND_OCR.md b/docs/VISION_AND_OCR.md new file mode 100644 index 0000000..d5f7057 --- /dev/null +++ b/docs/VISION_AND_OCR.md @@ -0,0 +1,482 @@ +# Vision & OCR Pipeline + +> **All vision runs on Gemini 2.5 Flash via `targo-hub`.** No local Ollama. The +> ops/ERPNext VM has no GPU, so every vision request — bills, barcodes, +> equipment labels — goes to Google's Gemini API from a single backend +> service and gets normalized before hitting the frontend. + +**Last refreshed:** 2026-04-22 (cutover from Ollama → Gemini) + +--- + +## 1. Architecture at a glance + +```text + ┌──────────────────┐ ┌───────────────────────┐ + │ apps/ops (PWA) │ │ apps/field (PWA) │ + │ /ops/* │ │ /field/* (retiring) │ + └────────┬─────────┘ └──────────┬────────────┘ + │ │ + │ src/api/ocr.js │ src/api/ocr.js + │ {ocrBill, scanBarcodes, │ {ocrBill, scanBarcodes, + │ scanEquipmentLabel} │ checkOllamaStatus} + │ │ + └──────────────┬──────────────┘ + │ POST https://msg.gigafibre.ca/vision/* + ▼ + ┌───────────────────────┐ + │ targo-hub │ + │ lib/vision.js │ + │ ├─ /vision/barcodes │ + │ ├─ /vision/equipment│ + │ └─ /vision/invoice │ + └──────────┬────────────┘ + │ generativelanguage.googleapis.com + ▼ + ┌───────────────────────┐ + │ Gemini 2.5 Flash │ + │ (text + image, JSON │ + │ responseSchema) │ + └───────────────────────┘ +``` + +**Why route everything through the hub:** + +1. **No GPU on ops VM.** The only machine with a local Ollama was retired + in Phase 2.5. Centralizing on Gemini means the frontend stops caring + where inference happens. +2. **Single AI_API_KEY rotation surface.** Key lives in the hub env only. +3. **Schema guarantees.** Gemini supports `responseSchema` in the v1beta + API — the hub enforces it per endpoint, so the frontend can trust + the JSON shape without defensive parsing. +4. 
**Observability.** Every call is logged in the hub with image size, + model, latency, output preview (first 300 chars). + +--- + +## 2. Hub endpoints (`services/targo-hub/lib/vision.js`) + +All three endpoints: +- are `POST` with JSON body `{ image: "<base64 string or data URI>" }`, +- return structured JSON (see per-endpoint schemas below), +- require `AI_API_KEY` in the hub environment, +- are unauthenticated from the browser (rate-limiting is the hub's job). + +### `POST /vision/barcodes` + +Extracts up to 3 identifiers (serials, MACs, GPON SNs, barcodes). + +```json +{ + "barcodes": ["1608K44D9E79FAFF5", "0418D6A1B2C3", "TPLG-A1B2C3D4"] +} +``` + +Used by: tech scan page, equipment link dialog, invoice scan (fallback). + +### `POST /vision/equipment` + +Structured equipment-label parse (ONT/ONU/router/modem). + +```json +{ + "brand": "TP-Link", + "model": "XX230v", + "serial_number": "2234567890ABCD", + "mac_address": "0418D6A1B2C3", + "gpon_sn": "TPLGA1B2C3D4", + "hw_version": "1.0", + "equipment_type": "ont", + "barcodes": ["..."] +} +``` + +Post-processing: `mac_address` stripped of separators + uppercased; +`serial_number` trimmed of whitespace. + +Used by: `useEquipmentActions` in the ops client detail page to pre-fill +a "create Service Equipment" dialog. + +### `POST /vision/invoice` + +Structured invoice/bill OCR. Canadian-tax-aware (GST/TPS + QST/TVQ). + +```json +{ + "vendor": "Acme Fibre Supplies", + "vendor_address": "123 rue Somewhere, Montréal, QC", + "invoice_number": "INV-2026-0042", + "date": "2026-04-18", + "due_date": "2026-05-18", + "subtotal": 1000.00, + "tax_gst": 50.00, + "tax_qst": 99.75, + "total": 1149.75, + "currency": "CAD", + "items": [ + { "description": "OLT SFP+ module", "qty": 4, "rate": 250.00, "amount": 1000.00 } + ], + "notes": "Payment terms: net 30" +} +``` + +Post-processing: string-shaped numbers (e.g. `"1,234.56"`) are coerced to +floats, both at the invoice level and per line item.
+ +Used by: `apps/ops/src/pages/OcrPage.vue` (invoice intake), future +supplier-bill wizard. + +--- + +## 3. Frontend surface (`apps/ops/src/api/ocr.js`) + +Thin wrapper over the hub. Same signatures for ops and field during the +migration window (see `apps/field/src/api/ocr.js` — same file, different +HUB_URL source). + +| Function | Endpoint | Error behavior | +|---|---|---| +| `ocrBill(image)` | `/vision/invoice` | Throws on non-2xx — caller shows Notify | +| `scanBarcodes(image)` | `/vision/barcodes` | Throws on non-2xx — **`useScanner` catches + queues** | +| `scanEquipmentLabel(image)` | `/vision/equipment` | Throws on non-2xx | +| `checkOllamaStatus()` | `/health` | Returns `{online, models, hasVision}`. Name kept for back-compat. | + +The `checkOllamaStatus` name is a leftover from the Ollama era — it now +pings the hub's health endpoint and reports `models: ['gemini-2.5-flash']` +so existing callers (status chips, diagnostics panels) keep working. The +name will be renamed to `checkVisionStatus` once no page references the +old symbol. + +--- + +## 4. Scanner composable (`apps/ops/src/composables/useScanner.js`) + +Wraps the API with camera capture and resilience. Two modes on one +composable: + +### Mode A — `processPhoto(file)` (barcodes, resilient) + +1. Resize the `File` twice: + - 400px thumbnail for on-screen preview + - 1600px @ q=0.92 for Gemini (text must stay readable) +2. Race `scanBarcodes(aiImage)` against an **8s timeout** (`SCAN_TIMEOUT_MS`). +3. On timeout / network error, if the error is retryable + (ScanTimeout | Failed to fetch | NetworkError | TypeError): + - persist `{ id, image, ts, status: 'queued' }` to IndexedDB via + `useOfflineStore.enqueueVisionScan`, + - flag `photos[idx].queued = true` for the UI chip, + - show "Réseau faible — scan en attente. Reprise automatique au + retour du signal." +4. Otherwise, show the raw error. 
+ +On success, newly found codes are merged into `barcodes.value` (capped at +`MAX_BARCODES = 5`, dedup by value), and the optional `onNewCode(code)` +callback fires for each one. + +### Mode B — `scanEquipmentLabel(file)` (structured, synchronous) + +No timeout, no queue. Returns the full Gemini response. Auto-merges any +`serial_number` + `barcodes[]` into the same `barcodes.value` list so a +page using both modes shares one visible list. Used in desktop/wifi flows +where callers want a sync answer to pre-fill a form. + +### Late-delivered results + +The composable runs a `watch(() => offline.scanResults.length)` so that +when the offline store later completes a queued scan (tech walks out of +the basement, signal returns), the codes appear in the UI *as if* they +had come back synchronously. `onNewCode` fires for queued codes too, so +lookup-and-notify side-effects happen regardless of path. + +It also drains `offline.scanResults` once at mount, to catch the case +where a scan completed while the page was unmounted (phone locked, app +backgrounded, queue sync ran, user reopens ScanPage). + +--- + +## 5. Offline store (`apps/ops/src/stores/offline.js`) + +Pinia store, two queues, IndexedDB (`idb-keyval`): + +### Mutation queue + +`{ type: 'create'|'update', doctype, name?, data, ts, id }` — ERPNext +mutations. Flushed when `window` emits `online`. Failed items stay +queued across reconnects. Keyed under `offline-queue`. + +### Vision queue + +`{ id, image (base64), ts, status }` — photos whose Gemini call timed +out or failed. Keyed under `vision-queue`. + +**Retries are time-driven, not event-driven.** We don't trust +`navigator.onLine` because it reports `true` on 2-bar LTE that can't +actually reach msg.gigafibre.ca. First retry at 5s, back off to 30s on +repeated failure. A reconnect (online event) also triggers an +opportunistic immediate sync. 
+ +Successful scans land in `scanResults` (keyed `vision-results`) and the +scanner composable consumes them via watcher + `consumeScanResult(id)` +to avoid duplicates. + +### Generic cache + +`cacheData(key, data)` / `getCached(key)` — plain read cache used by +list pages for offline browsing. Keyed under `cache-{key}`. + +--- + +## 6. Data flow example (tech scans an ONT in a basement) + +``` +[1] Tech taps "Scan" in /j/ScanPage (camera opens) +[2] Tech takes photo (File → input.change) +[3] useScanner.processPhoto(file) + → resizeImage(file, 400) (thumbnail shown immediately) + → resizeImage(file, 1600, 0.92) + → Promise.race([scanBarcodes(ai), timeout(8s)]) + +CASE A — signal ok: +[4a] Gemini responds in 2s → barcodes[] merged → onNewCode fires + → ERPNext lookup → Notify "ONT lié au client Untel" + +CASE B — weak signal / timeout: +[4b] 8s timeout fires → isRetryable('ScanTimeout') → true + → offline.enqueueVisionScan({ image: aiImage }) + → photos[idx].queued = true (chip "scan en attente") + → tech keeps scanning next device +[5b] Tech walks out of basement — window.online fires + → syncVisionQueue() retries the queued photo + → Gemini responds → scanResults.push({id, barcodes, ts}) +[6b] useScanner watcher on scanResults.length fires + → mergeCodes(barcodes, 'queued') → onNewCode fires (late) + → Notify arrives while tech is walking back to the truck + → consumeScanResult(id) (removed from persistent queue) +``` + +--- + +## 7. 
Changes from the previous (Ollama) pipeline + +| Aspect | Before (Phase 2) | After (Phase 2.5) | +|---|---|---| +| Invoice OCR | Ollama `llama3.2-vision:11b` on the serving VM | Gemini 2.5 Flash via `/vision/invoice` | +| Barcode scan | Hub `/vision/barcodes` (already Gemini) | Unchanged | +| Equipment label | Hub `/vision/equipment` (already Gemini) | Unchanged | +| GPU requirement | Yes (11GB VRAM for vision model) | None — all inference remote | +| Offline resilience | Only barcode mode, only in apps/field | Now in apps/ops too (ready for /j) | +| Schema validation | Hand-parsed from prompt-constrained JSON | Gemini `responseSchema` enforces shape | +| Frontend import path | `'src/api/ocr'` (both apps) | Unchanged — same symbols | + +--- + +## 8. Where to look next + +- **Hub implementation:** `services/targo-hub/lib/vision.js`, + `services/targo-hub/server.js` (routes: `/vision/barcodes`, + `/vision/equipment`, `/vision/invoice`). +- **Frontend API client:** `apps/ops/src/api/ocr.js` (+ + `apps/field/src/api/ocr.js` kept in sync during migration). +- **Scanner composable:** `apps/ops/src/composables/useScanner.js`. +- **Offline store:** `apps/ops/src/stores/offline.js`. + +### 8.1 Secrets, keys and rotation + +The only secret this pipeline needs is the Gemini API key. Everything +else (models, base URL, hub public URL) is non-sensitive config. + +| Variable | Where it's read | Default | Notes | +|---|---|---|---| +| `AI_API_KEY` | `services/targo-hub/lib/config.js:38` | *(none — required)* | Google AI Studio key for `generativelanguage.googleapis.com`. **Server-side only**, never reaches the browser bundle. | +| `AI_MODEL` | `config.js:39` | `gemini-2.5-flash` | Primary vision model. | +| `AI_FALLBACK_MODEL` | `config.js:40` | `gemini-2.5-flash-lite-preview` | Used by text-only calls (not vision) when primary rate-limits. 
| +| `AI_BASE_URL` | `config.js:41` | `https://generativelanguage.googleapis.com/v1beta/openai/` | OpenAI-compatible endpoint used by agent code. Vision bypasses this and talks to the native `/v1beta/models/...:generateContent` URL. | + +**Storage policy.** The repo is private and follows the same posture as +the ERPNext service token already hardcoded in +`apps/ops/infra/nginx.conf:15` and `apps/field/infra/nginx.conf:13`. The +Gemini key can live in any of three places, in increasing order of +"checked into git": + +1. **Prod VM env only** (status quo): key is in the `environment:` block + of the `targo-hub` service in `/opt/targo-hub/docker-compose.yml` on + `96.125.196.67`. `config.js:38` reads it via `process.env.AI_API_KEY`. + Rotation = edit that one line + `docker compose restart targo-hub`. +2. **In-repo fallback in `config.js`**: change line 38 to + `AI_API_KEY: env('AI_API_KEY', 'AIzaSy...')` — the env var still wins + when set, so prod doesn't break, but a fresh clone Just Works. Same + pattern as nginx's ERPNext token. +3. **Hardcoded constant** (not recommended): replace `env(...)` entirely. + Loses the ability to override per environment (dev, staging). + +If/when option 2 is chosen, the literal value should also be recorded +in `MEMORY.md` (`reference_google_ai.md`) so that's the rotation source +of truth — not scattered across the codebase. + +**Browser exposure.** Zero. The ops nginx config proxies `/hub/*` to +targo-hub on an internal Docker network; the hub injects the key before +calling Google. `apps/ops/src/api/ocr.js` just does +`fetch('/hub/vision/barcodes', ...)` — no key in the bundle, no key in +DevTools, no key in the browser's `Network` tab. + +--- + +## 9. Related + +- [ARCHITECTURE.md](ARCHITECTURE.md) — the full service map this lives in. +- [CPE_MANAGEMENT.md](CPE_MANAGEMENT.md) — how scanned serials flow into + the TR-069/TR-369 device management plane. 
+- [APP_DESIGN_GUIDELINES.md](APP_DESIGN_GUIDELINES.md) — frontend + conventions (Vue 3 Composition API, feature folders). + +--- + +## 10. Data-model relationships triggered by a scan + +A scan is never just "identify a barcode." Every successful lookup fans +out into the ERPNext graph: the scanned Service Equipment is the entry +point, and the tech page (`/j/device/:serial`) surfaces everything tied +to the same Customer and Service Location. This section documents that +graph, the exact fields read per entity, and the write rules. + +### 10.1 Graph (Service Equipment is the anchor) + +```text + ┌─────────────────────────┐ + │ Service Equipment │ + │ EQP-##### │◀───── scanned serial / MAC / barcode + │ │ (3-tier lookup in TechScanPage) + │ • serial_number │ + │ • mac_address │ + │ • barcode │ + │ • equipment_type (ONT) │ + │ • brand / model │ + │ • firmware / hw_version│ + │ • status │ + │ │ + │ FK → customer ─────────┼───┐ + │ FK → service_location ─┼─┐ │ + │ FK → olt / port │ │ │ (ONT-specific, TR-069 bind) + └─────────────────────────┘ │ │ + │ │ + ┌─────────────────────────────────┘ │ + │ │ + ▼ ▼ + ┌───────────────────┐ ┌───────────────────┐ + │ Service Location │ │ Customer │ + │ LOC-##### │ │ CUST-##### │ + │ • address │ │ • customer_name │ + │ • city │ │ • stripe_id │ + │ • postal_code │ │ • ppa_enabled │ + │ • connection_type │ │ • legacy_*_id │ + │ • olt_port │ └────────┬──────────┘ + │ • gps lat/lng │ │ + └───┬──────────┬────┘ │ + │ │ │ + inbound│ │inbound inbound│ + │ │ │ + ▼ ▼ ▼ + ┌────────────┐ ┌──────────────┐ ┌──────────────┐ + │ Issue │ │ Dispatch Job │ │ Subscription │ + │ TCK-##### │ │ DJ-##### │ │ SUB-##### │ + │ │ │ │ │ │ + │ open │ │ upcoming │ │ active plan │ + │ tickets │ │ installs / │ │ billing │ + │ │ │ repairs │ │ RADIUS creds │ + └────────────┘ └──────────────┘ └──────────────┘ + FK: service_location FK: party_type='Customer', party= +``` + +**Two FK axes, not one.** Tickets + Dispatch Jobs pivot on *where* the +problem is (Service 
Location). Subscriptions pivot on *who owns the +account* (Customer). A customer can have multiple locations (duplex, +rental, commercial); the scan page shows the subscription freshest for +the customer, even if the scanned device is at a secondary address. + +### 10.2 Exact reads issued from `TechDevicePage.vue` + +| Step | Call | Filter | Fields read | Purpose | +|------|------|--------|-------------|---------| +| 1 | `listDocs('Service Equipment')` | `serial_number = :serial` | `name` | Exact-serial lookup | +| 1 | `listDocs('Service Equipment')` | `barcode = :serial` | `name` | Fallback if serial missed | +| 2 | `getDoc('Service Equipment', name)` | — | full doc | Device card: brand/model/MAC/firmware/customer/service_location/olt_* | +| 3 | `getDoc('Service Location', loc)` | — | full doc | Address, GPS, connection_type, olt_port | +| 4 | `listDocs('Subscription')` | `party_type='Customer', party=, status='Active'` | `name, status, start_date, current_invoice_end` | Active plan chip | +| 5 | `listDocs('Issue')` | `service_location=, status ∈ {Open, In Progress, On Hold}` | `name, subject, status, priority, opening_date` | Open tickets list | +| 6 | `listDocs('Dispatch Job')` | `service_location=, status ∈ {Planned, Scheduled, En Route, In Progress}` | `name, subject, job_type, status, scheduled_date, technician` | Upcoming interventions | + +All five fan-out queries run in parallel via `Promise.allSettled`, so a +permission error on any single doctype (e.g. tech role can't read +`Subscription` in some envs) doesn't block the page render — just that +card is omitted. + +### 10.3 Writes issued from `TechScanPage.vue` + +The scan page writes to **exactly one doctype** — `Service Equipment` — +never to Customer, Location, Subscription, Issue, or Dispatch Job. 
All +relationship changes happen via FK updates on the equipment row: + +| Trigger | Write | Why | +|---------|-------|-----| +| Auto-link from job context | `updateDoc('Service Equipment', name, { customer, service_location })` | Tech opened Scan from a Dispatch Job (`?job=<job>&customer=<customer>&location=<location>`) and the scanned equipment has no location yet — this "claims" the device for the install. | +| Manual link dialog | `updateDoc('Service Equipment', name, { customer, service_location })` | Tech searched customer + picked one of the customer's locations. | +| Create new device | `createDoc('Service Equipment', data)` | 3-tier lookup came up empty — create a stub and tie it to the current job if available. | +| Customer re-link (from TechDevicePage) | `updateDoc('Service Equipment', name, { customer })` | Tech realized the device is at the wrong account; re-linking the customer auto-reloads the subscription card. | + +**Subscription / Issue / Dispatch Job are read-only in the scan flow.** +The tech app never creates a ticket from a scan — that's the job of the +ops dispatcher in `DispatchPage.vue` + `ClientDetailPage.vue`. The scan +page's contribution is to make the FK (`service_location` on the +equipment) accurate so those downstream cards light up correctly when +the dispatcher or the next tech opens the page.
+ +### 10.4 Auto-link rule (the one piece of scan-time "business logic") + +When TechScanPage is opened from a Dispatch Job (`goScan` on +TechJobDetailPage propagates `?job=<job>&customer=<customer>&location=<location>`), +each successful lookup runs: + +```js +if (result.found && jobContext.customer && !result.equipment.service_location) { + await updateDoc('Service Equipment', result.equipment.name, { + customer: jobContext.customer, + service_location: jobContext.location, // only if the job has one + }) +} +``` + +**Why gated on "no existing service_location":** a device that's already +tied to address A should never silently move to address B just because +a tech scanned it on a job ticket. If the location is already set, the +tech has to use the "Re-link" action in TechDevicePage, which is +explicit and logged. This prevents swap-out scenarios (tech brings a +tested spare ONT from another install and scans it to confirm serial) +from corrupting address ownership. + +### 10.5 Why this matters for offline mode + +The offline store (`stores/offline.js`) queues `updateDoc` calls under +the mutation queue, not the vision queue. That means: + +- **Scan photo** → offline → `vision-queue` → retries against Gemini when + signal returns. +- **Auto-link / create-equipment** → offline → `offline-queue` → retries + against ERPNext when signal returns. + +Because both queues drain automatically once connectivity returns (the +vision queue on its retry timer, the mutation queue on the `online` +event), a tech who scans 6 ONTs in a +no-signal basement comes back to the truck and the phone silently: + +1. Sends the 6 photos to Gemini (vision queue) +2. Receives the 6 barcode lists +3. Fans each one through `lookupInERPNext` (the scan page watcher) +4. For found + unlinked devices, enqueues 6 `updateDoc` calls +5. Drains the mutation queue → all 6 devices now carry + `customer + service_location` FKs +6.
// ─── Invoice / bill OCR ────────────────────────────────────────────────
// We run this on Gemini (not on Ollama) because the ops VM has no GPU —
// ops must not depend on a local vision model. The schema matches what
// the ops InvoiceScanPage expects so switching away from Ollama is a
// drop-in replacement on the frontend.

const INVOICE_PROMPT = `You are an invoice/bill OCR assistant. Extract structured data from this photo of a vendor invoice or bill.
Return ONLY valid JSON that matches the provided schema. No prose, no markdown.
Rules:
- "date" / "due_date" must be ISO YYYY-MM-DD. If the date is MM/DD/YYYY or DD/MM/YYYY and ambiguous, prefer YYYY-MM-DD with the most likely interpretation for Canadian/Québec invoices.
- "currency" is a 3-letter code (CAD, USD, EUR). Default to CAD if not visible.
- "tax_gst" = GST/TPS/HST (Canadian federal tax); "tax_qst" = QST/TVQ (Québec provincial tax).
- "items" is a line-by-line list; keep description as printed, collapse whitespace.
- Missing fields → null for strings, 0 for numbers, [] for items.`

const INVOICE_SCHEMA = {
  type: 'object',
  properties: {
    vendor: { type: 'string', nullable: true },
    vendor_address: { type: 'string', nullable: true },
    invoice_number: { type: 'string', nullable: true },
    date: { type: 'string', nullable: true },
    due_date: { type: 'string', nullable: true },
    subtotal: { type: 'number', nullable: true },
    tax_gst: { type: 'number', nullable: true },
    tax_qst: { type: 'number', nullable: true },
    total: { type: 'number', nullable: true },
    currency: { type: 'string', nullable: true },
    items: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          description: { type: 'string', nullable: true },
          qty: { type: 'number', nullable: true },
          rate: { type: 'number', nullable: true },
          amount: { type: 'number', nullable: true },
        },
      },
    },
    notes: { type: 'string', nullable: true },
  },
  required: ['vendor', 'total'],
}

// Money fields that may come back as strings, at invoice and line level.
const INVOICE_AMOUNT_FIELDS = ['subtotal', 'tax_gst', 'tax_qst', 'total']
const INVOICE_ITEM_AMOUNT_FIELDS = ['qty', 'rate', 'amount']

/**
 * Parse a printed amount into a number, accepting both the English
 * ("1,234.56") and the French/Québec ("1 234,56", "1.234,56") conventions.
 *
 * The rightmost separator decides: a comma is the decimal mark when a dot
 * precedes it, or when it is the only comma and is not followed by exactly
 * three digits ("1,234" stays a thousands group). Unparseable input → 0.
 *
 * @param {string} raw amount as printed, possibly with currency symbols
 * @returns {number}
 */
function parseInvoiceAmount (raw) {
  const cleaned = String(raw).replace(/[^0-9.,\-]/g, '')
  const lastDot = cleaned.lastIndexOf('.')
  const lastComma = cleaned.lastIndexOf(',')
  let normalized
  if (lastComma > lastDot) {
    const digitsAfterComma = cleaned.length - lastComma - 1
    const isOnlyComma = cleaned.indexOf(',') === lastComma
    const commaIsDecimal = lastDot !== -1 || (isOnlyComma && digitsAfterComma !== 3)
    normalized = commaIsDecimal
      ? cleaned.replace(/\./g, '').replace(',', '.')
      : cleaned.replace(/,/g, '')
  } else {
    normalized = cleaned.replace(/,/g, '')
  }
  return Number(normalized) || 0
}

// Coerce, in place, every listed field of `record` that is a string.
function coerceAmountFields (record, fields) {
  for (const field of fields) {
    if (typeof record[field] === 'string') record[field] = parseInvoiceAmount(record[field])
  }
}

/**
 * POST /vision/invoice — run invoice OCR on the uploaded image and reply
 * with the structured result.
 *
 * Replies 200 with the parsed invoice (or an empty stub when the model
 * returned nothing), the status supplied by extractBase64 when the image
 * is rejected, and 500 when the vision call throws.
 */
async function handleInvoice (req, res) {
  const body = await parseBody(req)
  const check = extractBase64(req, body, 'invoice')
  if (check.error) return json(res, check.status, { error: check.error })
  try {
    const parsed = await geminiVision(check.base64, INVOICE_PROMPT, INVOICE_SCHEMA)
    if (!parsed) return json(res, 200, { vendor: null, total: null, items: [] })
    // Normalize: the model sometimes returns amounts as printed strings
    // ("1,234.56" or, on Québec invoices, "1 234,56").
    coerceAmountFields(parsed, INVOICE_AMOUNT_FIELDS)
    if (Array.isArray(parsed.items)) {
      for (const it of parsed.items) {
        // A null / non-object line must not turn a good scan into a 500.
        if (!it || typeof it !== 'object') continue
        coerceAmountFields(it, INVOICE_ITEM_AMOUNT_FIELDS)
      }
    }
    log(`Vision invoice: vendor=${parsed.vendor} total=${parsed.total} items=${(parsed.items || []).length}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision invoice error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}