/*
 * gigafibre-fsm/apps/ops/src/api/ocr.js
 * louispaulb 2453bc6ef2 feat: Ollama Vision OCR for bill/invoice scanning
 * - Ollama container running llama3.2-vision:11b on server
 * - OCR page in ops app: camera/upload → Ollama extracts vendor, date,
 *   amounts, line items → editable form → create Purchase Invoice
 * - nginx proxies /ollama/ to Ollama API (both ops + field containers)
 * - Added createDoc to erp.js API layer
 *
 * Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
 * 2026-03-30 23:57:21 -04:00
 */
import { authFetch } from './auth'
// Ollama generate endpoint, reverse-proxied by nginx (see /ollama/ location block).
const OLLAMA_URL = '/ollama/api/generate'
// System prompt sent with every OCR request. The model is instructed to emit
// bare JSON matching this schema; ocrBill() still defensively extracts the
// first {...} span in case the model wraps its answer in markdown anyway.
// NOTE: this string is sent verbatim to the model — do not reformat it.
const OCR_PROMPT = `You are an invoice/bill OCR assistant. Extract the following fields from this image of a bill or invoice. Return ONLY valid JSON, no markdown, no explanation.
{
"vendor": "company name on the bill",
"vendor_address": "full address if visible",
"invoice_number": "invoice/bill number",
"date": "YYYY-MM-DD format",
"due_date": "YYYY-MM-DD if visible, null otherwise",
"subtotal": 0.00,
"tax_gst": 0.00,
"tax_qst": 0.00,
"total": 0.00,
"currency": "CAD",
"items": [
{ "description": "line item description", "qty": 1, "rate": 0.00, "amount": 0.00 }
],
"notes": "any other relevant text (account number, payment terms, etc.)"
}
If a field is not visible, set it to null. Always return valid JSON.`
/**
 * Run bill/invoice OCR on an image via the Ollama Vision model.
 *
 * Sends the image to the llama3.2-vision model with OCR_PROMPT and parses
 * the JSON object it returns.
 *
 * @param {string} base64Image - base64-encoded image; a `data:image/...;base64,`
 *   prefix is tolerated and stripped before sending.
 * @returns {Promise<object>} Parsed invoice data (vendor, date, totals, items, ...)
 * @throws {Error} When the HTTP call fails, the response contains no JSON
 *   object, or the extracted JSON does not parse.
 */
export async function ocrBill (base64Image) {
  // Ollama wants bare base64, so drop any data-URL prefix the caller passed in.
  const image = base64Image.replace(/^data:image\/[^;]+;base64,/, '')

  const payload = {
    model: 'llama3.2-vision:11b',
    prompt: OCR_PROMPT,
    images: [image],
    stream: false,
    options: {
      temperature: 0.1, // keep extraction deterministic-ish
      num_predict: 2048,
    },
  }

  const res = await authFetch(OLLAMA_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })

  if (!res.ok) {
    const body = await res.text()
    throw new Error('OCR failed: ' + (body || res.status))
  }

  const data = await res.json()
  const raw = data.response || ''

  // The model may wrap its answer in markdown; grab the outermost {...} span.
  const match = raw.match(/\{[\s\S]*\}/)
  if (!match) {
    throw new Error('No JSON in OCR response')
  }

  try {
    return JSON.parse(match[0])
  } catch (e) {
    throw new Error('Invalid JSON from OCR: ' + e.message)
  }
}
/**
 * Probe the Ollama server and report whether the vision model is installed.
 *
 * Never throws: network or HTTP failures are folded into the returned object.
 *
 * @returns {Promise<{online: boolean, models?: string[], hasVision?: boolean, error?: string}>}
 */
export async function checkOllamaStatus () {
  try {
    const res = await authFetch('/ollama/api/tags')
    if (!res.ok) {
      return { online: false, error: 'HTTP ' + res.status }
    }
    const body = await res.json()
    const names = (body.models || []).map((m) => m.name)
    return {
      online: true,
      models: names,
      hasVision: names.some((name) => name.includes('llama3.2-vision')),
    }
  } catch (err) {
    // Treat any thrown error (network down, bad JSON) as "offline".
    return { online: false, error: err.message }
  }
}