gigafibre-fsm/services/targo-hub/lib/vision.js

'use strict'
const cfg = require('./config')
const { log, json, parseBody } = require('./helpers')

/**
 * POST /vision/barcodes
 * Accepts { image: "base64..." } (raw base64 or a data:image/...;base64, URI) and uses
 * Gemini Flash Vision to extract barcode values.
 * Returns { barcodes: ["VALUE1", "VALUE2", ...] }
 *
 * Responses written through json():
 *   200 — the object resolved by extractBarcodes
 *   400 — image field missing, not a string, or empty once the data URI prefix is removed
 *   500 — AI_API_KEY not configured, or extractBarcodes threw
 *
 * @param {object} req - incoming request; its body is read with parseBody
 * @param {object} res - response object handed to json()
 */
async function handleBarcodes (req, res) {
  if (!cfg.AI_API_KEY) return json(res, 500, { error: 'AI_API_KEY not configured' })

  const body = await parseBody(req)
  const image = body?.image
  // Validate before touching string methods: this code runs outside the try block,
  // so a null body or a numeric/object `image` would otherwise surface as an
  // uncaught TypeError instead of a 400.
  if (typeof image !== 'string' || !image) {
    return json(res, 400, { error: 'Missing image field (base64)' })
  }

  // Strip data URI prefix if present, keeping the MIME type it declares
  const prefix = image.match(/^data:(image\/[^;]+);base64,/)
  const base64 = prefix ? image.slice(prefix[0].length) : image
  const mimeType = prefix ? prefix[1] : 'image/jpeg'
  if (!base64) return json(res, 400, { error: 'Missing image field (base64)' })

  const sizeKB = Math.round(base64.length * 3 / 4 / 1024)
  log(`Vision: received image ${sizeKB}KB`)

  try {
    // The declared MIME type is forwarded as a second argument; an extractor that
    // only takes the image simply ignores it.
    const result = await extractBarcodes(base64, mimeType)
    return json(res, 200, result)
  } catch (e) {
    log('Vision barcode error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}

// Instruction text sent as the first `parts` entry of the Gemini request built in
// extractBarcodes; the image itself follows as inline data.
// NOTE(review): the last sentence asks for "all identifiers" and for "maximum 3" at
// the same time; the response schema and the post-processing in extractBarcodes both
// cap the list at 3, so the cap is what actually applies.
const VISION_PROMPT = `You are reading equipment labels from a photo taken by a field technician. The image may be blurry, tilted, at an angle, or have poor lighting.

Your job: find and read ALL identifiers on this device label. This includes:

1. TEXT PRINTED BELOW OR NEAR A BARCODE — this is the barcode value (ignore the barcode lines themselves)
2. Serial numbers — after "S/N", "SN", "Serial", or standalone long alphanumeric strings (8+ chars)
3. MAC addresses — after "MAC", "MAC ID", "MAC Address" — 12 hex chars, with or without colons/dashes
4. Model numbers — after "M/N", "Model", "P/N"
5. Any other identifier: IMEI, GPON SN, PON SN, hardware version

Examples of values to extract:
- 1608K44D9E79FAFF5 (printed under a barcode)
- TPLG-A1B2C3D4 (serial number)
- 04:18:D6:A1:B2:C3 (MAC address)
- ERLite-3 (model number)
- HWTC87654321 (Huawei serial)

Even if blurry, try your BEST to read each character. Return all identifiers found, maximum 3 most important ones (serial and MAC first, model last).`

/**
 * Call Gemini Flash Vision to extract barcode/serial number values from an image.
 * Uses the native Gemini REST API with JSON response mode.
 *
 * @param {string} base64Image - image bytes as base64, without a data URI prefix
 * @param {string} [mimeType='image/jpeg'] - MIME type declared for the inline image data
 * @returns {Promise<{barcodes: string[]}>} at most 3 unique values with all whitespace
 *   removed, each longer than 3 characters; an empty list when the model reply
 *   cannot be parsed as JSON
 * @throws {Error} when the API answers with a non-OK status (message carries the
 *   status and the first 200 characters of the response body)
 */
async function extractBarcodes (base64Image, mimeType = 'image/jpeg') {
  // The key travels in the x-goog-api-key header rather than the query string so it
  // does not end up in URLs captured by proxies or request logs.
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${cfg.AI_MODEL}:generateContent`

  const payload = {
    contents: [{
      parts: [
        { text: VISION_PROMPT },
        {
          inline_data: {
            mime_type: mimeType,
            data: base64Image
          }
        }
      ]
    }],
    generationConfig: {
      temperature: 0.1,
      maxOutputTokens: 1024,
      responseMimeType: 'application/json',
      responseSchema: {
        type: 'object',
        properties: {
          barcodes: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 3,
          }
        },
        required: ['barcodes'],
      },
    }
  }

  const resp = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': cfg.AI_API_KEY,
    },
    body: JSON.stringify(payload),
  })

  if (!resp.ok) {
    const text = await resp.text()
    throw new Error(`Gemini API ${resp.status}: ${text.slice(0, 200)}`)
  }

  const data = await resp.json()
  const candidate = data.candidates?.[0]
  const text = (candidate?.content?.parts?.[0]?.text || '').trim()
  const finishReason = candidate?.finishReason || 'unknown'

  log(`Vision response (finish: ${finishReason}): ${text.slice(0, 300)}`)

  // Parse response: strict JSON first, then the outermost {...} span in case the
  // model wrapped the object in extra text. Failures are deliberate best-effort and
  // end up in the "could not parse" branch below.
  let parsed
  try {
    parsed = JSON.parse(text)
  } catch {
    const jsonMatch = text.match(/\{[\s\S]*\}/)
    if (jsonMatch) {
      try { parsed = JSON.parse(jsonMatch[0]) } catch {}
    }
  }

  if (!parsed) {
    log('Vision: could not parse response')
    return { barcodes: [] }
  }

  // Accept either a bare array or the { barcodes: [...] } shape from the schema
  const arr = Array.isArray(parsed) ? parsed
    : Array.isArray(parsed.barcodes) ? parsed.barcodes
    : []

  // Strip whitespace BEFORE applying the length threshold, otherwise a value such as
  // "a  b" passes the check and is emitted as the 2-character "ab".
  const cleaned = arr
    .filter(v => typeof v === 'string')
    .map(v => v.replace(/\s+/g, ''))
    .filter(v => v.length > 3)

  // Drop repeats (first occurrence wins) so a value reported twice does not take
  // two of the three slots.
  const barcodes = [...new Set(cleaned)].slice(0, 3)

  log(`Vision: extracted ${barcodes.length} barcode(s): ${barcodes.join(', ')}`)
  return { barcodes }
}

// ── POST /vision/equipment ──────────────────────────────────────────────────
// Full equipment label reading — returns structured fields
//
// EQUIP_PROMPT is the instruction text sent as the first `parts` entry of the
// request built in handleEquipment; the photo follows as inline data.
// The prompt asks for MAC addresses without separators; handleEquipment strips
// separators again on its side, so the prompt is not the only safeguard.
const EQUIP_PROMPT = `You are reading an equipment label from a photo taken by an ISP field technician.
The device is typically an ONT, ONU, router, modem, decoder, or similar telecom equipment.

Read ALL information visible on the label and return structured JSON.

Look for:
- Brand / Manufacturer: "Hisense", "TP-Link", "Huawei", "ZTE", "Nokia", "Sagemcom", etc.
- Model number: after "Model", "M/N", "P/N", or on a prominent line (e.g. "LTE3415-SHA+", "HG8245H")
- Serial number: after "S/N", "SN", "Serial", or printed under a barcode
- MAC address: after "MAC", "MAC ID", "MAC Address" — 12 hex characters (with or without : or - separators). Return WITHOUT separators, just 12 hex chars.
- GPON SN / PON SN: if present
- Hardware version: "HW Ver", "H/W"
- Any other barcode values visible

Even if the image is blurry or at an angle, try your BEST to read each character.
If a field is not visible, set it to null.`

/**
 * POST /vision/equipment
 * Accepts { image: "base64..." } (raw base64 or a data:image/...;base64, URI), sends it
 * to the Gemini REST API together with EQUIP_PROMPT and replies with the structured
 * label fields (brand, model, serial_number, mac_address, gpon_sn, hw_version,
 * equipment_type, barcodes).
 *
 * Responses written through json():
 *   200 — the object returned by the model, with mac_address stripped of separators
 *         and upper-cased and serial_number stripped of whitespace; or
 *         { serial_number: null, barcodes: [] } when the reply is not a JSON object
 *   400 — image field missing, not a string, or empty once the data URI prefix is removed
 *   500 — AI_API_KEY not configured, or the API call failed
 *
 * @param {object} req - incoming request; its body is read with parseBody
 * @param {object} res - response object handed to json()
 */
async function handleEquipment (req, res) {
  if (!cfg.AI_API_KEY) return json(res, 500, { error: 'AI_API_KEY not configured' })

  const body = await parseBody(req)
  const image = body?.image
  // Validate before touching string methods: this code runs outside the try block,
  // so a null body or a numeric/object `image` would otherwise surface as an
  // uncaught TypeError instead of a 400.
  if (typeof image !== 'string' || !image) {
    return json(res, 400, { error: 'Missing image field (base64)' })
  }

  // Strip the data URI prefix if present, keeping the MIME type it declares so the
  // image is not always labelled as JPEG.
  const prefix = image.match(/^data:(image\/[^;]+);base64,/)
  const base64 = prefix ? image.slice(prefix[0].length) : image
  const mimeType = prefix ? prefix[1] : 'image/jpeg'
  if (!base64) return json(res, 400, { error: 'Missing image field (base64)' })

  log(`Vision equipment: received image ${Math.round(base64.length * 3 / 4 / 1024)}KB`)

  try {
    // The key travels in the x-goog-api-key header rather than the query string so
    // it does not end up in URLs captured by proxies or request logs.
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${cfg.AI_MODEL}:generateContent`

    const payload = {
      contents: [{
        parts: [
          { text: EQUIP_PROMPT },
          { inline_data: { mime_type: mimeType, data: base64 } },
        ]
      }],
      generationConfig: {
        temperature: 0.1,
        maxOutputTokens: 1024,
        responseMimeType: 'application/json',
        responseSchema: {
          type: 'object',
          properties: {
            brand:         { type: 'string', nullable: true },
            model:         { type: 'string', nullable: true },
            serial_number: { type: 'string', nullable: true },
            mac_address:   { type: 'string', nullable: true },
            gpon_sn:       { type: 'string', nullable: true },
            hw_version:    { type: 'string', nullable: true },
            equipment_type:{ type: 'string', nullable: true },
            barcodes:      { type: 'array', items: { type: 'string' }, maxItems: 5 },
          },
          required: ['serial_number'],
        },
      }
    }

    const resp = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': cfg.AI_API_KEY,
      },
      body: JSON.stringify(payload),
    })

    if (!resp.ok) {
      const text = await resp.text()
      throw new Error(`Gemini API ${resp.status}: ${text.slice(0, 200)}`)
    }

    const data = await resp.json()
    const text = (data.candidates?.[0]?.content?.parts?.[0]?.text || '').trim()
    log(`Vision equipment response: ${text.slice(0, 400)}`)

    // Strict JSON first, then the outermost {...} span in case the model wrapped the
    // object in extra text. Failures are deliberate best-effort: they fall through
    // to the empty result below.
    let parsed
    try {
      parsed = JSON.parse(text)
    } catch {
      const m = text.match(/\{[\s\S]*\}/)
      if (m) {
        try { parsed = JSON.parse(m[0]) } catch {}
      }
    }

    // Anything other than a JSON object (null, array, bare string/number) cannot
    // carry the expected fields, so answer with the empty result.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return json(res, 200, { serial_number: null, barcodes: [] })
    }

    // Clean MAC: remove separators, uppercase. The typeof guard keeps an unexpected
    // non-string value from throwing and turning a usable reply into a 500.
    if (typeof parsed.mac_address === 'string') {
      parsed.mac_address = parsed.mac_address.replace(/[:\-.\s]/g, '').toUpperCase()
    }
    // Clean serial: removing every whitespace run already covers leading/trailing space
    if (typeof parsed.serial_number === 'string') {
      parsed.serial_number = parsed.serial_number.replace(/\s+/g, '')
    }

    log(`Vision equipment: brand=${parsed.brand} model=${parsed.model} sn=${parsed.serial_number} mac=${parsed.mac_address}`)
    return json(res, 200, parsed)
  } catch (e) {
    log('Vision equipment error:', e.message)
    return json(res, 500, { error: 'Vision extraction failed: ' + e.message })
  }
}

module.exports = { handleBarcodes, extractBarcodes, handleEquipment }