'use strict'

const { WebSocket } = require('ws')
const cfg = require('./config')
const {
  log, json, parseBody, lookupCustomerByPhone, createCommunication, erpFetch,
} = require('./helpers')
const { execTool } = require('./agent')

// Gemini Live API WebSocket endpoint (BidiGenerateContent, v1beta)
const GEMINI_WS = 'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent'
const LIVE_MODEL = cfg.VOICE_MODEL || 'models/gemini-2.5-flash-live-preview'

// Active voice sessions: callSid → { ws, geminiWs, streamSid, meta, startedAt }.
// Also holds transient IVR state under the key `gather:<callSid>`.
const sessions = new Map()

// Reap abandoned `gather:` IVR state after this long (caller hung up mid-Gather).
const GATHER_TTL_MS = 5 * 60 * 1000

// ── Audio conversion: Twilio mulaw 8kHz ↔ Gemini PCM16 16kHz/24kHz ──

// mulaw → linear PCM16 lookup table (ITU-T G.711 decode)
const MULAW_TO_LINEAR = new Int16Array(256)
;(() => {
  for (let i = 0; i < 256; i++) {
    let mu = ~i & 0xFF
    const sign = (mu & 0x80) ? -1 : 1
    const exponent = (mu >> 4) & 0x07
    const mantissa = mu & 0x0F
    let magnitude = ((mantissa << 1) + 33) << (exponent + 2)
    magnitude -= 0x84 // remove the encoder bias
    MULAW_TO_LINEAR[i] = sign * magnitude
  }
})()

/**
 * Encode one linear PCM16 sample as a G.711 mulaw byte (bias method).
 * @param {number} sample - signed 16-bit sample
 * @returns {number} mulaw byte (0–255)
 */
function linearToMulaw (sample) {
  const BIAS = 0x84, CLIP = 32635
  const sign = (sample >> 8) & 0x80
  if (sign) sample = -sample
  if (sample > CLIP) sample = CLIP
  sample += BIAS
  // Find the segment (exponent): highest set bit between 0x100 and 0x4000.
  let exponent = 7
  const expMask = 0x4000
  for (; exponent > 0; exponent--) {
    if (sample & (expMask >> (7 - exponent))) break
  }
  const mantissa = (sample >> (exponent + 3)) & 0x0F
  return ~(sign | (exponent << 4) | mantissa) & 0xFF
}

/**
 * Twilio mulaw 8kHz base64 → PCM16 16kHz base64 (Gemini input format).
 * Upsamples 2x by linear interpolation between neighbouring samples.
 * @param {string} mulawB64 - base64-encoded mulaw audio frame
 * @returns {string} base64-encoded little-endian PCM16 @ 16kHz
 */
function mulawToGemini (mulawB64) {
  const mulawBuf = Buffer.from(mulawB64, 'base64')
  // Decode mulaw → PCM16 @ 8kHz
  const pcm8k = Buffer.alloc(mulawBuf.length * 2)
  for (let i = 0; i < mulawBuf.length; i++) {
    pcm8k.writeInt16LE(MULAW_TO_LINEAR[mulawBuf[i]], i * 2)
  }
  // Upsample 8kHz → 16kHz: emit each sample followed by its midpoint with the next.
  const pcm16k = Buffer.alloc(pcm8k.length * 2)
  for (let i = 0; i < pcm8k.length / 2; i++) {
    const s0 = pcm8k.readInt16LE(i * 2)
    const s1 = (i + 1 < pcm8k.length / 2) ? pcm8k.readInt16LE((i + 1) * 2) : s0
    pcm16k.writeInt16LE(s0, i * 4)
    pcm16k.writeInt16LE(Math.round((s0 + s1) / 2), i * 4 + 2)
  }
  return pcm16k.toString('base64')
}

/**
 * Gemini PCM16 24kHz base64 → Twilio mulaw 8kHz base64.
 * Decimates by taking every 3rd sample (no low-pass filter — some aliasing
 * is possible on bright audio, acceptable for telephone-band speech).
 * @param {string} pcmB64 - base64-encoded little-endian PCM16 @ 24kHz
 * @returns {string} base64-encoded mulaw @ 8kHz
 */
function geminiToMulaw (pcmB64) {
  const pcm24k = Buffer.from(pcmB64, 'base64')
  const samplesIn = pcm24k.length / 2
  const samplesOut = Math.floor(samplesIn / 3)
  const mulawBuf = Buffer.alloc(samplesOut)
  for (let i = 0; i < samplesOut; i++) {
    const sample = pcm24k.readInt16LE(i * 6) // every 3rd sample = every 6th byte
    mulawBuf[i] = linearToMulaw(sample)
  }
  return mulawBuf.toString('base64')
}

/**
 * Minimal XML escaping for text/attribute values interpolated into TwiML.
 * Prevents names/addresses containing `&`, `<`, `>` or `"` from producing
 * invalid XML that Twilio would reject.
 */
function escapeXml (s) {
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
}

// ── Gemini Live tool definitions (different format than OpenAI) ──

/**
 * Convert the OpenAI-style tool schema in agent-tools.json into Gemini Live
 * functionDeclarations. The chat-only `get_chat_link` tool is excluded.
 * @returns {Array<{functionDeclarations: Array}>}
 */
function buildGeminiTools () {
  const openaiTools = require('./agent-tools.json')
  return [{
    functionDeclarations: openaiTools
      .filter(t => t.function.name !== 'get_chat_link') // voice doesn't need chat link
      .map(t => {
        const props = t.function.parameters?.properties || {}
        const parameters = {
          type: 'OBJECT',
          properties: Object.fromEntries(Object.entries(props).map(([k, v]) => [
            k, { type: v.type.toUpperCase(), description: v.description || '' },
          ])),
        }
        // FIX: previously the whole properties map was dropped whenever
        // `required` was empty, hiding all-optional parameters from the model.
        if (t.function.parameters?.required?.length) {
          parameters.required = t.function.parameters.required
        }
        return { name: t.function.name, description: t.function.description, parameters }
      }),
  }]
}

// ── Voice system prompt ──
const VOICE_SYSTEM_PROMPT = `Tu es l'assistant vocal de Gigafibre, fournisseur Internet fibre optique au Québec.
Règles vocales:
- Parle TOUJOURS en français québécois naturel
- Sois conversationnel et chaleureux, comme un vrai agent au téléphone
- Phrases courtes — max 1-2 phrases avant de laisser le client répondre
- Utilise les outils pour consulter le compte en temps réel
- Pour les problèmes techniques, vérifie l'état du modem (check_device_status) avant de suggérer quoi que ce soit
- Signal optique < -25 dBm = problème fibre physique → crée un ticket
- Modem hors ligne > 10 min → suggère un redémarrage
- Ne partage JAMAIS d'infos techniques internes (OLT port, network ID, IP de gestion)
- Si tu ne peux pas résoudre, crée un ticket
- Quand tu obtiens des données d'un outil, résume en langage simple (pas de jargon technique)`

// ── Inbound call handler: lookup caller, build dynamic IVR ──

/**
 * Twilio inbound-call webhook. Looks up the caller by phone number, fetches
 * their actively-subscribed service locations, and answers with TwiML:
 * unknown caller → polite redirect; 0–1 locations → straight to the voice
 * agent; 2+ locations → a digit-press IVR to pick the address.
 *
 * NOTE(review): the XML markup in these TwiML responses was missing from the
 * original source (only the spoken text remained) and has been reconstructed
 * as standard TwiML — confirm voice/language attributes against the deployed
 * Twilio configuration.
 */
async function handleInboundCall (req, res) {
  const body = await parseBody(req)
  const from = body.From || ''
  const callSid = body.CallSid || ''
  log(`Voice IN: ${from} (CallSid: ${callSid})`)

  const customer = await lookupCustomerByPhone(from)
  if (!customer) {
    // Unknown caller — polite redirect to the human line, then hang up.
    const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Say language="fr-CA">Bonjour, bienvenue chez Gigafibre. Nous n'avons pas trouvé votre numéro dans notre système. Veuillez nous joindre au 4 5 0, 6 5 5, 3 2 3 5.</Say>
  <Hangup/>
</Response>`
    res.writeHead(200, { 'Content-Type': 'text/xml' })
    return res.end(twiml)
  }

  // Fetch locations that have active subscriptions (not just any Service Location)
  let locations = []
  try {
    const subFields = JSON.stringify(['service_location', 'item_code', 'custom_description'])
    const subFilters = JSON.stringify({ party_type: 'Customer', party: customer.name, status: 'Active' })
    const r = await erpFetch(`/api/resource/Subscription?filters=${encodeURIComponent(subFilters)}&fields=${encodeURIComponent(subFields)}&limit_page_length=50`)
    const subs = (r.status === 200 && r.data?.data) || []
    // Deduplicate by service_location, keep the service description
    const locMap = new Map()
    for (const s of subs) {
      if (!s.service_location) continue
      if (!locMap.has(s.service_location)) {
        locMap.set(s.service_location, { name: s.service_location, service: s.custom_description || s.item_code })
      }
    }
    // Resolve addresses for each unique location — in parallel (was serial).
    await Promise.all([...locMap.entries()].map(async ([locId, loc]) => {
      try {
        const lr = await erpFetch(`/api/resource/Service Location/${encodeURIComponent(locId)}?fields=["address_line","city"]`)
        if (lr.status === 200 && lr.data?.data) {
          loc.address_line = lr.data.data.address_line || locId
          loc.city = lr.data.data.city || ''
        }
      } catch {
        loc.address_line = locId
        loc.city = ''
      }
    }))
    locations = [...locMap.values()]
  } catch {} // best-effort: fall through with an empty location list

  const firstName = (customer.customer_name || '').split(' ')[0] || 'client'

  if (locations.length === 0) {
    // No active locations — go straight to the voice agent without address context.
    const twiml = buildStreamTwiml(callSid, customer, null, firstName)
    res.writeHead(200, { 'Content-Type': 'text/xml' })
    return res.end(twiml)
  }
  if (locations.length === 1) {
    // Single location — go straight to the voice agent with that context.
    const twiml = buildStreamTwiml(callSid, customer, locations[0], firstName)
    res.writeHead(200, { 'Content-Type': 'text/xml' })
    return res.end(twiml)
  }

  // Multiple locations — dynamic IVR: one digit per address.
  let gatherOptions = ''
  locations.forEach((loc, i) => {
    // '&' is spoken as 'et' so TTS doesn't stumble on it.
    const addr = (loc.address_line || '').replace(/&/g, 'et')
    gatherOptions += `Pour le ${addr}, appuyez ${i + 1}. `
  })
  gatherOptions += `Pour autre chose, appuyez 0.`

  // Store locations for the gather callback; reap if the caller never presses
  // a digit (previously this entry leaked forever).
  const gatherKey = `gather:${callSid}`
  sessions.set(gatherKey, { customer, locations })
  const reaper = setTimeout(() => sessions.delete(gatherKey), GATHER_TTL_MS)
  reaper.unref?.() // don't hold the process open for the reaper

  const redirectUrl = `${cfg.HUB_PUBLIC_URL}/voice/connect-agent?callSid=${callSid}&customer=${encodeURIComponent(customer.name)}`
  const twiml = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Gather numDigits="1" action="${escapeXml(`${cfg.HUB_PUBLIC_URL}/voice/gather`)}" method="POST">
    <Say language="fr-CA">Bonjour ${escapeXml(firstName)}! ${escapeXml(gatherOptions)}</Say>
  </Gather>
  <Say language="fr-CA">Je n'ai pas reçu de réponse. Je vous transfère à un assistant.</Say>
  <Redirect method="POST">${escapeXml(redirectUrl)}</Redirect>
</Response>`
  res.writeHead(200, { 'Content-Type': 'text/xml' })
  res.end(twiml)
}

// ── Gather callback: customer pressed a digit ──

/**
 * Twilio <Gather> action callback. Maps the pressed digit back to the
 * location list stashed by handleInboundCall and connects the voice agent
 * (digit 0 / out-of-range → no location context).
 */
async function handleGather (req, res) {
  const body = await parseBody(req)
  const digit = body.Digits || '0'
  const callSid = body.CallSid || ''
  const session = sessions.get(`gather:${callSid}`)
  sessions.delete(`gather:${callSid}`)
  if (!session) {
    res.writeHead(200, { 'Content-Type': 'text/xml' })
    return res.end(`<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Say language="fr-CA">Erreur de session. Veuillez rappeler.</Say>
  <Hangup/>
</Response>`)
  }
  const { customer, locations } = session
  const idx = parseInt(digit, 10)
  const selectedLoc = (idx >= 1 && idx <= locations.length) ? locations[idx - 1] : null
  const firstName = (customer.customer_name || '').split(' ')[0] || 'client'
  const twiml = buildStreamTwiml(callSid, customer, selectedLoc, firstName)
  res.writeHead(200, { 'Content-Type': 'text/xml' })
  res.end(twiml)
}

// ── Build TwiML that opens a Media Stream to our WebSocket ──

/**
 * Build the TwiML that greets the caller and opens a bidirectional
 * <Connect><Stream> to /voice/ws. Customer context travels as a base64-JSON
 * <Parameter name="meta"> (handleMediaStream decodes exactly that).
 * @param {string} callSid
 * @param {object} customer - ERPNext Customer doc ({ name, customer_name })
 * @param {?object} location - selected service location or null
 * @param {string} firstName - caller first name for the greeting
 * @returns {string} TwiML document
 */
function buildStreamTwiml (callSid, customer, location, firstName) {
  const wsUrl = cfg.HUB_PUBLIC_URL.replace('https://', 'wss://').replace('http://', 'ws://') + '/voice/ws'
  const meta = JSON.stringify({
    callSid,
    customerId: customer.name,
    customerName: customer.customer_name,
    location: location ? { name: location.name, address: location.address_line, city: location.city } : null,
  })
  // FIX: meta was computed but never embedded (the <Parameter> element was
  // missing) — handleMediaStream expects customParameters.meta as base64 JSON.
  const metaB64 = Buffer.from(meta).toString('base64')
  const locContext = location ? ` pour votre adresse au ${(location.address_line || '').replace(/&/g, 'et')}` : ''
  return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Say language="fr-CA">Parfait ${escapeXml(firstName)}, je vous mets en ligne avec notre assistant${escapeXml(locContext)}. Un moment.</Say>
  <Connect>
    <Stream url="${escapeXml(wsUrl)}">
      <Parameter name="meta" value="${metaB64}"/>
    </Stream>
  </Connect>
</Response>`
}

// ── Connect to agent directly (no location selection) ──

/**
 * Fallback entry point (used by the IVR <Redirect>): connect the voice agent
 * without a selected location. Accepts callSid/customer either in the POST
 * body or in the query string.
 */
async function handleConnectAgent (req, res) {
  const body = await parseBody(req)
  // FIX: req.url is a plain string in Node's http module — it has no
  // `searchParams`. Parse it explicitly; the base URL only serves parsing.
  const qs = new URL(req.url || '/', 'http://localhost').searchParams
  const callSid = body.CallSid || qs.get('callSid') || ''
  const customerName = qs.get('customer') || ''
  let customer = null
  if (customerName) {
    try {
      const r = await erpFetch(`/api/resource/Customer/${encodeURIComponent(customerName)}`)
      if (r.status === 200) customer = r.data.data
    } catch {} // best-effort: fall back to a stub customer below
  }
  const firstName = customer ? (customer.customer_name || '').split(' ')[0] : ''
  const twiml = buildStreamTwiml(
    callSid,
    customer || { name: customerName, customer_name: customerName },
    null,
    firstName || 'client',
  )
  res.writeHead(200, { 'Content-Type': 'text/xml' })
  res.end(twiml)
}

// ── WebSocket handler: Twilio Media Stream ↔ Gemini Live ──

/**
 * Bridge one Twilio Media Stream WebSocket to a Gemini Live session:
 * 'start' opens Gemini with the customer context, 'media' frames are
 * transcoded mulaw→PCM16 and forwarded, 'stop' (or a raw socket close —
 * previously a session-map leak) tears everything down and logs the call.
 * @param {import('ws').WebSocket} ws - Twilio media stream socket
 * @param {import('http').IncomingMessage} req
 */
function handleMediaStream (ws, req) {
  let geminiWs = null
  let streamSid = null
  let meta = null
  let callSid = null

  // Idempotent teardown: close Gemini, log the call, drop session state.
  // Shared by the 'stop' event and the raw 'close' event so a dropped
  // connection without 'stop' no longer leaks the sessions entry.
  const teardown = () => {
    if (geminiWs) { try { geminiWs.close() } catch {} }
    if (!callSid) return
    const session = sessions.get(callSid)
    if (session) {
      const duration = Math.floor((Date.now() - session.startedAt) / 1000)
      logVoiceSession(meta, duration) // fire-and-forget; logs its own errors
      sessions.delete(callSid)
    }
  }

  ws.on('message', async (data) => {
    let msg
    try { msg = JSON.parse(data) } catch { return }
    switch (msg.event) {
      case 'connected':
        log('Twilio Media Stream connected')
        break

      case 'start':
        streamSid = msg.start?.streamSid
        callSid = msg.start?.callSid
        // Decode metadata passed from TwiML (<Parameter name="meta">, base64 JSON)
        {
          const metaB64 = msg.start?.customParameters?.meta
          if (metaB64) {
            try { meta = JSON.parse(Buffer.from(metaB64, 'base64').toString()) } catch {}
          }
        }
        log(`Voice session started: ${callSid} customer=${meta?.customerId}`)
        geminiWs = await openGeminiSession(ws, streamSid, meta)
        if (geminiWs) {
          sessions.set(callSid, { ws, geminiWs, streamSid, meta, startedAt: Date.now() })
        }
        break

      case 'media':
        // Forward caller audio to Gemini (mulaw 8k → PCM16 16k)
        if (geminiWs?.readyState === WebSocket.OPEN && msg.media?.payload) {
          try {
            const pcmB64 = mulawToGemini(msg.media.payload)
            geminiWs.send(JSON.stringify({
              realtimeInput: { audio: { data: pcmB64, mimeType: 'audio/pcm;rate=16000' } },
            }))
          } catch {} // drop malformed frames rather than killing the call
        }
        break

      case 'stop':
        log(`Voice session ended: ${callSid}`)
        teardown()
        break
    }
  })

  ws.on('close', () => { teardown() })
}

// ── Open Gemini Live WebSocket session ──

/**
 * Open and configure a Gemini Live session for one call.
 * Resolves with the ready WebSocket once `setupComplete` arrives, or null on
 * missing API key, setup timeout (10s), or connection error. After setup, the
 * socket's own message handler streams audio back to Twilio, logs
 * transcriptions, and executes tool calls via execTool.
 * @param {import('ws').WebSocket} twilioWs - the Twilio media stream socket
 * @param {string} streamSid - Twilio stream id (needed for outbound frames)
 * @param {?object} meta - decoded customer context from TwiML
 * @returns {Promise<?import('ws').WebSocket>}
 */
async function openGeminiSession (twilioWs, streamSid, meta) {
  if (!cfg.AI_API_KEY) {
    log('Voice agent: no AI_API_KEY configured')
    return null
  }
  return new Promise((resolve) => {
    const url = `${GEMINI_WS}?key=${cfg.AI_API_KEY}`
    const ws = new WebSocket(url)
    let setupDone = false
    const timeout = setTimeout(() => {
      if (!setupDone) { log('Gemini Live: setup timeout'); ws.close(); resolve(null) }
    }, 10000)

    ws.on('open', () => {
      // Build per-call context from the customer metadata
      const locContext = meta?.location
        ? `\nAdresse sélectionnée: ${meta.location.address}, ${meta.location.city} (${meta.location.name})`
        : ''
      ws.send(JSON.stringify({
        setup: {
          model: LIVE_MODEL,
          generationConfig: {
            responseModalities: ['AUDIO'],
            speechConfig: {
              voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Aoede' } },
            },
          },
          systemInstruction: {
            parts: [{ text: `${VOICE_SYSTEM_PROMPT}\n\nContexte: Client ${meta?.customerName || 'inconnu'} (${meta?.customerId || '?'})${locContext}` }],
          },
          tools: buildGeminiTools(),
          realtimeInputConfig: {
            automaticActivityDetection: {
              disabled: false,
              startOfSpeechSensitivity: 'HIGH',
              endOfSpeechSensitivity: 'MEDIUM',
              silenceDurationMs: 1200,
            },
            activityHandling: 'START_OF_ACTIVITY_INTERRUPTS',
          },
          inputAudioTranscription: {},
          outputAudioTranscription: {},
        },
      }))
    })

    ws.on('message', async (data) => {
      let msg
      try { msg = JSON.parse(data.toString()) } catch { return }

      // Setup confirmation → session is usable
      if (msg.setupComplete) {
        setupDone = true
        clearTimeout(timeout)
        log(`Gemini Live session ready (model: ${LIVE_MODEL})`)
        resolve(ws)
        return
      }

      // Audio response from Gemini → transcode (PCM16 24k → mulaw 8k) and relay
      if (msg.serverContent?.modelTurn?.parts) {
        for (const part of msg.serverContent.modelTurn.parts) {
          if (part.inlineData?.data) {
            try {
              const mulawB64 = geminiToMulaw(part.inlineData.data)
              if (twilioWs.readyState === WebSocket.OPEN) {
                twilioWs.send(JSON.stringify({
                  event: 'media',
                  streamSid,
                  media: { payload: mulawB64 },
                }))
              }
            } catch {} // skip an undecodable chunk, keep the call alive
          }
        }
      }

      // Transcriptions (for logging only, truncated)
      if (msg.serverContent?.outputTranscription?.text) {
        log(`Voice AI: ${msg.serverContent.outputTranscription.text.substring(0, 100)}`)
      }
      if (msg.serverContent?.inputTranscription?.text) {
        log(`Voice Customer: ${msg.serverContent.inputTranscription.text.substring(0, 100)}`)
      }

      // Tool calls from Gemini → execute sequentially, reply with results
      if (msg.toolCall?.functionCalls) {
        const responses = []
        for (const fc of msg.toolCall.functionCalls) {
          log(`Voice tool call: ${fc.name}(${JSON.stringify(fc.args)})`)
          const result = await execTool(fc.name, fc.args || {})
          log(`Voice tool result: ${fc.name} →`, JSON.stringify(result).substring(0, 200))
          responses.push({ id: fc.id, name: fc.name, response: { result } })
        }
        ws.send(JSON.stringify({ toolResponse: { functionResponses: responses } }))
      }
    })

    ws.on('error', (e) => {
      log('Gemini Live WS error:', e.message)
      if (!setupDone) { clearTimeout(timeout); resolve(null) }
    })
    ws.on('close', () => { log('Gemini Live session closed') })
  })
}

// ── Log voice session to ERPNext ──

/**
 * Best-effort: record the finished call as an ERPNext Communication linked
 * to the Customer. Errors are logged, never thrown.
 * NOTE(review): meta.phone is read here but buildStreamTwiml never sets a
 * `phone` field on meta, so phone_no is always '' — confirm whether the
 * caller number should be threaded through the metadata.
 * @param {?object} meta - session metadata (customerId, customerName, location)
 * @param {number} duration - call duration in whole seconds
 */
async function logVoiceSession (meta, duration) {
  if (!meta?.customerId) return
  try {
    const dMin = Math.floor(duration / 60), dSec = duration % 60
    const durStr = `${dMin}m${dSec.toString().padStart(2, '0')}s`
    await createCommunication({
      communication_type: 'Communication',
      communication_medium: 'Phone',
      sent_or_received: 'Received',
      sender: 'voice-ai@gigafibre.ca',
      sender_full_name: 'Gigafibre Voice AI',
      phone_no: meta.phone || '',
      content: `Appel vocal IA — Client: ${meta.customerName}, Durée: ${durStr}${meta.location ? ', Adresse: ' + meta.location.address : ''}`,
      subject: `Appel vocal IA: ${meta.customerName}`,
      reference_doctype: 'Customer',
      reference_name: meta.customerId,
      status: 'Linked',
    })
    log(`Voice session logged: ${meta.customerId} (${durStr})`)
  } catch (e) {
    log('Voice session log error:', e.message)
  }
}

module.exports = { handleInboundCall, handleGather, handleConnectAgent, handleMediaStream }