gigafibre-fsm/services/targo-hub/lib/serviceability.js
louispaulb 105b0b2a51 feat(ops): per-address competitor column via Québec IHV open data
Replaces the reCAPTCHA-blocked Cogeco scraper with the authoritative Québec
"Accès Internet haute vitesse" open ArcGIS data (providers declared to the
gov by the providers themselves — validated to match Cogeco's own popup).

- hub lib/serviceability.js: address → ADR (Adresse_complete → IdAdresse +
  Etat_hiv, civic+postal match w/ JS street disambiguation) → FRN table
  (IdAdresse → FRN_nom providers + signup URLs). Referer-gated proxy, disk
  cache (90d), polite rate limit. Routes /serviceability/lookup[-batch].
- ops ReportInternetCherPage: "Concurrence (FSI)" column — provider chips
  (Cogeco highlighted), batch-fetched on demand with progress; "Cogeco
  disponible" summary card = churn-risk count; manual Cogeco verify icon kept.

Validated live: 37 Chemin Noël → Cogeco+Targo, 147 Montée Richard → Targo
only, Repentigny → Bell+Cogeco. Endpoints documented in
memory/reference_quebec_ihv.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 22:40:47 -04:00

245 lines
11 KiB
JavaScript

'use strict'
/**
* serviceability.js — per-address Internet provider lookup using Québec's
* "Accès Internet haute vitesse" open ArcGIS data (the same data behind the
* gouv.qc.ca interactive map). Authoritative: providers are DECLARED to the
* government by the providers themselves. Replaces the abandoned
* cogeco-checker (Cogeco's own site is gated by reCAPTCHA Enterprise).
*
* Two public ArcGIS services (full recipe in memory/reference_quebec_ihv.md):
* ADR (Adresse_S) → address points: Adresse_complete, Etat_hiv, IdAdresse
* FRN (Fournisseurs_S) → TABLE keyed by IdAdresse: FRN_nom (provider),
* FRN_URL_inscrip (signup link), Date_fin
* The usrsvcs proxy is REFERER-GATED: send `Referer: https://www.quebec.ca/`
* (a direct call 403s). No token, no anti-bot.
*
* Flow: address → ADR (civic + postal, JS street disambiguation) → IdAdresse
* → FRN (where IdAdresse=…) → [{nom, url, date_fin}]
*
* Results are cached on disk (data/serviceability-cache.json) keyed by a
* normalized address, so re-loads are instant and we stay a polite consumer
* of the gov API (rate-limited, low concurrency).
*
* Routes:
* POST /serviceability/lookup { address1, city, zip } → one result
* POST /serviceability/lookup-batch { items:[{key,address1,city,zip}] }
* GET /serviceability/cache-stats
*/
const fs = require('fs')
const path = require('path')
const { json, parseBody, httpRequest, log } = require('./helpers')
// Host shared by both ArcGIS query endpoints below.
const ARCGIS_BASE = 'https://utility.arcgis.com'
// Query endpoint for the ADR layer (address points); used to resolve an
// address to its IdAdresse / Etat_hiv.
const ADR_PATH = '/usrsvcs/servers/396469b496554883b36948d66eba40f5/rest/services/ADR/FeatureServer/0/query'
// Query endpoint for the FRN table (providers); queried by IdAdresse.
const FRN_PATH = '/usrsvcs/servers/5aa672072a9f43129b97b53d06eb3ae9/rest/services/FRN/FeatureServer/0/query'
// Sent as the Referer header on every ArcGIS request.
const REFERER = 'https://www.quebec.ca/'
// Display label for each Etat_hiv code; codes missing from this map are shown
// as their raw value by lookupProviders.
const ETAT_LABEL = { 1: 'Desservie', 2: 'Projet en cours', 3: 'Projet à venir', 4: 'Non admissible' }
// On-disk location of the lookup cache: <this dir>/../data/serviceability-cache.json
const CACHE_FILE = path.join(__dirname, '..', 'data', 'serviceability-cache.json')
const CACHE_TTL_MS = 90 * 24 * 3600 * 1000 // gov data refreshes periodically; 90d is safe
const BATCH_MAX = 80 // max items processed per /lookup-batch call (bounds latency)
// ── disk-persisted cache ────────────────────────────────────────────────────
// In-memory lookup cache, seeded once at module load from CACHE_FILE when that
// file exists. A missing file leaves the cache empty; an unreadable or
// malformed file is logged and also leaves the cache empty.
let cache = new Map()
try {
  const cacheFilePresent = fs.existsSync(CACHE_FILE)
  if (cacheFilePresent) {
    const rawJson = fs.readFileSync(CACHE_FILE, 'utf8')
    const persisted = JSON.parse(rawJson)
    cache = new Map(Object.entries(persisted))
    log(`[serviceability] cache loaded: ${cache.size} entries`)
  }
} catch (err) {
  log('[serviceability] cache load failed:', err.message)
}
// Debounce handle: non-null while a cache write is already scheduled.
let saveTimer = null

/**
 * Schedule a write of the whole in-memory cache to CACHE_FILE two seconds
 * from now. Calls made while a write is already pending do nothing, so a burst
 * of cache updates produces a single disk write. The parent directory is
 * created if needed. Write failures are logged, never thrown.
 */
function saveCacheSoon () {
  const flush = () => {
    // Clear the handle first so updates arriving after this point can schedule
    // a new write.
    saveTimer = null
    try {
      const dir = path.dirname(CACHE_FILE)
      fs.mkdirSync(dir, { recursive: true })
      const payload = JSON.stringify(Object.fromEntries(cache))
      fs.writeFileSync(CACHE_FILE, payload)
    } catch (err) {
      log('[serviceability] cache save failed:', err.message)
    }
  }

  if (!saveTimer) saveTimer = setTimeout(flush, 2000)
}
// ── polite rate limiter (concurrency + min interval) ────────────────────────
/**
 * Read an integer setting from process.env.
 * @param {string} name      environment variable name
 * @param {number} fallback  value used when the variable is unset, is not an
 *                           integer, or is below `min`
 * @param {number} min       smallest accepted value
 * @returns {number}
 */
function envInt (name, fallback, min) {
  const parsed = Number.parseInt(process.env[name] || '', 10)
  return Number.isInteger(parsed) && parsed >= min ? parsed : fallback
}
// Must be >= 1: with 0 every call would sit in the queue forever, and a NaN
// would make the `active >= MAX_CONCURRENT` check always false (no limit).
const MAX_CONCURRENT = envInt('IHV_MAX_CONCURRENT', 3, 1)
const MIN_INTERVAL_MS = envInt('IHV_MIN_INTERVAL_MS', 120, 0)
let active = 0 // calls currently holding a slot (waiting for their start time, or running)
let lastStart = 0 // epoch ms of the most recently RESERVED start time
const queue = [] // deferred `run` closures waiting for a free slot

/**
 * Run `fn` under the limiter: at most MAX_CONCURRENT calls hold a slot at any
 * time, and consecutive start times are spaced at least MIN_INTERVAL_MS apart.
 *
 * @param {() => Promise<*>|*} fn  work to run; called with no arguments
 * @returns {Promise<*>} settles with fn's outcome. A synchronous throw from
 *                       fn becomes a rejection.
 */
function gate (fn) {
  return new Promise((resolve, reject) => {
    const run = () => {
      active++
      const now = Date.now()
      // Reserve the start time immediately. If lastStart were only updated
      // when the timer fires, every caller admitted in the same tick would
      // compute the same wait and they would all start together.
      const startAt = Math.max(now, lastStart + MIN_INTERVAL_MS)
      lastStart = startAt
      setTimeout(() => {
        // Calling fn inside a Promise executor turns a synchronous throw into
        // a rejection, so the slot below is always released.
        new Promise((settle) => settle(fn()))
          .then(resolve, reject)
          .finally(() => {
            active--
            if (queue.length) queue.shift()()
          })
      }, startAt - now)
    }
    if (active >= MAX_CONCURRENT) queue.push(run); else run()
  })
}
// ── helpers ─────────────────────────────────────────────────────────────────
/**
 * Canonicalise free text for comparison and cache keys: strip diacritics,
 * lowercase, turn every character outside [a-z0-9 ] into a space, collapse
 * runs of whitespace and trim. Falsy input yields ''.
 * @param {*} s
 * @returns {string}
 */
function norm (s) {
  const decomposed = String(s || '').normalize('NFD')
  const unaccented = decomposed.replace(/[\u0300-\u036f]/g, '')
  const alnumOnly = unaccented.toLowerCase().replace(/[^a-z0-9 ]/g, ' ')
  return alnumOnly.replace(/\s+/g, ' ').trim()
}
/**
 * Make a value safe to embed between single quotes in a `where` clause by
 * doubling every single quote. LIKE wildcards (% and _) are left untouched.
 * @param {*} s  value; coerced with String()
 * @returns {string}
 */
function sqlEsc (s) {
  return String(s).split("'").join("''")
}
/**
 * Extract the civic number that starts an address line: one or more digits
 * optionally followed by a single letter (e.g. "147A").
 * @param {*} address1
 * @returns {string|null} the civic number, or null when the trimmed input
 *                        does not start with a digit
 */
function parseCivic (address1) {
  const trimmed = String(address1 || '').trim()
  const found = /^(\d+[a-zA-Z]?)/.exec(trimmed)
  if (found === null) return null
  return found[1]
}
// Generic tokens that do not help tell one street from another: street types,
// "saint"/"sainte" abbreviations and French articles/prepositions.
// NOTE(review): tokens are compared after norm() has stripped diacritics, so
// the accented spellings in this list ('côte', 'allée') look unreachable.
const STREET_TYPES = new Set(['rue', 'ch', 'chemin', 'rang', 'montee', 'mtee', 'boul', 'boulevard',
  'av', 'ave', 'avenue', 'route', 'rte', 'place', 'pl', 'cote', 'côte', 'terrasse', 'impasse',
  'croissant', 'allee', 'allée', 'st', 'ste', 'saint', 'sainte', 'de', 'du', 'des', 'la', 'le', 'les'])

/**
 * Distinctive words of an address line: the normalized tokens, minus empty
 * tokens, tokens that start with a digit (civic numbers and the like) and
 * generic street-type words. Input order is preserved.
 * @param {*} address1
 * @returns {string[]}
 */
function streetWords (address1) {
  const distinctive = []
  for (const token of norm(address1).split(' ')) {
    if (token === '') continue
    if (/^\d/.test(token)) continue
    if (STREET_TYPES.has(token)) continue
    distinctive.push(token)
  }
  return distinctive
}
/**
 * Issue one rate-limited GET against an ArcGIS query endpoint and return the
 * attribute objects of the returned features.
 * @param {string} pathBase  endpoint path (without query string)
 * @param {Object} params    query parameters; values are URI-encoded, keys are
 *                           used as-is
 * @returns {Promise<Object[]>} `attributes` of every feature
 * @throws {Error} when the status is not 200 or the body has no `features`
 *                 array; the message carries the ArcGIS error message when the
 *                 body provides one, else the HTTP status
 */
async function arcgisQuery (pathBase, params) {
  const pairs = []
  for (const [name, value] of Object.entries(params)) {
    pairs.push(`${name}=${encodeURIComponent(value)}`)
  }
  const target = `${pathBase}?${pairs.join('&')}`
  const requestOptions = { method: 'GET', headers: { Referer: REFERER }, timeout: 15000 }

  const response = await gate(() => httpRequest(ARCGIS_BASE, target, requestOptions))
  const payload = response.data
  const usable = response.status === 200 && payload && Array.isArray(payload.features)
  if (!usable) {
    const detail = payload && payload.error ? payload.error.message : `HTTP ${response.status}`
    throw new Error('ArcGIS query failed: ' + detail)
  }
  return payload.features.map(feature => feature.attributes)
}
/**
 * Resolve a postal address to a single ADR record.
 *
 * Strategy:
 *   1. When `zip` is a well-formed postal code (A1A1A1 once whitespace is
 *      removed), query by civic number prefix + postal-code suffix.
 *   2. Otherwise, or when step 1 finds nothing, query by civic number prefix +
 *      the longest distinctive street word (only if it is purely a-z), then
 *      prefer rows whose normalized address contains the normalized city.
 * When several rows remain, the one with the highest score wins; on a tie the
 * earliest row is kept.
 *
 * @param {{address1: *, city: *, zip: *}} address
 * @returns {Promise<Object|null>} attributes of the chosen ADR row (IdAdresse,
 *          Adresse_complete, Etat_hiv, Type_adresse), or null when address1
 *          has no civic number or nothing matched
 * @throws {Error} propagated from arcgisQuery
 */
async function resolveAddress ({ address1, city, zip }) {
  const civic = parseCivic(address1)
  if (!civic) return null

  const postal = String(zip || '').replace(/\s+/g, '').toUpperCase()
  const target = norm(`${address1} ${city || ''}`)
  const wantedWords = new Set(streetWords(address1))

  // Score = shared distinctive street words, +0.5 when the candidate mentions
  // the city, +5 when the whole normalized candidate equals the input.
  const scoreOf = (row) => {
    const cand = norm(row.Adresse_complete)
    const candWords = new Set(streetWords(row.Adresse_complete))
    let overlap = 0
    wantedWords.forEach((word) => { if (candWords.has(word)) overlap++ })
    const cityBonus = city && cand.includes(norm(city)) ? 0.5 : 0
    const exactBonus = cand === target ? 5 : 0
    return overlap + cityBonus + exactBonus
  }

  // Highest-scoring row; strict ">" keeps the earliest row on ties.
  const pickBest = (rows) => {
    let winner = null
    let winnerScore = -1
    for (const row of rows) {
      const rowScore = scoreOf(row)
      if (rowScore > winnerScore) {
        winnerScore = rowScore
        winner = row
      }
    }
    return winner
  }

  // 1) civic + postal — most reliable (postal is embedded in Adresse_complete).
  const postalUsable = postal && /^[A-Z]\d[A-Z]\d[A-Z]\d$/.test(postal)
  if (postalUsable) {
    const where = `Adresse_complete LIKE '${sqlEsc(civic)} %' AND Adresse_complete LIKE '%${sqlEsc(postal)}'`
    const rows = await arcgisQuery(ADR_PATH, {
      where,
      outFields: 'IdAdresse,Adresse_complete,Etat_hiv,Type_adresse',
      returnGeometry: 'false',
      resultRecordCount: '25',
      f: 'json',
    })
    if (rows.length === 1) return rows[0]
    if (rows.length > 1) return pickBest(rows)
  }

  // 2) fallback: civic + distinctive street word, then JS-filter by city.
  const [longestWord] = streetWords(address1).sort((a, b) => b.length - a.length)
  if (longestWord && /^[a-z]+$/.test(longestWord)) {
    const where = `Adresse_complete LIKE '${sqlEsc(civic)} %' AND UPPER(Adresse_complete) LIKE '%${sqlEsc(longestWord.toUpperCase())}%'`
    const rows = await arcgisQuery(ADR_PATH, {
      where,
      outFields: 'IdAdresse,Adresse_complete,Etat_hiv,Type_adresse',
      returnGeometry: 'false',
      resultRecordCount: '50',
      f: 'json',
    })
    let pool = rows
    if (city) {
      const cityNorm = norm(city)
      const inCity = rows.filter(row => norm(row.Adresse_complete).includes(cityNorm))
      // Only narrow to the city when that leaves at least one candidate.
      if (inCity.length) pool = inCity
    }
    if (pool.length) return pickBest(pool)
  }

  return null
}
/**
 * List the providers recorded in the FRN table for one address.
 * Rows with an empty provider name are skipped, duplicate names keep their
 * first occurrence, and the list is sorted by name using French collation.
 * @param {*} idAdresse  IdAdresse value from an ADR row
 * @returns {Promise<Array<{nom: string, url: (string|null), date_fin: *}>>}
 * @throws {Error} propagated from arcgisQuery
 */
async function fetchProviders (idAdresse) {
  const rows = await arcgisQuery(FRN_PATH, {
    where: `IdAdresse='${sqlEsc(idAdresse)}'`,
    outFields: 'FRN_nom,FRN_URL_inscrip,Date_fin',
    returnGeometry: 'false',
    resultRecordCount: '50',
    f: 'json',
  })

  // A Map keeps insertion order, so the first row seen for a name wins.
  const byName = new Map()
  for (const row of rows) {
    const nom = (row.FRN_nom || '').trim()
    if (nom === '' || byName.has(nom)) continue
    byName.set(nom, {
      nom,
      url: row.FRN_URL_inscrip || null,
      date_fin: row.Date_fin || null,
    })
  }

  return [...byName.values()].sort((a, b) => a.nom.localeCompare(b.nom, 'fr'))
}
/**
 * Main entry: address → provider list, served from the cache when possible.
 *
 * The cache key is the normalized address1 plus the postal code (whitespace
 * removed, upper-cased), or the normalized city when no postal code is given.
 * Both matched and "address not found" results are cached and scheduled for a
 * disk write; failures are returned but never cached, so the next call
 * retries.
 *
 * @param {{address1: *, city: *, zip: *}} address
 * @returns {Promise<Object>} one of:
 *   - match:     { matched: true, idAdresse, adresse_complete, etat_hiv,
 *                  etat_label, type_adresse, providers:[{nom,url,date_fin}],
 *                  cogeco }
 *   - not found: { matched: false, etat_label: 'Adresse introuvable',
 *                  providers: [], cogeco: false }
 *   - failure:   { matched: false, error, etat_label: 'Erreur',
 *                  providers: [], cogeco: false }
 *   Cache hits carry an extra `cached: true`.
 */
async function lookupProviders ({ address1, city, zip }) {
  const postal = String(zip || '').replace(/\s+/g, '').toUpperCase()
  const key = `${norm(address1)}|${postal || norm(city)}`

  const entry = cache.get(key)
  const isFresh = entry && (Date.now() - entry.ts) < CACHE_TTL_MS
  if (isFresh) return { ...entry.v, cached: true }

  let result
  try {
    const adr = await resolveAddress({ address1, city, zip })
    if (adr) {
      const providers = await fetchProviders(adr.IdAdresse)
      const hasCogeco = providers.some(p => /cogeco/i.test(p.nom))
      result = {
        matched: true,
        idAdresse: adr.IdAdresse,
        adresse_complete: adr.Adresse_complete,
        etat_hiv: adr.Etat_hiv,
        // Unknown codes fall back to their raw value.
        etat_label: ETAT_LABEL[adr.Etat_hiv] || String(adr.Etat_hiv),
        type_adresse: adr.Type_adresse || null,
        providers,
        cogeco: hasCogeco,
      }
    } else {
      result = { matched: false, etat_label: 'Adresse introuvable', providers: [], cogeco: false }
    }
  } catch (err) {
    // Don't cache transient errors — let the next call retry.
    return { matched: false, error: err.message, etat_label: 'Erreur', providers: [], cogeco: false }
  }

  cache.set(key, { ts: Date.now(), v: result })
  saveCacheSoon()
  return result
}
// ── HTTP handler ─────────────────────────────────────────────────────────────
/**
 * Route handler for the /serviceability/* endpoints.
 *
 *   GET  /serviceability/cache-stats   → { entries, ttl_days }
 *   POST /serviceability/lookup        body { address1, city, zip }
 *                                      → one lookupProviders result
 *                                      (400 when address1 is missing)
 *   POST /serviceability/lookup-batch  body { items:[{key,address1,city,zip}] }
 *                                      → { results: {key → result}, processed,
 *                                          batch_max } (400 when items is
 *                                          empty or not an array)
 *   anything else                      → 404
 *
 * At most BATCH_MAX items are processed per batch call; the rest are dropped.
 * Results are keyed by each item's `key`, or by its index when `key` is
 * null/undefined.
 *
 * @param {Object} req     incoming request, passed to parseBody
 * @param {Object} res     response, passed to json
 * @param {string} method  HTTP method
 * @param {string} path    request path (shadows the `path` module in here)
 * @returns {Promise<*>} whatever `json` returns
 */
async function handle (req, res, method, path) {
  if (path === '/serviceability/cache-stats' && method === 'GET') {
    return json(res, 200, { entries: cache.size, ttl_days: CACHE_TTL_MS / 86400000 })
  }
  if (path === '/serviceability/lookup' && method === 'POST') {
    // A null/undefined parsed body is treated as an empty one so it yields
    // the 400 below, not a TypeError on property access.
    const b = (await parseBody(req)) || {}
    if (!b.address1) return json(res, 400, { error: 'address1 required' })
    return json(res, 200, await lookupProviders(b))
  }
  if (path === '/serviceability/lookup-batch' && method === 'POST') {
    const b = (await parseBody(req)) || {}
    const items = Array.isArray(b.items) ? b.items.slice(0, BATCH_MAX) : []
    if (!items.length) return json(res, 400, { error: 'items[] required' })
    const results = await Promise.all(items.map(async (it, i) => {
      // Items come from the client. null/undefined would make lookupProviders'
      // parameter destructuring throw and fail the whole batch, and any other
      // non-object would be looked up (and cached) as an empty address.
      // Report a per-item error instead.
      if (it === null || typeof it !== 'object') {
        return [i, { matched: false, error: 'invalid item', etat_label: 'Erreur', providers: [], cogeco: false }]
      }
      const r = await lookupProviders(it)
      return [it.key != null ? it.key : i, r]
    }))
    return json(res, 200, { results: Object.fromEntries(results), processed: items.length, batch_max: BATCH_MAX })
  }
  return json(res, 404, { error: 'Not found' })
}
// Public surface: the HTTP route handler and the programmatic lookup.
module.exports = { handle, lookupProviders }