Replaces the reCAPTCHA-blocked Cogeco scraper with the authoritative Québec "Accès Internet haute vitesse" open ArcGIS data (providers declared to the government by the providers themselves — validated to match Cogeco's own popup).

- hub lib/serviceability.js: address → ADR (Adresse_complete → IdAdresse + Etat_hiv, civic+postal match w/ JS street disambiguation) → FRN table (IdAdresse → FRN_nom providers + signup URLs). Referer-gated proxy, disk cache (90d), polite rate limit. Routes /serviceability/lookup[-batch].
- ops ReportInternetCherPage: "Concurrence (FSI)" column — provider chips (Cogeco highlighted), batch-fetched on demand with progress; "Cogeco disponible" summary card = churn-risk count; manual Cogeco verify icon kept.

Validated live: 37 Chemin Noël → Cogeco+Targo, 147 Montée Richard → Targo only, Repentigny → Bell+Cogeco.
Endpoints documented in memory/reference_quebec_ihv.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
245 lines
11 KiB
JavaScript
245 lines
11 KiB
JavaScript
'use strict'
|
|
/**
 * serviceability.js — per-address Internet provider lookup using Québec's
 * "Accès Internet haute vitesse" open ArcGIS data (the same data behind the
 * gouv.qc.ca interactive map). Authoritative: providers are DECLARED to the
 * government by the providers themselves. Replaces the abandoned
 * cogeco-checker (Cogeco's own site is gated by reCAPTCHA Enterprise).
 *
 * Two public ArcGIS services (full recipe in memory/reference_quebec_ihv.md):
 *   ADR (Adresse_S)      → address points: Adresse_complete, Etat_hiv, IdAdresse
 *   FRN (Fournisseurs_S) → TABLE keyed by IdAdresse: FRN_nom (provider),
 *                          FRN_URL_inscrip (signup link), Date_fin
 * The usrsvcs proxy is REFERER-GATED: send `Referer: https://www.quebec.ca/`
 * (a direct call 403s). No token, no anti-bot.
 *
 * Flow: address → ADR (civic + postal, JS street disambiguation) → IdAdresse
 *       → FRN (where IdAdresse=…) → [{nom, url, date_fin}]
 *
 * Results are cached on disk (data/serviceability-cache.json) keyed by a
 * normalized address, so re-loads are instant and we stay a polite consumer
 * of the gov API (rate-limited, low concurrency).
 *
 * Routes:
 *   POST /serviceability/lookup        { address1, city, zip } → one result
 *   POST /serviceability/lookup-batch  { items:[{key,address1,city,zip}] }
 *   GET  /serviceability/cache-stats
 */
|
|
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
const { json, parseBody, httpRequest, log } = require('./helpers')
|
|
|
|
// Host of the ArcGIS "usrsvcs" proxy that fronts both feature services.
const ARCGIS_BASE = 'https://utility.arcgis.com'

// Build the layer-0 query path of a proxied feature service.
const featureQueryPath = (serverId, service) =>
  `/usrsvcs/servers/${serverId}/rest/services/${service}/FeatureServer/0/query`

// ADR = address points, FRN = providers table keyed by IdAdresse.
const ADR_PATH = featureQueryPath('396469b496554883b36948d66eba40f5', 'ADR')
const FRN_PATH = featureQueryPath('5aa672072a9f43129b97b53d06eb3ae9', 'FRN')

// Sent as the Referer header on every ArcGIS request.
const REFERER = 'https://www.quebec.ca/'

// Display labels for the numeric Etat_hiv codes carried by ADR records.
const ETAT_LABEL = {
  1: 'Desservie',
  2: 'Projet en cours',
  3: 'Projet à venir',
  4: 'Non admissible',
}
|
|
|
|
// On-disk location of the persisted lookup cache: ../data/serviceability-cache.json
// relative to this module's directory.
const CACHE_FILE = path.join(__dirname, '..', 'data', 'serviceability-cache.json')

// Cached results are considered fresh for 90 days (gov data refreshes periodically).
const DAY_MS = 24 * 3600 * 1000
const CACHE_TTL_MS = 90 * DAY_MS

// Upper bound on the number of items handled by one /lookup-batch call (bounds latency).
const BATCH_MAX = 80
|
|
|
|
// ── disk-persisted cache ────────────────────────────────────────────────────
|
|
// In-memory cache (key → { ts, v }), seeded from CACHE_FILE when it exists.
// Any read or parse failure is logged and the module starts with an empty cache.
let cache = new Map()
try {
  const cacheFilePresent = fs.existsSync(CACHE_FILE)
  if (cacheFilePresent) {
    const raw = fs.readFileSync(CACHE_FILE, 'utf8')
    const persisted = JSON.parse(raw)
    cache = new Map(Object.entries(persisted))
    log(`[serviceability] cache loaded: ${cache.size} entries`)
  }
} catch (err) {
  log('[serviceability] cache load failed:', err.message)
}
|
|
|
|
// Handle of the pending flush timer, or null when no flush is scheduled.
let saveTimer = null

/**
 * Schedule a debounced flush of the in-memory cache to CACHE_FILE.
 *
 * Calls made while a flush is already pending are no-ops: the pending flush
 * serializes the whole cache when it fires (2 s after the first call), so it
 * picks up every change made in the meantime.
 *
 * The file is written to a sibling temp file and then renamed over the real
 * one, so a crash or kill in the middle of the write cannot leave a truncated
 * JSON file that would wipe the cache on the next start. Failures are logged
 * and otherwise ignored (the cache is best-effort).
 */
function saveCacheSoon () {
  if (saveTimer) return
  saveTimer = setTimeout(() => {
    saveTimer = null
    const tmpFile = `${CACHE_FILE}.tmp`
    try {
      fs.mkdirSync(path.dirname(CACHE_FILE), { recursive: true })
      fs.writeFileSync(tmpFile, JSON.stringify(Object.fromEntries(cache)))
      // Same-directory rename replaces the previous file in one step.
      fs.renameSync(tmpFile, CACHE_FILE)
    } catch (e) { log('[serviceability] cache save failed:', e.message) }
  }, 2000)
}
|
|
|
|
// ── polite rate limiter (concurrency + min interval) ────────────────────────
|
|
/**
 * Read an integer setting from the environment.
 * Falls back to `fallback` when the variable is unset, not an integer, or
 * below `min` (a concurrency of 0 would queue every call forever, and NaN
 * would silently disable the limit).
 */
function envInt (name, fallback, min) {
  const parsed = Number.parseInt(process.env[name] || '', 10)
  return Number.isInteger(parsed) && parsed >= min ? parsed : fallback
}

const MAX_CONCURRENT = envInt('IHV_MAX_CONCURRENT', 3, 1)
const MIN_INTERVAL_MS = envInt('IHV_MIN_INTERVAL_MS', 120, 0)

let active = 0 // tasks currently holding a slot (waiting for their start time or running)
let lastStart = 0 // start time (ms epoch) reserved by the most recently admitted task
const queue = [] // admitted-later tasks waiting for a free slot, FIFO

/**
 * Run `fn` under the module-wide rate limit: at most MAX_CONCURRENT tasks at
 * once, and task starts spaced at least MIN_INTERVAL_MS apart.
 *
 * @param {() => any} fn - task to run; may return a promise or a plain value.
 * @returns {Promise<any>} settles with the outcome of `fn`. A synchronous
 *   throw from `fn` becomes a rejection instead of an uncaught exception.
 */
function gate (fn) {
  return new Promise((resolve, reject) => {
    const run = () => {
      active++
      const now = Date.now()
      // Reserve the start slot immediately. Updating lastStart only when the
      // timer fires would let a burst of callers all compute "no wait" and
      // start together, defeating the minimum interval.
      const startAt = Math.max(now, lastStart + MIN_INTERVAL_MS)
      lastStart = startAt
      setTimeout(() => {
        let outcome
        try {
          outcome = Promise.resolve(fn())
        } catch (e) {
          outcome = Promise.reject(e)
        }
        outcome.then(resolve, reject).finally(() => {
          active--
          if (queue.length) queue.shift()()
        })
      }, startAt - now)
    }
    if (active >= MAX_CONCURRENT) queue.push(run); else run()
  })
}
|
|
|
|
// ── helpers ─────────────────────────────────────────────────────────────────
|
|
/**
 * Normalize free text for comparison: strip diacritics, lowercase, turn every
 * character outside [a-z0-9 ] into a space, collapse whitespace runs and trim.
 * Falsy input yields ''.
 */
function norm (s) {
  const unaccented = String(s || '')
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
  const alnumOnly = unaccented.toLowerCase().replace(/[^a-z0-9 ]/g, ' ')
  return alnumOnly.replace(/\s+/g, ' ').trim()
}
|
|
// Escape a value for use inside a single-quoted SQL string literal by
// doubling every single quote.
function sqlEsc (s) {
  return String(s).split("'").join("''")
}
|
|
/**
 * Extract the leading civic number of a street address: a run of digits
 * optionally followed by one letter (e.g. '147B').
 * Returns null when the trimmed address does not start with a digit.
 */
function parseCivic (address1) {
  const trimmed = String(address1 || '').trim()
  const found = /^(\d+[a-zA-Z]?)/.exec(trimmed)
  if (!found) return null
  return found[1]
}
|
|
// Generic words that do not help tell one street from another: street types,
// their abbreviations, saint/sainte prefixes and French articles.
// Lookups are done on norm()-ed words (diacritics stripped), so the accented
// entries below are never hit; their unaccented twins do the matching.
const STREET_TYPES = new Set([
  'rue', 'ch', 'chemin', 'rang', 'montee', 'mtee', 'boul', 'boulevard',
  'av', 'ave', 'avenue', 'route', 'rte', 'place', 'pl', 'cote', 'côte', 'terrasse', 'impasse',
  'croissant', 'allee', 'allée', 'st', 'ste', 'saint', 'sainte', 'de', 'du', 'des', 'la', 'le', 'les',
])

/**
 * Return the distinctive words of an address line, in order: the normalized
 * words minus anything starting with a digit (civic numbers, ordinals) and
 * minus the generic STREET_TYPES words.
 */
function streetWords (address1) {
  const distinctive = []
  for (const word of norm(address1).split(' ')) {
    if (!word) continue
    if (/^\d/.test(word)) continue
    if (STREET_TYPES.has(word)) continue
    distinctive.push(word)
  }
  return distinctive
}
|
|
|
|
/**
 * Run a GET query against one of the ArcGIS feature services, through the
 * rate limiter, and return the attribute objects of the returned features.
 *
 * @param {string} pathBase - query endpoint path (ADR_PATH or FRN_PATH).
 * @param {Object} params - query-string parameters; values are URI-encoded,
 *   names are used as-is.
 * @returns {Promise<Object[]>} one attributes object per feature.
 * @throws {Error} 'ArcGIS query failed: …' when the status is not 200 or the
 *   payload has no `features` array; the service's own error message is used
 *   when present, otherwise the HTTP status.
 */
async function arcgisQuery (pathBase, params) {
  const pairs = []
  for (const [name, value] of Object.entries(params)) {
    pairs.push(`${name}=${encodeURIComponent(value)}`)
  }
  const target = `${pathBase}?${pairs.join('&')}`

  const send = () => httpRequest(ARCGIS_BASE, target, {
    method: 'GET', headers: { Referer: REFERER }, timeout: 15000,
  })
  const r = await gate(send)

  const usable = r.status === 200 && r.data && Array.isArray(r.data.features)
  if (!usable) {
    const msg = r.data && r.data.error ? r.data.error.message : `HTTP ${r.status}`
    throw new Error('ArcGIS query failed: ' + msg)
  }
  return r.data.features.map(feature => feature.attributes)
}
|
|
|
|
/**
 * Resolve a postal address to a single ADR record.
 *
 * Strategy:
 *   1. When `zip` is a well-formed Canadian postal code, query ADR for
 *      records starting with the civic number and ending with the postal
 *      code. One hit is returned as-is; several hits are ranked.
 *   2. Otherwise (or when step 1 finds nothing), query by civic number plus
 *      the longest distinctive street word, prefer rows that mention the
 *      city, and rank what is left.
 *
 * Ranking scores each candidate by the number of distinctive street words it
 * shares with the input, +0.5 when it contains the city, +5 when its
 * normalized text equals the normalized "address1 city"; the first
 * best-scoring row wins.
 *
 * @returns {Promise<Object|null>} the ADR attributes (IdAdresse,
 *   Adresse_complete, Etat_hiv, Type_adresse) or null when no civic number
 *   can be parsed or nothing matches.
 */
async function resolveAddress ({ address1, city, zip }) {
  const civic = parseCivic(address1)
  if (!civic) return null

  const postal = String(zip || '').replace(/\s+/g, '').toUpperCase()
  const wantedFull = norm(`${address1} ${city || ''}`)
  const inputWords = streetWords(address1)
  const wantedWords = new Set(inputWords)
  const cityKey = city ? norm(city) : ''
  const civicClause = `Adresse_complete LIKE '${sqlEsc(civic)} %'`

  // Shared shape of both ADR queries; only the filter and row limit differ.
  const queryAdr = (where, limit) => arcgisQuery(ADR_PATH, {
    where,
    outFields: 'IdAdresse,Adresse_complete,Etat_hiv,Type_adresse',
    returnGeometry: 'false',
    resultRecordCount: limit,
    f: 'json',
  })

  const scoreOf = (row) => {
    const candidate = norm(row.Adresse_complete)
    const candidateWords = new Set(streetWords(row.Adresse_complete))
    let shared = 0
    wantedWords.forEach((w) => { if (candidateWords.has(w)) shared += 1 })
    // tie-break: also reward candidates that mention the city
    const cityBonus = city && candidate.includes(cityKey) ? 0.5 : 0
    const exactBonus = candidate === wantedFull ? 5 : 0
    return shared + cityBonus + exactBonus
  }

  // First row with the highest score (null for an empty list).
  const pickBest = (rows) => {
    let winner = null
    let winnerScore = -1
    rows.forEach((row) => {
      const s = scoreOf(row)
      if (s > winnerScore) {
        winnerScore = s
        winner = row
      }
    })
    return winner
  }

  // 1) civic + postal — most reliable (postal is embedded in Adresse_complete).
  const postalLooksValid = /^[A-Z]\d[A-Z]\d[A-Z]\d$/.test(postal)
  if (postal && postalLooksValid) {
    const byPostal = await queryAdr(
      `${civicClause} AND Adresse_complete LIKE '%${sqlEsc(postal)}'`, '25')
    if (byPostal.length === 1) return byPostal[0]
    if (byPostal.length > 1) return pickBest(byPostal)
  }

  // 2) fallback: civic + longest distinctive street word, then filter by city in JS.
  let longest
  for (const w of inputWords) {
    if (longest === undefined || w.length > longest.length) longest = w
  }
  if (!longest || !/^[a-z]+$/.test(longest)) return null

  const byStreet = await queryAdr(
    `${civicClause} AND UPPER(Adresse_complete) LIKE '%${sqlEsc(longest.toUpperCase())}%'`, '50')
  const inCity = city
    ? byStreet.filter(row => norm(row.Adresse_complete).includes(cityKey))
    : byStreet
  const pool = inCity.length ? inCity : byStreet
  return pool.length ? pickBest(pool) : null
}
|
|
|
|
/**
 * List the providers declared for one address.
 *
 * Queries the FRN table for rows with the given IdAdresse, drops rows with a
 * blank name, keeps the first row per (trimmed) provider name and returns the
 * result sorted by name using French collation.
 *
 * @param {string} idAdresse - IdAdresse of an ADR record.
 * @returns {Promise<Array<{nom: string, url: ?string, date_fin: *}>>}
 */
async function fetchProviders (idAdresse) {
  const rows = await arcgisQuery(FRN_PATH, {
    where: `IdAdresse='${sqlEsc(idAdresse)}'`,
    outFields: 'FRN_nom,FRN_URL_inscrip,Date_fin',
    returnGeometry: 'false',
    resultRecordCount: '50',
    f: 'json',
  })

  // Map keeps insertion order, so the first occurrence of each name wins.
  const byName = new Map()
  for (const row of rows) {
    const nom = (row.FRN_nom || '').trim()
    if (!nom || byName.has(nom)) continue
    byName.set(nom, {
      nom,
      url: row.FRN_URL_inscrip || null,
      date_fin: row.Date_fin || null,
    })
  }

  const providers = [...byName.values()]
  providers.sort((a, b) => a.nom.localeCompare(b.nom, 'fr'))
  return providers
}
|
|
|
|
/**
 * Main entry: look up the Internet providers available at an address.
 *
 * Results are cached under "<normalized address1>|<postal code, or normalized
 * city when no postal code is given>". A cached entry younger than
 * CACHE_TTL_MS is returned with `cached: true` added. Both matched and
 * "address not found" results are cached; failures are not, so the next call
 * retries.
 *
 * @returns {Promise<Object>} one of
 *   - { matched: true, idAdresse, adresse_complete, etat_hiv, etat_label,
 *       type_adresse, providers: [{nom, url, date_fin}], cogeco: boolean }
 *   - { matched: false, etat_label: 'Adresse introuvable', providers: [], cogeco: false }
 *   - { matched: false, error, etat_label: 'Erreur', providers: [], cogeco: false }
 */
async function lookupProviders ({ address1, city, zip }) {
  const postal = String(zip || '').replace(/\s+/g, '').toUpperCase()
  const key = `${norm(address1)}|${postal || norm(city)}`

  const entry = cache.get(key)
  const stillFresh = entry && (Date.now() - entry.ts) < CACHE_TTL_MS
  if (stillFresh) return { ...entry.v, cached: true }

  let result
  try {
    const adr = await resolveAddress({ address1, city, zip })
    if (adr) {
      const providers = await fetchProviders(adr.IdAdresse)
      const hasCogeco = providers.some(p => /cogeco/i.test(p.nom))
      result = {
        matched: true,
        idAdresse: adr.IdAdresse,
        adresse_complete: adr.Adresse_complete,
        etat_hiv: adr.Etat_hiv,
        etat_label: ETAT_LABEL[adr.Etat_hiv] || String(adr.Etat_hiv),
        type_adresse: adr.Type_adresse || null,
        providers,
        cogeco: hasCogeco,
      }
    } else {
      result = { matched: false, etat_label: 'Adresse introuvable', providers: [], cogeco: false }
    }
  } catch (err) {
    // Don't cache transient errors — let the next call retry.
    return { matched: false, error: err.message, etat_label: 'Erreur', providers: [], cogeco: false }
  }

  cache.set(key, { ts: Date.now(), v: result })
  saveCacheSoon()
  return result
}
|
|
|
|
// ── HTTP handler ─────────────────────────────────────────────────────────────
|
|
/**
 * HTTP entry point for the /serviceability/* routes.
 *
 *   GET  /serviceability/cache-stats   → { entries, ttl_days }
 *   POST /serviceability/lookup        { address1, city, zip } → one result
 *                                      (400 when address1 is missing)
 *   POST /serviceability/lookup-batch  { items: [{ key, address1, city, zip }] }
 *                                      → { results: { <key or index>: result },
 *                                          processed, batch_max }
 *                                      (400 when items is missing or empty)
 *   anything else                      → 404
 *
 * Only the first BATCH_MAX batch items are processed; `processed` tells the
 * caller how many were handled. A batch entry that is not an object gets an
 * error result under its index instead of failing the whole batch.
 *
 * @param req - incoming request, passed to parseBody.
 * @param res - response, passed to json.
 * @param {string} method - HTTP method.
 * @param {string} path - request path (shadows the `path` module inside this function).
 */
async function handle (req, res, method, path) {
  if (path === '/serviceability/cache-stats' && method === 'GET') {
    return json(res, 200, { entries: cache.size, ttl_days: CACHE_TTL_MS / 86400000 })
  }

  if (path === '/serviceability/lookup' && method === 'POST') {
    // Defensive: treat a null/undefined parsed body as empty so it yields a 400
    // rather than a TypeError (parseBody's contract is not visible here).
    const b = (await parseBody(req)) || {}
    if (!b.address1) return json(res, 400, { error: 'address1 required' })
    return json(res, 200, await lookupProviders(b))
  }

  if (path === '/serviceability/lookup-batch' && method === 'POST') {
    const b = (await parseBody(req)) || {}
    const items = Array.isArray(b.items) ? b.items.slice(0, BATCH_MAX) : []
    if (!items.length) return json(res, 400, { error: 'items[] required' })

    const results = await Promise.all(items.map(async (it, i) => {
      // A null or non-object entry would throw while being destructured by
      // lookupProviders and reject Promise.all; report it per item instead.
      if (it === null || typeof it !== 'object') {
        return [i, { matched: false, error: 'invalid item', etat_label: 'Erreur', providers: [], cogeco: false }]
      }
      const r = await lookupProviders(it)
      return [it.key != null ? it.key : i, r]
    }))
    return json(res, 200, { results: Object.fromEntries(results), processed: items.length, batch_max: BATCH_MAX })
  }

  return json(res, 404, { error: 'Not found' })
}
|
|
|
|
module.exports = { handle, lookupProviders }
|