gigafibre-fsm/services/cogeco-checker/lib/cogeco-session.js
louispaulb 5bc42bda9f fix(cogeco-checker): disable browser cache to rule it out as 401 cause
Tested the hypothesis that a warm Chromium cache (the register GET being
re-served stale) was causing the protected address/search 401. Disabled
the HTTP cache (CDP Network.setCacheDisabled), the on-disk cache
(--disk-cache-size=0) and service workers (serviceWorkers:'block').

Result: identical trace — register=200 (freshly minted, not cached),
autocomplete=200, address/search=401. So cache was NOT the cause; the
401 is a server-side authorization decision on the protected endpoint
(reCAPTCHA Enterprise assertion required). Keeping the cache-disable as
hygiene + to definitively rule it out in future debugging.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 21:47:19 -04:00

259 lines
12 KiB
JavaScript

'use strict'
/**
* cogeco-session.js — drives Cogeco's public address-availability checker with
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
* and short-lived JWT the endpoint requires are generated legitimately by the
* page's own JS. A pure HTTP call can't produce those, hence the browser.
*
* Flow (reverse-engineered 2026-06):
* 1. GET /boutique/api/register → mints a SHORT-LIVED JWT (Bearer)
* 2. GET /boutique/api/address/search?query=…&sessionId=… → Loqate suggestions
* 3. pick a suggestion, click "Find out now"
* 4. GET /boutique/api/service-address/search?id=<loqate-id>&version=v10
* &sessionId=… → THE serviceability verdict
*
* CRITICAL: the register token rotates fast and is ~single-use. Requests fired
* on a stale token return 401 — we observed address/search alternating 200/401
* and service-address/search 401'ing outright on the first try. The real page
* silently re-registers and retries; so do we: after clicking "Find out now"
* we wait for a 200 service-address/search response and, on 401/timeout,
* re-click (forcing a fresh register) up to VERDICT_ATTEMPTS times.
*
* We intercept every /boutique/api/* response (tracking service-address/search
* specially) and also read the visible result, then return a normalized
* verdict. Cogeco can change this flow at any time — parse defensively, keep
* `raw`/`captured` for debug.
*/
// Prefer playwright-extra with the stealth plugin: it hides the headless
// automation tells (navigator.webdriver, missing plugins, headless UA quirks)
// that reCAPTCHA Enterprise scores against. If any part of that stack cannot
// be loaded or wired up, fall back to plain playwright.
let chromium
try {
  const extra = require('playwright-extra')
  const makeStealth = require('puppeteer-extra-plugin-stealth')
  chromium = extra.chromium
  chromium.use(makeStealth())
} catch {
  chromium = require('playwright').chromium
}
// Page that is opened for every availability check.
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'

// Timeouts, all in milliseconds.
const NAV_TIMEOUT = 45_000 // initial page navigation
const STEP_TIMEOUT = 20_000 // individual UI steps (open dialog, find input, …)

// service-address/search (the serviceability call) often answers 401 when the
// token is stale, so the lookup is re-submitted up to VERDICT_ATTEMPTS times.
const VERDICT_ATTEMPTS = 5
// How long each attempt waits for a 200 verdict response.
const VERDICT_WAIT_MS = 7_000
// Pause between attempts so the page can mint a fresh token before the retry.
const REREGISTER_PAUSE_MS = 2_500
// Shared browser instance, reused by every check (each check gets its own
// context). Reset to null by shutdown().
let _browser = null
// Launch currently in flight, if any. Concurrent callers await this same
// promise instead of each starting their own Chromium process.
let _browserLaunch = null

/**
 * Return the shared headless Chromium, launching it on first use or when the
 * previous instance is no longer connected.
 *
 * Calls that arrive while a launch is still pending share that launch; without
 * this, each of them would start a browser and all but the last one stored in
 * `_browser` would be orphaned (never closed by shutdown()).
 *
 * @returns {Promise<object>} the connected browser instance
 * @throws whatever chromium.launch() rejects with; the next call retries.
 */
async function getBrowser () {
  if (_browser && _browser.isConnected()) return _browser
  if (!_browserLaunch) {
    _browserLaunch = chromium.launch({
      headless: true,
      args: [
        '--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled',
        '--disk-cache-size=0', '--media-cache-size=0', // no on-disk HTTP cache
      ],
    })
      .then((browser) => {
        _browser = browser
        return browser
      })
      // Clear the marker on success AND failure so a failed launch can be retried.
      .finally(() => { _browserLaunch = null })
  }
  return _browserLaunch
}
/**
 * Pull availability and the top download speed out of one serviceability JSON
 * body. The exact response shape is unconfirmed (the endpoint 401'd during
 * recon), so the body is serialized, lowercased and probed for several likely
 * key names at any nesting depth.
 *
 * @param {*} b parsed response body; anything that is not a non-null object
 *   (e.g. a text fallback or null) yields the all-null result
 * @returns {{available: (boolean|null), max_download_mbps: (number|null)}}
 *   `available` is true/false when an explicit flag (or a non-empty
 *   plans/products/offers/packages list) is found, null otherwise;
 *   `max_download_mbps` is the largest 2–5 digit speed value found, or null.
 */
function readServiceBody (b) {
  const out = { available: null, max_download_mbps: null }
  if (!b || typeof b !== 'object') return out

  const flat = JSON.stringify(b).toLowerCase()

  // One key list shared by the true/false probes so they cannot drift apart.
  const flagKeys = '"(?:serviceable|available|iseligible|qualified|isserviceable|eligible)"\\s*:\\s*'
  // A `true` anywhere wins over a `false` elsewhere in the body.
  if (new RegExp(flagKeys + 'true').test(flat)) out.available = true
  else if (new RegExp(flagKeys + 'false').test(flat)) out.available = false

  // A non-empty list of plans/products/offers also implies serviceable.
  if (out.available === null && /"(?:plans|products|offers|packages)"\s*:\s*\[\s*\{/.test(flat)) out.available = true

  // The (?!\d) boundary rejects longer digit runs outright; without it a value
  // such as 1000000 would be truncated to its first five digits (10000) and
  // reported as a speed that never appeared in the body.
  const speedRe = /"(?:download|downloadspeed|speed|maxspeed|maxdownload)"\s*:\s*"?(\d{2,5})(?!\d)/g
  const speeds = Array.from(flat.matchAll(speedRe), (m) => Number.parseInt(m[1], 10))
  if (speeds.length) out.max_download_mbps = Math.max(...speeds)

  return out
}
/**
 * Normalize everything captured during a check into one stable verdict.
 *
 * Sources are tried in priority order:
 *   1. a 200 service-address/search body (the authoritative serviceability
 *      call) → confidence 'high'
 *   2. any other captured JSON carrying a serviceability flag → 'medium'
 *   3. the rendered UI text (weakest, last resort) → 'low'
 *
 * @param {Array<{url: string, status: number, body: *}>} captured intercepted
 *   API responses; a missing list or null entries are tolerated
 * @param {string} uiText visible page text after the lookup
 * @param {{status: number, body: *}|null} serviceVerdict best
 *   service-address/search response seen, if any
 * @returns {object} `{available, max_download_mbps, plans, confidence}` plus
 *   `source` when a verdict was reached; `available` stays null when no
 *   source was conclusive.
 */
function interpret (captured, uiText, serviceVerdict) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }

  // 1. Authoritative: the service-address/search 200 body, if we got one.
  if (serviceVerdict && serviceVerdict.status === 200) {
    const r = readServiceBody(serviceVerdict.body)
    if (r.available !== null) {
      verdict.available = r.available
      verdict.max_download_mbps = r.max_download_mbps
      verdict.confidence = 'high'
      verdict.source = 'service-address/search'
      return verdict
    }
  }

  // 2. Any other captured JSON with an explicit flag. The first flag found
  //    decides availability; speeds are maximised across all 200 bodies.
  for (const c of captured || []) {
    if (!c) continue
    if (c.status && c.status !== 200) continue
    const r = readServiceBody(c.body)
    if (verdict.available === null && r.available !== null) {
      verdict.available = r.available
      verdict.confidence = 'medium'
      verdict.source = 'captured-json'
    }
    if (r.max_download_mbps) {
      verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, r.max_download_mbps)
    }
  }

  // 3. Last resort: rendered result text.
  if (verdict.available === null && uiText) {
    // Rendered copy frequently uses typographic apostrophes ("isn’t", "we’re");
    // fold them to ASCII so the patterns below can match them.
    const t = uiText.toLowerCase().replace(/[\u2018\u2019\u02bc]/g, "'")
    if (/great news|good news|we('| a)re in your area|service is available|is available at|select your (plan|package)|choose your (plan|package)/i.test(t)) {
      verdict.available = true
      verdict.confidence = 'low'
      verdict.source = 'ui-text'
    } else if (/not (yet )?available|isn't available|unfortunately|pas (encore )?disponible|non disponible|sorry, we|we don't (yet )?(serve|offer)/i.test(t)) {
      verdict.available = false
      verdict.confidence = 'low'
      verdict.source = 'ui-text'
    }
  }

  return verdict
}
/**
 * Read one network response into a plain `{url, status, body}` record.
 * `body` is the parsed JSON when possible, otherwise the first 2000
 * characters of the text, otherwise null (body could not be read).
 */
async function captureResponse (resp) {
  const url = resp.url()
  let body = null
  try {
    body = await resp.json()
  } catch {
    try { body = (await resp.text()).slice(0, 2000) } catch { /* body unavailable */ }
  }
  return { url, status: resp.status(), body }
}

/**
 * Run one availability lookup for `address` by driving the public checker UI
 * in a fresh browser context and watching its API traffic.
 *
 * @param {string} address free-text address typed into the checker
 * @param {{debug?: boolean}} [options] with `debug: true` the result also
 *   carries `captured`, `service_response`, `ui_excerpt` and a base64
 *   `screenshot`
 * @returns {Promise<object>} `{address, queried_at}` merged with the
 *   interpret() verdict plus `picked_suggestion`, `verdict_attempts` and
 *   `verdict_http_status`; if the UI flow throws, `error` holds the message
 *   instead of the verdict fields.
 * @throws if the browser, context or page cannot be created (those steps run
 *   before the flow's own error capture).
 */
async function checkAddress (address, { debug = false } = {}) {
  const SERVICE_SEARCH_RE = /\/service-address\/search/
  const SUBMIT_CLICK_TIMEOUT_MS = 4000

  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
    serviceWorkers: 'block', // a SW could intercept/cache the register + API calls
  })
  let page
  try {
    page = await ctx.newPage()
  } catch (e) {
    // Don't leak the context when the page cannot be created.
    await ctx.close().catch(() => {})
    throw e
  }

  // Disable the HTTP cache for this page so every request — including the
  // GET /boutique/api/register that mints the token — goes to the network.
  // Testing showed the cache was NOT the cause of the address/search 401s
  // (same trace with it disabled); this is kept as hygiene and to rule caching
  // out when debugging. CDP is Chromium-only, so this is best-effort.
  try {
    const cdp = await ctx.newCDPSession(page)
    await cdp.send('Network.setCacheDisabled', { cacheDisabled: true })
  } catch { /* CDP unavailable under this driver — continue uncached-best-effort */ }

  const captured = []
  // Best serviceability response seen so far: the first one is kept until a
  // 200 arrives, and a 200 is never replaced.
  let serviceResp = null
  const keepBestVerdict = (rec) => {
    if (!serviceResp || (rec.status === 200 && serviceResp.status !== 200)) serviceResp = rec
  }
  page.on('response', async (resp) => {
    const u = resp.url()
    if (!/\/(boutique\/api|api\/check-avail)\//.test(u)) return
    // NOTE: the record is stored only after the body has been read, i.e.
    // asynchronously with respect to the response event itself.
    const rec = await captureResponse(resp)
    captured.push(rec)
    if (SERVICE_SEARCH_RE.test(u)) keepBestVerdict(rec)
  })

  const result = { address, queried_at: new Date().toISOString() }
  try {
    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })

    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
      const btn = page.getByRole('button', { name: label })
      if (await btn.count().catch(() => 0)) {
        await btn.first().click().catch(() => {})
        break
      }
    }

    // Open the address dialog.
    await page.getByRole('button', { name: /check availability/i }).first()
      .click({ timeout: STEP_TIMEOUT })

    // Wait for the dialog, then target the combobox inside it (more robust
    // than matching the accessible name, which differs EN/FR).
    const dialog = page.getByRole('dialog')
    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
    const input = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('combobox').first()
      : page.getByRole('combobox', { name: /address|adresse/i })
    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
    await input.fill('')
    await input.pressSequentially(address, { delay: 60 })

    // Wait for autocomplete suggestions, then pick the first one.
    let picked = false
    try {
      const firstOption = page.getByRole('option').first()
      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
      await firstOption.click()
      picked = true
    } catch {
      // No dropdown option appeared — fall back to ArrowDown+Enter. `picked`
      // then only means the key presses were sent, not that a suggestion was
      // confirmed.
      try {
        await input.press('ArrowDown')
        await input.press('Enter')
        picked = true
      } catch { /* ignore */ }
    }

    // Locate the dialog submit button ("Find out now" / "Vérifier" / "Submit").
    // It's distinct from the page's "Check Availability" opener (already gone).
    const submitBtn = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('button', { name: /find out|v[ée]rifier|check|submit|continue|suivant|next/i }).first()
      : page.getByRole('button', { name: /find out|v[ée]rifier|submit/i }).first()

    // Retry loop: the verdict call frequently 401s on a stale token. Submit,
    // wait for a 200 service-address/search; on failure pause so the page
    // re-registers a fresh token, then re-submit. Bail as soon as we have a
    // 200 verdict (or a clear UI result).
    const isVerdict200 = (r) => SERVICE_SEARCH_RE.test(r.url()) && r.status() === 200
    let attempts = 0
    for (let i = 0; i < VERDICT_ATTEMPTS; i++) {
      attempts = i + 1

      // Arm the wait BEFORE clicking so a response that lands while the click
      // is still settling is not missed. The timeout covers the click budget
      // too; the post-click wait itself is bounded by the race below.
      const verdictSeen = page
        .waitForResponse(isVerdict200, { timeout: SUBMIT_CLICK_TIMEOUT_MS + VERDICT_WAIT_MS })
        .catch(() => null)

      // (Re)submit the lookup if a submit button is present & enabled.
      if (await submitBtn.count().catch(() => 0)) {
        const enabled = await submitBtn.isEnabled().catch(() => false)
        if (enabled) await submitBtn.click({ timeout: SUBMIT_CLICK_TIMEOUT_MS }).catch(() => {})
      }

      // Wait up to VERDICT_WAIT_MS (measured from after the click) for a 200.
      const hit = await Promise.race([
        verdictSeen,
        page.waitForTimeout(VERDICT_WAIT_MS).then(() => null, () => null),
      ])
      // Record the verdict from the awaited response itself: the 'response'
      // listener may still be reading the body, so `serviceResp` could
      // otherwise look empty here and trigger a needless re-submit.
      if (hit) keepBestVerdict(await captureResponse(hit))
      if (serviceResp && serviceResp.status === 200) break

      // Also stop early if the UI already rendered a clear verdict.
      const peek = (await page.locator('body').innerText().catch(() => '') || '').toLowerCase()
      if (/great news|good news|not (yet )?available|unfortunately|isn't available/.test(peek)) break

      await page.waitForTimeout(REREGISTER_PAUSE_MS) // let the page mint a fresh token
      // A 200 that arrived during the pause makes another submit pointless.
      if (serviceResp && serviceResp.status === 200) break
    }

    // Grab the visible result text (whatever the page now shows).
    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)
    Object.assign(result, interpret(captured, uiText, serviceResp), {
      picked_suggestion: picked,
      verdict_attempts: attempts,
      verdict_http_status: serviceResp ? serviceResp.status : null,
    })
    if (debug) {
      result.captured = captured
      result.service_response = serviceResp
      result.ui_excerpt = uiText.slice(0, 1200)
      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
    }
  } catch (e) {
    // Flow failures are reported in the result rather than thrown.
    result.error = e.message
    if (debug) {
      result.captured = captured
      try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
    }
  } finally {
    await ctx.close().catch(() => {})
  }
  return result
}
/**
 * Close the shared browser, if one is held, and forget it. Errors raised
 * while closing are ignored. Does nothing when no browser is held.
 */
async function shutdown () {
  if (!_browser) return
  const closing = _browser.close().catch(() => {})
  await closing
  _browser = null
}
// Public API: checkAddress runs one availability lookup; shutdown closes the
// shared browser.
module.exports = { checkAddress, shutdown }