gigafibre-fsm/services/cogeco-checker/lib/cogeco-session.js
louispaulb 74b89f5490 feat(cogeco-checker): POC competitor-serviceability microservice (WIP)
Playwright/Chromium microservice (mirrors modem-bridge: node:20-slim +
Chromium, token auth, port 3302, serialized + rate-limited) that drives
Cogeco's public address checker to determine if a competitor serves a
given address.

What works (proven on prod):
- Anti-bot bypass: vanilla headless gets 403 on /boutique/api/register
  (reCAPTCHA Enterprise blocks datacenter headless). Adding
  playwright-extra + stealth flips it to 200 — register + autocomplete
  succeed.
- Reaches Cogeco's address system and pulls real autocomplete
  suggestions. Confirmed it's Loqate/AddressComplete (id + next:
  Retrieve/Find shape).

What's NOT reliable yet (do not use the verdict for decisions):
- The serviceability verdict. The Loqate flow is multi-step
  (Find → Retrieve → Cogeco serviceability) and a single option click
  doesn't complete it, so the final yes/no API call isn't captured.
- Current interpret() falls back to scanning UI text and produces FALSE
  POSITIVES (a rural out-of-Cogeco address returned available=true off
  generic marketing copy). Needs the real Retrieve+serviceability
  endpoint wired before it can be trusted.

Next: capture the post-selection Retrieve + serviceability call (likely
needs a "continue" step and handling the multi-dwelling "N Addresses"
branch), then parse the real verdict + speeds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 20:56:05 -04:00

170 lines
7.1 KiB
JavaScript

'use strict'
/**
* cogeco-session.js — drives Cogeco's public address-availability checker with
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
* and short-lived JWT the endpoint requires are generated legitimately by the
* page's own JS. A pure HTTP call can't produce those, hence the browser.
*
* Flow (reverse-engineered 2026-06):
* 1. load /en/internet/packages
* 2. click "Check Availability" → address dialog
* 3. type the address into the autocomplete combobox
* 4. pick the first suggestion (triggers GET /boutique/api/address/search
* then the serviceability lookup)
* 5. capture the JSON responses + the rendered result text
*
* We intercept every /boutique/api/* and /api/check-avail/* response and also
* read the visible result, then return a normalized verdict. Cogeco can change
* this flow at any time — treat parsing defensively and keep `raw` for debug.
*/
// Browser driver selection. The preferred stack is playwright-extra with the
// stealth plugin, which hides the automation signals (navigator.webdriver,
// missing plugins, headless UA quirks) that reCAPTCHA Enterprise scores
// against. If anything in that stack fails to load or register, plain
// playwright is used instead.
let chromium
try {
  const { chromium: extraChromium } = require('playwright-extra')
  chromium = extraChromium
  const createStealthPlugin = require('puppeteer-extra-plugin-stealth')
  const stealthPlugin = createStealthPlugin()
  chromium.use(stealthPlugin)
} catch {
  chromium = require('playwright').chromium
}
// Page that checkAddress() navigates to before opening the address dialog.
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'
// Passed as the `timeout` option of the initial page.goto().
const NAV_TIMEOUT = 45000
// Passed as the `timeout` option of the individual UI steps (button click,
// dialog / combobox waitFor).
const STEP_TIMEOUT = 20000
// Shared browser instance: set by getBrowser(), reset to null by shutdown().
let _browser = null
/**
 * Return the shared headless Chromium instance, launching a new one when
 * nothing is cached yet or the cached instance reports it is disconnected.
 *
 * @returns {Promise<object>} the browser stored in the module-level cache
 */
async function getBrowser () {
  const needsLaunch = !_browser || !_browser.isConnected()
  if (needsLaunch) {
    const launchOptions = {
      headless: true,
      args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
    }
    _browser = await chromium.launch(launchOptions)
  }
  return _browser
}
/**
 * Normalize whatever was captured during a lookup into a stable verdict.
 *
 * The payload shape is not known for certain, so this probes the serialized
 * JSON of each captured response for likely serviceability flags and speed
 * fields, and only if no flag was found falls back to keyword-scanning the
 * rendered page text.
 *
 * @param {Array<{body: *}>} captured  intercepted responses; entries whose
 *   `body` is not an object (e.g. a text excerpt or null) are skipped
 * @param {string} uiText  visible page text, used only as a fallback
 * @returns {{available: (boolean|null), max_download_mbps: (number|null),
 *   plans: Array, confidence: string}}  `available` stays null when nothing
 *   matched; `confidence` is 'high' for a JSON flag, 'medium' for a UI-text
 *   match, 'low' otherwise
 */
function interpret (captured, uiText) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }

  // 1. Look for an explicit serviceability flag in the captured responses.
  for (const entry of captured || []) {
    const body = entry ? entry.body : null
    if (!body || typeof body !== 'object') continue
    // Matching on the lowercased JSON text makes the probes independent of
    // key casing and nesting depth.
    const flat = JSON.stringify(body).toLowerCase()

    // The first response carrying a flag decides; later ones are ignored.
    if (verdict.available === null) {
      if (/"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/.test(flat)) {
        verdict.available = true
        verdict.confidence = 'high'
      } else if (/"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/.test(flat)) {
        verdict.available = false
        verdict.confidence = 'high'
      }
    }

    // Speed markers anywhere in the payload (e.g. "download": 1000); keep the
    // largest value seen across all responses.
    const speeds = Array.from(
      flat.matchAll(/"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g),
      (match) => Number.parseInt(match[1], 10)
    )
    if (speeds.length) verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
  }

  // 2. Fall back to the rendered result text.
  if (verdict.available === null && typeof uiText === 'string' && uiText) {
    const text = uiText.toLowerCase()
    // Negative phrases must be tested FIRST: "not available", "unavailable",
    // "non disponible" and "pas disponible" all contain the positive keywords
    // "available" / "disponible", so testing the positive pattern first would
    // report an explicit refusal as available.
    if (/not available|unavailable|non disponible|indisponible|unfortunately|pas (encore )?disponible|sorry/.test(text)) {
      verdict.available = false
      verdict.confidence = 'medium'
    } else if (/available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/.test(text)) {
      verdict.available = true
      verdict.confidence = 'medium'
    }
  }

  return verdict
}
/**
 * Run one availability lookup for `address` by driving Cogeco's public
 * checker page in a fresh browser context.
 *
 * Errors raised while driving the page are not thrown: they are reported as
 * `result.error`. Failures to obtain the browser, the context or the page
 * still reject the returned promise.
 *
 * @param {string} address  text typed into the address autocomplete
 * @param {{debug?: boolean}} [options]  when `debug` is true the result also
 *   carries `captured` (intercepted responses), `ui_excerpt` and, on the
 *   success path, a base64 `screenshot`
 * @returns {Promise<object>} `{ address, queried_at }` merged with the
 *   interpret() verdict and `picked_suggestion`, or with `error` on failure
 */
async function checkAddress (address, { debug = false } = {}) {
  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
  })

  // The browser is shared and long-lived, so a context that is never closed
  // stays open for the life of the process. Close it if the page cannot be
  // created, then rethrow so callers see the same rejection as before.
  let page
  try {
    page = await ctx.newPage()
  } catch (e) {
    await ctx.close().catch(() => {})
    throw e
  }

  // Every matching API response is recorded here. Bodies are read
  // asynchronously, so an entry is appended once its body has been read.
  const captured = []
  page.on('response', async (resp) => {
    const u = resp.url()
    if (/\/(boutique\/api|api\/check-avail)\//.test(u)) {
      let body = null
      // Prefer parsed JSON; otherwise keep a bounded text excerpt; otherwise null.
      try { body = await resp.json() } catch { try { body = (await resp.text()).slice(0, 2000) } catch { /* ignore */ } }
      captured.push({ url: u, status: resp.status(), body })
    }
  })

  const result = { address, queried_at: new Date().toISOString() }
  try {
    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })

    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
    // Only the first label that matches any button is clicked.
    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
      const btn = page.getByRole('button', { name: label })
      if (await btn.count().catch(() => 0)) { await btn.first().click().catch(() => {}); break }
    }

    // Open the address dialog.
    await page.getByRole('button', { name: /check availability/i }).first()
      .click({ timeout: STEP_TIMEOUT })

    // Wait for the dialog, then target the combobox inside it (more robust
    // than matching the accessible name, which differs EN/FR). If no dialog
    // is found, fall back to a page-level combobox matched by name.
    const dialog = page.getByRole('dialog')
    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
    const input = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('combobox').first()
      : page.getByRole('combobox', { name: /address|adresse/i })
    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
    await input.fill('')
    // Typed key by key with a small delay rather than filled in one shot.
    await input.pressSequentially(address, { delay: 60 })

    // Wait for autocomplete suggestions, then pick the first one.
    // NOTE: in the fallback path `picked` only records that the key presses
    // did not throw, not that a suggestion was actually selected.
    let picked = false
    try {
      const firstOption = page.getByRole('option').first()
      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
      await firstOption.click()
      picked = true
    } catch {
      // No dropdown option appeared — try pressing ArrowDown+Enter as a fallback.
      try { await input.press('ArrowDown'); await input.press('Enter'); picked = true } catch { /* ignore */ }
    }

    // Give the serviceability lookup time to fire + render.
    await page.waitForTimeout(5000)

    // Grab the visible result text (whatever the page now shows).
    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)
    Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
    if (debug) {
      result.captured = captured
      result.ui_excerpt = uiText.slice(0, 1200)
      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
    }
  } catch (e) {
    // Report the failure in-band; debug mode adds whatever was gathered so far.
    result.error = e.message
    if (debug) {
      result.captured = captured
      try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
    }
  } finally {
    // Always release the per-lookup context; a close failure is not fatal.
    await ctx.close().catch(() => {})
  }
  return result
}
/**
 * Close the shared browser, if one is cached, and clear the cache. Errors
 * raised by close() are ignored.
 *
 * @returns {Promise<void>}
 */
async function shutdown () {
  if (!_browser) return
  await _browser.close().catch(() => {})
  _browser = null
}
// Public API: checkAddress() runs one lookup; shutdown() closes the shared browser.
module.exports = { checkAddress, shutdown }