Playwright/Chromium microservice (mirrors modem-bridge: node:20-slim + Chromium, token auth, port 3302, serialized + rate-limited) that drives Cogeco's public address checker to determine if a competitor serves a given address. What works (proven on prod): - Anti-bot bypass: vanilla headless gets 403 on /boutique/api/register (reCAPTCHA Enterprise blocks datacenter headless). Adding playwright-extra + stealth flips it to 200 — register + autocomplete succeed. - Reaches Cogeco's address system and pulls real autocomplete suggestions. Confirmed it's Loqate/AddressComplete (id + next: Retrieve/Find shape). What's NOT reliable yet (do not use the verdict for decisions): - The serviceability verdict. The Loqate flow is multi-step (Find → Retrieve → Cogeco serviceability) and a single option click doesn't complete it, so the final yes/no API call isn't captured. - Current interpret() falls back to scanning UI text and produces FALSE POSITIVES (a rural out-of-Cogeco address returned available=true off generic marketing copy). Needs the real Retrieve+serviceability endpoint wired before it can be trusted. Next: capture the post-selection Retrieve + serviceability call (likely needs a "continue" step and handling the multi-dwelling "N Addresses" branch), then parse the real verdict + speeds. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
170 lines
7.1 KiB
JavaScript
170 lines
7.1 KiB
JavaScript
'use strict'
|
|
/**
|
|
* cogeco-session.js — drives Cogeco's public address-availability checker with
|
|
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
|
|
* and short-lived JWT the endpoint requires are generated legitimately by the
|
|
* page's own JS. A pure HTTP call can't produce those, hence the browser.
|
|
*
|
|
* Flow (reverse-engineered 2026-06):
|
|
* 1. load /en/internet/packages
|
|
* 2. click "Check Availability" → address dialog
|
|
* 3. type the address into the autocomplete combobox
|
|
* 4. pick the first suggestion (triggers GET /boutique/api/address/search
|
|
* then the serviceability lookup)
|
|
* 5. capture the JSON responses + the rendered result text
|
|
*
|
|
* We intercept every /boutique/api/* and /api/check-avail/* response and also
|
|
* read the visible result, then return a normalized verdict. Cogeco can change
|
|
* this flow at any time — treat parsing defensively and keep `raw` for debug.
|
|
*/
|
|
|
|
// playwright-extra + stealth masks the headless automation signals
// (navigator.webdriver, missing plugins, headless UA quirks) that reCAPTCHA
// Enterprise scores against. Falls back to vanilla playwright if the stealth
// stack isn't installed (or fails to initialise).
let chromium
try {
  chromium = require('playwright-extra').chromium
  const stealth = require('puppeteer-extra-plugin-stealth')()
  chromium.use(stealth)
} catch (err) {
  // The fallback is deliberate (best-effort), but it must not be silent:
  // vanilla headless Chromium has been observed to be rejected by the
  // anti-bot check, so an operator needs to see why lookups start failing.
  console.warn(
    `[cogeco-session] stealth stack unavailable (${err?.message ?? err}); falling back to vanilla playwright`
  )
  chromium = require('playwright').chromium
}
|
|
|
|
// Public packages page that hosts the "Check Availability" address dialog.
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'

// Timeouts, in milliseconds, handed to Playwright:
//   NAV_TIMEOUT  — initial page navigation
//   STEP_TIMEOUT — each individual UI step (click, wait for dialog/input)
const NAV_TIMEOUT = 45 * 1000
const STEP_TIMEOUT = 20 * 1000
|
|
|
|
// Module-wide browser handle, shared by every lookup and released by shutdown().
let _browser = null

/**
 * Return the shared Chromium instance, launching a new one when there is no
 * handle yet or the existing handle reports it is no longer connected.
 *
 * @returns {Promise<object>} the connected browser instance
 */
async function getBrowser () {
  const isUsable = Boolean(_browser) && _browser.isConnected()
  if (!isUsable) {
    const launchOptions = {
      headless: true,
      args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
    }
    _browser = await chromium.launch(launchOptions)
  }
  return _browser
}
|
|
|
|
// Explicit serviceability flags as they would appear in a lower-cased JSON dump.
const JSON_POSITIVE_RE = /"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/
const JSON_NEGATIVE_RE = /"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/
// Speed markers anywhere in the payload (e.g. "download": 1000 or "speed": "250").
const JSON_SPEED_RE = /"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g
// Keyword heuristics for the rendered page text (EN + FR).
const UI_NEGATIVE_RE = /not available|non disponible|unfortunately|pas (encore )?disponible|sorry/i
const UI_POSITIVE_RE = /available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/i

/**
 * Normalize whatever was captured during a lookup into a stable verdict.
 *
 * The exact payload shape varies, so this probes the captured JSON bodies for
 * several likely serviceability flags and speed fields, and only when no
 * explicit flag was found falls back to keyword-matching the page text.
 *
 * @param {Array<{url?: string, status?: number, body?: *}>} captured
 *   Intercepted responses; only entries whose `body` is an object are
 *   inspected. A nullish value is treated as an empty list.
 * @param {string} uiText Visible page text used as the fallback signal.
 * @returns {{available: (boolean|null), max_download_mbps: (number|null), plans: Array, confidence: string}}
 *   `available` stays null when nothing matched. `confidence` is 'high' for a
 *   JSON flag, 'medium' for a UI-text match, 'low' otherwise. `plans` is
 *   currently always an empty array (nothing populates it).
 */
function interpret (captured, uiText) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }

  // 1. Look for an explicit serviceability flag in the captured responses.
  for (const entry of captured ?? []) {
    const body = entry?.body
    if (!body || typeof body !== 'object') continue
    const flat = JSON.stringify(body).toLowerCase()

    // The first response carrying a flag decides; later ones only add speeds.
    if (verdict.available === null) {
      if (JSON_POSITIVE_RE.test(flat)) {
        verdict.available = true
        verdict.confidence = 'high'
      } else if (JSON_NEGATIVE_RE.test(flat)) {
        verdict.available = false
        verdict.confidence = 'high'
      }
    }

    const speeds = [...flat.matchAll(JSON_SPEED_RE)].map((m) => parseInt(m[1], 10))
    if (speeds.length) {
      verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
    }
  }

  // 2. Fall back to the rendered result text.
  if (verdict.available === null && uiText) {
    const text = uiText.toLowerCase()
    // Negative phrases MUST be tested first: "not available" and
    // "non disponible" contain the positive keywords "available" /
    // "disponible", so a positive-first check reports them as serviceable.
    if (UI_NEGATIVE_RE.test(text)) {
      verdict.available = false
      verdict.confidence = 'medium'
    } else if (UI_POSITIVE_RE.test(text)) {
      verdict.available = true
      verdict.confidence = 'medium'
    }
  }

  return verdict
}
|
|
|
|
/**
 * Run one availability lookup for `address` through Cogeco's public checker
 * page and return a normalized result object.
 *
 * Failures inside the page flow do not reject: they are reported as
 * `result.error` (in which case the verdict fields are absent). Failures to
 * obtain the browser, the context or the page still reject.
 *
 * @param {string} address Free-form address typed into the autocomplete box.
 * @param {{ debug?: boolean }} [options] With `debug: true` the result also
 *   carries `captured` (intercepted responses), `ui_excerpt` and, on success,
 *   a base64 `screenshot`.
 * @returns {Promise<object>} `{ address, queried_at, ...verdict,
 *   picked_suggestion }` on success, or `{ address, queried_at, error }`.
 */
async function checkAddress (address, { debug = false } = {}) {
  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
  })

  // The context exists from here on, so it must be closed on every exit
  // path — including a failure to open the page, which happens before the
  // main try/finally below takes over.
  let page
  try {
    page = await ctx.newPage()
  } catch (e) {
    await ctx.close().catch(() => {})
    throw e
  }

  // Record every response from the checker's API paths. JSON is preferred;
  // anything else is kept as a truncated text body.
  const captured = []
  page.on('response', async (resp) => {
    const u = resp.url()
    if (/\/(boutique\/api|api\/check-avail)\//.test(u)) {
      let body = null
      try { body = await resp.json() } catch { try { body = (await resp.text()).slice(0, 2000) } catch { /* ignore */ } }
      captured.push({ url: u, status: resp.status(), body })
    }
  })

  const result = { address, queried_at: new Date().toISOString() }
  try {
    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })

    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
      const btn = page.getByRole('button', { name: label })
      if (await btn.count().catch(() => 0)) { await btn.first().click().catch(() => {}); break }
    }

    // Open the address dialog.
    await page.getByRole('button', { name: /check availability/i }).first()
      .click({ timeout: STEP_TIMEOUT })

    // Wait for the dialog, then target the combobox inside it (more robust
    // than matching the accessible name, which differs EN/FR).
    const dialog = page.getByRole('dialog')
    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
    const input = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('combobox').first()
      : page.getByRole('combobox', { name: /address|adresse/i })
    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
    await input.fill('')
    // Typed key by key (not fill) so the autocomplete sees real keystrokes.
    await input.pressSequentially(address, { delay: 60 })

    // Wait for autocomplete suggestions, then pick the first one.
    let picked = false
    try {
      const firstOption = page.getByRole('option').first()
      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
      await firstOption.click()
      picked = true
    } catch {
      // No dropdown option appeared — try pressing ArrowDown+Enter as a fallback.
      try { await input.press('ArrowDown'); await input.press('Enter'); picked = true } catch { /* ignore */ }
    }

    // Give the serviceability lookup time to fire + render.
    await page.waitForTimeout(5000)

    // Grab the visible result text (whatever the page now shows).
    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)

    Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
    if (debug) {
      result.captured = captured
      result.ui_excerpt = uiText.slice(0, 1200)
      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
    }
  } catch (e) {
    // Flow failures are reported in-band so the caller always gets a result.
    result.error = e.message
    if (debug) {
      result.captured = captured
      try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
    }
  } finally {
    await ctx.close().catch(() => {})
  }
  return result
}
|
|
|
|
/**
 * Close the shared browser, if one was launched, and forget the handle.
 * Errors raised while closing are ignored (best-effort teardown).
 *
 * @returns {Promise<void>}
 */
async function shutdown () {
  if (!_browser) return
  const ignoreCloseError = () => {}
  await _browser.close().catch(ignoreCloseError)
  _browser = null
}
|
|
|
|
module.exports = { checkAddress, shutdown }
|