feat(cogeco-checker): POC competitor-serviceability microservice (WIP)

Playwright/Chromium microservice (mirrors modem-bridge: node:20-slim +
Chromium, token auth, port 3302, serialized + rate-limited) that drives
Cogeco's public address checker to determine if a competitor serves a
given address.

What works (proven on prod):
- Anti-bot bypass: vanilla headless gets 403 on /boutique/api/register
  (reCAPTCHA Enterprise blocks datacenter headless). Adding
  playwright-extra + stealth flips it to 200 — register + autocomplete
  succeed.
- Reaches Cogeco's address system and pulls real autocomplete
  suggestions. Confirmed it's Loqate/AddressComplete (id + next:
  Retrieve/Find shape).

What's NOT reliable yet (do not use the verdict for decisions):
- The serviceability verdict. The Loqate flow is multi-step
  (Find → Retrieve → Cogeco serviceability) and a single option click
  doesn't complete it, so the final yes/no API call isn't captured.
- Current interpret() falls back to scanning UI text and produces FALSE
  POSITIVES (a rural out-of-Cogeco address returned available=true off
  generic marketing copy). Needs the real Retrieve+serviceability
  endpoint wired before it can be trusted.

Next: capture the post-selection Retrieve + serviceability call (likely
needs a "continue" step and handling the multi-dwelling "N Addresses"
branch), then parse the real verdict + speeds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
louispaulb 2026-06-01 20:56:05 -04:00
parent ab57a3e135
commit 74b89f5490
6 changed files with 347 additions and 0 deletions

View File

@ -0,0 +1,3 @@
CHECKER_PORT=3302
CHECKER_TOKEN=
CHECKER_MIN_GAP_MS=4000

View File

@ -0,0 +1,39 @@
# cogeco-checker: Headless Chromium for Cogeco address-availability checker
# ~450MB total (node:20-slim + Chromium deps)
# Lighter than node:20 + full playwright install (~800MB)
FROM node:20-slim
# System libraries Chromium needs at runtime. They are installed explicitly
# with apt here (not through Playwright's own dependency installer).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
    libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
    libpango-1.0-0 libcairo2 libasound2 libxshmfence1 \
    libxfixes3 libx11-6 libx11-xcb1 libxcb1 libxext6 \
    libxrender1 libxi6 libxtst6 libglib2.0-0 libdbus-1-3 \
    fonts-liberation \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Create non-root user first so playwright installs in their home
RUN groupadd -r checker && useradd -r -g checker -G audio,video -m checker
COPY package.json .
# --omit=dev is the current spelling of the deprecated --production flag.
RUN npm install --omit=dev
# Install Chromium as the checker user (so it goes to /home/checker/.cache)
USER checker
# Deliberately not piped through `tail`: a pipeline's exit status is that of
# its last command, so a failed browser download would be reported as success
# and only surface when the container starts.
RUN npx playwright install chromium
USER root
COPY server.js .
COPY lib/ lib/
RUN chown -R checker:checker /app
EXPOSE 3302
USER checker
CMD ["node", "server.js"]

View File

@ -0,0 +1,27 @@
# cogeco-checker: Headless Chromium REST API for Cogeco serviceability checks
# targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
# Internal only (no Traefik), token auth, serialized + rate-limited.
# Needs outbound internet (reaches cogeco.ca), so it sits on the proxy net.
services:
cogeco-checker:
build: .
container_name: cogeco-checker
restart: unless-stopped
volumes:
- ./server.js:/app/server.js:ro
- ./lib:/app/lib:ro
environment:
- CHECKER_PORT=3302
- CHECKER_TOKEN=${CHECKER_TOKEN:-}
- CHECKER_MIN_GAP_MS=${CHECKER_MIN_GAP_MS:-4000}
deploy:
resources:
limits:
memory: 768M
networks:
- proxy
networks:
proxy:
external: true

View File

@ -0,0 +1,169 @@
'use strict'
/**
* cogeco-session.js — drives Cogeco's public address-availability checker with
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
* and short-lived JWT the endpoint requires are generated legitimately by the
* page's own JS. A pure HTTP call can't produce those, hence the browser.
*
* Flow (reverse-engineered 2026-06):
* 1. load /en/internet/packages
* 2. click "Check Availability" to open the address dialog
* 3. type the address into the autocomplete combobox
* 4. pick the first suggestion (triggers GET /boutique/api/address/search
* then the serviceability lookup)
* 5. capture the JSON responses + the rendered result text
*
* We intercept every /boutique/api/* and /api/check-avail/* response and also
* read the visible result, then return a normalized verdict. Cogeco can change
* this flow at any time, so treat parsing defensively and keep the raw captures
* (`captured`, `ui_excerpt`) available for debugging.
*/
// playwright-extra + stealth masks the headless automation signals
// (navigator.webdriver, missing plugins, headless UA quirks) that reCAPTCHA
// Enterprise scores against. Falls back to vanilla playwright if the stealth
// stack isn't installed (or fails to initialise).
let chromium
try {
  chromium = require('playwright-extra').chromium
  const stealth = require('puppeteer-extra-plugin-stealth')()
  chromium.use(stealth)
} catch (e) {
  // Say so loudly: without stealth the upstream anti-bot layer is expected to
  // reject the session, and a silent fallback makes that very hard to diagnose.
  console.warn(`[cogeco-session] stealth stack unavailable (${e.message}); falling back to vanilla playwright`)
  chromium = require('playwright').chromium
}

// Page that hosts the "Check Availability" dialog.
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'
// Timeout (ms) for the initial page navigation.
const NAV_TIMEOUT = 45000
// Timeout (ms) for each individual UI step (clicks, waiting for elements).
const STEP_TIMEOUT = 20000
// Shared browser instance, created on first use and reused by later checks.
let _browser = null

/**
 * Return the shared Chromium instance, launching a new one when there is none
 * yet or the cached one reports it is no longer connected.
 *
 * @returns {Promise<object>} the connected browser
 */
async function getBrowser () {
  if (!_browser || !_browser.isConnected()) {
    const launchOptions = {
      headless: true,
      args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
    }
    _browser = await chromium.launch(launchOptions)
  }
  return _browser
}
/**
 * Normalize what a check captured into a stable verdict.
 *
 * The exact payload shape is not known, so this is heuristic:
 *  1. every captured JSON body is scanned for explicit boolean flags
 *     (serviceable / available / isEligible / qualified) and speed fields;
 *  2. only if no flag was found, the rendered page text is scanned for
 *     availability wording. This fallback is weak — generic page copy can
 *     match — so it never reports more than 'medium' confidence.
 *
 * @param {Array<{body: *}>} captured - Intercepted responses; entries whose
 *   `body` is not an object (e.g. a text excerpt) are skipped.
 * @param {string} uiText - Visible page text after the lookup.
 * @returns {{available: (boolean|null), max_download_mbps: (number|null),
 *   plans: Array, confidence: string}} `available` stays null when nothing
 *   matched. `plans` is currently always empty.
 */
function interpret (captured, uiText) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }
  const flagTrue = /"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/
  const flagFalse = /"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/
  const speedField = /"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g

  // 1. Look for an explicit serviceability object in the captured responses.
  for (const c of Array.isArray(captured) ? captured : []) {
    const b = c?.body
    if (!b || typeof b !== 'object') continue
    // Lower-cased so key matching is case-insensitive (isEligible, downloadSpeed…).
    const flat = JSON.stringify(b).toLowerCase()
    // The first response carrying a flag wins; later ones only contribute speeds.
    if (verdict.available === null) {
      if (flagTrue.test(flat)) {
        verdict.available = true; verdict.confidence = 'high'
      } else if (flagFalse.test(flat)) {
        verdict.available = false; verdict.confidence = 'high'
      }
    }
    // Speed markers anywhere in the payload (e.g. download 1000); keep the max.
    const speeds = [...flat.matchAll(speedField)].map(m => parseInt(m[1], 10))
    if (speeds.length) verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
  }

  // 2. Fall back to the rendered result text.
  if (verdict.available === null && uiText) {
    const t = uiText.toLowerCase()
    // Negative wording is tested FIRST: "not available" / "non disponible"
    // contain the positive keywords, so the reverse order reported every
    // explicit refusal as available.
    if (/not available|non disponible|unfortunately|pas (encore )?disponible|sorry/i.test(t)) {
      verdict.available = false; verdict.confidence = 'medium'
    } else if (/available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/i.test(t)) {
      verdict.available = true; verdict.confidence = 'medium'
    }
  }
  return verdict
}
/**
 * Run one serviceability lookup for `address` against Cogeco's public checker.
 *
 * Opens a fresh browser context, walks the address dialog, records every
 * /boutique/api/* and /api/check-avail/* response, then hands those plus the
 * visible page text to interpret().
 *
 * @param {string} address - Free-form address typed into the autocomplete.
 * @param {{ debug?: boolean }} [options] - With `debug`, the result also
 *   carries `captured`, `ui_excerpt` and (on success) a base64 `screenshot`.
 * @returns {Promise<object>} `{ address, queried_at, ...verdict,
 *   picked_suggestion }` on success. If the page flow throws, the result has
 *   `error` (the exception message) instead of the verdict fields. Failing to
 *   obtain the browser, context or page rejects.
 */
async function checkAddress (address, { debug = false } = {}) {
  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
  })
  let page
  try {
    page = await ctx.newPage()
  } catch (e) {
    // The try/finally below is not active yet, so close the context here
    // rather than leaking it in the long-lived shared browser.
    await ctx.close().catch(() => {})
    throw e
  }
  const captured = []
  // Record every API response of interest; JSON when possible, otherwise a
  // bounded text excerpt, otherwise null.
  page.on('response', async (resp) => {
    const u = resp.url()
    if (/\/(boutique\/api|api\/check-avail)\//.test(u)) {
      let body = null
      try { body = await resp.json() } catch { try { body = (await resp.text()).slice(0, 2000) } catch { /* ignore */ } }
      captured.push({ url: u, status: resp.status(), body })
    }
  })
  const result = { address, queried_at: new Date().toISOString() }
  try {
    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })
    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
      const btn = page.getByRole('button', { name: label })
      if (await btn.count().catch(() => 0)) { await btn.first().click().catch(() => {}); break }
    }
    // Open the address dialog.
    await page.getByRole('button', { name: /check availability/i }).first()
      .click({ timeout: STEP_TIMEOUT })
    // Wait for the dialog, then target the combobox inside it (more robust
    // than matching the accessible name, which differs EN/FR).
    const dialog = page.getByRole('dialog')
    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
    const input = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('combobox').first()
      : page.getByRole('combobox', { name: /address|adresse/i })
    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
    await input.fill('')
    await input.pressSequentially(address, { delay: 60 })
    // Wait for autocomplete suggestions, then pick the first one.
    let picked = false
    try {
      const firstOption = page.getByRole('option').first()
      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
      await firstOption.click()
      picked = true
    } catch {
      // No dropdown option appeared — try pressing ArrowDown+Enter as a fallback.
      // NOTE: `picked` then only means the keys were sent, not that a
      // suggestion was actually selected.
      try { await input.press('ArrowDown'); await input.press('Enter'); picked = true } catch { /* ignore */ }
    }
    // Give the serviceability lookup time to fire + render.
    await page.waitForTimeout(5000)
    // Grab the visible result text (whatever the page now shows).
    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)
    Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
    if (debug) {
      result.captured = captured
      result.ui_excerpt = uiText.slice(0, 1200)
      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
    }
  } catch (e) {
    // Page-flow failures are reported in the result instead of thrown.
    result.error = e.message
    if (debug) {
      result.captured = captured
      try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
    }
  } finally {
    await ctx.close().catch(() => {})
  }
  return result
}
/**
 * Close the shared browser if one exists and clear the cached reference.
 * Errors raised while closing are ignored.
 */
async function shutdown () {
  if (!_browser) return
  await _browser.close().catch(() => {})
  _browser = null
}
module.exports = { checkAddress, shutdown }

View File

@ -0,0 +1,14 @@
{
"name": "cogeco-checker",
"version": "0.1.0",
"description": "Headless-browser competitor serviceability checker — given an address, asks Cogeco's address checker whether internet is available and at what speeds. Internal REST API for targo-hub.",
"main": "server.js",
"scripts": {
"start": "node server.js"
},
"dependencies": {
"playwright": "^1.52.0",
"playwright-extra": "^4.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
}
}

View File

@ -0,0 +1,95 @@
// cogeco-checker/server.js — REST API for competitor (Cogeco) serviceability.
// targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
// Internal only, token auth, rate-limited (real browser + reCAPTCHA upstream).
const crypto = require('crypto')
const http = require('http')
const url = require('url')
const cogeco = require('./lib/cogeco-session')
// Port to listen on; CHECKER_PORT overrides the 3302 default. Parsed as
// base 10 explicitly so the value is never interpreted in another radix.
const PORT = Number.parseInt(process.env.CHECKER_PORT || '3302', 10)
// Shared bearer token from CHECKER_TOKEN; empty string when unset.
const TOKEN = process.env.CHECKER_TOKEN || ''
// Serialize checks: one real browser context at a time + a small gap so we
// don't hammer Cogeco (reCAPTCHA score protection). Concurrency=1 by design.
const parsedGap = Number.parseInt(process.env.CHECKER_MIN_GAP_MS ?? '', 10)
// An unset, empty or non-numeric value falls back to 4000 ms. Without this a
// typo would become NaN and silently disable the gap altogether.
const MIN_GAP_MS = Number.isNaN(parsedGap) ? 4000 : parsedGap
// Tail of the job queue: every new job is chained after it.
let _chain = Promise.resolve()
// Timestamp (ms) at which the most recent job finished.
let _lastRun = 0

/**
 * Run `fn` after all previously enqueued jobs, waiting first until at least
 * MIN_GAP_MS has elapsed since the previous job finished.
 *
 * @param {function(): *} fn - Job to run; may be async.
 * @returns {Promise<*>} settles with fn's result or rejection.
 */
function enqueue (fn) {
  const run = _chain.then(async () => {
    const wait = Math.max(0, MIN_GAP_MS - (Date.now() - _lastRun))
    if (wait > 0) await new Promise(r => setTimeout(r, wait))
    // Stamp the finish time on failure too, so the gap also follows errors.
    try { return await fn() } finally { _lastRun = Date.now() }
  })
  // Keep the chain alive even if one job throws.
  _chain = run.catch(() => {})
  return run
}
/**
 * Send `data` as a complete JSON response.
 *
 * @param {object} res - HTTP response to write to.
 * @param {*} data - Value serialized with JSON.stringify.
 * @param {number} [status=200] - HTTP status code.
 */
function json (res, data, status = 200) {
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  const payload = JSON.stringify(data)
  res.end(payload)
}
/**
 * Send an error response of the form `{ error: msg }`.
 *
 * @param {object} res - HTTP response to write to.
 * @param {string} msg - Error message for the client.
 * @param {number} [status=400] - HTTP status code.
 */
function err (res, msg, status = 400) {
  const payload = { error: msg }
  json(res, payload, status)
}
/**
 * Read the whole request body and parse it as JSON.
 *
 * Chunks are collected as Buffers and decoded once at the end. Decoding each
 * chunk separately corrupts multi-byte UTF-8 characters that straddle a chunk
 * boundary (accented street names, for instance).
 *
 * @param {object} req - Readable request stream emitting 'data'/'end'/'error'.
 * @param {number} [maxBytes=1048576] - Bodies larger than this are rejected.
 * @returns {Promise<*>} the parsed JSON value, or `{}` for an empty body.
 *   Rejects with Error('Invalid JSON'), Error('Payload too large'), or the
 *   stream's own error.
 */
function parseBody (req, maxBytes = 1048576) {
  return new Promise((resolve, reject) => {
    const chunks = []
    let received = 0
    let tooLarge = false
    req.on('data', c => {
      if (tooLarge) return
      const chunk = Buffer.isBuffer(c) ? c : Buffer.from(c)
      received += chunk.length
      if (received > maxBytes) {
        // Stop buffering; remaining chunks are discarded as they arrive.
        tooLarge = true
        chunks.length = 0
        reject(new Error('Payload too large'))
        return
      }
      chunks.push(chunk)
    })
    req.on('end', () => {
      if (tooLarge) return
      const body = Buffer.concat(chunks).toString('utf8')
      try { resolve(body ? JSON.parse(body) : {}) } catch { reject(new Error('Invalid JSON')) }
    })
    req.on('error', reject)
  })
}
/**
 * Check the request's bearer token, answering 401 itself on failure.
 *
 * When TOKEN is empty, authentication is disabled and every request passes.
 *
 * @param {object} req - Incoming request; reads `headers.authorization`.
 * @param {object} res - Response used to send the 401.
 * @returns {boolean} true if the caller may proceed; false if a 401 was sent.
 */
function checkAuth (req, res) {
  if (!TOKEN) return true
  const header = req.headers['authorization']
  const presented = Buffer.from(typeof header === 'string' ? header : '', 'utf8')
  const expected = Buffer.from(`Bearer ${TOKEN}`, 'utf8')
  // Constant-time comparison so response timing does not reveal how much of
  // the token matched. timingSafeEqual throws on unequal lengths, hence the
  // length check first.
  if (presented.length === expected.length && crypto.timingSafeEqual(presented, expected)) return true
  err(res, 'Unauthorized', 401)
  return false
}
/**
 * HTTP entry point.
 *
 *   OPTIONS *      → 204 (CORS preflight)
 *   GET  /health   → { status, uptime }, no auth
 *   POST /check    → body { address, debug? }; runs one serialized check and
 *                    returns its result
 *   anything else  → 404
 *
 * Client mistakes (malformed JSON, missing or non-string address) are
 * answered with 400; 500 is reserved for failures of the check itself.
 */
const server = http.createServer(async (req, res) => {
  const parsed = url.parse(req.url, true)
  const path = parsed.pathname
  const method = req.method
  res.setHeader('Access-Control-Allow-Origin', '*')
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
  res.setHeader('Access-Control-Allow-Headers', 'Authorization, Content-Type')
  if (method === 'OPTIONS') { res.writeHead(204); res.end(); return }
  if (path === '/health' && method === 'GET') {
    return json(res, { status: 'ok', uptime: process.uptime() })
  }
  if (!checkAuth(req, res)) return
  try {
    // POST /check { address, debug? } → { available, max_download_mbps, plans, confidence }
    if (path === '/check' && method === 'POST') {
      let body
      try {
        body = await parseBody(req)
      } catch (e) {
        // An unreadable body is the caller's fault, not an internal error.
        return err(res, e.message || 'Invalid JSON', 400)
      }
      // The body may be any JSON value (null, a number, …) and `address` any
      // type; anything but a string is treated as missing.
      const address = typeof body?.address === 'string' ? body.address.trim() : ''
      if (!address || address.length < 5) return err(res, 'address required (min 5 chars)')
      const debug = !!body.debug
      const out = await enqueue(() => cogeco.checkAddress(address, { debug }))
      return json(res, out)
    }
    err(res, 'Not found', 404)
  } catch (e) {
    console.error('[cogeco-checker] error:', e)
    err(res, 'Internal error: ' + e.message, 500)
  }
})
// Start listening and log the effective configuration once the port is bound.
function logListening () {
  const authState = TOKEN ? 'on' : 'OFF (dev)'
  console.log(`[cogeco-checker] listening on ${PORT}, auth ${authState}, min-gap ${MIN_GAP_MS}ms`)
}
server.listen(PORT, logListening)

// On SIGTERM/SIGINT: shut the checker session down, stop the HTTP server, exit 0.
async function stopOnSignal (sig) {
  console.log(`[cogeco-checker] ${sig}, shutting down`)
  await cogeco.shutdown()
  server.close()
  process.exit(0)
}
process.on('SIGTERM', () => stopOnSignal('SIGTERM'))
process.on('SIGINT', () => stopOnSignal('SIGINT'))

// Log uncaught exceptions; with this handler installed they do not end the process.
function logUncaught (e) {
  console.error('[cogeco-checker] uncaught:', e)
}
process.on('uncaughtException', logUncaught)