feat(cogeco-checker): POC competitor-serviceability microservice (WIP)
Playwright/Chromium microservice (mirrors modem-bridge: node:20-slim + Chromium, token auth, port 3302, serialized + rate-limited) that drives Cogeco's public address checker to determine if a competitor serves a given address.

What works (proven on prod):
- Anti-bot bypass: vanilla headless gets 403 on /boutique/api/register (reCAPTCHA Enterprise blocks datacenter headless). Adding playwright-extra + stealth flips it to 200 — register + autocomplete succeed.
- Reaches Cogeco's address system and pulls real autocomplete suggestions. Confirmed it's Loqate/AddressComplete (id + next: Retrieve/Find shape).

What's NOT reliable yet (do not use the verdict for decisions):
- The serviceability verdict. The Loqate flow is multi-step (Find → Retrieve → Cogeco serviceability) and a single option click doesn't complete it, so the final yes/no API call isn't captured.
- Current interpret() falls back to scanning UI text and produces FALSE POSITIVES (a rural out-of-Cogeco address returned available=true off generic marketing copy). Needs the real Retrieve+serviceability endpoint wired before it can be trusted.

Next: capture the post-selection Retrieve + serviceability call (likely needs a "continue" step and handling the multi-dwelling "N Addresses" branch), then parse the real verdict + speeds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ab57a3e135
commit
74b89f5490
3
services/cogeco-checker/.env.example
Normal file
3
services/cogeco-checker/.env.example
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
CHECKER_PORT=3302
|
||||
CHECKER_TOKEN=
|
||||
CHECKER_MIN_GAP_MS=4000
|
||||
39
services/cogeco-checker/Dockerfile
Normal file
39
services/cogeco-checker/Dockerfile
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# cogeco-checker: Headless Chromium for Cogeco address-availability checker
# ~450MB total (node:20-slim + Chromium deps)
# Lighter than node:20 + full playwright install (~800MB)

FROM node:20-slim

# Chromium's shared-library dependencies, listed explicitly and installed via
# apt (rather than through playwright's own dependency installer).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
    libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
    libpango-1.0-0 libcairo2 libasound2 libxshmfence1 \
    libxfixes3 libx11-6 libx11-xcb1 libxcb1 libxext6 \
    libxrender1 libxi6 libxtst6 libglib2.0-0 libdbus-1-3 \
    fonts-liberation \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Create non-root user first so playwright installs in their home
RUN groupadd -r checker && useradd -r -g checker -G audio,video -m checker

COPY package.json .
RUN npm install --production

# Install Chromium as the checker user (so it goes to /home/checker/.cache).
# Deliberately NOT piped through `tail`: the default shell has no pipefail, so
# a pipe would report tail's exit status and hide a failed browser download,
# producing an image that builds fine but cannot launch Chromium.
USER checker
RUN npx playwright install chromium
USER root

COPY server.js .
COPY lib/ lib/
RUN chown -R checker:checker /app

EXPOSE 3302

USER checker

CMD ["node", "server.js"]
|
||||
27
services/cogeco-checker/docker-compose.yml
Normal file
27
services/cogeco-checker/docker-compose.yml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# cogeco-checker: Headless Chromium REST API for Cogeco serviceability checks
# targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
# Internal only (no Traefik), token auth, serialized + rate-limited.
# Needs outbound internet (reaches cogeco.ca), so it sits on the proxy net.

services:
  cogeco-checker:
    build: .
    container_name: cogeco-checker
    restart: unless-stopped
    # Run an init process as PID 1 so orphaned Chromium helper processes are
    # reaped instead of piling up as zombies while node stays alive.
    init: true
    volumes:
      # Bind-mount the sources read-only over the copies baked into the image.
      - ./server.js:/app/server.js:ro
      - ./lib:/app/lib:ro
    environment:
      - CHECKER_PORT=3302
      # An empty token disables auth in server.js — set it outside of dev.
      - CHECKER_TOKEN=${CHECKER_TOKEN:-}
      - CHECKER_MIN_GAP_MS=${CHECKER_MIN_GAP_MS:-4000}
    deploy:
      resources:
        limits:
          memory: 768M
    networks:
      - proxy

networks:
  proxy:
    external: true
|
||||
169
services/cogeco-checker/lib/cogeco-session.js
Normal file
169
services/cogeco-checker/lib/cogeco-session.js
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
'use strict'
|
||||
/**
|
||||
* cogeco-session.js — drives Cogeco's public address-availability checker with
|
||||
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
|
||||
* and short-lived JWT the endpoint requires are generated legitimately by the
|
||||
* page's own JS. A pure HTTP call can't produce those, hence the browser.
|
||||
*
|
||||
* Flow (reverse-engineered 2026-06):
|
||||
* 1. load /en/internet/packages
|
||||
* 2. click "Check Availability" → address dialog
|
||||
* 3. type the address into the autocomplete combobox
|
||||
* 4. pick the first suggestion (triggers GET /boutique/api/address/search
|
||||
* then the serviceability lookup)
|
||||
* 5. capture the JSON responses + the rendered result text
|
||||
*
|
||||
* We intercept every /boutique/api/* and /api/check-avail/* response and also
|
||||
* read the visible result, then return a normalized verdict. Cogeco can change
|
||||
* this flow at any time — treat parsing defensively and keep `raw` for debug.
|
||||
*/
|
||||
|
||||
// playwright-extra + stealth masks the headless automation signals
// (navigator.webdriver, missing plugins, headless UA quirks) that reCAPTCHA
// Enterprise scores against. Falls back to vanilla playwright if the stealth
// stack isn't installed.
let chromium
try {
  chromium = require('playwright-extra').chromium
  const stealth = require('puppeteer-extra-plugin-stealth')()
  chromium.use(stealth)
} catch (e) {
  // Make the downgrade visible: per the commit notes, vanilla headless was
  // observed to get 403 from the upstream register endpoint, so a silent
  // fallback would look like an upstream outage rather than a missing package.
  console.warn(`[cogeco-checker] stealth stack unavailable (${e.message}); falling back to vanilla playwright`)
  chromium = require('playwright').chromium
}

// Page that hosts the "Check Availability" address dialog.
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'
// Timeout (ms) for the initial navigation.
const NAV_TIMEOUT = 45000
// Timeout (ms) for each individual UI step (click / waitFor).
const STEP_TIMEOUT = 20000
|
||||
|
||||
// Shared Chromium instance, launched on first use and reused across checks.
let _browser = null

/**
 * Returns the shared browser, launching a new one when none exists yet or the
 * previous one is no longer connected.
 * @returns {Promise<object>} the Playwright Browser instance
 */
async function getBrowser () {
  const isAlive = Boolean(_browser) && _browser.isConnected()
  if (!isAlive) {
    const launchOptions = {
      headless: true,
      args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
    }
    _browser = await chromium.launch(launchOptions)
  }
  return _browser
}
|
||||
|
||||
/**
 * Normalize Cogeco's serviceability payload into a stable verdict. The exact
 * shape varies, so several likely JSON flags are probed first; only when none
 * is found does it fall back to scanning the rendered UI text.
 *
 * @param {Array<{body: *}>} captured - intercepted API responses; only entries
 *   whose `body` is a parsed object are inspected (null/empty is tolerated)
 * @param {string} uiText - visible page text, used as a fallback
 * @returns {{available: (boolean|null), max_download_mbps: (number|null),
 *   plans: Array, confidence: string}} `available` stays null when nothing
 *   matched; `confidence` is 'high' for a JSON flag, 'medium' for UI text,
 *   'low' otherwise. `plans` is always returned empty by this function.
 */
function interpret (captured, uiText) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }

  // 1. Look for an explicit serviceability object in the captured responses.
  for (const c of captured || []) {
    const b = c?.body
    if (!b || typeof b !== 'object') continue
    const flat = JSON.stringify(b).toLowerCase()
    // Common serviceability flags; the first response that carries one wins.
    if (verdict.available === null) {
      if (/"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/.test(flat)) {
        verdict.available = true; verdict.confidence = 'high'
      } else if (/"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/.test(flat)) {
        verdict.available = false; verdict.confidence = 'high'
      }
    }
    // Speed markers anywhere in the payload (e.g. download 1000); keep the max.
    const speeds = [...flat.matchAll(/"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g)].map(m => parseInt(m[1], 10))
    if (speeds.length) verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
  }

  // 2. Fall back to the rendered result text.
  if (verdict.available === null && uiText) {
    const t = uiText.toLowerCase()
    // Explicit negations MUST be tested before the positive keywords: phrases
    // such as "not available" / "non disponible" / "unavailable" contain
    // "available" / "disponible" and would otherwise be read as a positive.
    if (/\bnot (yet |currently )?available|\bunavailable|\bnon disponible|\bindisponible|pas (encore )?disponible/.test(t)) {
      verdict.available = false; verdict.confidence = 'medium'
    } else if (/available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/.test(t)) {
      verdict.available = true; verdict.confidence = 'medium'
    } else if (/unfortunately|sorry/.test(t)) {
      verdict.available = false; verdict.confidence = 'medium'
    }
  }
  return verdict
}
|
||||
|
||||
// Reads a response body as JSON; falls back to the first 2000 characters of
// the text body, and to null when neither can be read.
async function readResponseBody (resp) {
  try {
    return await resp.json()
  } catch {
    try {
      return (await resp.text()).slice(0, 2000)
    } catch {
      return null
    }
  }
}

// Dismisses a cookie/consent banner if present (best-effort, never throws).
async function dismissConsentBanner (page) {
  const labels = [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]
  for (const label of labels) {
    const btn = page.getByRole('button', { name: label })
    if (await btn.count().catch(() => 0)) {
      await btn.first().click().catch(() => {})
      return
    }
  }
}

// Opens the address dialog and returns the (visible) autocomplete input locator.
async function openAddressInput (page) {
  await page.getByRole('button', { name: /check availability/i }).first()
    .click({ timeout: STEP_TIMEOUT })

  // Wait for the dialog, then target the combobox inside it (more robust
  // than matching the accessible name, which differs EN/FR).
  const dialog = page.getByRole('dialog')
  await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
  const hasDialog = await dialog.count().catch(() => 0)
  const input = hasDialog
    ? dialog.getByRole('combobox').first()
    : page.getByRole('combobox', { name: /address|adresse/i })
  await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
  return input
}

// Clicks the first autocomplete suggestion; if none becomes visible, falls back
// to ArrowDown+Enter on the input. Returns true when either action completed
// without throwing (the keyboard fallback does not prove a suggestion existed).
async function pickFirstSuggestion (page, input) {
  try {
    const firstOption = page.getByRole('option').first()
    await firstOption.waitFor({ state: 'visible', timeout: 8000 })
    await firstOption.click()
    return true
  } catch {
    try {
      await input.press('ArrowDown')
      await input.press('Enter')
      return true
    } catch {
      return false
    }
  }
}

/**
 * Runs one availability lookup for `address` in a fresh browser context.
 *
 * Flow errors (navigation, missing elements, timeouts) do not reject: they are
 * reported in `result.error`. Failures to obtain the browser, the context or
 * the page still reject, as before.
 *
 * @param {string} address - free-text address typed into the autocomplete
 * @param {{debug?: boolean}} [options] - when `debug` is true the result also
 *   carries `captured`, `ui_excerpt` and (on success) a base64 `screenshot`
 * @returns {Promise<object>} `{ address, queried_at }` merged with the verdict
 *   from interpret() and `picked_suggestion`, or with `error` on failure
 */
async function checkAddress (address, { debug = false } = {}) {
  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
  })
  // Everything after the context exists runs under this finally, so the
  // context is closed even when newPage() itself throws.
  try {
    const page = await ctx.newPage()
    const captured = []
    page.on('response', async (resp) => {
      const u = resp.url()
      if (/\/(boutique\/api|api\/check-avail)\//.test(u)) {
        const body = await readResponseBody(resp)
        captured.push({ url: u, status: resp.status(), body })
      }
    })

    const result = { address, queried_at: new Date().toISOString() }
    try {
      await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })
      await dismissConsentBanner(page)

      const input = await openAddressInput(page)
      await input.fill('')
      await input.pressSequentially(address, { delay: 60 })

      const picked = await pickFirstSuggestion(page, input)

      // Give the serviceability lookup time to fire + render.
      await page.waitForTimeout(5000)

      // Grab the visible result text (whatever the page now shows).
      const bodyText = await page.locator('body').innerText().catch(() => '')
      const uiText = (bodyText || '').slice(0, 4000)

      Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
      if (debug) {
        result.captured = captured
        result.ui_excerpt = uiText.slice(0, 1200)
        const shot = await page.screenshot({ fullPage: false }).catch(() => null)
        result.screenshot = shot?.toString('base64') || null
      }
    } catch (e) {
      result.error = e.message
      if (debug) {
        result.captured = captured
        try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
      }
    }
    return result
  } finally {
    await ctx.close().catch(() => {})
  }
}
|
||||
|
||||
/**
 * Closes the shared browser, if one was launched, and forgets it. Errors
 * raised while closing are ignored.
 * @returns {Promise<void>}
 */
async function shutdown () {
  if (!_browser) return
  await _browser.close().catch(() => {})
  _browser = null
}

module.exports = { checkAddress, shutdown }
|
||||
14
services/cogeco-checker/package.json
Normal file
14
services/cogeco-checker/package.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "cogeco-checker",
|
||||
"version": "0.1.0",
|
||||
"description": "Headless-browser competitor serviceability checker — given an address, asks Cogeco's address checker whether internet is available and at what speeds. Internal REST API for targo-hub.",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
"start": "node server.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"playwright": "^1.52.0",
|
||||
"playwright-extra": "^4.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||
}
|
||||
}
|
||||
95
services/cogeco-checker/server.js
Normal file
95
services/cogeco-checker/server.js
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
// cogeco-checker/server.js — REST API for competitor (Cogeco) serviceability.
|
||||
// targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
|
||||
// Internal only, token auth, rate-limited (real browser + reCAPTCHA upstream).
|
||||
|
||||
const http = require('http')
|
||||
const url = require('url')
|
||||
const cogeco = require('./lib/cogeco-session')
|
||||
|
||||
// Reads a base-10 integer from the environment, returning `fallback` when the
// variable is unset, empty or not numeric. Without this a typo in
// CHECKER_MIN_GAP_MS would yield NaN and silently disable the rate-limit gap.
function envInt (name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10)
  return Number.isNaN(parsed) ? fallback : parsed
}

const PORT = envInt('CHECKER_PORT', 3302)
// Empty token means auth is disabled (dev mode).
const TOKEN = process.env.CHECKER_TOKEN || ''
// Serialize checks: one real browser context at a time + a small gap so we
// don't hammer Cogeco (reCAPTCHA score protection). Concurrency=1 by design.
const MIN_GAP_MS = envInt('CHECKER_MIN_GAP_MS', 4000)
|
||||
|
||||
// Tail of the job queue; each new job is appended behind it.
let _chain = Promise.resolve()
// Timestamp (ms) at which the most recent job finished.
let _lastRun = 0

/**
 * Runs `fn` after every previously enqueued job has settled, leaving at least
 * MIN_GAP_MS between the end of the previous job and the start of this one.
 * @param {Function} fn - job to run; may return a promise
 * @returns {Promise<*>} settles with the outcome of `fn`
 */
function enqueue (fn) {
  const job = async () => {
    const sinceLast = Date.now() - _lastRun
    const pause = Math.max(0, MIN_GAP_MS - sinceLast)
    if (pause) {
      await new Promise((resolve) => { setTimeout(resolve, pause) })
    }
    try {
      return await fn()
    } finally {
      _lastRun = Date.now()
    }
  }

  const pending = _chain.then(job)
  // The queue tail swallows rejections so one failed job doesn't block the
  // next; the caller still receives the rejection through `pending`.
  _chain = pending.catch(() => {})
  return pending
}
|
||||
|
||||
/**
 * Sends `data` as a JSON response.
 * @param {object} res - HTTP response
 * @param {*} data - value to serialize
 * @param {number} [status=200] - HTTP status code
 */
function json (res, data, status = 200) {
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  const payload = JSON.stringify(data)
  res.end(payload)
}

/**
 * Sends an `{ error: msg }` JSON response.
 * @param {object} res - HTTP response
 * @param {string} msg - error message
 * @param {number} [status=400] - HTTP status code
 */
function err (res, msg, status = 400) {
  const payload = { error: msg }
  json(res, payload, status)
}
|
||||
|
||||
/**
 * Reads the request body and parses it as JSON.
 *
 * Chunks are collected as Buffers and decoded once at the end, so a multi-byte
 * UTF-8 character split across two chunks (e.g. an accented letter in an
 * address) is decoded correctly. Bodies larger than 1 MiB are rejected.
 *
 * @param {object} req - readable request stream emitting 'data'/'end'/'error'
 * @returns {Promise<*>} the parsed JSON value, or `{}` for an empty body
 * @throws {Error} 'Invalid JSON', 'Body too large', or the stream's own error
 */
function parseBody (req) {
  const MAX_BODY_BYTES = 1024 * 1024
  return new Promise((resolve, reject) => {
    const chunks = []
    let size = 0
    let overflow = false
    req.on('data', (chunk) => {
      if (overflow) return
      const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)
      size += buf.length
      if (size > MAX_BODY_BYTES) {
        // Stop buffering but keep draining so the socket isn't left stalled.
        overflow = true
        chunks.length = 0
        reject(new Error('Body too large'))
        return
      }
      chunks.push(buf)
    })
    req.on('end', () => {
      if (overflow) return
      const text = Buffer.concat(chunks).toString('utf8')
      try { resolve(text ? JSON.parse(text) : {}) } catch { reject(new Error('Invalid JSON')) }
    })
    req.on('error', reject)
  })
}
|
||||
|
||||
/**
 * Verifies the bearer token on a request. When no TOKEN is configured, auth is
 * disabled and every request passes. On failure a 401 JSON error is written.
 *
 * @param {object} req - incoming request (reads the `authorization` header)
 * @param {object} res - response, used only to send the 401
 * @returns {boolean} true when the request may proceed
 */
function checkAuth (req, res) {
  if (!TOKEN) return true
  // Constant-time comparison so response timing doesn't leak how much of the
  // token matched. timingSafeEqual requires equal lengths, checked first.
  const { timingSafeEqual } = require('crypto')
  const header = req.headers['authorization']
  const given = Buffer.from(typeof header === 'string' ? header : '')
  const expected = Buffer.from(`Bearer ${TOKEN}`)
  if (given.length === expected.length && timingSafeEqual(given, expected)) return true
  err(res, 'Unauthorized', 401)
  return false
}
|
||||
|
||||
// HTTP entry point. Routes:
//   OPTIONS *      → 204 (CORS preflight)
//   GET  /health   → { status, uptime } (no auth)
//   POST /check    → { address, debug? } → verdict from cogeco.checkAddress
// Anything else (after auth) → 404. Malformed input → 400, unexpected → 500.
const server = http.createServer(async (req, res) => {
  const parsed = url.parse(req.url, true)
  const path = parsed.pathname
  const method = req.method

  res.setHeader('Access-Control-Allow-Origin', '*')
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
  res.setHeader('Access-Control-Allow-Headers', 'Authorization, Content-Type')
  if (method === 'OPTIONS') { res.writeHead(204); res.end(); return }

  if (path === '/health' && method === 'GET') {
    return json(res, { status: 'ok', uptime: process.uptime() })
  }

  if (!checkAuth(req, res)) return

  try {
    // POST /check { address, debug? } → { available, max_download_mbps, plans, confidence }
    if (path === '/check' && method === 'POST') {
      // A body that can't be read/parsed is the client's fault: 400, not 500.
      let body
      try {
        body = await parseBody(req)
      } catch (e) {
        return err(res, e.message, 400)
      }
      // The JSON may legally be null, an array or a scalar, and `address` may
      // be a non-string; treat all of those as a missing address.
      const rawAddress = body && typeof body === 'object' ? body.address : undefined
      const address = typeof rawAddress === 'string' ? rawAddress.trim() : ''
      if (!address || address.length < 5) return err(res, 'address required (min 5 chars)')
      const debug = !!body.debug
      const out = await enqueue(() => cogeco.checkAddress(address, { debug }))
      return json(res, out)
    }
    err(res, 'Not found', 404)
  } catch (e) {
    console.error('[cogeco-checker] error:', e)
    err(res, 'Internal error: ' + e.message, 500)
  }
})
|
||||
|
||||
// Start accepting connections and log the effective configuration.
function announceListening () {
  console.log(`[cogeco-checker] listening on ${PORT}, auth ${TOKEN ? 'on' : 'OFF (dev)'}, min-gap ${MIN_GAP_MS}ms`)
}
server.listen(PORT, announceListening)

// On SIGTERM/SIGINT: close the browser, stop the HTTP server, then exit.
const SHUTDOWN_SIGNALS = ['SIGTERM', 'SIGINT']
SHUTDOWN_SIGNALS.forEach((sig) => {
  const onSignal = async () => {
    console.log(`[cogeco-checker] ${sig}, shutting down`)
    await cogeco.shutdown()
    server.close()
    process.exit(0)
  }
  process.on(sig, onSignal)
})

// Log uncaught exceptions instead of letting them terminate the process.
process.on('uncaughtException', (e) => {
  console.error('[cogeco-checker] uncaught:', e)
})
|
||||
Loading…
Reference in New Issue
Block a user