feat(cogeco-checker): POC competitor-serviceability microservice (WIP)
Playwright/Chromium microservice (mirrors modem-bridge: node:20-slim + Chromium, token auth, port 3302, serialized + rate-limited) that drives Cogeco's public address checker to determine if a competitor serves a given address. What works (proven on prod): - Anti-bot bypass: vanilla headless gets 403 on /boutique/api/register (reCAPTCHA Enterprise blocks datacenter headless). Adding playwright-extra + stealth flips it to 200 — register + autocomplete succeed. - Reaches Cogeco's address system and pulls real autocomplete suggestions. Confirmed it's Loqate/AddressComplete (id + next: Retrieve/Find shape). What's NOT reliable yet (do not use the verdict for decisions): - The serviceability verdict. The Loqate flow is multi-step (Find → Retrieve → Cogeco serviceability) and a single option click doesn't complete it, so the final yes/no API call isn't captured. - Current interpret() falls back to scanning UI text and produces FALSE POSITIVES (a rural out-of-Cogeco address returned available=true off generic marketing copy). Needs the real Retrieve+serviceability endpoint wired before it can be trusted. Next: capture the post-selection Retrieve + serviceability call (likely needs a "continue" step and handling the multi-dwelling "N Addresses" branch), then parse the real verdict + speeds. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ab57a3e135
commit
74b89f5490
3
services/cogeco-checker/.env.example
Normal file
3
services/cogeco-checker/.env.example
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
CHECKER_PORT=3302
|
||||||
|
CHECKER_TOKEN=
|
||||||
|
CHECKER_MIN_GAP_MS=4000
|
||||||
39
services/cogeco-checker/Dockerfile
Normal file
39
services/cogeco-checker/Dockerfile
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
# cogeco-checker: Headless Chromium for Cogeco address-availability checker
# ~450MB total (node:20-slim + Chromium deps)
# Lighter than node:20 + full playwright install (~800MB)

FROM node:20-slim

# Shared libraries Chromium needs at runtime, installed explicitly with apt.
# The Chromium binary itself is downloaded by `playwright install` further down.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
    libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
    libpango-1.0-0 libcairo2 libasound2 libxshmfence1 \
    libxfixes3 libx11-6 libx11-xcb1 libxcb1 libxext6 \
    libxrender1 libxi6 libxtst6 libglib2.0-0 libdbus-1-3 \
    fonts-liberation \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Create non-root user first so playwright installs in their home
RUN groupadd -r checker && useradd -r -g checker -G audio,video -m checker

COPY package.json .
# --omit=dev replaces the deprecated --production flag (same effect).
RUN npm install --omit=dev

# Install Chromium as the checker user (so it goes to /home/checker/.cache)
USER checker
# Deliberately not piped through `tail`: RUN uses `/bin/sh -c`, which only
# reports the exit status of the LAST command in a pipeline, so a failed
# browser download would otherwise still produce a "successful" image.
RUN npx playwright install chromium
USER root

COPY server.js .
COPY lib/ lib/
RUN chown -R checker:checker /app

EXPOSE 3302

USER checker

CMD ["node", "server.js"]
|
||||||
27
services/cogeco-checker/docker-compose.yml
Normal file
27
services/cogeco-checker/docker-compose.yml
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# cogeco-checker: Headless Chromium REST API for Cogeco serviceability checks
|
||||||
|
# targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
|
||||||
|
# Internal only (no Traefik), token auth, serialized + rate-limited.
|
||||||
|
# Needs outbound internet (reaches cogeco.ca), so it sits on the proxy net.
|
||||||
|
|
||||||
|
services:
|
||||||
|
cogeco-checker:
|
||||||
|
build: .
|
||||||
|
container_name: cogeco-checker
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./server.js:/app/server.js:ro
|
||||||
|
- ./lib:/app/lib:ro
|
||||||
|
environment:
|
||||||
|
- CHECKER_PORT=3302
|
||||||
|
- CHECKER_TOKEN=${CHECKER_TOKEN:-}
|
||||||
|
- CHECKER_MIN_GAP_MS=${CHECKER_MIN_GAP_MS:-4000}
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 768M
|
||||||
|
networks:
|
||||||
|
- proxy
|
||||||
|
|
||||||
|
networks:
|
||||||
|
proxy:
|
||||||
|
external: true
|
||||||
169
services/cogeco-checker/lib/cogeco-session.js
Normal file
169
services/cogeco-checker/lib/cogeco-session.js
Normal file
|
|
@ -0,0 +1,169 @@
|
||||||
|
'use strict'
|
||||||
|
/**
|
||||||
|
* cogeco-session.js — drives Cogeco's public address-availability checker with
|
||||||
|
* a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
|
||||||
|
* and short-lived JWT the endpoint requires are generated legitimately by the
|
||||||
|
* page's own JS. A pure HTTP call can't produce those, hence the browser.
|
||||||
|
*
|
||||||
|
* Flow (reverse-engineered 2026-06):
|
||||||
|
* 1. load /en/internet/packages
|
||||||
|
* 2. click "Check Availability" → address dialog
|
||||||
|
* 3. type the address into the autocomplete combobox
|
||||||
|
* 4. pick the first suggestion (triggers GET /boutique/api/address/search;
* NOTE: a single option click does not yet reach the serviceability lookup)
|
||||||
|
* 5. capture the JSON responses + the rendered result text
|
||||||
|
*
|
||||||
|
* We intercept every /boutique/api/* and /api/check-avail/* response and also
|
||||||
|
* read the visible result, then return a normalized verdict. Cogeco can change
|
||||||
|
* this flow at any time — treat parsing defensively and keep `raw` for debug.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// playwright-extra + stealth masks the headless automation signals
|
||||||
|
// (navigator.webdriver, missing plugins, headless UA quirks) that reCAPTCHA
|
||||||
|
// Enterprise scores against. Falls back to vanilla playwright if the stealth
|
||||||
|
// stack isn't installed.
|
||||||
|
let chromium
try {
  chromium = require('playwright-extra').chromium
  const stealth = require('puppeteer-extra-plugin-stealth')()
  chromium.use(stealth)
} catch (e) {
  // The fallback itself is intentional (the stealth stack is optional), but it
  // must not be silent: without stealth the upstream anti-bot check is
  // expected to reject the session, so make the degraded mode visible in logs.
  console.warn('[cogeco-checker] stealth stack unavailable, falling back to vanilla playwright:', e.message)
  chromium = require('playwright').chromium
}
|
||||||
|
|
||||||
|
const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'
|
||||||
|
const NAV_TIMEOUT = 45000
|
||||||
|
const STEP_TIMEOUT = 20000
|
||||||
|
|
||||||
|
// Shared Chromium instance; created on demand by getBrowser().
let _browser = null

/**
 * Return the shared browser, launching a new one when none exists yet or the
 * previous one is no longer connected.
 *
 * @returns {Promise<object>} the connected browser instance
 */
async function getBrowser () {
  const reusable = Boolean(_browser) && _browser.isConnected()
  if (reusable) return _browser

  const launchOptions = {
    headless: true,
    args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
  }
  _browser = await chromium.launch(launchOptions)
  return _browser
}
|
||||||
|
|
||||||
|
// Normalize Cogeco's serviceability payload into a stable verdict. The exact
|
||||||
|
// shape varies, so we probe several likely fields and fall back to scanning
|
||||||
|
// the captured JSON + UI text for availability keywords + speed numbers.
|
||||||
|
/**
 * Turn the captured API responses and the rendered page text into a verdict.
 *
 * @param {Array<{url: string, status: number, body: *}>} captured - intercepted
 *   responses; only entries whose `body` is a parsed object are inspected.
 * @param {string} uiText - visible page text, used only when no captured
 *   response carried an explicit serviceability flag.
 * @returns {{available: (boolean|null), max_download_mbps: (number|null),
 *   plans: Array, confidence: string}} `available` stays null when nothing
 *   matched; `confidence` is 'high' for a JSON flag, 'medium' for a UI-text
 *   match, 'low' otherwise.
 */
function interpret (captured, uiText) {
  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }

  // 1. Look for an explicit serviceability object in the captured responses.
  for (const c of captured || []) {
    const b = c.body
    if (!b || typeof b !== 'object') continue
    const flat = JSON.stringify(b).toLowerCase()
    // Common serviceability flags (first payload carrying one wins).
    if (verdict.available === null) {
      if (/"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/.test(flat)) {
        verdict.available = true; verdict.confidence = 'high'
      } else if (/"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/.test(flat)) {
        verdict.available = false; verdict.confidence = 'high'
      }
    }
    // Speed markers anywhere in the payload (e.g. download 1000)
    const speeds = [...flat.matchAll(/"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g)].map(m => parseInt(m[1], 10))
    if (speeds.length) verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
  }

  // 2. Fall back to the rendered result text.
  if (verdict.available === null && typeof uiText === 'string' && uiText) {
    const t = uiText.toLowerCase()
    // Negative phrases MUST be tested first: "available"/"disponible" are
    // substrings of "not available"/"non disponible", so testing the positive
    // pattern first reported explicit "not available" copy as available=true.
    if (/not available|non disponible|unfortunately|pas (encore )?disponible|sorry/i.test(t)) {
      verdict.available = false; verdict.confidence = 'medium'
    } else if (/available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/i.test(t)) {
      verdict.available = true; verdict.confidence = 'medium'
    }
  }
  return verdict
}
|
||||||
|
|
||||||
|
// Upper bound on how long to wait for in-flight response bodies before
// interpreting, so a stalled body read cannot block the check indefinitely.
const CAPTURE_SETTLE_MS = 3000

// Wait until every promise in `promises` has settled, but never longer than `ms`.
async function settleWithin (promises, ms) {
  let timer
  const deadline = new Promise(resolve => { timer = setTimeout(resolve, ms) })
  try {
    await Promise.race([Promise.allSettled(promises), deadline])
  } finally {
    clearTimeout(timer)
  }
}

/**
 * Drive the address checker for one address and return a normalized result.
 *
 * @param {string} address - free-text address typed into the autocomplete.
 * @param {{debug?: boolean}} [options] - with `debug`, the result also carries
 *   the captured responses, a UI text excerpt and a base64 screenshot.
 * @returns {Promise<object>} `{ address, queried_at, ...verdict,
 *   picked_suggestion }` on success, or `{ address, queried_at, error }` when a
 *   step inside the page flow throws. Failures while obtaining the browser or
 *   creating the context are not caught here and reject the promise.
 */
async function checkAddress (address, { debug = false } = {}) {
  const browser = await getBrowser()
  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
  // between addresses and keeps each lookup independent.
  const ctx = await browser.newContext({
    locale: 'en-CA',
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
    viewport: { width: 1280, height: 900 },
  })

  const captured = []
  // Body reads are asynchronous; keep their promises so they can be awaited
  // before interpreting instead of racing the fixed render delay.
  const pending = []
  const captureResponse = async (resp) => {
    const u = resp.url()
    if (!/\/(boutique\/api|api\/check-avail)\//.test(u)) return
    let body = null
    try { body = await resp.json() } catch { try { body = (await resp.text()).slice(0, 2000) } catch { /* ignore */ } }
    captured.push({ url: u, status: resp.status(), body })
  }

  const result = { address, queried_at: new Date().toISOString() }
  let page = null
  try {
    // Created inside the try so the context is closed even if this fails.
    page = await ctx.newPage()
    page.on('response', (resp) => { pending.push(captureResponse(resp).catch(() => {})) })

    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })

    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
      const btn = page.getByRole('button', { name: label })
      if (await btn.count().catch(() => 0)) { await btn.first().click().catch(() => {}); break }
    }

    // Open the address dialog.
    await page.getByRole('button', { name: /check availability/i }).first()
      .click({ timeout: STEP_TIMEOUT })

    // Wait for the dialog, then target the combobox inside it (more robust
    // than matching the accessible name, which differs EN/FR).
    const dialog = page.getByRole('dialog')
    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
    const input = (await dialog.count().catch(() => 0))
      ? dialog.getByRole('combobox').first()
      : page.getByRole('combobox', { name: /address|adresse/i })
    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
    await input.fill('')
    await input.pressSequentially(address, { delay: 60 })

    // Wait for autocomplete suggestions, then pick the first one.
    let picked = false
    try {
      const firstOption = page.getByRole('option').first()
      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
      await firstOption.click()
      picked = true
    } catch {
      // No dropdown option appeared — try pressing ArrowDown+Enter as a fallback.
      // Here `picked` only records that the keys were sent successfully.
      try { await input.press('ArrowDown'); await input.press('Enter'); picked = true } catch { /* ignore */ }
    }

    // Give the serviceability lookup time to fire + render.
    await page.waitForTimeout(5000)
    // Then let any response bodies still being read finish (bounded).
    await settleWithin(pending, CAPTURE_SETTLE_MS)

    // Grab the visible result text (whatever the page now shows).
    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)

    Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
    if (debug) {
      result.captured = captured
      result.ui_excerpt = uiText.slice(0, 1200)
      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
    }
  } catch (e) {
    result.error = e.message
    if (debug) {
      await settleWithin(pending, CAPTURE_SETTLE_MS)
      result.captured = captured
      if (page) {
        try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
      }
    }
  } finally {
    await ctx.close().catch(() => {})
  }
  return result
}
|
||||||
|
|
||||||
|
/**
 * Close the shared browser if one was launched and drop the reference.
 * Errors raised while closing are ignored.
 */
async function shutdown () {
  if (!_browser) return
  await _browser.close().catch(() => {})
  _browser = null
}
|
||||||
|
|
||||||
|
module.exports = { checkAddress, shutdown }
|
||||||
14
services/cogeco-checker/package.json
Normal file
14
services/cogeco-checker/package.json
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
{
|
||||||
|
"name": "cogeco-checker",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "Headless-browser competitor serviceability checker — given an address, asks Cogeco's address checker whether internet is available and at what speeds. Internal REST API for targo-hub.",
|
||||||
|
"main": "server.js",
|
||||||
|
"scripts": {
|
||||||
|
"start": "node server.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"playwright": "^1.52.0",
|
||||||
|
"playwright-extra": "^4.3.6",
|
||||||
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
95
services/cogeco-checker/server.js
Normal file
95
services/cogeco-checker/server.js
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
// cogeco-checker/server.js — REST API for competitor (Cogeco) serviceability.
|
||||||
|
// targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
|
||||||
|
// Internal only, token auth, rate-limited (real browser + reCAPTCHA upstream).
|
||||||
|
|
||||||
|
const http = require('http')
|
||||||
|
const url = require('url')
|
||||||
|
const cogeco = require('./lib/cogeco-session')
|
||||||
|
|
||||||
|
/**
 * Read a non-negative integer from the environment.
 *
 * @param {string} name - environment variable name.
 * @param {number} fallback - value used when the variable is unset, empty,
 *   not numeric or negative.
 * @returns {number} the parsed integer or `fallback`.
 */
function envInt (name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10)
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback
}

const PORT = envInt('CHECKER_PORT', 3302)
const TOKEN = process.env.CHECKER_TOKEN || ''
// Serialize checks: one real browser context at a time + a small gap so we
// don't hammer Cogeco (reCAPTCHA score protection). Concurrency=1 by design.
// A malformed value falls back to the default instead of becoming NaN, which
// would make the gap computation falsy and silently disable the rate limit.
const MIN_GAP_MS = envInt('CHECKER_MIN_GAP_MS', 4000)
|
||||||
|
|
||||||
|
// Tail of the job queue: each new job is chained after this promise.
let _chain = Promise.resolve()
// Timestamp (ms) at which the most recent job finished.
let _lastRun = 0

/**
 * Run `fn` after all previously queued jobs, keeping at least MIN_GAP_MS
 * between the end of one job and the start of the next.
 *
 * @param {Function} fn - job to run; may return a promise.
 * @returns {Promise<*>} settles with the outcome of `fn`.
 */
function enqueue (fn) {
  const job = async () => {
    const sinceLast = Date.now() - _lastRun
    const wait = Math.max(0, MIN_GAP_MS - sinceLast)
    if (wait) {
      await new Promise(resolve => setTimeout(resolve, wait))
    }
    try {
      return await fn()
    } finally {
      _lastRun = Date.now()
    }
  }
  const run = _chain.then(job)
  // Keep the chain alive even if one job throws.
  _chain = run.catch(() => {})
  return run
}
|
||||||
|
|
||||||
|
/**
 * Send `data` as a JSON response.
 *
 * @param {object} res - HTTP response to write to.
 * @param {*} data - value to serialize.
 * @param {number} [status=200] - HTTP status code.
 */
function json (res, data, status = 200) {
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  const payload = JSON.stringify(data)
  res.end(payload)
}

/**
 * Send an `{ error }` JSON response.
 *
 * @param {object} res - HTTP response to write to.
 * @param {string} msg - error message.
 * @param {number} [status=400] - HTTP status code.
 */
function err (res, msg, status = 400) {
  const payload = { error: msg }
  json(res, payload, status)
}
|
||||||
|
|
||||||
|
/**
 * Read and JSON-parse a request body.
 *
 * @param {object} req - readable request emitting 'data', 'end' and 'error'.
 * @param {number} [maxBytes=1048576] - largest body accepted, in bytes.
 * @returns {Promise<*>} the parsed JSON value, or `{}` for an empty body.
 *   Rejects with an Error carrying `statusCode` 400 ('Invalid JSON') or 413
 *   ('Payload too large'); stream errors are passed through unchanged.
 */
function parseBody (req, maxBytes = 1024 * 1024) {
  return new Promise((resolve, reject) => {
    // Collect raw chunks and decode once at the end: decoding chunk by chunk
    // corrupts multi-byte UTF-8 characters split across a chunk boundary.
    const chunks = []
    let size = 0
    let tooLarge = false

    req.on('data', chunk => {
      if (tooLarge) return
      const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk))
      size += buf.length
      if (size > maxBytes) {
        // Stop buffering but keep draining so a response can still be sent.
        tooLarge = true
        chunks.length = 0
        const e = new Error('Payload too large')
        e.statusCode = 413
        reject(e)
        return
      }
      chunks.push(buf)
    })
    req.on('end', () => {
      if (tooLarge) return
      const text = Buffer.concat(chunks).toString('utf8')
      try {
        resolve(text ? JSON.parse(text) : {})
      } catch {
        const e = new Error('Invalid JSON')
        e.statusCode = 400
        reject(e)
      }
    })
    req.on('error', reject)
  })
}
|
||||||
|
|
||||||
|
/**
 * Enforce bearer-token auth. When TOKEN is empty, auth is disabled and every
 * request passes.
 *
 * @param {object} req - incoming request; its `authorization` header is read.
 * @param {object} res - response; receives a 401 when the check fails.
 * @returns {boolean} true when the request may proceed; false after a 401 has
 *   been written.
 */
function checkAuth (req, res) {
  if (!TOKEN) return true
  // Local require: `crypto` is not among this file's top-level requires.
  const { timingSafeEqual } = require('crypto')
  const presented = req.headers['authorization']
  if (typeof presented === 'string') {
    const got = Buffer.from(presented)
    const want = Buffer.from(`Bearer ${TOKEN}`)
    // timingSafeEqual throws on length mismatch, so compare lengths first;
    // the constant-time compare avoids leaking the token through timing.
    if (got.length === want.length && timingSafeEqual(got, want)) return true
  }
  err(res, 'Unauthorized', 401)
  return false
}
|
||||||
|
|
||||||
|
// HTTP entry point.
//   GET  /health                    → { status, uptime }          (no auth)
//   POST /check { address, debug? } → checkAddress() result        (auth)
// Anything else is a 404. Client mistakes (bad JSON, missing/invalid address)
// are answered with 4xx; only unexpected failures become 500.
const server = http.createServer(async (req, res) => {
  const parsed = url.parse(req.url, true)
  const path = parsed.pathname
  const method = req.method

  res.setHeader('Access-Control-Allow-Origin', '*')
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
  res.setHeader('Access-Control-Allow-Headers', 'Authorization, Content-Type')
  if (method === 'OPTIONS') { res.writeHead(204); res.end(); return }

  if (path === '/health' && method === 'GET') {
    return json(res, { status: 'ok', uptime: process.uptime() })
  }

  if (!checkAuth(req, res)) return

  try {
    // POST /check { address, debug? } → { available, max_download_mbps, plans, confidence }
    if (path === '/check' && method === 'POST') {
      let body
      try {
        body = await parseBody(req)
      } catch (e) {
        // A body that cannot be read/parsed is the client's fault, not a 500.
        return err(res, e.message, e.statusCode || 400)
      }
      // The body may be any JSON value (null, number, array...) and `address`
      // any type; treat everything that is not a string as missing.
      const isObject = body !== null && typeof body === 'object'
      const address = isObject && typeof body.address === 'string' ? body.address.trim() : ''
      if (!address || address.length < 5) return err(res, 'address required (min 5 chars)')
      const debug = !!body.debug
      const out = await enqueue(() => cogeco.checkAddress(address, { debug }))
      return json(res, out)
    }
    err(res, 'Not found', 404)
  } catch (e) {
    console.error('[cogeco-checker] error:', e)
    // Writing a second status line would throw; just finish the response.
    if (res.headersSent) { res.end(); return }
    err(res, 'Internal error: ' + e.message, 500)
  }
})
|
||||||
|
|
||||||
|
// Start listening and report the effective configuration once bound.
function announceListening () {
  console.log(`[cogeco-checker] listening on ${PORT}, auth ${TOKEN ? 'on' : 'OFF (dev)'}, min-gap ${MIN_GAP_MS}ms`)
}
server.listen(PORT, announceListening)

// On a termination signal: close the browser, stop the HTTP server, exit 0.
async function stopOnSignal (sig) {
  console.log(`[cogeco-checker] ${sig}, shutting down`)
  await cogeco.shutdown()
  server.close()
  process.exit(0)
}
process.on('SIGTERM', () => stopOnSignal('SIGTERM'))
process.on('SIGINT', () => stopOnSignal('SIGINT'))

// Log uncaught exceptions instead of letting them terminate the process.
process.on('uncaughtException', (e) => {
  console.error('[cogeco-checker] uncaught:', e)
})
|
||||||
Loading…
Reference in New Issue
Block a user