diff --git a/services/cogeco-checker/.env.example b/services/cogeco-checker/.env.example
new file mode 100644
index 0000000..95b119f
--- /dev/null
+++ b/services/cogeco-checker/.env.example
@@ -0,0 +1,3 @@
+CHECKER_PORT=3302
+CHECKER_TOKEN=
+CHECKER_MIN_GAP_MS=4000
diff --git a/services/cogeco-checker/Dockerfile b/services/cogeco-checker/Dockerfile
new file mode 100644
index 0000000..5e5eca2
--- /dev/null
+++ b/services/cogeco-checker/Dockerfile
@@ -0,0 +1,40 @@
+# cogeco-checker: Headless Chromium for Cogeco address-availability checker
+# ~450MB total (node:20-slim + Chromium deps)
+# Lighter than node:20 + full playwright install (~800MB)
+
+FROM node:20-slim
+
+# Playwright needs these system deps for Chromium.
+# Listed explicitly and installed with apt in one layer (not via playwright's own installer).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
+    libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
+    libpango-1.0-0 libcairo2 libasound2 libxshmfence1 \
+    libxfixes3 libx11-6 libx11-xcb1 libxcb1 libxext6 \
+    libxrender1 libxi6 libxtst6 libglib2.0-0 libdbus-1-3 \
+    fonts-liberation \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Create non-root user first so playwright installs in their home
+RUN groupadd -r checker && useradd -r -g checker -G audio,video -m checker
+
+COPY package.json .
+RUN npm install --production
+
+# Install Chromium as the checker user (so it goes to /home/checker/.cache).
+# Not piped through `tail`: the pipe would report tail's exit status and hide a failed download.
+USER checker
+RUN npx playwright install chromium
+USER root
+
+COPY server.js .
+COPY lib/ lib/
+RUN chown -R checker:checker /app
+
+EXPOSE 3302
+
+USER checker
+
+CMD ["node", "server.js"]
diff --git a/services/cogeco-checker/docker-compose.yml b/services/cogeco-checker/docker-compose.yml
new file mode 100644
index 0000000..78e2d88
--- /dev/null
+++ b/services/cogeco-checker/docker-compose.yml
@@ -0,0 +1,27 @@
+# cogeco-checker: Headless Chromium REST API for Cogeco serviceability checks
+# targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
+# Internal only (no Traefik), token auth, serialized + rate-limited.
+# Needs outbound internet (reaches cogeco.ca), so it sits on the proxy net.
+
+services:
+  cogeco-checker:
+    build: .
+    container_name: cogeco-checker
+    restart: unless-stopped
+    volumes:
+      - ./server.js:/app/server.js:ro
+      - ./lib:/app/lib:ro
+    environment:
+      - CHECKER_PORT=3302
+      - CHECKER_TOKEN=${CHECKER_TOKEN:-}
+      - CHECKER_MIN_GAP_MS=${CHECKER_MIN_GAP_MS:-4000}
+    deploy:
+      resources:
+        limits:
+          memory: 768M
+    networks:
+      - proxy
+
+networks:
+  proxy:
+    external: true
diff --git a/services/cogeco-checker/lib/cogeco-session.js b/services/cogeco-checker/lib/cogeco-session.js
new file mode 100644
index 0000000..b0f8e2f
--- /dev/null
+++ b/services/cogeco-checker/lib/cogeco-session.js
@@ -0,0 +1,199 @@
+'use strict'
+/**
+ * cogeco-session.js — drives Cogeco's public address-availability checker with
+ * a real headless Chromium (Playwright) so the reCAPTCHA token (x-rc-token)
+ * and short-lived JWT the endpoint requires are generated legitimately by the
+ * page's own JS. A pure HTTP call can't produce those, hence the browser.
+ *
+ * Flow (reverse-engineered 2026-06):
+ *   1. load /en/internet/packages
+ *   2. click "Check Availability" → address dialog
+ *   3. type the address into the autocomplete combobox
+ *   4. pick the first suggestion (triggers GET /boutique/api/address/search
+ *      then the serviceability lookup)
+ *   5. capture the JSON responses + the rendered result text
+ *
+ * We intercept every /boutique/api/* and /api/check-avail/* response and also
+ * read the visible result, then return a normalized verdict. Cogeco can change
+ * this flow at any time — treat parsing defensively and keep the raw responses
+ * (`captured`, returned in debug mode) for troubleshooting.
+ */
+
+// playwright-extra + stealth masks the headless automation signals
+// (navigator.webdriver, missing plugins, headless UA quirks) that reCAPTCHA
+// Enterprise scores against. Falls back to vanilla playwright if the stealth
+// stack isn't installed.
+let chromium
+try {
+  chromium = require('playwright-extra').chromium
+  const stealth = require('puppeteer-extra-plugin-stealth')()
+  chromium.use(stealth)
+} catch {
+  chromium = require('playwright').chromium
+}
+
+const PAGE_URL = 'https://www.cogeco.ca/en/internet/packages'
+const NAV_TIMEOUT = 45000
+const STEP_TIMEOUT = 20000
+
+// Verdict keywords for the rendered-text fallback (matched on lowercased text).
+// Explicit negations contain the positive keywords as substrings ("not
+// available", "unavailable", "non disponible"), so they are tested first;
+// otherwise the positive pattern claims them and a refusal reads as a yes.
+const UI_NEGATED = /not available|unavailable|non disponible|indisponible|pas (encore )?disponible/
+const UI_POSITIVE = /available|disponible|good news|great news|we('| a)re in your area|select your plan|choose your/
+const UI_NEGATIVE = /unfortunately|sorry/
+
+// One shared browser process: launched on first use, relaunched if disconnected.
+let _browser = null
+async function getBrowser () {
+  if (_browser && _browser.isConnected()) return _browser
+  _browser = await chromium.launch({
+    headless: true,
+    args: ['--no-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'],
+  })
+  return _browser
+}
+
+/**
+ * Normalize Cogeco's serviceability payload into a stable verdict. The exact
+ * shape varies, so we probe several likely fields and fall back to scanning
+ * the captured JSON + UI text for availability keywords + speed numbers.
+ *
+ * @param {Array<{url: string, status: number, body: any}>} captured responses recorded by checkAddress
+ * @param {string} uiText visible page text after the lookup
+ * @returns {{available: (boolean|null), max_download_mbps: (number|null), plans: Array, confidence: string}}
+ *   `plans` is currently always empty; `confidence` is 'high' (JSON flag),
+ *   'medium' (UI text) or 'low' (nothing recognised).
+ */
+function interpret (captured, uiText) {
+  const verdict = { available: null, max_download_mbps: null, plans: [], confidence: 'low' }
+
+  // 1. Look for an explicit serviceability object in the captured responses.
+  for (const c of captured) {
+    const b = c.body
+    if (!b || typeof b !== 'object') continue
+    const flat = JSON.stringify(b).toLowerCase()
+    // Common serviceability flags
+    if (verdict.available === null) {
+      if (/"serviceable"\s*:\s*true|"available"\s*:\s*true|"iseligible"\s*:\s*true|"qualified"\s*:\s*true/.test(flat)) {
+        verdict.available = true; verdict.confidence = 'high'
+      } else if (/"serviceable"\s*:\s*false|"available"\s*:\s*false|"iseligible"\s*:\s*false|"qualified"\s*:\s*false/.test(flat)) {
+        verdict.available = false; verdict.confidence = 'high'
+      }
+    }
+    // Speed markers anywhere in the payload (e.g. download 1000)
+    const speeds = [...flat.matchAll(/"(?:download|downloadspeed|speed|maxspeed)"\s*:\s*"?(\d{2,5})"?/g)].map(m => parseInt(m[1], 10))
+    if (speeds.length) verdict.max_download_mbps = Math.max(verdict.max_download_mbps || 0, ...speeds)
+  }
+
+  // 2. Fall back to the rendered result text.
+  if (verdict.available === null && uiText) {
+    const t = uiText.toLowerCase()
+    if (UI_NEGATED.test(t)) {
+      verdict.available = false; verdict.confidence = 'medium'
+    } else if (UI_POSITIVE.test(t)) {
+      verdict.available = true; verdict.confidence = 'medium'
+    } else if (UI_NEGATIVE.test(t)) {
+      verdict.available = false; verdict.confidence = 'medium'
+    }
+  }
+  return verdict
+}
+
+/**
+ * Run one availability lookup for `address` in a fresh browser context.
+ * Failures inside the page flow are reported as `error` on the result rather
+ * than thrown; launching the browser or opening the context can still reject.
+ *
+ * @param {string} address text typed into Cogeco's address autocomplete
+ * @param {{debug?: boolean}} [options] debug adds `captured` and `ui_excerpt` (plus a base64 `screenshot` on success)
+ * @returns {Promise<object>} `{ address, queried_at }` merged with the interpret() verdict and `picked_suggestion`, or with `error`
+ */
+async function checkAddress (address, { debug = false } = {}) {
+  const browser = await getBrowser()
+  // Fresh context per check — avoids carrying a stale reCAPTCHA/session score
+  // between addresses and keeps each lookup independent.
+  const ctx = await browser.newContext({
+    locale: 'en-CA',
+    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36',
+    viewport: { width: 1280, height: 900 },
+  })
+  const page = await ctx.newPage()
+  const captured = []
+  page.on('response', async (resp) => {
+    const u = resp.url()
+    if (/\/(boutique\/api|api\/check-avail)\//.test(u)) {
+      let body = null
+      try { body = await resp.json() } catch { try { body = (await resp.text()).slice(0, 2000) } catch { /* ignore */ } }
+      captured.push({ url: u, status: resp.status(), body })
+    }
+  })
+
+  const result = { address, queried_at: new Date().toISOString() }
+  try {
+    await page.goto(PAGE_URL, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })
+
+    // Dismiss a cookie/consent banner if present (best-effort, non-fatal).
+    for (const label of [/accept all/i, /accept/i, /agree/i, /tout accepter/i, /j'accepte/i]) {
+      const btn = page.getByRole('button', { name: label })
+      if (await btn.count().catch(() => 0)) { await btn.first().click().catch(() => {}); break }
+    }
+
+    // Open the address dialog.
+    await page.getByRole('button', { name: /check availability/i }).first()
+      .click({ timeout: STEP_TIMEOUT })
+
+    // Wait for the dialog, then target the combobox inside it (more robust
+    // than matching the accessible name, which differs EN/FR).
+    const dialog = page.getByRole('dialog')
+    await dialog.waitFor({ state: 'visible', timeout: STEP_TIMEOUT }).catch(() => {})
+    const input = (await dialog.count().catch(() => 0))
+      ? dialog.getByRole('combobox').first()
+      : page.getByRole('combobox', { name: /address|adresse/i })
+    await input.waitFor({ state: 'visible', timeout: STEP_TIMEOUT })
+    await input.fill('')
+    await input.pressSequentially(address, { delay: 60 })
+
+    // Wait for autocomplete suggestions, then pick the first one.
+    let picked = false
+    try {
+      const firstOption = page.getByRole('option').first()
+      await firstOption.waitFor({ state: 'visible', timeout: 8000 })
+      await firstOption.click()
+      picked = true
+    } catch {
+      // No dropdown option appeared — try pressing ArrowDown+Enter as a fallback.
+      try { await input.press('ArrowDown'); await input.press('Enter'); picked = true } catch { /* ignore */ }
+    }
+
+    // Give the serviceability lookup time to fire + render.
+    await page.waitForTimeout(5000)
+
+    // Grab the visible result text (whatever the page now shows).
+    const uiText = (await page.locator('body').innerText().catch(() => '') || '').slice(0, 4000)
+
+    Object.assign(result, interpret(captured, uiText), { picked_suggestion: picked })
+    if (debug) {
+      result.captured = captured
+      result.ui_excerpt = uiText.slice(0, 1200)
+      result.screenshot = (await page.screenshot({ fullPage: false }).catch(() => null))?.toString('base64') || null
+    }
+  } catch (e) {
+    result.error = e.message
+    if (debug) {
+      result.captured = captured
+      try { result.ui_excerpt = (await page.locator('body').innerText()).slice(0, 1200) } catch { /* ignore */ }
+    }
+  } finally {
+    await ctx.close().catch(() => {})
+  }
+  return result
+}
+
+// Close the shared browser if one was launched; errors while closing are ignored.
+async function shutdown () {
+  if (_browser) { await _browser.close().catch(() => {}); _browser = null }
+}
+
+module.exports = { checkAddress, shutdown }
diff --git a/services/cogeco-checker/package.json b/services/cogeco-checker/package.json
new file mode 100644
index 0000000..a3d5ce4
--- /dev/null
+++ b/services/cogeco-checker/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "cogeco-checker",
+  "version": "0.1.0",
+  "description": "Headless-browser competitor serviceability checker — given an address, asks Cogeco's address checker whether internet is available and at what speeds. 
Internal REST API for targo-hub.",
+  "main": "server.js",
+  "scripts": {
+    "start": "node server.js"
+  },
+  "dependencies": {
+    "playwright": "^1.52.0",
+    "playwright-extra": "^4.3.6",
+    "puppeteer-extra-plugin-stealth": "^2.11.2"
+  }
+}
diff --git a/services/cogeco-checker/server.js b/services/cogeco-checker/server.js
new file mode 100644
index 0000000..bfdb004
--- /dev/null
+++ b/services/cogeco-checker/server.js
@@ -0,0 +1,125 @@
+// cogeco-checker/server.js — REST API for competitor (Cogeco) serviceability.
+// targo-hub (3300) -> cogeco-checker (3302) -> cogeco.ca address checker
+// Internal only, token auth, rate-limited (real browser + reCAPTCHA upstream).
+
+const crypto = require('crypto')
+const http = require('http')
+const url = require('url')
+const cogeco = require('./lib/cogeco-session')
+
+const PORT = parseInt(process.env.CHECKER_PORT || '3302', 10)
+const TOKEN = process.env.CHECKER_TOKEN || ''
+// Serialize checks: one real browser context at a time + a small gap so we
+// don't hammer Cogeco (reCAPTCHA score protection). Concurrency=1 by design.
+const MIN_GAP_MS = parseInt(process.env.CHECKER_MIN_GAP_MS || '4000', 10)
+// Upper bound on a request body; /check only carries a short address.
+const MAX_BODY_BYTES = 64 * 1024
+
+let _chain = Promise.resolve()
+let _lastRun = 0
+// Run `fn` after every previously queued job has settled, at least MIN_GAP_MS
+// after the previous job finished. Resolves/rejects with fn's own outcome.
+function enqueue (fn) {
+  const run = _chain.then(async () => {
+    const wait = Math.max(0, MIN_GAP_MS - (Date.now() - _lastRun))
+    if (wait) await new Promise(r => setTimeout(r, wait))
+    try { return await fn() } finally { _lastRun = Date.now() }
+  })
+  // Keep the chain alive even if one job throws.
+  _chain = run.catch(() => {})
+  return run
+}
+
+function json (res, data, status = 200) {
+  res.writeHead(status, { 'Content-Type': 'application/json' })
+  res.end(JSON.stringify(data))
+}
+function err (res, msg, status = 400) { json(res, { error: msg }, status) }
+
+// Error carrying the HTTP status the request handler should answer with.
+function httpError (httpStatus, msg) {
+  return Object.assign(new Error(msg), { httpStatus })
+}
+
+// Read and parse a JSON request body (empty body → {}). Rejects with a 400
+// httpError on malformed JSON and a 413 one when MAX_BODY_BYTES is exceeded.
+function parseBody (req) {
+  return new Promise((resolve, reject) => {
+    // Collect raw Buffers and decode once: decoding chunk by chunk corrupts a
+    // multi-byte UTF-8 character (e.g. "é") that straddles two chunks.
+    const chunks = []
+    let size = 0
+    req.on('data', c => {
+      size += c.length
+      if (size <= MAX_BODY_BYTES) chunks.push(c)
+    })
+    req.on('end', () => {
+      if (size > MAX_BODY_BYTES) return reject(httpError(413, 'Body too large'))
+      const body = Buffer.concat(chunks).toString('utf8')
+      try { resolve(body ? JSON.parse(body) : {}) } catch { reject(httpError(400, 'Invalid JSON')) }
+    })
+    req.on('error', reject)
+  })
+}
+
+// Bearer-token check; disabled when CHECKER_TOKEN is unset. Answers 401 and
+// returns false on mismatch.
+function checkAuth (req, res) {
+  if (!TOKEN) return true
+  // Constant-time comparison so response timing doesn't leak the token.
+  const given = Buffer.from(String(req.headers['authorization'] || ''))
+  const expected = Buffer.from(`Bearer ${TOKEN}`)
+  if (given.length === expected.length && crypto.timingSafeEqual(given, expected)) return true
+  err(res, 'Unauthorized', 401)
+  return false
+}
+
+const server = http.createServer(async (req, res) => {
+  const parsed = url.parse(req.url, true)
+  const path = parsed.pathname
+  const method = req.method
+
+  res.setHeader('Access-Control-Allow-Origin', '*')
+  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
+  res.setHeader('Access-Control-Allow-Headers', 'Authorization, Content-Type')
+  if (method === 'OPTIONS') { res.writeHead(204); res.end(); return }
+
+  if (path === '/health' && method === 'GET') {
+    return json(res, { status: 'ok', uptime: process.uptime() })
+  }
+
+  if (!checkAuth(req, res)) return
+
+  try {
+    // POST /check { address, debug? } → { available, max_download_mbps, plans, confidence }
+    if (path === '/check' && method === 'POST') {
+      const body = await parseBody(req)
+      // A missing or non-string address (or a non-object body) is a client error, not a crash.
+      const address = typeof body?.address === 'string' ? body.address.trim() : ''
+      if (!address || address.length < 5) return err(res, 'address required (min 5 chars)')
+      const debug = !!body.debug
+      const out = await enqueue(() => cogeco.checkAddress(address, { debug }))
+      return json(res, out)
+    }
+    err(res, 'Not found', 404)
+  } catch (e) {
+    // Client errors raised by parseBody carry their own status.
+    if (e.httpStatus) return err(res, e.message, e.httpStatus)
+    console.error('[cogeco-checker] error:', e)
+    err(res, 'Internal error: ' + e.message, 500)
+  }
+})
+
+server.listen(PORT, () => {
+  console.log(`[cogeco-checker] listening on ${PORT}, auth ${TOKEN ? 'on' : 'OFF (dev)'}, min-gap ${MIN_GAP_MS}ms`)
+})
+
+for (const sig of ['SIGTERM', 'SIGINT']) {
+  process.on(sig, async () => {
+    console.log(`[cogeco-checker] ${sig}, shutting down`)
+    await cogeco.shutdown()
+    server.close()
+    process.exit(0)
+  })
+}
+process.on('uncaughtException', e => console.error('[cogeco-checker] uncaught:', e))