#!/usr/bin/env python3
"""
Migrate legacy delivery → Service Location, device → Service Equipment.
Then link existing Subscriptions and Issues to their Service Location.

Dependencies: migrate_all.py must have run first
(Customers, Subscriptions, Issues exist).

Run inside erpnext-backend-1:
  nohup python3 /tmp/migrate_locations.py > /tmp/migrate_locations.log 2>&1 &
  tail -f /tmp/migrate_locations.log

Phase 1: Add legacy_delivery_id custom field + column to Service Location
Phase 2: Import deliveries → Service Location
Phase 3: Import devices → Service Equipment
Phase 4: Link Subscriptions → Service Location (via legacy service.delivery_id)
Phase 5: Link Issues → Service Location (via legacy ticket.delivery_id)
"""
import re
import uuid
from contextlib import closing
from datetime import datetime, timezone
from html import unescape

# NOTE(review): credentials are hard-coded in source. Consider loading them
# from the environment or a secrets file before this script is shared.
LEGACY = {"host": "legacy-db", "user": "facturation", "password": "VD67owoj",
          "database": "gestionclient", "connect_timeout": 30, "read_timeout": 600}
PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123",
      "dbname": "_eb65bdc0c4b1b2d6"}
ADMIN = "Administrator"

# Legacy device category → ERPNext equipment_type.
# Keys are matched as plain substrings, in insertion order (see
# guess_device_type), so earlier entries win.
DEVICE_TYPE_MAP = {
    "cpe": "ONT",
    "ont": "ONT",
    "onu": "ONT",
    "modem": "Modem",
    "routeur": "Routeur",
    "router": "Routeur",
    "switch": "Switch",
    "ap": "AP WiFi",
    "access point": "AP WiFi",
    "decodeur": "Decodeur TV",
    "stb": "Decodeur TV",
    "telephone": "Telephone IP",
    "ata": "Telephone IP",
    "amplificateur": "Amplificateur",
}


def uid(prefix=""):
    """Return `prefix` followed by 10 random hex characters (from a UUID4)."""
    return prefix + uuid.uuid4().hex[:10]


def ts():
    """Return the current UTC time as 'YYYY-MM-DD HH:MM:SS.ffffff'."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f")


def clean(val):
    """Return `val` as an HTML-unescaped, stripped string; '' for falsy input."""
    if not val:
        return ""
    return unescape(str(val)).strip()


# ── Address normalization helpers ─────────────────────────────────────────
# These exist because the legacy `gestionclient` data is full of:
#   - postal codes embedded in address_line (e.g. "12 rue X J0S1B0")
#   - abbreviations ("Ch" instead of "Chemin", "Av." for "Avenue")
#   - Cobol-style capitalization ("1Re-Concession" instead of "1re-Concession")
#   - lowercase or no-space postal codes ("j0s1b0" vs "J0S 1B0")
# Without normalization these break the RQA address validator (no exact
# match), Mapbox geocoder fuzzy results, and the human eye on customer
# cards. We catch them at the import step so re-runs converge on the
# canonical form.

# Common French / Quebec street-type abbreviations seen in legacy data.
# Patterns are applied in list order.
# Pattern intent: match the abbreviation as a full word ("\b...\b"),
# THEN optionally swallow a trailing period. Without the trailing
# word-boundary inside, "Boul." would match only "Boul" and leave
# the dot behind ("Boulevard."). The order is the dot AFTER the
# inner \b so the whole-word check happens first.
ABBREV_MAP = [
    (r'\bCh\b\.?', 'Chemin'),
    (r'\bRte\b\.?', 'Route'),
    (r'\bAv\b\.?', 'Avenue'),
    (r'\bBd\b\.?', 'Boulevard'),
    (r'\bBoul\b\.?', 'Boulevard'),
    (r'\bSt[\.-]', 'Saint-'),
    (r'\bSte[\.-]', 'Sainte-'),
    (r'\bMtl\b\.?', 'Montréal'),
]

# Words that should stay lowercase even when title-casing the address.
LOWER_WORDS = {
    'de', 'du', 'des', 'la', 'le', 'les', "l'", "d'",
    'au', 'aux', 'à',   # Rivière-aux-Outardes, Pointe-au-Chêne
    'et', 'sur',        # Saint-Pierre-et-Miquelon, Bois-sur-Rivière
    'en',               # Cap-en-Haut
}

# RQA standard puts ordinal markers ("1re", "2e", "3e") in lowercase.
ORDINAL_RE = re.compile(r'^(\d+)(re|er|e|ere|eme|ème)$', re.I)

# Postal code (with or without inner space) that leaked onto the end of an
# address line. Compiled once because clean_address_line runs per row.
TRAILING_POSTAL_RE = re.compile(r'\s+[A-Z]\d[A-Z]\s?\d[A-Z]\d\s*$', re.I)

# Name of the savepoint used to isolate a single statement (see _try_execute).
_ROW_SAVEPOINT = "migrate_row"


def normalize_postal_code(pc):
    """Uppercase + strip + insert canonical space: 'j0s1b0' → 'J0S 1B0'.

    Returns None for empty input. A value that does not look like a Canadian
    postal code is returned uppercased with whitespace removed, so the rep
    still sees it.
    """
    if not pc:
        return None
    s = re.sub(r'\s+', '', str(pc)).upper()
    if not re.match(r'^[A-Z]\d[A-Z]\d[A-Z]\d$', s):
        return s or None  # malformed; pass through so the rep sees it
    return s[:3] + ' ' + s[3:]


def clean_address_line(raw, postal_code=None):
    """Normalize a legacy address_line for ingestion into Service Location.

    1. Strip the postal code if it leaked into the end of the field.
    2. Expand common Quebec street-type abbreviations.
    3. Title-case words but keep articles ('de', 'la', 'des') lowercase and
       ordinal markers ('1re', '2e') lowercase too. An article that opens
       the line is capitalized, whether it stands alone ("La Prairie") or
       starts a hyphenated word ("La-Prairie").
    4. Collapse runs of whitespace into single spaces.

    Args:
        raw: the legacy address text (may be None/empty, may be HTML-escaped).
        postal_code: optional postal code from the dedicated field; if the
            address ends with it (ignoring case), it is removed.

    Returns:
        The normalized string, or "" when `raw` is falsy.
    """
    if not raw:
        return ""
    s = unescape(str(raw)).strip()

    # 1. Strip embedded postal code (with or without space)
    s = TRAILING_POSTAL_RE.sub('', s).strip()
    # Sometimes the postal code from the dedicated field is doubled
    if postal_code:
        pc_clean = re.sub(r'\s+', '', str(postal_code)).upper()
        s = re.sub(re.escape(pc_clean) + r'\s*$', '', s, flags=re.I).strip()

    # 2. Expand abbreviations
    for pat, repl in ABBREV_MAP:
        s = re.sub(pat, repl, s, flags=re.I)

    # 3. Word-by-word title casing with exceptions
    parts = []
    for i, word in enumerate(s.split(' ')):
        if not word:
            continue
        lw = word.lower()
        # Hyphenated chunks: title-case each piece (and apply ordinal rule)
        if '-' in word:
            sub = []
            for j, chunk in enumerate(word.split('-')):
                cl = chunk.lower()
                m = ORDINAL_RE.match(cl)
                if m:
                    sub.append(m.group(1) + m.group(2).lower())
                elif cl in LOWER_WORDS and not (i == 0 and j == 0):
                    # Same rule as for plain words: an article is lowercase
                    # unless it is the very first token of the line.
                    sub.append(cl)
                else:
                    sub.append(chunk[:1].upper() + chunk[1:].lower())
            parts.append('-'.join(sub))
            continue
        # Articles stay lowercase except as the first word
        if i > 0 and lw in LOWER_WORDS:
            parts.append(lw)
            continue
        # Ordinal markers like "1re", "2e" lowercase
        m = ORDINAL_RE.match(lw)
        if m:
            parts.append(m.group(1) + m.group(2).lower())
            continue
        parts.append(word[:1].upper() + word[1:].lower())

    # 4. Collapse double spaces
    return re.sub(r'\s+', ' ', ' '.join(parts)).strip()


def detect_connection_type(devices_for_delivery):
    """Return 'Fibre FTTH' if any device on the delivery looks like fibre gear.

    A device counts when its category, name or model contains one of
    'ont', 'onu', 'cpe', 'fibre', 'gpon', 'ftth' (case-insensitive substring).
    Returns None when there are no devices or none match — the rep will fill
    the connection type in later.

    NOTE(review): plain substring matching means e.g. "controller" matches
    'ont'; confirm against real data whether that is acceptable.
    """
    if not devices_for_delivery:
        return None
    for d in devices_for_delivery:
        cat = clean(d.get('category', '')).lower()
        nm = clean(d.get('name', '')).lower()
        mdl = clean(d.get('model', '')).lower()
        combined = ' '.join([cat, nm, mdl])
        if any(k in combined for k in ('ont', 'onu', 'cpe', 'fibre', 'gpon', 'ftth')):
            return 'Fibre FTTH'
    return None


def log(msg):
    """Print `msg` prefixed with the current UTC time, flushing immediately."""
    print("[{}] {}".format(datetime.now(timezone.utc).strftime("%H:%M:%S"), msg), flush=True)


def guess_device_type(category, name, model):
    """Map legacy device category/name/model to an ERPNext equipment_type.

    The three fields are cleaned, lowercased and joined; the first
    DEVICE_TYPE_MAP key found as a substring wins. Falls back to "ONT" for
    'fibre'/'gpon', "AP WiFi" for 'wifi'/'wireless', otherwise "Autre".

    NOTE(review): short keys such as "ap" or "ata" also match inside longer
    words ("adapter", "data"); verify against the legacy categories.
    """
    cat = clean(category).lower()
    nm = clean(name).lower()
    mdl = clean(model).lower()
    combined = "{} {} {}".format(cat, nm, mdl)

    for key, val in DEVICE_TYPE_MAP.items():
        if key in combined:
            return val

    # Fallback heuristics
    if "fibre" in combined or "gpon" in combined:
        return "ONT"
    if "wifi" in combined or "wireless" in combined:
        return "AP WiFi"
    return "Autre"


def location_display_name(raw_name, addr, city, did):
    """Pick the display name for a Service Location.

    Preference order: the legacy delivery name, then "addr, city", then
    "Location-<id>". The explicit parentheses matter: without them
    `a or b if addr else c` parses as `(a or b) if addr else c`, which
    discards the legacy name whenever the address is empty.
    """
    fallback = "{}, {}".format(addr, city) if addr else "Location-{}".format(did)
    return clean(raw_name) or fallback


def parse_gps(latitude, longitude):
    """Parse legacy GPS fields into a (lat, lon) pair, using 0 for missing values.

    Parsing stops at the first unparsable value, so a bad latitude also
    leaves longitude at 0.
    """
    lat = 0
    lon = 0
    try:
        if latitude:
            lat = float(latitude)
        if longitude:
            lon = float(longitude)
    except (ValueError, TypeError):
        pass
    return lat, lon


def _try_execute(cursor, sql, params):
    """Execute one statement inside a SAVEPOINT.

    On failure only this statement is rolled back; earlier uncommitted work
    in the same transaction survives. (A plain connection.rollback() would
    discard every row inserted since the last commit, and in Phase 1 it
    would also revert the preceding ALTER TABLE.)

    Returns:
        None on success, or the exception raised by the statement.
    """
    cursor.execute("SAVEPOINT " + _ROW_SAVEPOINT)
    try:
        cursor.execute(sql, params)
    except Exception as exc:  # driver errors; reported to the caller
        cursor.execute("ROLLBACK TO SAVEPOINT " + _ROW_SAVEPOINT)
        cursor.execute("RELEASE SAVEPOINT " + _ROW_SAVEPOINT)
        return exc
    cursor.execute("RELEASE SAVEPOINT " + _ROW_SAVEPOINT)
    return None


_INSERT_CUSTOM_FIELD_SQL = """
    INSERT INTO "tabCustom Field" (
        name, creation, modified, modified_by, owner, docstatus, idx,
        dt, label, fieldname, fieldtype, insert_after
    ) VALUES (%s, %s, %s, %s, %s, 0, 0, %s, %s, %s, 'Int', %s)
"""

_INSERT_LOCATION_SQL = """
    INSERT INTO "tabService Location" (
        name, creation, modified, modified_by, owner, docstatus, idx,
        customer, location_name, status,
        address_line, city, postal_code, province,
        latitude, longitude,
        contact_name, contact_phone,
        connection_type,
        legacy_delivery_id
    ) VALUES (
        %s, %s, %s, %s, %s, 0, 0,
        %s, %s, 'Active',
        %s, %s, %s, %s,
        %s, %s,
        %s, %s,
        %s,
        %s
    )
"""

_INSERT_EQUIPMENT_SQL = """
    INSERT INTO "tabService Equipment" (
        name, creation, modified, modified_by, owner, docstatus, idx,
        equipment_type, brand, model, serial_number, mac_address,
        customer, service_location, status, ownership,
        ip_address, login_user, login_password,
        legacy_device_id
    ) VALUES (
        %s, %s, %s, %s, %s, 0, 0,
        %s, %s, %s, %s, %s,
        %s, %s, 'Actif', 'Gigafibre',
        %s, %s, %s,
        %s
    )
"""


def _ensure_legacy_column(pg, pgc, doctype, column, label, insert_after, now):
    """Add a bigint `column` to the doctype's table if it is missing.

    Also tries to register it as a Custom Field so ERPNext knows about it.
    That registration is best-effort: it runs in a savepoint so a failure
    there cannot undo the ALTER TABLE.
    """
    table = "tab" + doctype
    pgc.execute("""SELECT column_name FROM information_schema.columns
        WHERE table_name = %s AND column_name = %s""", (table, column))
    if pgc.fetchone():
        log(" {} already exists".format(column))
        return

    # `table` and `column` come from constants in this file, never from data.
    pgc.execute('ALTER TABLE "{}" ADD COLUMN {} bigint'.format(table, column))
    err = _try_execute(pgc, _INSERT_CUSTOM_FIELD_SQL,
                       (uid("CF-"), now, now, ADMIN, ADMIN,
                        doctype, label, column, insert_after))
    if err is not None:
        log(" WARN Custom Field {} not registered: {}".format(column, str(err)[:100]))
    pg.commit()
    log(" Added {} to {}".format(column, doctype))


def _ensure_custom_fields(pg, pgc, now):
    """Phase 1: make sure the legacy id columns exist on both doctypes."""
    log("")
    log("--- Phase 1: Ensure custom fields ---")
    _ensure_legacy_column(pg, pgc, "Service Location", "legacy_delivery_id",
                          "Legacy Delivery ID", "access_notes", now)
    _ensure_legacy_column(pg, pgc, "Service Equipment", "legacy_device_id",
                          "Legacy Device ID", "notes", now)


def _import_locations(cur, pg, pgc, now):
    """Phase 2: import legacy deliveries as Service Locations.

    Returns:
        (del_map, loc_ok) where del_map maps legacy delivery id → Service
        Location name (including previously imported ones) and loc_ok is
        the number of locations created in this run.
    """
    log("")
    log("=" * 60)
    log("Phase 2: Deliveries → Service Location")
    log("=" * 60)

    cur.execute("SELECT * FROM delivery ORDER BY id")
    deliveries = cur.fetchall()
    log(" {} deliveries loaded".format(len(deliveries)))

    # Customer mapping
    pgc.execute('SELECT legacy_account_id, name FROM "tabCustomer" WHERE legacy_account_id > 0')
    cust_map = dict(pgc.fetchall())

    # Check existing
    pgc.execute('SELECT legacy_delivery_id FROM "tabService Location" WHERE legacy_delivery_id > 0')
    existing_loc = {r[0] for r in pgc.fetchall()}
    log(" {} already imported".format(len(existing_loc)))

    # Pre-load device → delivery_id mapping from legacy so we can detect
    # fibre availability per Service Location at insert time. Keyed by
    # delivery_id, value = list of devices on that delivery.
    cur.execute("SELECT delivery_id, category, name, model FROM device WHERE delivery_id IS NOT NULL")
    devices_by_delivery = {}
    for row in cur.fetchall():
        did_dev = row.get('delivery_id')
        if did_dev:
            devices_by_delivery.setdefault(did_dev, []).append(row)
    log(" {} deliveries have at least one device".format(len(devices_by_delivery)))

    # delivery_id → Service Location name mapping (for phases 3-5)
    del_map = {}
    loc_ok = loc_skip = loc_err = 0

    for i, d in enumerate(deliveries):
        did = d["id"]
        if did in existing_loc:
            # Mapping for these is loaded in bulk after the loop.
            loc_skip += 1
            continue

        cust_id = cust_map.get(d["account_id"])
        if not cust_id:
            loc_err += 1
            continue

        # ── Normalize address fields BEFORE insert ──
        # postal_code: "j0s1b0" → "J0S 1B0" (canonical form with space).
        # address_line: strip embedded postal codes, expand "Ch."→"Chemin"
        # etc., title-case with French article rules.
        postal_norm = normalize_postal_code(clean(d.get("zip")))
        addr = clean_address_line(d.get("address1"), postal_norm)
        city_raw = clean(d.get("city"))
        # City: title-case with the same article rules used for road names.
        city = clean_address_line(city_raw) if city_raw else ""
        loc_name_display = location_display_name(d.get("name"), addr, city, did)
        loc_id = uid("LOC-")
        lat, lon = parse_gps(d.get("latitude"), d.get("longitude"))

        # Detect fibre availability from the devices we pre-loaded.
        # Without devices we leave connection_type empty rather than
        # guessing — the rep fills it later.
        conn_type = detect_connection_type(devices_by_delivery.get(did, []))

        err = _try_execute(pgc, _INSERT_LOCATION_SQL, (
            loc_id, now, now, ADMIN, ADMIN,
            cust_id, loc_name_display[:140],
            addr or "N/A",
            city or "N/A",
            postal_norm,
            clean(d.get("state")) or "QC",
            lat, lon,
            clean(d.get("contact")) or None,
            clean(d.get("tel_home")) or clean(d.get("cell")) or None,
            conn_type,
            did,
        ))
        if err is not None:
            loc_err += 1
            if loc_err <= 10:
                log(" ERR del#{} -> {}".format(did, str(err)[:100]))
            continue

        del_map[did] = loc_id
        loc_ok += 1
        if loc_ok % 1000 == 0:
            pg.commit()
            log(" [{}/{}] ok={} skip={} err={}".format(
                i + 1, len(deliveries), loc_ok, loc_skip, loc_err))

    pg.commit()

    # Load mapping for skipped (already existing) locations
    if loc_skip > 0:
        pgc.execute('SELECT legacy_delivery_id, name FROM "tabService Location" WHERE legacy_delivery_id > 0')
        for lid, lname in pgc.fetchall():
            del_map[lid] = lname

    log(" Service Locations: {} created | {} skipped | {} errors".format(loc_ok, loc_skip, loc_err))
    log(" del_map has {} entries".format(len(del_map)))
    return del_map, loc_ok


def _import_equipment(cur, pg, pgc, now, del_map):
    """Phase 3: import legacy devices as Service Equipment, then record parents.

    Returns:
        (dev_ok, parent_set): equipment rows created and parent notes written.
    """
    log("")
    log("=" * 60)
    log("Phase 3: Devices → Service Equipment")
    log("=" * 60)

    cur.execute("SELECT * FROM device ORDER BY id")
    devices = cur.fetchall()
    log(" {} devices loaded".format(len(devices)))

    pgc.execute('SELECT legacy_device_id FROM "tabService Equipment" WHERE legacy_device_id > 0')
    existing_dev = {r[0] for r in pgc.fetchall()}

    # Location name → customer, loaded once instead of one SELECT per device.
    pgc.execute('SELECT name, customer FROM "tabService Location"')
    loc_customer = dict(pgc.fetchall())

    # device_id → Equipment name mapping (for parent hierarchy)
    dev_map = {}
    dev_ok = dev_skip = dev_err = 0

    for i, dv in enumerate(devices):
        dvid = dv["id"]
        if dvid in existing_dev:
            dev_skip += 1
            continue

        loc_id = del_map.get(dv.get("delivery_id"))
        # The equipment inherits the customer of its Service Location.
        cust_id = loc_customer.get(loc_id) if loc_id else None

        sn = (clean(dv.get("sn")) or "SN-{}".format(dvid))[:140]
        mac = clean(dv.get("mac"))[:140] if dv.get("mac") else None
        equip_type = guess_device_type(dv.get("category"), dv.get("name"), dv.get("model"))
        equip_id = uid("EQ-")

        def row_params(serial):
            return (
                equip_id, now, now, ADMIN, ADMIN,
                equip_type,
                clean(dv.get("manufacturier")) or None,
                clean(dv.get("model")) or None,
                serial,
                mac or None,
                cust_id, loc_id,
                clean(dv.get("manage")) or None,
                clean(dv.get("user")) or None,
                clean(dv.get("pass")) or None,
                dvid,
            )

        err = _try_execute(pgc, _INSERT_EQUIPMENT_SQL, row_params(sn))
        if err is not None:
            msg = str(err).lower()
            # Retry with unique SN on duplicate key
            if "unique constraint" in msg and "serial_number" in msg:
                sn = "{}-{}".format(sn[:130], dvid)
                err = _try_execute(pgc, _INSERT_EQUIPMENT_SQL, row_params(sn))

        if err is not None:
            dev_err += 1
            if dev_err <= 10:
                log(" ERR dev#{} -> {}".format(dvid, str(err)[:100]))
            continue

        dev_map[dvid] = equip_id
        dev_ok += 1
        if dev_ok % 1000 == 0:
            pg.commit()
            log(" [{}/{}] ok={} skip={} err={}".format(
                i + 1, len(devices), dev_ok, dev_skip, dev_err))

    pg.commit()
    log(" Equipment: {} created | {} skipped | {} errors".format(dev_ok, dev_skip, dev_err))

    # Phase 3b: Set parent equipment (device hierarchy)
    log(" Setting device parent hierarchy...")
    parent_set = 0
    for dv in devices:
        if dv.get("parent") and dv["parent"] > 0:
            child_eq = dev_map.get(dv["id"])
            parent_eq = dev_map.get(dv["parent"])
            if child_eq and parent_eq:
                # No native parent field on Service Equipment, store in notes for now
                pgc.execute("""
                    UPDATE "tabService Equipment"
                    SET notes = COALESCE(notes, '') || 'Parent: ' || %s || E'\n'
                    WHERE name = %s
                """, (parent_eq, child_eq))
                parent_set += 1
    pg.commit()
    log(" {} parent links set".format(parent_set))
    return dev_ok, parent_set


def _link_to_location(pg, pgc, table, rows, legacy_to_delivery, del_map, commit_every):
    """Set service_location on each (name, legacy_id) row of `table`.

    A row is skipped (counted as missed) when its legacy id has no delivery
    or that delivery has no Service Location.

    Returns:
        (linked, missed) counts.
    """
    # `table` is a constant supplied by this file, never user data.
    update_sql = ('UPDATE "{}" SET service_location = %s, modified = NOW() '
                  'WHERE name = %s').format(table)
    linked = missed = 0
    for doc_name, legacy_id in rows:
        del_id = legacy_to_delivery.get(legacy_id)
        if not del_id:
            missed += 1
            continue
        loc_id = del_map.get(del_id)
        if not loc_id:
            missed += 1
            continue
        pgc.execute(update_sql, (loc_id, doc_name))
        linked += 1
        if linked % commit_every == 0:
            pg.commit()
            log(" {} linked...".format(linked))
    pg.commit()
    return linked, missed


def _link_subscriptions(cur, pg, pgc, del_map):
    """Phase 4: link Subscriptions to their Service Location. Returns the count linked."""
    log("")
    log("=" * 60)
    log("Phase 4: Link Subscriptions → Service Location")
    log("=" * 60)

    # Get service → delivery mapping from legacy
    cur.execute("SELECT id, delivery_id FROM service WHERE status = 1 AND delivery_id > 0")
    svc_to_del = {r["id"]: r["delivery_id"] for r in cur.fetchall()}
    log(" {} service→delivery mappings".format(len(svc_to_del)))

    # Get subscriptions with legacy_service_id
    pgc.execute("""
        SELECT name, legacy_service_id FROM "tabSubscription"
        WHERE legacy_service_id > 0
        AND (service_location IS NULL OR service_location = '')
    """)
    subs_to_link = pgc.fetchall()
    log(" {} subscriptions to link".format(len(subs_to_link)))

    sub_linked, sub_miss = _link_to_location(
        pg, pgc, "tabSubscription", subs_to_link, svc_to_del, del_map, 5000)
    log(" Subscriptions linked: {} | missed: {}".format(sub_linked, sub_miss))
    return sub_linked


def _link_issues(cur, pg, pgc, del_map):
    """Phase 5: link Issues to their Service Location. Returns the count linked."""
    log("")
    log("=" * 60)
    log("Phase 5: Link Issues → Service Location")
    log("=" * 60)

    # Get ticket → delivery mapping from legacy
    cur.execute("SELECT id, delivery_id FROM ticket WHERE delivery_id > 0")
    tkt_to_del = {r["id"]: r["delivery_id"] for r in cur.fetchall()}
    log(" {} ticket→delivery mappings".format(len(tkt_to_del)))

    # Get issues with legacy_ticket_id that need linking
    pgc.execute("""
        SELECT name, legacy_ticket_id FROM "tabIssue"
        WHERE legacy_ticket_id > 0
        AND (service_location IS NULL OR service_location = '')
    """)
    issues_to_link = pgc.fetchall()
    log(" {} issues to link".format(len(issues_to_link)))

    iss_linked, iss_miss = _link_to_location(
        pg, pgc, "tabIssue", issues_to_link, tkt_to_del, del_map, 10000)
    log(" Issues linked: {} | missed: {}".format(iss_linked, iss_miss))
    return iss_linked


def main():
    """Run the five migration phases and print a summary.

    Both database connections are closed even if a phase raises; work not
    yet committed at that point is discarded by PostgreSQL.
    """
    # DB drivers are imported here rather than at module level so the pure
    # normalization helpers above can be imported (and unit-tested) on a
    # machine that has neither driver installed.
    import psycopg2
    import pymysql

    log("=" * 60)
    log("MIGRATE LOCATIONS + EQUIPMENT")
    log("=" * 60)

    with closing(pymysql.connect(**LEGACY)) as mc, \
            closing(psycopg2.connect(**PG)) as pg:
        pg.autocommit = False
        cur = mc.cursor(pymysql.cursors.DictCursor)
        pgc = pg.cursor()
        now = ts()

        _ensure_custom_fields(pg, pgc, now)
        del_map, loc_ok = _import_locations(cur, pg, pgc, now)
        dev_ok, parent_set = _import_equipment(cur, pg, pgc, now, del_map)
        sub_linked = _link_subscriptions(cur, pg, pgc, del_map)
        iss_linked = _link_issues(cur, pg, pgc, del_map)

    # ============================
    # Summary
    # ============================
    log("")
    log("=" * 60)
    log("MIGRATION LOCATIONS + EQUIPMENT COMPLETE")
    log("=" * 60)
    log(" Service Locations: {} created".format(loc_ok))
    log(" Service Equipment: {} created ({} parent links)".format(dev_ok, parent_set))
    log(" Subscriptions → Location: {} linked".format(sub_linked))
    log(" Issues → Location: {} linked".format(iss_linked))
    log("=" * 60)
    log("")
    log("Next: bench --site erp.gigafibre.ca clear-cache")


if __name__ == "__main__":
    main()