#!/usr/bin/env python3
"""
Fix broken customer links: replace customer_name with CUST-xxx in:
  - Sales Invoice (customer field)
  - Subscription (party field)
  - Issue (customer field)

Run inside erpnext-backend-1:
  nohup python3 /tmp/fix_customer_links.py > /tmp/fix_customer_links.log 2>&1 &

Safe: only updates rows where the field does NOT already start with 'CUST-'.

Two passes are made:
  1. Name pass: rows whose current value matches exactly one customer_name
     are rewritten to that customer's CUST-xxx id. Names shared by several
     customers are ambiguous and are skipped (logged as DUPE).
  2. Legacy pass (only when rows were skipped, duplicate names exist and
     pymysql is importable): remaining rows are resolved through the legacy
     MariaDB database via their legacy id -> account_id -> customer.
"""
from collections import namedtuple
from datetime import datetime, timezone

import psycopg2

# NOTE(review): credentials are hard-coded in this one-off script; consider
# reading them from the environment before the file is shared or committed.
PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123",
      "dbname": "_eb65bdc0c4b1b2d6"}

LEGACY = {"host": "10.100.80.100", "user": "facturation",
          "password": "VD67owoj", "database": "gestionclient",
          "connect_timeout": 30, "read_timeout": 300}

# Commit after this many updates during the name pass.
COMMIT_EVERY = 5000
# Maximum number of ids placed in one legacy "IN (...)" lookup.
LEGACY_CHUNK = 10000
# Maximum number of duplicate names listed individually in the log.
DUPE_LOG_LIMIT = 20

# Everything that differs between the three document types that get fixed.
_DocSpec = namedtuple("_DocSpec", [
    "title",              # heading / summary label
    "noun",               # plural noun used in progress messages
    "select_sql",         # name pass: rows (name, current link value)
    "update_sql",         # parameters: (CUST-xxx id, document name)
    "legacy_select_sql",  # legacy pass: rows (name, current value, legacy id)
    "legacy_via",         # legacy entity named in the log message
    "legacy_lookup_sql",  # MariaDB query; {} receives the %s placeholders
])

_DOC_SPECS = (
    _DocSpec(
        title="Sales Invoices",
        noun="invoices",
        select_sql="""
            SELECT name, customer
            FROM "tabSales Invoice"
            WHERE customer NOT LIKE 'CUST-%%'
        """,
        update_sql="""
            UPDATE "tabSales Invoice" SET customer = %s, modified = NOW()
            WHERE name = %s
        """,
        legacy_select_sql="""
            SELECT name, customer, legacy_invoice_id
            FROM "tabSales Invoice"
            WHERE customer NOT LIKE 'CUST-%%' AND legacy_invoice_id > 0
        """,
        legacy_via="invoice",
        legacy_lookup_sql="SELECT id, account_id FROM invoice WHERE id IN ({})",
    ),
    _DocSpec(
        title="Subscriptions",
        noun="subscriptions",
        select_sql="""
            SELECT name, party
            FROM "tabSubscription"
            WHERE party_type = 'Customer' AND party NOT LIKE 'CUST-%%'
        """,
        update_sql="""
            UPDATE "tabSubscription" SET party = %s, modified = NOW()
            WHERE name = %s
        """,
        legacy_select_sql="""
            SELECT name, party, legacy_service_id
            FROM "tabSubscription"
            WHERE party_type = 'Customer' AND party NOT LIKE 'CUST-%%'
              AND legacy_service_id > 0
        """,
        legacy_via="service",
        legacy_lookup_sql="""
            SELECT s.id, d.account_id
            FROM service s
            JOIN delivery d ON s.delivery_id = d.id
            WHERE s.id IN ({})
        """,
    ),
    _DocSpec(
        title="Issues",
        noun="issues",
        select_sql="""
            SELECT name, customer
            FROM "tabIssue"
            WHERE customer IS NOT NULL AND customer != ''
              AND customer NOT LIKE 'CUST-%%'
        """,
        update_sql="""
            UPDATE "tabIssue" SET customer = %s, modified = NOW()
            WHERE name = %s
        """,
        legacy_select_sql="""
            SELECT name, customer, legacy_ticket_id
            FROM "tabIssue"
            WHERE customer IS NOT NULL AND customer != ''
              AND customer NOT LIKE 'CUST-%%' AND legacy_ticket_id > 0
        """,
        legacy_via="ticket",
        legacy_lookup_sql="SELECT id, account_id FROM ticket WHERE id IN ({})",
    ),
)


def log(msg):
    """Print *msg* prefixed with the current UTC time, flushing immediately."""
    print("[{}] {}".format(datetime.now(timezone.utc).strftime("%H:%M:%S"), msg),
          flush=True)


def _build_name_map(rows):
    """Split (cust_id, customer_name) rows into unique and ambiguous names.

    Returns ``(name_to_cust, name_dupes)``: a dict mapping every customer_name
    that occurs exactly once to its customer id, and the set of names that
    occur more than once (these are left out of the dict).
    """
    name_to_cust = {}
    name_dupes = set()
    for cust_id, cust_name in rows:
        if cust_name in name_to_cust:
            name_dupes.add(cust_name)
        else:
            name_to_cust[cust_name] = cust_id
    # A shared name cannot identify a single customer, so drop it entirely.
    for dupe in name_dupes:
        del name_to_cust[dupe]
    return name_to_cust, name_dupes


def _log_duplicates(name_dupes):
    """Log up to DUPE_LOG_LIMIT duplicate names, then a count of the rest."""
    if not name_dupes:
        return
    # The key keeps sorting from failing if a NULL name is among the dupes.
    ordered = sorted(name_dupes, key=lambda name: name or "")
    for dupe in ordered[:DUPE_LOG_LIMIT]:
        log(" DUPE: '{}'".format(dupe))
    if len(ordered) > DUPE_LOG_LIMIT:
        log(" ... and {} more duplicates".format(len(ordered) - DUPE_LOG_LIMIT))


def _fix_by_name(pg, pgc, spec, name_to_cust):
    """Name pass for one document type.

    Rewrites every row returned by ``spec.select_sql`` whose current value is
    a key of *name_to_cust*; all other rows are counted as skipped. Commits
    every COMMIT_EVERY updates and once more at the end.

    Returns ``(fixed, skipped)``.
    """
    pgc.execute(spec.select_sql)
    broken = pgc.fetchall()
    log(" {} broken {} to fix".format(len(broken), spec.noun))

    fixed = skipped = 0
    for doc_name, current_value in broken:
        cust_id = name_to_cust.get(current_value)
        if not cust_id:
            # Unknown name or a duplicate one; left for the legacy pass.
            skipped += 1
            continue
        pgc.execute(spec.update_sql, (cust_id, doc_name))
        fixed += 1
        if fixed % COMMIT_EVERY == 0:
            pg.commit()
            log(" {} fixed, {} skipped...".format(fixed, skipped))
    pg.commit()
    log(" DONE: {} fixed, {} skipped (dupes/unmapped)".format(fixed, skipped))
    return fixed, skipped


def _fix_via_legacy(pg, pgc, mcur, spec, legacy_map):
    """Legacy pass for one document type.

    Looks up the account_id of each still-broken row in the legacy database
    (in batches of LEGACY_CHUNK ids) and rewrites the row when that account
    is a key of *legacy_map* (legacy_account_id -> customer id). Commits once
    after all updates.

    Returns the number of rows fixed.
    """
    pgc.execute(spec.legacy_select_sql)
    still_broken = pgc.fetchall()
    if not still_broken:
        return 0
    log(" Resolving {} {} via legacy {}→account mapping...".format(
        len(still_broken), spec.noun, spec.legacy_via))

    legacy_ids = [row[2] for row in still_broken]
    id_to_acct = {}
    for start in range(0, len(legacy_ids), LEGACY_CHUNK):
        batch = legacy_ids[start:start + LEGACY_CHUNK]
        placeholders = ",".join(["%s"] * len(batch))
        mcur.execute(spec.legacy_lookup_sql.format(placeholders), batch)
        for row in mcur.fetchall():
            id_to_acct[row["id"]] = row["account_id"]

    fixed = 0
    for doc_name, _current_value, legacy_id in still_broken:
        acct_id = id_to_acct.get(legacy_id)
        if acct_id and acct_id in legacy_map:
            pgc.execute(spec.update_sql, (legacy_map[acct_id], doc_name))
            fixed += 1
    pg.commit()
    log(" {} additional {} fixed via legacy lookup".format(fixed, spec.noun))
    return fixed


def _resolve_via_legacy(pg, pgc, legacy_map):
    """Run the legacy pass for every document type (best effort).

    A missing pymysql module or any error during the lookups is logged and
    ends the pass; counts of document types already committed are kept.

    Returns a dict mapping each spec title to the number of rows fixed.
    """
    extra = {spec.title: 0 for spec in _DOC_SPECS}
    try:
        import pymysql
    except ImportError:
        log(" pymysql not available — skipping legacy lookup phase")
        return extra

    # Deliberately broad: this phase is optional and must not abort the run.
    # Updates not yet committed when an error occurs are discarded when the
    # PostgreSQL connection is closed.
    try:
        mc = pymysql.connect(**LEGACY)
        try:
            mcur = mc.cursor(pymysql.cursors.DictCursor)
            for spec in _DOC_SPECS:
                extra[spec.title] = _fix_via_legacy(pg, pgc, mcur, spec, legacy_map)
        finally:
            mc.close()
    except Exception as e:
        log(" Legacy lookup error: {}".format(str(e)[:200]))
    return extra


def main():
    """Fix customer links in Sales Invoice, Subscription and Issue rows."""
    log("=== Fix Customer Links ===")

    pg = psycopg2.connect(**PG)
    try:
        pgc = pg.cursor()

        # 1. Build customer_name → CUST-xxx mapping
        log("Building customer_name → CUST-xxx mapping...")
        pgc.execute('SELECT name, customer_name FROM "tabCustomer"')
        rows = pgc.fetchall()
        name_to_cust, name_dupes = _build_name_map(rows)
        log(" {} customers total, {} unique names, {} duplicates excluded".format(
            len(rows), len(name_to_cust), len(name_dupes)))
        _log_duplicates(name_dupes)

        # Secondary mapping used by the legacy pass:
        # legacy_account_id → CUST-xxx (a later row wins on repeated ids).
        pgc.execute(
            'SELECT name, legacy_account_id FROM "tabCustomer" '
            'WHERE legacy_account_id > 0')
        legacy_map = {legacy_id: cust_id for cust_id, legacy_id in pgc.fetchall()}

        # 2-4. Name pass over invoices, subscriptions and issues
        fixed = {}
        skipped = {}
        for spec in _DOC_SPECS:
            log("")
            log("--- Fixing {} ---".format(spec.title))
            fixed[spec.title], skipped[spec.title] = _fix_by_name(
                pg, pgc, spec, name_to_cust)

        # 5. Fix duplicate names via legacy MariaDB lookup
        if sum(skipped.values()) > 0 and name_dupes:
            log("")
            log("--- Phase 2: Resolving duplicates via legacy DB ---")
            for title, count in _resolve_via_legacy(pg, pgc, legacy_map).items():
                fixed[title] += count
    finally:
        # Always release the connection, even when a step above raised.
        pg.close()

    # Summary
    log("")
    log("=" * 60)
    log("FIX CUSTOMER LINKS — SUMMARY")
    log("=" * 60)
    for spec in _DOC_SPECS:
        log(" {}: {} fixed".format(spec.title, fixed[spec.title]))
    log("")
    log(" Duplicate names excluded from simple mapping: {}".format(len(name_dupes)))
    log("=" * 60)
    log("")
    log("Next: bench --site erp.gigafibre.ca clear-cache")


if __name__ == "__main__":
    main()