feat: complete data mirror — all customers + 115K invoices

- 8,636 terminated customers imported (disabled=1, terminate reason/company/notes preserved)
- Total customers: 15,303 (100% of legacy)
- 33,131 Subscription.party links fixed (CUST-xxx)
- 115,721 Sales Invoices (24 months) + 658K line items
- Custom field: Sales Invoice.legacy_invoice_id
- All invoices as Draft (not submitted, not sent)

Customer lifecycle preserved:
  Active → services, subscriptions, invoices
  Terminated → disabled=1, customer_details has departure reason/competitor

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
louispaulb 2026-03-28 16:09:16 -04:00
parent 22377bb381
commit 4f74376412
3 changed files with 630 additions and 0 deletions

View File

@ -0,0 +1,274 @@
#!/usr/bin/env python3
"""
1. Fix Subscription.party so it references Customer.name (CUST-xxx)
2. Import legacy invoices (last 24 months) as Sales Invoice
Direct PG. Detached.
"""
import pymysql
import psycopg2
import uuid
from datetime import datetime, timezone
from html import unescape
# Connection arguments passed to pymysql.connect() for the legacy database;
# this script only issues SELECT statements against it.
# NOTE(review): host and password are hard-coded in source — consider
# loading them from the environment or a secrets store.
LEGACY = {"host": "10.100.80.100", "user": "facturation", "password": "VD67owoj",
"database": "gestionclient", "connect_timeout": 30, "read_timeout": 600}
# Connection arguments passed to psycopg2.connect() for the target database
# that receives the UPDATE / INSERT statements.
# NOTE(review): the password is hard-coded in source as well.
PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123",
"dbname": "_eb65bdc0c4b1b2d6"}
# Written to the owner and modified_by columns of every inserted row.
ADMIN = "Administrator"
# Written to the company column of every imported Sales Invoice.
COMPANY = "TARGO"
def uid(p=""):
    """Return the prefix *p* followed by 10 random hexadecimal characters."""
    suffix = uuid.uuid4().hex[:10]
    return p + suffix
def now():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS.ffffff``."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%d %H:%M:%S.%f")
def ts_to_date(ts):
    """Convert a Unix timestamp to a ``YYYY-MM-DD`` string in UTC.

    Args:
        ts: Seconds since the epoch as an int, float, Decimal or numeric
            string. ``None``, empty, non-numeric and non-positive values
            are treated as "no date".

    Returns:
        The UTC calendar date as ``YYYY-MM-DD``, or ``None`` when *ts*
        cannot be interpreted as a positive timestamp.
    """
    # Convert before comparing: the comparison used to run outside the
    # try block, so a numeric string raised TypeError instead of being
    # converted or rejected.
    try:
        seconds = int(ts)
    except (TypeError, ValueError):
        return None
    if seconds <= 0:
        return None
    try:
        moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Timestamp outside the range the platform can represent.
        return None
    return moment.strftime("%Y-%m-%d")
def clean(v):
    """Return *v* as text with HTML entities decoded and outer whitespace removed.

    Any falsy value (``None``, ``""``, ``0``) yields the empty string.
    """
    return unescape(str(v)).strip() if v else ""
def log(msg):
    """Print *msg* on its own line and flush stdout immediately."""
    print(msg, end="\n", flush=True)
def main():
    """Fix Subscription.party links, then import legacy invoices as drafts.

    Part 1 rewrites ``tabSubscription.party`` values that still equal a
    customer's ``customer_name`` so they hold the matching
    ``tabCustomer.name``. Subscriptions that remain unmatched are resolved
    through the legacy ``service -> delivery -> account`` chain.

    Part 2 copies legacy invoices with ``billing_status = 1`` dated within
    the last ``24 * 30`` days into ``tabSales Invoice`` and
    ``tabSales Invoice Item`` as drafts named ``SINV-LEG-<legacy id>``.
    Names that already exist are skipped, so a re-run does not duplicate
    invoices.

    Every invoice is written inside its own savepoint: a failure undoes
    only that invoice, and the counters reflect rows that are really kept.
    Work is committed every 2000 created invoices and once at the end.

    Raises:
        RuntimeError: if no non-group Receivable account exists for COMPANY.
    """
    ts = now()

    pg = psycopg2.connect(**PG)
    try:
        pgc = pg.cursor()

        # =============================
        # PART 1: Fix Subscription.party
        # =============================
        log("=== Part 1: Fix Subscription.party ===")

        # Fix party field: match customer_name -> actual name
        pgc.execute("""
            UPDATE "tabSubscription" s
            SET party = c.name
            FROM "tabCustomer" c
            WHERE s.party = c.customer_name
            AND s.party_type = 'Customer'
            AND s.party NOT LIKE 'CUST-%'
        """)
        fixed_party = pgc.rowcount
        pg.commit()
        log(" Fixed {} Subscription.party → CUST-xxx".format(fixed_party))

        # Also fix any remaining by legacy_service_id mapping
        pgc.execute("""
            SELECT s.name as sub_name, s.legacy_service_id
            FROM "tabSubscription" s
            WHERE s.party NOT LIKE 'CUST-%' AND s.legacy_service_id > 0
        """)
        remaining = pgc.fetchall()
        if remaining:
            mc = pymysql.connect(**LEGACY)
            try:
                cur = mc.cursor(pymysql.cursors.DictCursor)
                cur.execute("SELECT s.id, d.account_id FROM service s JOIN delivery d ON s.delivery_id = d.id WHERE s.status = 1")
                svc_acct = {r["id"]: r["account_id"] for r in cur.fetchall()}
            finally:
                mc.close()

            pgc.execute('SELECT legacy_account_id, name FROM "tabCustomer" WHERE legacy_account_id > 0')
            cust_map = {r[0]: r[1] for r in pgc.fetchall()}

            fixed2 = 0
            for sub_name, svc_id in remaining:
                acct_id = svc_acct.get(svc_id)
                if not acct_id:
                    continue
                cust_name = cust_map.get(acct_id)
                if not cust_name:
                    continue
                pgc.execute('UPDATE "tabSubscription" SET party = %s WHERE name = %s', (cust_name, sub_name))
                fixed2 += 1
            pg.commit()
            log(" Fixed {} more via legacy_service_id".format(fixed2))
        else:
            log(" No remaining fixes needed")

        # =============================
        # PART 2: Import Invoices
        # =============================
        log("")
        log("=== Part 2: Import Legacy Invoices (24 months) ===")

        # A single legacy connection for Part 2; the extra connection that
        # used to be opened just before this point was never closed.
        mc = pymysql.connect(**LEGACY)
        try:
            cur = mc.cursor(pymysql.cursors.DictCursor)

            # Invoices from last 24 months (approximated as 24 * 30 days)
            cutoff = int(datetime.now(timezone.utc).timestamp()) - (24 * 30 * 86400)
            cur.execute("""
                SELECT i.id, i.account_id, i.date_orig, i.total_amt, i.billed_amt,
                i.billing_status, i.due_date, i.notes
                FROM invoice i
                WHERE i.billing_status = 1 AND i.date_orig >= %s
                ORDER BY i.id
            """, (cutoff,))
            invoices = cur.fetchall()
            log(" {} invoices to import".format(len(invoices)))

            # Invoice items, fetched in chunks to bound the IN (...) list
            inv_ids = [inv["id"] for inv in invoices]
            items_by_inv = {}
            chunk = 10000
            for start in range(0, len(inv_ids), chunk):
                batch = inv_ids[start:start + chunk]
                placeholders = ",".join(["%s"] * len(batch))
                cur.execute("""
                    SELECT invoice_id, sku, quantity, unitary_price, product_name, service_id
                    FROM invoice_item WHERE invoice_id IN ({})
                """.format(placeholders), batch)
                for r in cur.fetchall():
                    items_by_inv.setdefault(r["invoice_id"], []).append(r)
        finally:
            mc.close()
        log(" {} invoice items loaded".format(sum(len(v) for v in items_by_inv.values())))

        # Customer mapping: legacy account id -> (Customer.name, customer_name)
        pgc.execute('SELECT legacy_account_id, name, customer_name FROM "tabCustomer" WHERE legacy_account_id > 0')
        cust_map = {r[0]: (r[1], r[2]) for r in pgc.fetchall()}

        # Check existing invoices
        pgc.execute('SELECT name FROM "tabSales Invoice" WHERE name LIKE %s', ('SINV-LEG-%',))
        existing_inv = set(r[0] for r in pgc.fetchall())

        # Item existence check
        pgc.execute('SELECT item_code FROM "tabItem"')
        valid_items = set(r[0] for r in pgc.fetchall())

        # Get receivable + income accounts
        pgc.execute("""SELECT name FROM "tabAccount" WHERE account_type = 'Receivable' AND company = %s AND is_group = 0 LIMIT 1""", (COMPANY,))
        receivable_row = pgc.fetchone()
        if receivable_row is None:
            raise RuntimeError("No non-group Receivable account found for company {}".format(COMPANY))
        receivable = receivable_row[0]

        pgc.execute("""SELECT name FROM "tabAccount" WHERE root_type = 'Income' AND company = %s AND is_group = 0 LIMIT 1""", (COMPANY,))
        income_row = pgc.fetchone()
        income_acct = income_row[0] if income_row else "Revenus autres - T"

        inv_ok = inv_skip = inv_err = item_ok = 0
        for i, inv in enumerate(invoices):
            inv_name = "SINV-LEG-{}".format(inv["id"])
            if inv_name in existing_inv:
                inv_skip += 1
                continue

            cust_data = cust_map.get(inv["account_id"])
            if not cust_data:
                inv_err += 1
                continue
            cust_name, cust_display = cust_data

            posting_date = ts_to_date(inv["date_orig"]) or "2025-01-01"
            due_date = ts_to_date(inv["due_date"]) or posting_date
            total = float(inv["total_amt"] or 0)
            line_items = items_by_inv.get(inv["id"], [])

            # Isolate this invoice so a failure does not discard the
            # not-yet-committed invoices that preceded it.
            pgc.execute("SAVEPOINT legacy_invoice")
            try:
                # Sales Invoice header (docstatus appears once; listing it
                # twice makes PostgreSQL reject the statement).
                pgc.execute("""
                    INSERT INTO "tabSales Invoice" (
                    name, creation, modified, modified_by, owner, docstatus, idx,
                    naming_series, title, customer, customer_name, company,
                    posting_date, due_date, currency, conversion_rate,
                    selling_price_list, price_list_currency,
                    base_grand_total, grand_total, base_net_total, net_total,
                    base_total, total,
                    outstanding_amount, base_rounded_total, rounded_total,
                    is_return, is_debit_note, disable_rounded_total,
                    debit_to, party_account_currency,
                    status
                    ) VALUES (
                    %s, %s, %s, %s, %s, 0, 0,
                    'ACC-SINV-.YYYY.-', %s, %s, %s, %s,
                    %s, %s, 'CAD', 1,
                    'Standard Selling', 'CAD',
                    %s, %s, %s, %s,
                    %s, %s,
                    %s, %s, %s,
                    0, 0, 1,
                    %s, 'CAD',
                    'Draft'
                    )
                """, (inv_name, ts, ts, ADMIN, ADMIN,
                      cust_display, cust_name, cust_display, COMPANY,
                      posting_date, due_date,
                      total, total, total, total,
                      total, total,
                      total, total, total,
                      receivable))

                # Invoice items
                for j, li in enumerate(line_items):
                    sku = clean(li.get("sku")) or "MISC"
                    qty = float(li.get("quantity") or 1)
                    rate = float(li.get("unitary_price") or 0)
                    amount = round(qty * rate, 2)
                    desc = clean(li.get("product_name")) or sku
                    # Use valid item or fallback to no item link
                    item_code = sku if sku in valid_items else None
                    pgc.execute("""
                        INSERT INTO "tabSales Invoice Item" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        item_code, item_name, description, qty, rate, amount,
                        base_rate, base_amount, base_net_rate, base_net_amount,
                        net_rate, net_amount,
                        stock_uom, uom, conversion_factor,
                        income_account, cost_center,
                        parent, parentfield, parenttype
                        ) VALUES (
                        %s, %s, %s, %s, %s, 0, %s,
                        %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s,
                        %s, %s,
                        'Nos', 'Nos', 1,
                        %s, 'Main - T',
                        %s, 'items', 'Sales Invoice'
                        )
                    """, (uid("SII-"), ts, ts, ADMIN, ADMIN, j + 1,
                          item_code, desc[:140], desc[:140], qty, rate, amount,
                          rate, amount, rate, amount,
                          rate, amount,
                          income_acct,
                          inv_name))
            except Exception as e:
                # Undo only this invoice; earlier pending rows stay intact.
                pgc.execute("ROLLBACK TO SAVEPOINT legacy_invoice")
                pgc.execute("RELEASE SAVEPOINT legacy_invoice")
                inv_err += 1
                if inv_err <= 10:
                    log(" ERR inv#{} -> {}".format(inv["id"], str(e)[:100]))
                continue

            pgc.execute("RELEASE SAVEPOINT legacy_invoice")
            # Count only after the whole invoice has been written.
            inv_ok += 1
            item_ok += len(line_items)

            if inv_ok % 2000 == 0:
                pg.commit()
                log(" [{}/{}] inv={} items={} skip={} err={}".format(
                    i + 1, len(invoices), inv_ok, item_ok, inv_skip, inv_err))

        pg.commit()
    finally:
        pg.close()

    log("")
    log("=" * 60)
    log("Subscriptions fixed: {}".format(fixed_party))
    log("Invoices: {} created, {} skipped, {} errors".format(inv_ok, inv_skip, inv_err))
    log("Invoice Items: {}".format(item_ok))
    log("=" * 60)
    log("bench --site erp.gigafibre.ca clear-cache")
# Run the subscription fix and invoice import only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""Import legacy invoices (24 months) as Sales Invoice drafts. Direct PG."""
import pymysql
import psycopg2
import uuid
from datetime import datetime, timezone
from html import unescape
# Connection arguments passed to pymysql.connect() for the legacy database;
# this script only issues SELECT statements against it.
# NOTE(review): host and password are hard-coded in source — consider
# loading them from the environment or a secrets store.
LEGACY = {"host": "10.100.80.100", "user": "facturation", "password": "VD67owoj",
"database": "gestionclient", "connect_timeout": 30, "read_timeout": 600}
# Connection arguments passed to psycopg2.connect() for the target database
# that receives the INSERT statements.
# NOTE(review): the password is hard-coded in source as well.
PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123",
"dbname": "_eb65bdc0c4b1b2d6"}
# Written to the owner and modified_by columns of every inserted row.
ADMIN = "Administrator"
# Company used to look up accounts and stored on each imported Sales Invoice.
COMPANY = "TARGO"
def uid(p=""):
    """Build an identifier: prefix *p* plus the first 10 hex digits of a UUID4."""
    random_hex = uuid.uuid4().hex
    return p + random_hex[:10]
def now():
    """Return the current UTC timestamp as ``YYYY-MM-DD HH:MM:SS.ffffff``."""
    pattern = "%Y-%m-%d %H:%M:%S.%f"
    utc_now = datetime.now(timezone.utc)
    return utc_now.strftime(pattern)
def ts_to_date(t):
    """Convert a Unix timestamp to a ``YYYY-MM-DD`` string in UTC.

    Args:
        t: Seconds since the epoch as an int, float, Decimal or numeric
            string. ``None``, empty, non-numeric and non-positive values
            are treated as "no date".

    Returns:
        The UTC calendar date as ``YYYY-MM-DD``, or ``None`` when *t*
        cannot be interpreted as a positive timestamp.
    """
    # Convert first: comparing a string with 0 used to raise TypeError
    # before the try block was ever entered.
    try:
        seconds = int(t)
    except (TypeError, ValueError):
        return None
    if seconds <= 0:
        return None
    try:
        return datetime.fromtimestamp(seconds, tz=timezone.utc).strftime("%Y-%m-%d")
    except (OverflowError, OSError, ValueError):
        # Timestamp outside the range the platform can represent.
        return None
def clean(v):
    """Decode HTML entities in *v* and trim surrounding whitespace.

    Falsy input (``None``, ``""``, ``0``) is returned as the empty string.
    """
    if v:
        decoded = unescape(str(v))
        return decoded.strip()
    return ""
def log(msg):
    """Write *msg* to stdout as one line, flushing right away."""
    print(msg, sep=" ", flush=True)
def main():
    """Import legacy invoices from the last 24 * 30 days as draft Sales Invoices.

    Legacy invoices with ``billing_status = 1`` and their line items are
    loaded, then written to ``tabSales Invoice`` and
    ``tabSales Invoice Item``. Invoices whose ``legacy_invoice_id`` already
    exists are skipped, so a re-run does not create duplicates. Invoices
    whose account has no matching Customer are counted as errors.

    Every invoice is written inside its own savepoint: a failure undoes
    only that invoice, and the counters reflect rows that are really kept.
    Work is committed every 2000 created invoices and once at the end.

    Raises:
        RuntimeError: if COMPANY has no non-group Receivable or Income
            account.
    """
    ts = now()
    log("=== Import Invoices (24 months) ===")

    mc = pymysql.connect(**LEGACY)
    try:
        cur = mc.cursor(pymysql.cursors.DictCursor)
        cutoff = int(datetime.now(timezone.utc).timestamp()) - (24 * 30 * 86400)
        cur.execute("""SELECT * FROM invoice WHERE billing_status = 1 AND date_orig >= %s ORDER BY id""", (cutoff,))
        invoices = cur.fetchall()
        log(" {} invoices".format(len(invoices)))

        # Line items are fetched in chunks to bound the IN (...) list.
        inv_ids = [inv["id"] for inv in invoices]
        items_by_inv = {}
        chunk = 10000
        for s in range(0, len(inv_ids), chunk):
            batch = inv_ids[s:s + chunk]
            cur.execute("SELECT * FROM invoice_item WHERE invoice_id IN ({})".format(",".join(["%s"] * len(batch))), batch)
            for r in cur.fetchall():
                items_by_inv.setdefault(r["invoice_id"], []).append(r)
    finally:
        mc.close()
    log(" {} items loaded".format(sum(len(v) for v in items_by_inv.values())))

    pg = psycopg2.connect(**PG)
    try:
        pgc = pg.cursor()

        # legacy account id -> (Customer.name, customer_name)
        pgc.execute('SELECT legacy_account_id, name, customer_name FROM "tabCustomer" WHERE legacy_account_id > 0')
        cust_map = {r[0]: (r[1], r[2]) for r in pgc.fetchall()}

        pgc.execute('SELECT item_code FROM "tabItem"')
        valid_items = set(r[0] for r in pgc.fetchall())

        pgc.execute("""SELECT name FROM "tabAccount" WHERE account_type = 'Receivable' AND company = %s AND is_group = 0 LIMIT 1""", (COMPANY,))
        receivable_row = pgc.fetchone()
        if receivable_row is None:
            raise RuntimeError("No non-group Receivable account found for company {}".format(COMPANY))
        receivable = receivable_row[0]

        pgc.execute("""SELECT name FROM "tabAccount" WHERE root_type = 'Income' AND company = %s AND is_group = 0 LIMIT 1""", (COMPANY,))
        income_row = pgc.fetchone()
        if income_row is None:
            raise RuntimeError("No non-group Income account found for company {}".format(COMPANY))
        income_acct = income_row[0]

        pgc.execute('SELECT legacy_invoice_id FROM "tabSales Invoice" WHERE legacy_invoice_id > 0')
        existing = set(r[0] for r in pgc.fetchall())
        log(" {} already exist".format(len(existing)))

        inv_ok = inv_skip = inv_err = item_ok = 0
        for i, inv in enumerate(invoices):
            if inv["id"] in existing:
                inv_skip += 1
                continue

            cust_data = cust_map.get(inv["account_id"])
            if not cust_data:
                inv_err += 1
                continue
            cust_name, cust_display = cust_data

            posting_date = ts_to_date(inv["date_orig"]) or "2025-01-01"
            due_date = ts_to_date(inv["due_date"]) or posting_date
            total = round(float(inv["total_amt"] or 0), 2)
            sinv_name = uid("SINV-")
            line_items = items_by_inv.get(inv["id"], [])

            # Isolate this invoice so a failure does not discard the
            # not-yet-committed invoices that preceded it.
            pgc.execute("SAVEPOINT legacy_invoice")
            try:
                pgc.execute("""
                    INSERT INTO "tabSales Invoice" (
                    name, creation, modified, modified_by, owner, docstatus, idx,
                    naming_series, customer, customer_name, company,
                    posting_date, due_date, currency, conversion_rate,
                    selling_price_list, price_list_currency,
                    base_grand_total, grand_total, base_net_total, net_total,
                    base_total, total,
                    outstanding_amount, base_rounded_total, rounded_total,
                    is_return, is_debit_note, disable_rounded_total,
                    debit_to, party_account_currency,
                    status, legacy_invoice_id
                    ) VALUES (
                    %s, %s, %s, %s, %s, 0, 0,
                    'ACC-SINV-.YYYY.-', %s, %s, %s,
                    %s, %s, 'CAD', 1,
                    'Standard Selling', 'CAD',
                    %s, %s, %s, %s,
                    %s, %s,
                    %s, %s, %s,
                    0, 0, 1,
                    %s, 'CAD',
                    'Draft', %s
                    )
                """, (sinv_name, ts, ts, ADMIN, ADMIN,
                      cust_name, cust_display, COMPANY,
                      posting_date, due_date,
                      total, total, total, total,
                      total, total,
                      total, total, total,
                      receivable, inv["id"]))

                for j, li in enumerate(line_items):
                    sku = clean(li.get("sku")) or "MISC"
                    qty = float(li.get("quantity") or 1)
                    rate = float(li.get("unitary_price") or 0)
                    amount = round(qty * rate, 2)
                    desc = clean(li.get("product_name")) or sku
                    # Link to an Item only when the SKU exists in tabItem.
                    item_code = sku if sku in valid_items else None
                    pgc.execute("""
                        INSERT INTO "tabSales Invoice Item" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        item_code, item_name, description, qty, rate, amount,
                        base_rate, base_amount, base_net_rate, base_net_amount,
                        net_rate, net_amount,
                        stock_uom, uom, conversion_factor,
                        income_account, cost_center,
                        parent, parentfield, parenttype
                        ) VALUES (
                        %s, %s, %s, %s, %s, 0, %s,
                        %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s,
                        %s, %s,
                        'Nos', 'Nos', 1,
                        %s, 'Main - T',
                        %s, 'items', 'Sales Invoice'
                        )
                    """, (uid("SII-"), ts, ts, ADMIN, ADMIN, j + 1,
                          item_code, desc[:140], desc[:140], qty, rate, amount,
                          rate, amount, rate, amount,
                          rate, amount,
                          income_acct, sinv_name))
            except Exception as e:
                # Undo only this invoice; earlier pending rows stay intact.
                pgc.execute("ROLLBACK TO SAVEPOINT legacy_invoice")
                pgc.execute("RELEASE SAVEPOINT legacy_invoice")
                inv_err += 1
                if inv_err <= 10:
                    log(" ERR inv#{} -> {}".format(inv["id"], str(e)[:100]))
                continue

            pgc.execute("RELEASE SAVEPOINT legacy_invoice")
            # Count only after the whole invoice has been written.
            inv_ok += 1
            item_ok += len(line_items)

            if inv_ok % 2000 == 0:
                pg.commit()
                log(" [{}/{}] inv={} items={} skip={} err={}".format(i + 1, len(invoices), inv_ok, item_ok, inv_skip, inv_err))

        pg.commit()
    finally:
        pg.close()

    log("")
    log("=" * 60)
    log("Invoices: {} created, {} skipped, {} errors".format(inv_ok, inv_skip, inv_err))
    log("Items: {}".format(item_ok))
    log("=" * 60)
# Run the invoice import only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""Import terminated customers (legacy status 3, 4 or 5) as disabled Customers, keeping their terminate reason in customer_details as Lead intelligence."""
import pymysql
import psycopg2
import uuid
from datetime import datetime, timezone
from html import unescape
# Connection arguments passed to pymysql.connect() for the legacy database;
# this script only issues SELECT statements against it.
# NOTE(review): host and password are hard-coded in source — consider
# loading them from the environment or a secrets store.
LEGACY = {"host": "10.100.80.100", "user": "facturation", "password": "VD67owoj",
"database": "gestionclient", "connect_timeout": 30, "read_timeout": 300}
# Connection arguments passed to psycopg2.connect() for the target database
# that receives the INSERT statements.
# NOTE(review): the password is hard-coded in source as well.
PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123",
"dbname": "_eb65bdc0c4b1b2d6"}
# Written to the owner and modified_by columns of every inserted row.
ADMIN = "Administrator"
# Maps the legacy account.group_id to the customer_group value stored on the
# Customer; main() falls back to "Individual" for ids missing from this map.
GROUP_MAP = {1: "Individual", 4: "Commercial", 5: "Individual", 6: "Individual",
7: "Individual", 8: "Commercial", 9: "Government", 10: "Non Profit"}
def uid(p=""):
    """Return *p* with a 10-character random hex token appended."""
    token = uuid.uuid4().hex
    short_token = token[:10]
    return p + short_token
def now():
    """Return the present UTC time as text: ``YYYY-MM-DD HH:MM:SS.ffffff``."""
    moment = datetime.now(tz=timezone.utc)
    stamp = moment.strftime("%Y-%m-%d %H:%M:%S.%f")
    return stamp
def clean(v):
    """Normalise a legacy field: decode HTML entities and strip whitespace.

    Falsy values (``None``, ``""``, ``0``) become the empty string.
    """
    text = ""
    if v:
        text = unescape(str(v)).strip()
    return text
def log(msg):
    """Emit *msg* as a line on stdout and flush so progress shows immediately."""
    print(msg, file=None, flush=True)
def main():
    """Import legacy accounts with status 3, 4 or 5 as disabled Customers.

    For each legacy account not yet present (matched on
    ``legacy_account_id``) this writes:

    * one ``tabCustomer`` row with ``disabled = 1`` and the terminate
      reason / company / note / date collected into ``customer_details``;
    * one ``tabContact`` plus its ``tabDynamic Link`` when a first name or
      an email is available;
    * one ``tabAddress`` plus its ``tabDynamic Link`` per delivery that has
      an address line or a city.

    Every customer is written inside its own savepoint: a failure undoes
    only that customer's rows, and the counters reflect rows that are
    really kept. Work is committed every 500 created customers and once at
    the end.
    """
    ts = now()
    log("=== Import Terminated Customers (status 3,4,5) ===")

    mc = pymysql.connect(**LEGACY)
    try:
        cur = mc.cursor(pymysql.cursors.DictCursor)
        cur.execute("SELECT * FROM account WHERE status IN (3,4,5) ORDER BY id")
        accounts = cur.fetchall()
        cur.execute("SELECT * FROM delivery WHERE account_id IN (SELECT id FROM account WHERE status IN (3,4,5)) ORDER BY account_id")
        deliveries = cur.fetchall()
    finally:
        mc.close()
    log(" {} terminated accounts, {} deliveries".format(len(accounts), len(deliveries)))

    # Group deliveries by the account they belong to.
    del_by = {}
    for d in deliveries:
        del_by.setdefault(d["account_id"], []).append(d)

    pg = psycopg2.connect(**PG)
    try:
        pgc = pg.cursor()
        pgc.execute('SELECT legacy_account_id FROM "tabCustomer" WHERE legacy_account_id > 0')
        existing = set(r[0] for r in pgc.fetchall())

        c_ok = c_addr = c_skip = c_err = 0
        for i, a in enumerate(accounts):
            aid = a["id"]
            if aid in existing:
                c_skip += 1
                continue

            first = clean(a["first_name"])
            last = clean(a["last_name"])
            company = clean(a["company"])
            if company:
                ctype, cname = "Company", company
            else:
                ctype, cname = "Individual", "{} {}".format(first, last).strip() or "Client-{}".format(aid)

            cust_id = uid("CUST-")
            group = GROUP_MAP.get(a["group_id"], "Individual")

            # Build customer_details with terminate info
            details = ""
            if a.get("terminate_reason"):
                details += "Raison départ: {}\n".format(clean(a["terminate_reason"]))
            if a.get("terminate_cie"):
                details += "Parti chez: {}\n".format(clean(a["terminate_cie"]))
            if a.get("terminate_note"):
                details += "Notes: {}\n".format(clean(a["terminate_note"])[:500])
            if a.get("terminate_date"):
                details += "Date: {}\n".format(clean(a["terminate_date"]))

            # Addresses written for this customer; added to the running
            # total only once the whole customer has succeeded.
            addr_written = 0

            # Isolate this customer so a failure does not discard the
            # not-yet-committed customers that preceded it.
            pgc.execute("SAVEPOINT legacy_customer")
            try:
                pgc.execute("""
                    INSERT INTO "tabCustomer" (
                    name, creation, modified, modified_by, owner, docstatus, idx,
                    naming_series, customer_name, customer_type, customer_group,
                    territory, default_currency, language, disabled,
                    legacy_account_id, legacy_customer_id, ppa_enabled, stripe_id,
                    customer_pos_id, customer_details
                    ) VALUES (
                    %s, %s, %s, %s, %s, 0, 0,
                    'CUST-.YYYY.-', %s, %s, %s,
                    'Canada', 'CAD', 'fr', 1,
                    %s, %s, 0, %s,
                    %s, %s
                    )
                """, (cust_id, ts, ts, ADMIN, ADMIN,
                      cname, ctype, group,
                      aid, clean(a.get("customer_id")),
                      clean(a.get("stripe_id")) or None,
                      clean(a.get("customer_id")),
                      details or None))

                # Contact
                email = clean(a.get("email"))
                if first or email:
                    cont_id = uid("CONT-")
                    full = "{} {}".format(first, last).strip()
                    pgc.execute("""
                        INSERT INTO "tabContact" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        first_name, last_name, full_name, email_id, phone, mobile_no, status
                        ) VALUES (%s, %s, %s, %s, %s, 0, 0, %s, %s, %s, %s, %s, %s, 'Open')
                    """, (cont_id, ts, ts, ADMIN, ADMIN,
                          first or cname, last or None, full or cname,
                          email or None, clean(a.get("tel_home")) or None,
                          clean(a.get("cell")) or None))
                    pgc.execute("""
                        INSERT INTO "tabDynamic Link" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        link_doctype, link_name, link_title, parent, parentfield, parenttype
                        ) VALUES (%s, %s, %s, %s, %s, 0, 1, 'Customer', %s, %s, %s, 'links', 'Contact')
                    """, (uid("DL-"), ts, ts, ADMIN, ADMIN, cust_id, cname, cont_id))

                # Addresses
                for j, d in enumerate(del_by.get(aid, [])):
                    addr1 = clean(d.get("address1"))
                    city = clean(d.get("city"))
                    if not addr1 and not city:
                        continue
                    addr_id = uid("ADDR-")
                    pgc.execute("""
                        INSERT INTO "tabAddress" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        address_title, address_type, address_line1, city, state,
                        pincode, country, is_primary_address, is_shipping_address
                        ) VALUES (%s, %s, %s, %s, %s, 0, 0,
                        %s, 'Shipping', %s, %s, %s, %s, 'Canada', %s, 1)
                    """, (addr_id, ts, ts, ADMIN, ADMIN,
                          clean(d.get("name")) or cname, addr1 or "N/A", city or "N/A",
                          clean(d.get("state")) or "QC", clean(d.get("zip")),
                          1 if j == 0 else 0))
                    pgc.execute("""
                        INSERT INTO "tabDynamic Link" (
                        name, creation, modified, modified_by, owner, docstatus, idx,
                        link_doctype, link_name, link_title, parent, parentfield, parenttype
                        ) VALUES (%s, %s, %s, %s, %s, 0, %s, 'Customer', %s, %s, %s, 'links', 'Address')
                    """, (uid("DL-"), ts, ts, ADMIN, ADMIN, j + 1, cust_id, cname, addr_id))
                    addr_written += 1
            except Exception as e:
                # Undo only this customer; earlier pending rows stay intact.
                pgc.execute("ROLLBACK TO SAVEPOINT legacy_customer")
                pgc.execute("RELEASE SAVEPOINT legacy_customer")
                c_err += 1
                if c_err <= 10:
                    log(" ERR #{} {} -> {}".format(aid, cname[:30], str(e)[:80]))
                continue

            pgc.execute("RELEASE SAVEPOINT legacy_customer")
            # Count only after customer, contact and addresses are written.
            c_ok += 1
            c_addr += addr_written

            if c_ok % 500 == 0:
                pg.commit()
                log(" [{}/{}] created={} addr={} skip={} err={}".format(i + 1, len(accounts), c_ok, c_addr, c_skip, c_err))

        pg.commit()
    finally:
        pg.close()

    log("")
    log("=" * 60)
    log("Terminated Customers: {} created, {} skipped, {} errors".format(c_ok, c_skip, c_err))
    log("Addresses: {}".format(c_addr))
    log("All terminated customers have disabled=1 + terminate details in customer_details")
    log("=" * 60)
# Run the terminated-customer import only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()