#!/usr/bin/env python3
"""
Import RQA (Réseau Québécois d'Adresses) CSV into PostgreSQL civic_addresses table.

Handles the ~2.8GB CSV file with streaming/batched inserts.

NOTE(review): the code in this file currently only opens the CSV and prints
its header and first data row; no database insert is performed here.

Usage:
    python3 import_rqa_addresses.py /tmp/RQA_CSV/RQA.csv

Or from Docker:
    docker cp import_rqa_addresses.py frappe_docker-db-1:/tmp/
    docker exec frappe_docker-db-1 python3 /tmp/import_rqa_addresses.py /tmp/RQA.csv
"""
import csv
import sys
import os
import subprocess
import io

# Neither constant is referenced by the code in this file yet.
# NOTE(review): presumably the target database name and the insert batch
# size for the import step described in the module docstring -- confirm.
DB = "_171cf82a99ac0463"
BATCH_SIZE = 10000


def get_csv_path():
    """Return the path of the CSV file to read.

    The first command-line argument wins when present. Otherwise a short
    list of well-known locations is probed and the first existing one is
    returned.

    Returns:
        The CSV path as a string.

    Raises:
        SystemExit: with exit code 1 (after printing a usage message to
            stderr) when no argument was given and no default file exists.
    """
    if len(sys.argv) > 1:
        return sys.argv[1]

    # Auto-detect from the usual unzipped locations.
    for candidate in ('/tmp/RQA_CSV/RQA.csv', '/tmp/RQA.csv', '/tmp/RQA_CSV.csv'):
        if os.path.exists(candidate):
            return candidate

    print("Usage: python3 import_rqa_addresses.py <path/to/RQA.csv>",
          file=sys.stderr)
    sys.exit(1)


def _peek_csv(csv_path):
    """Read only the header and the first data row of *csv_path*.

    Returns:
        A ``(header, sample_row)`` tuple of lists of strings. ``header`` is
        ``None`` when the file is empty and ``sample_row`` is ``None`` when
        there is no row after the header.
    """
    # newline='' is what the csv module requires so that line endings inside
    # quoted fields reach the parser untouched. 'utf-8-sig' drops a leading
    # BOM, and errors='replace' keeps undecodable bytes from aborting the read.
    with open(csv_path, 'r', encoding='utf-8-sig', errors='replace',
              newline='') as f:
        reader = csv.reader(f, delimiter=',')
        header = next(reader, None)
        sample_row = next(reader, None) if header is not None else None
    return header, sample_row


def main():
    """Print the column layout and a sample row of the CSV file.

    Raises:
        SystemExit: with exit code 1 when the file has no header row, or
            when no CSV path can be determined (see ``get_csv_path``).
    """
    csv_path = get_csv_path()
    print(f"Reading: {csv_path}")

    # Only the first two records are read, so the cost does not depend on
    # the size of the file.
    header, row = _peek_csv(csv_path)
    if header is None:
        print(f"Error: {csv_path} is empty (no header row)", file=sys.stderr)
        sys.exit(1)

    print(f"Columns ({len(header)}): {header[:15]}...")

    if row is None:
        # A header-only file is reported instead of crashing on next().
        print("Sample row: (none - file has no data rows)")
    else:
        print(f"Sample row: {row[:15]}...")

    print("\nHeader fields:")
    for i, h in enumerate(header):
        print(f"  {i}: {h}")


if __name__ == '__main__':
    main()