#!/usr/bin/env python3 """ Geocode Service Locations using the rqa_addresses (Adresses Québec) table. Matches by extracting numero from address_line, then fuzzy-matching against rqa_addresses using city + street similarity. Run inside erpnext-backend-1: nohup python3 /tmp/geocode_locations.py > /tmp/geocode_locations.log 2>&1 & """ import psycopg2 import re from datetime import datetime, timezone PG = {"host": "db", "port": 5432, "user": "postgres", "password": "123", "dbname": "_eb65bdc0c4b1b2d6"} # Common city name normalizations for matching CITY_NORMALIZE = { "st-": "saint-", "ste-": "sainte-", "st ": "saint-", "ste ": "sainte-", "st.": "saint-", "ste.": "sainte-", } def log(msg): print("[{}] {}".format(datetime.now(timezone.utc).strftime("%H:%M:%S"), msg), flush=True) def normalize_city(city): """Normalize city name for matching.""" c = city.lower().strip() for old, new in CITY_NORMALIZE.items(): if c.startswith(old): c = new + c[len(old):] # Remove accents would be ideal but keep it simple # Remove " de " variants c = re.sub(r'\s+de\s+', '-', c) c = re.sub(r'\s+', '-', c) return c def extract_numero(address): """Extract civic number from address string.""" addr = address.strip() # Try to find number at start: "1185 Route 133" → "1185" m = re.match(r'^(\d+[A-Za-z]?)\s*[,\s]', addr) if m: return m.group(1) # Just digits at start m = re.match(r'^(\d+)', addr) if m: return m.group(1) return None def main(): log("=== Geocode Service Locations via AQ ===") pg = psycopg2.connect(**PG) pg.autocommit = False pgc = pg.cursor() # Get locations needing GPS pgc.execute(""" SELECT name, address_line, city, postal_code FROM "tabService Location" WHERE (latitude = 0 OR latitude IS NULL) AND address_line NOT IN ('N/A', '', 'xxx') AND city NOT IN ('N/A', '') """) locations = pgc.fetchall() log(" {} locations to geocode".format(len(locations))) matched = missed = 0 for i, (loc_name, addr, city, postal) in enumerate(locations): lat = lon = None # Strategy 1: Match by postal code + numero (most precise) numero = extract_numero(addr) if postal and len(postal) >= 6: postal_clean = postal.strip().upper().replace(" ", "") if numero: pgc.execute(""" SELECT latitude, longitude FROM rqa_addresses WHERE REPLACE(UPPER(code_postal), ' ', '') = %s AND numero = %s LIMIT 1 """, (postal_clean, numero)) row = pgc.fetchone() if row: lat, lon = row # Strategy 2: Match by numero + city + fuzzy street if not lat and numero and city: city_norm = normalize_city(city) # Build search string for trigram matching search = "{} {}".format(numero, addr.lower()) pgc.execute(""" SELECT latitude, longitude, similarity(search_text, %s) as sim FROM rqa_addresses WHERE numero = %s AND LOWER(ville) %% %s ORDER BY similarity(search_text, %s) DESC LIMIT 1 """, (search, numero, city_norm, search)) row = pgc.fetchone() if row and row[2] > 0.15: lat, lon = row[0], row[1] # Strategy 3: Full address fuzzy match against address_full if not lat and city: full_addr = "{}, {}".format(addr, city).lower() pgc.execute(""" SELECT latitude, longitude, similarity(address_full, %s) as sim FROM rqa_addresses WHERE address_full %% %s ORDER BY similarity(address_full, %s) DESC LIMIT 1 """, (full_addr, full_addr, full_addr)) row = pgc.fetchone() if row and row[2] > 0.25: lat, lon = row[0], row[1] if lat and lon: pgc.execute(""" UPDATE "tabService Location" SET latitude = %s, longitude = %s, modified = NOW() WHERE name = %s """, (lat, lon, loc_name)) matched += 1 else: missed += 1 if (matched + missed) % 500 == 0: pg.commit() log(" [{}/{}] matched={} missed={}".format(i+1, len(locations), matched, missed)) pg.commit() pg.close() log("") log("=" * 60) log("GEOCODE COMPLETE") log(" Matched: {} ({:.1f}%)".format(matched, 100*matched/len(locations) if locations else 0)) log(" Missed: {}".format(missed)) log("=" * 60) if __name__ == "__main__": main()