From ecde83c9530bb3556392b817cbe5bb3067133278 Mon Sep 17 00:00:00 2001 From: John Peck Date: Wed, 10 Dec 2025 04:35:29 +0000 Subject: [PATCH] fix upsert --- newpower2.py | 65 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/newpower2.py b/newpower2.py index 9e1981d..8f18c10 100644 --- a/newpower2.py +++ b/newpower2.py @@ -96,13 +96,57 @@ class PowerDB: self.conn.close() def upsert_outage(self, data): + incident_id_from_data = data.get('incidentid') + utility_name = data.get('utility') + lat = data.get('lat') + lon = data.get('lon') + start_time = data.get('start') # This is expected to be a datetime object + + unique_outage_key = None + + # Prioritize incidentid if it seems valid and is a string + # Check for common unreliable/placeholder values + if incident_id_from_data and isinstance(incident_id_from_data, str) and \ + incident_id_from_data.strip() not in ["0", "unknown", "null", "N/A", ""]: + unique_outage_key = f"{utility_name}_{incident_id_from_data.strip()}" + elif lat is not None and lon is not None and utility_name is not None: + # Fallback to a synthesized key based on location and recency + try: + # Round lat/lon to 4 decimal places (approx 11 meters precision) + rounded_lat = round(float(lat), 4) + rounded_lon = round(float(lon), 4) + + # If start_time is missing, use the current hour as a fallback bucket. + # This groups recent, location-similar outages without a start time together. + if start_time: + time_bucket = start_time.strftime('%Y%m%d%H') # YYYYMMDDHH + else: + time_bucket = datetime.now(timezone.utc).strftime('%Y%m%d%H') + + unique_outage_key = f"{utility_name}_{rounded_lat}_{rounded_lon}_{time_bucket}" + except (ValueError, TypeError) as e: + logger.error(f"Error synthesizing unique_outage_key from lat/lon/start_time: {e}. Data: {data}") + raise ValueError("Failed to synthesize unique_outage_key due to missing or malformed data.") + else: + logger.error(f"Insufficient data to create a unique_outage_key (missing incidentid, or lat/lon/utility). Data: {data}") + raise ValueError("Insufficient data to create a unique_outage_key.") + + if unique_outage_key is None: + logger.error(f"Failed to generate a unique_outage_key for data: {data}") + raise ValueError("Unique outage key could not be generated.") + + # Ensure the utility name is consistently passed in the data dictionary + if utility_name is None: + logger.warning(f"Utility name missing in outage data for incident {incident_id_from_data}. Using 'UNKNOWN'.") + utility_name = "UNKNOWN" + sql = """ INSERT INTO newpower (incidentid, utility, lat, lon, pointgeom, areageom, start_time, etr, - outagen, peakoutage, cause, crew_status, active, last_change, fetch_time, geom) + outagen, peakoutage, cause, crew_status, active, last_change, fetch_time, geom, unique_outage_key) VALUES - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) - ON CONFLICT (pointgeom) DO UPDATE SET + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s) + ON CONFLICT (unique_outage_key) DO UPDATE SET outagen = EXCLUDED.outagen, peakoutage = GREATEST(newpower.peakoutage, EXCLUDED.outagen), cause = EXCLUDED.cause, @@ -110,17 +154,22 @@ class PowerDB: crew_status = EXCLUDED.crew_status, last_change = EXCLUDED.last_change, fetch_time = EXCLUDED.fetch_time, - active = TRUE + active = TRUE, + lat = EXCLUDED.lat, + lon = EXCLUDED.lon, + pointgeom = EXCLUDED.pointgeom, + areageom = EXCLUDED.areageom, + geom = EXCLUDED.geom """ peak = data.get('outagen', 0) # Clean areageom before insertion, referencing old power2.py logic areageom = remove_external_curly_braces(data.get('areageom')) params = ( - data.get('incidentid'), data.get('utility'), data.get('lat'), data.get('lon'), - data.get('pointgeom'), areageom, data.get('start'), data.get('etr'), + data.get('incidentid'), utility_name, lat, lon, + data.get('pointgeom'), areageom, start_time, data.get('etr'), data.get('outagen'), peak, data.get('cause'), data.get('crew_status'), - True, data.get('last_change', datetime.now(timezone.utc)), datetime.now(timezone.utc), - data.get('lon'), data.get('lat') + True, data.get('last_change', datetime.now(timezone.utc)), datetime.now(timezone.utc), # last_change, fetch_time + lon, lat, unique_outage_key # geom, unique_outage_key ) with self.conn.cursor() as cursor: cursor.execute(sql, params)