import requests
import json
import traceback  # For detailed error reporting
import psycopg2  # For PostgreSQL interaction
from psycopg2 import extras  # Currently unused; see the batching note in upsert_and_update_db
import os
from datetime import datetime
import pytz  # Timezone handling (EST/EDT)

# --- Database Configuration (Use Environment Variables!) ---
DB_NAME = os.environ.get("PG_DBNAME", "nws")
DB_USER = os.environ.get("PG_USER", "nws")
DB_PASSWORD = os.environ.get("PG_PASSWORD", "nws")
DB_HOST = os.environ.get("PG_HOST", "localhost")
DB_PORT = os.environ.get("PG_PORT", "5432")
DB_SCHEMA = "ky511"  # Schema qualifier; set to None to target an unqualified table
DB_TABLE = "ky511"
TABLE_NAME_QUALIFIED = f"{DB_SCHEMA}.{DB_TABLE}" if DB_SCHEMA else DB_TABLE
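
# For reference, a minimal sketch of the target table this script assumes
# (column names inferred from the upsert and post-update SQL below; types and
# indexes are a guess, adjust to your setup). This DDL is NOT executed here:
#
#   CREATE TABLE IF NOT EXISTS ky511.ky511 (
#       id                  TEXT PRIMARY KEY,
#       first_seen          TIMESTAMPTZ,
#       initial_description TEXT,
#       latest_description  TEXT,
#       last_updated        TIMESTAMPTZ,
#       last_seen_in_feed   TIMESTAMPTZ,
#       geom                geometry(Point, 4326),  -- requires PostGIS
#       source              TEXT,
#       route               TEXT,
#       remark              TEXT,
#       county              TEXT,
#       cwa                 TEXT,
#       st                  TEXT
#   );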

# --- Looker Studio API Configuration ---
# NOTE: HEADERS and COOKIES carry the session authentication for Looker Studio.
# They expire; refresh them from a logged-in browser session before running.
URL = "https://lookerstudio.google.com/batchedDataV2?appVersion=20250324_0406"
HEADERS = { "authority": "lookerstudio.google.com", "method": "POST", "path": "/batchedDataV2?appVersion=20250324_0406", "scheme": "https", "accept": "application/json, text/plain, */*", "accept-encoding": "gzip, deflate, br, zstd", "accept-language": "en-US,en;q=0.9", "cache-control": "no-cache", "origin": "https://lookerstudio.google.com", "pragma": "no-cache", "priority": "u=1, i", "referer": "https://lookerstudio.google.com/reporting/1413fcfb-1416-4e56-8967-55f8e9f30ec8/page/p_pbm4eo88qc", "sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"Windows"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-origin", "x-client-data": "CIS2yQEIorbJAQipncoBCMHbygEIk6HLAQiFoM0BCP6lzgEIvdXOAQjJ4M4BCIbizgEIu+fOAQjS6M4BCKzpzgE=", "x-rap-xsrf-token": "AImk1AIPFDSOkwsENSc2sLLwBANEib1ysQ:1744403672035", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", "Content-Type": "application/json", }
COOKIES = { "SID": "g.a000vwi83sZx47d1q9Po8BgPR3jBX7lpsrzWSuPIKyp6RycUEctVzyy0oo1gNvQbwZa53AWllwACgYKAVQSARMSFQHGX2MiUpZ3JZlRTzUS5L-5fPmgpBoVAUF8yKprZzPlNM_X83070Ct35bq_0076", "__Secure-1PSID": "g.a000vwi83sZx47d1q9Po8BgPR3jBX7lpsrzWSuPIKyp6RycUEctVKNqZm8qWU9ZC2DlRVPn0igACgYKAfQSARMSFQHGX2MiYGW66m_L_p1vlqiuEg-IrxoVAUF8yKrgRbA9tcA4VinZix0qlexX0076", "__Secure-3PSID": "g.a000vwi83sZx47d1q9Po8BgPR3jBX7lpsrzWSuPIKyp6RycUEctVnGF-cJAl9Wr1rJ-NOCW63wACgYKAZASARMSFQHGX2Mi1gYcRRnI8v2AdvofwvFG5BoVAUF8yKooSw-opCZ1-vxmkQXCB7330076", "HSID": "AJifEcy5MKbQSoqIz", "SSID": "AB87p4RIXK_USto4D", "APISID": "fQw9oTJbNnptFKdr/AIXxJuvoAk3qrIeWi", "SAPISID": "3zFHAXrGoL_XNe0X/A7AafTXBL2NRj7N2h", "__Secure-1PAPISID": "3zFHAXrGoL_XNe0X/A7AafTXBL2NRj7N2h", "__Secure-3PAPISID": "3zFHAXrGoL_XNe0X/A7AafTXBL2NRj7N2h", "AEC": "AVcja2e2MbzHDDL2yqJDMgUeDL-S3BEFB_yK293aRhQwshqv22Bd7IQ9mvg", "S": "maestro=IUwVdLn0rPZ27SY26uYPEonS9u1J9oQi8YzVJpqIc-w", "_gid": "GA1.3.1639311329.1744393279", "NID": "523=n5TR7moG8fZ7ZefkfCPQgx3aIuzhv_Vqk6CI0ah9HTWaoS-D4CtVwh6nIKaFY3hPLBPf_vf3THtu0nnJXc2ZaD5i3MpiIqOD8zBQbDuCp5G8rB3an7Jt7wdVuE8S3326_SF6FEBti9omWc2wNU43MbtEXSC7g4YP2sVzLohrGiUi_FHMIRDHj8OUKtDg9HjD3U3RzLRZk-IlyQyoRrwz0Ubf7IKxTsMvt57VWJvB2gqODqJJvaGWUdASl5lvPuTR7hlNkDyZwRC9rr-T2XtGUa10wUS_5bERpv6A2wTXXquQodkB25oDZvnhoS7FmO-SSAV-fweVu9tGlQ566w1yfBS0iANk8_iSCqhqFx6R1fyB9Nwxyf_g6ncw6nOmIHrtsIFiwAGJgwno2WRNey5nEgZbd5O-ew3z2K_aiPLvgeJCvY82L5swZctp7ZJtPFgGqXuAj3i6KtDhsTtizx0PbPpb7bvZ2nJ-SxlkR4s84fp_NxcXZKEdQfzOzoL6nAQMG9Kh28t_yKvcordnv25-55J_n_yzPDH78dTcLBzWS5dEcLP_Tt7HlSevSbP_2-NNoBGhM76vmMyIsMS0FucwPPKExrF6pwC3kc4g-4gruLlIWEHSnYodVcXQxD7Y2pD-0MBD7O8s-fCBDhlk1OgWfTHC1JnyrkUIMuqzj5-_LbuRIDPbN8YjVF0lO7jeyX0I9JjHHU3qc9EvhZ5LqpZKpRgTl5U4Prsmgq9K0o3nqvNoRMAbSTLX2kKGfhZKxtZT-Ezzrs_jLuZELFL8u98joYHS6v-guEvjD2Kg_XeWgRz3zAKBojSdtJnSuD7qjsnrV6IagTb8Yoazd7Gz3g-mlz8XTtSx1v7sStbFLnpMUlk", "_ga": "GA1.1.1275703331.1743917675", "_ga_LPCKLD3Z7X": "GS1.1.1744403669.9.1.1744403672.0.0.0", "RAP_XSRF_TOKEN": "AImk1AIPFDSOkwsENSc2sLLwBANEib1ysQ:1744403672035", "__Secure-1PSIDTS": "sidts-CjIB7pHpta3VWPdkJx7cNCUjUpy6c-d9WA3bvc3_9icXKqMA3kaW8Nh6_NJ-S4_OkQRZtRAA", "__Secure-3PSIDTS": "sidts-CjIB7pHpta3VWPdkJx7cNCUjUpy6c-d9WA3bvc3_9icXKqMA3kaW8Nh6_NJ-S4_OkQRZtRAA", "SIDCC": "AKEyXzUvhhICXSbNTp0dL7c4St5hDPC_ghsZ_PlMfsv7M8YVMlV8EibT-8IAsh7PmcPAtY38PJY", "__Secure-1PSIDCC": "AKEyXzUDzbYtACzfEgsC_j1S2ay0dayt4PwLSUoOjlS1xMBQ9FeL52NBdnlZBn_KWMM4a8jFaNv5", "__Secure-3PSIDCC": "AKEyXzVCxfyVIsm9apiA7JdTK3UKhcQUNLBOKQFxN8QDJyeLPojNFUBUs_K_X0xOR2vzZsBBEk5V", "_gat": "1", "_ga_S4FJY0X3VX": "GS1.1.1744401723.8.1.1744404147.0.0.0", }
payload_req1 = { "requestContext": {"reportContext": {"reportId": "1413fcfb-1416-4e56-8967-55f8e9f30ec8", "pageId": "p_pbm4eo88qc", "mode": 1, "componentId": "cd-rdkny9a9qc", "displayType": "simple-table", "actionId": "crossFilters"}, "requestMode": 0}, "datasetSpec": {"dataset": [{"datasourceId": "c2fc8cdd-46bb-454c-bf09-90ebfd4067d7", "revisionNumber": 0, "parameterOverrides": []}], "queryFields": [{"name": "qt_3nwfu9yq1c", "datasetNs": "d0", "tableNs": "t0", "resultTransformation": {"analyticalFunction": 0, "isRelativeToBase": False}, "dataTransformation": {"sourceFieldName": "_Event_Id_"}}, {"name": "qt_8yjok4izsc", "datasetNs": "d0", "tableNs": "t0", "resultTransformation": {"analyticalFunction": 0, "isRelativeToBase": False}, "dataTransformation": {"sourceFieldName": "_DateTime_EST_"}}, {"name": "qt_sfkc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_KYTC_Type_"}}, {"name": "qt_4e66idhbrc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_Incident_Source_"}}, {"name": "qt_re76qrqe2c", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_District_", "aggregation": 0}}, {"name": "qt_tfkc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_County_Name_"}}, {"name": "qt_ufkc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_Route_Label_"}}, {"name": "qt_vfkc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_BMP_Initial_"}}, {"name": "qt_o7kc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_EMP_Initial_"}}, {"name": "qt_p7kc163arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_Description_"}}], "sortData": [{"sortColumn": {"name": "qt_8yjok4izsc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_DateTime_EST_"}}, "sortDir": 1}], "includeRowsCount": True, "relatedDimensionMask": {"addDisplay": False, "addUniqueId": False, "addLatLong": False}, "paginateInfo": {"startRow": 1, "rowsCount": 500}, "dsFilterOverrides": [], "filters": [{"filterDefinition": {"filterExpression": {"include": False, "conceptType": 0, "concept": {"ns": "t0", "name": "qt_exs3vib9qc"}, "filterConditionType": "PT", "stringValues": ["Shoulder"], "numberValues": [], "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_Source_Type_"}}}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_vwmdhfhbrc", "ns": "t0"}, "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_Incident_Source_"}}, "filterConditionType": "IN", "stringValues": ["KYTC Reported"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_kjyfx83arc", "ns": "t0"}, "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_County_Name_"}}, "filterConditionType": "IN", "stringValues": ["Boyd", "Carter", "Fleming", "Greenup"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_uiren73arc", "ns": "t0"}, "queryTimeTransformation": 
{"dataTransformation": {"sourceFieldName": "_KYTC_Type_"}}, "filterConditionType": "IN", "stringValues": ["Weather"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}], "features": [], "dateRanges": [], "contextNsCount": 1, "calculatedField": [], "needGeocoding": False, "geoFieldMask": [], "multipleGeocodeFields": [], "timezone": "America/New_York"}, "role": "main", "retryHints": {"useClientControlledRetry": True, "isLastRetry": False, "retryCount": 0, "originalRequestId": "cd-rdkny9a9qc_0_0"} }
payload_req2 = { "requestContext": {"reportContext": {"reportId": "1413fcfb-1416-4e56-8967-55f8e9f30ec8", "pageId": "p_pbm4eo88qc", "mode": 1, "componentId": "cd-3aflvp88qc", "displayType": "google-map", "actionId": "crossFilters"}, "requestMode": 0}, "datasetSpec": {"dataset": [{"datasourceId": "c2fc8cdd-46bb-454c-bf09-90ebfd4067d7", "revisionNumber": 0, "parameterOverrides": []}], "queryFields": [{"name": "qt_pbotw53arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_Incident_Id_"}}, {"name": "qt_qbotw53arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_KYTC_Type_"}}, {"name": "qt_7grvqe4arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "qt_7grvqe4arc", "textFormula": "CONCAT(t0._KYTC_Type_, \" - \", t0._Route_Label_, \" - \", t0._BMP_Initial_)", "sourceType": 1, "frontendTextFormula": "CONCAT(t0._KYTC_Type_,\" - \",t0._Route_Label_,\" - \",t0._BMP_Initial_)", "formulaOutputDataType": 0}}, {"name": "qt_5prae63arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_SHAPE_", "aggregation": 16, "outputGeoType": 1}}], "sortData": [{"sortColumn": {"name": "qt_pbotw53arc", "datasetNs": "d0", "tableNs": "t0", "dataTransformation": {"sourceFieldName": "_Incident_Id_"}}, "sortDir": 1}], "includeRowsCount": True, "relatedDimensionMask": {"addDisplay": False, "addUniqueId": False, "addLatLong": True}, "paginateInfo": {"startRow": 1, "rowsCount": 100000}, "dsFilterOverrides": [], "filters": [{"filterDefinition": {"filterExpression": {"include": False, "conceptType": 0, "concept": {"ns": "t0", "name": "qt_exs3vib9qc"}, "filterConditionType": "PT", "stringValues": ["Shoulder"], "numberValues": [], "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_Source_Type_"}}}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_vwmdhfhbrc", "ns": "t0"}, "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_Incident_Source_"}}, "filterConditionType": "IN", "stringValues": ["KYTC Reported"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_kjyfx83arc", "ns": "t0"}, "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_County_Name_"}}, "filterConditionType": "IN", "stringValues": ["Boyd", "Carter", "Fleming", "Greenup"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}, {"filterDefinition": {"filterExpression": {"include": True, "conceptType": 0, "concept": {"name": "qt_uiren73arc", "ns": "t0"}, "queryTimeTransformation": {"dataTransformation": {"sourceFieldName": "_KYTC_Type_"}}, "filterConditionType": "IN", "stringValues": ["Weather"]}}, "dataSubsetNs": {"datasetNs": "d0", "tableNs": "t0", "contextNs": "c0"}, "version": 3, "isCanvasFilter": True}], "features": [], "dateRanges": [], "contextNsCount": 1, "calculatedField": [], "needGeocoding": False, "geoFieldMask": [], "geoVertices": 100000, "multipleGeocodeFields": [], "timezone": "America/New_York"}, "role": "main", "retryHints": {"useClientControlledRetry": True, "isLastRetry": False, "retryCount": 0, "originalRequestId": "cd-3aflvp88qc_0_0"} }
combined_payload = {"dataRequest": [payload_req1, payload_req2]}
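
# The batched endpoint returns one 'dataResponse' entry per item in 'dataRequest',
# in the same order; the parsing below relies on index 0 being the table query
# and index 1 being the map query.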

# --- Key Mappings and Constants ---
KEY_RENAME_MAP = { 'qt_3nwfu9yq1c': 'id', 'qt_8yjok4izsc': 'dtg', 'qt_4e66idhbrc': 'source', 'qt_ufkc163arc': 'route', 'qt_p7kc163arc': 'remark' }
COORDINATE_KEY_OLD = 'qt_5prae63arc'
LONGITUDE_KEY_NEW = 'lon'
LATITUDE_KEY_NEW = 'lat'
BMP_KEY_ORIGINAL = 'qt_vfkc163arc'
COUNTY_KEY_ORIGINAL = 'qt_tfkc163arc'

# --- US Eastern Timezone ---
# 'America/New_York' correctly handles the switch between EST and EDT.
eastern_tz = pytz.timezone('America/New_York')
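# For example (illustrative values, not taken from the feed):
#   eastern_tz.localize(datetime(2024, 1, 15, 12, 0))  # -> 2024-01-15 12:00:00-05:00 (EST)
#   eastern_tz.localize(datetime(2024, 7, 15, 12, 0))  # -> 2024-07-15 12:00:00-04:00 (EDT)
# Naive datetimes must be localized this way; with pytz, passing tzinfo=eastern_tz
# directly to the datetime constructor would attach a wrong (LMT-era) offset.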

# --- Helper Function (Column-to-Row) ---
def process_table_dataset(table_dataset):
    """Pivot Looker Studio's column-oriented tableDataset into a list of row dicts."""
    if not table_dataset:
        return []
    try:
        column_info = table_dataset.get('columnInfo', [])
        columns_data = table_dataset.get('column', [])
        num_records = table_dataset.get('totalCount', 0)
        # Fall back to the first column's value count if totalCount is missing/zero
        if num_records == 0 and columns_data:
            first_col_data = columns_data[0]
            data_key = next((k for k in first_col_data if isinstance(first_col_data[k], dict) and 'values' in first_col_data[k]), None)
            if data_key:
                num_records = len(first_col_data[data_key].get('values', []))
        if not column_info or not columns_data or num_records == 0:
            return []
        column_names = [info.get('name', f'unknown_col_{i}') for i, info in enumerate(column_info)]
        processed_events = []
        for i in range(num_records):
            event = {}
            for j, col_name in enumerate(column_names):
                if j >= len(columns_data):
                    event[col_name] = None
                    continue
                col_data_dict = columns_data[j]
                value = None
                # Each column dict holds its values under a type-specific key
                # (e.g. 'stringColumn', 'doubleColumn')
                data_key = next((k for k in col_data_dict if isinstance(col_data_dict[k], dict) and 'values' in col_data_dict[k]), None)
                if data_key:
                    values_list = col_data_dict[data_key].get('values', [])
                    if i < len(values_list):
                        value = values_list[i]
                event[col_name] = value
            processed_events.append(event)
        return processed_events
    except Exception as e:
        print(f"Error processing table dataset: {e}")
        traceback.print_exc()
        return []
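
# Illustration of the pivot above on hypothetical input: a tableDataset like
#   {"columnInfo": [{"name": "qt_a"}, {"name": "qt_b"}],
#    "column": [{"stringColumn": {"values": ["x", "y"]}},
#               {"doubleColumn": {"values": [1.0, 2.0]}}],
#    "totalCount": 2}
# becomes row dicts:
#   [{"qt_a": "x", "qt_b": 1.0}, {"qt_a": "y", "qt_b": 2.0}]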

# --- Data Fetching and Processing Function ---
def fetch_and_process_data():
    print("Sending combined request...")
    try:
        response = requests.post(URL, headers=HEADERS, cookies=COOKIES, json=combined_payload)
        response.raise_for_status()
        print(f"Request successful! Status Code: {response.status_code}")
        # Strip Google's anti-JSON-hijacking prefix ()]}' ). lstrip() is
        # character-based, which is safe here because the JSON body starts with '{'.
        cleaned_text = response.text.lstrip(")]}'")
        response_data = json.loads(cleaned_text)

        table_data_list, map_data_list = [], []
        if 'dataResponse' in response_data and isinstance(response_data['dataResponse'], list) and len(response_data['dataResponse']) >= 2:
            try:
                table_subset = response_data['dataResponse'][0].get('dataSubset', [{}])[0]
                table_dataset = table_subset.get('dataset', {}).get('tableDataset')
                if table_dataset:
                    table_data_list = process_table_dataset(table_dataset)
            except IndexError:
                pass
            try:
                map_subset = response_data['dataResponse'][1].get('dataSubset', [{}])[0]
                map_dataset = map_subset.get('dataset', {}).get('tableDataset')
                if map_dataset:
                    map_data_list = process_table_dataset(map_dataset)
            except IndexError:
                pass
        else:
            print("Error: expected 2 'dataResponse' items in the reply.")
            return []

        if not table_data_list or not map_data_list:
            print("Error: failed to process both datasets.")
            return []

        print(f"\nMerging {len(table_data_list)} table events and {len(map_data_list)} map events...")
        merged_events_dict = {}
        for event1 in table_data_list:
            event_type = event1.get('qt_sfkc163arc')
            route_label = event1.get('qt_ufkc163arc')
            bmp_value = event1.get(BMP_KEY_ORIGINAL)
            if event_type is not None and route_label is not None and bmp_value is not None:
                # Render integral floats without a trailing ".0" so the key matches
                # the CONCAT(_KYTC_Type_, " - ", _Route_Label_, " - ", _BMP_Initial_)
                # formula that produces qt_7grvqe4arc in the map request
                bmp_str = str(int(bmp_value)) if isinstance(bmp_value, float) and bmp_value.is_integer() else str(bmp_value)
                join_key = f"{event_type} - {route_label} - {bmp_str}"
                merged_events_dict[join_key] = event1.copy()

        merge_success_count = 0
        for event2 in map_data_list:
            join_key = event2.get('qt_7grvqe4arc')
            if join_key in merged_events_dict:
                existing_event = merged_events_dict[join_key]
                for key, value in event2.items():
                    if key == COORDINATE_KEY_OLD:
                        # The shape column arrives as GeoJSON text; convert Points
                        # to a (lon, lat) tuple
                        if isinstance(value, str) and value.strip().startswith('{'):
                            try:
                                geo_data = json.loads(value)
                                if geo_data.get('type') == 'Point' and 'coordinates' in geo_data:
                                    existing_event[key] = tuple(geo_data['coordinates'])
                                else:
                                    existing_event[key] = value
                            except json.JSONDecodeError:
                                existing_event[key] = value
                        else:
                            existing_event[key] = value
                    elif key not in existing_event:
                        existing_event[key] = value
                merge_success_count += 1
        # print(f"-> Merged data for {merge_success_count} events.")

        # print("\nApplying final transformations (including timezone handling)...")
        final_transformed_list = []
        parse_error_count = 0
        # Datetime formats observed in Looker Studio output; extend if new ones appear
        possible_dt_formats = [
            '%m/%d/%Y %I:%M:%S %p',  # e.g., 05/20/2024 02:30:00 PM
            '%Y-%m-%d %H:%M:%S',     # e.g., 2024-05-20 14:30:00
            '%Y-%m-%dT%H:%M:%S',     # e.g., 2025-04-12T06:16:06
        ]
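        # Note (optional simplification, untested against all feed values): the two
        # ISO-style formats above could also be parsed with datetime.fromisoformat(),
        # e.g. datetime.fromisoformat("2025-04-12T06:16:06"); the explicit strptime
        # loop is kept so every accepted format is listed in one place.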

        for event in merged_events_dict.values():
            transformed_event = {}
            aware_dtg = None  # Aware datetime, filled in below if parsing succeeds
            try:
                # --- Timezone Handling for 'dtg' ---
                raw_dtg_value = event.get('qt_8yjok4izsc')
                if raw_dtg_value:
                    parsed_naive = None
                    for fmt in possible_dt_formats:
                        try:
                            parsed_naive = datetime.strptime(str(raw_dtg_value), fmt)
                            break  # Stop once a format succeeds
                        except ValueError:
                            continue  # Try the next format

                    if parsed_naive:
                        # Successfully parsed; make it timezone-aware (ET).
                        # is_dst=None makes localize() raise AmbiguousTimeError or
                        # NonExistentTimeError for times inside a DST transition,
                        # rather than silently guessing an offset.
                        aware_dtg = eastern_tz.localize(parsed_naive, is_dst=None)
                    else:
                        print(f"Warning: Could not parse dtg value '{raw_dtg_value}' for event ID {event.get('qt_3nwfu9yq1c')} using known formats.")
                        parse_error_count += 1
                else:
                    print(f"Warning: Missing dtg value for event ID {event.get('qt_3nwfu9yq1c')}.")
                    parse_error_count += 1

                # --- Standard Key Renaming and Data Extraction ---
                for old_key, new_key in KEY_RENAME_MAP.items():
                    if old_key == 'qt_8yjok4izsc':
                        transformed_event[new_key] = aware_dtg  # Aware datetime or None
                    elif old_key in event:
                        transformed_event[new_key] = event[old_key]

                if COORDINATE_KEY_OLD in event:
                    coordinates = event[COORDINATE_KEY_OLD]
                    if isinstance(coordinates, (list, tuple)) and len(coordinates) >= 2:
                        try:
                            transformed_event[LONGITUDE_KEY_NEW] = float(coordinates[0])
                            transformed_event[LATITUDE_KEY_NEW] = float(coordinates[1])
                        except (TypeError, ValueError):
                            pass  # Leave lon/lat unset if conversion fails

                keys_to_keep = [BMP_KEY_ORIGINAL, COUNTY_KEY_ORIGINAL]
                for key in keys_to_keep:
                    if key in event:
                        transformed_event[key] = event[key]

                # Ensure source is present even if the rename map did not cover it
                if 'source' not in transformed_event and 'qt_4e66idhbrc' in event:
                    transformed_event['source'] = event['qt_4e66idhbrc']

                # Only keep events that have an ID (required for the DB upsert)
                if transformed_event.get('id'):
                    final_transformed_list.append(transformed_event)
                else:
                    print(f"Warning: Skipping event due to missing ID. Original data snippet: {str(event)[:100]}")

            except Exception as e:
                print(f"Error during final transformation for event: {e}")
                traceback.print_exc()

        if parse_error_count > 0:
            print(f"-> Encountered {parse_error_count} datetime parsing issues.")
        # print(f"-> Finished transformation. Resulting valid events: {len(final_transformed_list)}.")
        return final_transformed_list

    except requests.exceptions.RequestException as e:
        print(f"API Request failed: {e}")
    except json.JSONDecodeError as e:
        print(f"Failed to decode API JSON response: {e}")
        if 'cleaned_text' in locals():
            print("Raw response snippet:", cleaned_text[:500])
    except Exception as e:
        print(f"An unexpected error occurred during data fetching/processing: {e}")
        traceback.print_exc()
    return []


# --- Database Upsert and Post-Update Function ---
def upsert_and_update_db(events):
    """Upserts events and runs post-update spatial queries."""
    if not events:
        print("No events to process.")
        return

    conn = None
    cursor = None
    upserted_count = 0
    upsert_error_count = 0

    # SQL definitions. County is NOT set here; the spatial post-updates below
    # fill it in. first_seen and last_seen_in_feed should be TIMESTAMPTZ columns;
    # psycopg2 maps timezone-aware datetime objects to TIMESTAMPTZ correctly.
    sql_upsert = f"""
        INSERT INTO {TABLE_NAME_QUALIFIED}
            (id, first_seen, initial_description, latest_description, last_updated, last_seen_in_feed, geom, source, route, remark)
        VALUES
            (%(id)s, %(first_seen)s, %(initial_desc)s, %(latest_desc)s, NOW(), %(last_seen)s, ST_SetSRID(ST_MakePoint(%(lon)s, %(lat)s), 4326), %(source)s, %(route)s, %(remark)s)
        ON CONFLICT (id) DO UPDATE SET
            latest_description = excluded.latest_description,
            last_updated = NOW(),
            last_seen_in_feed = excluded.last_seen_in_feed,
            geom = excluded.geom,
            source = excluded.source,
            route = excluded.route,
            remark = excluded.remark,
            -- Keep the original first_seen and initial_description on update
            first_seen = {TABLE_NAME_QUALIFIED}.first_seen,
            initial_description = {TABLE_NAME_QUALIFIED}.initial_description;
    """
    sql_upsert_no_geom = f"""
        INSERT INTO {TABLE_NAME_QUALIFIED}
            (id, first_seen, initial_description, latest_description, last_updated, last_seen_in_feed, geom, source, route, remark)
        VALUES
            (%(id)s, %(first_seen)s, %(initial_desc)s, %(latest_desc)s, NOW(), %(last_seen)s, NULL, %(source)s, %(route)s, %(remark)s)
        ON CONFLICT (id) DO UPDATE SET
            latest_description = excluded.latest_description,
            last_updated = NOW(),
            last_seen_in_feed = excluded.last_seen_in_feed,
            geom = NULL,  -- No coordinates in this feed row, so geom is (re)set to NULL
            source = excluded.source,
            route = excluded.route,
            remark = excluded.remark,
            -- Keep the original first_seen and initial_description on update
            first_seen = {TABLE_NAME_QUALIFIED}.first_seen,
            initial_description = {TABLE_NAME_QUALIFIED}.initial_description;
    """

    # Post-update SQL: fill county/cwa/state via spatial joins against reference
    # tables. The IS NULL guards mean only rows still missing a value are touched.
    post_update_sqls = [
        f'UPDATE {TABLE_NAME_QUALIFIED} SET county = county.countyname FROM public.county WHERE ST_Contains(county.geom, {TABLE_NAME_QUALIFIED}.geom) AND {TABLE_NAME_QUALIFIED}.county IS NULL',
        f'UPDATE {TABLE_NAME_QUALIFIED} SET cwa = fzone.cwa FROM public.fzone WHERE ST_Contains(fzone.geom, {TABLE_NAME_QUALIFIED}.geom) AND {TABLE_NAME_QUALIFIED}.cwa IS NULL',
        f'UPDATE {TABLE_NAME_QUALIFIED} SET st = county.state FROM public.county WHERE ST_Contains(county.geom, {TABLE_NAME_QUALIFIED}.geom) AND {TABLE_NAME_QUALIFIED}.st IS NULL',
    ]

    try:
        # print(f"\nConnecting to PostgreSQL database '{DB_NAME}' on {DB_HOST}...")
        conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD, host=DB_HOST, port=DB_PORT)
        cursor = conn.cursor()

        # --- Stage 1: Upsert Events ---
        for event in events:
            event_id = None  # Defined up front so the except blocks can reference it
            try:
                event_id = event.get('id')
                if not event_id:
                    upsert_error_count += 1
                    continue  # Skip events with no ID

                # Timezone-aware datetime object (or None)
                dtg_aware = event.get('dtg')

                lon_val = event.get('lon')
                lat_val = event.get('lat')
                route_val = event.get('route')
                remark_val = event.get('remark')
                bmp_val = event.get(BMP_KEY_ORIGINAL)

                # Build a human-readable description: "<route> at MM <bmp> <remark>"
                desc_parts = []
                if route_val:
                    desc_parts.append(route_val)
                if bmp_val is not None:
                    desc_parts.append(f"at MM {str(int(bmp_val)) if isinstance(bmp_val, float) and bmp_val.is_integer() else str(bmp_val)}")
                if remark_val:
                    desc_parts.append(remark_val)
                full_desc = " ".join(desc_parts) if desc_parts else None

                data_dict = {
                    'id': event_id,
                    # Aware datetimes are passed directly; psycopg2 maps None to NULL.
                    'first_seen': dtg_aware,
                    'initial_desc': full_desc,
                    'latest_desc': full_desc,
                    'last_seen': dtg_aware,
                    'lon': lon_val,
                    'lat': lat_val,
                    'source': event.get('source'),
                    'route': route_val,
                    'remark': remark_val,
                }

                # Choose the correct SQL based on geometry presence
                if lon_val is not None and lat_val is not None:
                    cursor.execute(sql_upsert, data_dict)
                else:
                    cursor.execute(sql_upsert_no_geom, data_dict)
                upserted_count += 1

            except psycopg2.Error as db_err:
                upsert_error_count += 1
                print(f"DB upsert error ID '{event_id}': {db_err}")
                # rollback() clears the failed transaction state so later statements
                # can run; note it also discards any earlier uncommitted upserts.
                conn.rollback()
            except Exception as e:
                upsert_error_count += 1
                print(f"Prep/Exec error ID '{event_id}': {e}")
                traceback.print_exc()
                # No rollback needed: the error occurred before execute() or was
                # handled by the psycopg2 branch above

        # Commit the upsert stage (whatever succeeded since the last rollback)
        if upsert_error_count > 0:
            print(f"Upsert stage completed with {upsert_error_count} errors for individual events.")
        # print(f"Committing {upserted_count} successful upserts...")
        conn.commit()
        # print("Upsert stage committed.")

        # --- Stage 2: Post-Update Spatial Queries ---
        # print("\nRunning post-upsert spatial updates...")
        post_update_errors = 0
        for i, update_sql in enumerate(post_update_sqls, 1):
            try:
                cursor.execute(update_sql)
                # print(f" -> Update {i} successful ({cursor.rowcount} rows affected).")
                # Commit each successful post-update step individually
                conn.commit()
            except psycopg2.Error as post_db_err:
                post_update_errors += 1
                print(f" -> Database error executing post-update {i}: {post_db_err}")
                print(f"    Failed SQL: {update_sql}")
                conn.rollback()  # Roll back this specific update attempt
                # break  # Optional: stop processing further post-updates if one fails
            except Exception as post_e:
                post_update_errors += 1
                print(f" -> Unexpected error executing post-update {i}: {post_e}")
                print(f"    Failed SQL: {update_sql}")
                conn.rollback()
                # break  # Optional: stop processing further post-updates if one fails

        if post_update_errors == 0:
            print("Post-upsert updates completed successfully.")
        else:
            print(f"Post-upsert updates encountered {post_update_errors} errors (each failed step was rolled back).")

    except psycopg2.OperationalError as e:
        print(f"Database connection failed: {e}")
    except Exception as e:
        print(f"An unexpected error occurred during database operations: {e}")
        traceback.print_exc()
        if conn:
            conn.rollback()  # Roll back any outstanding transaction on a major error
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
            print("Database connection closed.")


# --- Script Entry Point ---
if __name__ == "__main__":
    processed_events = fetch_and_process_data()
    if processed_events:  # Only run DB operations if we got data
        upsert_and_update_db(processed_events)
    else:
        print("No processed events found, skipping database operations.")
    print("\nScript finished.")