Files
test/ohgo.py
2025-11-27 22:25:36 +00:00

236 lines
10 KiB
Python

import requests
# import polyline # Not used in the provided snippet
import json
import psycopg2
import psycopg2.extensions # Not strictly necessary for this code, but fine to keep
from datetime import datetime, timezone # timezone not used, but datetime is
# from geojson import Point, Feature, FeatureCollection, dump # Not used
# import pandas as pd # Not used
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# --- Configuration ---
# PostgreSQL connection settings; the script entry point passes these to psycopg2.connect().
DB_HOST = 'localhost'
DB_NAME = 'nws'
DB_USER = 'nws'
DB_PASSWORD = 'nws' # NOTE(review): hard-coded credential; consider an environment variable or a secrets manager
# OHGO road-marker endpoint. The URL-encoded ``pointData`` JSON pins the query to a fixed
# bounding box (lowLongitude/highLongitude/lowLatitude/highLatitude) with empty route filters;
# edit the literal to cover a different area.
OHGO_API_URL = "https://api.ohgo.com/roadmarkers/TrafficSpeedAndAlertMarkers?pointData={%22lowLongitude%22:-83.55941510027988,%22highLongitude%22:-80.38711285418613,%22lowLatitude%22:38.243663436655325,%22highLatitude%22:40.169517342197835,%22routeDirection%22:%22%22,%22routeName%22:%22%22}"
# Optional proxy mapping in the format ``requests`` expects. It only takes effect if the
# entry point assigns it to the session's ``proxies`` attribute.
PROXIES = {"http": "http://nws:nws@localhost:9000",
           "https": "http://nws:nws@localhost:9000"} # HTTPS requests go through the same HTTP proxy URL
# Silence urllib3's InsecureRequestWarning. This only hides the warning; it does not make
# unverified HTTPS requests any safer, so use with caution.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# --- Main Logic ---
def fetch_ohgo_data(session, url):
    """Fetch the OHGO payload from ``url`` and return it as parsed JSON.

    Args:
        session: A ``requests.Session``-like object exposing
            ``get(url, timeout=...)``.
        url: Fully formed API URL to request.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with a 4xx/5xx status, or the body is not valid JSON. Errors
        are reported on stdout rather than raised.
    """
    print(f"INFO: Fetching data from {url}...")

    # Transport/HTTP failures are handled on their own so they cannot be
    # confused with a malformed body below.
    try:
        response = session.get(url, timeout=30)  # bound the wait on a dead endpoint
        response.raise_for_status()  # turn 4xx/5xx answers into exceptions
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Failed to fetch data from API: {e}")
        return None

    # Decoding gets its own handler. Recent ``requests`` releases raise a
    # JSONDecodeError that is *also* a RequestException, so a combined
    # try/except would report a bad body as a fetch failure and never reach
    # the JSON-specific branch. Every JSON decode error (stdlib json,
    # simplejson, requests) derives from ValueError.
    try:
        payload = response.json()
    except ValueError as e:
        print(f"ERROR: Failed to parse JSON response: {e}")
        print(f"Response text: {response.text[:500]}...")  # log only the head of the body
        return None

    # Announce success only once the body has actually been decoded.
    print("INFO: Data fetched successfully.")
    return payload
def prepare_data_for_upsert(ohgo_data):
    """Convert the OHGO API payload into row tuples for the ``ohgo`` upsert.

    Only alerts whose ``Category`` is "Flooding" or "Weather" are kept, and
    alerts without an ``Id`` or without both coordinates are skipped with a
    warning.

    Args:
        ohgo_data: Decoded API response; expected to be a dict holding a
            ``TrafficAlertMarkers`` list of alert dicts.

    Returns:
        A list of 10-item tuples ordered as
        ``(lat, lon, id, location, description, category, roadstatus,
        routename, start, lastupdate)``. ``start`` and ``lastupdate`` are the
        same naive UTC ``datetime`` shared by the whole batch. The list is
        empty when the payload has no usable ``TrafficAlertMarkers`` list.
    """
    alerts = ohgo_data.get('TrafficAlertMarkers') if isinstance(ohgo_data, dict) else None
    # A missing key, a JSON null, or a non-list value all mean "nothing to
    # process"; without this check len()/iteration below would raise.
    if not isinstance(alerts, list):
        print("WARNING: No 'TrafficAlertMarkers' found in API response.")
        return []
    print(f"INFO: Processing {len(alerts)} alerts from API.")

    # One timestamp for the whole batch. datetime.utcnow() is deprecated, so
    # take an aware UTC "now" and drop tzinfo to keep passing the same naive
    # UTC value to the database as before.
    batch_time = datetime.now(timezone.utc).replace(tzinfo=None)
    relevant_categories = ("Flooding", "Weather")

    rows = []
    for alert in alerts:
        if not isinstance(alert, dict):
            print(f"WARNING: Skipping malformed alert entry: {alert!r}")
            continue

        category = alert.get('Category')
        if category not in relevant_categories:
            continue

        alert_id = alert.get('Id')
        if alert_id is None:
            print(f"WARNING: Skipping record due to missing ID: {alert}")
            continue

        lat = alert.get('Latitude')
        lon = alert.get('Longitude')
        if lat is None or lon is None:
            print(f"WARNING: Skipping record {alert_id} due to missing Lat/Lon: {alert}")
            continue

        # Order must match the column list of the upsert statement.
        rows.append((
            lat,
            lon,
            alert_id,
            alert.get('Location'),
            alert.get('Description'),
            category,
            alert.get('RoadStatus'),
            alert.get('RouteName'),
            batch_time,  # start
            batch_time,  # lastupdate
        ))

    print(f"INFO: Prepared {len(rows)} records for database.")
    return rows
def update_database(conn, data_to_upsert):
    """Upsert the prepared alert rows into ``ohgo`` and run the follow-up UPDATEs.

    Everything runs inside a single ``with conn:`` transaction: the batch
    upsert, then geometry, end-time, county/CWA/state, default-flag and
    description clean-up updates, in that order. Database and unexpected
    errors are printed and swallowed; nothing is re-raised to the caller.

    Args:
        conn: An open psycopg2 connection. It is not closed here.
        data_to_upsert: List of 10-item tuples matching the column order of
            the INSERT below. An empty list makes the function return
            immediately without touching the database.

    Returns:
        None.
    """
    if not data_to_upsert:
        print("INFO: No data to update in the database.")
        return
    # Batch UPSERT keyed on id. On conflict the existing row keeps its original
    # `start`; the listed columns are overwritten from the incoming row (EXCLUDED).
    # NOTE(review): `routename` is inserted but is not in the DO UPDATE SET list,
    # so it is never refreshed for an existing id -- confirm this is intended.
    upsert_sql = """
INSERT INTO ohgo (
lat, lon, id, location, description, category,
roadstatus, routename, start, lastupdate
)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) -- 10 placeholders
ON CONFLICT (id) DO UPDATE SET
lat = EXCLUDED.lat,
lon = EXCLUDED.lon,
location = EXCLUDED.location,
description = EXCLUDED.description,
category = EXCLUDED.category,
roadstatus = EXCLUDED.roadstatus,
lastupdate = EXCLUDED.lastupdate;
"""
    # Context managers handle the transaction and the cursor lifetime.
    try:
        with conn: # Transaction scope: commit on normal exit, rollback on exception
            with conn.cursor() as cursor:
                # --- 1. Batch Upsert ---
                print(f"INFO: Upserting {len(data_to_upsert)} records...")
                cursor.executemany(upsert_sql, data_to_upsert)
                # rowcount after executemany may be unreliable depending on driver/DB version
                print(f"INFO: Upsert complete. {cursor.rowcount} rows affected (Note: count may be inaccurate).")
                # --- 2. Post-Processing Updates ---
                # These statements scan the whole table, not just the rows upserted above.
                print("INFO: Performing post-processing updates...")
                # Build the point geometry for rows that have coordinates but no geom yet.
                # NOTE(review): rows whose lat/lon changed in the upsert keep their old
                # geom because of the `geom IS NULL` filter -- confirm this is acceptable.
                print("INFO: Updating geometry...")
                cursor.execute("""
UPDATE ohgo
SET geom = ST_SetSRID(ST_MakePoint(lon, lat), 4326)
WHERE lat IS NOT NULL AND lon IS NOT NULL AND geom IS NULL;
""")
                print(f"INFO: Updated geometry for {cursor.rowcount} records.")
                # End-time bookkeeping driven by lastupdate:
                #   updated within the last 0.25 hours -> endtime cleared
                #   not updated for over 1 hour        -> endtime set to lastupdate
                # Rows in between are left untouched.
                print("INFO: Updating end times...")
                cursor.execute("""
UPDATE ohgo
SET endtime = NULL
WHERE lastupdate > now() - interval '0.25 hours';
""")
                print(f"INFO: Reset endtime for {cursor.rowcount} recent records.")
                cursor.execute("""
UPDATE ohgo
SET endtime = lastupdate
WHERE endtime IS NULL AND lastupdate < now() - interval '1 hours';
""")
                print(f"INFO: Set endtime for {cursor.rowcount} older records.")
                # Fill county / cwa / state by point-in-polygon lookups against
                # public.county and public.fzone (spatial indexes matter for performance).
                print("INFO: Updating County/CWA/State...")
                # Each statement only touches rows where the target column is still NULL
                # and a geometry exists.
                cursor.execute("""
UPDATE ohgo o SET county = c.countyname
FROM public.county c
WHERE o.county IS NULL AND o.geom IS NOT NULL
AND ST_Contains(c.geom, o.geom);
""")
                print(f"INFO: Updated county for {cursor.rowcount} records.")
                cursor.execute("""
UPDATE ohgo o SET cwa = f.cwa
FROM public.fzone f
WHERE o.cwa IS NULL AND o.geom IS NOT NULL
AND ST_Contains(f.geom, o.geom);
""")
                print(f"INFO: Updated cwa for {cursor.rowcount} records.")
                cursor.execute("""
UPDATE ohgo o SET state = c.state
FROM public.county c
WHERE o.state IS NULL AND o.geom IS NOT NULL
AND ST_Contains(c.geom, o.geom);
""")
                print(f"INFO: Updated state for {cursor.rowcount} records.")
                # Default both boolean flags to false; only rows where lsr is NULL are selected.
                print("INFO: Setting default flags...")
                cursor.execute("""
UPDATE ohgo
SET lsr = false, hide = false
WHERE lsr IS NULL;
""")
                print(f"INFO: Set default flags for {cursor.rowcount} records.")
                # Strip the "Use alternate route." boilerplate and replace newlines with spaces.
                # The \n escapes are resolved by Python, so the SQL text carries real newlines.
                print("INFO: Cleaning descriptions...")
                cursor.execute("""
UPDATE ohgo
SET description = TRIM(REPLACE(REPLACE(description, 'Use alternate route.', ''), E'\n', ' '))
WHERE description LIKE '%Use alternate route.%' OR description LIKE E'%\n%';
""")
                print(f"INFO: Cleaned description for {cursor.rowcount} records.")
        print("INFO: Database operations successful. Transaction committed.") # Reached only after 'with conn:' exited without an exception
    except psycopg2.Error as e:
        print(f"ERROR: Database error occurred: {e}")
        print("ERROR: Transaction automatically rolled back.")
        # The error is swallowed on purpose; add a bare `raise` here if the
        # caller needs to react to database failures.
    except Exception as e:
        print(f"ERROR: An unexpected error occurred during database operations: {e}")
        # 'with conn:' has already rolled the transaction back on the way out.
        # Swallowed as above; add `raise` to propagate.
# --- Script Execution ---
if __name__ == "__main__":
    # Script entry point: connect to the database, fetch the OHGO feed, turn
    # it into rows and upsert them. Every failure is reported on stdout.
    conn = None  # stays None if connect() fails, so `finally` knows whether to close
    try:
        # Establish database connection
        print(f"INFO: Connecting to database '{DB_NAME}' on '{DB_HOST}'...")
        conn = psycopg2.connect(host=DB_HOST, database=DB_NAME, user=DB_USER, password=DB_PASSWORD)
        print("INFO: Database connection successful.")

        # Use the session as a context manager so its pooled HTTP connections
        # are released once the fetch is done, even if it raises.
        with requests.Session() as session:
            session.verify = False  # Disables SSL certificate verification - use with caution
            # To route requests through the configured proxy, set:
            #     session.proxies = PROXIES
            ohgo_json_data = fetch_ohgo_data(session, OHGO_API_URL)

        if ohgo_json_data:
            # Prepare data
            prepared_data = prepare_data_for_upsert(ohgo_json_data)
            # Update database
            update_database(conn, prepared_data)
    except psycopg2.OperationalError as e:
        print(f"FATAL: Could not connect to database: {e}")
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print(f"FATAL: An unexpected error occurred: {e}")
    finally:
        # Ensure the connection is closed
        if conn:
            conn.close()
            print("INFO: Database connection closed.")
        print("INFO: Script finished.")