"""Fetch today's CoCoRaHS daily precipitation reports for WV plus selected
OH/VA/KY counties, and write plain-text and HTML summary tables to
/var/www/html/work/."""

# NOTE(review): time, json, psycopg2, re, and pandas are not used in this
# script as shown; kept in case other tooling depends on this module.
import time
import json
import psycopg2
import psycopg2.extensions
from psycopg2.extras import Json
import re
import pandas as pd
import requests
import xmltodict
import datetime
from html import escape
from tabulate import tabulate


def clean_text(text):
    """Normalize a value to a clean UTF-8 string.

    Collapses non-breaking spaces, strips the stray '\xc2' characters that
    appear when UTF-8 degree symbols are mis-decoded, then drops anything
    else that cannot round-trip through UTF-8.
    """
    if text is None:
        return ''
    text = str(text)
    text = text.replace('\u00a0', ' ')  # non-breaking space -> regular space
    text = text.replace('\xc2', '')     # stray byte from mojibake'd degree signs
    # Round-trip through UTF-8 to discard any remaining unencodable content.
    return text.encode('utf-8', errors='ignore').decode('utf-8')


allobs = []
states = ['wv', 'oh', 'va', 'ky']
# Station numbers embed a county code (e.g. 'OH-LW-12'); for non-WV states
# only these county codes are kept. WV keeps every report.
ohcounties = ['-LW-', '-GL-', '-JC-', '-MS-', '-AT-', '-PY-', '-WS-', '-MG-', '-VN-']
vacounties = ['-DC-', '-BC-']
kycounties = ['-LR-', '-CT-', '-GP-', '-BD-']
# Explicit lookup table instead of the original eval(state + 'counties') —
# eval on a constructed name is fragile and a needless security smell.
county_filters = {'oh': ohcounties, 'va': vacounties, 'ky': kycounties}

datewanted = datetime.date.today().strftime("%m/%d/%Y")

try:
    for state in states:
        url = (
            'https://data.cocorahs.org/export/exportreports.aspx'
            f'?state={state}&Format=XML&Date={datewanted}&responsefields=all'
        )
        response = requests.get(url)
        response.raise_for_status()  # surface HTTP errors early
        # Explicitly decode as UTF-8 before XML parsing.
        data = xmltodict.parse(response.content.decode('utf-8'))
        try:
            daily_reports = data.get('Cocorahs', {}).get('DailyPrecipReports')
            if daily_reports is None:
                print(f"No reports found for state {state}")
                continue
            reports = daily_reports.get('DailyPrecipReport')
            if reports is None:
                print(f"No reports data found for state {state}")
                continue
            # xmltodict yields a dict for a single report, a list otherwise.
            if isinstance(reports, dict):
                reports = [reports]
            for report in reports:
                if state == 'wv':
                    allobs.append(report)
                else:
                    station_number = report.get('StationNumber', '')
                    # any() appends each matching report exactly once, even
                    # if more than one county code were ever to match.
                    if any(county in station_number
                           for county in county_filters[state]):
                        allobs.append(report)
        except (KeyError, TypeError) as e:
            print(f"Error processing data for state {state}: {e}")
            continue
except requests.exceptions.RequestException as e:
    # NOTE(review): the outer try's except clause was cut off in the source;
    # network/HTTP failures are the only errors left unhandled inside it,
    # so that is what is caught here — confirm against the deployed version.
    print(f"Error fetching data: {e}")

# Flatten each observation dict into an ordered, cleaned row.
finalobs = []
for obs in allobs:
    finalobs.append([
        clean_text(obs.get('DateTimeStamp', '')),
        clean_text(obs.get('StationNumber', '')),
        clean_text(obs.get('StationName', '')),
        clean_text(obs.get('TotalPrecipAmt', '')),
        clean_text(obs.get('NewSnowDepth', '')),
        clean_text(obs.get('TotalSnowDepth', '')),
        clean_text(obs.get('Notes', '')),
    ])

headers = ["Date/Time of Ob (Z)", "Station Number", "Station Name",
           "New Precip", "New Snow", "Snow Depth", "Comments"]

# Plain-text table ('plain' format for simple text output).
with open('/var/www/html/work/today.txt', 'w', encoding='utf-8') as f:
    f.write(tabulate(finalobs, headers=headers, tablefmt='plain'))

# HTML table. NOTE(review): the tail of the original file was garbled in the
# source; this is reconstructed from the surviving fragments (the header row
# and the per-cell escape(str(cell))) — confirm against the deployed version.
html_content = ('<html>\n<head><meta charset="utf-8"></head>\n<body>\n'
                '<table border="1">\n')
html_content += ('<tr>'
                 + ''.join(f'<th>{escape(h)}</th>' for h in headers)
                 + '</tr>\n')
for row in finalobs:
    html_content += ('<tr>'
                     + ''.join(f'<td>{escape(str(cell))}</td>' for cell in row)
                     + '</tr>\n')
html_content += '</table>\n</body>\n</html>\n'

with open('/var/www/html/work/today.html', 'w', encoding='utf-8') as f:
    f.write(html_content)