From 4fb52f58d4a01186d1a36e3945c2f2f455b0bcfe Mon Sep 17 00:00:00 2001 From: John Peck Date: Sun, 7 Dec 2025 03:58:57 +0000 Subject: [PATCH] clean-up working --- get_rpc_config_auto.py | 17 ++--- newpower2.py | 160 ++++++++++++++++++++--------------------- 2 files changed, 86 insertions(+), 91 deletions(-) diff --git a/get_rpc_config_auto.py b/get_rpc_config_auto.py index 3743b57..750618a 100644 --- a/get_rpc_config_auto.py +++ b/get_rpc_config_auto.py @@ -20,14 +20,11 @@ def analyze_gwt_response(response_text): return candidates def fetch_live_data(map_url): - """ - Uses a real browser to fetch data AND capture the raw request details. - """ print(f"--- Browser Fetch: {map_url} ---") data_result = None captured_headers = None captured_cookies = None - captured_body = None # <--- New: Capture raw body + captured_body = None with sync_playwright() as p: browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled']) @@ -39,7 +36,7 @@ def fetch_live_data(map_url): def handle_request(request): nonlocal captured_headers, captured_body if ".rpc" in request.url and request.method == "POST": - # Capture the request details blindly before we even know if it works + # Capture request blindly first if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data: captured_headers = request.headers captured_body = request.post_data @@ -75,15 +72,14 @@ def get_fresh_config(map_url): data, headers, cookies, body = fetch_live_data(map_url) if headers and body: - # Minimal cleaning: Only remove headers that 'requests' MUST generate itself - # This keeps all custom NISC/GWT headers safe. 
+ # Minimal Cleaning: Remove headers requests handles automatically forbidden = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding'} clean_headers = {k: v for k, v in headers.items() if k.lower() not in forbidden} return { 'headers': clean_headers, - 'body': body, # Save exact body - 'url': headers.get('url', map_url.replace('.html', '') + '/GWT.rpc'), # Best guess URL if missing + 'body': body, + 'url': headers.get('url', map_url.replace('.html', '') + 'GWT.rpc'), 'cookies': cookies, 'user_agent': headers.get('user-agent') } @@ -93,5 +89,4 @@ if __name__ == "__main__": url = input("Enter Map URL: ") res = get_fresh_config(url) if res: - print("Success! Captured Body length:", len(res['body'])) - print("Captured Headers:", res['headers'].keys()) \ No newline at end of file + print("Success! Captured new config.") \ No newline at end of file diff --git a/newpower2.py b/newpower2.py index bdba0ce..17562dd 100644 --- a/newpower2.py +++ b/newpower2.py @@ -5,13 +5,14 @@ import psycopg2 import mercantile import logging import os +import re from datetime import datetime, timezone, timedelta from abc import ABC, abstractmethod from urllib.parse import urlparse from pyproj import Transformer from requests.packages.urllib3.exceptions import InsecureRequestWarning -# Import the helper module +# Import the helper module for auto-repair import get_rpc_config_auto requests.packages.urllib3.disable_warnings(InsecureRequestWarning) @@ -43,18 +44,16 @@ def save_providers(providers): logger.info("Configuration saved to providers.json") def update_provider_config(provider_name, new_settings): + """Updates a specific provider in the JSON file safely""" providers = load_providers() updated = False for p in providers: if p.get('name') == provider_name: - if 'headers' in new_settings: p['headers'] = new_settings['headers'] - if 'body' in new_settings: p['body'] = new_settings['body'] - if 'url' in new_settings: p['url'] = new_settings['url'] - if 'cookies' in 
new_settings: p['cookies'] = new_settings['cookies'] + # Update all relevant fields + for key in ['headers', 'body', 'url', 'cookies', 'user_agent']: + if key in new_settings: + p[key] = new_settings[key] - # <--- NEW: Save User-Agent - if 'user_agent' in new_settings: p['user_agent'] = new_settings['user_agent'] - p['last_auto_update'] = datetime.now(timezone.utc).isoformat() updated = True break @@ -240,11 +239,9 @@ class GwtRpcProvider(BaseProvider): self.state_filter = config.get('state_filter') self.map_url = config.get('map_url') - # 1. Set User-Agent (Dynamic > Default) - # We try to use the one from config if available (captured from actual browser) - ua = config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36') + # 1. Base Headers self.session.headers.update({ - 'User-Agent': ua, + 'User-Agent': config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'), 'Accept': '*/*', 'Sec-Fetch-Site': 'same-origin' }) @@ -252,7 +249,7 @@ class GwtRpcProvider(BaseProvider): parsed_url = urlparse(config.get('url')) self.session.headers.update({'Origin': f"{parsed_url.scheme}://{parsed_url.netloc}"}) - # Load Cookies + # 2. Load Cookies if config.get('cookies'): for cookie in config['cookies']: try: @@ -267,6 +264,7 @@ class GwtRpcProvider(BaseProvider): 'KY': {'lat_min': 36.4, 'lat_max': 39.2, 'lon_min': -89.6, 'lon_max': -81.9}, 'IA': {'lat_min': 40.3, 'lat_max': 43.6, 'lon_min': -96.7, 'lon_max': -90.1} } + if config.get('epsg'): try: self.transformer = Transformer.from_crs(f"EPSG:{config['epsg']}", "EPSG:4326", always_xy=True) @@ -275,39 +273,62 @@ class GwtRpcProvider(BaseProvider): def attempt_auto_repair(self): if not self.map_url: return False - # ... (Cooldown check - keep as is) ... 
+ # --- Cooldown Check --- + last_update = self.config.get('last_auto_update') + if last_update: + try: + last_dt = datetime.fromisoformat(last_update) + if last_dt.tzinfo is None: last_dt = last_dt.replace(tzinfo=timezone.utc) + if datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS): + logger.info(f"Skipping auto-repair for {self.name} (Cooldown active).") + return False + except ValueError: pass logger.info(f"Attempting Auto-Repair for {self.name}...") try: - # We expect 4 return values now - new_settings = get_rpc_config_auto.get_fresh_config(self.map_url) + # Expecting 4 values: data, headers, cookies, body + new_data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url) - if new_settings: + if valid_headers and valid_body: logger.info(f"Repair successful! Updating {self.name}.") - # Update In-Memory - self.config.update(new_settings) - self.config['last_auto_update'] = datetime.now(timezone.utc).isoformat() + # Clean Headers (Blacklist approach) + excluded = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding', 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'} + clean_headers = {k: v for k, v in valid_headers.items() if k.lower() not in excluded} - # Update Session Cookies - self.session.cookies.clear() - if new_settings.get('cookies'): - for c in new_settings['cookies']: - self.session.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path']) - - # Update Session UA - if new_settings.get('user_agent'): - self.session.headers.update({'User-Agent': new_settings['user_agent']}) + # Ensure Referer is set correctly for next time + clean_headers['Referer'] = self.map_url - # Persist to disk + # Update In-Memory Config + current_time = datetime.now(timezone.utc).isoformat() + self.config['headers'] = clean_headers + self.config['body'] = valid_body + self.config['cookies'] = valid_cookies + self.config['user_agent'] = valid_headers.get('user-agent') + 
self.config['last_auto_update'] = current_time + + # Update Session + self.session.cookies.clear() + for cookie in valid_cookies: + self.session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path']) + if valid_headers.get('user-agent'): + self.session.headers.update({'User-Agent': valid_headers.get('user-agent')}) + + # Save to Disk + new_settings = { + 'headers': clean_headers, + 'body': valid_body, + 'cookies': valid_cookies, + 'user_agent': valid_headers.get('user-agent') + } update_provider_config(self.name, new_settings) return True except Exception as e: logger.error(f"Auto-repair failed: {e}") return False - + def fetch(self, is_retry=False): url = self.config.get('url') headers = self.config.get('headers', {}) @@ -315,8 +336,8 @@ class GwtRpcProvider(BaseProvider): if not url: return [] - # --- STRATEGY A: Standard Requests (Fast) --- try: + # 3. Dynamic Origin Update parsed_url = urlparse(url) origin = f"{parsed_url.scheme}://{parsed_url.netloc}" @@ -324,78 +345,57 @@ class GwtRpcProvider(BaseProvider): correct_referer = headers.get('Referer') or headers.get('x-gwt-module-base') or origin ua = headers.get('User-Agent', self.session.headers['User-Agent']) - if "Headless" in ua: - ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' - + if "Headless" in ua: # Fallback safety + ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' + self.session.headers.update({ 'Origin': origin, 'Referer': correct_referer, 'User-Agent': ua }) + # Prime if missing cookies or map_url is defined if self.map_url and not self.config.get('cookies'): try: self.session.get(correct_referer, verify=False, timeout=10) except: pass req_headers = headers.copy() - if 'Content-Type' not in req_headers: req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8' + req_headers['Content-Type'] = 'text/x-gwt-rpc; 
charset=UTF-8' req_headers['Referer'] = correct_referer req_headers['User-Agent'] = ua - - # Debug log (Optional - disable if too noisy) - # logger.info(f"Sending Headers: {json.dumps(req_headers, indent=2)}") - resp = self.session.post(url, headers=req_headers, data=body, verify=False) + # Only fetch if we have a body + if body: + resp = self.session.post(url, headers=req_headers, data=body, verify=False) + else: + resp = type('obj', (object,), {'status_code': 500, 'text': 'No Body', 'ok': False})() - # --- STRATEGY B: Browser Fallback & Self-Heal --- - if resp.status_code == 500 or "//EX" in resp.text: - logger.warning(f"Standard fetch failed for {self.name} (Status: {resp.status_code}). Switching to Browser Fetch.") + # 5. Error Handling & Retry + failed = False + if "//EX" in resp.text: failed = True + if resp.status_code == 500: failed = True + + if failed: + logger.error(f"GWT Failure for {self.name} (Status: {resp.status_code}).") - if self.map_url: - # 1. Fetch data AND credentials via Browser - data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url) - - if data: - logger.info(f"Browser success! Self-healing {self.name} configuration...") - - # --- HEADER CLEANING FIX --- - # Instead of selecting specific headers, we exclude known transport headers. - # This preserves custom headers like 'coop.nisc.outagewebmap.configname' - excluded = { - 'content-length', 'host', 'connection', 'cookie', 'accept-encoding', - 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin' - } - - clean_headers = {} - for k, v in valid_headers.items(): - if k.lower() not in excluded: - clean_headers[k] = v - - # Ensure we force the correct Referer for next time - clean_headers['Referer'] = self.map_url + if is_retry: + logger.error(f"Retry failed for {self.name}. Aborting.") + return [] - # 3. 
Save to JSON so next run is FAST - new_settings = { - 'headers': clean_headers, - 'cookies': valid_cookies, - 'body': valid_body, - 'user_agent': valid_headers.get('user-agent') - } - update_provider_config(self.name, new_settings) - - return self._extract_outages(data) - - logger.error(f"Browser Fetch failed for {self.name}.") - return [] + if self.attempt_auto_repair(): + logger.info("Retrying fetch with new settings...") + return self.fetch(is_retry=True) + else: + return [] if not resp.ok: return [] text = resp.text if text.startswith('//OK'): text = text[4:] return self._extract_outages(json.loads(text)) - except Exception as e: logger.error(f"Fetch error {self.name}: {e}") - return [] + return [] + def _extract_outages(self, data_list): results = [] if not self.transformer: return []