clean-up working

2025-12-07 03:58:57 +00:00
parent f9f0fc5093
commit 4fb52f58d4
2 changed files with 86 additions and 91 deletions

get_rpc_config_auto.py

@@ -20,14 +20,11 @@ def analyze_gwt_response(response_text):
     return candidates
 def fetch_live_data(map_url):
     """
     Uses a real browser to fetch data AND capture the raw request details.
     """
     print(f"--- Browser Fetch: {map_url} ---")
     data_result = None
     captured_headers = None
     captured_cookies = None
-    captured_body = None # <--- New: Capture raw body
+    captured_body = None
     with sync_playwright() as p:
         browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled'])
@@ -39,7 +36,7 @@ def fetch_live_data(map_url):
         def handle_request(request):
             nonlocal captured_headers, captured_body
             if ".rpc" in request.url and request.method == "POST":
-                # Capture the request details blindly before we even know if it works
+                # Capture request blindly first
                 if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data:
                     captured_headers = request.headers
                     captured_body = request.post_data
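This hunk shows only the handler; the code that subscribes it to the page sits outside the diff. A minimal sketch of the usual Playwright sync-API wiring, assuming a networkidle wait and the names used here (not code from this commit):

from playwright.sync_api import sync_playwright

def capture_rpc(map_url):
    captured = {}

    def handle_request(request):
        # Same idea as above: grab any GWT .rpc POST before knowing if it succeeds
        if ".rpc" in request.url and request.method == "POST":
            captured["headers"] = request.headers
            captured["body"] = request.post_data

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        page.on("request", handle_request)  # subscribe before navigating
        page.goto(map_url, wait_until="networkidle")
        browser.close()
    return captured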
@@ -75,15 +72,14 @@ def get_fresh_config(map_url):
     data, headers, cookies, body = fetch_live_data(map_url)
     if headers and body:
-        # Minimal cleaning: Only remove headers that 'requests' MUST generate itself
-        # This keeps all custom NISC/GWT headers safe.
+        # Minimal Cleaning: Remove headers requests handles automatically
         forbidden = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding'}
         clean_headers = {k: v for k, v in headers.items() if k.lower() not in forbidden}
         return {
             'headers': clean_headers,
-            'body': body, # Save exact body
-            'url': headers.get('url', map_url.replace('.html', '') + '/GWT.rpc'), # Best guess URL if missing
+            'body': body,
+            'url': headers.get('url', map_url.replace('.html', '') + 'GWT.rpc'),
             'cookies': cookies,
             'user_agent': headers.get('user-agent')
         }
@@ -93,5 +89,4 @@ if __name__ == "__main__":
     url = input("Enter Map URL: ")
     res = get_fresh_config(url)
     if res:
-        print("Success! Captured Body length:", len(res['body']))
-        print("Captured Headers:", res['headers'].keys())
+        print("Success! Captured new config.")

(second file, name not shown)

@@ -5,13 +5,14 @@ import psycopg2
 import mercantile
 import logging
 import os
+import re
 from datetime import datetime, timezone, timedelta
 from abc import ABC, abstractmethod
 from urllib.parse import urlparse
 from pyproj import Transformer
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
-# Import the helper module
+# Import the helper module for auto-repair
 import get_rpc_config_auto
 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@@ -43,17 +44,15 @@ def save_providers(providers):
logger.info("Configuration saved to providers.json")
def update_provider_config(provider_name, new_settings):
"""Updates a specific provider in the JSON file safely"""
providers = load_providers()
updated = False
for p in providers:
if p.get('name') == provider_name:
if 'headers' in new_settings: p['headers'] = new_settings['headers']
if 'body' in new_settings: p['body'] = new_settings['body']
if 'url' in new_settings: p['url'] = new_settings['url']
if 'cookies' in new_settings: p['cookies'] = new_settings['cookies']
# <--- NEW: Save User-Agent
if 'user_agent' in new_settings: p['user_agent'] = new_settings['user_agent']
# Update all relevant fields
for key in ['headers', 'body', 'url', 'cookies', 'user_agent']:
if key in new_settings:
p[key] = new_settings[key]
p['last_auto_update'] = datetime.now(timezone.utc).isoformat()
updated = True
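A hypothetical call against the new merge loop: only the keys present in new_settings are overwritten, everything else in the provider entry survives (provider name and values are made up):

update_provider_config("Example Electric Co-op", {
    'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'cookies': [{'name': 'JSESSIONID', 'value': 'abc123', 'domain': 'example.coop', 'path': '/'}],
})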
@@ -240,11 +239,9 @@ class GwtRpcProvider(BaseProvider):
         self.state_filter = config.get('state_filter')
         self.map_url = config.get('map_url')
-        # 1. Set User-Agent (Dynamic > Default)
-        # We try to use the one from config if available (captured from actual browser)
-        ua = config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+        # 1. Base Headers
         self.session.headers.update({
-            'User-Agent': ua,
+            'User-Agent': config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'),
             'Accept': '*/*',
             'Sec-Fetch-Site': 'same-origin'
         })
@@ -252,7 +249,7 @@ class GwtRpcProvider(BaseProvider):
         parsed_url = urlparse(config.get('url'))
         self.session.headers.update({'Origin': f"{parsed_url.scheme}://{parsed_url.netloc}"})
-        # Load Cookies
+        # 2. Load Cookies
         if config.get('cookies'):
             for cookie in config['cookies']:
                 try:
@@ -267,6 +264,7 @@ class GwtRpcProvider(BaseProvider):
             'KY': {'lat_min': 36.4, 'lat_max': 39.2, 'lon_min': -89.6, 'lon_max': -81.9},
             'IA': {'lat_min': 40.3, 'lat_max': 43.6, 'lon_min': -96.7, 'lon_max': -90.1}
         }
         if config.get('epsg'):
             try:
                 self.transformer = Transformer.from_crs(f"EPSG:{config['epsg']}", "EPSG:4326", always_xy=True)
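For context, this is what self.transformer does downstream in _extract_outages: pyproj converting projected coordinates back to lon/lat. EPSG:3857 is an assumed example code, not taken from this commit:

from pyproj import Transformer

t = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
lon, lat = t.transform(-9771055.0, 5311972.0)  # an arbitrary Web Mercator point
print(round(lon, 2), round(lat, 2))            # roughly -87.78 43.1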
@@ -275,32 +273,55 @@ class GwtRpcProvider(BaseProvider):
     def attempt_auto_repair(self):
         if not self.map_url: return False
-        # ... (Cooldown check - keep as is) ...
+        # --- Cooldown Check ---
         last_update = self.config.get('last_auto_update')
         if last_update:
             try:
                 last_dt = datetime.fromisoformat(last_update)
                 if last_dt.tzinfo is None: last_dt = last_dt.replace(tzinfo=timezone.utc)
                 if datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS):
                     logger.info(f"Skipping auto-repair for {self.name} (Cooldown active).")
                     return False
             except ValueError: pass
         logger.info(f"Attempting Auto-Repair for {self.name}...")
         try:
-            # We expect 4 return values now
-            new_settings = get_rpc_config_auto.get_fresh_config(self.map_url)
+            # Expecting 4 values: data, headers, cookies, body
+            new_data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
-            if new_settings:
+            if valid_headers and valid_body:
                 logger.info(f"Repair successful! Updating {self.name}.")
-                # Update In-Memory
-                self.config.update(new_settings)
-                self.config['last_auto_update'] = datetime.now(timezone.utc).isoformat()
+                # Clean Headers (Blacklist approach)
+                excluded = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding', 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'}
+                clean_headers = {k: v for k, v in valid_headers.items() if k.lower() not in excluded}
-                # Update Session Cookies
+                # Ensure Referer is set correctly for next time
+                clean_headers['Referer'] = self.map_url
+                # Update In-Memory Config
+                current_time = datetime.now(timezone.utc).isoformat()
+                self.config['headers'] = clean_headers
+                self.config['body'] = valid_body
+                self.config['cookies'] = valid_cookies
+                self.config['user_agent'] = valid_headers.get('user-agent')
+                self.config['last_auto_update'] = current_time
+                # Update Session
                 self.session.cookies.clear()
-                if new_settings.get('cookies'):
-                    for c in new_settings['cookies']:
-                        self.session.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path'])
+                for cookie in valid_cookies:
+                    self.session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
+                if valid_headers.get('user-agent'):
+                    self.session.headers.update({'User-Agent': valid_headers.get('user-agent')})
-                # Update Session UA
-                if new_settings.get('user_agent'):
-                    self.session.headers.update({'User-Agent': new_settings['user_agent']})
-                # Persist to disk
+                # Save to Disk
+                new_settings = {
+                    'headers': clean_headers,
+                    'body': valid_body,
+                    'cookies': valid_cookies,
+                    'user_agent': valid_headers.get('user-agent')
+                }
                 update_provider_config(self.name, new_settings)
                 return True
         except Exception as e:
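The cooldown gate at the top of attempt_auto_repair, reduced to a standalone snippet (the constant's value here is assumed; the real one is defined elsewhere in this file):

from datetime import datetime, timezone, timedelta

AUTO_UPDATE_COOLDOWN_HOURS = 6  # assumed value for illustration
last_dt = datetime.fromisoformat("2025-12-07T01:00:00+00:00")
if last_dt.tzinfo is None:
    last_dt = last_dt.replace(tzinfo=timezone.utc)
on_cooldown = datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS)
print(on_cooldown)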
@@ -315,8 +336,8 @@ class GwtRpcProvider(BaseProvider):
         if not url: return []
         # --- STRATEGY A: Standard Requests (Fast) ---
         try:
+            # 3. Dynamic Origin Update
             parsed_url = urlparse(url)
             origin = f"{parsed_url.scheme}://{parsed_url.netloc}"
@@ -324,7 +345,7 @@ class GwtRpcProvider(BaseProvider):
             correct_referer = headers.get('Referer') or headers.get('x-gwt-module-base') or origin
             ua = headers.get('User-Agent', self.session.headers['User-Agent'])
-            if "Headless" in ua:
+            if "Headless" in ua: # Fallback safety
                 ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
             self.session.headers.update({
@@ -333,69 +354,48 @@ class GwtRpcProvider(BaseProvider):
                 'User-Agent': ua
             })
             # Prime if missing cookies or map_url is defined
             if self.map_url and not self.config.get('cookies'):
                 try: self.session.get(correct_referer, verify=False, timeout=10)
                 except: pass
             req_headers = headers.copy()
-            if 'Content-Type' not in req_headers: req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
+            req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
             req_headers['Referer'] = correct_referer
             req_headers['User-Agent'] = ua
-            # Debug log (Optional - disable if too noisy)
-            # logger.info(f"Sending Headers: {json.dumps(req_headers, indent=2)}")
+            # Only fetch if we have a body
+            if body:
+                resp = self.session.post(url, headers=req_headers, data=body, verify=False)
+            else:
+                resp = type('obj', (object,), {'status_code': 500, 'text': 'No Body', 'ok': False})()
-            # --- STRATEGY B: Browser Fallback & Self-Heal ---
-            if resp.status_code == 500 or "//EX" in resp.text:
-                logger.warning(f"Standard fetch failed for {self.name} (Status: {resp.status_code}). Switching to Browser Fetch.")
+            # 5. Error Handling & Retry
+            failed = False
+            if "//EX" in resp.text: failed = True
+            if resp.status_code == 500: failed = True
-                if self.map_url:
-                    # 1. Fetch data AND credentials via Browser
-                    data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
+            if failed:
+                logger.error(f"GWT Failure for {self.name} (Status: {resp.status_code}).")
-                    if data:
-                        logger.info(f"Browser success! Self-healing {self.name} configuration...")
+                if is_retry:
+                    logger.error(f"Retry failed for {self.name}. Aborting.")
+                    return []
-                        # --- HEADER CLEANING FIX ---
-                        # Instead of selecting specific headers, we exclude known transport headers.
-                        # This preserves custom headers like 'coop.nisc.outagewebmap.configname'
-                        excluded = {
-                            'content-length', 'host', 'connection', 'cookie', 'accept-encoding',
-                            'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'
-                        }
-                        clean_headers = {}
-                        for k, v in valid_headers.items():
-                            if k.lower() not in excluded:
-                                clean_headers[k] = v
-                        # Ensure we force the correct Referer for next time
-                        clean_headers['Referer'] = self.map_url
-                        # 3. Save to JSON so next run is FAST
-                        new_settings = {
-                            'headers': clean_headers,
-                            'cookies': valid_cookies,
-                            'body': valid_body,
-                            'user_agent': valid_headers.get('user-agent')
-                        }
-                        update_provider_config(self.name, new_settings)
-                        return self._extract_outages(data)
-                logger.error(f"Browser Fetch failed for {self.name}.")
+                if self.attempt_auto_repair():
+                    logger.info("Retrying fetch with new settings...")
+                    return self.fetch(is_retry=True)
+                else:
+                    return []
             if not resp.ok: return []
             text = resp.text
             if text.startswith('//OK'): text = text[4:]
             return self._extract_outages(json.loads(text))
         except Exception as e:
             logger.error(f"Fetch error {self.name}: {e}")
             return []
     def _extract_outages(self, data_list):
         results = []
         if not self.transformer: return []
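The //OK handling in fetch() in miniature: GWT-RPC responses prefix their JSON payload with //OK on success or //EX on a server-side exception. The payload below is fabricated for illustration:

import json

raw = '//OK[3,0,2,1,["outage","county"],0,7]'  # made-up GWT-RPC style payload
if raw.startswith('//EX'):
    raise RuntimeError("GWT server exception")
if raw.startswith('//OK'):
    data = json.loads(raw[4:])  # strip the 4-char marker, parse the JSON array
    print(data)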