clean-up working

2025-12-07 03:58:57 +00:00
parent f9f0fc5093
commit 4fb52f58d4
2 changed files with 86 additions and 91 deletions

View File

@@ -20,14 +20,11 @@ def analyze_gwt_response(response_text):
     return candidates
 def fetch_live_data(map_url):
-    """
-    Uses a real browser to fetch data AND capture the raw request details.
-    """
     print(f"--- Browser Fetch: {map_url} ---")
     data_result = None
     captured_headers = None
     captured_cookies = None
-    captured_body = None # <--- New: Capture raw body
+    captured_body = None
     with sync_playwright() as p:
         browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled'])
@@ -39,7 +36,7 @@ def fetch_live_data(map_url):
         def handle_request(request):
             nonlocal captured_headers, captured_body
             if ".rpc" in request.url and request.method == "POST":
-                # Capture the request details blindly before we even know if it works
+                # Capture request blindly first
                 if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data:
                     captured_headers = request.headers
                     captured_body = request.post_data
@@ -75,15 +72,14 @@ def get_fresh_config(map_url):
     data, headers, cookies, body = fetch_live_data(map_url)
     if headers and body:
-        # Minimal cleaning: Only remove headers that 'requests' MUST generate itself
-        # This keeps all custom NISC/GWT headers safe.
+        # Minimal Cleaning: Remove headers requests handles automatically
         forbidden = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding'}
         clean_headers = {k: v for k, v in headers.items() if k.lower() not in forbidden}
         return {
             'headers': clean_headers,
-            'body': body, # Save exact body
-            'url': headers.get('url', map_url.replace('.html', '') + '/GWT.rpc'), # Best guess URL if missing
+            'body': body,
+            'url': headers.get('url', map_url.replace('.html', '') + 'GWT.rpc'),
             'cookies': cookies,
             'user_agent': headers.get('user-agent')
         }
@@ -93,5 +89,4 @@ if __name__ == "__main__":
     url = input("Enter Map URL: ")
     res = get_fresh_config(url)
     if res:
-        print("Success! Captured Body length:", len(res['body']))
-        print("Captured Headers:", res['headers'].keys())
+        print("Success! Captured new config.")

View File

@@ -5,13 +5,14 @@ import psycopg2
 import mercantile
 import logging
 import os
+import re
 from datetime import datetime, timezone, timedelta
 from abc import ABC, abstractmethod
 from urllib.parse import urlparse
 from pyproj import Transformer
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
-# Import the helper module
+# Import the helper module for auto-repair
 import get_rpc_config_auto
 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@@ -43,18 +44,16 @@ def save_providers(providers):
     logger.info("Configuration saved to providers.json")
 def update_provider_config(provider_name, new_settings):
-    """Updates a specific provider in the JSON file safely"""
     providers = load_providers()
     updated = False
     for p in providers:
         if p.get('name') == provider_name:
-            if 'headers' in new_settings: p['headers'] = new_settings['headers']
-            if 'body' in new_settings: p['body'] = new_settings['body']
-            if 'url' in new_settings: p['url'] = new_settings['url']
-            if 'cookies' in new_settings: p['cookies'] = new_settings['cookies']
-            # <--- NEW: Save User-Agent
-            if 'user_agent' in new_settings: p['user_agent'] = new_settings['user_agent']
+            # Update all relevant fields
+            for key in ['headers', 'body', 'url', 'cookies', 'user_agent']:
+                if key in new_settings:
+                    p[key] = new_settings[key]
             p['last_auto_update'] = datetime.now(timezone.utc).isoformat()
             updated = True
             break
@@ -240,11 +239,9 @@ class GwtRpcProvider(BaseProvider):
         self.state_filter = config.get('state_filter')
         self.map_url = config.get('map_url')
-        # 1. Set User-Agent (Dynamic > Default)
-        # We try to use the one from config if available (captured from actual browser)
-        ua = config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+        # 1. Base Headers
         self.session.headers.update({
-            'User-Agent': ua,
+            'User-Agent': config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'),
             'Accept': '*/*',
             'Sec-Fetch-Site': 'same-origin'
         })
@@ -252,7 +249,7 @@ class GwtRpcProvider(BaseProvider):
         parsed_url = urlparse(config.get('url'))
         self.session.headers.update({'Origin': f"{parsed_url.scheme}://{parsed_url.netloc}"})
-        # Load Cookies
+        # 2. Load Cookies
         if config.get('cookies'):
             for cookie in config['cookies']:
                 try:
@@ -267,6 +264,7 @@ class GwtRpcProvider(BaseProvider):
             'KY': {'lat_min': 36.4, 'lat_max': 39.2, 'lon_min': -89.6, 'lon_max': -81.9},
             'IA': {'lat_min': 40.3, 'lat_max': 43.6, 'lon_min': -96.7, 'lon_max': -90.1}
         }
         if config.get('epsg'):
             try:
                 self.transformer = Transformer.from_crs(f"EPSG:{config['epsg']}", "EPSG:4326", always_xy=True)
@@ -275,39 +273,62 @@ class GwtRpcProvider(BaseProvider):
     def attempt_auto_repair(self):
         if not self.map_url: return False
-        # ... (Cooldown check - keep as is) ...
+        # --- Cooldown Check ---
+        last_update = self.config.get('last_auto_update')
+        if last_update:
+            try:
+                last_dt = datetime.fromisoformat(last_update)
+                if last_dt.tzinfo is None: last_dt = last_dt.replace(tzinfo=timezone.utc)
+                if datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS):
+                    logger.info(f"Skipping auto-repair for {self.name} (Cooldown active).")
+                    return False
+            except ValueError: pass
         logger.info(f"Attempting Auto-Repair for {self.name}...")
         try:
-            # We expect 4 return values now
-            new_settings = get_rpc_config_auto.get_fresh_config(self.map_url)
-            if new_settings:
+            # Expecting 4 values: data, headers, cookies, body
+            new_data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
+            if valid_headers and valid_body:
                 logger.info(f"Repair successful! Updating {self.name}.")
-                # Update In-Memory
-                self.config.update(new_settings)
-                self.config['last_auto_update'] = datetime.now(timezone.utc).isoformat()
-                # Update Session Cookies
-                self.session.cookies.clear()
-                if new_settings.get('cookies'):
-                    for c in new_settings['cookies']:
-                        self.session.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path'])
-                # Update Session UA
-                if new_settings.get('user_agent'):
-                    self.session.headers.update({'User-Agent': new_settings['user_agent']})
-                # Persist to disk
+                # Clean Headers (Blacklist approach)
+                excluded = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding', 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'}
+                clean_headers = {k: v for k, v in valid_headers.items() if k.lower() not in excluded}
+                # Ensure Referer is set correctly for next time
+                clean_headers['Referer'] = self.map_url
+                # Update In-Memory Config
+                current_time = datetime.now(timezone.utc).isoformat()
+                self.config['headers'] = clean_headers
+                self.config['body'] = valid_body
+                self.config['cookies'] = valid_cookies
+                self.config['user_agent'] = valid_headers.get('user-agent')
+                self.config['last_auto_update'] = current_time
+                # Update Session
+                self.session.cookies.clear()
+                for cookie in valid_cookies:
+                    self.session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
+                if valid_headers.get('user-agent'):
+                    self.session.headers.update({'User-Agent': valid_headers.get('user-agent')})
+                # Save to Disk
+                new_settings = {
+                    'headers': clean_headers,
+                    'body': valid_body,
+                    'cookies': valid_cookies,
+                    'user_agent': valid_headers.get('user-agent')
+                }
                 update_provider_config(self.name, new_settings)
                 return True
         except Exception as e:
             logger.error(f"Auto-repair failed: {e}")
         return False
     def fetch(self, is_retry=False):
         url = self.config.get('url')
         headers = self.config.get('headers', {})
@@ -315,8 +336,8 @@ class GwtRpcProvider(BaseProvider):
         if not url: return []
-        # --- STRATEGY A: Standard Requests (Fast) ---
         try:
-            # 3. Dynamic Origin Update
             parsed_url = urlparse(url)
             origin = f"{parsed_url.scheme}://{parsed_url.netloc}"
@@ -324,78 +345,57 @@ class GwtRpcProvider(BaseProvider):
             correct_referer = headers.get('Referer') or headers.get('x-gwt-module-base') or origin
             ua = headers.get('User-Agent', self.session.headers['User-Agent'])
-            if "Headless" in ua:
+            if "Headless" in ua: # Fallback safety
                 ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
             self.session.headers.update({
                 'Origin': origin,
                 'Referer': correct_referer,
                 'User-Agent': ua
             })
-            # Prime if missing cookies or map_url is defined
             if self.map_url and not self.config.get('cookies'):
                 try: self.session.get(correct_referer, verify=False, timeout=10)
                 except: pass
             req_headers = headers.copy()
-            if 'Content-Type' not in req_headers: req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
+            req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
             req_headers['Referer'] = correct_referer
             req_headers['User-Agent'] = ua
-            # Debug log (Optional - disable if too noisy)
-            # logger.info(f"Sending Headers: {json.dumps(req_headers, indent=2)}")
-            resp = self.session.post(url, headers=req_headers, data=body, verify=False)
+            # Only fetch if we have a body
+            if body:
+                resp = self.session.post(url, headers=req_headers, data=body, verify=False)
+            else:
+                resp = type('obj', (object,), {'status_code': 500, 'text': 'No Body', 'ok': False})()
-            # --- STRATEGY B: Browser Fallback & Self-Heal ---
-            if resp.status_code == 500 or "//EX" in resp.text:
-                logger.warning(f"Standard fetch failed for {self.name} (Status: {resp.status_code}). Switching to Browser Fetch.")
-                if self.map_url:
-                    # 1. Fetch data AND credentials via Browser
-                    data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
-                    if data:
-                        logger.info(f"Browser success! Self-healing {self.name} configuration...")
-                        # --- HEADER CLEANING FIX ---
-                        # Instead of selecting specific headers, we exclude known transport headers.
-                        # This preserves custom headers like 'coop.nisc.outagewebmap.configname'
-                        excluded = {
-                            'content-length', 'host', 'connection', 'cookie', 'accept-encoding',
-                            'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'
-                        }
-                        clean_headers = {}
-                        for k, v in valid_headers.items():
-                            if k.lower() not in excluded:
-                                clean_headers[k] = v
-                        # Ensure we force the correct Referer for next time
-                        clean_headers['Referer'] = self.map_url
-                        # 3. Save to JSON so next run is FAST
-                        new_settings = {
-                            'headers': clean_headers,
-                            'cookies': valid_cookies,
-                            'body': valid_body,
-                            'user_agent': valid_headers.get('user-agent')
-                        }
-                        update_provider_config(self.name, new_settings)
-                        return self._extract_outages(data)
-                logger.error(f"Browser Fetch failed for {self.name}.")
-                return []
+            # 5. Error Handling & Retry
+            failed = False
+            if "//EX" in resp.text: failed = True
+            if resp.status_code == 500: failed = True
+            if failed:
+                logger.error(f"GWT Failure for {self.name} (Status: {resp.status_code}).")
+                if is_retry:
+                    logger.error(f"Retry failed for {self.name}. Aborting.")
+                    return []
+                if self.attempt_auto_repair():
+                    logger.info("Retrying fetch with new settings...")
+                    return self.fetch(is_retry=True)
+                else:
+                    return []
             if not resp.ok: return []
             text = resp.text
             if text.startswith('//OK'): text = text[4:]
             return self._extract_outages(json.loads(text))
         except Exception as e:
             logger.error(f"Fetch error {self.name}: {e}")
             return []
     def _extract_outages(self, data_list):
         results = []
         if not self.transformer: return []
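A side note on the response handling kept at the end of fetch() above: GWT-RPC responses are prefixed with //OK on success and //EX on a server-side exception, and the remainder of a success response is JSON. A minimal standalone sketch of that parsing step, with a made-up sample payload:

import json

def parse_gwt_rpc(text):
    # Mirrors the handling in fetch(): //EX marks a server exception,
    # //OK prefixes a JSON payload that can be fed to json.loads.
    if text.startswith('//EX'):
        raise RuntimeError("GWT-RPC returned a server-side exception")
    if text.startswith('//OK'):
        text = text[4:]
    return json.loads(text)

print(parse_gwt_rpc('//OK[3,["sample"],0,7]'))  # made-up payload -> [3, ['sample'], 0, 7]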