clean-up working
@@ -20,14 +20,11 @@ def analyze_gwt_response(response_text):
     return candidates
 
 def fetch_live_data(map_url):
-    """
-    Uses a real browser to fetch data AND capture the raw request details.
-    """
     print(f"--- Browser Fetch: {map_url} ---")
     data_result = None
     captured_headers = None
     captured_cookies = None
-    captured_body = None # <--- New: Capture raw body
+    captured_body = None
 
     with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled'])
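Only the launch line appears in this hunk, so for orientation: a sketch of how a Playwright capture pass like this is typically driven. The navigation and teardown here are assumptions, not part of the commit.

    from playwright.sync_api import sync_playwright

    def browse_once(map_url):
        # Hypothetical driver: launch, navigate, harvest cookies, close.
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled'])  # hide the automation flag
            page = browser.new_page()
            page.goto(map_url, wait_until="networkidle")  # let the GWT app fire its RPC calls
            cookies = page.context.cookies()  # list of {name, value, domain, path, ...} dicts
            browser.close()
            return cookies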
@@ -39,7 +36,7 @@ def fetch_live_data(map_url):
        def handle_request(request):
            nonlocal captured_headers, captured_body
            if ".rpc" in request.url and request.method == "POST":
-                # Capture the request details blindly before we even know if it works
+                # Capture request blindly first
                if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data:
                    captured_headers = request.headers
                    captured_body = request.post_data
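The handler above only captures traffic once it is attached to the page's request event before navigation. That wiring sits outside this hunk; with Playwright's sync API it would look roughly like:

    # Assumed registration (not shown in the hunk): route every request
    # through the capture hook, then navigate so the GWT app fires its POST.
    page.on("request", handle_request)
    page.goto(map_url)

One caveat: `request.post_data` can be `None` even for POST requests, so guarding with `request.post_data and "getOutages" in request.post_data` would be slightly more defensive.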
@@ -75,15 +72,14 @@ def get_fresh_config(map_url):
     data, headers, cookies, body = fetch_live_data(map_url)
 
     if headers and body:
-        # Minimal cleaning: Only remove headers that 'requests' MUST generate itself
-        # This keeps all custom NISC/GWT headers safe.
+        # Minimal Cleaning: Remove headers requests handles automatically
         forbidden = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding'}
         clean_headers = {k: v for k, v in headers.items() if k.lower() not in forbidden}
 
         return {
             'headers': clean_headers,
-            'body': body, # Save exact body
-            'url': headers.get('url', map_url.replace('.html', '') + '/GWT.rpc'), # Best guess URL if missing
+            'body': body,
+            'url': headers.get('url', map_url.replace('.html', '') + 'GWT.rpc'),
             'cookies': cookies,
             'user_agent': headers.get('user-agent')
         }
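The dict returned here is a complete replay recipe for plain requests: headers minus the transport set, the exact GWT-RPC body, cookies, and the endpoint URL. A minimal replay sketch under those assumptions:

    import requests

    def replay(config):
        # Re-issue the browser-captured GWT-RPC call without a browser.
        s = requests.Session()
        for c in config.get('cookies') or []:
            s.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path'])
        resp = s.post(config['url'], headers=config['headers'],
                      data=config['body'], verify=False)
        return resp.text  # //OK prefix on success, //EX on a server-side exception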
@@ -93,5 +89,4 @@ if __name__ == "__main__":
     url = input("Enter Map URL: ")
     res = get_fresh_config(url)
     if res:
-        print("Success! Captured Body length:", len(res['body']))
-        print("Captured Headers:", res['headers'].keys())
+        print("Success! Captured new config.")
newpower2.py
@@ -5,13 +5,14 @@ import psycopg2
 import mercantile
 import logging
 import os
+import re
 from datetime import datetime, timezone, timedelta
 from abc import ABC, abstractmethod
 from urllib.parse import urlparse
 from pyproj import Transformer
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
 
-# Import the helper module
+# Import the helper module for auto-repair
 import get_rpc_config_auto
 
 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@@ -43,18 +44,16 @@ def save_providers(providers):
     logger.info("Configuration saved to providers.json")
 
 def update_provider_config(provider_name, new_settings):
+    """Updates a specific provider in the JSON file safely"""
     providers = load_providers()
     updated = False
     for p in providers:
        if p.get('name') == provider_name:
-            if 'headers' in new_settings: p['headers'] = new_settings['headers']
-            if 'body' in new_settings: p['body'] = new_settings['body']
-            if 'url' in new_settings: p['url'] = new_settings['url']
-            if 'cookies' in new_settings: p['cookies'] = new_settings['cookies']
-
-            # <--- NEW: Save User-Agent
-            if 'user_agent' in new_settings: p['user_agent'] = new_settings['user_agent']
+            # Update all relevant fields
+            for key in ['headers', 'body', 'url', 'cookies', 'user_agent']:
+                if key in new_settings:
+                    p[key] = new_settings[key]
 
            p['last_auto_update'] = datetime.now(timezone.utc).isoformat()
            updated = True
            break
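Collapsing the five copy-pasted `if 'key' in new_settings` lines into one loop keeps partial updates safe: only the keys actually present in `new_settings` are written, everything else on the provider record is left alone. For illustration (the provider name here is hypothetical):

    # Partial update: only the body changes.
    update_provider_config('Example Coop', {'body': captured_body})

    # Full refresh, as the auto-repair path does.
    update_provider_config('Example Coop', {
        'headers': clean_headers,
        'body': valid_body,
        'cookies': valid_cookies,
        'user_agent': valid_headers.get('user-agent'),
    })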
@@ -240,11 +239,9 @@ class GwtRpcProvider(BaseProvider):
         self.state_filter = config.get('state_filter')
         self.map_url = config.get('map_url')
 
-        # 1. Set User-Agent (Dynamic > Default)
-        # We try to use the one from config if available (captured from actual browser)
-        ua = config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+        # 1. Base Headers
         self.session.headers.update({
-            'User-Agent': ua,
+            'User-Agent': config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'),
            'Accept': '*/*',
            'Sec-Fetch-Site': 'same-origin'
        })
@@ -252,7 +249,7 @@ class GwtRpcProvider(BaseProvider):
         parsed_url = urlparse(config.get('url'))
         self.session.headers.update({'Origin': f"{parsed_url.scheme}://{parsed_url.netloc}"})
 
-        # Load Cookies
+        # 2. Load Cookies
         if config.get('cookies'):
            for cookie in config['cookies']:
                try:
@@ -267,6 +264,7 @@ class GwtRpcProvider(BaseProvider):
             'KY': {'lat_min': 36.4, 'lat_max': 39.2, 'lon_min': -89.6, 'lon_max': -81.9},
             'IA': {'lat_min': 40.3, 'lat_max': 43.6, 'lon_min': -96.7, 'lon_max': -90.1}
         }
+
         if config.get('epsg'):
            try:
                self.transformer = Transformer.from_crs(f"EPSG:{config['epsg']}", "EPSG:4326", always_xy=True)
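The `always_xy=True` flag on the transformer matters: it forces (x, y), i.e. (easting/lon, northing/lat) ordering regardless of the axis order the CRS declares. A quick standalone check with pyproj, assuming the common EPSG:3857 web-mercator case:

    from pyproj import Transformer

    # Web mercator metres -> WGS84 lon/lat, x-first on both sides.
    t = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
    lon, lat = t.transform(-10_000_000.0, 4_500_000.0)
    print(f"{lon:.2f}, {lat:.2f}")  # approximately -89.83, 37.44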
@@ -275,39 +273,62 @@ class GwtRpcProvider(BaseProvider):
     def attempt_auto_repair(self):
         if not self.map_url: return False
 
-        # ... (Cooldown check - keep as is) ...
+        # --- Cooldown Check ---
+        last_update = self.config.get('last_auto_update')
+        if last_update:
+            try:
+                last_dt = datetime.fromisoformat(last_update)
+                if last_dt.tzinfo is None: last_dt = last_dt.replace(tzinfo=timezone.utc)
+                if datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS):
+                    logger.info(f"Skipping auto-repair for {self.name} (Cooldown active).")
+                    return False
+            except ValueError: pass
+
         logger.info(f"Attempting Auto-Repair for {self.name}...")
 
         try:
-            # We expect 4 return values now
-            new_settings = get_rpc_config_auto.get_fresh_config(self.map_url)
+            # Expecting 4 values: data, headers, cookies, body
+            new_data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
 
-            if new_settings:
+            if valid_headers and valid_body:
                logger.info(f"Repair successful! Updating {self.name}.")
 
-                # Update In-Memory
-                self.config.update(new_settings)
-                self.config['last_auto_update'] = datetime.now(timezone.utc).isoformat()
+                # Clean Headers (Blacklist approach)
+                excluded = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding', 'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'}
+                clean_headers = {k: v for k, v in valid_headers.items() if k.lower() not in excluded}
 
-                # Update Session Cookies
-                self.session.cookies.clear()
-                if new_settings.get('cookies'):
-                    for c in new_settings['cookies']:
-                        self.session.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path'])
-
-                # Update Session UA
-                if new_settings.get('user_agent'):
-                    self.session.headers.update({'User-Agent': new_settings['user_agent']})
-
-                # Persist to disk
+                # Ensure Referer is set correctly for next time
+                clean_headers['Referer'] = self.map_url
+
+                # Update In-Memory Config
+                current_time = datetime.now(timezone.utc).isoformat()
+                self.config['headers'] = clean_headers
+                self.config['body'] = valid_body
+                self.config['cookies'] = valid_cookies
+                self.config['user_agent'] = valid_headers.get('user-agent')
+                self.config['last_auto_update'] = current_time
+
+                # Update Session
+                self.session.cookies.clear()
+                for cookie in valid_cookies:
+                    self.session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
+                if valid_headers.get('user_agent'):
+                    self.session.headers.update({'User-Agent': valid_headers.get('user-agent')})
+
+                # Save to Disk
+                new_settings = {
+                    'headers': clean_headers,
+                    'body': valid_body,
+                    'cookies': valid_cookies,
+                    'user_agent': valid_headers.get('user-agent')
+                }
                update_provider_config(self.name, new_settings)
                return True
         except Exception as e:
            logger.error(f"Auto-repair failed: {e}")
 
         return False
 
     def fetch(self, is_retry=False):
         url = self.config.get('url')
         headers = self.config.get('headers', {})
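Two notes on the rewritten repair path. First, the cooldown guard is what keeps a permanently broken provider from relaunching a browser on every polling cycle; isolated as a predicate (a sketch, with `AUTO_UPDATE_COOLDOWN_HOURS` defined elsewhere in the module) it reads:

    from datetime import datetime, timezone, timedelta

    def cooldown_active(last_update_iso, cooldown_hours):
        """True while the last auto-update is inside the cooldown window."""
        if not last_update_iso:
            return False
        try:
            last_dt = datetime.fromisoformat(last_update_iso)
            if last_dt.tzinfo is None:  # tolerate naive timestamps from older configs
                last_dt = last_dt.replace(tzinfo=timezone.utc)
            return datetime.now(timezone.utc) - last_dt < timedelta(hours=cooldown_hours)
        except ValueError:  # a malformed timestamp should not block a repair
            return False

Second, a likely bug in the session-update block: `if valid_headers.get('user_agent'):` checks an underscore key, but captured HTTP headers are keyed `user-agent`, so that guard almost certainly never fires and the session User-Agent is only refreshed via `clean_headers`.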
@@ -315,8 +336,8 @@ class GwtRpcProvider(BaseProvider):
 
         if not url: return []
 
-        # --- STRATEGY A: Standard Requests (Fast) ---
         try:
+            # 3. Dynamic Origin Update
            parsed_url = urlparse(url)
            origin = f"{parsed_url.scheme}://{parsed_url.netloc}"
 
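Recomputing Origin from the request URL at fetch time (rather than once in `__init__`) means a provider whose `url` was just rewritten by auto-repair sends a matching Origin on the very next attempt. With the stdlib parser:

    from urllib.parse import urlparse

    # Hypothetical endpoint, for illustration only.
    url = "https://outagemap.example-coop.com/maps/outageWebMap/GWT.rpc"
    p = urlparse(url)
    print(f"{p.scheme}://{p.netloc}")  # https://outagemap.example-coop.com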
@@ -324,78 +345,57 @@ class GwtRpcProvider(BaseProvider):
             correct_referer = headers.get('Referer') or headers.get('x-gwt-module-base') or origin
 
             ua = headers.get('User-Agent', self.session.headers['User-Agent'])
-            if "Headless" in ua:
+            if "Headless" in ua: # Fallback safety
                ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
 
            self.session.headers.update({
                'Origin': origin,
                'Referer': correct_referer,
                'User-Agent': ua
            })
 
+            # Prime if missing cookies or map_url is defined
            if self.map_url and not self.config.get('cookies'):
                try: self.session.get(correct_referer, verify=False, timeout=10)
                except: pass
 
            req_headers = headers.copy()
-            if 'Content-Type' not in req_headers: req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
+            req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
            req_headers['Referer'] = correct_referer
            req_headers['User-Agent'] = ua
 
-            # Debug log (Optional - disable if too noisy)
-            # logger.info(f"Sending Headers: {json.dumps(req_headers, indent=2)}")
-
-            resp = self.session.post(url, headers=req_headers, data=body, verify=False)
+            # Only fetch if we have a body
+            if body:
+                resp = self.session.post(url, headers=req_headers, data=body, verify=False)
+            else:
+                resp = type('obj', (object,), {'status_code': 500, 'text': 'No Body', 'ok': False})()
 
-            # --- STRATEGY B: Browser Fallback & Self-Heal ---
-            if resp.status_code == 500 or "//EX" in resp.text:
-                logger.warning(f"Standard fetch failed for {self.name} (Status: {resp.status_code}). Switching to Browser Fetch.")
+            # 5. Error Handling & Retry
+            failed = False
+            if "//EX" in resp.text: failed = True
+            if resp.status_code == 500: failed = True
 
-                if self.map_url:
-                    # 1. Fetch data AND credentials via Browser
-                    data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
-
-                    if data:
-                        logger.info(f"Browser success! Self-healing {self.name} configuration...")
-
-                        # --- HEADER CLEANING FIX ---
-                        # Instead of selecting specific headers, we exclude known transport headers.
-                        # This preserves custom headers like 'coop.nisc.outagewebmap.configname'
-                        excluded = {
-                            'content-length', 'host', 'connection', 'cookie', 'accept-encoding',
-                            'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'
-                        }
-
-                        clean_headers = {}
-                        for k, v in valid_headers.items():
-                            if k.lower() not in excluded:
-                                clean_headers[k] = v
-
-                        # Ensure we force the correct Referer for next time
-                        clean_headers['Referer'] = self.map_url
-
-                        # 3. Save to JSON so next run is FAST
-                        new_settings = {
-                            'headers': clean_headers,
-                            'cookies': valid_cookies,
-                            'body': valid_body,
-                            'user_agent': valid_headers.get('user-agent')
-                        }
-                        update_provider_config(self.name, new_settings)
-
-                        return self._extract_outages(data)
-
-                logger.error(f"Browser Fetch failed for {self.name}.")
-                return []
+            if failed:
+                logger.error(f"GWT Failure for {self.name} (Status: {resp.status_code}).")
+
+                if is_retry:
+                    logger.error(f"Retry failed for {self.name}. Aborting.")
+                    return []
+
+                if self.attempt_auto_repair():
+                    logger.info("Retrying fetch with new settings...")
+                    return self.fetch(is_retry=True)
+                else:
+                    return []
 
            if not resp.ok: return []
            text = resp.text
            if text.startswith('//OK'): text = text[4:]
            return self._extract_outages(json.loads(text))
 
         except Exception as e:
            logger.error(f"Fetch error {self.name}: {e}")
            return []
 
     def _extract_outages(self, data_list):
         results = []
         if not self.transformer: return []
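The tail of the fetch path is the GWT-RPC envelope handling: a successful response arrives as `//OK` followed by a JSON payload, a server-side exception as `//EX`. A minimal decoder matching the logic above (a sketch, not the project's API):

    import json

    def decode_gwt_rpc(text):
        # Strip the GWT-RPC envelope and parse the payload.
        if "//EX" in text:
            raise ValueError("GWT-RPC server-side exception")
        if text.startswith('//OK'):
            text = text[4:]
        return json.loads(text)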