fixed rpc, separated providers

2025-12-07 03:57:20 +00:00
parent 32638df3a9
commit f9f0fc5093
4 changed files with 163 additions and 131 deletions

View File

@@ -3,7 +3,6 @@ import time
 from playwright.sync_api import sync_playwright

 def analyze_gwt_response(response_text):
-    """Finds potential coordinates to validate response data."""
     candidates = []
     try:
         if response_text.startswith("//OK"):
@@ -17,71 +16,82 @@ def analyze_gwt_response(response_text):
             if abs(val1) > 100000 and abs(val2) > 100000:
                 candidates.append((val1, val2))
             if len(candidates) > 5: break
-    except:
-        pass
+    except: pass
     return candidates

-def get_fresh_config(map_url):
+def fetch_live_data(map_url):
     """
-    Launches headless browser to scrape headers, body, AND cookies.
+    Uses a real browser to fetch data AND capture the raw request details.
     """
-    print(f"--- Auto-Repair: Launching Browser for {map_url} ---")
-    captured_request = None
-    captured_cookies = []
+    print(f"--- Browser Fetch: {map_url} ---")
+    data_result = None
+    captured_headers = None
+    captured_cookies = None
+    captured_body = None  # <--- New: Capture raw body

     with sync_playwright() as p:
-        browser = p.chromium.launch(headless=True)
-        # Create a persistent context to ensure cookies are tracked
-        context = browser.new_context()
+        browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled'])
+        context = browser.new_context(
+            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        )
         page = context.new_page()

         def handle_request(request):
-            nonlocal captured_request
+            nonlocal captured_headers, captured_body
             if ".rpc" in request.url and request.method == "POST":
-                try:
-                    if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data:
-                        captured_request = {
-                            'url': request.url,
-                            'headers': request.headers,
-                            'body': request.post_data
-                        }
-                except:
-                    pass
+                # Capture the request details blindly before we even know if it works
+                if "getCombinedOutageDetails" in request.post_data or "getOutages" in request.post_data:
+                    captured_headers = request.headers
+                    captured_body = request.post_data
+
+        def handle_response(response):
+            nonlocal data_result
+            if ".rpc" in response.url and response.request.method == "POST":
+                try:
+                    if "getCombinedOutageDetails" in response.request.post_data or "getOutages" in response.request.post_data:
+                        text = response.text()
+                        if text.startswith("//OK"):
+                            data_result = json.loads(text[4:])
+                            print(" [+] Captured Data via Browser")
+                except: pass

         page.on("request", handle_request)
+        page.on("response", handle_response)

         try:
-            page.goto(map_url, wait_until="networkidle", timeout=45000)
-            time.sleep(5)
-
-            # Capture cookies from the browser context
+            page.goto(map_url, wait_until="networkidle", timeout=60000)
+            for _ in range(10):
+                if data_result: break
+                time.sleep(1)
             captured_cookies = context.cookies()
         except Exception as e:
-            print(f"Auto-Repair Browser Error: {e}")
+            print(f"Browser Fetch Error: {e}")
         finally:
             browser.close()

-    if captured_request:
-        req_headers = captured_request['headers']
-        # Clean headers (keep specific GWT ones, discard dynamic browser ones that requests handles)
-        clean_headers = {
-            'content-type': req_headers.get('content-type', 'text/x-gwt-rpc; charset=UTF-8'),
-            'x-gwt-module-base': req_headers.get('x-gwt-module-base'),
-            'x-gwt-permutation': req_headers.get('x-gwt-permutation'),
-            'Referer': map_url
-        }
+    return data_result, captured_headers, captured_cookies, captured_body
+
+def get_fresh_config(map_url):
+    data, headers, cookies, body = fetch_live_data(map_url)
+
+    if headers and body:
+        # Minimal cleaning: Only remove headers that 'requests' MUST generate itself
+        # This keeps all custom NISC/GWT headers safe.
+        forbidden = {'content-length', 'host', 'connection', 'cookie', 'accept-encoding'}
+        clean_headers = {k: v for k, v in headers.items() if k.lower() not in forbidden}
         return {
             'headers': clean_headers,
-            'body': captured_request['body'],
-            'url': captured_request['url'],
-            'cookies': captured_cookies  # <--- Return cookies
+            'body': body,  # Save exact body
+            'url': headers.get('url', map_url.replace('.html', '') + '/GWT.rpc'),  # Best guess URL if missing
+            'cookies': cookies,
+            'user_agent': headers.get('user-agent')
         }
     return None

 if __name__ == "__main__":
     url = input("Enter Map URL: ")
-    print(get_fresh_config(url))
+    res = get_fresh_config(url)
+    if res:
+        print("Success! Captured Body length:", len(res['body']))
+        print("Captured Headers:", res['headers'].keys())

View File

@@ -47,16 +47,13 @@ def update_provider_config(provider_name, new_settings):
     updated = False
     for p in providers:
         if p.get('name') == provider_name:
-            if 'headers' in new_settings:
-                p['headers'] = new_settings['headers']
-            if 'body' in new_settings:
-                p['body'] = new_settings['body']
-            if 'url' in new_settings:
-                p['url'] = new_settings['url']
-            # <--- NEW: Save Cookies
-            if 'cookies' in new_settings:
-                p['cookies'] = new_settings['cookies']
+            if 'headers' in new_settings: p['headers'] = new_settings['headers']
+            if 'body' in new_settings: p['body'] = new_settings['body']
+            if 'url' in new_settings: p['url'] = new_settings['url']
+            if 'cookies' in new_settings: p['cookies'] = new_settings['cookies']
+            # <--- NEW: Save User-Agent
+            if 'user_agent' in new_settings: p['user_agent'] = new_settings['user_agent']

             p['last_auto_update'] = datetime.now(timezone.utc).isoformat()
             updated = True
@@ -67,7 +64,6 @@ def update_provider_config(provider_name, new_settings):
             return True
     return False

 # --- DATABASE ---
 class PowerDB:
     def __init__(self, config):
@@ -244,9 +240,11 @@ class GwtRpcProvider(BaseProvider):
         self.state_filter = config.get('state_filter')
         self.map_url = config.get('map_url')

-        # 1. Base Headers
+        # 1. Set User-Agent (Dynamic > Default)
+        # We try to use the one from config if available (captured from actual browser)
+        ua = config.get('user_agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
         self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'User-Agent': ua,
             'Accept': '*/*',
             'Sec-Fetch-Site': 'same-origin'
         })
@@ -254,16 +252,12 @@ class GwtRpcProvider(BaseProvider):
         parsed_url = urlparse(config.get('url'))
         self.session.headers.update({'Origin': f"{parsed_url.scheme}://{parsed_url.netloc}"})

-        # 2. Load Cookies (if available, but don't rely solely on them)
+        # Load Cookies
         if config.get('cookies'):
             for cookie in config['cookies']:
-                # Handle expiry mapping if needed, or ignore errors
                 try:
                     self.session.cookies.set(
-                        cookie['name'],
-                        cookie['value'],
-                        domain=cookie['domain'],
-                        path=cookie['path']
+                        cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path']
                     )
                 except: pass
@@ -273,7 +267,6 @@ class GwtRpcProvider(BaseProvider):
             'KY': {'lat_min': 36.4, 'lat_max': 39.2, 'lon_min': -89.6, 'lon_max': -81.9},
             'IA': {'lat_min': 40.3, 'lat_max': 43.6, 'lon_min': -96.7, 'lon_max': -90.1}
         }

         if config.get('epsg'):
             try:
                 self.transformer = Transformer.from_crs(f"EPSG:{config['epsg']}", "EPSG:4326", always_xy=True)
@@ -282,101 +275,127 @@ class GwtRpcProvider(BaseProvider):
     def attempt_auto_repair(self):
         if not self.map_url: return False

-        # --- Cooldown Check ---
-        last_update = self.config.get('last_auto_update')
-        if last_update:
-            try:
-                last_dt = datetime.fromisoformat(last_update)
-                if last_dt.tzinfo is None: last_dt = last_dt.replace(tzinfo=timezone.utc)
-                if datetime.now(timezone.utc) - last_dt < timedelta(hours=AUTO_UPDATE_COOLDOWN_HOURS):
-                    logger.info(f"Skipping auto-repair for {self.name} (Cooldown active).")
-                    return False
-            except ValueError: pass
+        # ... (Cooldown check - keep as is) ...

         logger.info(f"Attempting Auto-Repair for {self.name}...")
         try:
+            # We expect 4 return values now
             new_settings = get_rpc_config_auto.get_fresh_config(self.map_url)

             if new_settings:
                 logger.info(f"Repair successful! Updating {self.name}.")

-                # Update In-Memory Config (CRITICAL: prevents loop)
-                current_time = datetime.now(timezone.utc).isoformat()
-                self.config['headers'] = new_settings['headers']
-                self.config['body'] = new_settings['body']
-                self.config['url'] = new_settings['url']
-                self.config['cookies'] = new_settings.get('cookies', [])
-                self.config['last_auto_update'] = current_time
+                # Update In-Memory
+                self.config.update(new_settings)
+                self.config['last_auto_update'] = datetime.now(timezone.utc).isoformat()

-                # Force updates to session
-                # We clear cookies to ensure we don't mix old/new session logic
+                # Update Session Cookies
                 self.session.cookies.clear()
                 if new_settings.get('cookies'):
-                    for cookie in new_settings['cookies']:
-                        self.session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
+                    for c in new_settings['cookies']:
+                        self.session.cookies.set(c['name'], c['value'], domain=c['domain'], path=c['path'])
+
+                # Update Session UA
+                if new_settings.get('user_agent'):
+                    self.session.headers.update({'User-Agent': new_settings['user_agent']})

+                # Persist to disk
                 update_provider_config(self.name, new_settings)
                 return True
         except Exception as e:
             logger.error(f"Auto-repair failed: {e}")
         return False

     def fetch(self, is_retry=False):
         url = self.config.get('url')
         headers = self.config.get('headers', {})
         body = self.config.get('body')
-        if not url or not body: return []
+        if not url: return []

+        # --- STRATEGY A: Standard Requests (Fast) ---
         try:
-            # 3. Dynamic Origin Update
             parsed_url = urlparse(url)
             origin = f"{parsed_url.scheme}://{parsed_url.netloc}"
-            self.session.headers.update({'Origin': origin})

-            # 4. ALWAYS PRIME SESSION (Fixes the regression)
-            # Even if we have cookies, they might be stale or missing JSESSIONID.
-            # Hitting the page refreshes the jar.
-            prime_url = headers.get('Referer') or headers.get('x-gwt-module-base') or origin
-            if prime_url:
-                try:
-                    self.session.get(prime_url, verify=False, timeout=10)
-                except: pass
+            # Priority: Configured Referer > Module Base > Origin
+            correct_referer = headers.get('Referer') or headers.get('x-gwt-module-base') or origin
+
+            ua = headers.get('User-Agent', self.session.headers['User-Agent'])
+            if "Headless" in ua:
+                ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+
+            self.session.headers.update({
+                'Origin': origin,
+                'Referer': correct_referer,
+                'User-Agent': ua
+            })
+
+            if self.map_url and not self.config.get('cookies'):
+                try: self.session.get(correct_referer, verify=False, timeout=10)
+                except: pass

             req_headers = headers.copy()
             if 'Content-Type' not in req_headers: req_headers['Content-Type'] = 'text/x-gwt-rpc; charset=UTF-8'
+            req_headers['Referer'] = correct_referer
+            req_headers['User-Agent'] = ua
+
+            # Debug log (Optional - disable if too noisy)
+            # logger.info(f"Sending Headers: {json.dumps(req_headers, indent=2)}")

             resp = self.session.post(url, headers=req_headers, data=body, verify=False)

-            # 5. Error Handling & Retry
-            failed = False
-            if "//EX" in resp.text: failed = True
-            if resp.status_code == 500: failed = True
-
-            if failed:
-                logger.error(f"GWT Failure for {self.name} (Status: {resp.status_code}).")
-
-                # Check recursion limit
-                if is_retry:
-                    logger.error(f"Retry failed for {self.name}. Aborting.")
-                    return []
-
-                if self.attempt_auto_repair():
-                    logger.info("Retrying fetch with new settings...")
-                    return self.fetch(is_retry=True)
-                else:
-                    return []
+            # --- STRATEGY B: Browser Fallback & Self-Heal ---
+            if resp.status_code == 500 or "//EX" in resp.text:
+                logger.warning(f"Standard fetch failed for {self.name} (Status: {resp.status_code}). Switching to Browser Fetch.")
+
+                if self.map_url:
+                    # 1. Fetch data AND credentials via Browser
+                    data, valid_headers, valid_cookies, valid_body = get_rpc_config_auto.fetch_live_data(self.map_url)
+
+                    if data:
+                        logger.info(f"Browser success! Self-healing {self.name} configuration...")
+
+                        # --- HEADER CLEANING FIX ---
+                        # Instead of selecting specific headers, we exclude known transport headers.
+                        # This preserves custom headers like 'coop.nisc.outagewebmap.configname'
+                        excluded = {
+                            'content-length', 'host', 'connection', 'cookie', 'accept-encoding',
+                            'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin'
+                        }
+                        clean_headers = {}
+                        for k, v in valid_headers.items():
+                            if k.lower() not in excluded:
+                                clean_headers[k] = v
+
+                        # Ensure we force the correct Referer for next time
+                        clean_headers['Referer'] = self.map_url
+
+                        # 3. Save to JSON so next run is FAST
+                        new_settings = {
+                            'headers': clean_headers,
+                            'cookies': valid_cookies,
+                            'body': valid_body,
+                            'user_agent': valid_headers.get('user-agent')
+                        }
+                        update_provider_config(self.name, new_settings)
+
+                        return self._extract_outages(data)
+
+                logger.error(f"Browser Fetch failed for {self.name}.")
+                return []

             if not resp.ok: return []
             text = resp.text
             if text.startswith('//OK'): text = text[4:]
             return self._extract_outages(json.loads(text))
         except Exception as e:
             logger.error(f"Fetch error {self.name}: {e}")
             return []

+    # ... Keep _extract_outages and _is_valid as is ...
     def _extract_outages(self, data_list):
         results = []
         if not self.transformer: return []
@@ -429,7 +448,6 @@ class GwtRpcProvider(BaseProvider):
         return b['lat_min'] <= lat <= b['lat_max'] and b['lon_min'] <= lon <= b['lon_max']

 # --- REGISTRY ---
 PROVIDER_REGISTRY = {
     'kubra': KubraProvider,
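The exclusion-based header cleaning used in Strategy B above (and in get_fresh_config) can be shown in isolation. This is a sketch only: the exclusion set mirrors the diff, while the sample header names and values below are illustrative, not captured data.

    # Sketch: exclusion-based header cleaning (mirrors the 'excluded'/'forbidden' sets above).
    # Transport headers that requests must regenerate are dropped; custom GWT/NISC headers survive.
    EXCLUDED = {
        'content-length', 'host', 'connection', 'cookie', 'accept-encoding',
        'sec-ch-ua', 'sec-ch-ua-mobile', 'sec-ch-ua-platform', 'origin',
    }

    def clean_headers(raw_headers):
        return {k: v for k, v in raw_headers.items() if k.lower() not in EXCLUDED}

    # Illustrative example:
    captured = {
        'Content-Length': '123',
        'Host': 'example.invalid',
        'x-gwt-permutation': 'ABC123',
        'coop.nisc.outagewebmap.configname': 'Outage Web Map',
    }
    print(clean_headers(captured))
    # Keeps x-gwt-permutation and coop.nisc.outagewebmap.configname,
    # drops Content-Length and Host.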

View File

@@ -104,20 +104,23 @@
"epsg": 3735, "epsg": 3735,
"state_filter": "OH", "state_filter": "OH",
"headers": { "headers": {
"content-type": "text/x-gwt-rpc; charset=UTF-8",
"x-gwt-module-base": "https://weci.ebill.coop/woViewer/MapWiseWeb/",
"x-gwt-permutation": "92F322F8E48548F604D2E1BE43DB1F13", "x-gwt-permutation": "92F322F8E48548F604D2E1BE43DB1F13",
"x-gwt-module-base": "https://weci.ebill.coop/woViewer/MapWiseWeb/",
"referer": "https://weci.ebill.coop/woViewer/mapviewer.html?config=Outage+Web+Map",
"coop.nisc.outagewebmap.configname": "Outage Web Map",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"content-type": "text/x-gwt-rpc; charset=UTF-8",
"Referer": "https://weci.ebill.coop/woViewer/mapviewer.html?config=Outage+Web+Map" "Referer": "https://weci.ebill.coop/woViewer/mapviewer.html?config=Outage+Web+Map"
}, },
"body": "7|0|4|https://weci.ebill.coop/woViewer/MapWiseWeb/|612278413EC26C34D54A3907AA0CDFD8|coop.nisc.oms.webmap.services.RpcCombinedOutageDetailsService|getCombinedOutageDetails|1|2|3|4|0|", "body": "7|0|4|https://weci.ebill.coop/woViewer/MapWiseWeb/|612278413EC26C34D54A3907AA0CDFD8|coop.nisc.oms.webmap.services.RpcCombinedOutageDetailsService|getCombinedOutageDetails|1|2|3|4|0|",
"last_auto_update": "2025-12-07T03:24:46.435173+00:00", "last_auto_update": "2025-12-07T03:56:27.722877+00:00",
"cookies": [ "cookies": [
{ {
"name": "__utma", "name": "__utma",
"value": "105963909.535514741.1765077881.1765077881.1765077881.1", "value": "105963909.1267880890.1765079787.1765079787.1765079787.1",
"domain": ".weci.ebill.coop", "domain": ".weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1799637880.601006, "expires": 1799639786.874286,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
@@ -134,10 +137,10 @@
         },
         {
           "name": "__utmz",
-          "value": "105963909.1765077881.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
+          "value": "105963909.1765079787.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
           "domain": ".weci.ebill.coop",
           "path": "/",
-          "expires": 1780845880,
+          "expires": 1780847786,
           "httpOnly": false,
           "secure": false,
           "sameSite": "Lax"
@@ -147,7 +150,7 @@
"value": "1", "value": "1",
"domain": ".weci.ebill.coop", "domain": ".weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1765078480, "expires": 1765080386,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
@@ -157,37 +160,37 @@
"value": "1", "value": "1",
"domain": ".weci.ebill.coop", "domain": ".weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1765078480, "expires": 1765080386,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
}, },
{ {
"name": "__utmb", "name": "__utmb",
"value": "105963909.2.10.1765077881", "value": "105963909.2.10.1765079787",
"domain": ".weci.ebill.coop", "domain": ".weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1765079680, "expires": 1765081586,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
}, },
{ {
"name": "__utma", "name": "__utma",
"value": "105963909.535514741.1765077881.1765077881.1765077881.1", "value": "105963909.1267880890.1765079787.1765079787.1765079787.1",
"domain": "weci.ebill.coop", "domain": "weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1799637880.601622, "expires": 1799639786.87497,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
}, },
{ {
"name": "__utmb", "name": "__utmb",
"value": "105963909.3.9.1765077881", "value": "105963909.3.9.1765079787",
"domain": "weci.ebill.coop", "domain": "weci.ebill.coop",
"path": "/", "path": "/",
"expires": 1765079680, "expires": 1765081586,
"httpOnly": false, "httpOnly": false,
"secure": false, "secure": false,
"sameSite": "Lax" "sameSite": "Lax"
@@ -204,14 +207,15 @@
         },
         {
           "name": "__utmz",
-          "value": "105963909.1765077881.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
+          "value": "105963909.1765079787.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
           "domain": "weci.ebill.coop",
           "path": "/",
-          "expires": 1780845880,
+          "expires": 1780847786,
           "httpOnly": false,
           "secure": false,
           "sameSite": "Lax"
         }
-      ]
+      ],
+      "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
     }
   ]
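For reference, the pipe-delimited "body" string in this config can be split for inspection as sketched below. The field labels follow the conventional GWT-RPC version 7 request layout (version, flags, string-table size, then the string table); that interpretation is an assumption, not something defined in this repository.

    # Sketch: split the configured GWT-RPC body for inspection.
    # Field meanings are the usual GWT-RPC 7 layout and are assumed, not verified here.
    body = ("7|0|4|https://weci.ebill.coop/woViewer/MapWiseWeb/|"
            "612278413EC26C34D54A3907AA0CDFD8|"
            "coop.nisc.oms.webmap.services.RpcCombinedOutageDetailsService|"
            "getCombinedOutageDetails|1|2|3|4|0|")

    parts = body.split("|")
    version, flags, table_size = parts[0], parts[1], parts[2]
    # module base URL, serialization policy strong name, service interface, method name
    string_table = parts[3:3 + int(table_size)]

    print(version, flags, table_size)  # 7 0 4
    print(string_table)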