643 lines
28 KiB
Python
643 lines
28 KiB
Python
#!/usr/bin/env python3
|
|
import json
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
import re
|
|
import sys
|
|
import warnings
|
|
import psycopg2
|
|
import traceback
|
|
import os
|
|
import time
|
|
from google.oauth2 import service_account
|
|
from googleapiclient.discovery import build
|
|
from googleapiclient.http import MediaIoBaseDownload
|
|
import io
|
|
|
|
# Configuration
SCOPES = ['https://www.googleapis.com/auth/drive'] # Full access for sync and delete
SERVICE_ACCOUNT_FILE = '/var/www/html/work/noaa_staff.json' # Path to your service account JSON key
DRIVE_FOLDER_ID = '1xCPU7Lhy-2cTg2Ul6tSQt6iRZeBGH3AW' # Replace with your Google Drive folder ID
LOCAL_DIR = os.path.expanduser('/var/www/html/work/NOAA')  # Local mirror of the Drive folder
USER_EMAIL = 'stoat@stoat.org'  # Account impersonated via domain-wide delegation


# NOTE(review): connection is opened at import time and used as module-level
# shared state by insert_data(); credentials are hard-coded here.
conn = psycopg2.connect(host='localhost', database='nws', user='nws', password='nws')
cursor = conn.cursor()
|
|
|
|
def get_drive_service():
    """Build a Drive v3 API client authorized as USER_EMAIL.

    Loads the service-account key, delegates to the user account via
    domain-wide delegation, and returns the discovery-based client.
    """
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    delegated = creds.with_subject(USER_EMAIL)  # impersonate the user account
    return build('drive', 'v3', credentials=delegated)
|
|
|
|
def get_folder_files(service, folder_id):
    """Return metadata for every non-trashed file directly inside folder_id.

    Each entry carries id, name, mimeType, modifiedTime and size.
    """
    query = f"'{folder_id}' in parents and trashed=false"
    response = service.files().list(
        q=query, fields="files(id, name, mimeType, modifiedTime, size)").execute()
    return response.get('files', [])
|
|
|
|
def download_file(service, file_id, file_name, local_path, modified_time):
    """Download a Drive file to local_path and stamp it with the Drive mtime.

    :param service: Google Drive API service instance.
    :param file_id: Drive file id to download.
    :param file_name: Display name (unused here; kept for interface compatibility).
    :param local_path: Destination path on disk.
    :param modified_time: Drive RFC 3339 timestamp, e.g. '2024-01-02T03:04:05.678Z'.
    """
    request = service.files().get_media(fileId=file_id)
    # BUG FIX: use a context manager so the handle is closed even when the
    # download raises (the original leaked the FileIO on error).
    with io.FileIO(local_path, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    # Mirror the Drive modification time onto the local file so that
    # sync_folder() can compare mtimes on the next run.
    mod_time = time.mktime(time.strptime(modified_time, "%Y-%m-%dT%H:%M:%S.%fZ"))
    os.utime(local_path, times=(mod_time, mod_time))
|
|
|
|
def sync_folder():
    """One-way sync of DRIVE_FOLDER_ID into LOCAL_DIR.

    Downloads files that are new or whose Drive mtime differs from the local
    copy by more than one second, and deletes local files that no longer
    exist in the Drive folder. Subfolders are skipped.
    """
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(LOCAL_DIR, exist_ok=True)

    service = get_drive_service()
    drive_files = get_folder_files(service, DRIVE_FOLDER_ID)

    # name -> local mtime for the plain files currently mirrored
    local_files = {f: os.path.getmtime(os.path.join(LOCAL_DIR, f))
                   for f in os.listdir(LOCAL_DIR)
                   if os.path.isfile(os.path.join(LOCAL_DIR, f))}

    for file in drive_files:
        if file['mimeType'] == 'application/vnd.google-apps.folder':
            continue  # folders are not mirrored
        file_name = file['name']
        file_id = file['id']
        modified_time = file['modifiedTime']
        local_path = os.path.join(LOCAL_DIR, file_name)

        drive_mod_time = time.mktime(time.strptime(modified_time, "%Y-%m-%dT%H:%M:%S.%fZ"))

        # 1 s tolerance absorbs filesystem timestamp rounding.
        if file_name not in local_files or abs(local_files[file_name] - drive_mod_time) > 1:
            print(f"Syncing {file_name}...")
            download_file(service, file_id, file_name, local_path, modified_time)
        else:
            print(f"{file_name} is up-to-date.")

    # Drive-side names hoisted into a set once (the original rebuilt the
    # list for every local file, O(n^2)).
    drive_names = {f['name'] for f in drive_files}
    for local_file in local_files:
        if local_file not in drive_names:
            print(f"Removing {local_file} from local directory...")
            os.remove(os.path.join(LOCAL_DIR, local_file))
|
|
|
|
def remove_files(service, filenames):
    """
    Remove specified files from both the local sync folder and the Google Drive folder.

    With Editor permissions, Drive files are moved to Trash and unlinked from the folder.

    Args:
        service: Google Drive API service instance.
        filenames (list): List of filenames to remove.
    """
    drive_files = get_folder_files(service, DRIVE_FOLDER_ID)
    drive_file_map = {f['name']: f['id'] for f in drive_files}

    for filename in filenames:
        # Remove from local folder.
        # BUG FIX: every message below printed the literal "(unknown)" instead
        # of the filename; the {filename} placeholders are restored.
        local_path = os.path.join(LOCAL_DIR, filename)
        if os.path.exists(local_path):
            try:
                os.remove(local_path)
                print(f"Removed {filename} from local directory.")
            except Exception as e:
                print(f"Error removing {filename} locally: {e}")
        else:
            print(f"{filename} not found in local directory.")

        # Remove from Google Drive folder (move to Trash and unlink)
        if filename in drive_file_map:
            file_id = drive_file_map[filename]
            try:
                # Move to Trash and remove from the folder
                service.files().update(
                    fileId=file_id,
                    body={'trashed': True},        # Move to Trash
                    removeParents=DRIVE_FOLDER_ID  # Unlink from the original folder
                ).execute()
                print(f"Moved {filename} to Trash and removed from folder in Google Drive.")
            except Exception as e:
                print(f"Error processing {filename} in Google Drive: {e}")
        else:
            print(f"{filename} not found in Google Drive folder.")
|
|
|
|
|
|
def excel_to_dict(file_path, sheet_name=0):
    """Load an Excel sheet into ``{row_index: {column: value}}`` form.

    openpyxl's stylesheet UserWarnings are suppressed while reading.
    ``orient='index'`` keys the outer dict by the DataFrame row index, with
    one inner dict of column->value per row.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning,
                                module=re.escape('openpyxl.styles.stylesheet'))
        frame = pd.read_excel(file_path, sheet_name=sheet_name)
    return frame.to_dict(orient='index')
|
|
|
|
|
|
|
|
def filter_dict_by_wfo(data, active="active"):
    """Keep records that have a NOAA_ORG_TITLE and whose EMPL_CODE contains "NOAA".

    The ``active`` argument is accepted for interface compatibility but is
    not used by the current filter.
    """
    kept = {}
    for key, record in data.items():
        if 'NOAA_ORG_TITLE' in record and "NOAA" in record['EMPL_CODE']:
            kept[key] = record
    return kept
|
|
|
|
|
|
def collect_and_organize_by_org(data, fields_to_collect, position_title_lookup):
    """
    Collect specific fields per person and normalize NOAA_POSITION_TITLE.

    For the NOAA_POSITION_TITLE field, the raw (stripped) value is preserved
    under 'ORIG_TITLE' and the normalized value replaces the field itself.
    Fields missing from a record are recorded as empty strings.

    BUG FIX vs. original: 'ORIG_TITLE' was reassigned (unstripped) inside the
    branch for *every other* field, overwriting the stripped value and raising
    KeyError for records without a NOAA_POSITION_TITLE; it is now set only
    when that field is actually processed. The two accumulator dicts that
    were built but never returned have been removed.

    :param data: Dictionary with nested personnel data
    :param fields_to_collect: List of fields to extract
    :param position_title_lookup: Dict mapping NOAA_POSITION_TITLE variations to standardized titles
    :return: Dict keyed like ``data`` with the collected/normalized entries
    """
    collected_data = {}

    for outer_key, inner_dict in data.items():
        entry = {}
        for field in fields_to_collect:
            if field not in inner_dict:
                entry[field] = ''
                continue
            if field == 'NOAA_POSITION_TITLE':
                raw_title = inner_dict[field].strip()
                entry['ORIG_TITLE'] = raw_title
                # Unknown variants fall through unchanged.
                entry[field] = position_title_lookup.get(raw_title, raw_title)
            else:
                entry[field] = inner_dict[field]
        collected_data[outer_key] = entry

    return collected_data
|
|
|
|
|
|
|
|
|
|
|
|
def collect_data(file):
    """Read one staff-directory spreadsheet and return normalized person records.

    :param file: Path to the .xlsx export (first sheet is read).
    :return: {row_index: {field: value, ..., 'ORIG_TITLE': raw title}} for
             records that pass filter_dict_by_wfo().
    """
    data = excel_to_dict(file, sheet_name=0)
    data = filter_dict_by_wfo(data)

    fields_to_collect = [
        'NOAA_POSITION_TITLE', 'ACCT_STATUS', 'OFFICE', 'NOAA_ORG_TITLE', 'PERSON_ID', 'EMPL_CODE',
        'LAST_NAME', 'FIRST_NAME', 'MIDDLE_NAME', 'MGR_NAME', 'LAST_UPDATED'
    ]

    # Lookup table mapping free-text NOAA_POSITION_TITLE variants (typos,
    # office prefixes, abbreviations) to standardized titles.
    # BUG FIX: the original literal contained duplicate keys ('El Tech',
    # 'Meteorologist - Journey', 'Upper Air Weather Observer', 'FET'); Python
    # keeps only the last occurrence, so the duplicates have been removed
    # while preserving the effective (last-wins) mapping.
    position_title_lookup = {
        'Electronic Technician': 'Electronics Technician',
        'Electronics Tech': 'Electronics Technician',
        'Electronics Technician': 'Electronics Technician',
        'El Tech': 'Electronics Technician',
        'ET': 'Electronics Technician',
        'EKA - Electronics Technician': 'Electronics Technician',
        'Electronics Tecnician': 'Electronics Technician',
        'FGZ - Electronics Technician': 'Electronics Technician',
        'EL TECH': 'Electronics Technician',
        'PQR - Electronics Technician': 'Electronics Technician',
        'MSO - El Tech': 'Electronics Technician',
        'TFX - Electronics Technician': 'Electronics Technician',
        'Eltech': 'Electronics Technician',
        'GGW - Electronics Technician': 'Electronics Technician',
        'SEW - El Tech': 'Electronics Technician',
        'Electrical Technician': 'Electronics Technician',
        'Electronic Techncian': 'Electronics Technician',
        'Meteorologist': 'Meteorologist (Could Include Leads)',
        'Forecaster': 'Meteorologist (Could Include Leads)',
        'General Forecaster': 'Meteorologist (Could Include Leads)',
        'Meteorologist Intern': 'Meteorologist (Could Include Leads)',
        'General Meteorologist': 'Meteorologist (Could Include Leads)',
        'NOAA Federal Employee': 'Meteorologist (Could Include Leads)',
        'Met Intern': 'Meteorologist (Could Include Leads)',
        'Journey Forecaster': 'Meteorologist (Could Include Leads)',
        'Meteorologist - IMET': 'Meteorologist (Could Include Leads)',
        'METEOROLOGIST': 'Meteorologist (Could Include Leads)',
        'Meteorlogist': 'Meteorologist (Could Include Leads)',
        'PDT - Meteorologist': 'Meteorologist (Could Include Leads)',
        'MTR - General Forecaster': 'Meteorologist (Could Include Leads)',
        'LKN - Forecaster': 'Meteorologist (Could Include Leads)',
        'Meteorolgist': 'Meteorologist (Could Include Leads)',
        'PIH - Meteorologist': 'Meteorologist (Could Include Leads)',
        'Meterologist': 'Meteorologist (Could Include Leads)',
        'Journeyman Forecaster': 'Meteorologist (Could Include Leads)',
        'Meteorological Intern': 'Meteorologist (Could Include Leads)',
        'OTX - Forecaster': 'Meteorologist (Could Include Leads)',
        'NWS Intern': 'Meteorologist (Could Include Leads)',
        'Meteorologist - General Forecaster': 'Meteorologist (Could Include Leads)',
        'MET Intern': 'Meteorologist (Could Include Leads)',
        'MIT': 'Meteorologist (Could Include Leads)',
        'Forecaster/Incident Meteorologist': 'Meteorologist (Could Include Leads)',
        'Entry Level Meteorologist': 'Meteorologist (Could Include Leads)',
        'Meteorologist and IMET': 'Meteorologist (Could Include Leads)',
        'Fire Weather Program Manager': 'Meteorologist (Could Include Leads)',
        'Meteorologist Intern WSFO JAN': 'Meteorologist (Could Include Leads)',
        'Meteorologist ASA': 'Meteorologist (Could Include Leads)',
        'Lead Meteorologist and IMET': 'Meteorologist (Could Include Leads)',
        'meteorologist': 'Meteorologist (Could Include Leads)',
        'PIH - General Forecaster': 'Meteorologist (Could Include Leads)',
        'TFX - Meteorologist': 'Meteorologist (Could Include Leads)',
        'SEW Forecaster': 'Meteorologist (Could Include Leads)',
        'Metorologist': 'Meteorologist (Could Include Leads)',
        'MET': 'Meteorologist (Could Include Leads)',
        'Meteorologist General': 'Meteorologist (Could Include Leads)',
        'Meteorogist': 'Meteorologist (Could Include Leads)',
        'LKN - General Forecaster': 'Meteorologist (Could Include Leads)',
        'EKA - Forecaster': 'Meteorologist (Could Include Leads)',
        'Meteorologist - Journey': 'Meteorologist (Could Include Leads)',
        'REV General Forecaster': 'Meteorologist (Could Include Leads)',
        'VEF - General Forecaster': 'Meteorologist (Could Include Leads)',
        'MTR - Meteorologist': 'Meteorologist (Could Include Leads)',
        'Metorologist - National NWSChat Admin': 'Meteorologist (Could Include Leads)',
        'MSO-Meteorologist': 'Meteorologist (Could Include Leads)',
        'VEF - Meteorologist': 'Meteorologist (Could Include Leads)',
        'GGW - Meteorologist': 'Meteorologist (Could Include Leads)',
        'EKA - Meteorologist': 'Meteorologist (Could Include Leads)',
        'Meteorologist Senior Forecaster': 'Lead Meteorologist',
        'Senior Forecaster - LIX': 'Lead Meteorologist',
        'TWC - Lead Forecaster': 'Lead Meteorologist',
        'Meteorologist - Lead': 'Lead Meteorologist',
        'Senior Forecaster-Fire Weather Program Manager': 'Lead Meteorologist',
        'Lead Forecasters': 'Lead Meteorologist',
        'Meteorologist - Senior': 'Lead Meteorologist',
        'lead Meteorologist': 'Lead Meteorologist',
        'Senior Forecaster Lead Meteorologist': 'Lead Meteorologist',
        'Lead Meteorologist': 'Lead Meteorologist',
        'Senior Meteorologist': 'Lead Meteorologist',
        'Senior Forecaster': 'Lead Meteorologist',
        'Lead Forecaster': 'Lead Meteorologist',
        'Meteorologist - Lead Forecaster': 'Lead Meteorologist',
        'Meteorologist - Senior Forecaster': 'Lead Meteorologist',
        'Meteorologist Lead Forecaster': 'Lead Meteorologist',
        'Information Technology Officer': 'ITO (May Include non ITO IT at WFOs)',
        'IT Officer': 'ITO (May Include non ITO IT at WFOs)',
        'ITO': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technology Specialist': 'ITO (May Include non ITO IT at WFOs)',
        'IT Specialist': 'ITO (May Include non ITO IT at WFOs)',
        'FGZ ITO': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technology Specialist ITO': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technology Officer(ITO)/Meteorologist': 'ITO (May Include non ITO IT at WFOs)',
        'VEF - Information Technology Officer': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technolgy Officer': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technology Officer -ITO': 'ITO (May Include non ITO IT at WFOs)',
        'Supervisory IT Specialist': 'ITO (May Include non ITO IT at WFOs)',
        'IT Specialist - Systems Administrator': 'ITO (May Include non ITO IT at WFOs)',
        'Information Technology Specialist ITO (May Include non ITO IT at WFOs)': 'ITO (May Include non ITO IT at WFOs)',
        'Electronics Systems Analyst': 'ESA',
        'Electronic System Analyst': 'ESA',
        'Electronic Systems Analyst': 'ESA',
        'Electronics System Analyst': 'ESA',
        'Electronic Systems Analyst - ESA': 'ESA',
        'AESA': 'ESA',
        'IT Specialist - Electronics System Analyst': 'ESA',
        'OTX ESA': 'ESA',
        'HNX - Electronic Systems Analyst': 'ESA',
        'Supervisory Information Technology Specialist - ESA': 'ESA',
        'Electronic Systems Analyst - ESA IT Specialist': 'ESA',
        'IT Specialist A-ESA': 'ESA',
        'Electronics Systems Analyst ESA': 'ESA',
        'STO ESA': 'ESA',
        'Electronics Systems Analyst -ESA': 'ESA',
        'Assistant ESA': 'ESA',
        'PQR - Assistant ESA': 'ESA',
        'Electronic Systems Analyst -ESA': 'ESA',
        'Meteorologist - Science Operations Officer': 'SOO',
        'SOO': 'SOO',
        'Science and Operations Officer': 'SOO',
        'Science Operations Officer': 'SOO',
        'Meteorologist - SOO': 'SOO',
        'Meteorologist - Science and Operations Officer': 'SOO',
        'Science and Operations Officer - AMIC': 'SOO',
        'Meteorologist -SOO': 'SOO',
        'Science Operations Officer SOO': 'SOO',
        'Science and Operations Officer - SOO': 'SOO',
        'Science amp; Operations Officer': 'SOO',
        'Meteorologist SOO': 'SOO',
        'Science and Operations Officer DOC NOAA NWS Taunton MA': 'SOO',
        'Science and Operations Officer - NWS New York - NY': 'SOO',
        'Warning Coordination Meteorologist': 'WCM',
        'WCM': 'WCM',
        'Meteorologist - Warning Coordination Meteorologist': 'WCM',
        'Warning Coordination Meteorololgist': 'WCM',
        'Warning Coordination Meteorologist - WCM': 'WCM',
        'Meteorologist WCM': 'WCM',
        'WCM - Meteorologist': 'WCM',
        'Warning and Coordination Meteorologist': 'WCM',
        'HNX WCM': 'WCM',
        'Warning Coordination Meeorologist': 'WCM',
        'Warning Coordination Meteorlogist': 'WCM',
        'Meteorologist In Charge - MIC': 'MIC',
        'MIC': 'MIC',
        'Meteorologist-In-Charge': 'MIC',
        'Meteorologist In Charge': 'MIC',
        'Meteorologist in Charge': 'MIC',
        'Meteorologist-in-Charge': 'MIC',
        'Meterorologist in Charge MIC': 'MIC',
        'Meteorologist-in-Charge MIC': 'MIC',
        'Meteorologist In Charge MIC': 'MIC',
        'HNX MIC': 'MIC',
        'Observations Program Leader': 'OPL',
        'OPL': 'OPL',
        'Observation Program Leader': 'OPL',
        'Observing Program Leader': 'OPL',
        'Observation Program Leader -OPL': 'OPL',
        'Observation Program Leader - OPL': 'OPL',
        'Observing Progam Leader': 'OPL',
        'Observer Program Leader': 'OPL',
        'Observer Program Leader -OPL': 'OPL',
        'Observing Program Leader - OPL': 'OPL',
        'PIH - Observation Program Lead': 'OPL',
        'Observing Program Lead': 'OPL',
        'Observations Program Leader - OPL': 'OPL',
        'Observation Programs Lead': 'OPL',
        'Meteorological Technician - OPL': 'OPL',
        'Meteorologist - Observing Progam Leader': 'OPL',
        'Lead Meteorological Technician': 'OPL',
        'Observation Program Manager': 'OPL',
        'Cooperative Program Manager': 'OPL',
        'Data Acquisition Program Manager': 'OPL',
        'Senior Hydrologist': 'Service Hydrologist',
        'Senior Service Hydrologist': 'Service Hydrologist',
        'Hydrologist': 'Service Hydrologist',
        'Service Hydrologist': 'Service Hydrologist',
        'Hydrologic Forecaster': 'Service Hydrologist',
        'Service Hydrologist Meteorologist': 'Service Hydrologist',
        'Lead Hydrologist': 'Service Hydrologist',
        'Sr. Service Hydrologist': 'Service Hydrologist',
        'Senior Service Hydrologist/Meteorologist': 'Service Hydrologist',
        'EKA Hydrologist': 'Service Hydrologist',
        'Meteorological Technician': 'HMT',
        'HMT': 'HMT',
        'Port Meteorological Officer': 'HMT',
        'Hydro-Meteorological Technician': 'HMT',
        'Hydrometeorological Technician': 'HMT',
        'PMO': 'HMT',
        'AROS Site Operator': 'HMT',
        'Upper Air Weather Observer': 'HMT',
        'Meteorologist Technician': 'HMT',
        'Ice-SST Specialist': 'HMT',
        'Great Lakes PMO': 'HMT',
        'Ice SST Specialist': 'HMT',
        'ASA': 'ASA',
        'Administrative Support Asst.': 'ASA',
        'Administrative Support Assistant': 'ASA',
        'Administrative Support Assistant ASA': 'ASA',
        'Administative Support Assistant': 'ASA',
        'ASa': 'ASA',
        'FGZ - Administrative Support': 'ASA',
        'Administrative Support Assitant': 'ASA',
        'Administrative Support Assistant - ASA - COTR': 'ASA',
        'Administrative Assistant': 'ASA',
        'Admin Suppt Asst': 'ASA',
        'Supervisory Meteorologist': 'Unspecified',
        'Operations Manager': 'Unspecified',
        'Director of Operations': 'Unspecified',
        'Assistant Meteorologist Ice-SST': 'Unspecified',
        'Skillbridge Electronics Technician': 'Unspecified',
        'Regional Equipment Specialist ER NWR Focal Point': 'Unspecified',
        'Virtual Volunteer': 'Unspecified',
        'WRH Service Evolution Program Leader': 'Unspecified',
        'Applications Integration Meteorologist': 'Unspecified',
        'Skillbridge Volunteer': 'Unspecified',
        'Contrator': 'Contractor',
        'Contracto': 'Contractor',
        'Contractor': 'Contractor',
        # NOTE(review): the original had both 'FET': 'Facilities Engineering
        # Technician' and 'FET': 'FET'; the second (kept here) silently won.
        # Confirm 'FET' -> 'FET' is the intended mapping.
        'FET': 'FET',
        'VEF - Engineering Technician': 'FET',
        'Facilities Technician': 'FET',
        'Engineering Technician': 'FET',
        'Facilities Engineering Technician': 'FET',
        'Field Engineering Tech': 'FET',
        'Facilities Engineering Tech': 'FET',
        'Field Engineering Technician': 'FET',
        'Regional Maintenance Specialist': 'RMS',
        'RMS': 'RMS',
        'ASOS RMS': 'RMS',
        'Pathways Student': 'Pathways',
        'Pathway Student Trainee': 'Pathways',
        'Pathways Intern': 'Pathways',
        'Pathways': 'Pathways',
        'Pathway': 'Pathways',
        'MTR - Student Intern': 'Pathways',
        'Student Fellow': 'Pathways',
        'Hollings Scholar': 'Pathways',
        'Student Trainee Meteorology': 'Pathways',
        'Pathway Intern': 'Pathways',
        'Emergency Response Specialist': 'ERS',
        'ERS': 'ERS',
    }

    collected_data = collect_and_organize_by_org(data, fields_to_collect, position_title_lookup)
    return collected_data
|
|
|
|
|
|
|
|
def loop_through_xls(directory="/var/www/html/work/NOAA"):
    """
    Loops through Excel files in a directory and returns a dictionary containing
    file information sorted by modification time (oldest first).

    The directory is now a parameter (defaulting to the original hard-coded
    NOAA sync folder) so the function is reusable and testable; existing
    zero-argument callers are unaffected.

    Args:
        directory (str): Folder to scan for .xlsx files.

    Returns:
        dict: filename -> {'path': str, 'last_updated': datetime,
        'file_name': str}, insertion-ordered by modification time.
    """
    result = {}

    # Only .xlsx files, ordered oldest-modified first so downstream
    # processing applies snapshots chronologically.
    xlsx_files = [f for f in os.listdir(directory) if f.endswith('.xlsx')]
    xlsx_files.sort(key=lambda f: os.path.getmtime(os.path.join(directory, f)))

    for file in xlsx_files:
        full_path = os.path.join(directory, file)
        mod_time = datetime.fromtimestamp(os.path.getmtime(full_path))
        result[file] = {
            'path': full_path,
            'last_updated': mod_time,
            'file_name': file,
        }

    return result
|
|
|
|
|
|
|
|
|
|
def get_inner_dict(big_dict, j):
    """Return the first inner record whose PERSON_ID equals j, or None."""
    for inner in big_dict.values():
        if inner['PERSON_ID'] == j:
            return inner
    return None
|
|
|
|
|
|
def compare_personids(personids, person_dict):
    """Partition person IDs across the two sources with set arithmetic.

    Returns {'in_both', 'only_in_list', 'only_in_dict'} as sets. Records in
    ``person_dict`` lacking a 'PERSON_ID' key are ignored. On any error a
    CGI-style traceback is printed and the function implicitly returns None
    (original behavior, preserved).
    """
    try:
        dict_person_ids = set()
        for inner_dict in person_dict.values():
            if 'PERSON_ID' in inner_dict:
                dict_person_ids.add(inner_dict['PERSON_ID'])

        list_person_ids = set(personids)

        return {
            'in_both': list_person_ids & dict_person_ids,
            'only_in_list': list_person_ids - dict_person_ids,
            'only_in_dict': dict_person_ids - list_person_ids,
        }
    except Exception as e:
        print("Content-Type: text/plain\n")
        print("An error occurred:\n")
        traceback.print_exc(file=sys.stdout)
|
|
|
|
|
|
|
|
# One INSERT statement shared by all three code paths in insert_data()
# (the original repeated it verbatim three times).
_NWS_INSERT_SQL = """
    INSERT INTO nws (personid, first, middle, last, title, otitle, status, lastupdate, office, mgrname, orgtitle, recordtime)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def _record_parms(record, formatted_time):
    """Build the parameter tuple for _NWS_INSERT_SQL from a collected record."""
    return (
        record['PERSON_ID'], record['FIRST_NAME'], record['MIDDLE_NAME'], record['LAST_NAME'],
        record['NOAA_POSITION_TITLE'], record['ORIG_TITLE'], record['ACCT_STATUS'],
        formatted_time, record['OFFICE'], record['MGR_NAME'], record['NOAA_ORG_TITLE'],
        record['LAST_UPDATED'],
    )


def insert_data(data):
    """Insert one snapshot per file timestamp into the nws table.

    ``data`` maps a formatted file time -> {key: person record}. For each
    snapshot, person IDs are compared against those already in the table:
      * IDs in both sources, and brand-new IDs, are inserted as-is;
      * IDs known to the DB but absent from the snapshot are re-inserted
        using their most recent row, marked status='inactive' (unless the
        latest row is already 'gone').

    Uses the module-level ``conn``/``cursor``. Commits once after all
    snapshots, then normalizes empty/'NaN' statuses to 'gone'.
    """
    try:
        cursor.execute("SELECT DISTINCT personid FROM nws ORDER BY personid")
        personids = [row[0] for row in cursor.fetchall()]

        for formatted_time, post_data in data.items():
            result = compare_personids(personids, post_data)
            both = result['in_both']
            nowgone = result['only_in_list']
            onlynew = result['only_in_dict']

            # IDs present in both the DB and this snapshot.
            for j in both:
                record = get_inner_dict(post_data, j)
                cursor.execute(_NWS_INSERT_SQL, _record_parms(record, formatted_time))

            # IDs known to the DB but missing from this snapshot: carry the
            # latest row forward marked 'inactive'.
            for j in nowgone:
                cursor.execute("SELECT * FROM nws WHERE personid = %s ORDER BY lastupdate DESC LIMIT 1", (j,))
                row = cursor.fetchone()
                if row:
                    column_names = [desc[0] for desc in cursor.description]
                    latest = dict(zip(column_names, row))  # no longer shadows the compare result
                    if latest['status'] != "gone":
                        parms = (
                            j, latest['first'], latest['middle'], latest['last'], latest['title'],
                            latest['otitle'], 'inactive', formatted_time, latest['office'],
                            latest['mgrname'], latest['orgtitle'], latest['lastupdate']
                        )
                        cursor.execute(_NWS_INSERT_SQL, parms)

            # IDs seen for the first time in this snapshot.
            for j in onlynew:
                record = get_inner_dict(post_data, j)
                cursor.execute(_NWS_INSERT_SQL, _record_parms(record, formatted_time))

        conn.commit()  # Single commit at the end
        # Normalize rows whose status came through empty or as the string 'NaN'.
        cursor.execute("update nws set status = 'gone' where status = '' or status = 'NaN'")
        conn.commit()
    except Exception as e:
        # NOTE(review): swallows all errors with a bare print and no rollback;
        # preserved from the original — consider logging + conn.rollback().
        print(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Pipeline: mirror the Drive folder locally, parse each spreadsheet into
    # a per-snapshot record set, load everything into Postgres, then delete
    # the processed files from both the local mirror and Google Drive.
    alldata = {}
    sync_folder()
    deletable = []
    # Files come back oldest-modified first, so snapshots are applied in
    # chronological order.
    xlsx_files = loop_through_xls()
    print(xlsx_files)
    for p in xlsx_files:
        full_path = xlsx_files[p]['path']
        update = xlsx_files[p]['last_updated']
        file = xlsx_files[p]['file_name']
        # Get the formatted update time
        formatted_time = update.strftime('%m/%d/%Y %I:%M:%S %p')
        # Collect data from the file
        datedata = collect_data(full_path)
        # Queue the processed file for removal from disk and Drive below.
        deletable.append(file)
        # Add additional file info if desired (optional)
        #datedata['path'] = xlsx_files[p]['path']

        # Use the formatted time as the key, with datedata as the value
        alldata[formatted_time] = datedata
    #print(post_json_to_cgi(alldata))
    #call database insert here
    insert_data(alldata)
    #print(alldata)
    #newalldata = remove_duplicate_records(alldata)
    #with open("nws.json", "w") as file:
    #    json.dump(newalldata, file, indent=4)
    #print(post_json_to_cgi(newalldata))
    service = get_drive_service()

    # Example: Remove specific files from both local and Drive
    files_to_remove = deletable  # Replace with your filenames
    remove_files(service, files_to_remove)
    # Close the module-level Postgres connection opened at import time.
    conn.close()
|