Fork of Lightning_Report adding: - n8n_report_branch.json: workflow branch for storm-triggered report delivery - report_service/: FastAPI microservice wrapping create_docx_report() so n8n can produce byte-identical reports without fighting the Python Code sandbox Made-with: Cursor
464 lines
18 KiB
Python
464 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Batch generate lightning reports for multiple wind farms.
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import logging
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
from dotenv import load_dotenv
|
|
|
|
from src.api.data_fetcher import APIDataFetcher
|
|
from src.data.loader import load_turbine_data, load_lightning_data_from_csv
|
|
from src.analysis.risk import calculate_turbine_risks
|
|
from src.analysis.grouping import create_turbine_groups
|
|
from src.reporting.docx import create_docx_report
|
|
from src.reporting.filename_utils import farm_local_date_range_from_config, slugify_ascii_underscore
|
|
from src.utils import (
|
|
filter_lightning_data_by_date_range,
|
|
format_date_ddmmyyyy,
|
|
format_period_display_for_report,
|
|
normalize_local_time_to_timezone,
|
|
)
|
|
from src.config import config as global_config
|
|
|
|
# Load settings from a .env file (python-dotenv) before anything else runs.
load_dotenv()

# Every record goes both to stdout and to a log file named after the date on
# which this process started.
_log_handlers = [
    logging.StreamHandler(sys.stdout),
    logging.FileHandler(f'batch_generation_{datetime.now():%Y-%m-%d}.log'),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def load_wind_farms_config(config_path: str) -> dict:
    """Read and parse the wind-farm configuration JSON at ``config_path``.

    Returns the parsed JSON document.

    Raises:
        FileNotFoundError: the file does not exist (logged, then re-raised).
        json.JSONDecodeError: the file is not valid JSON (logged, then re-raised).
    """
    try:
        with open(config_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except FileNotFoundError:
        logger.error(f"Configuration file not found: {config_path}")
        raise
    except json.JSONDecodeError as exc:
        logger.error(f"Invalid JSON in configuration file: {exc}")
        raise

    logger.info(f"Loaded configuration from {config_path}")
    return parsed
|
|
|
|
|
|
def filter_enabled_farms(wind_farms: list) -> tuple:
    """Split farms into ``(enabled, disabled)`` lists, keeping input order.

    A farm without an ``enabled`` key counts as enabled; otherwise the
    truthiness of that value decides which list it lands in.
    """
    buckets = {True: [], False: []}
    for entry in wind_farms:
        buckets[bool(entry.get('enabled', True))].append(entry)
    return buckets[True], buckets[False]
|
|
|
|
|
|
def get_location_bounds(farm: dict, turbine_df: pd.DataFrame, api_fetcher: APIDataFetcher) -> dict:
    """Resolve the circular search area used for the farm's API queries.

    Reads ``farm['api_params']['location_bounds']``. With ``method == 'auto'``
    the bounds are delegated to ``api_fetcher.calculate_location_bounds``,
    passing the turbine table, the largest entry of ``farm['distance_rings']``
    and ``padding_km`` (default 5). Any other method returns the configured
    ``center_lat`` / ``center_lng`` / ``radius_km`` values as-is.
    """
    bounds_cfg = farm['api_params']['location_bounds']

    if bounds_cfg['method'] != 'auto':
        # Explicit bounds: pass the three configured values straight through.
        return {key: bounds_cfg[key] for key in ('center_lat', 'center_lng', 'radius_km')}

    outermost_ring = max(farm['distance_rings'])
    return api_fetcher.calculate_location_bounds(
        turbine_df,
        outermost_ring,
        bounds_cfg.get('padding_km', 5),
    )
|
|
|
|
|
|
def update_global_config(farm: dict, start_date: str = None, end_date: str = None):
    """Copy one farm's settings onto the shared ``global_config`` object.

    Args:
        farm: Farm entry from the wind-farms configuration.
        start_date: Display string for the start of the analysis period.
        end_date: Display string for the end of the analysis period. The
            period is only written when both dates are truthy.

    Sets ``distance_rings``, ``ring_colors``, ``wind_farm_name``, ``timezone``
    and, when present in ``farm``, the lightning source fields and
    ``grouping_params``.

    NOTE(review): ``distance_rings``, ``ring_colors``, ``grouping_params`` and
    the lightning source fields are left at their current value when the farm
    does not define them, so in a multi-farm run a value set by an earlier
    farm stays in effect for later farms. Confirm that this is intended.
    """
    global_config.distance_rings = farm.get('distance_rings', global_config.distance_rings)
    global_config.ring_colors = farm.get('ring_colors', global_config.ring_colors)
    # DOCX title is based on the top-level `name` field for the farm.
    global_config.wind_farm_name = farm.get('name', 'Unknown')
    # A farm without a `report_config` section simply has no timezone; this
    # matches how process_farm reads the same value.
    global_config.timezone = farm.get('report_config', {}).get('timezone')

    # Lightning data source configuration (taken from the farm entry).
    source_type = farm.get('lightning_source_type')
    if source_type:
        global_config.lightning_source_type = source_type
        if source_type == 'csv':
            global_config.lightning_csv = farm.get('lightning_csv')
        elif source_type == 'api':
            global_config.lightning_json = farm.get('lightning_json')

    # Reporting period, once the caller has determined it.
    if start_date and end_date:
        global_config.analysis_start_date = start_date
        global_config.analysis_end_date = end_date

    if 'grouping_params' in farm:
        global_config.grouping_params = farm['grouping_params']

    logger.debug(f"Updated global config: distance_rings={global_config.distance_rings}, wind_farm_name={global_config.wind_farm_name}")
|
|
|
|
|
|
def convert_api_response_to_dataframe(records: list, data_type: str = 'lightning') -> pd.DataFrame:
    """Convert API response records to the DataFrame layout used downstream.

    Args:
        records: List of records from the API; ``None`` or an empty list is
            treated as "no data".
        data_type: 'lightning' or 'storm'.

    Returns:
        For lightning data: a frame whose ``local_time`` column is parsed to
        datetimes (taken from ``timestamp`` when ``local_time`` is absent) and
        which has a ``current_abs`` column derived from ``current`` when the
        API did not supply one. With no records, an empty frame with the same
        lightning columns, including ``current_abs``, so empty and non-empty
        results share one schema. For any other ``data_type`` the records are
        wrapped in a DataFrame unchanged.
    """
    if not records:
        if data_type == 'lightning':
            return pd.DataFrame(
                columns=['lat', 'lng', 'current', 'p_type', 'local_time', 'current_abs']
            )
        return pd.DataFrame()

    df = pd.DataFrame(records)
    if data_type != 'lightning':
        return df

    # Prefer an existing local_time column; fall back to the raw timestamp.
    if 'local_time' in df.columns:
        df['local_time'] = pd.to_datetime(df['local_time'])
    elif 'timestamp' in df.columns:
        df['local_time'] = pd.to_datetime(df['timestamp'])

    if 'current_abs' not in df.columns and 'current' in df.columns:
        df['current_abs'] = df['current'].abs()

    return df
|
|
|
|
|
|
def _select_date_filters(date_range_cfg: dict, source_type: str) -> tuple:
    """Return the configured ``(start, end)`` bounds used to filter lightning data.

    CSV sources are never filtered. For other sources, a 'manual' date range
    keeps its bounds at the top level of ``date_range_cfg``; any other method
    keeps them under its ``query_range`` sub-dict.
    """
    if source_type == 'csv':
        return None, None
    if date_range_cfg.get('method') == 'manual':
        bounds = date_range_cfg
    else:
        bounds = date_range_cfg.get('query_range', {})
    return bounds.get('start_date'), bounds.get('end_date')


def _report_period_display(lightning_df, source_type, date_range_cfg,
                           start_filter, end_filter, query_start, query_end) -> tuple:
    """Return the ``(start, end)`` display strings for the report period.

    A non-empty CSV dataset reports the first and last ``local_time`` it
    contains (DD-MM-YYYY HH:MM). Otherwise the configured bounds are formatted
    with ``format_period_display_for_report``; if no bounds are configured, or
    formatting yields an empty value, the API query dates are used instead.
    """
    if source_type == 'csv' and len(lightning_df) > 0:
        local_times = pd.to_datetime(lightning_df['local_time'])
        return (
            local_times.min().strftime('%d-%m-%Y %H:%M'),
            local_times.max().strftime('%d-%m-%Y %H:%M'),
        )

    manual_bounds_set = (
        date_range_cfg.get('method') == 'manual'
        and date_range_cfg.get('start_date') is not None
        and date_range_cfg.get('end_date') is not None
    )
    filters_set = start_filter is not None and end_filter is not None
    if manual_bounds_set or filters_set:
        actual_start, actual_end = format_period_display_for_report(start_filter, end_filter)
        if actual_start and actual_end:
            return actual_start, actual_end

    return format_date_ddmmyyyy(query_start), format_date_ddmmyyyy(query_end)


def process_farm(farm: dict, api_fetcher: APIDataFetcher, config: dict) -> dict:
    """Process a single farm and generate its DOCX report.

    Args:
        farm: Farm entry from the wind-farms configuration.
        api_fetcher: Client used for the lightning and storm queries.
        config: Full configuration; ``config['api_config']`` is used to
            determine the query date range.

    Returns:
        A result dict with ``farm_id``, ``name``, ``status`` and
        ``processing_time_seconds``. On success it also carries the report
        paths, the location bounds and the lightning/storm record counts; on
        failure it carries ``error`` with the exception text.

    Raises:
        KeyError: if ``farm`` has no ``farm_id``. Every other error is caught,
            logged with its traceback and reported as a 'failed' result.
    """
    farm_id = farm['farm_id']
    farm_name = farm.get('name', farm_id)
    # Taken before the try block so the failure path can always report a duration.
    start_time = datetime.now()

    logger.info(f"Processing farm: {farm_id} ({farm_name})")

    try:
        # Farm-specific settings go onto the global config BEFORE processing;
        # the report period is added further down, once it is known.
        update_global_config(farm)

        turbine_df = load_turbine_data(farm['coordinates_file'])
        logger.info(f"Loaded {len(turbine_df)} turbines")

        location_bounds = get_location_bounds(farm, turbine_df, api_fetcher)

        query_start, query_end = APIDataFetcher.determine_query_date_range(farm, config['api_config'])
        start_date_str = query_start.strftime('%Y-%m-%d')
        end_date_str = query_end.strftime('%Y-%m-%d')

        source_type = farm.get('lightning_source_type', 'api')

        if source_type == 'csv':
            lightning_df = load_lightning_data_from_csv(farm.get('lightning_csv'))
            logger.info(f"Loaded {len(lightning_df)} lightning records from CSV for {farm_id}")
        else:
            logger.info(f"Fetching lightning data from API for period: {start_date_str} to {end_date_str}")
            lightning_records = api_fetcher.fetch_lightning_data(
                center_lat=location_bounds['center_lat'],
                center_lng=location_bounds['center_lng'],
                radius_km=location_bounds['radius_km'],
                start_date=start_date_str,
                end_date=end_date_str,
            )
            lightning_df = convert_api_response_to_dataframe(lightning_records, 'lightning')
            logger.info(f"Converted {len(lightning_df)} lightning records to DataFrame")

        if len(lightning_df) == 0:
            logger.warning(f"No lightning data found for {farm_id}")
            # Guarantee the full column set even when there is nothing to report.
            lightning_df = pd.DataFrame(
                columns=['lat', 'lng', 'current', 'p_type', 'local_time', 'current_abs']
            )

        # Storm data always comes from the API, whatever the lightning source.
        storm_records = api_fetcher.fetch_storm_data(
            center_lat=location_bounds['center_lat'],
            center_lng=location_bounds['center_lng'],
            radius_km=location_bounds['radius_km'],
            start_date=start_date_str,
            end_date=end_date_str,
        )

        date_range_cfg = farm.get('api_params', {}).get('date_range', {})
        start_filter, end_filter = _select_date_filters(date_range_cfg, source_type)

        if len(lightning_df) > 0 and (start_filter is not None or end_filter is not None):
            lightning_df = filter_lightning_data_by_date_range(lightning_df, start_filter, end_filter)

        farm_tz = farm.get('report_config', {}).get('timezone')
        if len(lightning_df) > 0 and farm_tz:
            lightning_df = normalize_local_time_to_timezone(lightning_df, 'local_time', farm_tz)

        turbine_df = calculate_turbine_risks(turbine_df, lightning_df)

        group_data = create_turbine_groups(turbine_df)
        logger.info(f"Created {group_data['total_groups']} groups")

        # Display strings for the report period
        # (DD-MM-YYYY or DD-MM-YYYY HH:MM in local time).
        actual_start, actual_end = _report_period_display(
            lightning_df, source_type, date_range_cfg,
            start_filter, end_filter, query_start, query_end,
        )

        # Publish the period on the global config before the report is built.
        update_global_config(farm, actual_start, actual_end)

        output_dir = Path(farm['report_config']['output_directory'])
        output_dir.mkdir(parents=True, exist_ok=True)

        local_range = farm_local_date_range_from_config(farm)
        safe_name = slugify_ascii_underscore(farm.get("name", farm_id))
        docx_filename = (
            f"{safe_name}_{local_range.start_date_yyyy_mm_dd}"
            f"_{local_range.end_date_yyyy_mm_dd}_report.docx"
        )
        docx_path = output_dir / docx_filename

        create_docx_report(
            str(docx_path),
            turbine_df,
            lightning_df,
            storm_data_path=None,
            storm_data_records=storm_records if storm_records else None,
        )

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Successfully generated report for {farm_id} in {processing_time:.1f}s")

        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'success',
            'report_path': str(docx_path),
            'docx_path': str(docx_path),
            'pdf_path': None,
            'location': location_bounds,
            'processing_time_seconds': processing_time,
            'lightning_records': len(lightning_df),
            # An empty or missing storm response must not turn an already
            # written report into a failure.
            'storm_records': len(storm_records) if storm_records else 0,
        }

    except Exception as e:
        processing_time = (datetime.now() - start_time).total_seconds()
        logger.error(f"Failed to process farm {farm_id}: {e}", exc_info=True)

        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'failed',
            'error': str(e),
            'processing_time_seconds': processing_time,
        }
|
|
|
|
|
|
def generate_batch_summary(results: list, total_farms: int, enabled_count: int,
                           disabled_count: int, start_time: datetime) -> dict:
    """Build the summary dict for one batch run.

    Args:
        results: Per-farm result dicts; each must have a ``status`` key.
        total_farms: Number of farms in the configuration.
        enabled_count: Number of enabled farms.
        disabled_count: Number of disabled farms; also reported as ``skipped``.
        start_time: When the batch started; used for the total duration.

    Returns:
        Dict with the batch date and time, the farm counts, the number of
        processed / successful / failed results, the total duration in
        seconds and the ``results`` list itself.
    """
    # One clock reading for date, time and duration, so the three values can
    # never disagree (e.g. when the batch finishes right at midnight).
    finished_at = datetime.now()

    successful = sum(1 for r in results if r['status'] == 'success')
    failed = sum(1 for r in results if r['status'] == 'failed')

    return {
        'batch_date': finished_at.strftime('%Y-%m-%d'),
        'batch_time': finished_at.strftime('%H:%M:%S'),
        'total_farms': total_farms,
        'enabled_farms': enabled_count,
        'disabled_farms': disabled_count,
        'processed': len(results),
        'successful': successful,
        'failed': failed,
        'skipped': disabled_count,
        'processing_time_seconds': (finished_at - start_time).total_seconds(),
        'results': results,
    }
|
|
|
|
|
|
def save_batch_summary(summary: dict, output_dir: str):
    """Write ``summary`` as indented JSON into ``output_dir``.

    The file is named ``batch_summary_<batch_date>.json`` after
    ``summary['batch_date']``; missing directories are created. Returns the
    path of the written file.
    """
    target = Path(output_dir) / f"batch_summary_{summary['batch_date']}.json"
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open('w', encoding='utf-8') as stream:
        json.dump(summary, stream, indent=2)

    logger.info(f"Batch summary saved to {target}")
    return target
|
|
|
|
|
|
def list_farms(config: dict):
    """Print every configured farm with its enabled state, then the totals.

    A farm without an ``enabled`` key is shown as enabled.
    """
    print("\nWind Farms Configuration:")
    print("=" * 60)

    farms = config['wind_farms']
    for position, farm in enumerate(farms, start=1):
        marker = "✓ Enabled" if farm.get('enabled', True) else "✗ Disabled"
        print(f"{position}. {marker}: {farm['farm_id']} - {farm.get('name', 'N/A')}")

    enabled, disabled = filter_enabled_farms(farms)
    print(f"\nTotal: {len(farms)} farms ({len(enabled)} enabled, {len(disabled)} disabled)")
|
|
|
|
|
|
def main():
    """Command-line entry point: generate reports for the configured farms.

    Farm selection:
        * default: every farm whose ``enabled`` flag is not false;
        * ``--force-all``: every farm, ignoring the flag;
        * ``--farm-id ID``: only that farm; add ``--force`` to run it even
          when it is disabled.

    ``--output-dir`` replaces both each farm's
    ``report_config.output_directory`` and the directory the batch summary is
    written to. ``--list-farms`` prints the configured farms and exits.

    Exits with status 1 when the run is interrupted or fails outside the
    per-farm processing; a failed farm is only recorded in the summary.
    """
    parser = argparse.ArgumentParser(description='Batch generate lightning reports')
    parser.add_argument('--config', required=True, help='Path to wind_farms_config.json')
    parser.add_argument('--farm-id', help='Process specific farm only')
    parser.add_argument('--force-all', action='store_true',
                        help='Process all farms, ignoring enabled flag')
    parser.add_argument('--force', action='store_true',
                        help='Process even if disabled')
    parser.add_argument('--list-farms', action='store_true',
                        help='List all farms and their enabled status')
    parser.add_argument('--output-dir', help='Override output directory')

    args = parser.parse_args()

    try:
        config = load_wind_farms_config(args.config)

        if args.list_farms:
            list_farms(config)
            return

        api_config = config['api_config']
        api_fetcher = APIDataFetcher(
            base_url=api_config['base_url'],
            timeout=api_config.get('timeout_seconds', 30),
            retry_attempts=api_config.get('retry_attempts', 3)
        )

        wind_farms = config['wind_farms']
        enabled_farms, disabled_farms = filter_enabled_farms(wind_farms)

        if args.force_all:
            farms_to_process = wind_farms
            logger.info(f"Processing all {len(farms_to_process)} farms (--force-all)")
        elif args.force and args.farm_id:
            # --force lifts the enabled filter for the explicitly requested farm.
            farms_to_process = wind_farms
            logger.info(f"Ignoring enabled flag for farm '{args.farm_id}' (--force)")
        else:
            farms_to_process = enabled_farms

            if disabled_farms:
                logger.info(f"Skipping {len(disabled_farms)} disabled farms:")
                for farm in disabled_farms:
                    logger.info(f"  - {farm['farm_id']}: {farm.get('name', 'N/A')}")

        if args.farm_id:
            farms_to_process = [f for f in farms_to_process if f['farm_id'] == args.farm_id]
            if not farms_to_process:
                logger.error(f"Farm '{args.farm_id}' not found or not enabled")
                return

        if not farms_to_process:
            logger.warning("No farms to process")
            return

        logger.info(f"Processing {len(farms_to_process)} farm(s)")
        start_time = datetime.now()

        results = []
        for i, farm in enumerate(farms_to_process, 1):
            logger.info(f"\n[{i}/{len(farms_to_process)}] Processing {farm['farm_id']}...")
            if args.output_dir:
                # Shallow copies: the loaded configuration itself stays untouched.
                farm = {
                    **farm,
                    'report_config': {
                        **farm.get('report_config', {}),
                        'output_directory': args.output_dir,
                    },
                }
            results.append(process_farm(farm, api_fetcher, config))

        summary = generate_batch_summary(
            results,
            len(wind_farms),
            len(enabled_farms),
            len(disabled_farms),
            start_time
        )

        output_base = args.output_dir or config.get('output_base_directory', 'reports/')
        save_batch_summary(summary, output_base)

        print("\n" + "=" * 60)
        print("Batch Processing Summary")
        print("=" * 60)
        print(f"Total farms: {summary['total_farms']}")
        print(f"Enabled: {summary['enabled_farms']}")
        print(f"Disabled: {summary['disabled_farms']}")
        print(f"Processed: {summary['processed']}")
        print(f"Successful: {summary['successful']}")
        print(f"Failed: {summary['failed']}")
        print(f"Total time: {summary['processing_time_seconds']:.1f}s")
        print("=" * 60)

        if summary['failed'] > 0:
            print("\nFailed farms:")
            for result in results:
                if result['status'] == 'failed':
                    print(f"  - {result['farm_id']}: {result.get('error', 'Unknown error')}")

    except KeyboardInterrupt:
        logger.info("Batch processing interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Batch processing failed: {e}", exc_info=True)
        sys.exit(1)
|
|
|
|
|
|
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|