#!/usr/bin/env python3
"""
Batch generate lightning reports for multiple wind farms.

Reads a JSON configuration describing the farms, loads turbine coordinates,
obtains lightning data (CSV or API) and storm data (API) for each farm, and
writes one DOCX report per farm plus a JSON batch summary.
"""

import sys
import json
import argparse
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional

import pandas as pd
from dotenv import load_dotenv

from src.api.data_fetcher import APIDataFetcher
from src.data.loader import load_turbine_data, load_lightning_data_from_csv
from src.analysis.risk import calculate_turbine_risks
from src.analysis.grouping import create_turbine_groups
from src.reporting.docx import create_docx_report
from src.reporting.filename_utils import farm_local_date_range_from_config, slugify_ascii_underscore
from src.utils import (
    filter_lightning_data_by_date_range,
    format_date_ddmmyyyy,
    format_period_display_for_report,
    normalize_local_time_to_timezone,
)
from src.config import config as global_config

load_dotenv()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(f'batch_generation_{datetime.now().strftime("%Y-%m-%d")}.log')
    ]
)
logger = logging.getLogger(__name__)

# Columns of the placeholder frame used when a farm has no lightning records.
_EMPTY_LIGHTNING_COLUMNS = ['lat', 'lng', 'current', 'p_type', 'local_time', 'current_abs']


def load_wind_farms_config(config_path: str) -> dict:
    """Load wind farms configuration from JSON file.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        The parsed configuration.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist (logged, re-raised).
        json.JSONDecodeError: If the file is not valid JSON (logged, re-raised).
    """
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        logger.error(f"Configuration file not found: {config_path}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in configuration file: {e}")
        raise
    logger.info(f"Loaded configuration from {config_path}")
    return config


def filter_enabled_farms(wind_farms: list) -> tuple:
    """Split farms by their ``enabled`` flag.

    A farm without an ``enabled`` key counts as enabled.

    Returns:
        ``(enabled, disabled)`` - two lists preserving the input order.
    """
    enabled = [farm for farm in wind_farms if farm.get('enabled', True)]
    disabled = [farm for farm in wind_farms if not farm.get('enabled', True)]
    return enabled, disabled


def get_location_bounds(farm: dict, turbine_df: pd.DataFrame, api_fetcher: APIDataFetcher) -> dict:
    """Get location bounds for API query.

    With ``method == 'auto'`` the bounds are delegated to
    ``api_fetcher.calculate_location_bounds`` using the farm's largest distance
    ring and ``padding_km`` (default 5). Any other method returns the
    ``center_lat`` / ``center_lng`` / ``radius_km`` values from the config.

    Raises:
        KeyError: If ``api_params.location_bounds`` or a required key is missing.
    """
    location_config = farm['api_params']['location_bounds']

    if location_config['method'] != 'auto':
        return {
            'center_lat': location_config['center_lat'],
            'center_lng': location_config['center_lng'],
            'radius_km': location_config['radius_km']
        }

    # Same fallback as update_global_config: a farm without its own rings
    # uses the globally configured ones instead of raising KeyError.
    distance_rings = farm.get('distance_rings', global_config.distance_rings)
    padding_km = location_config.get('padding_km', 5)
    return api_fetcher.calculate_location_bounds(turbine_df, max(distance_rings), padding_km)


def update_global_config(farm: dict, start_date: Optional[str] = None, end_date: Optional[str] = None):
    """Update global config with farm-specific settings.

    Mutates the shared ``global_config`` object in place.

    Args:
        farm: Farm entry from the configuration file.
        start_date: Display string for the analysis start; applied only when
            ``end_date`` is also given.
        end_date: Display string for the analysis end.
    """
    global_config.distance_rings = farm.get('distance_rings', global_config.distance_rings)
    global_config.ring_colors = farm.get('ring_colors', global_config.ring_colors)
    # DOCX title is based on the top-level `name` field for the farm.
    global_config.wind_farm_name = farm.get('name', 'Unknown')
    # Tolerate a missing report_config, matching how process_farm reads it.
    global_config.timezone = farm.get('report_config', {}).get('timezone', None)

    # Lightning data source configuration (auto-detected from farm config)
    lightning_source_type = farm.get('lightning_source_type')
    if lightning_source_type:
        global_config.lightning_source_type = lightning_source_type
        if lightning_source_type == 'csv':
            global_config.lightning_csv = farm.get('lightning_csv')
        elif lightning_source_type == 'api':
            global_config.lightning_json = farm.get('lightning_json')

    # Set date range if provided (for reporting)
    if start_date and end_date:
        global_config.analysis_start_date = start_date
        global_config.analysis_end_date = end_date

    # Update grouping params if specified in farm config
    if 'grouping_params' in farm:
        global_config.grouping_params = farm['grouping_params']

    logger.debug(f"Updated global config: distance_rings={global_config.distance_rings}, wind_farm_name={global_config.wind_farm_name}")


def convert_api_response_to_dataframe(records: list, data_type: str = 'lightning') -> pd.DataFrame:
    """
    Convert API response to DataFrame format expected by existing code.

    For lightning data, ``local_time`` is parsed to datetimes (taken from
    ``timestamp`` when ``local_time`` is absent) and ``current_abs`` is derived
    from ``current`` when missing.

    Args:
        records: List of records from API
        data_type: 'lightning' or 'storm'

    Returns:
        DataFrame in expected format; for empty input, an empty frame (with the
        base lightning columns when ``data_type`` is 'lightning').
    """
    if not records:
        if data_type == 'lightning':
            return pd.DataFrame(columns=['lat', 'lng', 'current', 'p_type', 'local_time'])
        return pd.DataFrame()

    df = pd.DataFrame(records)

    if data_type == 'lightning':
        if 'local_time' in df.columns:
            df['local_time'] = pd.to_datetime(df['local_time'])
        elif 'timestamp' in df.columns:
            df['local_time'] = pd.to_datetime(df['timestamp'])

        if 'current_abs' not in df.columns and 'current' in df.columns:
            df['current_abs'] = df['current'].abs()

    return df


def _resolve_date_filters(date_range_cfg: dict, source_type: str) -> tuple:
    """Return the (start, end) filter values configured for API-sourced data.

    CSV sources are never filtered, so ``(None, None)`` is returned for them.
    """
    if source_type == 'csv':
        return None, None
    if date_range_cfg.get('method') == 'manual':
        return date_range_cfg.get('start_date'), date_range_cfg.get('end_date')
    query_range_cfg = date_range_cfg.get('query_range', {})
    return query_range_cfg.get('start_date'), query_range_cfg.get('end_date')


def _resolve_report_period(source_type, lightning_df, date_range_cfg,
                           start_filter, end_filter, query_start, query_end) -> tuple:
    """Return the (start, end) display strings shown in the report."""
    # CSV data: the period is whatever the data itself spans.
    if source_type == 'csv' and len(lightning_df) > 0:
        local_times = pd.to_datetime(lightning_df['local_time'])
        return (
            local_times.min().strftime('%d-%m-%Y %H:%M'),
            local_times.max().strftime('%d-%m-%Y %H:%M'),
        )

    manual_range_given = (
        date_range_cfg.get('method') == 'manual'
        and date_range_cfg.get('start_date') is not None
        and date_range_cfg.get('end_date') is not None
    )
    filters_given = start_filter is not None and end_filter is not None

    actual_start = actual_end = None
    if manual_range_given or filters_given:
        actual_start, actual_end = format_period_display_for_report(start_filter, end_filter)

    # Fall back to the queried range when nothing was configured or the
    # configured values could not be turned into display strings.
    if not actual_start or not actual_end:
        actual_start = format_date_ddmmyyyy(query_start)
        actual_end = format_date_ddmmyyyy(query_end)
    return actual_start, actual_end


def process_farm(farm: dict, api_fetcher: APIDataFetcher, config: dict,
                 output_dir: Optional[str] = None) -> dict:
    """Process a single farm and generate report.

    Args:
        farm: Farm entry from the configuration file; ``farm_id`` is required.
        api_fetcher: Fetcher used for lightning and storm data.
        config: Full configuration; ``config['api_config']`` is read.
        output_dir: Optional directory overriding the farm's
            ``report_config.output_directory``.

    Returns:
        A result dict with ``status`` 'success' or 'failed'. Exceptions raised
        while processing are logged and reported in the ``error`` field, not
        propagated.

    Raises:
        KeyError: If ``farm`` has no ``farm_id`` (raised before the guarded section).
    """
    farm_id = farm['farm_id']
    farm_name = farm.get('name', farm_id)

    logger.info(f"Processing farm: {farm_id} ({farm_name})")
    start_time = datetime.now()

    try:
        # Update global config with farm-specific settings BEFORE processing
        # (dates will be set later after they're determined)
        update_global_config(farm)

        turbine_df = load_turbine_data(farm['coordinates_file'])
        logger.info(f"Loaded {len(turbine_df)} turbines")

        location_bounds = get_location_bounds(farm, turbine_df, api_fetcher)

        query_start, query_end = APIDataFetcher.determine_query_date_range(farm, config['api_config'])
        start_date_str = query_start.strftime('%Y-%m-%d')
        end_date_str = query_end.strftime('%Y-%m-%d')

        source_type = farm.get('lightning_source_type', 'api')
        if source_type == 'csv':
            lightning_df = load_lightning_data_from_csv(farm.get('lightning_csv'))
            logger.info(f"Loaded {len(lightning_df)} lightning records from CSV for {farm_id}")
        else:
            logger.info(f"Fetching lightning data from API for period: {start_date_str} to {end_date_str}")
            lightning_records = api_fetcher.fetch_lightning_data(
                center_lat=location_bounds['center_lat'],
                center_lng=location_bounds['center_lng'],
                radius_km=location_bounds['radius_km'],
                start_date=start_date_str,
                end_date=end_date_str
            )
            lightning_df = convert_api_response_to_dataframe(lightning_records, 'lightning')
            logger.info(f"Converted {len(lightning_df)} lightning records to DataFrame")

        if len(lightning_df) == 0:
            logger.warning(f"No lightning data found for {farm_id}")
            lightning_df = pd.DataFrame(columns=_EMPTY_LIGHTNING_COLUMNS)

        storm_records = api_fetcher.fetch_storm_data(
            center_lat=location_bounds['center_lat'],
            center_lng=location_bounds['center_lng'],
            radius_km=location_bounds['radius_km'],
            start_date=start_date_str,
            end_date=end_date_str
        )

        date_range_cfg = farm.get('api_params', {}).get('date_range', {})
        start_filter, end_filter = _resolve_date_filters(date_range_cfg, source_type)

        if len(lightning_df) > 0 and (start_filter is not None or end_filter is not None):
            lightning_df = filter_lightning_data_by_date_range(lightning_df, start_filter, end_filter)

        farm_tz = farm.get('report_config', {}).get('timezone')
        if len(lightning_df) > 0 and farm_tz:
            lightning_df = normalize_local_time_to_timezone(lightning_df, 'local_time', farm_tz)

        turbine_df = calculate_turbine_risks(turbine_df, lightning_df)

        group_data = create_turbine_groups(turbine_df)
        logger.info(f"Created {group_data['total_groups']} groups")

        # Determine actual dates for report (display strings: DD-MM-YYYY or DD-MM-YYYY HH:MM in local time)
        actual_start, actual_end = _resolve_report_period(
            source_type, lightning_df, date_range_cfg,
            start_filter, end_filter, query_start, query_end
        )

        # Update global config with dates for report generation
        update_global_config(farm, actual_start, actual_end)

        report_dir = Path(output_dir) if output_dir else Path(farm['report_config']['output_directory'])
        report_dir.mkdir(parents=True, exist_ok=True)

        local_range = farm_local_date_range_from_config(farm)
        safe_name = slugify_ascii_underscore(farm.get("name", farm_id))
        docx_filename = (
            f"{safe_name}_{local_range.start_date_yyyy_mm_dd}"
            f"_{local_range.end_date_yyyy_mm_dd}_report.docx"
        )
        docx_path = report_dir / docx_filename

        create_docx_report(
            str(docx_path),
            turbine_df,
            lightning_df,
            storm_data_path=None,
            storm_data_records=storm_records if storm_records else None,
        )

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Successfully generated report for {farm_id} in {processing_time:.1f}s")

        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'success',
            'report_path': str(docx_path),
            'docx_path': str(docx_path),
            'pdf_path': None,
            'location': location_bounds,
            'processing_time_seconds': processing_time,
            'lightning_records': len(lightning_df),
            # The fetcher may hand back None/empty; a bare len() would turn an
            # already-written report into a reported failure.
            'storm_records': len(storm_records) if storm_records else 0
        }

    except Exception as e:
        processing_time = (datetime.now() - start_time).total_seconds()
        logger.error(f"Failed to process farm {farm_id}: {e}", exc_info=True)

        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'failed',
            'error': str(e),
            'processing_time_seconds': processing_time
        }


def generate_batch_summary(results: list, total_farms: int, enabled_count: int,
                           disabled_count: int, start_time: datetime) -> dict:
    """Generate batch summary report.

    Args:
        results: Per-farm result dicts, each carrying a ``status`` key.
        total_farms: Number of farms in the configuration.
        enabled_count: Number of enabled farms.
        disabled_count: Number of disabled farms; also reported as ``skipped``.
        start_time: When the batch started, used for the elapsed time.

    Returns:
        A summary dict with the counts, elapsed seconds and ``results``.
    """
    # Sample the clock once so date, time and duration describe one instant.
    now = datetime.now()
    status_counts = {'success': 0, 'failed': 0}
    for result in results:
        if result['status'] in status_counts:
            status_counts[result['status']] += 1

    return {
        'batch_date': now.strftime('%Y-%m-%d'),
        'batch_time': now.strftime('%H:%M:%S'),
        'total_farms': total_farms,
        'enabled_farms': enabled_count,
        'disabled_farms': disabled_count,
        'processed': len(results),
        'successful': status_counts['success'],
        'failed': status_counts['failed'],
        'skipped': disabled_count,
        'processing_time_seconds': (now - start_time).total_seconds(),
        'results': results
    }


def save_batch_summary(summary: dict, output_dir: str) -> Path:
    """Save batch summary to JSON file.

    Writes ``batch_summary_<batch_date>.json`` into ``output_dir``, creating
    the directory if needed.

    Returns:
        Path of the written file.
    """
    output_path = Path(output_dir) / f"batch_summary_{summary['batch_date']}.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)

    logger.info(f"Batch summary saved to {output_path}")
    return output_path


def list_farms(config: dict):
    """List all farms and their enabled status."""
    wind_farms = config['wind_farms']

    print("\nWind Farms Configuration:")
    print("=" * 60)

    for i, farm in enumerate(wind_farms, 1):
        status = "✓ Enabled" if farm.get('enabled', True) else "✗ Disabled"
        farm_id = farm['farm_id']
        name = farm.get('name', 'N/A')
        print(f"{i}. {status}: {farm_id} - {name}")

    enabled, disabled = filter_enabled_farms(wind_farms)
    print(f"\nTotal: {len(wind_farms)} farms ({len(enabled)} enabled, {len(disabled)} disabled)")


def main():
    """Command-line entry point: parse arguments, process farms, print a summary.

    Exits with status 1 on Ctrl-C or on an unhandled error. Per-farm failures
    are reported in the summary and do not change the exit status.
    """
    parser = argparse.ArgumentParser(description='Batch generate lightning reports')
    parser.add_argument('--config', required=True, help='Path to wind_farms_config.json')
    parser.add_argument('--farm-id', help='Process specific farm only')
    parser.add_argument('--force-all', action='store_true', help='Process all farms, ignoring enabled flag')
    parser.add_argument('--force', action='store_true', help='Process even if disabled')
    parser.add_argument('--list-farms', action='store_true', help='List all farms and their enabled status')
    parser.add_argument('--output-dir', help='Override output directory')

    args = parser.parse_args()

    try:
        config = load_wind_farms_config(args.config)

        if args.list_farms:
            list_farms(config)
            return

        api_config = config['api_config']
        api_fetcher = APIDataFetcher(
            base_url=api_config['base_url'],
            timeout=api_config.get('timeout_seconds', 30),
            retry_attempts=api_config.get('retry_attempts', 3)
        )

        wind_farms = config['wind_farms']
        enabled_farms, disabled_farms = filter_enabled_farms(wind_farms)

        if args.force_all:
            farms_to_process = wind_farms
            logger.info(f"Processing all {len(farms_to_process)} farms (--force-all)")
        elif args.force:
            # --force lifts the enabled filter, so a disabled farm can be
            # selected (typically together with --farm-id).
            farms_to_process = wind_farms
            logger.info("Ignoring enabled flag (--force)")
        else:
            farms_to_process = enabled_farms
            if disabled_farms:
                logger.info(f"Skipping {len(disabled_farms)} disabled farms:")
                for farm in disabled_farms:
                    logger.info(f"  - {farm['farm_id']}: {farm.get('name', 'N/A')}")

        if args.farm_id:
            farms_to_process = [f for f in farms_to_process if f['farm_id'] == args.farm_id]
            if not farms_to_process:
                logger.error(f"Farm '{args.farm_id}' not found or not enabled")
                return

        if not farms_to_process:
            logger.warning("No farms to process")
            return

        logger.info(f"Processing {len(farms_to_process)} farm(s)")

        start_time = datetime.now()
        results = []

        for i, farm in enumerate(farms_to_process, 1):
            logger.info(f"\n[{i}/{len(farms_to_process)}] Processing {farm['farm_id']}...")
            results.append(process_farm(farm, api_fetcher, config, output_dir=args.output_dir))

        summary = generate_batch_summary(
            results,
            len(wind_farms),
            len(enabled_farms),
            len(disabled_farms),
            start_time
        )

        # --output-dir, when given, also relocates the batch summary.
        output_base = args.output_dir or config.get('output_base_directory', 'reports/')
        save_batch_summary(summary, output_base)

        print("\n" + "=" * 60)
        print("Batch Processing Summary")
        print("=" * 60)
        print(f"Total farms: {summary['total_farms']}")
        print(f"Enabled: {summary['enabled_farms']}")
        print(f"Disabled: {summary['disabled_farms']}")
        print(f"Processed: {summary['processed']}")
        print(f"Successful: {summary['successful']}")
        print(f"Failed: {summary['failed']}")
        print(f"Total time: {summary['processing_time_seconds']:.1f}s")
        print("=" * 60)

        if summary['failed'] > 0:
            print("\nFailed farms:")
            for result in results:
                if result['status'] == 'failed':
                    print(f"  - {result['farm_id']}: {result.get('error', 'Unknown error')}")

    except KeyboardInterrupt:
        logger.info("Batch processing interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Batch processing failed: {e}", exc_info=True)
        sys.exit(1)


if __name__ == '__main__':
    main()