BE-LightningReport/batch_generate.py
erdemerikci 45d80dfaa6 Initial import: Lightning_Report with n8n integration
Fork of Lightning_Report adding:
- n8n_report_branch.json: workflow branch for storm-triggered report delivery
- report_service/: FastAPI microservice wrapping create_docx_report() so n8n
  can produce byte-identical reports without fighting the Python Code sandbox

Made-with: Cursor
2026-04-22 15:13:08 +03:00

464 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Batch generate lightning reports for multiple wind farms.
"""
import sys
import json
import argparse
import logging
from datetime import datetime
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
from src.api.data_fetcher import APIDataFetcher
from src.data.loader import load_turbine_data, load_lightning_data_from_csv
from src.analysis.risk import calculate_turbine_risks
from src.analysis.grouping import create_turbine_groups
from src.reporting.docx import create_docx_report
from src.reporting.filename_utils import farm_local_date_range_from_config, slugify_ascii_underscore
from src.utils import (
filter_lightning_data_by_date_range,
format_date_ddmmyyyy,
format_period_display_for_report,
normalize_local_time_to_timezone,
)
from src.config import config as global_config
# Load variables from a .env file (python-dotenv) before the rest of the
# module runs.
load_dotenv()
# Configure root logging at import time: INFO and above go both to stdout and
# to a file named after today's date. The file path is relative, so it is
# created in the current working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(f'batch_generation_{datetime.now().strftime("%Y-%m-%d")}.log')
    ]
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def load_wind_farms_config(config_path: str) -> dict:
    """Read and parse the wind farms configuration file.

    Args:
        config_path: Path to the JSON configuration file (read as UTF-8).

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist. The error is
            logged and re-raised.
        json.JSONDecodeError: If the file is not valid JSON. The error is
            logged and re-raised.
    """
    try:
        with open(config_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except FileNotFoundError:
        logger.error(f"Configuration file not found: {config_path}")
        raise
    except json.JSONDecodeError as exc:
        logger.error(f"Invalid JSON in configuration file: {exc}")
        raise
    logger.info(f"Loaded configuration from {config_path}")
    return parsed
def filter_enabled_farms(wind_farms: list) -> tuple:
    """Partition farms into enabled and disabled lists.

    A farm whose ``enabled`` key is missing counts as enabled; otherwise the
    truthiness of the value decides.

    Args:
        wind_farms: Farm configuration dicts.

    Returns:
        ``(enabled, disabled)``, each preserving the input order.
    """
    buckets = {True: [], False: []}
    for entry in wind_farms:
        buckets[bool(entry.get('enabled', True))].append(entry)
    return buckets[True], buckets[False]
def get_location_bounds(farm: dict, turbine_df: pd.DataFrame, api_fetcher: APIDataFetcher) -> dict:
    """Resolve the circular search area used for API queries.

    Reads ``farm['api_params']['location_bounds']``. When its ``method`` is
    ``'auto'`` the bounds are delegated to
    ``api_fetcher.calculate_location_bounds`` using the turbine table, the
    largest entry of ``farm['distance_rings']`` and ``padding_km``
    (default 5). Any other method takes the centre and radius verbatim from
    the configuration.

    Args:
        farm: Farm configuration dict.
        turbine_df: Turbine table, only used for the ``'auto'`` method.
        api_fetcher: Fetcher providing ``calculate_location_bounds``.

    Returns:
        For non-auto methods, a dict with ``center_lat``, ``center_lng`` and
        ``radius_km``; for ``'auto'``, whatever the fetcher returns.

    Raises:
        KeyError: If a required configuration key is missing.
    """
    bounds_cfg = farm['api_params']['location_bounds']
    if bounds_cfg['method'] != 'auto':
        # Explicit bounds: copy the three values straight from the config.
        return {key: bounds_cfg[key] for key in ('center_lat', 'center_lng', 'radius_km')}
    outer_ring = max(farm['distance_rings'])
    padding = bounds_cfg.get('padding_km', 5)
    return api_fetcher.calculate_location_bounds(turbine_df, outer_ring, padding)
def update_global_config(farm: dict, start_date: str = None, end_date: str = None):
    """Copy farm-specific settings onto the shared ``global_config`` object.

    ``wind_farm_name`` and ``timezone`` are always overwritten. The other
    settings are only replaced when the farm provides them, so values left by
    a previously processed farm stay in place otherwise.

    Args:
        farm: Farm configuration dict; must contain ``report_config``.
        start_date: Report period start; applied only together with
            ``end_date``.
        end_date: Report period end; applied only together with
            ``start_date``.

    Raises:
        KeyError: If ``farm`` has no ``report_config`` entry.
    """
    global_config.distance_rings = farm.get('distance_rings', global_config.distance_rings)
    global_config.ring_colors = farm.get('ring_colors', global_config.ring_colors)
    # The DOCX title comes from the farm's top-level `name` field.
    global_config.wind_farm_name = farm.get('name', 'Unknown')
    global_config.timezone = farm['report_config'].get('timezone', None)

    # Lightning data source: only touched when the farm names a source type.
    source_kind = farm.get('lightning_source_type')
    if source_kind:
        global_config.lightning_source_type = source_kind
        if source_kind == 'csv':
            global_config.lightning_csv = farm.get('lightning_csv')
        elif source_kind == 'api':
            global_config.lightning_json = farm.get('lightning_json')

    # Reporting period: both ends must be given for either to be stored.
    has_period = bool(start_date and end_date)
    if has_period:
        global_config.analysis_start_date = start_date
        global_config.analysis_end_date = end_date

    # Optional per-farm grouping overrides.
    if 'grouping_params' in farm:
        global_config.grouping_params = farm['grouping_params']

    logger.debug(f"Updated global config: distance_rings={global_config.distance_rings}, wind_farm_name={global_config.wind_farm_name}")
def convert_api_response_to_dataframe(records: list, data_type: str = 'lightning') -> pd.DataFrame:
    """Convert an API response into the DataFrame layout used by this script.

    For lightning data, ``local_time`` is parsed to datetimes (falling back to
    a ``timestamp`` column when ``local_time`` is absent) and ``current_abs``
    is derived from ``current`` when it is not already present. Other data
    types are converted without any post-processing.

    The empty lightning result carries the same columns as a populated one,
    including ``current_abs``, so callers do not need to special-case it.

    Args:
        records: List of record dicts from the API; ``None`` or an empty list
            yields an empty frame.
        data_type: ``'lightning'`` or ``'storm'``.

    Returns:
        DataFrame built from ``records``.
    """
    is_lightning = data_type == 'lightning'
    if not records:
        if is_lightning:
            return pd.DataFrame(
                columns=['lat', 'lng', 'current', 'p_type', 'local_time', 'current_abs']
            )
        return pd.DataFrame()

    df = pd.DataFrame(records)
    if is_lightning:
        # Prefer an existing local_time column; otherwise derive it from timestamp.
        if 'local_time' in df.columns:
            df['local_time'] = pd.to_datetime(df['local_time'])
        elif 'timestamp' in df.columns:
            df['local_time'] = pd.to_datetime(df['timestamp'])
        if 'current_abs' not in df.columns and 'current' in df.columns:
            df['current_abs'] = df['current'].abs()
    return df
def process_farm(farm: dict, api_fetcher: APIDataFetcher, config: dict) -> dict:
    """Process a single farm and generate its DOCX report.

    Steps: apply the farm's settings to the global config, load turbines,
    resolve the query area and date range, load lightning data (CSV or API),
    fetch storm data, filter and normalise lightning timestamps, compute
    turbine risks and groups, work out the displayed report period, and write
    the report into ``farm['report_config']['output_directory']``.

    Args:
        farm: Farm configuration dict; ``farm_id`` is required.
        api_fetcher: Fetcher used for lightning/storm queries and location
            bounds.
        config: Full batch configuration; ``config['api_config']`` is passed
            to the date-range resolution.

    Returns:
        A result dict. On success it has ``status == 'success'`` plus the
        report path, location bounds, timing and record counts. Any exception
        raised during processing is logged and reported as
        ``status == 'failed'`` with the error text instead of propagating.

    Raises:
        KeyError: If ``farm`` has no ``farm_id`` (read before error handling
            starts).
    """
    farm_id = farm['farm_id']
    farm_name = farm.get('name', farm_id)
    logger.info(f"Processing farm: {farm_id} ({farm_name})")
    # Taken before the try block so the failure path can always report timing.
    start_time = datetime.now()
    try:
        # Update global config with farm-specific settings BEFORE processing
        # (dates are set later, once they have been determined).
        update_global_config(farm)

        turbine_file = farm['coordinates_file']
        turbine_df = load_turbine_data(turbine_file)
        logger.info(f"Loaded {len(turbine_df)} turbines")

        location_bounds = get_location_bounds(farm, turbine_df, api_fetcher)
        query_start, query_end = APIDataFetcher.determine_query_date_range(farm, config['api_config'])
        start_date_str = query_start.strftime('%Y-%m-%d')
        end_date_str = query_end.strftime('%Y-%m-%d')

        source_type = farm.get('lightning_source_type', 'api')
        if source_type == 'csv':
            lightning_df = load_lightning_data_from_csv(farm.get('lightning_csv'))
            logger.info(f"Loaded {len(lightning_df)} lightning records from CSV for {farm_id}")
        else:
            logger.info(f"Fetching lightning data from API for period: {start_date_str} to {end_date_str}")
            lightning_records = api_fetcher.fetch_lightning_data(
                center_lat=location_bounds['center_lat'],
                center_lng=location_bounds['center_lng'],
                radius_km=location_bounds['radius_km'],
                start_date=start_date_str,
                end_date=end_date_str
            )
            lightning_df = convert_api_response_to_dataframe(lightning_records, 'lightning')
            logger.info(f"Converted {len(lightning_df)} lightning records to DataFrame")

        if len(lightning_df) == 0:
            logger.warning(f"No lightning data found for {farm_id}")
            # Use a frame with the full expected column set for downstream code.
            lightning_df = pd.DataFrame(columns=['lat', 'lng', 'current', 'p_type', 'local_time', 'current_abs'])

        # Storm data is fetched from the API regardless of the lightning source.
        storm_records = api_fetcher.fetch_storm_data(
            center_lat=location_bounds['center_lat'],
            center_lng=location_bounds['center_lng'],
            radius_km=location_bounds['radius_km'],
            start_date=start_date_str,
            end_date=end_date_str
        )

        # Date filters apply to API data only; CSV data is used as loaded.
        date_range_cfg = farm.get('api_params', {}).get('date_range', {})
        start_filter = None
        end_filter = None
        method = date_range_cfg.get('method')
        if source_type != 'csv':
            if method == 'manual':
                start_filter = date_range_cfg.get('start_date')
                end_filter = date_range_cfg.get('end_date')
            else:
                query_range_cfg = date_range_cfg.get('query_range', {})
                start_filter = query_range_cfg.get('start_date')
                end_filter = query_range_cfg.get('end_date')
        if len(lightning_df) > 0 and (start_filter is not None or end_filter is not None):
            lightning_df = filter_lightning_data_by_date_range(lightning_df, start_filter, end_filter)

        farm_tz = farm.get('report_config', {}).get('timezone')
        if len(lightning_df) > 0 and farm_tz:
            lightning_df = normalize_local_time_to_timezone(lightning_df, 'local_time', farm_tz)

        turbine_df = calculate_turbine_risks(turbine_df, lightning_df)
        group_data = create_turbine_groups(turbine_df)
        logger.info(f"Created {group_data['total_groups']} groups")

        # Determine the period shown in the report
        # (display strings: DD-MM-YYYY or DD-MM-YYYY HH:MM).
        if source_type == 'csv' and len(lightning_df) > 0:
            # CSV data: the period is the span of the records themselves.
            local_times = pd.to_datetime(lightning_df['local_time'])
            actual_start = local_times.min().strftime('%d-%m-%Y %H:%M')
            actual_end = local_times.max().strftime('%d-%m-%Y %H:%M')
        else:
            manual_range_set = (
                method == 'manual'
                and date_range_cfg.get('start_date') is not None
                and date_range_cfg.get('end_date') is not None
            )
            filters_set = start_filter is not None and end_filter is not None
            actual_start = actual_end = None
            if manual_range_set or filters_set:
                actual_start, actual_end = format_period_display_for_report(start_filter, end_filter)
            # Fall back to the queried range when no usable display period exists.
            if not actual_start or not actual_end:
                actual_start = format_date_ddmmyyyy(query_start)
                actual_end = format_date_ddmmyyyy(query_end)

        # Update global config with the report period before generating the report.
        update_global_config(farm, actual_start, actual_end)

        output_dir = Path(farm['report_config']['output_directory'])
        output_dir.mkdir(parents=True, exist_ok=True)
        local_range = farm_local_date_range_from_config(farm)
        safe_name = slugify_ascii_underscore(farm.get("name", farm_id))
        docx_filename = (
            f"{safe_name}_{local_range.start_date_yyyy_mm_dd}"
            f"_{local_range.end_date_yyyy_mm_dd}_report.docx"
        )
        docx_path = output_dir / docx_filename
        create_docx_report(
            str(docx_path),
            turbine_df,
            lightning_df,
            storm_data_path=None,
            storm_data_records=storm_records if storm_records else None,
        )

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"Successfully generated report for {farm_id} in {processing_time:.1f}s")
        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'success',
            'report_path': str(docx_path),
            'docx_path': str(docx_path),
            'pdf_path': None,
            'location': location_bounds,
            'processing_time_seconds': processing_time,
            'lightning_records': len(lightning_df),
            # The fetcher may return None/empty; the report is still a success then.
            'storm_records': len(storm_records) if storm_records else 0
        }
    except Exception as e:
        processing_time = (datetime.now() - start_time).total_seconds()
        logger.error(f"Failed to process farm {farm_id}: {e}", exc_info=True)
        return {
            'farm_id': farm_id,
            'name': farm_name,
            'status': 'failed',
            'error': str(e),
            'processing_time_seconds': processing_time
        }
def generate_batch_summary(results: list, total_farms: int, enabled_count: int,
                           disabled_count: int, start_time: datetime) -> dict:
    """Build the summary dict for a finished batch run.

    The current time is sampled once, so ``batch_date``, ``batch_time`` and
    ``processing_time_seconds`` all describe the same instant.

    Args:
        results: Per-farm result dicts, each with a ``status`` of
            ``'success'`` or ``'failed'``.
        total_farms: Number of farms in the configuration.
        enabled_count: Number of enabled farms.
        disabled_count: Number of disabled farms; also reported as
            ``skipped``.
        start_time: When the batch started, comparable with
            ``datetime.now()``.

    Returns:
        Summary dict with counts, timing and the original ``results`` list.
    """
    finished_at = datetime.now()
    successful = sum(1 for r in results if r['status'] == 'success')
    failed = sum(1 for r in results if r['status'] == 'failed')
    return {
        'batch_date': finished_at.strftime('%Y-%m-%d'),
        'batch_time': finished_at.strftime('%H:%M:%S'),
        'total_farms': total_farms,
        'enabled_farms': enabled_count,
        'disabled_farms': disabled_count,
        'processed': len(results),
        'successful': successful,
        'failed': failed,
        'skipped': disabled_count,
        'processing_time_seconds': (finished_at - start_time).total_seconds(),
        'results': results
    }
def save_batch_summary(summary: dict, output_dir: str):
    """Write the batch summary as indented JSON and return the file path.

    The file is named ``batch_summary_<batch_date>.json`` inside
    ``output_dir``; missing directories are created first.

    Args:
        summary: Summary dict; must contain ``batch_date`` and be JSON
            serialisable.
        output_dir: Directory that receives the summary file.

    Returns:
        ``Path`` of the written file.
    """
    file_name = f"batch_summary_{summary['batch_date']}.json"
    target = Path(output_dir) / file_name
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w', encoding='utf-8') as sink:
        json.dump(summary, sink, indent=2)
    logger.info(f"Batch summary saved to {target}")
    return target
def list_farms(config: dict):
    """Print every configured farm with its enabled status, then the totals.

    Args:
        config: Batch configuration containing a ``wind_farms`` list. Each
            farm needs a ``farm_id``; ``name`` and ``enabled`` are optional
            (a missing ``enabled`` counts as enabled).
    """
    print("\nWind Farms Configuration:")
    print("=" * 60)
    for position, farm in enumerate(config['wind_farms'], 1):
        label = "✓ Enabled" if farm.get('enabled', True) else "✗ Disabled"
        print(f"{position}. {label}: {farm['farm_id']} - {farm.get('name', 'N/A')}")
    active, inactive = filter_enabled_farms(config['wind_farms'])
    print(f"\nTotal: {len(config['wind_farms'])} farms ({len(active)} enabled, {len(inactive)} disabled)")
def main():
    """Command-line entry point: select farms, process them, report a summary.

    Farm selection:
      * default: only enabled farms;
      * ``--force-all``: every farm, ignoring the enabled flag;
      * ``--farm-id ID``: restrict the selection to one farm;
      * ``--force`` with ``--farm-id``: process that farm even if disabled.

    ``--list-farms`` prints the farm list and exits without processing.
    After processing, a JSON summary is saved under the configuration's
    ``output_base_directory`` (default ``reports/``) and printed to stdout.

    Exits with status 1 on Ctrl-C or on any unhandled error in the batch.
    """
    parser = argparse.ArgumentParser(description='Batch generate lightning reports')
    parser.add_argument('--config', required=True, help='Path to wind_farms_config.json')
    parser.add_argument('--farm-id', help='Process specific farm only')
    parser.add_argument('--force-all', action='store_true',
                        help='Process all farms, ignoring enabled flag')
    parser.add_argument('--force', action='store_true',
                        help='Process even if disabled')
    parser.add_argument('--list-farms', action='store_true',
                        help='List all farms and their enabled status')
    parser.add_argument('--output-dir', help='Override output directory')
    args = parser.parse_args()

    try:
        config = load_wind_farms_config(args.config)
        if args.list_farms:
            list_farms(config)
            return

        # Surface options that would otherwise be silently ignored.
        if args.output_dir:
            logger.warning(
                "--output-dir is not applied yet; using output directories from the configuration"
            )
        if args.force and not args.farm_id and not args.force_all:
            logger.warning(
                "--force has no effect without --farm-id; use --force-all to include disabled farms"
            )

        api_config = config['api_config']
        api_fetcher = APIDataFetcher(
            base_url=api_config['base_url'],
            timeout=api_config.get('timeout_seconds', 30),
            retry_attempts=api_config.get('retry_attempts', 3)
        )

        wind_farms = config['wind_farms']
        enabled_farms, disabled_farms = filter_enabled_farms(wind_farms)
        if args.force_all:
            farms_to_process = wind_farms
            logger.info(f"Processing all {len(farms_to_process)} farms (--force-all)")
        elif args.force and args.farm_id:
            # An explicitly requested farm may run even when it is disabled.
            farms_to_process = wind_farms
        else:
            farms_to_process = enabled_farms
            if disabled_farms:
                logger.info(f"Skipping {len(disabled_farms)} disabled farms:")
                for farm in disabled_farms:
                    logger.info(f"  - {farm['farm_id']}: {farm.get('name', 'N/A')}")

        if args.farm_id:
            farms_to_process = [f for f in farms_to_process if f['farm_id'] == args.farm_id]
            if not farms_to_process:
                logger.error(f"Farm '{args.farm_id}' not found or not enabled")
                return
        if not farms_to_process:
            logger.warning("No farms to process")
            return

        logger.info(f"Processing {len(farms_to_process)} farm(s)")
        start_time = datetime.now()
        results = []
        for i, farm in enumerate(farms_to_process, 1):
            logger.info(f"\n[{i}/{len(farms_to_process)}] Processing {farm['farm_id']}...")
            # process_farm reports failures in its result instead of raising,
            # so one bad farm does not stop the batch.
            results.append(process_farm(farm, api_fetcher, config))

        summary = generate_batch_summary(
            results,
            len(wind_farms),
            len(enabled_farms),
            len(disabled_farms),
            start_time
        )
        output_base = config.get('output_base_directory', 'reports/')
        save_batch_summary(summary, output_base)

        print("\n" + "=" * 60)
        print("Batch Processing Summary")
        print("=" * 60)
        print(f"Total farms: {summary['total_farms']}")
        print(f"Enabled: {summary['enabled_farms']}")
        print(f"Disabled: {summary['disabled_farms']}")
        print(f"Processed: {summary['processed']}")
        print(f"Successful: {summary['successful']}")
        print(f"Failed: {summary['failed']}")
        print(f"Total time: {summary['processing_time_seconds']:.1f}s")
        print("=" * 60)
        if summary['failed'] > 0:
            print("\nFailed farms:")
            for result in results:
                if result['status'] == 'failed':
                    print(f"  - {result['farm_id']}: {result.get('error', 'Unknown error')}")
    except KeyboardInterrupt:
        logger.info("Batch processing interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Batch processing failed: {e}", exc_info=True)
        sys.exit(1)


# Run the batch only when executed as a script, not when imported.
if __name__ == '__main__':
    main()