BE-LightningReport/separate_by_month.py
erdemerikci 45d80dfaa6 Initial import: Lightning_Report with n8n integration
Fork of Lightning_Report adding:
- n8n_report_branch.json: workflow branch for storm-triggered report delivery
- report_service/: FastAPI microservice wrapping create_docx_report() so n8n
  can produce byte-identical reports without fighting the Python Code sandbox

Made-with: Cursor
2026-04-22 15:13:08 +03:00

100 lines
3.4 KiB
Python

#!/usr/bin/env python3
import json
import logging
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
# Configure the root logger when this module is imported/run: INFO level,
# each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _month_key_for(record: Any) -> str:
    """Return the ``YYYY-MM`` bucket derived from ``record['creation_time']``.

    Only the first ten characters of ``creation_time`` are parsed, as a
    ``YYYY-MM-DD`` date; anything after them (time, zone) is ignored.

    Raises:
        KeyError: the record has no ``creation_time`` key.
        TypeError: the record is not subscriptable by key, or
            ``creation_time`` is not a string (e.g. JSON ``null``).
        ValueError: the leading ten characters are not a valid date.
    """
    creation_time = record['creation_time']
    return datetime.strptime(creation_time[:10], '%Y-%m-%d').strftime('%Y-%m')


def separate_json_by_month(input_file_path: str, output_dir: Optional[str] = None) -> Dict[str, str]:
    """
    Separate JSON file into smaller files based on months in creation_time.

    The input must contain a JSON array of records. Each record is grouped by
    the ``YYYY-MM`` prefix of its ``creation_time`` field and every group is
    written to ``firtina_sorgulama_<YYYY-MM>.json`` in the output directory.
    Records whose ``creation_time`` is missing, not a string, or not a valid
    date are logged and skipped rather than aborting the whole run.

    Args:
        input_file_path: Path to the input JSON file
        output_dir: Directory to save separated files (optional). Defaults to
            ``<input stem>_separated`` next to the input file. Missing parent
            directories are created.

    Returns:
        Dictionary mapping month (``YYYY-MM``) to output file path, with
        months in ascending order.

    Raises:
        FileNotFoundError: the input file does not exist.
        json.JSONDecodeError: the input file is not valid JSON.
        TypeError: the top-level JSON value is not an array.
    """
    try:
        logger.info(f"Reading JSON file: {input_file_path}")
        with open(input_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Fail early with a clear message instead of iterating dict keys or
        # string characters and failing on the first "record".
        if not isinstance(data, list):
            raise TypeError(
                f"Expected a JSON array of records, got {type(data).__name__}"
            )
        logger.info(f"Successfully read {len(data)} records")

        if output_dir is None:
            input_path = Path(input_file_path)
            output_dir = input_path.parent / f"{input_path.stem}_separated"
        output_path = Path(output_dir)
        # parents=True so a nested, not-yet-existing output directory works;
        # without it mkdir raises FileNotFoundError, which would be reported
        # below as a missing *input* file.
        output_path.mkdir(parents=True, exist_ok=True)

        month_data = defaultdict(list)
        skipped = 0
        for record in data:
            try:
                month_key = _month_key_for(record)
            except (KeyError, TypeError, ValueError) as e:
                # TypeError covers non-dict records and null/non-string
                # creation_time values, which are just as "invalid" as a
                # missing key or an unparsable date.
                skipped += 1
                logger.warning(f"Skipping record with invalid creation_time: {e}")
                continue
            month_data[month_key].append(record)
        if skipped:
            logger.warning(f"Skipped {skipped} of {len(data)} records")

        output_files = {}
        # Sorted so files are written (and reported) in chronological order
        # regardless of the order records appear in the input.
        for month in sorted(month_data):
            records = month_data[month]
            output_file = output_path / f"firtina_sorgulama_{month}.json"
            logger.info(f"Writing {len(records)} records for {month} to {output_file}")
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(records, f, indent=2, ensure_ascii=False, default=str)
            file_size = output_file.stat().st_size / 1024
            logger.info(f"Created {output_file} ({file_size:.2f} KB)")
            output_files[month] = str(output_file)

        logger.info(f"Separation completed. Created {len(output_files)} files in {output_path}")
        return output_files
    except FileNotFoundError:
        logger.error(f"JSON file not found: {input_file_path}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON format: {e}")
        raise
    except Exception as e:
        logger.error(f"Error separating JSON by month: {str(e)}")
        raise
def main():
    """Command-line entry point.

    Reads the input JSON path from the first command-line argument and an
    optional output directory from the second, runs the month separation and
    prints the resulting month-to-file mapping. Prints a usage line and exits
    with status 1 when no input path is given; prints the error and exits
    with status 1 when the separation fails.
    """
    cli_args = sys.argv
    if len(cli_args) < 2:
        print("Usage: python separate_by_month.py <json_file_path> [output_directory]")
        sys.exit(1)

    source_json = cli_args[1]
    # The output directory is optional; None lets the callee pick its default.
    target_dir = None
    if len(cli_args) > 2:
        target_dir = cli_args[2]

    try:
        created = separate_json_by_month(source_json, target_dir)
        print(f"Separation completed successfully!")
        print("Created files:")
        for month_label, written_path in created.items():
            print(f" {month_label}: {written_path}")
    except Exception as err:
        print(f"Error: {err}")
        sys.exit(1)
if __name__ == "__main__":
main()