# BE-LightningReport/separate_by_month.py

#!/usr/bin/env python3

import json
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any
import logging
from collections import defaultdict

# Configure root logging at import time: INFO and above, timestamped lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-scoped logger used by the functions below.
logger = logging.getLogger(__name__)

def _load_json(input_file_path: str) -> Any:
    """Parse the JSON document stored at *input_file_path*.

    Any failure is logged and then re-raised unchanged, so callers still see
    the original exception type (``FileNotFoundError``,
    ``json.JSONDecodeError``, ...).
    """
    logger.info(f"Reading JSON file: {input_file_path}")
    try:
        with open(input_file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logger.error(f"JSON file not found: {input_file_path}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON format: {e}")
        raise
    except Exception as e:
        logger.error(f"Error separating JSON by month: {str(e)}")
        raise


def _month_key(record: Any) -> str:
    """Return the ``YYYY-MM`` bucket derived from ``record['creation_time']``.

    Raises ``KeyError``, ``TypeError`` or ``ValueError`` when the record has
    no usable ``creation_time`` (missing key, null/non-string value, record
    that is not a mapping, or a value that does not start with ``YYYY-MM-DD``).
    """
    creation_time = record['creation_time']
    # Only the leading ``YYYY-MM-DD`` is parsed; any time-of-day suffix is ignored.
    return datetime.strptime(creation_time[:10], '%Y-%m-%d').strftime('%Y-%m')


def separate_json_by_month(input_file_path: str, output_dir: str = None) -> Dict[str, str]:
    """
    Separate JSON file into smaller files based on months in creation_time.

    The input is expected to be a JSON array of objects, each carrying a
    ``creation_time`` string that starts with ``YYYY-MM-DD``. Records without
    a usable ``creation_time`` are skipped with a warning instead of aborting
    the whole run. One file named ``firtina_sorgulama_<YYYY-MM>.json`` is
    written per month found.

    Args:
        input_file_path: Path to the input JSON file
        output_dir: Directory to save separated files (optional). Defaults to
            ``<input stem>_separated`` next to the input file. Missing parent
            directories are created.

    Returns:
        Dictionary mapping month (``YYYY-MM``) to output file path

    Raises:
        FileNotFoundError: If the input file does not exist.
        json.JSONDecodeError: If the input file is not valid JSON.
        Exception: Any other error is logged and re-raised unchanged.
    """
    # Input-side failures are reported by the loader, so that output-side
    # errors below are never mislabelled as "JSON file not found".
    data = _load_json(input_file_path)

    try:
        logger.info(f"Successfully read {len(data)} records")

        if output_dir is None:
            input_path = Path(input_file_path)
            output_dir = input_path.parent / f"{input_path.stem}_separated"

        output_path = Path(output_dir)
        # parents=True lets callers pass a nested directory that does not exist yet.
        output_path.mkdir(parents=True, exist_ok=True)

        month_data = defaultdict(list)

        for record in data:
            try:
                month_data[_month_key(record)].append(record)
            except (KeyError, TypeError, ValueError) as e:
                # TypeError covers null / non-string creation_time values and
                # records that are not JSON objects.
                logger.warning(f"Skipping record with invalid creation_time: {e}")

        output_files = {}

        for month, records in month_data.items():
            output_file = output_path / f"firtina_sorgulama_{month}.json"

            logger.info(f"Writing {len(records)} records for {month} to {output_file}")

            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(records, f, indent=2, ensure_ascii=False, default=str)

            file_size = output_file.stat().st_size / 1024
            logger.info(f"Created {output_file} ({file_size:.2f} KB)")
            output_files[month] = str(output_file)

        logger.info(f"Separation completed. Created {len(output_files)} files in {output_path}")

        return output_files

    except Exception as e:
        logger.error(f"Error separating JSON by month: {str(e)}")
        raise

def main():
    """Command-line entry point.

    Reads the input JSON path (required) and the output directory (optional)
    from ``sys.argv``, runs the separation and prints the created files to
    stdout. Usage and error messages go to stderr so they do not mix with the
    normal output; in both cases the process exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: python separate_by_month.py <json_file_path> [output_directory]", file=sys.stderr)
        sys.exit(1)

    input_file_path = sys.argv[1]
    output_dir = sys.argv[2] if len(sys.argv) > 2 else None

    # Keep the try body to the one call that is expected to fail.
    try:
        result_files = separate_json_by_month(input_file_path, output_dir)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print("Separation completed successfully!")
    print("Created files:")
    for month, file_path in result_files.items():
        print(f"  {month}: {file_path}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()