#!/usr/bin/env python3
"""Split a JSON array of records into one JSON file per calendar month.

Each record is grouped by the ``YYYY-MM`` prefix of its ``creation_time``
field and written to ``firtina_sorgulama_<YYYY-MM>.json`` in the output
directory.
"""
import json
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any, Optional
import logging
from collections import defaultdict

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def _month_key(record: Any) -> str:
    """Return the ``YYYY-MM`` key derived from ``record['creation_time']``.

    Only the first ten characters of the value are parsed, as ``%Y-%m-%d``.

    Raises:
        KeyError: the record has no ``creation_time`` key.
        TypeError: the record is not subscriptable by string, or the
            ``creation_time`` value cannot be sliced/parsed as a string.
        ValueError: the first ten characters are not a valid date.
    """
    creation_time = record['creation_time']
    date_obj = datetime.strptime(creation_time[:10], '%Y-%m-%d')
    return date_obj.strftime('%Y-%m')


def separate_json_by_month(input_file_path: str, output_dir: Optional[str] = None) -> Dict[str, str]:
    """
    Separate JSON file into smaller files based on months in creation_time.

    Records whose ``creation_time`` is missing, not a string, or not a valid
    date are skipped with a warning; they do not abort the run.

    Args:
        input_file_path: Path to the input JSON file. The top-level JSON
            value must be an array of records.
        output_dir: Directory to save separated files (optional). Defaults to
            ``<input stem>_separated`` next to the input file. Missing parent
            directories are created.

    Returns:
        Dictionary mapping month (``YYYY-MM``) to output file path

    Raises:
        FileNotFoundError: the input file does not exist.
        json.JSONDecodeError: the input file is not valid JSON.
        TypeError: the top-level JSON value is not an array.
    """
    try:
        logger.info("Reading JSON file: %s", input_file_path)

        with open(input_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Iterating a JSON object would walk its keys and fail later with an
        # opaque "string indices" error, so reject non-arrays up front.
        if not isinstance(data, list):
            raise TypeError(
                f"Expected a JSON array of records, got {type(data).__name__}"
            )

        logger.info("Successfully read %d records", len(data))

        if output_dir is None:
            input_path = Path(input_file_path)
            output_dir = input_path.parent / f"{input_path.stem}_separated"

        output_path = Path(output_dir)
        # parents=True: a nested output directory must not fail with a
        # FileNotFoundError that would be misreported as a missing input file.
        output_path.mkdir(parents=True, exist_ok=True)

        month_data: Dict[str, List[Any]] = defaultdict(list)

        for record in data:
            try:
                month_data[_month_key(record)].append(record)
            except (KeyError, TypeError, ValueError) as e:
                # TypeError covers null / non-string creation_time values and
                # records that are not objects at all.
                logger.warning(f"Skipping record with invalid creation_time: {e}")
                continue

        output_files: Dict[str, str] = {}

        for month, records in month_data.items():
            output_file = output_path / f"firtina_sorgulama_{month}.json"

            logger.info("Writing %d records for %s to %s", len(records), month, output_file)

            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(records, f, indent=2, ensure_ascii=False, default=str)

            file_size = output_file.stat().st_size / 1024  # size in KB
            logger.info("Created %s (%.2f KB)", output_file, file_size)

            output_files[month] = str(output_file)

        logger.info(
            "Separation completed. Created %d files in %s",
            len(output_files),
            output_path,
        )
        return output_files

    except FileNotFoundError:
        logger.error(f"JSON file not found: {input_file_path}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON format: {e}")
        raise
    except Exception as e:
        logger.error(f"Error separating JSON by month: {str(e)}")
        raise


def main():
    """Main function to handle command line execution.

    Expects the input JSON path as the first argument and an optional output
    directory as the second. Exits with status 1 on bad usage or any error.
    """
    if len(sys.argv) < 2:
        print("Usage: python separate_by_month.py <input_json_file> [output_directory]")
        sys.exit(1)

    input_file_path = sys.argv[1]
    output_dir = sys.argv[2] if len(sys.argv) > 2 else None

    try:
        result_files = separate_json_by_month(input_file_path, output_dir)
        print("Separation completed successfully!")
        print("Created files:")
        for month, file_path in result_files.items():
            print(f"  {month}: {file_path}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()