"""Collect timestamp values from JSON / JSON Lines files into one CSV file."""

import csv
import json
from datetime import datetime
from pathlib import Path

# --- CONFIGURATION ---
BASE_PATH = 'C:/temp/discord/'
OUTPUT_FILENAME = 'output_timestamps_clean.csv'

# Layout every successfully parsed timestamp is rendered in.
_STANDARD_FORMAT = "%Y-%m-%d %H:%M:%S"


def standardize_timestamp(ts_string):
    """Convert varying timestamp formats into ``YYYY-MM-DD HH:MM:SS``.

    Recognised inputs:

    * ``"02.06.2021 06:26"`` (day.month.year hour:minute)
    * ``"2021-02-17T20:35:29.083Z"`` (ISO 8601). Only the first 19 characters
      are parsed, so fractional seconds and any zone suffix are dropped
      without conversion.

    Double quotes and surrounding whitespace are ignored while parsing.

    Args:
        ts_string: The raw timestamp value read from a record.

    Returns:
        The normalised string when a known format matches; ``None`` for a
        falsy input; otherwise the original value unchanged (this includes
        non-string values, which cannot be parsed).
    """
    if not ts_string:
        return None

    # Non-string values (e.g. numeric epochs) would make strptime raise
    # TypeError and abort the whole file; pass them through untouched.
    if not isinstance(ts_string, str):
        return ts_string

    cleaned = ts_string.replace('"', '').strip()

    # Format 1: "02.06.2021 06:26"
    try:
        parsed = datetime.strptime(cleaned, "%d.%m.%Y %H:%M")
        return parsed.strftime(_STANDARD_FORMAT)
    except ValueError:
        pass

    # Format 2: "2021-02-17T20:35:29.083Z" (ISO 8601)
    try:
        parsed = datetime.strptime(
            cleaned[:19].replace('T', ' '), _STANDARD_FORMAT
        )
        return parsed.strftime(_STANDARD_FORMAT)
    except ValueError:
        pass

    # Fallback: return original string if it matches neither format
    return ts_string


def _write_timestamps(parsed, writer, filename):
    """Write one CSV row per dict record in *parsed* that has a timestamp."""
    records = parsed if isinstance(parsed, list) else [parsed]
    for record in records:
        if not isinstance(record, dict):
            continue
        # Check both capitalization variants just in case
        raw_ts = record.get('Timestamp') or record.get('timestamp')
        if raw_ts:
            writer.writerow([standardize_timestamp(raw_ts), filename])


def process_file(file_path, writer):
    """Read a file as standard JSON, falling back to JSON Lines if needed.

    For every dict record carrying a truthy ``Timestamp`` / ``timestamp``
    key, a ``[standardized timestamp, file name]`` row is written.

    Args:
        file_path: ``pathlib.Path`` of the file to read.
        writer: A ``csv.writer``-like object exposing ``writerow``.
    """
    # 'utf-8-sig' transparently drops a leading BOM; with plain 'utf-8' the
    # BOM makes json.load fail and the file's records are silently lost.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        # Attempt 1: Standard JSON
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            pass
        else:
            _write_timestamps(data, writer, file_path.name)
            return

        # Attempt 2: JSON Lines
        f.seek(0)
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                # Best effort: a malformed line must not stop the others.
                continue
            _write_timestamps(entry, writer, file_path.name)


def extract_and_format_timestamps(base_path=None, output_filename=None):
    """Scan a directory tree for ``*.json`` files and export their timestamps.

    Args:
        base_path: Directory to scan recursively; defaults to ``BASE_PATH``.
        output_filename: Name of the CSV created inside *base_path*;
            defaults to ``OUTPUT_FILENAME``.
    """
    # Resolve defaults at call time so the module-level configuration is
    # honoured even if it is changed after import.
    work_dir = Path(BASE_PATH if base_path is None else base_path)
    out_name = OUTPUT_FILENAME if output_filename is None else output_filename
    output_path = work_dir / out_name

    with open(output_path, 'w', newline='', encoding='utf-8') as csv_out:
        writer = csv.writer(csv_out)
        writer.writerow(['Timestamp', 'Filename'])

        print(f"Scanning: {work_dir}")
        print(f"Output will be saved to: {output_path}")

        for file_path in work_dir.glob('**/*.json'):
            if file_path.name == out_name:
                continue
            try:
                process_file(file_path, writer)
            except Exception as e:
                # Top-level boundary: report and move on so one unreadable
                # file does not abort the whole scan.
                print(f"Error reading file {file_path.name}: {e}")

    print("Done!")


if __name__ == "__main__":
    extract_and_format_timestamps()