uploader-bot/scripts/debug_chunking.py

84 lines
3.2 KiB
Python

#!/usr/bin/env python3
import argparse
import base64
import json
import os
import sys
import time
from typing import Any, Dict, List, Optional
# Logging is configured as verbosely as possible for this diagnostic script.
import logging

logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
log = logging.getLogger("debug_chunking")

# The project modules must be importable for the script to do anything useful;
# on any import-time failure, report it on stderr and exit with status 2.
try:
    from app.core.content.chunk_manager import ChunkManager
    from app.core.crypto import ContentCipher
except Exception as exc:
    print(f"[FATAL] Cannot import app modules: {exc}", file=sys.stderr)
    raise SystemExit(2)
def hexdump(b: Optional[bytes], length: int = 64) -> str:
    """Return a hex preview of *b*, truncated to at most *length* bytes.

    Args:
        b: The bytes to render, or ``None``.
        length: Maximum number of leading bytes to include. Negative values
            are treated as 0 so the preview never slices from the end.

    Returns:
        ``"<None>"`` when *b* is ``None``; otherwise the lowercase hex of the
        first *length* bytes, followed by ``"..."`` if *b* was truncated.
    """
    if b is None:
        return "<None>"
    # Clamp so that a negative length cannot turn b[:length] into a
    # "drop the last N bytes" slice.
    limit = max(length, 0)
    suffix = "..." if len(b) > limit else ""
    return b[:limit].hex() + suffix
def main() -> int:
    """Run a split / verify / reassemble round trip on a file and log the results.

    Parses the command line, reads the input file fully into memory, splits it
    with ``ChunkManager.split_content``, checks every chunk with
    ``ChunkManager.verify_chunk_integrity``, reassembles the chunks with
    ``ChunkManager.reassemble_content`` and compares the result with the
    original bytes. Timing and throughput for both phases are logged.

    Returns:
        0 if the round trip matched and every chunk passed verification,
        3 if the reassembled data differs from the input,
        4 if the round trip matched but at least one chunk failed verification.

    Raises:
        SystemExit: Raised by argparse (status 2) on invalid arguments,
            including a non-positive ``--chunk-size``.
    """
    ap = argparse.ArgumentParser(description="Диагностика чанкинга/сборки контента")
    ap.add_argument("file", help="Путь к файлу для проверки")
    ap.add_argument("--aad", default="", help="Associated data (строка)")
    ap.add_argument("--content-id", default=None, help="Явный content_id (если не указан — будет сгенерирован)")
    ap.add_argument("--chunk-size", type=int, default=None, help="Переопределить размер чанка")
    args = ap.parse_args()

    # Compare against None explicitly: a plain truthiness test would silently
    # ignore "--chunk-size 0" and let negative sizes through.
    if args.chunk_size is not None and args.chunk_size <= 0:
        ap.error("--chunk-size must be a positive integer")

    cipher = ContentCipher()
    cm = ChunkManager(cipher=cipher)
    if args.chunk_size is not None:
        cm.CHUNK_SIZE = args.chunk_size  # type: ignore[attr-defined]

    with open(args.file, "rb") as f:
        data = f.read()

    content_key = cipher.generate_content_key()
    content_id = args.content_id or ("dbg-" + os.urandom(8).hex())
    # An empty --aad is passed on as "no associated data" (None).
    aad = args.aad.encode("utf-8") if args.aad else None

    log.info("Input file: %s size=%d", args.file, len(data))
    log.info("Chunk size: %d", cm.CHUNK_SIZE)
    # NOTE(review): this writes key material to the log; the key is generated
    # fresh for this run above, so it is presumably throwaway — confirm.
    log.debug("Content key: %s", hexdump(content_key))

    t0 = time.perf_counter()
    chunks = cm.split_content(content_id, data, content_key=content_key, metadata={"debug": True}, associated_data=aad)
    t1 = time.perf_counter()
    log.info("Split completed in %.3fs, chunks=%d", (t1 - t0), len(chunks))

    # Keep verifying after a failure so every bad chunk is reported, but
    # remember that something failed so the exit status reflects it.
    failed_chunks = 0
    for ch in chunks:
        ok, err = cm.verify_chunk_integrity(ch, verify_signature=True)
        if not ok:
            failed_chunks += 1
            log.error("Chunk integrity failed: idx=%s id=%s err=%s", ch.chunk_index, ch.chunk_id, err)
        else:
            log.debug("Chunk OK: idx=%s id=%s hash=%s enc_len=%d", ch.chunk_index, ch.chunk_id, ch.chunk_hash, len(ch.encrypted_bytes()))

    t2 = time.perf_counter()
    restored = cm.reassemble_content(chunks, content_key=content_key, associated_data=aad, expected_content_id=content_id)
    t3 = time.perf_counter()
    log.info("Reassemble completed in %.3fs", (t3 - t2))

    if restored == data:
        log.info("Roundtrip OK: restored equals original")
    else:
        log.error("Roundtrip FAILED: data mismatch (orig=%d, restored=%d)", len(data), len(restored))
        return 3

    # Guard the divisor so a near-zero elapsed time cannot divide by zero.
    split_thr = len(data) / max(1e-9, (t1 - t0))
    reas_thr = len(data) / max(1e-9, (t3 - t2))
    log.info("Throughput: split=%.2f B/s, reassemble=%.2f B/s", split_thr, reas_thr)

    if failed_chunks:
        log.error("Integrity verification failed for %d of %d chunks", failed_chunks, len(chunks))
        return 4
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())