from __future__ import annotations

import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple

from app.core.crypto import get_ed25519_manager
from app.core.content.chunk_manager import ChunkManager
from app.core.models.content.chunk import ContentChunk
from app.core.network.node_client import NodeClient

logger = logging.getLogger(__name__)


class ContentSyncManager:
    """
    Manages synchronization of content chunks between nodes.

    Requirements:
    - Batch requests for node-to-node synchronization
    - Validation of received chunks:
      * SHA-256 hash match
      * Ed25519 signature of the chunk payload
    """

    def __init__(self, chunk_manager: Optional[ChunkManager] = None):
        self.chunk_manager = chunk_manager or ChunkManager()

    async def verify_chunk_integrity(self, chunk: ContentChunk) -> Tuple[bool, Optional[str]]:
        """
        Wrapper around the chunk integrity check with additional logging.
        """
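        # Rough sketch of what ChunkManager.verify_chunk_integrity is expected
        # to check, per the class requirements above (field/method names here
        # are hypothetical; the real logic lives in ChunkManager):
        #   hashlib.sha256(chunk.payload).hexdigest() == chunk.chunk_hash
        #   get_ed25519_manager().verify(chunk.payload, chunk.signature, public_key)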
        ok, err = self.chunk_manager.verify_chunk_integrity(chunk)
        if not ok:
            logger.warning("Chunk integrity failed: chunk_id=%s reason=%s", chunk.chunk_id, err)
        else:
            logger.debug("Chunk integrity passed: chunk_id=%s", chunk.chunk_id)
        return ok, err

    async def request_chunks(
        self,
        target_url: str,
        content_id: str,
        needed_indexes: List[int],
        batch_size: int = 32
    ) -> Dict[str, Any]:
        """
        Request missing chunks from a node in batches.

        Expected contract of the /api/node/content/sync endpoint:
        - action: "content_sync"
        - data: { sync_type: "content_request", content_info: { content_id, indexes: [...] } }

        Returns an aggregated summary across batches.
        """
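        # Illustrative shape of one batch request per the contract above
        # (values are placeholders; signature fields added by
        # _create_signed_request are omitted here):
        #   {
        #     "action": "content_sync",
        #     "data": {
        #       "sync_type": "content_request",
        #       "content_info": {"content_id": "<content_id>", "indexes": [0, 1, 2]}
        #     }
        #   }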
        response_summary: Dict[str, Any] = {"requested": 0, "received": 0, "chunks": [], "errors": []}
        logger.info("Requesting chunks: target=%s content_id=%s total_missing=%d", target_url, content_id, len(needed_indexes))

        async with NodeClient() as client:
            for i in range(0, len(needed_indexes), batch_size):
                batch = needed_indexes[i:i + batch_size]
                try:
                    req = await client._create_signed_request(
                        action="content_sync",
                        data={
                            "sync_type": "content_request",
                            "content_info": {"content_id": content_id, "indexes": batch},
                        },
                        target_url=target_url,
                    )
                    logger.debug("Sending chunk request batch of %d indexes to %s", len(batch), target_url)

                    endpoint = f"{target_url}/api/node/content/sync"
                    async with client.session.post(endpoint, **req) as resp:
                        data = await resp.json()
                        if resp.status != 200:
                            msg = f"HTTP {resp.status}"
                            logger.warning("Chunk request failed: %s", msg)
                            response_summary["errors"].append({"batch": batch, "error": msg, "data": data})
                            continue

                        # The response is expected to be JSON with a 'chunks' field
                        chunks_payload = data.get("data", {}).get("chunks") or data.get("chunks") or []
                        response_summary["requested"] += len(batch)

                        # Validate the received chunks
                        for ch in chunks_payload:
                            try:
                                chunk_model = ContentChunk.from_dict(ch)
                                ok, err = await self.verify_chunk_integrity(chunk_model)
                                if ok:
                                    response_summary["chunks"].append(chunk_model.to_dict())
                                    response_summary["received"] += 1
                                else:
                                    response_summary["errors"].append({"chunk_id": chunk_model.chunk_id, "error": err})
                            except Exception as e:
                                logger.error("Failed to parse/validate received chunk: %s", e)
                                response_summary["errors"].append({"batch": batch, "error": str(e)})

                except Exception as e:
                    logger.error("request_chunks batch error: %s", e)
                    response_summary["errors"].append({"batch": batch, "error": str(e)})

        logger.info(
            "Request chunks done: content_id=%s requested=%d received=%d errors=%d",
            content_id, response_summary["requested"], response_summary["received"], len(response_summary["errors"])
        )
        return response_summary

    async def provide_chunks(
        self,
        content_id: str,
        indexes: List[int],
        storage_reader,  # callable: (content_id, index) -> Optional[ContentChunk]
        batch_limit: int = 128
    ) -> Dict[str, Any]:
        """
        Prepare a batch of chunks in response to another node's request.

        storage_reader: function or coroutine returning a ContentChunk or None for (content_id, index).
        Returns a dict to be sent in the API response.
        """
        provided: List[Dict[str, Any]] = []
        errors: List[Dict[str, Any]] = []

        async def _maybe_await(x):
            # storage_reader may be a plain function or a coroutine function,
            # so await its result only when it actually returned a coroutine
            if asyncio.iscoroutine(x):
                return await x
            return x

        for idx in indexes[:batch_limit]:
            try:
                res = await _maybe_await(storage_reader(content_id, idx))
                if not res:
                    errors.append({"index": idx, "error": "not_found"})
                    continue
                # Re-check integrity locally before serving the chunk
                ok, err = await self.verify_chunk_integrity(res)
                if not ok:
                    errors.append({"index": idx, "error": f"integrity_failed: {err}"})
                    continue
                provided.append(res.to_dict())
            except Exception as e:
                logger.error("provide_chunks error: %s", e)
                errors.append({"index": idx, "error": str(e)})

        logger.info("Prepared %d/%d chunks for provide, errors=%d", len(provided), len(indexes[:batch_limit]), len(errors))
        return {"chunks": provided, "errors": errors}

    async def sync_content(
        self,
        target_nodes: List[str],
        content_id: str,
        have_indexes: List[int],
        total_chunks: int
    ) -> Dict[str, Any]:
        """
        High-level synchronization procedure:
        - Computes the missing chunk indexes
        - Requests the chunks from all specified nodes (in parallel)
        - Aggregates the results
        """
        missing = sorted(set(range(total_chunks)) - set(have_indexes))
        logger.info("Sync content start: content_id=%s total=%d have=%d missing=%d",
                    content_id, total_chunks, len(have_indexes), len(missing))

        if not missing:
            return {"success": True, "message": "nothing to sync", "downloaded": 0}

        results: Dict[str, Any] = {"success": True, "downloaded": 0, "details": {}}

        async def fetch_from_node(node_url: str):
            try:
                node_result = await self.request_chunks(node_url, content_id, missing)
                results["details"][node_url] = node_result
                results["downloaded"] += node_result.get("received", 0)
            except Exception as e:
                logger.error("sync_content: error requesting from %s: %s", node_url, e)
                results["details"][node_url] = {"error": str(e)}

        await asyncio.gather(*[fetch_from_node(url) for url in target_nodes])

        logger.info("Sync content done: content_id=%s downloaded=%d", content_id, results["downloaded"])
        return results
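

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the module's public API). The node URLs,
# content id and storage reader below are hypothetical placeholders; real
# callers are expected to wire in their own storage backend.
# ---------------------------------------------------------------------------
async def _example_sync() -> None:
    manager = ContentSyncManager()

    # Hypothetical storage reader: returns a ContentChunk or None for (content_id, index)
    async def storage_reader(content_id: str, index: int) -> Optional[ContentChunk]:
        return None  # no local storage in this sketch

    # Serve chunks 0-2 to a requesting peer (all reported as not_found here)
    provided = await manager.provide_chunks("example-content-id", [0, 1, 2], storage_reader)
    logger.debug("provide_chunks result: %s", provided)

    # Pull missing chunks from two (hypothetical) peer nodes
    result = await manager.sync_content(
        target_nodes=["http://node-a.example:8000", "http://node-b.example:8000"],
        content_id="example-content-id",
        have_indexes=[0, 1],
        total_chunks=10,
    )
    logger.info("sync result: %s", result)


if __name__ == "__main__":
    asyncio.run(_example_sync())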