# uploader-bot/app/core/content/sync_manager.py

from __future__ import annotations
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
from app.core.content.chunk_manager import ChunkManager
from app.core.models.content.chunk import ContentChunk
from app.core.network.node_client import NodeClient
logger = logging.getLogger(__name__)


class ContentSyncManager:
    """
    Manages synchronization of content chunks between nodes.

    Requirements:
    - Batch requests for node-to-node synchronization
    - Validation of received chunks:
        * SHA-256 hash match
        * Ed25519 signature over the chunk payload
    """

    def __init__(self, chunk_manager: Optional[ChunkManager] = None):
        self.chunk_manager = chunk_manager or ChunkManager()

    async def verify_chunk_integrity(self, chunk: ContentChunk) -> Tuple[bool, Optional[str]]:
        """
        Wrapper around the chunk integrity check that adds extra logging.
        """
        ok, err = self.chunk_manager.verify_chunk_integrity(chunk)
        if not ok:
            logger.warning("Chunk integrity failed: chunk_id=%s reason=%s", chunk.chunk_id, err)
        else:
            logger.debug("Chunk integrity passed: chunk_id=%s", chunk.chunk_id)
        return ok, err

    async def request_chunks(
        self,
        target_url: str,
        content_id: str,
        needed_indexes: List[int],
        batch_size: int = 32
    ) -> Dict[str, Any]:
        """
        Request missing chunks from a node in batches.

        Expected contract of the /api/node/content/sync endpoint:
        - action: "content_sync"
        - data: { sync_type: "content_request", content_info: { content_id, indexes: [...] } }

        Returns an aggregated summary across all batches.
        """
        response_summary: Dict[str, Any] = {"requested": 0, "received": 0, "chunks": [], "errors": []}
        logger.info(
            "Requesting chunks: target=%s content_id=%s total_missing=%d",
            target_url, content_id, len(needed_indexes),
        )
        async with NodeClient() as client:
            for i in range(0, len(needed_indexes), batch_size):
                batch = needed_indexes[i:i + batch_size]
                # Count every index we ask for, even if the batch later fails
                response_summary["requested"] += len(batch)
                try:
                    req = await client._create_signed_request(
                        action="content_sync",
                        data={
                            "sync_type": "content_request",
                            "content_info": {"content_id": content_id, "indexes": batch},
                        },
                        target_url=target_url,
                    )
                    logger.debug("Sending chunk request batch of %d indexes to %s", len(batch), target_url)
                    endpoint = f"{target_url}/api/node/content/sync"
                    async with client.session.post(endpoint, **req) as resp:
                        data = await resp.json()
                        if resp.status != 200:
                            msg = f"HTTP {resp.status}"
                            logger.warning("Chunk request failed: %s", msg)
                            response_summary["errors"].append({"batch": batch, "error": msg, "data": data})
                            continue
                        # The response is expected to be JSON with a 'chunks' field
                        chunks_payload = data.get("data", {}).get("chunks") or data.get("chunks") or []
                        # Validate each received chunk before accepting it
                        for ch in chunks_payload:
                            try:
                                chunk_model = ContentChunk.from_dict(ch)
                                ok, err = await self.verify_chunk_integrity(chunk_model)
                                if ok:
                                    response_summary["chunks"].append(chunk_model.to_dict())
                                    response_summary["received"] += 1
                                else:
                                    response_summary["errors"].append({"chunk_id": chunk_model.chunk_id, "error": err})
                            except Exception as e:
                                logger.error("Failed to parse/validate received chunk: %s", e)
                                response_summary["errors"].append({"batch": batch, "error": str(e)})
                except Exception as e:
                    logger.error("request_chunks batch error: %s", e)
                    response_summary["errors"].append({"batch": batch, "error": str(e)})
        logger.info(
            "Request chunks done: content_id=%s requested=%d received=%d errors=%d",
            content_id, response_summary["requested"], response_summary["received"], len(response_summary["errors"]),
        )
        return response_summary
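
    # Shape of the summary returned by request_chunks (illustrative values;
    # the chunk dicts themselves follow whatever ContentChunk.to_dict emits):
    #
    #     {
    #         "requested": 64,
    #         "received": 61,
    #         "chunks": [...],
    #         "errors": [{"batch": [32, 33], "error": "HTTP 503", "data": {...}}],
    #     }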

    async def provide_chunks(
        self,
        content_id: str,
        indexes: List[int],
        storage_reader,  # callable: (content_id, index) -> Optional[ContentChunk]
        batch_limit: int = 128
    ) -> Dict[str, Any]:
        """
        Prepare a batch of chunks in response to another node's request.

        storage_reader: a function or coroutine that returns a ContentChunk
        (or None) for a given (content_id, index) pair.

        Returns a dict ready to be sent in the API response.
        """
        provided: List[Dict[str, Any]] = []
        errors: List[Dict[str, Any]] = []
        for idx in indexes[:batch_limit]:
            try:
                res = storage_reader(content_id, idx)
                # storage_reader may be a plain function or a coroutine function
                if asyncio.iscoroutine(res):
                    res = await res
                if not res:
                    errors.append({"index": idx, "error": "not_found"})
                    continue
                # Re-check integrity locally before handing the chunk out
                ok, err = await self.verify_chunk_integrity(res)
                if not ok:
                    errors.append({"index": idx, "error": f"integrity_failed: {err}"})
                    continue
                provided.append(res.to_dict())
            except Exception as e:
                logger.error("provide_chunks error: %s", e)
                errors.append({"index": idx, "error": str(e)})
        logger.info(
            "Prepared %d/%d chunks for provide, errors=%d",
            len(provided), len(indexes[:batch_limit]), len(errors),
        )
        return {"chunks": provided, "errors": errors}

    async def sync_content(
        self,
        target_nodes: List[str],
        content_id: str,
        have_indexes: List[int],
        total_chunks: int
    ) -> Dict[str, Any]:
        """
        High-level synchronization procedure:
        - Computes the missing chunk indexes
        - Requests the chunks from all specified nodes (in parallel)
        - Aggregates the results
        """
        missing = sorted(set(range(total_chunks)) - set(have_indexes))
        logger.info(
            "Sync content start: content_id=%s total=%d have=%d missing=%d",
            content_id, total_chunks, len(have_indexes), len(missing),
        )
        if not missing:
            return {"success": True, "message": "nothing to sync", "downloaded": 0}
        results: Dict[str, Any] = {"success": True, "downloaded": 0, "details": {}}

        async def fetch_from_node(node_url: str):
            try:
                node_result = await self.request_chunks(node_url, content_id, missing)
                results["details"][node_url] = node_result
                results["downloaded"] += node_result.get("received", 0)
            except Exception as e:
                logger.error("sync_content: error requesting from %s: %s", node_url, e)
                results["details"][node_url] = {"error": str(e)}

        # Every node is asked for the full missing set in parallel, so
        # overlapping chunks may arrive more than once; results are kept
        # per node under "details".
        await asyncio.gather(*[fetch_from_node(url) for url in target_nodes])
        logger.info("Sync content done: content_id=%s downloaded=%d", content_id, results["downloaded"])
        return results
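

# Minimal usage sketch (hypothetical node URL and content id; assumes the
# surrounding application has configured NodeClient and that the target node
# serves /api/node/content/sync as described above). Without a reachable node
# the per-node details will simply contain error entries.
if __name__ == "__main__":
    async def _demo() -> None:
        manager = ContentSyncManager()
        result = await manager.sync_content(
            target_nodes=["http://node-a.example:8080"],  # hypothetical node
            content_id="abc123",                          # hypothetical content id
            have_indexes=[0, 1, 2],
            total_chunks=10,
        )
        print(result)

    asyncio.run(_demo())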