from __future__ import annotations

import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple

from app.core.crypto import get_ed25519_manager
from app.core.content.chunk_manager import ChunkManager
from app.core.models.content.chunk import ContentChunk
from app.core.network.node_client import NodeClient

logger = logging.getLogger(__name__)


class ContentSyncManager:
    """
    Manages synchronization of content chunks between nodes.

    Requirements:
    - Batch requests for node-to-node synchronization
    - Validation of received chunks:
      * SHA-256 hash match
      * Ed25519 signature of the chunk payload
    """

    def __init__(self, chunk_manager: Optional[ChunkManager] = None):
        self.chunk_manager = chunk_manager or ChunkManager()

    async def verify_chunk_integrity(self, chunk: ContentChunk) -> Tuple[bool, Optional[str]]:
        """
        Wrapper around the chunk integrity check with additional logging.
        """
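        # Rough sketch of what ChunkManager.verify_chunk_integrity is expected
        # to check, per the class requirements above (field/method names here
        # are hypothetical; the real logic lives in ChunkManager):
        #   hashlib.sha256(chunk.payload).hexdigest() == chunk.chunk_hash
        #   get_ed25519_manager().verify(chunk.payload, chunk.signature, public_key)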
        ok, err = self.chunk_manager.verify_chunk_integrity(chunk)
        if not ok:
            logger.warning("Chunk integrity failed: chunk_id=%s reason=%s", chunk.chunk_id, err)
        else:
            logger.debug("Chunk integrity passed: chunk_id=%s", chunk.chunk_id)
        return ok, err

    async def request_chunks(
        self,
        target_url: str,
        content_id: str,
        needed_indexes: List[int],
        batch_size: int = 32
    ) -> Dict[str, Any]:
        """
        Request missing chunks from a node in batches.

        Expected contract of the /api/node/content/sync endpoint:
        - action: "content_sync"
        - data: { sync_type: "content_request", content_info: { content_id, indexes: [...] } }

        Returns an aggregated summary across batches.
        """
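        # Illustrative shape of one batch request per the contract above
        # (values are placeholders; signature fields added by
        # _create_signed_request are omitted here):
        #   {
        #     "action": "content_sync",
        #     "data": {
        #       "sync_type": "content_request",
        #       "content_info": {"content_id": "<content_id>", "indexes": [0, 1, 2]}
        #     }
        #   }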
        response_summary: Dict[str, Any] = {"requested": 0, "received": 0, "chunks": [], "errors": []}
        logger.info("Requesting chunks: target=%s content_id=%s total_missing=%d", target_url, content_id, len(needed_indexes))

        async with NodeClient() as client:
            for i in range(0, len(needed_indexes), batch_size):
                batch = needed_indexes[i:i + batch_size]
                try:
                    req = await client._create_signed_request(
                        action="content_sync",
                        data={
                            "sync_type": "content_request",
                            "content_info": {"content_id": content_id, "indexes": batch},
                        },
                        target_url=target_url,
                    )
                    logger.debug("Sending chunk request batch of %d indexes to %s", len(batch), target_url)

                    endpoint = f"{target_url}/api/node/content/sync"
                    async with client.session.post(endpoint, **req) as resp:
                        data = await resp.json()
                        if resp.status != 200:
                            msg = f"HTTP {resp.status}"
                            logger.warning("Chunk request failed: %s", msg)
                            response_summary["errors"].append({"batch": batch, "error": msg, "data": data})
                            continue

                        # The response is expected to be JSON with a 'chunks' field
                        chunks_payload = data.get("data", {}).get("chunks") or data.get("chunks") or []
                        response_summary["requested"] += len(batch)

                        # Validate the received chunks
                        for ch in chunks_payload:
                            try:
                                chunk_model = ContentChunk.from_dict(ch)
                                ok, err = await self.verify_chunk_integrity(chunk_model)
                                if ok:
                                    response_summary["chunks"].append(chunk_model.to_dict())
                                    response_summary["received"] += 1
                                else:
                                    response_summary["errors"].append({"chunk_id": chunk_model.chunk_id, "error": err})
                            except Exception as e:
                                logger.error("Failed to parse/validate received chunk: %s", e)
                                response_summary["errors"].append({"batch": batch, "error": str(e)})

                except Exception as e:
                    logger.error("request_chunks batch error: %s", e)
                    response_summary["errors"].append({"batch": batch, "error": str(e)})

        logger.info(
            "Request chunks done: content_id=%s requested=%d received=%d errors=%d",
            content_id, response_summary["requested"], response_summary["received"], len(response_summary["errors"])
        )
        return response_summary

    async def provide_chunks(
        self,
        content_id: str,
        indexes: List[int],
        storage_reader,  # callable: (content_id, index) -> Optional[ContentChunk]
        batch_limit: int = 128
    ) -> Dict[str, Any]:
        """
        Prepare a batch of chunks in response to another node's request.

        storage_reader: function or coroutine returning a ContentChunk or None for (content_id, index).
        Returns a dict to be sent in the API response.
        """
        provided: List[Dict[str, Any]] = []
        errors: List[Dict[str, Any]] = []

        async def _maybe_await(x):
            # storage_reader may be a plain function or a coroutine function,
            # so await its result only when it actually returned a coroutine
            if asyncio.iscoroutine(x):
                return await x
            return x

        for idx in indexes[:batch_limit]:
            try:
                res = await _maybe_await(storage_reader(content_id, idx))
                if not res:
                    errors.append({"index": idx, "error": "not_found"})
                    continue
                # Re-check integrity locally before serving the chunk
                ok, err = await self.verify_chunk_integrity(res)
                if not ok:
                    errors.append({"index": idx, "error": f"integrity_failed: {err}"})
                    continue
                provided.append(res.to_dict())
            except Exception as e:
                logger.error("provide_chunks error: %s", e)
                errors.append({"index": idx, "error": str(e)})

        logger.info("Prepared %d/%d chunks for provide, errors=%d", len(provided), len(indexes[:batch_limit]), len(errors))
        return {"chunks": provided, "errors": errors}

    async def sync_content(
        self,
        target_nodes: List[str],
        content_id: str,
        have_indexes: List[int],
        total_chunks: int
    ) -> Dict[str, Any]:
        """
        High-level synchronization procedure:
        - Computes the missing chunk indexes
        - Requests the chunks from all specified nodes (in parallel)
        - Aggregates the results
        """
        missing = sorted(set(range(total_chunks)) - set(have_indexes))
        logger.info("Sync content start: content_id=%s total=%d have=%d missing=%d",
                    content_id, total_chunks, len(have_indexes), len(missing))

        if not missing:
            return {"success": True, "message": "nothing to sync", "downloaded": 0}

        results: Dict[str, Any] = {"success": True, "downloaded": 0, "details": {}}

        async def fetch_from_node(node_url: str):
            try:
                node_result = await self.request_chunks(node_url, content_id, missing)
                results["details"][node_url] = node_result
                results["downloaded"] += node_result.get("received", 0)
            except Exception as e:
                logger.error("sync_content: error requesting from %s: %s", node_url, e)
                results["details"][node_url] = {"error": str(e)}

        await asyncio.gather(*[fetch_from_node(url) for url in target_nodes])

        logger.info("Sync content done: content_id=%s downloaded=%d", content_id, results["downloaded"])
        return results
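

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the module's public API). The node URLs,
# content id and storage reader below are hypothetical placeholders; real
# callers are expected to wire in their own storage backend.
# ---------------------------------------------------------------------------
async def _example_sync() -> None:
    manager = ContentSyncManager()

    # Hypothetical storage reader: returns a ContentChunk or None for (content_id, index)
    async def storage_reader(content_id: str, index: int) -> Optional[ContentChunk]:
        return None  # no local storage in this sketch

    # Serve chunks 0-2 to a requesting peer (all reported as not_found here)
    provided = await manager.provide_chunks("example-content-id", [0, 1, 2], storage_reader)
    logger.debug("provide_chunks result: %s", provided)

    # Pull missing chunks from two (hypothetical) peer nodes
    result = await manager.sync_content(
        target_nodes=["http://node-a.example:8000", "http://node-b.example:8000"],
        content_id="example-content-id",
        have_indexes=[0, 1],
        total_chunks=10,
    )
    logger.info("sync result: %s", result)


if __name__ == "__main__":
    asyncio.run(_example_sync())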