uploader-bot/app/core/stats/stats_aggregator.py

198 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import asyncio
import logging
import statistics
import time
from collections import deque, defaultdict
from typing import Deque, Dict, Any, Optional, List, Tuple
from app.core.models.stats.metrics_models import SystemMetrics, AppMetrics, NodeStats, NetworkStats
from app.core.crypto import get_ed25519_manager
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class StatsAggregator:
    """
    In-memory statistics aggregator.

    - keeps a bounded history (``deque`` with ``maxlen``) of local snapshots
      and of snapshots received from other nodes
    - computes aggregates and coarse trends over that history
    - builds a network overview from the latest snapshot of every node

    All access to the internal containers goes through one ``asyncio.Lock``.
    Snapshots are stored by reference, not copied.
    """

    def __init__(self, history_limit: int = 1000) -> None:
        """
        Create an empty aggregator.

        :param history_limit: maximum number of snapshots kept per node;
            older entries are discarded by the underlying ``deque``.
        """
        self._history_limit = history_limit
        # Local node history: deque[(ts, NodeStats)]
        self._local_history: Deque[Tuple[int, NodeStats]] = deque(maxlen=history_limit)
        # Per-peer history: node_id -> deque[(ts, NodeStats)]
        self._peers_history: Dict[str, Deque[Tuple[int, NodeStats]]] = defaultdict(
            lambda: deque(maxlen=history_limit)
        )
        # Latest snapshot per node (local node included)
        self._last_by_node: Dict[str, NodeStats] = {}
        # Known peers (URLs) - kept for the network overview
        self._known_peers: List[str] = []
        self._lock = asyncio.Lock()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _history_for(self, node_id: Optional[str]) -> List[Tuple[int, NodeStats]]:
        """Return a list copy of one node's history; the caller must hold the lock."""
        if node_id is None:
            return list(self._local_history)
        # .get() instead of indexing: looking up an unknown node must not
        # create an empty entry in the defaultdict.
        return list(self._peers_history.get(node_id, ()))

    @staticmethod
    def _mean_or_none(values: List[float]) -> Optional[float]:
        """Return the mean of *values* rounded to 3 digits, or None when empty."""
        return round(statistics.fmean(values), 3) if values else None

    @staticmethod
    def _summarize(values: List[float]) -> Dict[str, Optional[float]]:
        """Return avg/max/min of *values* rounded to 3 digits (all None when empty)."""
        if not values:
            return {"avg": None, "max": None, "min": None}
        return {
            "avg": round(statistics.fmean(values), 3),
            "max": round(max(values), 3),
            "min": round(min(values), 3),
        }

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------

    async def set_known_peers(self, peers: List[str]) -> None:
        """Replace the known-peers list with the sorted, de-duplicated *peers*."""
        async with self._lock:
            self._known_peers = sorted(set(peers))

    async def add_local_snapshot(self, stats: NodeStats) -> None:
        """Append a local snapshot to the history and record it as the node's latest."""
        async with self._lock:
            self._local_history.append((stats.timestamp, stats))
            self._last_by_node[stats.node_id] = stats

    async def add_peer_snapshot(self, stats: NodeStats) -> None:
        """Append a peer snapshot to that peer's history and record it as its latest."""
        async with self._lock:
            self._peers_history[stats.node_id].append((stats.timestamp, stats))
            self._last_by_node[stats.node_id] = stats

    # ------------------------------------------------------------------
    # Readers
    # ------------------------------------------------------------------

    async def get_latest_local(self) -> Optional[NodeStats]:
        """Return the most recently added local snapshot, or None if there is none."""
        async with self._lock:
            return self._local_history[-1][1] if self._local_history else None

    async def aggregate_node_stats(self, node_id: Optional[str] = None, last_n: int = 20) -> Dict[str, Any]:
        """
        Return cpu/mem aggregates for one node (the local node by default).

        :param node_id: peer to aggregate; None selects the local history.
        :param last_n: how many of the most recent snapshots to use. Values
            <= 0 select no snapshots.
        :return: ``{"samples": 0}`` when nothing is selected, otherwise a dict
            with ``samples``, ``time_span_sec`` and ``cpu`` / ``mem``
            sub-dicts holding ``avg`` / ``max`` / ``min`` (None when every
            selected snapshot lacks that metric).
        """
        # Guard explicitly: seq[-0:] is the WHOLE sequence and a negative
        # last_n would turn the slice into "skip the first k" - neither is
        # what "the last N samples" means.
        if last_n <= 0:
            return {"samples": 0}

        async with self._lock:
            series = self._history_for(node_id)[-last_n:]

        if not series:
            return {"samples": 0}

        cpu = [s.system.cpu_percent for _, s in series if s.system.cpu_percent is not None]
        mem = [s.system.mem_percent for _, s in series if s.system.mem_percent is not None]
        return {
            "samples": len(series),
            "time_span_sec": (series[-1][0] - series[0][0]) if len(series) > 1 else 0,
            "cpu": self._summarize(cpu),
            "mem": self._summarize(mem),
        }

    async def get_network_overview(self) -> NetworkStats:
        """
        Build a network summary from the latest snapshot of every known node.

        A node counts as active when its latest timestamp is at most 300
        seconds behind the current time.
        """
        async with self._lock:
            nodes = list(self._last_by_node.values())

        # Read the clock once so every node is compared against the same instant.
        now = int(time.time())
        active_nodes = sum(1 for n in nodes if (now - n.timestamp) <= 300)

        uptimes = [n.system.uptime_seconds for n in nodes if n.system.uptime_seconds is not None]
        cpus = [n.system.cpu_percent for n in nodes if n.system.cpu_percent is not None]
        mems = [n.system.mem_percent for n in nodes if n.system.mem_percent is not None]
        avg_uptime = self._mean_or_none(uptimes)
        avg_cpu = self._mean_or_none(cpus)
        avg_mem = self._mean_or_none(mems)

        # Simplistic network "health": 100 minus the mean of avg cpu and avg
        # mem, clamped to [0, 100].
        health_score = None
        if avg_cpu is not None and avg_mem is not None:
            penalty = (avg_cpu / 2.0) + (avg_mem / 2.0)
            health_score = max(0.0, 100.0 - min(100.0, penalty))

        nodes_summary: List[Dict[str, Any]] = [
            {
                "node_id": n.node_id,
                "uptime": n.system.uptime_seconds,
                "cpu": n.system.cpu_percent,
                "mem": n.system.mem_percent,
                "available_content_items": n.available_content_items,
                "timestamp": n.timestamp,
            }
            for n in nodes
        ]

        # Latency is not computed here; it can be filled in from external signals.
        return NetworkStats(
            node_count=len(nodes),
            active_nodes=active_nodes,
            avg_uptime_seconds=avg_uptime,
            avg_cpu_percent=avg_cpu,
            avg_mem_percent=avg_mem,
            avg_latency_ms=None,
            total_available_content=sum((n.available_content_items or 0) for n in nodes) if nodes else None,
            health_score=health_score,
            nodes=nodes_summary,
        )

    async def calculate_trends(self, node_id: Optional[str] = None, window: int = 60) -> Dict[str, Any]:
        """
        Coarse cpu/mem trend: difference between the first and the last
        snapshot whose timestamp falls within the last *window* seconds.

        :param node_id: peer to inspect; None selects the local history.
        :param window: look-back window in seconds.
        :return: ``{}`` when the node has no history at all,
            ``{"samples": n}`` when fewer than two snapshots fall in the
            window, otherwise ``samples`` plus ``cpu_percent_delta`` and
            ``mem_percent_delta`` (None when either endpoint lacks the value).
        """
        async with self._lock:
            series = self._history_for(node_id)

        if not series:
            return {}

        # Keep only the snapshots from the last `window` seconds.
        cutoff = int(time.time()) - window
        window_series = [entry for entry in series if entry[0] >= cutoff]
        if len(window_series) < 2:
            return {"samples": len(window_series)}

        first = window_series[0][1]
        last = window_series[-1][1]

        def delta(a: Optional[float], b: Optional[float]) -> Optional[float]:
            if a is None or b is None:
                return None
            return round(b - a, 3)

        return {
            "samples": len(window_series),
            "cpu_percent_delta": delta(first.system.cpu_percent, last.system.cpu_percent),
            "mem_percent_delta": delta(first.system.mem_percent, last.system.mem_percent),
        }

    async def build_local_signed_stats(self) -> NodeStats:
        """
        Take the latest local snapshot, sign it and return it.

        The signature is written onto the stored snapshot object itself, so
        the entry kept in the history carries it afterwards as well.

        :raises RuntimeError: when no local snapshot has been added yet.
        """
        latest = await self.get_latest_local()
        if latest is None:
            raise RuntimeError("No local stats available")

        crypto = get_ed25519_manager()
        # Sign the payload without its signature field.
        payload = latest.to_dict(include_signature=False)
        latest.signature = crypto.sign_message(NodeStats.canonical_payload(payload))
        return latest

    # Helper accessors for tests / diagnostics

    async def list_known_peers(self) -> List[str]:
        """Return a copy of the known-peers list."""
        async with self._lock:
            return list(self._known_peers)

    async def last_by_node(self) -> Dict[str, NodeStats]:
        """Return a shallow copy of the node_id -> latest snapshot mapping."""
        async with self._lock:
            return dict(self._last_by_node)