uploader-bot/scripts/check_network_connectivity.py

203 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Network connectivity and node statistics checker for MY Network v3.x
Purpose:
- Query local node APIs to collect connectivity and version compatibility stats
- Optionally test bootstrap peers for reachability and latency
- Print a concise JSON summary that start.sh can parse and log
Behavior:
- Graceful degradation: if any endpoint is missing/unavailable, it is skipped
- Only prints JSON to stdout on success; on partial failures still prints JSON with flags
- Exit codes:
0 - success (even with partial data)
1 - unexpected error (e.g., invalid arguments)
Inputs (environment variables only; command-line arguments are not parsed):
- API_BASE (default: http://localhost:8000)
- TIMEOUT_SECONDS (default: 5)
- TEST_PEERS (bool as "true"/"false", default: true) - whether to actively probe peers
"""
import json
import os
import sys
import time
from urllib.parse import urljoin
import urllib.request
import urllib.error
import ssl
# Runtime configuration, read once from the environment when the module loads.
API_BASE = os.getenv("API_BASE", "http://localhost:8000").rstrip("/")
TIMEOUT = float(os.getenv("TIMEOUT_SECONDS", "5"))
TEST_PEERS = "true" == os.getenv("TEST_PEERS", "true").lower()

# TLS context with hostname and certificate verification switched off, so that
# HTTPS endpoints presenting self-signed certs (nginx during early boot) can
# still be queried. check_hostname has to be cleared before verify_mode is
# lowered to CERT_NONE.
CTX = ssl.create_default_context()
CTX.check_hostname = False
CTX.verify_mode = ssl.CERT_NONE
def http_get_json(url, timeout=TIMEOUT):
    """Fetch *url* with a GET request and decode the response body as JSON.

    Never raises for ordinary failures; the outcome is reported through the
    return value instead.

    Args:
        url: Absolute URL to request.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        A ``(payload, error)`` tuple. On success ``payload`` is the decoded
        JSON value and ``error`` is ``None``. On failure ``payload`` is
        ``None`` and ``error`` is a human-readable message.
    """
    request = urllib.request.Request(url, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=timeout, context=CTX) as response:
            raw = response.read()
    except urllib.error.HTTPError as http_exc:
        # The error response may carry a body worth reporting; reading it is
        # best-effort only.
        try:
            detail = http_exc.read().decode("utf-8")
        except Exception:
            detail = ""
        return None, f"HTTP error {http_exc.code}: {detail}"
    except Exception as exc:
        return None, f"Request error: {exc}"

    # The body was fully read above, so it is decoded after the connection
    # has been released.
    try:
        payload = json.loads(raw.decode("utf-8"))
    except Exception as exc:
        return None, f"JSON parse error: {exc}"
    return payload, None
def measure_latency(url, timeout=TIMEOUT):
    """Call ``http_get_json`` on *url* and time how long the call takes.

    Args:
        url: Absolute URL to request.
        timeout: Timeout in seconds forwarded to ``http_get_json``.

    Returns:
        A ``(payload, error, elapsed)`` tuple: the first two items are exactly
        what ``http_get_json`` returned, ``elapsed`` is the duration of the
        call in seconds (float).
    """
    # perf_counter() is monotonic, unlike time.time(): a wall-clock adjustment
    # in the middle of the request cannot produce a negative or inflated
    # duration.
    started = time.perf_counter()
    payload, err = http_get_json(url, timeout=timeout)
    elapsed = time.perf_counter() - started
    return payload, err, elapsed
def _infer_version(status):
    """Return the first of version/node_version/app_version found in *status*, else None."""
    if not isinstance(status, dict):
        return None
    for key in ("version", "node_version", "app_version"):
        if key in status:
            return status[key]
    return None


def _extract_peers(payload):
    """Return ``(reported_count, raw_peer_list)`` from a peers endpoint payload.

    Expected shapes are ``{"count": N, "peers": [...]}`` or a bare list.
    Anything else yields ``(0, [])``. A missing, null or non-numeric ``count``
    falls back to the length of the peer list, and a ``peers`` value that is
    not a list is treated as empty, so malformed payloads never raise.
    """
    if isinstance(payload, list):
        return len(payload), payload
    if not isinstance(payload, dict):
        return 0, []
    peers = payload.get("peers")
    if not isinstance(peers, list):
        peers = []
    try:
        count = int(payload["count"])
    except (KeyError, TypeError, ValueError, OverflowError):
        count = len(peers)
    return count, peers


def _normalize_peer(peer):
    """Convert one raw peer entry into ``{address, port, id, version}``."""
    if isinstance(peer, dict):
        return {
            "address": peer.get("address") or peer.get("host") or peer.get("ip") or "unknown",
            "port": peer.get("port") or 8000,
            "id": peer.get("id") or peer.get("node_id") or None,
            "version": peer.get("version") or peer.get("node_version") or None,
        }
    # Any other entry is treated as a "host:port" (or bare "host") string.
    addr, sep, port_text = str(peer).partition(":")
    port = 8000
    if sep:
        try:
            port = int(port_text)
        except ValueError:
            port = 8000
    return {"address": addr, "port": port, "id": None, "version": None}


def _version_mismatches(local_version, peers):
    """List normalized peers whose version string differs from *local_version*."""
    if not local_version:
        return []
    return [
        {
            "peer": f"{p.get('address')}:{p.get('port')}",
            "peer_version": p["version"],
            "local_version": local_version,
        }
        for p in peers
        if p.get("version") and str(p["version"]) != str(local_version)
    ]


def _probe_peers(peers):
    """Request ``/health`` from every normalized peer and record reachability and latency."""
    tested = []
    for p in peers:
        addr = p["address"]
        port = p["port"]
        url = f"http://{addr}:{port}/health"
        j, err, elapsed = measure_latency(url, timeout=TIMEOUT)
        tested.append({
            "peer": f"{addr}:{port}",
            "ok": err is None and isinstance(j, (dict, list)),
            "latency_ms": round(elapsed * 1000, 1),
            "error": err,
        })
    return tested


def main():
    """Query the local node API, optionally probe peers, and print a JSON summary.

    Each endpoint is queried independently; a failing endpoint is recorded in
    ``summary["errors"]`` and the remaining steps still run. The summary is
    printed to stdout as a single JSON document.

    Returns:
        0, always (partial data still counts as success).
    """
    summary = {
        "api_base": API_BASE,
        "node": {
            "health": {"ok": False, "error": None},
            "status": {},
            "version": None,
        },
        "peers": {
            "count_reported": 0,
            "list": [],
            "tested": [],
            "test_enabled": TEST_PEERS,
        },
        "network": {
            "stats": {},
            "version_mismatch": [],
        },
        "errors": [],
    }

    # 1) Health
    health_url = urljoin(API_BASE + "/", "health")
    j, err = http_get_json(health_url)
    if j is not None:
        summary["node"]["health"]["ok"] = True
        summary["node"]["health"]["data"] = j
    else:
        summary["node"]["health"]["ok"] = False
        summary["node"]["health"]["error"] = err
        summary["errors"].append({"endpoint": health_url, "error": err})

    # 2) Node status (also the source of the local version, when present)
    status_url = urljoin(API_BASE + "/", "api/v3/node/status")
    j, err = http_get_json(status_url)
    if j is not None:
        summary["node"]["status"] = j
        summary["node"]["version"] = _infer_version(j)
    elif err:
        summary["errors"].append({"endpoint": status_url, "error": err})

    # 3) Peers
    peers_url = urljoin(API_BASE + "/", "api/v3/node/peers")
    j, err = http_get_json(peers_url)
    if j is not None:
        count, raw_peers = _extract_peers(j)
        summary["peers"]["count_reported"] = count
        summary["peers"]["list"] = [_normalize_peer(p) for p in raw_peers]
    elif err:
        summary["errors"].append({"endpoint": peers_url, "error": err})

    # 4) Network stats
    net_url = urljoin(API_BASE + "/", "api/v3/network/stats")
    j, err = http_get_json(net_url)
    if j is not None:
        summary["network"]["stats"] = j
    elif err:
        summary["errors"].append({"endpoint": net_url, "error": err})

    # Version mismatches depend only on the local version and the peer list,
    # so they are computed even when the stats endpoint is unavailable.
    summary["network"]["version_mismatch"] = _version_mismatches(
        summary["node"]["version"], summary["peers"]["list"]
    )

    # 5) Active probing (latency and reachability) for peers
    if TEST_PEERS and summary["peers"]["list"]:
        summary["peers"]["tested"] = _probe_peers(summary["peers"]["list"])

    print(json.dumps(summary, ensure_ascii=False))
    return 0
if __name__ == "__main__":
    # Script entry point: run main() and convert any unexpected exception into
    # a one-line JSON object on stdout plus exit status 1, so the caller
    # (start.sh) never has to deal with a stack trace in the output it parses.
    try:
        exit_code = main()
    except Exception as exc:
        exit_code = 1
        try:
            print(json.dumps({"fatal_error": str(exc)}))
        except Exception:
            # Reporting is best-effort; the non-zero exit status still signals failure.
            pass
    sys.exit(exit_code)