203 lines
6.7 KiB
Python
203 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Network connectivity and node statistics checker for MY Network v3.x
|
|
|
|
Purpose:
|
|
- Query local node APIs to collect connectivity and version compatibility stats
|
|
- Optionally test bootstrap peers for reachability and latency
|
|
- Print a concise JSON summary that start.sh can parse and log
|
|
|
|
Behavior:
|
|
- Graceful degradation: if any endpoint is missing/unavailable, it is skipped
|
|
- Only prints JSON to stdout on success; on partial failures still prints JSON with flags
|
|
- Exit codes:
|
|
0 - success (even with partial data)
|
|
1 - unexpected error (e.g., invalid arguments)
|
|
|
|
Inputs (ENV or CLI args):
|
|
- API_BASE (default: http://localhost:8000)
|
|
- TIMEOUT_SECONDS (default: 5)
|
|
- TEST_PEERS (bool as "true"/"false", default: true) - whether to actively probe peers
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from urllib.parse import urljoin
|
|
|
|
import urllib.request
|
|
import urllib.error
|
|
import ssl
|
|
|
|
# Runtime configuration, read once from the environment at import time.
# API_BASE has any trailing "/" stripped; TEST_PEERS is true only for the
# literal value "true" (case-insensitive).
API_BASE = os.getenv("API_BASE", "http://localhost:8000").rstrip("/")
TIMEOUT = float(os.getenv("TIMEOUT_SECONDS", "5"))
TEST_PEERS = os.getenv("TEST_PEERS", "true").lower() == "true"

# TLS context with hostname and certificate verification switched off, for
# HTTPS with self-signed certs (when fronted by nginx in early boot).
CTX = ssl.create_default_context()
CTX.check_hostname = False  # has to be disabled before verify_mode can be CERT_NONE
CTX.verify_mode = ssl.CERT_NONE
|
|
|
|
|
|
def http_get_json(url, timeout=TIMEOUT):
    """GET *url* and decode the response body as UTF-8 JSON.

    Returns a ``(payload, error)`` pair:
      - ``(parsed_json, None)`` when the request and the parse both succeed;
      - ``(None, "HTTP error <code>: <body>")`` on an HTTP error status;
      - ``(None, "Request error: ...")`` on any other request failure;
      - ``(None, "JSON parse error: ...")`` when the body cannot be decoded
        or parsed.

    The request is made with the module-level ``CTX`` SSL context.
    """
    request = urllib.request.Request(url, method="GET")

    # Phase 1: fetch the raw bytes.
    try:
        with urllib.request.urlopen(request, timeout=timeout, context=CTX) as response:
            raw = response.read()
    except urllib.error.HTTPError as http_err:
        # The error body is informational only; fall back to "" if unreadable.
        try:
            detail = http_err.read().decode("utf-8")
        except Exception:
            detail = ""
        return None, f"HTTP error {http_err.code}: {detail}"
    except Exception as exc:
        return None, f"Request error: {exc}"

    # Phase 2: decode and parse.
    try:
        parsed = json.loads(raw.decode("utf-8"))
    except Exception as exc:
        return None, f"JSON parse error: {exc}"
    return parsed, None
|
|
|
|
|
|
def measure_latency(url, timeout=TIMEOUT):
    """Fetch *url* with ``http_get_json`` and time the call.

    Returns ``(payload, error, elapsed_seconds)`` where the first two items
    are exactly what ``http_get_json`` returned and ``elapsed_seconds`` is a
    float covering the whole call, whether it succeeded or failed.
    """
    # perf_counter() is monotonic, so the interval cannot be skewed (or go
    # negative) if the system wall clock is adjusted while the request runs.
    started = time.perf_counter()
    payload, err = http_get_json(url, timeout=timeout)
    elapsed = time.perf_counter() - started
    return payload, err, elapsed
|
|
|
|
|
|
_DEFAULT_PEER_PORT = 8000


def _coerce_int(value, default):
    """Return ``int(value)``, or *default* when the value cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def _normalize_peer(peer):
    """Normalize one peer entry to ``{"address", "port", "id", "version"}``.

    Dict entries are read through their alias keys; any other value is
    stringified and treated as ``"host:port"`` (or a bare ``"host"``).
    """
    if isinstance(peer, dict):
        return {
            "address": peer.get("address") or peer.get("host") or peer.get("ip") or "unknown",
            "port": peer.get("port") or _DEFAULT_PEER_PORT,
            "id": peer.get("id") or peer.get("node_id") or None,
            "version": peer.get("version") or peer.get("node_version") or None,
        }
    host, sep, port_text = str(peer).partition(":")
    port = _coerce_int(port_text, _DEFAULT_PEER_PORT) if sep else _DEFAULT_PEER_PORT
    return {"address": host, "port": port, "id": None, "version": None}


def _parse_peers_payload(payload):
    """Return ``(count_reported, normalized_peers)`` for a peers response.

    Expected shapes are ``{"count": N, "peers": [...]}`` or a bare list.
    A missing, null or non-list ``"peers"`` value is treated as no peers, and
    a missing, null or non-numeric ``"count"`` falls back to the list length,
    so a malformed payload degrades instead of raising.
    """
    if isinstance(payload, dict):
        raw_peers = payload.get("peers")
        if not isinstance(raw_peers, list):
            raw_peers = []
        count = _coerce_int(payload.get("count"), len(raw_peers))
    elif isinstance(payload, list):
        raw_peers = payload
        count = len(raw_peers)
    else:
        raw_peers = []
        count = 0
    return count, [_normalize_peer(entry) for entry in raw_peers]


def _find_version_mismatches(local_version, peers):
    """List peers whose reported version differs from *local_version*.

    Peers without a version are ignored; an unknown local version yields no
    mismatches. Versions are compared as strings.
    """
    if not local_version:
        return []
    local_text = str(local_version)
    return [
        {
            "peer": f"{peer.get('address')}:{peer.get('port')}",
            "peer_version": peer["version"],
            "local_version": local_version,
        }
        for peer in peers
        if peer.get("version") and str(peer["version"]) != local_text
    ]


def _probe_peers(peers):
    """Request ``/health`` over HTTP from each normalized peer, one at a time.

    Returns one record per peer with its reachability flag, latency in
    milliseconds and the error string (``None`` on success).
    """
    results = []
    for peer in peers:
        addr = peer["address"]
        port = peer["port"]
        payload, err, elapsed = measure_latency(f"http://{addr}:{port}/health", timeout=TIMEOUT)
        results.append({
            "peer": f"{addr}:{port}",
            "ok": err is None and isinstance(payload, (dict, list)),
            "latency_ms": round(elapsed * 1000, 1),
            "error": err,
        })
    return results


def main():
    """Collect node, peer and network data and print it as one JSON line.

    Queries the health, node status, peers and network stats endpoints under
    ``API_BASE``. Each failed endpoint is recorded in ``summary["errors"]``
    and the run continues. When ``TEST_PEERS`` is set and peers are known,
    every peer is probed as well.

    Returns:
        0 after the summary has been printed to stdout.
    """
    summary = {
        "api_base": API_BASE,
        "node": {
            "health": {"ok": False, "error": None},
            "status": {},
            "version": None,
        },
        "peers": {
            "count_reported": 0,
            "list": [],
            "tested": [],
            "test_enabled": TEST_PEERS,
        },
        "network": {
            "stats": {},
            "version_mismatch": [],
        },
        "errors": [],
    }
    errors = summary["errors"]

    # 1) Health
    health_url = urljoin(API_BASE + "/", "health")
    payload, err = http_get_json(health_url)
    health = summary["node"]["health"]
    if payload is not None:
        health["ok"] = True
        health["data"] = payload
    else:
        health["ok"] = False
        health["error"] = err
        errors.append({"endpoint": health_url, "error": err})

    # 2) Node status
    status_url = urljoin(API_BASE + "/", "api/v3/node/status")
    payload, err = http_get_json(status_url)
    if payload is not None:
        summary["node"]["status"] = payload
        # Try to infer the version from the first known key that is present.
        version = None
        if isinstance(payload, dict):
            for key in ("version", "node_version", "app_version"):
                if key in payload:
                    version = payload[key]
                    break
        summary["node"]["version"] = version
    elif err:
        errors.append({"endpoint": status_url, "error": err})

    # 3) Peers
    peers_url = urljoin(API_BASE + "/", "api/v3/node/peers")
    payload, err = http_get_json(peers_url)
    if payload is not None:
        count, peer_list = _parse_peers_payload(payload)
        summary["peers"]["count_reported"] = count
        summary["peers"]["list"] = peer_list
    elif err:
        errors.append({"endpoint": peers_url, "error": err})

    # 4) Network stats
    net_url = urljoin(API_BASE + "/", "api/v3/network/stats")
    payload, err = http_get_json(net_url)
    if payload is not None:
        summary["network"]["stats"] = payload
    elif err:
        errors.append({"endpoint": net_url, "error": err})

    # The mismatch list depends only on the node version and the peer list,
    # so it is computed even when the stats endpoint is unavailable.
    summary["network"]["version_mismatch"] = _find_version_mismatches(
        summary["node"]["version"], summary["peers"]["list"]
    )

    # 5) Active probing (latency and reachability) for peers
    if TEST_PEERS and summary["peers"]["list"]:
        summary["peers"]["tested"] = _probe_peers(summary["peers"]["list"])

    print(json.dumps(summary, ensure_ascii=False))
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: exit with main()'s return value, or with 1 after
    # reporting any unexpected exception as a JSON object on stdout.
    try:
        exit_status = main()
    except Exception as exc:
        # Never spam stacktraces to stdout; start.sh will handle gracefully
        try:
            print(json.dumps({"fatal_error": str(exc)}))
        except Exception:
            # Reporting is best-effort; the exit code still signals failure.
            pass
        sys.exit(1)
    else:
        sys.exit(exit_status)