automatic handshake and connect

This commit is contained in:
unexpected 2025-10-08 17:22:03 +00:00
parent dbc460f0bb
commit da446f5ab0
7 changed files with 283 additions and 10 deletions

View File

@ -6,10 +6,9 @@ from typing import Dict, Any
from base58 import b58decode
from sanic import response
from sqlalchemy import select
from urllib.parse import urlparse
from app.core.logger import make_log
from app.core.models.my_network import KnownNode
from app.core.network.constants import CURRENT_PROTOCOL_VERSION, NODE_TYPE_PRIVATE
from app.core.network.config import NODE_PRIVACY
from app.core.network.handshake import build_handshake_payload, compute_node_info, sign_response
@ -17,6 +16,52 @@ from app.core.network.nodes import upsert_known_node, list_known_public_nodes
from app.core.network.semver import compatibility
from app.core.network.guard import check_rate_limit, check_timestamp_fresh, check_and_remember_nonce
from app.core.network.config import HANDSHAKE_TS_TOLERANCE_SEC
from app.core.ipfs_client import swarm_connect
def _port_from_public_host(public_host: str) -> int:
"""Return an integer port extracted from a public_host URL or host:port string."""
if not public_host:
return 80
parsed = urlparse(public_host)
if parsed.scheme:
if parsed.port:
return parsed.port
return 443 if parsed.scheme == "https" else 80
host_port = public_host.strip()
if ":" in host_port:
candidate = host_port.rsplit(":", 1)[-1]
try:
return int(candidate)
except (TypeError, ValueError):
pass
return 80
def _extract_ipfs_meta(payload: Dict[str, Any]) -> Dict[str, Any]:
ipfs = payload or {}
multiaddrs = ipfs.get("multiaddrs") or []
if not isinstance(multiaddrs, list):
multiaddrs = [multiaddrs]
normalized_multiaddrs = [str(m) for m in multiaddrs if m]
meta: Dict[str, Any] = {}
if normalized_multiaddrs:
meta["multiaddrs"] = normalized_multiaddrs
peer_id = ipfs.get("peer_id")
if peer_id:
meta["peer_id"] = str(peer_id)
agent = ipfs.get("agent_version") or ipfs.get("agentVersion")
if agent:
meta["agent_version"] = str(agent)
return meta
async def _connect_ipfs_multiaddrs(addrs):
for addr in addrs or []:
try:
await swarm_connect(addr)
except Exception:
pass
async def s_api_v1_network_info(request):
@ -61,6 +106,7 @@ async def s_api_v1_network_handshake(request):
return response.json({"error": "NONCE_REPLAY"}, status=400)
peer_version = str(data.get("version"))
ipfs_meta = _extract_ipfs_meta(data.get("ipfs") or {})
comp = compatibility(peer_version, CURRENT_PROTOCOL_VERSION)
if comp == "blocked":
# We still store the node but respond with 409
@ -68,7 +114,7 @@ async def s_api_v1_network_handshake(request):
await upsert_known_node(
request.ctx.db_session,
host=data.get("public_host"),
port=int(str(data.get("public_host") or "").split(":")[-1]) if ":" in str(data.get("public_host") or "") else 80,
port=_port_from_public_host(data.get("public_host")),
public_key=str(data.get("public_key")),
meta={
"version": peer_version,
@ -76,6 +122,7 @@ async def s_api_v1_network_handshake(request):
"is_public": data.get("node_type", "public") != "private",
"public_host": data.get("public_host"),
"unsupported_last_checked_at": datetime.utcnow().isoformat(),
"ipfs": ipfs_meta,
}
)
except Exception:
@ -111,7 +158,7 @@ async def s_api_v1_network_handshake(request):
await upsert_known_node(
request.ctx.db_session,
host=data.get("public_host"),
port=int(str(data.get("public_host") or "").split(":")[-1]) if ":" in str(data.get("public_host") or "") else 80,
port=_port_from_public_host(data.get("public_host")),
public_key=str(data.get("public_key")),
meta={
"version": peer_version,
@ -120,13 +167,16 @@ async def s_api_v1_network_handshake(request):
"public_host": data.get("public_host"),
"last_metrics": data.get("metrics", {}),
"capabilities": data.get("capabilities", {}),
"ipfs": ipfs_meta,
}
)
await _connect_ipfs_multiaddrs(ipfs_meta.get("multiaddrs"))
except Exception as e:
make_log("Handshake", f"Upsert peer failed: {e}", level='warning')
# Merge advertised peers from the caller (optional field)
for n in data.get("known_public_nodes", []) or []:
known_ipfs_meta = _extract_ipfs_meta(n.get("ipfs") or {})
try:
await upsert_known_node(
request.ctx.db_session,
@ -139,8 +189,10 @@ async def s_api_v1_network_handshake(request):
"is_public": True,
"public_host": n.get("public_host") or n.get("host"),
"capabilities": n.get("capabilities") or {},
"ipfs": known_ipfs_meta,
}
)
await _connect_ipfs_multiaddrs(known_ipfs_meta.get("multiaddrs"))
except Exception:
pass

View File

@ -3,6 +3,7 @@ import os
from typing import List, Optional
import httpx
from urllib.parse import urlparse
import random
import shutil
from sqlalchemy import select
@ -11,7 +12,7 @@ from app.core.logger import make_log
from app.core.storage import db_session
from app.core.models.my_network import KnownNode
from app.core.models.content_v3 import EncryptedContent, ContentDerivative
from app.core.ipfs_client import pin_add, find_providers, swarm_connect
from app.core.ipfs_client import pin_add, pin_ls, find_providers, swarm_connect, add_streamed_file
INTERVAL_SEC = 60
@ -28,7 +29,8 @@ async def fetch_index(base_url: str, etag: Optional[str], since: Optional[str])
url = f"{base_url.rstrip('/')}/api/v1/content.delta" if since else f"{base_url.rstrip('/')}/api/v1/content.index"
if etag:
headers['If-None-Match'] = etag
async with httpx.AsyncClient(timeout=20) as client:
# follow_redirects handles peers that force HTTPS and issue 301s
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
r = await client.get(url, headers=headers, params=params)
if r.status_code != 200:
if r.status_code == 304:
@ -157,6 +159,20 @@ async def main_fn(memory):
async def _pin_one(cid: str):
async with sem:
try:
node_ipfs_meta = (n.meta or {}).get('ipfs') or {}
multiaddrs = node_ipfs_meta.get('multiaddrs') or []
for addr in multiaddrs:
try:
await swarm_connect(addr)
except Exception:
pass
try:
existing = await pin_ls(cid)
if existing and existing.get('Keys'):
make_log('index_scout_v3', f"pin {cid} already present", level='debug')
return
except Exception:
pass
# Try to pre-connect to discovered providers
try:
provs = await find_providers(cid, max_results=5)
@ -168,15 +184,56 @@ async def main_fn(memory):
pass
except Exception:
pass
await pin_add(cid, recursive=True)
try:
await asyncio.wait_for(pin_add(cid, recursive=True), timeout=60)
return
except httpx.HTTPStatusError as http_err:
body = (http_err.response.text or '').lower() if http_err.response else ''
if 'already pinned' in body or 'pin already set' in body:
make_log('index_scout_v3', f"pin {cid} already present", level='debug')
return
raise
except Exception as e:
make_log('index_scout_v3', f"pin {cid} failed: {e}", level='warning')
# Attempt HTTP gateway fallback before logging failure
fallback_sources = []
node_host = n.meta.get('public_host') if isinstance(n.meta, dict) else None
try:
# Derive gateway host: prefer public_host domain if present
parsed = urlparse(node_host) if node_host else None
gateway_host = parsed.hostname if parsed and parsed.hostname else (n.ip or '').split(':')[0]
gateway_port = parsed.port if (parsed and parsed.port not in (None, 80, 443)) else 8080
if gateway_host:
gateway_url = f"http://{gateway_host}:{gateway_port}/ipfs/{cid}"
make_log('index_scout_v3', f"fallback download start {cid} via {gateway_url}", level='debug')
async with httpx.AsyncClient(timeout=None) as client:
resp = await client.get(gateway_url)
resp.raise_for_status()
data = resp.content
chunk_bytes = int(os.getenv('CRYPTO_CHUNK_BYTES', '1048576'))
add_params = {
'cid-version': 1,
'raw-leaves': 'true',
'chunker': f'size-{chunk_bytes}',
'hash': 'sha2-256',
'pin': 'true',
}
result = await add_streamed_file([data], filename=f'{cid}.bin', params=add_params)
if str(result.get('Hash')) != str(cid):
raise ValueError(f"gateway add returned mismatched CID {result.get('Hash')}")
make_log('index_scout_v3', f"pin {cid} fetched via gateway {gateway_host}:{gateway_port}", level='info')
return
else:
fallback_sources.append('gateway-host-missing')
except Exception as fallback_err:
fallback_sources.append(str(fallback_err))
make_log('index_scout_v3', f"pin {cid} failed: {e}; fallback={'; '.join(fallback_sources) if fallback_sources else 'none'}", level='warning')
tasks = []
for it in items:
await upsert_content(it)
cid = it.get('encrypted_cid')
if cid:
make_log('index_scout_v3', f"queue pin {cid}")
tasks.append(asyncio.create_task(_pin_one(cid)))
if tasks:
await asyncio.gather(*tasks)

View File

@ -1,3 +1,4 @@
import json
import os
from typing import List
@ -9,6 +10,23 @@ def _csv_list(val: str) -> List[str]:
return [x.strip() for x in (val or "").split(",") if x.strip()]
def _json_value(val: str, fallback):
if not val:
return fallback
try:
return json.loads(val)
except Exception:
return fallback
def _as_list(value):
if isinstance(value, list):
return value
if value is None:
return []
return [value]
# Handshake / network config driven by env
NODE_PRIVACY = os.getenv("NODE_PRIVACY", NODE_TYPE_PUBLIC).strip().lower()
if NODE_PRIVACY not in (NODE_TYPE_PUBLIC, NODE_TYPE_PRIVATE):
@ -30,6 +48,12 @@ NETWORK_TLS_VERIFY = int(os.getenv("NETWORK_TLS_VERIFY", "1")) == 1
HANDSHAKE_TS_TOLERANCE_SEC = int(os.getenv("HANDSHAKE_TS_TOLERANCE_SEC", "300"))
HANDSHAKE_RATE_LIMIT_PER_MIN = int(os.getenv("HANDSHAKE_RATE_LIMIT_PER_MIN", "60"))
# IPFS discovery/peering
# Each variable is read as JSON; a missing or malformed value yields an empty
# list, and a single scalar/object is wrapped so every setting is always a list.
IPFS_PRIVATE_BOOTSTRAP_ADDRESSES = _as_list(_json_value(os.getenv("IPFS_PRIVATE_BOOTSTRAP"), []))
# Wrapped like its siblings: a lone {"ID": ..., "Addrs": [...]} object would
# otherwise be iterated key-by-key by consumers expecting a list of peers.
IPFS_PEERING_PEERS = _as_list(_json_value(os.getenv("IPFS_PEERING_PEERS"), []))
IPFS_ANNOUNCE_ADDRESSES = _as_list(_json_value(os.getenv("IPFS_ANNOUNCE_ADDRESSES"), []))
IPFS_NOANNOUNCE_ADDRESSES = _as_list(_json_value(os.getenv("IPFS_NOANNOUNCE_ADDRESSES"), []))
# Capabilities
NODE_IS_BOOTSTRAP = int(os.getenv("NODE_IS_BOOTSTRAP", "0")) == 1
MAX_CONTENT_SIZE_MB = int(os.getenv("MAX_CONTENT_SIZE_MB", "512"))

View File

@ -19,9 +19,16 @@ from app.core.models.node_storage import StoredContent
from app.core.storage import db_session
from .constants import CURRENT_PROTOCOL_VERSION
from .nodes import list_known_public_nodes
from .config import PUBLIC_HOST, NODE_PRIVACY, NODE_IS_BOOTSTRAP, MAX_CONTENT_SIZE_MB
from .config import (
PUBLIC_HOST,
NODE_PRIVACY,
NODE_IS_BOOTSTRAP,
MAX_CONTENT_SIZE_MB,
IPFS_ANNOUNCE_ADDRESSES,
)
from app.core._config import ALLOWED_CONTENT_TYPES
from .constants import NODE_TYPE_PUBLIC
from app.core.ipfs_client import id_info
START_TS = time.time()
@ -60,7 +67,30 @@ def _sign(obj: Dict[str, Any]) -> str:
return signer.sign(blob)
async def _local_ipfs_payload() -> Dict[str, Any]:
    """Describe the local IPFS node for inclusion in handshake/info payloads.

    Queries ``id_info()`` for the node's addresses, peer ID and agent version.
    If the query fails, or reports no addresses, the configured
    ``IPFS_ANNOUNCE_ADDRESSES`` are used as the address list instead.

    Returns:
        A dict with the keys ``multiaddrs``, ``peer_id`` and
        ``agent_version``; a key is omitted when its value is empty.
    """
    multiaddrs, peer_id, agent = [], None, None
    try:
        info = await id_info()
        multiaddrs, peer_id, agent = (
            info.get("Addresses") or [],
            info.get("ID"),
            info.get("AgentVersion"),
        )
    except Exception:
        # Local daemon unavailable: keep the empty defaults and rely on the
        # configured announce addresses below.
        pass

    if not multiaddrs:
        multiaddrs = IPFS_ANNOUNCE_ADDRESSES or []

    fields = (
        ("multiaddrs", multiaddrs),
        ("peer_id", peer_id),
        ("agent_version", agent),
    )
    return {key: value for key, value in fields if value}
async def build_handshake_payload(session) -> Dict[str, Any]:
ipfs_payload = await _local_ipfs_payload()
payload = {
"version": CURRENT_PROTOCOL_VERSION,
"public_key": b58encode(hot_pubkey).decode(),
@ -77,6 +107,8 @@ async def build_handshake_payload(session) -> Dict[str, Any]:
"timestamp": int(datetime.utcnow().timestamp()),
"nonce": secrets.token_hex(16),
}
if ipfs_payload.get("multiaddrs"):
payload["ipfs"] = ipfs_payload
try:
payload["known_public_nodes"] = await list_known_public_nodes(session)
except Exception:
@ -86,6 +118,7 @@ async def build_handshake_payload(session) -> Dict[str, Any]:
async def compute_node_info(session) -> Dict[str, Any]:
ipfs_payload = await _local_ipfs_payload()
node_info = {
"id": b58encode(hot_pubkey).decode(),
"public_key": b58encode(hot_pubkey).decode(),
@ -100,6 +133,8 @@ async def compute_node_info(session) -> Dict[str, Any]:
"max_content_size_mb": MAX_CONTENT_SIZE_MB,
},
}
if ipfs_payload.get("multiaddrs"):
node_info["ipfs"] = ipfs_payload
return node_info
def sign_response(data: Dict[str, Any]) -> Dict[str, Any]:

View File

@ -13,6 +13,7 @@ from app.core.logger import make_log
from app.core.models.my_network import KnownNode
from app.core.storage import db_session
from app.core._secrets import hot_pubkey
from app.core.ipfs_client import swarm_connect
from .config import (
HANDSHAKE_INTERVAL_SEC,
UNSUPPORTED_RECHECK_INTERVAL_SEC,
@ -21,6 +22,8 @@ from .config import (
BOOTSTRAP_TIMEOUT_SEC,
NODE_PRIVACY,
NETWORK_TLS_VERIFY,
IPFS_PRIVATE_BOOTSTRAP_ADDRESSES,
IPFS_PEERING_PEERS,
)
from .constants import NODE_TYPE_PRIVATE
from .semver import compatibility
@ -77,6 +80,44 @@ async def upsert_known_node(session, host: str, port: int, public_key: str, meta
return node
def _ipfs_meta_from_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
data = payload or {}
ipfs = data.get("ipfs") or data # allow direct ipfs dict
multiaddrs = ipfs.get("multiaddrs") if isinstance(ipfs, dict) else []
if not isinstance(multiaddrs, list):
multiaddrs = [multiaddrs]
normalized_multiaddrs = [str(addr) for addr in multiaddrs if addr]
meta: Dict[str, Any] = {}
if normalized_multiaddrs:
meta["multiaddrs"] = normalized_multiaddrs
peer_id = ipfs.get("peer_id") if isinstance(ipfs, dict) else None
if peer_id:
meta["peer_id"] = str(peer_id)
agent = ipfs.get("agent_version") if isinstance(ipfs, dict) else None
if agent:
meta["agent_version"] = str(agent)
return meta
async def _connect_ipfs_multiaddrs(multiaddrs: List[str]):
for addr in multiaddrs or []:
try:
await swarm_connect(addr)
except Exception as exc:
make_log("Handshake", f"IPFS connect failed {addr}: {exc}", level='debug')
async def _connect_static_peers():
    """Connect to the IPFS peers configured through the environment.

    Collects addresses from ``IPFS_PRIVATE_BOOTSTRAP_ADDRESSES`` and from
    ``IPFS_PEERING_PEERS`` (entries may be ``{"ID": ..., "Addrs": [...]}``
    dicts or plain multiaddr strings), removes duplicates while keeping the
    first-seen order, and hands the result to ``_connect_ipfs_multiaddrs``.
    """

    def _listify(value):
        # Tolerate a single scalar/object where a list is expected, so a
        # string is never iterated character by character.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    candidates = _listify(IPFS_PRIVATE_BOOTSTRAP_ADDRESSES)
    for peer in _listify(IPFS_PEERING_PEERS):
        if isinstance(peer, dict):
            candidates.extend(_listify(peer.get("Addrs")))
        elif isinstance(peer, str):
            candidates.append(peer)

    # The same multiaddr is commonly listed in both settings; dial it once.
    # dict.fromkeys keeps insertion order while dropping repeats.
    env_addrs: List[str] = list(
        dict.fromkeys(addr for addr in candidates if isinstance(addr, str) and addr)
    )
    await _connect_ipfs_multiaddrs(env_addrs)
def _compatibility_for_meta(remote_version: str) -> str:
if not remote_version or remote_version == "0.0.0":
return "warning"
@ -90,6 +131,7 @@ async def list_known_public_nodes(session) -> List[Dict[str, Any]]:
meta = r.meta or {}
if not meta.get("is_public", True):
continue
ipfs_meta = meta.get("ipfs") or ((meta.get("last_response") or {}).get("node", {}) or {}).get("ipfs") or {}
result.append({
"host": r.ip,
"port": r.port,
@ -99,6 +141,7 @@ async def list_known_public_nodes(session) -> List[Dict[str, Any]]:
"last_seen": (r.last_sync.isoformat() + "Z") if r.last_sync else None,
"public_host": meta.get("public_host"),
"capabilities": meta.get("capabilities") or {},
"ipfs": ipfs_meta,
})
return result
@ -171,6 +214,7 @@ async def perform_handshake_round():
resp = await _handshake_with(session, base_url)
# Merge known nodes received
for peer in (resp or {}).get("known_public_nodes", []):
peer_ipfs_meta = _ipfs_meta_from_payload(peer)
try:
await upsert_known_node(
session,
@ -181,13 +225,17 @@ async def perform_handshake_round():
"is_public": True,
"version": peer.get("version") or "0.0.0",
"public_host": peer.get("public_host") or (f"http://{peer.get('host')}:{peer.get('port')}" if peer.get('host') else None),
"ipfs": peer_ipfs_meta,
}
)
except Exception as e:
make_log("Handshake", f"Ignore bad peer from {base_url}: {e}", level='warning')
await _connect_ipfs_multiaddrs(peer_ipfs_meta.get("multiaddrs"))
# Update last_sync and meta for node
node_ipfs_meta = _ipfs_meta_from_payload((resp or {}).get("node", {}))
await _connect_ipfs_multiaddrs(node_ipfs_meta.get("multiaddrs"))
node.last_sync = _now()
node.meta = {**(node.meta or {}), "last_response": resp, "fail_count": 0}
node.meta = {**(node.meta or {}), "last_response": resp, "fail_count": 0, "ipfs": node_ipfs_meta}
await session.commit()
make_log("Handshake", f"Handshake OK with {base_url}")
except Exception as e:
@ -205,6 +253,10 @@ async def network_handshake_daemon(app):
# Stagger start a bit to allow HTTP server to come up
await asyncio.sleep(3)
make_log("Handshake", f"Daemon started; interval={HANDSHAKE_INTERVAL_SEC}s")
try:
await _connect_static_peers()
except Exception as exc:
make_log("Handshake", f"Static IPFS peering failed: {exc}", level='warning')
while True:
try:
await perform_handshake_round()
@ -218,6 +270,10 @@ async def bootstrap_once_and_exit_if_failed():
seeds = BOOTSTRAP_SEEDS or []
if not seeds:
return # Nothing to do
try:
await _connect_static_peers()
except Exception as exc:
make_log("Bootstrap", f"Static IPFS peering failed: {exc}", level='warning')
async with db_session(auto_commit=True) as session:
# If we already know nodes, skip bootstrap
have_any = (await session.execute(select(KnownNode))).scalars().first()
@ -235,6 +291,8 @@ async def bootstrap_once_and_exit_if_failed():
ok = True
# Seed itself gets inserted by handshake handling route; also insert it explicitly
try:
seed_ipfs_meta = _ipfs_meta_from_payload(resp.get("node", {}))
await _connect_ipfs_multiaddrs(seed_ipfs_meta.get("multiaddrs"))
await upsert_known_node(
session,
host=seed,
@ -244,6 +302,7 @@ async def bootstrap_once_and_exit_if_failed():
"is_public": True,
"version": resp.get("node", {}).get("version", "0.0.0"),
"public_host": resp.get("node", {}).get("public_host") or seed,
"ipfs": seed_ipfs_meta,
}
)
except Exception:

View File

@ -4,3 +4,7 @@ MYSQL_URI=mysql+pymysql://user:password@maria_db:3306
MYSQL_ROOT_PASSWORD=playground
MYSQL_DATABASE=bot_database
CONTENT_KEY_KEK_B64=Paste base64-encoded 32-byte key for wrapping DEKs
IPFS_PRIVATE_BOOTSTRAP=[]
IPFS_PEERING_PEERS=[]
IPFS_ANNOUNCE_ADDRESSES=[]
IPFS_NOANNOUNCE_ADDRESSES=[]

View File

@ -257,6 +257,48 @@ if [[ -z "$BOOTSTRAP_SEEDS" ]]; then
exit 1
fi
# --- IPFS bootstrap / peering / announce settings ---------------------------
# Pattern for a single-entry JSON array such as ["value"]. It is kept in a
# variable because double quotes written inline inside [[ =~ ]] are parsed by
# the shell as quoting, not as part of the regular expression.
JSON_SINGLE_STRING_RE='^\["([^"]+)"\]$'
# Dotted-quad check for values that are embedded into an /ip4/ multiaddr.
IPV4_RE='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'

# Offer the previously saved bootstrap address (if any) as the prompt default.
DEFAULT_IPFS_BOOTSTRAP_RAW=$(trim "$(ini_val IPFS_PRIVATE_BOOTSTRAP)")
DEFAULT_IPFS_BOOTSTRAP_ADDR=""
if [[ "$DEFAULT_IPFS_BOOTSTRAP_RAW" =~ $JSON_SINGLE_STRING_RE ]]; then
  DEFAULT_IPFS_BOOTSTRAP_ADDR="${BASH_REMATCH[1]}"
fi
if [[ -z "$DEFAULT_IPFS_BOOTSTRAP_ADDR" ]]; then
  DEFAULT_IPFS_BOOTSTRAP_ADDR="/ip4/2.58.65.188/tcp/4001/p2p/12D3KooWDNFkGrc7oFrCSLqm68gRgmCX7mUGmpZHQjezrtHRnptd"
fi
prompt_required IPFS_BOOTSTRAP_MULTIADDR "Primary IPFS bootstrap multiaddr (/ip4/.../tcp/.../p2p/PeerID)" "$DEFAULT_IPFS_BOOTSTRAP_ADDR"
IPFS_BOOTSTRAP_MULTIADDR=$(trim "$IPFS_BOOTSTRAP_MULTIADDR")
if [[ "$IPFS_BOOTSTRAP_MULTIADDR" != *"/p2p/"* ]]; then
  echo 'Bootstrap multiaddr must include /p2p/<PeerID> suffix.' >&2
  exit 1
fi
# Everything after the last "/p2p/" is the peer ID.
IPFS_PEER_ID=${IPFS_BOOTSTRAP_MULTIADDR##*/p2p/}
if [[ -z "$IPFS_PEER_ID" ]]; then
  # ".../p2p/" with nothing after it would produce a peering entry with an empty ID.
  echo 'Bootstrap multiaddr must include /p2p/<PeerID> suffix.' >&2
  exit 1
fi
BOOTSTRAP_JSON=$(printf '["%s"]' "$IPFS_BOOTSTRAP_MULTIADDR")
PEERING_JSON=$(printf '[{"ID":"%s","Addrs":["%s"]}]' "$IPFS_PEER_ID" "$IPFS_BOOTSTRAP_MULTIADDR")
update_env IPFS_PRIVATE_BOOTSTRAP "$BOOTSTRAP_JSON"
update_env IPFS_PEERING_PEERS "$PEERING_JSON"

# Offer the previously saved announce address, or derive one from the host's IP.
DEFAULT_IPFS_ANNOUNCE_RAW=$(trim "$(ini_val IPFS_ANNOUNCE_ADDRESSES)")
DEFAULT_IPFS_ANNOUNCE=""
if [[ "$DEFAULT_IPFS_ANNOUNCE_RAW" =~ $JSON_SINGLE_STRING_RE ]]; then
  DEFAULT_IPFS_ANNOUNCE="${BASH_REMATCH[1]}"
fi
if [[ -z "$DEFAULT_IPFS_ANNOUNCE" ]]; then
  if command -v curl >/dev/null 2>&1; then
    # -f: ignore HTTP error pages; --max-time: never hang the installer on a slow lookup.
    PUBLIC_IP=$(curl -fs --max-time 5 https://ifconfig.me 2>/dev/null || true)
  fi
  # Fall back to the first local address when the lookup gave nothing usable as IPv4.
  if [[ ! "${PUBLIC_IP:-}" =~ $IPV4_RE ]]; then
    PUBLIC_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
  fi
  # Only an IPv4 address is valid inside an /ip4/ multiaddr; otherwise leave
  # the default empty so the operator has to type the address.
  if [[ "${PUBLIC_IP:-}" =~ $IPV4_RE ]]; then
    DEFAULT_IPFS_ANNOUNCE="/ip4/${PUBLIC_IP}/tcp/4001"
  fi
fi
prompt_required IPFS_ANNOUNCE_MULTIADDR "Public IPFS announce multiaddr (/ip4/.../tcp/...)" "$DEFAULT_IPFS_ANNOUNCE"
IPFS_ANNOUNCE_MULTIADDR=$(trim "$IPFS_ANNOUNCE_MULTIADDR")
ANNOUNCE_JSON=$(printf '["%s"]' "$IPFS_ANNOUNCE_MULTIADDR")
update_env IPFS_ANNOUNCE_ADDRESSES "$ANNOUNCE_JSON"
update_env IPFS_NOANNOUNCE_ADDRESSES '["/ip4/127.0.0.1","/ip6/::1"]'
prompt_optional NODE_PRIVACY "Node privacy (public/private)" "$(ini_val NODE_PRIVACY)"
if [[ -z "$NODE_PRIVACY" ]]; then
NODE_PRIVACY="public"