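"""index_scout_v3: background service that syncs content from known peers.

Each cycle it pulls every known node's content index (or delta), upserts the
metadata locally, fetches missing thumbnails over HTTP, and pins the
encrypted CIDs to the local IPFS node — with per-node jitter, a concurrency
cap, and a disk-usage watermark as safety valves.
"""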
import asyncio
import os
import random
import shutil
import tempfile
from typing import List, Optional
from urllib.parse import urlparse

import httpx
from sqlalchemy import select

from app.core.logger import make_log
from app.core.storage import db_session
from app.core.models.my_network import KnownNode
from app.core.models.content_v3 import EncryptedContent, ContentDerivative
from app.core.ipfs_client import pin_add, pin_ls, find_providers, swarm_connect, add_streamed_file


# Scheduling and resource limits; both env values can be overridden at runtime
# via ServiceConfig (see main_fn).
INTERVAL_SEC = 60
ENV_PIN_CONCURRENCY = int(os.getenv('SYNC_MAX_CONCURRENT_PINS', '4'))
ENV_DISK_WATERMARK_PCT = int(os.getenv('SYNC_DISK_LOW_WATERMARK_PCT', '90'))


async def fetch_index(base_url: str, etag: Optional[str], since: Optional[str]) -> tuple[List[dict], Optional[str]]:
    """Fetch a peer's content index (or delta, when `since` is set).

    Returns (items, marker). Any failure — including 304 Not Modified —
    returns no items and leaves the caller's marker unchanged.
    """
    try:
        headers = {}
        params = {}
        if since:
            params['since'] = since
        url = f"{base_url.rstrip('/')}/api/v1/content.delta" if since else f"{base_url.rstrip('/')}/api/v1/content.index"
        if etag:
            headers['If-None-Match'] = etag
        # follow_redirects handles peers that force HTTPS and issue 301s
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            r = await client.get(url, headers=headers, params=params)
            if r.status_code != 200:
                # 304 Not Modified and error statuses are handled identically:
                # nothing new, keep the existing marker.
                return [], etag
            j = r.json()
            new_etag = r.headers.get('ETag') or etag
            return j.get('items') or [], (j.get('next_since') or new_etag or etag)
    except Exception:
        return [], etag


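# Usage sketch (hypothetical peer address): the first poll sends no marker and
# receives the full index; later polls replay the stored marker so the peer
# can answer 304 Not Modified or a small delta instead.
#
#     items, marker = await fetch_index('http://203.0.113.7:80', etag=None, since=None)
#     items, marker = await fetch_index('http://203.0.113.7:80', etag=marker, since=None)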
async def upsert_content(item: dict):
    """Create or update the local EncryptedContent row for one index item,
    then fetch its thumbnail if the peer advertises one we don't have yet."""
    cid = item.get('encrypted_cid')
    if not cid:
        return
    async with db_session() as session:
        row = (await session.execute(select(EncryptedContent).where(EncryptedContent.encrypted_cid == cid))).scalars().first()
        if not row:
            row = EncryptedContent(
                encrypted_cid=cid,
                title=item.get('title') or cid,
                description=item.get('description') or '',
                content_type=item.get('content_type') or 'application/octet-stream',
                enc_size_bytes=item.get('size_bytes'),
                preview_enabled=bool(item.get('preview_enabled')),
                preview_conf=item.get('preview_conf') or {},
                salt_b64=item.get('salt_b64'),
            )
            session.add(row)
        else:
            # Only overwrite fields the peer actually supplied.
            row.title = item.get('title') or row.title
            row.description = item.get('description') or row.description
            row.content_type = item.get('content_type') or row.content_type
            row.enc_size_bytes = item.get('size_bytes') or row.enc_size_bytes
            if item.get('preview_enabled') is not None:
                row.preview_enabled = bool(item['preview_enabled'])
            if item.get('preview_conf'):
                row.preview_conf = item['preview_conf']
            if item.get('salt_b64'):
                row.salt_b64 = item['salt_b64']
        await session.commit()

    # Fetch thumbnail via HTTP if provided and not present locally
    cover_url = item.get('cover_url')
    if cover_url:
        try:
            async with db_session() as session:
                ec = (await session.execute(select(EncryptedContent).where(EncryptedContent.encrypted_cid == cid))).scalars().first()
                have_thumb = (await session.execute(select(ContentDerivative).where(
                    ContentDerivative.content_id == ec.id,
                    ContentDerivative.kind == 'decrypted_thumbnail',
                    ContentDerivative.status == 'ready',
                ))).scalars().first()
                if not have_thumb:
                    async with httpx.AsyncClient(timeout=30) as client:
                        r = await client.get(cover_url)
                        r.raise_for_status()
                    tmp = tempfile.NamedTemporaryFile(delete=False)
                    tmp.write(r.content)
                    tmp.close()
                    # Save into the derivative store
                    from app.core.background.convert_v3_service import _save_derivative
                    h, size = await _save_derivative(tmp.name, os.path.basename(cover_url) or 'thumb.jpg')
                    cd = ContentDerivative(
                        content_id=ec.id,
                        kind='decrypted_thumbnail',
                        local_path=os.path.join(os.getenv('UPLOADS_DIR', '/app/data'), h),
                        content_type=r.headers.get('Content-Type') or 'image/jpeg',
                        size_bytes=size,
                        status='ready',
                    )
                    session.add(cd)
                    await session.commit()
        except Exception as e:
            make_log('index_scout_v3', f"thumbnail fetch failed for {cid}: {e}", level='warning')


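# For reference, an index item as consumed by upsert_content above. The shape
# is inferred from the fields this module reads; peers may send extra keys,
# which are ignored, and every value shown here is illustrative.
#
#     {
#         'encrypted_cid': 'bafybeigd...',   # required; the item is skipped without it
#         'title': 'Example title',
#         'description': '',
#         'content_type': 'application/octet-stream',
#         'size_bytes': 1048576,
#         'preview_enabled': True,
#         'preview_conf': {},
#         'salt_b64': 'c2FsdA==',
#         'cover_url': 'http://203.0.113.7:8080/covers/bafy....jpg',  # optional thumbnail
#     }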
async def main_fn(memory):
    """Main service loop: poll every known node, ingest its index, pin new CIDs."""
    make_log('index_scout_v3', 'Service started', level='info')
    sem = None
    sem_size = 0
    while True:
        try:
            # Read runtime config from ServiceConfig (fallback to env)
            from app.core.models._config import ServiceConfig
            async with db_session() as session:
                max_pins = int(await ServiceConfig(session).get('SYNC_MAX_CONCURRENT_PINS', ENV_PIN_CONCURRENCY))
                disk_pct = int(await ServiceConfig(session).get('SYNC_DISK_LOW_WATERMARK_PCT', ENV_DISK_WATERMARK_PCT))
            # Rebuild the semaphore only when the configured size changes; track
            # the size separately instead of peeking at the private Semaphore._value.
            if sem is None or sem_size != max_pins:
                sem = asyncio.Semaphore(max_pins)
                sem_size = max_pins
            async with db_session() as session:
                nodes = (await session.execute(select(KnownNode))).scalars().all()
            for n in nodes:
                base = f"http://{n.ip}:{n.port}"
                # jitter 0..30s per node to reduce stampede
                await asyncio.sleep(random.uniform(0, 30))
                etag = (n.meta or {}).get('index_etag')
                since = (n.meta or {}).get('index_since')
                items, marker = await fetch_index(base, etag, since)
                if not items and marker == etag:
                    continue
                # update node markers
                try:
                    async with db_session() as session:
                        row = (await session.execute(select(KnownNode).where(KnownNode.id == n.id))).scalars().first()
                        if row:
                            meta = row.meta or {}
                            meta['index_etag'] = marker
                            # heuristic: delta markers are ISO-8601 timestamps, which contain 'T'
                            meta['index_since'] = marker if (marker and 'T' in str(marker)) else meta.get('index_since')
                            row.meta = meta
                            await session.commit()
                except Exception:
                    pass

                if not items:
                    continue
                make_log('index_scout_v3', f"Fetched {len(items)} from {base}")

                # Skip pinning when local disk usage crosses the watermark
                try:
                    from app.core._config import UPLOADS_DIR
                    du = shutil.disk_usage(UPLOADS_DIR)
                    used_pct = int(100 * (1 - du.free / du.total))
                    if used_pct >= disk_pct:
                        make_log('index_scout_v3', f"Disk watermark reached ({used_pct}%), skipping pins")
                        continue
                except Exception:
                    pass
                async def _pin_one(cid: str):
                    async with sem:
                        try:
                            # Pre-connect to the node's advertised IPFS multiaddrs
                            node_ipfs_meta = (n.meta or {}).get('ipfs') or {}
                            multiaddrs = node_ipfs_meta.get('multiaddrs') or []
                            for addr in multiaddrs:
                                try:
                                    await swarm_connect(addr)
                                except Exception:
                                    pass
                            # Skip CIDs that are already pinned locally
                            try:
                                existing = await pin_ls(cid)
                                if existing and existing.get('Keys'):
                                    make_log('index_scout_v3', f"pin {cid} already present", level='debug')
                                    return
                            except Exception:
                                pass
                            # Try to pre-connect to discovered providers
                            try:
                                provs = await find_providers(cid, max_results=5)
                                for p in provs:
                                    for addr in (p.get('addrs') or [])[:2]:
                                        try:
                                            await swarm_connect(addr)
                                        except Exception:
                                            pass
                            except Exception:
                                pass
                            try:
                                await asyncio.wait_for(pin_add(cid, recursive=True), timeout=60)
                                return
                            except httpx.HTTPStatusError as http_err:
                                body = (http_err.response.text or '').lower() if http_err.response else ''
                                if 'already pinned' in body or 'pin already set' in body:
                                    make_log('index_scout_v3', f"pin {cid} already present", level='debug')
                                    return
                                raise
                        except Exception as e:
                            # Attempt HTTP gateway fallback before logging failure
                            fallback_sources = []
                            node_host = n.meta.get('public_host') if isinstance(n.meta, dict) else None
                            try:
                                # Derive gateway host: prefer public_host domain if present
                                parsed = urlparse(node_host) if node_host else None
                                gateway_host = parsed.hostname if parsed and parsed.hostname else (n.ip or '').split(':')[0]
                                gateway_port = parsed.port if (parsed and parsed.port not in (None, 80, 443)) else 8080
                                if gateway_host:
                                    gateway_url = f"http://{gateway_host}:{gateway_port}/ipfs/{cid}"
                                    make_log('index_scout_v3', f"fallback download start {cid} via {gateway_url}", level='debug')
                                    async with httpx.AsyncClient(timeout=None) as client:
                                        resp = await client.get(gateway_url)
                                        resp.raise_for_status()
                                        data = resp.content
                                    # Re-add the bytes with the same chunking parameters so they
                                    # hash back to the original CID (which also pins them)
                                    chunk_bytes = int(os.getenv('CRYPTO_CHUNK_BYTES', '1048576'))
                                    add_params = {
                                        'cid-version': 1,
                                        'raw-leaves': 'true',
                                        'chunker': f'size-{chunk_bytes}',
                                        'hash': 'sha2-256',
                                        'pin': 'true',
                                    }
                                    result = await add_streamed_file([data], filename=f'{cid}.bin', params=add_params)
                                    if str(result.get('Hash')) != str(cid):
                                        raise ValueError(f"gateway add returned mismatched CID {result.get('Hash')}")
                                    make_log('index_scout_v3', f"pin {cid} fetched via gateway {gateway_host}:{gateway_port}", level='info')
                                    return
                                else:
                                    fallback_sources.append('gateway-host-missing')
                            except Exception as fallback_err:
                                fallback_sources.append(str(fallback_err))
                            make_log('index_scout_v3', f"pin {cid} failed: {e}; fallback={'; '.join(fallback_sources) if fallback_sources else 'none'}", level='warning')
                tasks = []
                for it in items:
                    await upsert_content(it)
                    cid = it.get('encrypted_cid')
                    if cid:
                        make_log('index_scout_v3', f"queue pin {cid}")
                        tasks.append(asyncio.create_task(_pin_one(cid)))
                if tasks:
                    await asyncio.gather(*tasks)
        except Exception as e:
            make_log('index_scout_v3', f"loop error: {e}", level='error')
        await asyncio.sleep(INTERVAL_SEC)
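
# Minimal standalone runner sketch. Assumption: in the app this coroutine is
# launched by the background-service scheduler, which supplies the shared
# `memory` mapping; an empty dict stands in for it here. The loop never
# returns, so asyncio.run blocks until the process is killed.
if __name__ == '__main__':
    asyncio.run(main_fn(memory={}))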