# uploader-bot/app/core/background/convert_v3_service.py

import asyncio
import os
import json
import uuid
import shutil
from datetime import datetime
from typing import List, Tuple
from sqlalchemy import select
from app.core.logger import make_log
from app.core.storage import db_session
from app.core._config import UPLOADS_DIR, BACKEND_LOGS_DIR_HOST
from app.core.models.content_v3 import (
    EncryptedContent,
    ContentKey,
    ContentDerivative,
    UploadSession,
)
from app.core.models.node_storage import StoredContent
from app.core.ipfs_client import cat_stream
from app.core.crypto.encf_stream import decrypt_encf_auto
from app.core.crypto.keywrap import unwrap_dek, wrap_dek, KeyWrapError
from app.core.network.key_client import request_key_from_peer
from app.core.models.my_network import KnownNode

CONCURRENCY = int(os.getenv("CONVERT_V3_MAX_CONCURRENCY", "3"))
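# CONCURRENCY caps how many docker-based conversions run at once (enforced by
# the semaphore in worker_loop below); override via CONVERT_V3_MAX_CONCURRENCY.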


def _ensure_dir(path: str):
    try:
        os.makedirs(path, exist_ok=True)
    except Exception:
        pass


async def _sha256_b58(file_path: str) -> str:
    import hashlib
    import base58
    h = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(2 * 1024 * 1024), b''):
            h.update(chunk)
    return base58.b58encode(h.digest()).decode()


async def _save_derivative(file_path: str, filename: str) -> Tuple[str, int]:
    """Move file into UPLOADS_DIR under sha256 b58 name; return (hash_b58, size)."""
    file_hash = await _sha256_b58(file_path)
    dst = os.path.join(UPLOADS_DIR, file_hash)
    try:
        os.remove(dst)
    except FileNotFoundError:
        pass
    shutil.move(file_path, dst)
    size = os.path.getsize(dst)
    return file_hash, size
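
# Content-addressing note: derivatives live flat in UPLOADS_DIR keyed by the
# base58 SHA-256 of their bytes, so re-converting identical output overwrites
# in place rather than duplicating (hence the os.remove before shutil.move).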


async def _run_media_converter(
    input_host_path: str,
    input_ext: str,
    quality: str,
    trim_value: str | None,
    is_audio: bool,
) -> Tuple[str, dict]:
    rid = uuid.uuid4().hex[:8]
    # The same host directory is bind-mounted into the container at /app/output.
    output_dir_host = f"/tmp/conv_{rid}"
    _ensure_dir(output_dir_host)
    logs_dir_host = BACKEND_LOGS_DIR_HOST
    _ensure_dir(logs_dir_host)
    cmd = [
        "docker", "run", "--rm",
        "-v", f"{input_host_path}:/app/input:ro",
        "-v", f"{output_dir_host}:/app/output",
        "-v", f"{logs_dir_host}:/app/logs",
        "media_converter",
        "--ext", input_ext,
        "--quality", quality,
    ]
    if trim_value:
        cmd.extend(["--trim", trim_value])
    if is_audio:
        cmd.append("--audio-only")
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"media_converter failed: {stderr.decode()}")
    # Find the produced media file and optional output.json (ffprobe metadata)
    try:
        files = os.listdir(output_dir_host)
    except Exception as e:
        raise RuntimeError(f"Read output dir error: {e}")
    media_files = [f for f in files if f != "output.json"]
    if len(media_files) != 1:
        raise RuntimeError(f"Expected one media file, found {len(media_files)}: {media_files}")
    output_media = os.path.join(output_dir_host, media_files[0])
    ffprobe_meta = {}
    out_json = os.path.join(output_dir_host, "output.json")
    if os.path.exists(out_json):
        try:
            with open(out_json, 'r') as f:
                ffprobe_meta = json.load(f)
        except Exception:
            ffprobe_meta = {}
    return output_media, ffprobe_meta
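
# Illustrative invocation assembled by the function above (paths and values are
# hypothetical, shown only to make the volume layout concrete):
#   docker run --rm \
#     -v /tmp/dec_abc12345_xyz:/app/input:ro \
#     -v /tmp/conv_1a2b3c4d:/app/output \
#     -v /var/log/backend:/app/logs \
#     media_converter --ext mp4 --quality low --trim 0,30 --audio-only
# The media_converter image is expected to write exactly one media file, plus
# an optional output.json with ffprobe metadata, into /app/output.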


async def _convert_content(ec: EncryptedContent, input_host_path: str):
    content_kind = 'audio' if ec.content_type.startswith('audio/') else ('video' if ec.content_type.startswith('video/') else 'other')
    if content_kind == 'other':
        return
    input_ext = (ec.content_type.split('/')[-1] or 'bin')
    is_audio = content_kind == 'audio'
    # Required outputs
    required = ['high', 'low', 'low_preview']
    # Preview interval: defaults to the first 30 s when no intervals are configured
    conf = ec.preview_conf or {}
    intervals = conf.get('intervals') or [[0, int(conf.get('duration_ms', 30000))]]
    main_interval = intervals[0]
    start_s = max(0, int(main_interval[0]) // 1000)
    dur_s = max(1, int((main_interval[1] - main_interval[0]) // 1000) or 30)
    trim_value = f"{start_s},{dur_s}"
    qualities = {
        'high': 'high',
        'low': 'low',
        'low_preview': 'low',
    }
    for opt in required:
        kind = f"decrypted_{'preview' if opt == 'low_preview' else opt}"
        try:
            # Mark derivative as processing
            async with db_session() as session:
                cd = ContentDerivative(
                    content_id=ec.id,
                    kind=kind,
                    interval_start_ms=main_interval[0] if opt == 'low_preview' else None,
                    interval_end_ms=main_interval[1] if opt == 'low_preview' else None,
                    local_path="",
                    status='processing',
                )
                session.add(cd)
                await session.commit()
            out_path, ffprobe = await _run_media_converter(
                input_host_path=input_host_path,
                input_ext=input_ext,
                quality=qualities[opt],
                trim_value=trim_value if opt == 'low_preview' else None,
                is_audio=is_audio,
            )
            # Save into the store and register a StoredContent row
            file_hash, size_bytes = await _save_derivative(out_path, os.path.basename(out_path))
            async with db_session() as session:
                sc = StoredContent(
                    type="local/content_bin",
                    hash=file_hash,
                    user_id=None,
                    filename=os.path.basename(out_path),
                    meta={'encrypted_cid': ec.encrypted_cid, 'kind': opt, 'ffprobe_meta': ffprobe},
                    created=datetime.utcnow(),
                )
                session.add(sc)
                await session.flush()
                # Update the derivative record created above
                cd = (await session.execute(select(ContentDerivative).where(
                    ContentDerivative.content_id == ec.id,
                    ContentDerivative.kind == kind,
                    ContentDerivative.status == 'processing'
                ))).scalars().first()
                if cd:
                    cd.local_path = os.path.join(UPLOADS_DIR, file_hash)
                    cd.size_bytes = size_bytes
                    cd.content_type = ('audio/mpeg' if is_audio else 'video/mp4') if opt != 'high' else ec.content_type
                    cd.status = 'ready'
                await session.commit()
            make_log('convert_v3', f"Converted {ec.encrypted_cid} opt={opt} -> {file_hash}")
        except Exception as e:
            make_log('convert_v3', f"Convert error {ec.encrypted_cid} opt={opt}: {e}", level='error')
            async with db_session() as session:
                # Mark the in-flight 'processing' record as failed rather than
                # inserting a duplicate row and leaving the old one dangling
                cd = (await session.execute(select(ContentDerivative).where(
                    ContentDerivative.content_id == ec.id,
                    ContentDerivative.kind == kind,
                    ContentDerivative.status == 'processing'
                ))).scalars().first()
                if cd is None:
                    cd = ContentDerivative(content_id=ec.id, kind=kind, local_path="")
                    session.add(cd)
                cd.status = 'failed'
                cd.error = str(e)
                await session.commit()
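
# Derivative kinds produced per source file (grounded in the loop above):
#   opt='high'        -> kind='decrypted_high',    full length, source quality
#   opt='low'         -> kind='decrypted_low',     full length, low quality
#   opt='low_preview' -> kind='decrypted_preview', trimmed to the preview interval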


async def _pick_pending(limit: int) -> List[Tuple[EncryptedContent, str]]:
    async with db_session() as session:
        # Find A/V contents with preview_enabled and no ready low/low_preview derivatives yet
        ecs = (await session.execute(select(EncryptedContent).where(
            EncryptedContent.preview_enabled.is_(True)
        ).order_by(EncryptedContent.created_at.desc()))).scalars().all()
        picked: List[Tuple[EncryptedContent, str]] = []
        for ec in ecs:
            # Check if derivatives already ready
            rows = (await session.execute(select(ContentDerivative).where(ContentDerivative.content_id == ec.id))).scalars().all()
            kinds_ready = {r.kind for r in rows if r.status == 'ready'}
            required = {'decrypted_low', 'decrypted_high'} if ec.content_type.startswith('audio/') else {'decrypted_low', 'decrypted_high', 'decrypted_preview'}
            if required.issubset(kinds_ready):
                continue
            # Always decrypt from IPFS using local or remote key
            storage_path: str | None = None
            ck = (await session.execute(select(ContentKey).where(ContentKey.content_id == ec.id))).scalars().first()
            if ck:
                storage_path = await stage_plain_from_ipfs(ec, ck.key_ciphertext_b64)
            if not storage_path:
                peers = (await session.execute(select(KnownNode))).scalars().all()
                for peer in peers:
                    base_url = f"http://{peer.ip}:{peer.port}"
                    dek = await request_key_from_peer(base_url, ec.encrypted_cid)
                    if not dek:
                        continue
                    try:
                        dek_b64 = wrap_dek(dek)
                    except KeyWrapError as exc:
                        make_log('convert_v3', f"wrap failed for peer DEK: {exc}", level='error')
                        continue
                    session_ck = ContentKey(
                        content_id=ec.id,
                        key_ciphertext_b64=dek_b64,
                        key_fingerprint=peer.public_key,
                        issuer_node_id=peer.public_key,
                        allow_auto_grant=True,
                    )
                    session.add(session_ck)
                    await session.commit()
                    storage_path = await stage_plain_from_ipfs(ec, dek_b64)
                    if storage_path:
                        break
            if not storage_path or not os.path.exists(storage_path):
                continue
            picked.append((ec, storage_path))
            if len(picked) >= limit:
                break
    return picked
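
# Key resolution order used above: (1) a locally stored ContentKey, (2) a DEK
# requested from each KnownNode peer in turn (wrapped and persisted locally on
# success); contents with no resolvable key are skipped until one appears.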


async def worker_loop():
    sem = asyncio.Semaphore(CONCURRENCY)

    async def _run_one(ec: EncryptedContent, input_path: str):
        async with sem:
            try:
                await _convert_content(ec, input_path)
                # After successful conversion, remove the plaintext staging file
                # (named dec_* by stage_plain_from_ipfs) to avoid duplicates.
                try:
                    if input_path and os.path.basename(input_path).startswith("dec_") and os.path.exists(input_path):
                        os.remove(input_path)
                except Exception:
                    pass
            except Exception as e:
                make_log('convert_v3', f"job error {ec.encrypted_cid}: {e}", level='error')

    while True:
        try:
            batch = await _pick_pending(limit=CONCURRENCY * 2)
            if not batch:
                await asyncio.sleep(3)
                continue
            tasks = [asyncio.create_task(_run_one(ec, path)) for (ec, path) in batch]
            await asyncio.gather(*tasks)
        except Exception as e:
            make_log('convert_v3', f"loop error: {e}", level='error')
            await asyncio.sleep(2)


async def main_fn(memory):
    make_log('convert_v3', f"Service started with concurrency={CONCURRENCY}", level='info')
    await worker_loop()


async def stage_plain_from_ipfs(ec: EncryptedContent, dek_wrapped: str) -> str | None:
    """Download encrypted ENCF stream from IPFS and decrypt on the fly into a temp file."""
    import tempfile
    try:
        dek = unwrap_dek(dek_wrapped)
    except KeyWrapError as exc:
        make_log('convert_v3', f"unwrap failed for {ec.encrypted_cid}: {exc}", level='error')
        return None
    tmp = tempfile.NamedTemporaryFile(prefix=f"dec_{ec.encrypted_cid[:8]}_", delete=False)
    tmp_path = tmp.name
    tmp.close()
    try:
        async def _aiter():
            async for ch in cat_stream(ec.encrypted_cid):
                yield ch
        await decrypt_encf_auto(_aiter(), dek, tmp_path)
        return tmp_path
    except Exception as e:
        make_log('convert_v3', f"decrypt from ipfs failed: {e}", level='error')
        try:
            os.remove(tmp_path)
        except Exception:
            pass
        return None
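

# Minimal standalone entrypoint -- an illustrative sketch, not part of the
# original service wiring. In the repo this module is presumably launched by a
# background-service runner that supplies the `memory` argument; passing None
# here is an assumption for local testing only.
if __name__ == "__main__":
    asyncio.run(main_fn(None))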