import asyncio
import os
import json
import shutil
import tempfile
import uuid
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple

from sqlalchemy import select, and_, or_

from app.core.logger import make_log
from app.core.storage import db_session
from app.core._config import UPLOADS_DIR, BACKEND_LOGS_DIR_HOST
from app.core.models.content_v3 import (
    EncryptedContent,
    ContentKey,
    ContentDerivative,
    UploadSession,
)
from app.core.models.node_storage import StoredContent
from app.core.ipfs_client import cat_stream
from app.core.crypto.encf_stream import decrypt_encf_auto
from app.core.crypto.keywrap import unwrap_dek, wrap_dek, KeyWrapError
from app.core.network.key_client import request_key_from_peer
from app.core.models.my_network import KnownNode
from app.core._utils.resolve_content import resolve_content
from app.core.content.content_id import ContentId

CONCURRENCY = int(os.getenv("CONVERT_V3_MAX_CONCURRENCY", "3"))
STAGING_SUBDIR = os.getenv("CONVERT_V3_STAGING_SUBDIR", "convert-staging")

UPLOADS_PATH = Path(UPLOADS_DIR).resolve()
_host_uploads_env = os.getenv("BACKEND_DATA_DIR_HOST")
HOST_UPLOADS_PATH = Path(_host_uploads_env).resolve() if _host_uploads_env else None


@dataclass
class PlainStaging:
    container_path: str
    host_path: str


def _container_to_host(path: str) -> str:
    """Map a container path under UPLOADS_DIR to the host path for docker -v."""
    if not HOST_UPLOADS_PATH:
        raise RuntimeError("BACKEND_DATA_DIR_HOST is not configured for convert_v3")
    real_path = Path(path).resolve()
    try:
        real_path.relative_to(UPLOADS_PATH)
    except ValueError:
        # Not under uploads; best-effort fallback to the original string
        return str(real_path)
    rel = real_path.relative_to(UPLOADS_PATH)
    return str(HOST_UPLOADS_PATH / rel)


MEDIA_CONVERTER_CPU_LIMIT = os.getenv("MEDIA_CONVERTER_CPU_LIMIT")
MEDIA_CONVERTER_MEM_LIMIT = os.getenv("MEDIA_CONVERTER_MEM_LIMIT")
MEDIA_CONVERTER_CPUSET = os.getenv("MEDIA_CONVERTER_CPUSET") or os.getenv("CONVERT_CPUSET")
ERROR_TRUNCATE_LIMIT = 512


def _ensure_dir(path: str):
    try:
        os.makedirs(path, exist_ok=True)
    except Exception:
        pass


async def _sha256_b58(file_path: str) -> str:
    import hashlib
    import base58

    h = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(2 * 1024 * 1024), b''):
            h.update(chunk)
    return base58.b58encode(h.digest()).decode()


async def _save_derivative(file_path: str, filename: str) -> Tuple[str, int]:
    """Move file into UPLOADS_DIR under its sha256 b58 name; return (hash_b58, size)."""
    file_hash = await _sha256_b58(file_path)
    dst = os.path.join(UPLOADS_DIR, file_hash)
    try:
        os.remove(dst)
    except FileNotFoundError:
        pass
    shutil.move(file_path, dst)
    size = os.path.getsize(dst)
    return file_hash, size
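# Illustrative note: because _save_derivative names files by digest, converting
# the same bytes twice collapses to a single stored file, e.g. (hypothetical
# paths):
#
#     h1, _ = await _save_derivative("/tmp/out_a.flac", "out_a.flac")
#     h2, _ = await _save_derivative("/tmp/out_b.flac", "out_b.flac")  # same bytes
#     assert h1 == h2  # both calls land on UPLOADS_DIR/<h1>
#
# The filename argument is accepted for call-site context but does not affect
# the stored name.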
async def _run_media_converter(staging: PlainStaging, input_ext: str, quality: str, trim_value: Optional[str], is_audio: bool):
    if not os.path.exists(staging.container_path):
        raise FileNotFoundError(f"Plain input missing at {staging.container_path}")

    host_input_path = staging.host_path
    if not host_input_path or not host_input_path.startswith('/'):
        host_input_path = os.path.abspath(host_input_path)

    rid = uuid.uuid4().hex[:8]
    output_dir_container = UPLOADS_PATH / "convert-output" / f"conv_{rid}"
    output_dir_host = _container_to_host(str(output_dir_container))
    _ensure_dir(str(output_dir_container))

    logs_dir_candidate = os.getenv("BACKEND_LOGS_DIR_HOST", "")
    logs_dir_host = (
        logs_dir_candidate
        if logs_dir_candidate
        else (str(HOST_UPLOADS_PATH / "logs" / "converter") if HOST_UPLOADS_PATH else "/tmp/converter-logs")
    )
    if not logs_dir_host.startswith('/'):
        logs_dir_host = os.path.join(os.getcwd(), logs_dir_host)
    try:
        os.makedirs(logs_dir_host, exist_ok=True)
    except Exception:
        fallback_logs = HOST_UPLOADS_PATH / "logs" / "converter" if HOST_UPLOADS_PATH else Path("/tmp/converter-logs")
        logs_dir_host = str(fallback_logs)
        os.makedirs(logs_dir_host, exist_ok=True)

    cmd = [
        "docker", "run", "--rm",
        "-v", f"{host_input_path}:/app/input:ro",
        "-v", f"{output_dir_host}:/app/output",
        "-v", f"{logs_dir_host}:/app/logs",
    ]
    if MEDIA_CONVERTER_CPU_LIMIT:
        cmd.extend(["--cpus", str(MEDIA_CONVERTER_CPU_LIMIT)])
    if MEDIA_CONVERTER_MEM_LIMIT:
        cmd.extend(["--memory", str(MEDIA_CONVERTER_MEM_LIMIT)])
    if MEDIA_CONVERTER_CPUSET:
        cmd.extend(["--cpuset-cpus", MEDIA_CONVERTER_CPUSET])
    cmd.append("media_converter")
    cmd.extend(["--ext", input_ext, "--quality", quality])
    if trim_value:
        cmd.extend(["--trim", trim_value])

    make_log('convert_v3', f"Run media_converter cmd: {' '.join(cmd)}")
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"media_converter failed: {stderr.decode()}")

    # Find the produced media file and optional output.json
    try:
        files = os.listdir(output_dir_container)
    except Exception as e:
        raise RuntimeError(f"Read output dir error: {e}")
    media_files = [f for f in files if f != "output.json"]
    if len(media_files) != 1:
        raise RuntimeError(f"Expected one media file, found {len(media_files)}: {media_files}")
    output_media = os.path.join(output_dir_container, media_files[0])

    ffprobe_meta = {}
    out_json = os.path.join(output_dir_container, "output.json")
    if os.path.exists(out_json):
        try:
            with open(out_json, 'r') as f:
                ffprobe_meta = json.load(f)
        except Exception:
            ffprobe_meta = {}
    return output_media, ffprobe_meta


async def _update_upload_session(ec: EncryptedContent, all_success: bool, errors: List[str]):
    async with db_session() as session:
        upload_row = (await session.execute(
            select(UploadSession).where(UploadSession.encrypted_cid == ec.encrypted_cid)
        )).scalars().first()
        if upload_row:
            if all_success:
                upload_row.state = 'converted'
                upload_row.error = None
            elif upload_row.state != 'converted':
                upload_row.state = 'conversion_failed'
                if errors:
                    upload_row.error = _short_error(errors[0])
            await session.commit()
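# Rough shape of the docker invocation built above (illustrative values only):
#
#     docker run --rm \
#         -v <host staging file>:/app/input:ro \
#         -v <host output dir>:/app/output \
#         -v <host logs dir>:/app/logs \
#         --cpus 2 --memory 2g --cpuset-cpus 0-3 \
#         media_converter --ext mp3 --quality low --trim 0-30
#
# The --cpus/--memory/--cpuset-cpus flags appear only when the corresponding
# MEDIA_CONVERTER_* env vars are set, and --trim is passed only for previews.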
async def _convert_content(ec: EncryptedContent, staging: PlainStaging):
    # Guard against a NULL content_type so the startswith checks cannot raise
    content_type = ec.content_type or ''
    content_kind = 'audio' if content_type.startswith('audio/') else ('video' if content_type.startswith('video/') else 'other')
    input_ext = (content_type.split('/')[-1] or 'bin')
    is_audio = content_kind == 'audio'
    encrypted_hash_b58 = ContentId.deserialize(ec.encrypted_cid).content_hash_b58

    if content_kind == 'other':
        errors: List[str] = []
        all_success = True
        try:
            file_hash, size_bytes = await _save_derivative(staging.container_path, staging.container_path)
            plain_path = os.path.join(UPLOADS_DIR, file_hash)
            plain_filename = f"{ec.encrypted_cid}.{input_ext}" if input_ext else ec.encrypted_cid
            async with db_session() as session:
                existing = (await session.execute(select(StoredContent).where(StoredContent.hash == file_hash))).scalars().first()
                if existing:
                    sc = existing
                    sc.type = sc.type or "local/content_bin"
                    sc.filename = plain_filename
                    sc.meta = {
                        **(sc.meta or {}),
                        'encrypted_cid': ec.encrypted_cid,
                        'kind': 'original',
                        'content_type': ec.content_type,
                    }
                    sc.updated = datetime.utcnow()
                else:
                    sc = StoredContent(
                        type="local/content_bin",
                        hash=file_hash,
                        user_id=None,
                        filename=plain_filename,
                        meta={
                            'encrypted_cid': ec.encrypted_cid,
                            'kind': 'original',
                            'content_type': ec.content_type,
                        },
                        created=datetime.utcnow(),
                    )
                    session.add(sc)
                    await session.flush()
                encrypted_records = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().all()
                for encrypted_sc in encrypted_records:
                    meta = dict(encrypted_sc.meta or {})
                    converted = dict(meta.get('converted_content') or {})
                    converted['original'] = file_hash
                    meta['converted_content'] = converted
                    if 'content_type' not in meta:
                        meta['content_type'] = ec.content_type
                    encrypted_sc.meta = meta
                    encrypted_sc.decrypted_content_id = sc.id
                    encrypted_sc.updated = datetime.utcnow()
                derivative = ContentDerivative(
                    content_id=ec.id,
                    kind='decrypted_original',
                    local_path=plain_path,
                    content_type=ec.content_type,
                    size_bytes=size_bytes,
                    status='ready',
                )
                session.add(derivative)
                await session.commit()
            make_log('convert_v3', f"Stored original derivative for {ec.encrypted_cid}")
        except Exception as e:
            all_success = False
            errors.append(str(e))
            make_log('convert_v3', f"Convert error {ec.encrypted_cid} opt=original: {e}", level='error')
        await _update_upload_session(ec, all_success, errors)
        return

    # audio/video path
    required = ['high', 'low', 'low_preview']
    conf = ec.preview_conf or {}
    intervals = conf.get('intervals') or [[0, int(conf.get('duration_ms', 30000))]]
    main_interval = intervals[0]
    start_s = max(0, int(main_interval[0]) // 1000)
    dur_s = max(1, int((main_interval[1] - main_interval[0]) // 1000) or 30)
    trim_value = f"{start_s}-{start_s + dur_s}"
    qualities = {
        'high': 'high',
        'low': 'low',
        'low_preview': 'low',
    }

    all_success = True
    errors: List[str] = []
    for opt in required:
        derivative_kind = f"decrypted_{opt if opt != 'low_preview' else 'preview'}"
        derivative_id: Optional[int] = None
        try:
            async with db_session() as session:
                cd = ContentDerivative(
                    content_id=ec.id,
                    kind=derivative_kind,
                    interval_start_ms=main_interval[0] if opt == 'low_preview' else None,
                    interval_end_ms=main_interval[1] if opt == 'low_preview' else None,
                    local_path="",
                    status='processing',
                )
                session.add(cd)
                await session.flush()
                derivative_id = cd.id
                await session.commit()

            out_path, ffprobe = await _run_media_converter(
                staging=staging,
                input_ext=input_ext,
                quality=qualities[opt],
                trim_value=trim_value if opt == 'low_preview' else None,
                is_audio=is_audio,
            )
            file_hash, size_bytes = await _save_derivative(out_path, os.path.basename(out_path))

            async with db_session() as session:
                sc = (await session.execute(select(StoredContent).where(StoredContent.hash == file_hash))).scalars().first()
                meta_payload = {'encrypted_cid': ec.encrypted_cid, 'kind': opt, 'ffprobe_meta': ffprobe}
                if sc:
                    sc.type = sc.type or "local/content_bin"
                    sc.filename = os.path.basename(out_path)
                    sc.meta = meta_payload
                    sc.updated = datetime.utcnow()
                else:
                    sc = StoredContent(
                        type="local/content_bin",
                        hash=file_hash,
                        user_id=None,
                        filename=os.path.basename(out_path),
                        meta=meta_payload,
                        created=datetime.utcnow(),
                    )
                    session.add(sc)
                    await session.flush()
                encrypted_sc = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().first()
                if encrypted_sc:
                    meta = dict(encrypted_sc.meta or {})
                    converted = dict(meta.get('converted_content') or {})
                    converted[opt] = file_hash
                    meta['converted_content'] = converted
                    encrypted_sc.meta = meta
                    if opt == 'high':
                        encrypted_sc.decrypted_content_id = sc.id
                    encrypted_sc.updated = datetime.utcnow()
                cd = await session.get(ContentDerivative, derivative_id) if derivative_id else None
                if cd:
                    cd.local_path = os.path.join(UPLOADS_DIR, file_hash)
                    cd.size_bytes = size_bytes
                    if is_audio:
                        cd.content_type = 'audio/flac' if opt == 'high' else 'audio/mpeg'
                    else:
                        cd.content_type = ec.content_type if opt == 'high' else 'video/mp4'
                    cd.status = 'ready'
                    cd.error = None
                await session.commit()

            output_parent = Path(out_path).parent
            shutil.rmtree(output_parent, ignore_errors=True)
            make_log('convert_v3', f"Converted {ec.encrypted_cid} opt={opt} -> {file_hash}")
        except Exception as e:
            make_log('convert_v3', f"Convert error {ec.encrypted_cid} opt={opt}: {e}", level='error')
            all_success = False
            errors.append(_short_error(e))
            async with db_session() as session:
                cd = await session.get(ContentDerivative, derivative_id) if derivative_id else None
                if cd:
                    cd.status = 'failed'
                    cd.error = _short_error(e)
                else:
                    session.add(ContentDerivative(
                        content_id=ec.id,
                        kind=derivative_kind,
                        status='failed',
                        error=_short_error(e),
                        local_path="",
                    ))
                await session.commit()

    await _update_upload_session(ec, all_success, errors)
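# Summary of the derivative matrix produced above (read off the code paths):
#
#     opt          kind                 audio content_type   video content_type
#     high         decrypted_high       audio/flac           source type
#     low          decrypted_low        audio/mpeg           video/mp4
#     low_preview  decrypted_preview    audio/mpeg           video/mp4 (trimmed)
#
# The preview variant is the only one that carries interval_{start,end}_ms and
# a --trim window; 'other' content gets a single decrypted_original instead.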
async def _pick_pending(limit: int) -> List[Tuple[EncryptedContent, PlainStaging]]:
    async with db_session() as session:
        # Include preview-enabled media and non-media content that needs decrypted originals
        non_media_filter = and_(
            EncryptedContent.content_type.isnot(None),
            ~EncryptedContent.content_type.like('audio/%'),
            ~EncryptedContent.content_type.like('video/%'),
        )
        ecs = (await session.execute(
            select(EncryptedContent)
            .where(or_(EncryptedContent.preview_enabled == True, non_media_filter))
            .order_by(EncryptedContent.created_at.desc())
        )).scalars().all()

        picked: List[Tuple[EncryptedContent, PlainStaging]] = []
        for ec in ecs:
            try:
                cid_obj, cid_err = resolve_content(ec.encrypted_cid)
                if cid_err:
                    make_log('convert_v3', f"Skip {ec.encrypted_cid}: resolve error {cid_err}", level='debug')
                    continue
                encrypted_hash_b58 = cid_obj.content_hash_b58
            except Exception as exc:
                make_log('convert_v3', f"Skip {ec.encrypted_cid}: resolve exception {exc}", level='warning')
                continue

            sc = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().first()
            if not sc or sc.onchain_index is None:
                continue

            # Check if derivatives are already ready; guard against NULL content_type
            rows = (await session.execute(select(ContentDerivative).where(ContentDerivative.content_id == ec.id))).scalars().all()
            kinds_ready = {r.kind for r in rows if r.status == 'ready'}
            content_type = ec.content_type or ''
            if content_type.startswith('audio/'):
                required = {'decrypted_low', 'decrypted_high'}
            elif content_type.startswith('video/'):
                required = {'decrypted_low', 'decrypted_high', 'decrypted_preview'}
            else:
                required = {'decrypted_original'}
            if required.issubset(kinds_ready):
                continue

            # Always decrypt from IPFS using a local or remote key
            staging: Optional[PlainStaging] = None
            ck = (await session.execute(select(ContentKey).where(ContentKey.content_id == ec.id))).scalars().first()
            if ck:
                staging = await stage_plain_from_ipfs(ec, ck.key_ciphertext_b64)
            if not staging:
                peers = (await session.execute(select(KnownNode))).scalars().all()
                for peer in peers:
                    meta = peer.meta or {}
                    public_host = meta.get('public_host')
                    if not public_host:
                        last_resp = (meta.get('last_response') or {}).get('node', {}) if isinstance(meta, dict) else {}
                        public_host = last_resp.get('public_host')
                    base_url = public_host or f"http://{peer.ip}:{peer.port}"
                    dek = await request_key_from_peer(base_url, ec.encrypted_cid)
                    if not dek:
                        continue
                    try:
                        dek_b64 = wrap_dek(dek)
                    except KeyWrapError as exc:
                        make_log('convert_v3', f"wrap failed for peer DEK: {exc}", level='error')
                        continue
                    session_ck = ContentKey(
                        content_id=ec.id,
                        key_ciphertext_b64=dek_b64,
                        key_fingerprint=peer.public_key,
                        issuer_node_id=peer.public_key,
                        allow_auto_grant=True,
                    )
                    session.add(session_ck)
                    await session.commit()
                    staging = await stage_plain_from_ipfs(ec, dek_b64)
                    if staging:
                        break
            if not staging or not os.path.exists(staging.container_path):
                continue

            picked.append((ec, staging))
            if len(picked) >= limit:
                break
        return picked
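# Key acquisition order used above: a locally stored ContentKey is tried first;
# failing that, each known peer is asked via request_key_from_peer, and the
# first DEK returned is re-wrapped with wrap_dek and persisted as a new
# ContentKey row, so later runs can skip the peer round-trip entirely.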
async def worker_loop():
    sem = asyncio.Semaphore(CONCURRENCY)

    async def _run_one(ec: EncryptedContent, staging: PlainStaging):
        async with sem:
            try:
                await _convert_content(ec, staging)
                # After a successful conversion, attempt to remove the staging file to avoid duplicates
                try:
                    if staging and staging.container_path and os.path.exists(staging.container_path):
                        os.remove(staging.container_path)
                except Exception:
                    pass
            except Exception as e:
                make_log('convert_v3', f"job error {ec.encrypted_cid}: {e}", level='error')

    while True:
        try:
            batch = await _pick_pending(limit=CONCURRENCY * 2)
            if not batch:
                await asyncio.sleep(3)
                continue
            tasks = [asyncio.create_task(_run_one(ec, staging)) for (ec, staging) in batch]
            await asyncio.gather(*tasks)
        except Exception as e:
            make_log('convert_v3', f"loop error: {e}", level='error')
            await asyncio.sleep(2)


async def main_fn(memory):
    make_log('convert_v3', f"Service started with concurrency={CONCURRENCY}", level='info')
    await worker_loop()


async def stage_plain_from_ipfs(ec: EncryptedContent, dek_wrapped: str) -> Optional[PlainStaging]:
    """Download the encrypted ENCF stream from IPFS and decrypt it on the fly into shared staging."""
    os.makedirs(UPLOADS_PATH / STAGING_SUBDIR, exist_ok=True)
    try:
        dek = unwrap_dek(dek_wrapped)
    except KeyWrapError as exc:
        make_log('convert_v3', f"unwrap failed for {ec.encrypted_cid}: {exc}", level='error')
        return None

    tmp = tempfile.NamedTemporaryFile(
        prefix=f"dec_{ec.encrypted_cid[:8]}_",
        dir=UPLOADS_PATH / STAGING_SUBDIR,
        delete=False,
    )
    tmp_path = tmp.name
    tmp.close()
    try:
        async def _aiter():
            async for ch in cat_stream(ec.encrypted_cid):
                yield ch

        await decrypt_encf_auto(_aiter(), dek, tmp_path)
        host_path = _container_to_host(tmp_path)
        return PlainStaging(container_path=tmp_path, host_path=host_path)
    except Exception as e:
        make_log('convert_v3', f"decrypt from ipfs failed: {e}", level='error')
        try:
            os.remove(tmp_path)
        except Exception:
            pass
        return None


def _short_error(message: str, limit: int = ERROR_TRUNCATE_LIMIT) -> str:
    if not message:
        return message
    message = str(message)
    return message if len(message) <= limit else message[: limit - 3] + '...'
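# Truncation example for _short_error (arithmetic only): with the default
# limit of 512, a 600-character message becomes message[:509] + '...', i.e.
# exactly 512 characters; messages at or under the limit pass through intact.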