python magic for images

This commit is contained in:
user 2025-04-26 12:52:32 +03:00
parent 0586ed9d94
commit 0e4268fb4d
3 changed files with 158 additions and 164 deletions

View File

@ -21,5 +21,7 @@ RUN apt-get update && apt-get install -y \
apt-get update && \ apt-get update && \
apt-get install -y docker-ce-cli apt-get install -y docker-ce-cli
RUN apt-get install libmagic1 -y
CMD ["python", "app"] CMD ["python", "app"]

View File

@ -4,23 +4,24 @@ import os
import uuid import uuid
import json import json
import shutil import shutil
import magic # python-magic
from base58 import b58decode, b58encode from base58 import b58decode, b58encode
from sqlalchemy import and_, or_ from sqlalchemy import and_, or_
from app.core.models.node_storage import StoredContent from app.core.models.node_storage import StoredContent
from app.core.models._telegram import Wrapped_CBotChat from app.core.models._telegram import Wrapped_CBotChat
from app.core._utils.send_status import send_status from app.core._utils.send_status import send_status
from app.core.logger import make_log from app.core.logger import make_log
from app.core.models.user import User
from app.core.models import WalletConnection from app.core.models import WalletConnection
from app.core.storage import db_session from app.core.storage import db_session
from app.core._config import UPLOADS_DIR from app.core._config import UPLOADS_DIR
from app.core.content.content_id import ContentId from app.core.content.content_id import ContentId
import converter_module # наш модуль для конвертации
async def convert_loop(memory): async def convert_loop(memory):
with db_session() as session: with db_session() as session:
# Query for unprocessed encrypted content # 1) Найти несанкционированный контент
unprocessed_encrypted_content = session.query(StoredContent).filter( item = session.query(StoredContent).filter(
and_( and_(
StoredContent.type == "onchain/content", StoredContent.type == "onchain/content",
or_( or_(
@ -29,192 +30,182 @@ async def convert_loop(memory):
) )
) )
).first() ).first()
if not unprocessed_encrypted_content: if not item:
make_log("ConvertProcess", "No content to convert", level="debug") make_log("ConvertProcess", "No content to convert", level="debug")
return return
# Static preview interval in seconds # 2) Достать расшифрованный файл
preview_interval = [0, 30] decrypted = session.query(StoredContent).filter(
if unprocessed_encrypted_content.onchain_index in [2]: StoredContent.id == item.decrypted_content_id
preview_interval = [0, 60]
make_log("ConvertProcess", f"Processing content {unprocessed_encrypted_content.id} with preview interval {preview_interval}", level="info")
decrypted_content = session.query(StoredContent).filter(
StoredContent.id == unprocessed_encrypted_content.decrypted_content_id
).first() ).first()
if not decrypted_content: if not decrypted:
make_log("ConvertProcess", "Decrypted content not found", level="error") make_log("ConvertProcess", "Decrypted content not found", level="error")
return return
input_path = f"/Storage/storedContent/{decrypted.hash}"
filename = item.filename
ext = filename.split('.')[-1] if '.' in filename else ""
# List of conversion options to process # 3) Определяем MIME-тип через python-magic
REQUIRED_CONVERT_OPTIONS = ['high', 'low', 'low_preview']
converted_content = {} # Mapping: option -> sha256 hash of output file
# Define input file path and extract its extension from filename
input_file_path = f"/Storage/storedContent/{decrypted_content.hash}"
input_ext = unprocessed_encrypted_content.filename.split('.')[-1] if '.' in unprocessed_encrypted_content.filename else "mp4"
# Logs directory mapping
logs_dir = "/Storage/logs/converter"
# Process each conversion option in sequence
for option in REQUIRED_CONVERT_OPTIONS:
# Set quality parameter and trim option (only for preview)
if option == "low_preview":
quality = "low"
trim_value = f"{preview_interval[0]}-{preview_interval[1]}"
else:
quality = option # 'high' or 'low'
trim_value = None
# Generate a unique output directory for docker container
output_uuid = str(uuid.uuid4())
output_dir = f"/Storage/storedContent/converter-output/{output_uuid}"
# Build the docker command with appropriate volume mounts and parameters
cmd = [
"docker", "run", "--rm",
"-v", f"{input_file_path}:/app/input",
"-v", f"{output_dir}:/app/output",
"-v", f"{logs_dir}:/app/logs",
"media_converter",
"--ext", input_ext,
"--quality", quality
]
if trim_value:
cmd.extend(["--trim", trim_value])
# Run the docker container asynchronously
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
if process.returncode != 0:
make_log("ConvertProcess", f"Docker conversion failed for option {option}: {stderr.decode()}", level="error")
return
# List files in the output directory
try: try:
files = os.listdir(output_dir.replace("/Storage/storedContent", "/app/data")) mime = magic.from_file(input_path, mime=True)
except Exception as e: except Exception as e:
make_log("ConvertProcess", f"Error reading output directory {output_dir}: {e}", level="error") make_log("ConvertProcess", f"magic probe failed: {e}", level="warning")
return mime = ""
# Exclude 'output.json' and expect exactly one media output file if mime.startswith("video/"):
media_files = [f for f in files if f != "output.json"] kind = "video"
if len(media_files) != 1: elif mime.startswith("audio/"):
make_log("ConvertProcess", f"Expected one media file, found {len(media_files)} for option {option}", level="error") kind = "audio"
return else:
kind = "other"
output_file = os.path.join(output_dir.replace("/Storage/storedContent", "/app/data"), media_files[0]) make_log("ConvertProcess", f"Detected kind={kind}, mime={mime}", level="info")
# Compute SHA256 hash of the output file using async subprocess # 4) Если не видео и не аудио — сохраняем raw copy и выходим
hash_process = await asyncio.create_subprocess_exec( if kind == "other":
"sha256sum", output_file, raw_hash = item.hash
stdout=asyncio.subprocess.PIPE, raw = StoredContent(
stderr=asyncio.subprocess.PIPE type="local/content_raw",
) hash=raw_hash,
hash_stdout, hash_stderr = await hash_process.communicate() user_id=item.user_id,
if hash_process.returncode != 0: filename=filename,
make_log("ConvertProcess", f"Error computing sha256sum for option {option}: {hash_stderr.decode()}", level="error") meta={'source': 'raw_copy'},
return
file_hash = hash_stdout.decode().split()[0]
file_hash = b58encode(bytes.fromhex(file_hash)).decode()
if not session.query(StoredContent).filter(
StoredContent.hash == file_hash
).first():
new_content = StoredContent(
type="local/content_bin",
hash=file_hash,
user_id=unprocessed_encrypted_content.user_id,
filename=media_files[0],
meta={
'encrypted_file_hash': unprocessed_encrypted_content.hash,
},
created=datetime.now(), created=datetime.now(),
) )
session.add(new_content) session.add(raw)
session.commit() session.commit()
save_path = os.path.join(UPLOADS_DIR, file_hash) # Копируем файл в UPLOADS_DIR
dst = os.path.join(UPLOADS_DIR, raw_hash)
try: try:
os.remove(save_path) os.remove(dst)
except FileNotFoundError: except FileNotFoundError:
pass pass
shutil.copy2(input_path, dst)
try: # Обновляем оригинальный объект
shutil.move(output_file, save_path) item.btfs_cid = ContentId(version=2, content_hash=b58decode(raw_hash)).serialize_v2()
except Exception as e: item.ipfs_cid = ContentId(version=2, content_hash=b58decode(raw_hash)).serialize_v2()
make_log("ConvertProcess", f"Error moving output file {output_file} to {save_path}: {e}", level="error") item.meta = {**item.meta, 'converted_content': {'raw': raw_hash}}
session.commit()
make_log("ConvertProcess", f"Raw content saved and CIDs set for {raw_hash}", level="info")
return return
converted_content[option] = file_hash # 5) Задаём опции конвертации для видео/аудио
if kind == "video":
options = ['high', 'low', 'low_preview']
else: # audio
options = ['high_audio', 'low_audio']
# Process output.json: read its contents and update meta['ffprobe_meta'] # Preview interval
output_json_path = os.path.join(output_dir.replace("/Storage/storedContent", "/app/data"), "output.json") preview_interval = [0, 30]
if os.path.exists(output_json_path): if item.onchain_index == 2:
if unprocessed_encrypted_content.meta.get('ffprobe_meta') is None: preview_interval = [0, 60]
try:
with open(output_json_path, "r") as f:
output_json_content = f.read()
except Exception as e:
make_log("ConvertProcess", f"Error reading output.json for option {option}: {e}", level="error")
return
try: converted = {}
ffprobe_meta = json.loads(output_json_content) for opt in options:
except Exception as e: # quality и trim
make_log("ConvertProcess", f"Error parsing output.json for option {option}: {e}", level="error") if opt.endswith("_preview"):
return quality = opt.replace("_preview", "")
trim = preview_interval
unprocessed_encrypted_content.meta = {
**unprocessed_encrypted_content.meta,
'ffprobe_meta': ffprobe_meta
}
else: else:
make_log("ConvertProcess", f"output.json not found for option {option}", level="error") quality = opt.replace("_audio", "")
trim = None
# Remove the output directory after processing # уникальная папка вывода
uid = str(uuid.uuid4())
out_dir = f"/Storage/storedContent/converter-output/{uid}"
os.makedirs(out_dir, exist_ok=True)
# ==== Вызов конвертера ====
# converter_module.convert(input_path, out_dir, ext=ext,
# quality=quality, trim=trim, audio_only=(kind=="audio"))
# Здесь предполагаем, что convert возвращает имя файла результата.
try: try:
shutil.rmtree(output_dir.replace("/Storage/storedContent", "/app/data")) result_fname = await converter_module.convert(
input=input_path,
output_dir=out_dir,
ext=ext,
quality=quality,
trim=trim,
audio_only=(kind == "audio")
)
except Exception as e: except Exception as e:
make_log("ConvertProcess", f"Error removing output directory {output_dir}: {e}", level="error") make_log("ConvertProcess", f"Conversion failed {opt}: {e}", level="error")
# Continue even if deletion fails return
make_log("ConvertProcess", f"Content {unprocessed_encrypted_content.id} processed. Converted content: {converted_content}", level="info") out_file = os.path.join(out_dir, result_fname)
unprocessed_encrypted_content.btfs_cid = ContentId( # 6) Считаем sha256 и b58
version=2, content_hash=b58decode(converted_content['high']) proc = await asyncio.create_subprocess_exec(
).serialize_v2() "sha256sum", out_file,
unprocessed_encrypted_content.ipfs_cid = ContentId( stdout=asyncio.subprocess.PIPE,
version=2, content_hash=b58decode(converted_content['low']) stderr=asyncio.subprocess.PIPE
).serialize_v2() )
unprocessed_encrypted_content.meta = { so, se = await proc.communicate()
**unprocessed_encrypted_content.meta, if proc.returncode != 0:
'converted_content': converted_content make_log("ConvertProcess", f"sha256sum error: {se.decode()}", level="error")
} return
sha_hex = so.decode().split()[0]
h58 = b58encode(bytes.fromhex(sha_hex)).decode()
# 7) Добавляем StoredContent, если нужно
if not session.query(StoredContent).filter(StoredContent.hash == h58).first():
new = StoredContent(
type="local/content_bin",
hash=h58,
user_id=item.user_id,
filename=result_fname,
meta={'encrypted_file_hash': item.hash},
created=datetime.now(),
)
session.add(new)
session.commit() session.commit()
if not unprocessed_encrypted_content.meta.get('upload_notify_msg_id'):
wallet_owner_connection = session.query(WalletConnection).filter( # 8) Перемещаем в UPLOADS_DIR
WalletConnection.wallet_address == unprocessed_encrypted_content.owner_address dst = os.path.join(UPLOADS_DIR, h58)
try:
os.remove(dst)
except FileNotFoundError:
pass
shutil.move(out_file, dst)
converted[opt] = h58
# 9) optional: metadata from converter_module
meta_info = converter_module.get_meta(out_dir) # допустим, так
if meta_info and 'ffprobe_meta' not in item.meta:
item.meta['ffprobe_meta'] = meta_info
# 10) очистка
shutil.rmtree(out_dir, ignore_errors=True)
# 11) Обновляем оригинальный объект после всех опций
make_log("ConvertProcess", f"Converted: {converted}", level="info")
main_high = 'high' if kind=='video' else 'high_audio'
main_low = 'low' if kind=='video' else 'low_audio'
item.btfs_cid = ContentId(version=2,
content_hash=b58decode(converted[main_high])).serialize_v2()
item.ipfs_cid = ContentId(version=2,
content_hash=b58decode(converted[main_low])).serialize_v2()
item.meta = {**item.meta, 'converted_content': converted}
session.commit()
# 12) Отправляем уведомление пользователю
if not item.meta.get('upload_notify_msg_id'):
wc = session.query(WalletConnection).filter(
WalletConnection.wallet_address == item.owner_address
).order_by(WalletConnection.id.desc()).first() ).order_by(WalletConnection.id.desc()).first()
if wallet_owner_connection: if wc:
wallet_owner_user = wallet_owner_connection.user bot = Wrapped_CBotChat(memory._client_telegram_bot,
wallet_owner_bot = Wrapped_CBotChat(memory._client_telegram_bot, chat_id=wallet_owner_user.telegram_id, user=wallet_owner_user, db_session=session) chat_id=wc.user.telegram_id,
unprocessed_encrypted_content.meta = { user=wc.user, db_session=session)
**unprocessed_encrypted_content.meta, item.meta['upload_notify_msg_id'] = await bot.send_content(session, item)
'upload_notify_msg_id': await wallet_owner_bot.send_content(session, unprocessed_encrypted_content)
}
session.commit() session.commit()
async def main_fn(memory): async def main_fn(memory):
make_log("ConvertProcess", "Service started", level="info") make_log("ConvertProcess", "Service started", level="info")
seqno = 0 seqno = 0

View File

@ -15,4 +15,5 @@ aiofiles==23.2.1
pydub==0.25.1 pydub==0.25.1
pillow==10.2.0 pillow==10.2.0
ffmpeg-python==0.2.0 ffmpeg-python==0.2.0
python-magic==0.4.27