# FrameTour-RenderWorker/entity/ffmpeg_command_builder.py

import json
import os
import time
from typing import List, Optional

from config.settings import get_ffmpeg_config
from entity.render_task import RenderTask, TaskType
from entity.effects import registry as effect_registry
from util.exceptions import FFmpegError
from util.ffmpeg import probe_video_info, probe_video_audio
import logging

logger = logging.getLogger(__name__)

class FFmpegCommandBuilder:
    """Builds FFmpeg command-line argument lists for a render task."""

    def __init__(self, task: RenderTask):
        """Bind the builder to ``task`` and load the FFmpeg configuration.

        Args:
            task: Render task whose inputs, output and options drive the
                generated command.
        """
        self.config = get_ffmpeg_config()
        self.task = task

    def build_command(self) -> List[str]:
        """Return the FFmpeg argument list that matches the task's type.

        The task type is refreshed through ``update_task_type()`` first, then
        the copy, concat or encode builder is selected.

        Returns:
            The command as a list of arguments.

        Raises:
            FFmpegError: If the task type is not COPY, CONCAT or ENCODE.
        """
        task = self.task
        task.update_task_type()

        if task.task_type == TaskType.COPY:
            return self._build_copy_command()
        if task.task_type == TaskType.CONCAT:
            return self._build_concat_command()
        if task.task_type == TaskType.ENCODE:
            return self._build_encode_command()

        raise FFmpegError(f"Unsupported task type: {self.task.task_type}")

    def _build_copy_command(self) -> List[str]:
        """构建复制命令"""
        if len(self.task.input_files) == 1:
            input_file = self.task.input_files[0]
            if input_file == self.task.output_file:
                return []  # 不需要处理

        return [
            "ffmpeg", "-y", "-hide_banner",
            "-i", self.task.input_files[0],
            "-c", "copy",
            self.task.output_file
        ]

    def _build_concat_command(self) -> List[str]:
        """Build a command that joins the inputs while copying the video stream.

        A single input is passed to FFmpeg directly. Several inputs are listed
        in a concat-demuxer list file that is written to the current working
        directory; this method does not delete that file, so the caller has to
        clean it up after FFmpeg has run.

        Side effects:
            ``self.task.mute`` is overwritten with the negated result of
            ``probe_video_audio`` for the input (or for the list file).

        Returns:
            The command as a list of arguments.
        """
        args = ["ffmpeg", "-y", "-hide_banner"]
        input_args: List[str] = []
        output_args = [*self.config.default_args]
        filter_args: List[str] = []

        input_files = self.task.input_files
        if len(input_files) == 1:
            # Single file: feed it to FFmpeg as-is.
            source = input_files[0]
            input_args.extend(["-i", source])
            self.task.mute = not probe_video_audio(source)
        else:
            # Several files: describe them in a concat-demuxer list file.
            tmp_file = f"tmp_concat_{time.time()}.txt"
            with open(tmp_file, "w", encoding="utf-8") as f:
                for input_file in input_files:
                    # Inside a single-quoted value a quote cannot be escaped;
                    # close the quote, emit an escaped quote, then reopen it.
                    escaped = str(input_file).replace("'", "'\\''")
                    f.write(f"file '{escaped}'\n")
            input_args.extend(["-f", "concat", "-safe", "0", "-i", tmp_file])
            self.task.mute = not probe_video_audio(tmp_file, "concat")

        # Video is taken from the first input and copied without re-encoding.
        output_args.extend(["-map", "0:v", "-c:v", "copy"])

        # Audio: either a direct stream or the output of the mixing filter.
        audio_output_str = self._handle_audio_concat(input_args, filter_args)
        if audio_output_str:
            output_args.extend(["-map", audio_output_str])
            output_args.extend(self.config.audio_args)

        # Annex B output goes into an MPEG-TS container, otherwise MP4.
        if self.task.annexb:
            output_args.extend(["-bsf:v", self._get_mp4toannexb_filter()])
            output_args.extend(["-bsf:a", "setts=pts=DTS"])
            output_args.extend(["-f", "mpegts"])
        else:
            output_args.extend(["-f", "mp4"])

        filter_complex = ["-filter_complex", ";".join(filter_args)] if filter_args else []

        return args + input_args + filter_complex + output_args + [self.task.output_file]

    def _build_encode_command(self) -> List[str]:
        """构建编码命令"""
        args = ["ffmpeg", "-y", "-hide_banner"]
        input_args = []
        filter_args = []
        output_args = [
            *self.config.video_args,
            *self.config.audio_args,
            *self.config.encoder_args,
            *self.config.default_args
        ]

        # annexb处理
        if self.task.annexb:
            output_args.extend(["-bsf:v", self._get_mp4toannexb_filter()])
            output_args.extend(["-reset_timestamps", "1"])

        # 处理输入文件
        for input_file in self.task.input_files:
            input_args.extend(["-i", input_file])

        # 处理视频流
        video_output_str = "[0:v]"
        effect_index = 0

        # 处理中心裁剪
        if self.task.center_cut == 1:
            video_output_str, effect_index = self._add_center_cut(filter_args, video_output_str, effect_index)

        # 处理缩放裁剪
        if self.task.zoom_cut == 1 and self.task.resolution:
            video_output_str, effect_index = self._add_zoom_cut(filter_args, video_output_str, effect_index)

        # 处理效果
        video_output_str, effect_index = self._add_effects(filter_args, video_output_str, effect_index)

        # 处理分辨率
        if self.task.resolution:
            filter_args.append(f"{video_output_str}scale={self.task.resolution.replace('x', ':')}[v]")
            video_output_str = "[v]"

        # 处理LUT
        for lut in self.task.luts:
            filter_args.append(f"{video_output_str}lut3d=file={lut}{video_output_str}")

        # 处理覆盖层
        video_output_str = self._add_overlays(input_args, filter_args, video_output_str)

        # 处理字幕
        for subtitle in self.task.subtitles:
            filter_args.append(f"{video_output_str}ass={subtitle}[v]")
            video_output_str = "[v]"

        # 映射视频流
        output_args.extend(["-map", video_output_str])
        output_args.extend(["-r", str(self.task.frame_rate)])
        output_args.extend(["-fps_mode", "cfr"])

        # 处理音频
        audio_output_str = self._handle_audio_encode(input_args, filter_args)
        if audio_output_str:
            output_args.extend(["-map", audio_output_str])

        filter_complex = ["-filter_complex", ";".join(filter_args)] if filter_args else []

        return args + input_args + filter_complex + output_args + [self.task.output_file]

    def _add_center_cut(self, filter_args: List[str], video_input: str, effect_index: int) -> tuple[str, int]:
        """添加中心裁剪"""
        pos_json = self.task.ext_data.get('posJson', '{}')
        try:
            pos_data = json.loads(pos_json) if pos_json != '{}' else {}
        except:
            pos_data = {}

        _v_w = pos_data.get('imgWidth', 1)
        _f_x = pos_data.get('ltX', 0)
        _f_x2 = pos_data.get('rbX', 0)
        _x = f'{float((_f_x2 + _f_x)/(2 * _v_w)):.4f}*iw-ih*ih/(2*iw)'

        filter_args.append(f"{video_input}crop=x={_x}:y=0:w=ih*ih/iw:h=ih[v_cut{effect_index}]")
        return f"[v_cut{effect_index}]", effect_index + 1

    def _add_zoom_cut(self, filter_args: List[str], video_input: str, effect_index: int) -> tuple[str, int]:
        """Append a crop of the target resolution centered on the ``posJson`` region.

        The crop has the size given by ``task.resolution`` (``"WxH"``). Its
        top-left corner is chosen so the crop is centered on the midpoint of
        (``ltX``, ``ltY``) and (``rbX``, ``rbY``) from
        ``task.ext_data['posJson']``, then clamped so the offset stays inside
        the first input's frame and never goes negative. A missing,
        unparsable or non-object ``posJson`` yields a region at the origin.

        Args:
            filter_args: Filter list that receives the crop filter.
            video_input: Label of the stream to crop, e.g. ``"[0:v]"``.
            effect_index: Index used to make the output label unique.

        Returns:
            The output label of the crop filter and the next free index.
        """
        # Dimensions of the first input; the third probed value is not used here.
        input_file = self.task.input_files[0]
        _iw, _ih, _ = probe_video_info(input_file)

        _w, _h = self.task.resolution.split('x', 1)
        target_w, target_h = int(_w), int(_h)

        pos_json = self.task.ext_data.get('posJson', '{}')
        try:
            parsed = json.loads(pos_json)
        except (TypeError, ValueError):
            # Not a string, or not valid JSON (JSONDecodeError is a ValueError).
            parsed = None
        # Valid JSON that is not an object (list, number, ...) has no fields.
        pos_data = parsed if isinstance(parsed, dict) else {}

        # NOTE(review): the coordinates are used as pixel positions of the
        # input video; imgWidth/imgHeight are not applied here - confirm that
        # posJson is expressed in the input video's coordinate space.
        _f_x = pos_data.get('ltX', 0)
        _f_x2 = pos_data.get('rbX', 0)
        _f_y = pos_data.get('ltY', 0)
        _f_y2 = pos_data.get('rbY', 0)

        # Clamp to the frame first and to zero last, so an input smaller than
        # the target resolution cannot produce a negative offset.
        _x = max(0, min(int((_f_x + _f_x2) / 2 - target_w / 2), _iw - target_w))
        _y = max(0, min(int((_f_y + _f_y2) / 2 - target_h / 2), _ih - target_h))

        filter_args.append(f"{video_input}crop=x={_x}:y={_y}:w={target_w}:h={target_h}[vz_cut{effect_index}]")
        return f"[vz_cut{effect_index}]", effect_index + 1

    def _add_effects(self, filter_args: List[str], video_input: str, effect_index: int) -> tuple[str, int]:
        """添加效果处理"""
        current_input = video_input

        for effect_str in self.task.effects:
            effect_name, params = effect_registry.parse_effect_string(effect_str)
            processor = effect_registry.get_processor(effect_name, params, self.task.ext_data)

            if processor:
                processor.frame_rate = self.task.frame_rate
                effect_filters, output_stream = processor.generate_filter_args(current_input, effect_index)

                if effect_filters:
                    filter_args.extend(effect_filters)
                    current_input = output_stream
                    effect_index += 1

        return current_input, effect_index

    def _add_overlays(self, input_args: List[str], filter_args: List[str], video_input: str) -> str:
        """添加覆盖层"""
        current_input = video_input

        for overlay in self.task.overlays:
            input_index = input_args.count("-i") // 2  # 每个输入占两个参数 -i filename
            input_args.extend(["-i", overlay])

            if self.config.old_ffmpeg:
                filter_args.append(f"{current_input}[{input_index}:v]scale2ref=iw:ih[v]")
            else:
                filter_args.append(f"{current_input}[{input_index}:v]scale=rw:rh[v]")

            filter_args.append(f"[v][{input_index}:v]overlay=1:eof_action=endall[v]")
            current_input = "[v]"

        return current_input

    def _handle_audio_concat(self, input_args: List[str], filter_args: List[str]) -> Optional[str]:
        """处理concat模式的音频"""
        audio_output_str = ""

        if self.task.mute:
            input_index = input_args.count("-i") // 2
            input_args.extend(["-f", "lavfi", "-i", "anullsrc=cl=stereo:r=48000"])
            audio_output_str = f"[{input_index}:a]"
        else:
            audio_output_str = "[0:a]"

        for audio in self.task.audios:
            input_index = input_args.count("-i") // 2
            input_args.extend(["-i", audio.replace("\\", "/")])
            filter_args.append(f"{audio_output_str}[{input_index}:a]amix=duration=shortest:dropout_transition=0:normalize=0[a]")
            audio_output_str = "[a]"

        return audio_output_str.strip("[]") if audio_output_str else None

    def _handle_audio_encode(self, input_args: List[str], filter_args: List[str]) -> Optional[str]:
        """处理encode模式的音频"""
        audio_output_str = ""

        if self.task.mute:
            input_index = input_args.count("-i") // 2
            input_args.extend(["-f", "lavfi", "-i", "anullsrc=cl=stereo:r=48000"])
            filter_args.append(f"[{input_index}:a]acopy[a]")
            audio_output_str = "[a]"
        else:
            audio_output_str = "[0:a]"

        for audio in self.task.audios:
            input_index = input_args.count("-i") // 2
            input_args.extend(["-i", audio.replace("\\", "/")])
            filter_args.append(f"{audio_output_str}[{input_index}:a]amix=duration=shortest:dropout_transition=0:normalize=0[a]")
            audio_output_str = "[a]"

        return audio_output_str if audio_output_str else None

    def _get_mp4toannexb_filter(self) -> str:
        """获取mp4toannexb滤镜"""
        encoder_args_str = " ".join(self.config.encoder_args).lower()
        if "hevc" in encoder_args_str:
            return "hevc_mp4toannexb"
        return "h264_mp4toannexb"