# FrameTour-RenderWorker/handlers/prepare_audio.py

# -*- coding: utf-8 -*-
"""
全局音频准备处理器

处理 PREPARE_JOB_AUDIO 任务，生成整个视频的连续音频轨道。
"""

import os
import logging
from typing import List, Dict, Optional

from handlers.base import BaseHandler, AUDIO_ENCODE_ARGS
from domain.task import Task, TaskType, AudioSpec, AudioProfile
from domain.result import TaskResult, ErrorCode

logger = logging.getLogger(__name__)


class PrepareJobAudioHandler(BaseHandler):
    """
    Global audio preparation handler.

    Handles PREPARE_JOB_AUDIO tasks by producing one continuous audio track
    for the whole video.

    Responsibilities:
    - download the global BGM
    - download the overlay sound effect of each segment
    - build the (possibly complex) FFmpeg mixing command
    - run the mix
    - upload the resulting audio artifact

    Key constraints:
    - the global BGM is rendered once, continuously, across the full duration
    - amix normalize=1 must never be used
    - only overlay tracks get very short fades (5-20 ms)
    - no boundary fade is applied to the BGM itself
    """

    # Overlay window (ms) assumed for a segment that carries no 'durationMs'.
    _DEFAULT_SEGMENT_DURATION_MS = 5000

    # Encoder settings shared by every command variant built below.
    _CODEC_ARGS = ('-c:a', 'aac', '-b:a', '128k')

    def get_supported_type(self) -> TaskType:
        """Return the task type handled by this class (PREPARE_JOB_AUDIO)."""
        return TaskType.PREPARE_JOB_AUDIO

    def handle(self, task: Task) -> TaskResult:
        """
        Process an audio preparation task.

        Downloads the BGM and the per-segment overlay effects, mixes them
        with FFmpeg into 'audio_full.aac' inside a per-task work directory,
        uploads that file, and always removes the work directory afterwards.
        Download failures are logged and the affected track is left out of
        the mix rather than failing the task.

        Args:
            task: Task to process.

        Returns:
            TaskResult.ok with an 'audioUrl' entry on success; otherwise
            TaskResult.fail with E_SPEC_INVALID (missing or non-positive
            total duration), E_FFMPEG_FAILED (mix failed or output missing /
            too small), E_UPLOAD_FAILED, or E_UNKNOWN for any unexpected
            exception.
        """
        work_dir = self.create_work_dir(task.task_id)

        try:
            # Parse parameters. A missing (None) duration is as invalid as a
            # non-positive one and must not surface as E_UNKNOWN.
            total_duration_ms = task.get_total_duration_ms()
            if not total_duration_ms or total_duration_ms <= 0:
                return TaskResult.fail(
                    ErrorCode.E_SPEC_INVALID,
                    "Invalid totalDurationMs"
                )

            total_duration_sec = total_duration_ms / 1000.0
            audio_profile = task.get_audio_profile()
            bgm_url = task.get_bgm_url()
            segments = task.get_segments()

            # 1. Download the BGM and the overlay effects in parallel.
            download_jobs = []
            bgm_file = None
            if bgm_url:
                bgm_file = os.path.join(work_dir, 'bgm.mp3')
                download_jobs.append({
                    'key': 'bgm',
                    'url': bgm_url,
                    'dest': bgm_file,
                    'required': False
                })

            sfx_download_candidates = []
            for i, seg in enumerate(segments or []):
                audio_spec_data = seg.get('audioSpecJson')
                if not audio_spec_data:
                    continue
                audio_spec = AudioSpec.from_dict(audio_spec_data)
                if not audio_spec or not audio_spec.audio_url:
                    continue
                job_key = f'sfx_{i}'
                sfx_file = os.path.join(work_dir, f'{job_key}.mp3')
                sfx_download_candidates.append({
                    'key': job_key,
                    'file': sfx_file,
                    'spec': audio_spec,
                    'segment': seg
                })
                download_jobs.append({
                    'key': job_key,
                    'url': audio_spec.audio_url,
                    'dest': sfx_file,
                    'required': False
                })

            download_results = self.download_files_parallel(download_jobs)

            if bgm_file:
                bgm_result = download_results.get('bgm')
                if not bgm_result or not bgm_result['success']:
                    logger.warning(
                        "[task:%s] Failed to download BGM", task.task_id
                    )
                    # Continue without BGM; the mix falls back to silence.
                    bgm_file = None

            sfx_files = []
            for candidate in sfx_download_candidates:
                sfx_result = download_results.get(candidate['key'])
                if sfx_result and sfx_result['success']:
                    sfx_files.append({
                        'file': candidate['file'],
                        'spec': candidate['spec'],
                        'segment': candidate['segment']
                    })
                else:
                    logger.warning(
                        "[task:%s] Failed to download SFX %s",
                        task.task_id, candidate['key']
                    )

            # 2. Build the mixing command.
            output_file = os.path.join(work_dir, 'audio_full.aac')
            cmd = self._build_audio_command(
                bgm_file=bgm_file,
                sfx_files=sfx_files,
                output_file=output_file,
                total_duration_sec=total_duration_sec,
                audio_profile=audio_profile,
                global_fade_in_ms=task.get_global_audio_fade_in_ms(),
                global_fade_out_ms=task.get_global_audio_fade_out_ms()
            )

            # 3. Run FFmpeg.
            if not self.run_ffmpeg(cmd, task.task_id):
                return TaskResult.fail(
                    ErrorCode.E_FFMPEG_FAILED,
                    "Audio mixing failed"
                )

            # 4. Validate the output file.
            if not self.ensure_file_exists(output_file, min_size=1024):
                return TaskResult.fail(
                    ErrorCode.E_FFMPEG_FAILED,
                    "Audio output file is missing or too small"
                )

            # 5. Upload the artifact.
            audio_url = self.upload_file(task.task_id, 'audio', output_file)
            if not audio_url:
                return TaskResult.fail(
                    ErrorCode.E_UPLOAD_FAILED,
                    "Failed to upload audio"
                )

            return TaskResult.ok({
                'audioUrl': audio_url
            })

        except Exception as e:
            # Top-level boundary: log with traceback and report E_UNKNOWN.
            logger.exception(
                "[task:%s] Unexpected error: %s", task.task_id, e
            )
            return TaskResult.fail(ErrorCode.E_UNKNOWN, str(e))

        finally:
            self.cleanup_work_dir(work_dir)

    def _build_audio_command(
        self,
        bgm_file: Optional[str],
        sfx_files: List[Dict],
        output_file: str,
        total_duration_sec: float,
        audio_profile: AudioProfile,
        global_fade_in_ms: int = 0,
        global_fade_out_ms: int = 0
    ) -> List[str]:
        """
        Build the FFmpeg command that renders the full audio track.

        Three shapes are produced:
        1. no BGM and no overlays -> silence of the requested length;
        2. BGM only -> BGM padded/truncated to the requested length;
        3. overlays (with or without BGM) -> filter_complex mix using
           amix with normalize=0.

        Every variant encodes with the sample rate and channel count of
        ``audio_profile``.

        Args:
            bgm_file: Path of the BGM file, or None when there is no BGM.
            sfx_files: Overlay entries, each a dict with 'file', 'spec' and
                'segment' keys.
            output_file: Path of the file FFmpeg writes.
            total_duration_sec: Total duration in seconds.
            audio_profile: Audio settings; ``sample_rate`` and ``channels``
                are read.
            global_fade_in_ms: Global fade-in in milliseconds, 0 to disable.
            global_fade_out_ms: Global fade-out in milliseconds, 0 to disable.

        Returns:
            FFmpeg argument list.
        """
        sample_rate = audio_profile.sample_rate
        channels = audio_profile.channels

        # Global afade filters act on the final mix (after amix).
        global_fade_filters = self._build_global_fade_filters(
            total_duration_sec, global_fade_in_ms, global_fade_out_ms
        )

        base_args = ['ffmpeg', '-y', '-hide_banner']
        encode_args = [
            *self._CODEC_ARGS,
            '-ar', str(sample_rate), '-ac', str(channels)
        ]

        # Case 1: neither BGM nor overlays -> generate silence.
        if not bgm_file and not sfx_files:
            af_args = []
            if global_fade_filters:
                af_args = ['-af', ','.join(global_fade_filters)]
            return [
                *base_args,
                '-f', 'lavfi',
                '-i', f'anullsrc=r={sample_rate}:cl=stereo',
                '-t', str(total_duration_sec),
                *af_args,
                # Honour the profile's channel count here as well; the
                # source itself is always generated as stereo.
                *encode_args,
                output_file
            ]

        # Case 2: BGM only, no overlays.
        if not sfx_files:
            # Pad a short BGM with silence so the track always spans the
            # full duration, matching what the mixed path does; '-t' still
            # truncates a BGM that is too long.
            bgm_filters = [
                f'apad=whole_dur={total_duration_sec}',
                *global_fade_filters
            ]
            return [
                *base_args,
                '-i', bgm_file,
                '-t', str(total_duration_sec),
                '-af', ','.join(bgm_filters),
                *encode_args,
                output_file
            ]

        # Case 3: overlays present -> complex filter graph.
        inputs = []
        if bgm_file:
            inputs.extend(['-i', bgm_file])
        for sfx in sfx_files:
            inputs.extend(['-i', sfx['file']])

        # Base track: the BGM cut/padded to the full length, or silence.
        if bgm_file:
            filter_parts = [
                f"[0:a]atrim=0:{total_duration_sec},asetpts=PTS-STARTPTS,"
                f"apad=whole_dur={total_duration_sec}[bgm]"
            ]
            first_sfx_input = 1
        else:
            filter_parts = [
                f"anullsrc=r={sample_rate}:cl=stereo,"
                f"atrim=0:{total_duration_sec}[bgm]"
            ]
            first_sfx_input = 0

        # One filter chain per overlay.
        sfx_labels = []
        for i, sfx in enumerate(sfx_files):
            label = f"sfx{i}"
            sfx_labels.append(f"[{label}]")
            filter_parts.append(
                self._build_sfx_filter(first_sfx_input + i, label, sfx)
            )

        # Mix. normalize=0 is mandatory (no level normalisation);
        # dropout_transition=0 avoids a level ramp when an input ends;
        # duration=first ties the output length to the base track.
        mix_inputs = "[bgm]" + "".join(sfx_labels)
        num_inputs = 1 + len(sfx_files)
        mix = (
            f"{mix_inputs}amix=inputs={num_inputs}:duration=first:"
            f"dropout_transition=0:normalize=0"
        )

        if global_fade_filters:
            # Route through an intermediate label so the global fades are
            # applied to the finished mix.
            filter_parts.append(f"{mix}[mixed]")
            filter_parts.append(
                f"[mixed]{','.join(global_fade_filters)}[out]"
            )
        else:
            filter_parts.append(f"{mix}[out]")

        return [
            *base_args,
            *inputs,
            '-filter_complex', ';'.join(filter_parts),
            '-map', '[out]',
            *encode_args,
            output_file
        ]

    def _build_sfx_filter(self, input_index: int, label: str, sfx: Dict) -> str:
        """
        Build the filter chain for a single overlay sound effect.

        The chain trims the effect to its segment window, delays it to its
        position on the timeline, applies the optional short fades and sets
        the volume. Fades are applied to overlays only, never to the BGM.

        Args:
            input_index: Index of the FFmpeg input holding the effect.
            label: Output pad label for the chain (without brackets).
            sfx: Overlay entry with 'spec' and 'segment' keys.

        Returns:
            One filter_complex chain, e.g. "[1:a]...[sfx0]".
        """
        spec = sfx['spec']
        seg = sfx['segment']

        # Timing. None values are treated as "not provided".
        start_time_ms = seg.get('startTimeMs') or 0
        duration_ms = seg.get('durationMs')
        if duration_ms is None:
            duration_ms = self._DEFAULT_SEGMENT_DURATION_MS

        # adelay rejects negative delays, so clamp at zero.
        delay_ms = max(0, int(start_time_ms + (spec.delay_ms or 0)))
        delay_sec = delay_ms / 1000.0
        duration_sec = max(0.0, duration_ms / 1000.0)

        # Very short fades (5-20 ms expected).
        fade_in_sec = (spec.fade_in_ms or 0) / 1000.0
        fade_out_sec = (spec.fade_out_ms or 0) / 1000.0

        chain = [
            # Confine the effect to its segment window so it ends there
            # even when no fade-out is requested.
            f"atrim=end={duration_sec}",
            f"adelay={delay_ms}|{delay_ms}",
        ]
        # afade treats d=0 as "use the default nb_samples" (44100 samples),
        # i.e. an unintended ~1 s fade, so zero-length fades are skipped.
        if fade_in_sec > 0:
            chain.append(f"afade=t=in:st={delay_sec}:d={fade_in_sec}")
        if fade_out_sec > 0:
            # A negative start time is rejected by afade; clamp at zero.
            fade_out_start = max(
                0.0, delay_sec + duration_sec - fade_out_sec
            )
            chain.append(
                f"afade=t=out:st={fade_out_start}:d={fade_out_sec}"
            )
        chain.append(f"volume={spec.volume}")

        return f"[{input_index}:a]{','.join(chain)}[{label}]"

    @staticmethod
    def _build_global_fade_filters(
        total_duration_sec: float,
        global_fade_in_ms: int,
        global_fade_out_ms: int
    ) -> List[str]:
        """
        Build the afade filters for the global fade-in / fade-out.

        The filters are meant to be appended after the amix output, so they
        act on the final mixed audio. They are independent of the
        per-segment fadeInMs/fadeOutMs in audioSpecJson, which only affect a
        single overlay effect.

        A fade-out longer than the whole track is shortened to the track
        length so its start time never becomes negative.

        Args:
            total_duration_sec: Total duration in seconds.
            global_fade_in_ms: Global fade-in in milliseconds; 0 or None
                disables it.
            global_fade_out_ms: Global fade-out in milliseconds; 0 or None
                disables it.

        Returns:
            List of afade filter strings, possibly empty.
        """
        filters = []

        fade_in_sec = (global_fade_in_ms or 0) / 1000.0
        if fade_in_sec > 0:
            filters.append(f"afade=t=in:st=0:d={fade_in_sec}")

        fade_out_sec = min(
            (global_fade_out_ms or 0) / 1000.0, total_duration_sec
        )
        if fade_out_sec > 0:
            fade_out_start = total_duration_sec - fade_out_sec
            filters.append(
                f"afade=t=out:st={fade_out_start}:d={fade_out_sec}"
            )

        return filters