$ cat node-template.py

Speech Conversion Legacy

// Converts speech from one voice to another. Takes a source audio and a target narrator voice reference, then re-synthesizes the speech in the target voice. Supports 23 languages and adjustable expressiveness. Outputs an MP3 audio file.

Process
Audio
template.py
"""Speech Conversion (legacy) node template.

Converts speech from one voice to another: a source audio file and a narrator
voice reference are uploaded to ComfyUI, run through the ChatterBox voice
changer workflow below, and the resulting audio file is saved to OUTPUT_DIR.

Protocol: a JSON document is read from stdin (``{"inputs": {...}}``). On
success ``{"audio": "<filename>"}`` is printed to stdout; on failure a JSON
error object is printed to stderr and the process exits with status 1.
"""
import copy
import json
import os
import sys
import time
import traceback

try:
    import requests
except ImportError:
    # Installed lazily by _ensure_requests() so that merely importing this
    # module never shells out to pip.
    requests = None

# A trailing slash in the configured URL would otherwise yield "//prompt" etc.
COMFYUI_API_URL = os.getenv("COMFYUI_API_URL", "http://192.168.1.39:8188").rstrip("/")
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"

# ---------- ChatterBox convert workflow (EM_Chatterbox_convert_v1) ----------
# NOTE: emoji in the titles are written as \UXXXXXXXX escapes. JSON-style
# surrogate pairs ("\ud83d\udd04") would create lone surrogates in a Python
# str, which cannot be encoded as UTF-8 or printed. json.dumps() with the
# default ensure_ascii=True emits the same bytes for either spelling.
WORKFLOW = {
    "48": {
        "inputs": {
            "refinement_passes": 1,
            "max_chunk_duration": 30,
            "chunk_method": "smart",
            "TTS_engine": ["74", 0],
            "source_audio": ["76", 0],
            "narrator_target": ["77", 0],
        },
        "class_type": "UnifiedVoiceChangerNode",
        "_meta": {"title": "\U0001F504 Voice Changer"},
    },
    "74": {
        "inputs": {
            "model_version": "v2",
            "language": "Italian",
            "device": "auto",
            "exaggeration": 0.8,
            "temperature": 0.8,
            "cfg_weight": 0.2,
            "repetition_penalty": 2,
            "min_p": 0.05,
            "top_p": 1,
        },
        "class_type": "ChatterBoxOfficial23LangEngineNode",
        "_meta": {"title": "\u2699\ufe0f ChatterBox Official 23-Lang Engine"},
    },
    "75": {
        "inputs": {
            "filename_prefix": "audio/ComfyUI",
            "quality": "V0",
            "audioUI": "",
            "audio": ["48", 0],
        },
        "class_type": "SaveAudioMP3",
        "_meta": {"title": "Save Audio (MP3)"},
    },
    "76": {
        "inputs": {
            "audio_file": "input/",
            "seek_seconds": 0,
            "duration": 0,
        },
        "class_type": "VHS_LoadAudio",
        "_meta": {
            "title": "Load Audio (Path)\U0001F3A5\U0001F165\U0001F157\U0001F162 (Source Voice)"
        },
    },
    "77": {
        "inputs": {
            "audio_file": "input/",
            "seek_seconds": 0,
            "duration": 0,
        },
        "class_type": "VHS_LoadAudio",
        "_meta": {
            "title": "Load Audio (Path)\U0001F3A5\U0001F165\U0001F157\U0001F162 (Narrator Voice)"
        },
    },
}


def _ensure_requests() -> None:
    """Make the ``requests`` package available, installing it if missing.

    Raises:
        subprocess.CalledProcessError: if ``pip install requests`` fails.
    """
    global requests
    if requests is not None:
        return

    import importlib
    import subprocess

    # Capture pip's output and forward it to stderr: stdout is reserved for
    # the JSON result printed by main().
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", "requests"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    )
    sys.stderr.write(completed.stdout)
    completed.check_returncode()

    # Finders cache directory listings; refresh them so the freshly installed
    # package is visible to this running interpreter.
    importlib.invalidate_caches()
    requests = importlib.import_module("requests")


def upload_file_to_comfyui(local_path: str, content_type: str) -> str:
    """Upload a local file to ComfyUI and return the uploaded filename.

    Args:
        local_path: Path of the file to send.
        content_type: MIME type sent alongside the file.

    Returns:
        The ``name`` field of the JSON upload response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    _ensure_requests()
    # The route and form field are named for images; this script sends its
    # audio files through the same endpoint.
    with open(local_path, "rb") as f:
        resp = requests.post(
            f"{COMFYUI_API_URL}/upload/image",
            files={"image": (os.path.basename(local_path), f, content_type)},
            timeout=30,
        )
    resp.raise_for_status()
    return resp.json()["name"]


def detect_audio_mime(filename: str) -> str:
    """Return the MIME type for *filename* based on its extension.

    The extension is matched case-insensitively; unknown extensions map to
    ``application/octet-stream``.
    """
    ext = os.path.splitext(filename)[1].lower()
    mime_map = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }
    return mime_map.get(ext, "application/octet-stream")


def build_workflow(
    source_name: str,
    narrator_name: str,
    language: str,
    exaggeration: float,
    temperature: float,
) -> dict:
    """Build a voice conversion workflow with the given parameters.

    Works on a deep copy of WORKFLOW, so the module-level template is never
    mutated.

    Args:
        source_name: Uploaded filename of the source speech (node 76).
        narrator_name: Uploaded filename of the target voice (node 77).
        language: Value for the engine node's ``language`` input.
        exaggeration: Value for the engine node's ``exaggeration`` input.
        temperature: Value for the engine node's ``temperature`` input.

    Returns:
        A new workflow dict ready to be submitted to ComfyUI.
    """
    wf = copy.deepcopy(WORKFLOW)

    # Swap VHS_LoadAudio -> LoadAudio for uploaded-file compatibility.
    # Source audio (node 76)
    wf["76"] = {
        "inputs": {"audio": source_name},
        "class_type": "LoadAudio",
        "_meta": {"title": "Load Audio (Source Voice)"},
    }

    # Narrator/target voice (node 77)
    wf["77"] = {
        "inputs": {"audio": narrator_name},
        "class_type": "LoadAudio",
        "_meta": {"title": "Load Audio (Narrator Voice)"},
    }

    # ChatterBox engine settings (node 74)
    engine_inputs = wf["74"]["inputs"]
    engine_inputs["language"] = language
    engine_inputs["exaggeration"] = exaggeration
    engine_inputs["temperature"] = temperature

    # Output prefix (node 75)
    wf["75"]["inputs"]["filename_prefix"] = "audio/emblema-speech-convert"

    return wf


def submit_prompt(workflow: dict) -> str:
    """Submit workflow to ComfyUI and return prompt_id.

    Raises:
        RuntimeError: if the server answers with a non-200 status, or with a
            200 response that reports ``node_errors``.
    """
    _ensure_requests()
    resp = requests.post(
        f"{COMFYUI_API_URL}/prompt",
        json={"prompt": workflow},
        timeout=30,
    )
    if resp.status_code != 200:
        try:
            error_detail = resp.json()
        except ValueError:  # body is not JSON; fall back to the raw text
            error_detail = resp.text
        if isinstance(error_detail, (dict, list)):
            error_detail = json.dumps(error_detail, indent=2)
        raise RuntimeError(
            f"ComfyUI /prompt returned {resp.status_code}: {error_detail}"
        )
    data = resp.json()

    # ComfyUI returns 200 even when nodes have validation errors
    node_errors = data.get("node_errors", {})
    if node_errors:
        raise RuntimeError(
            f"ComfyUI workflow has node errors: {json.dumps(node_errors, indent=2)}"
        )

    return data["prompt_id"]


def wait_for_result(prompt_id: str, timeout: int = 600, poll_interval: int = 3) -> dict:
    """Poll ComfyUI history until the prompt completes with outputs.

    Args:
        prompt_id: Identifier returned by submit_prompt().
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The history entry for *prompt_id* once it is completed and has
        non-empty ``outputs``.

    Raises:
        RuntimeError: if the prompt reports an error status, or is reported
            as completed without outputs on three polls.
        TimeoutError: if the prompt does not complete within *timeout*.
        requests.HTTPError: if a history request returns an error status.
    """
    _ensure_requests()
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + timeout
    empty_complete_retries = 0
    max_empty_retries = 3  # grace period for output serialization lag

    while time.monotonic() < deadline:
        resp = requests.get(
            f"{COMFYUI_API_URL}/history/{prompt_id}",
            timeout=10,
        )
        resp.raise_for_status()
        history = resp.json()

        if prompt_id in history:
            prompt_data = history[prompt_id]
            status = prompt_data.get("status", {})

            if status.get("status_str") == "error":
                messages = status.get("messages", [])
                raise RuntimeError(
                    f"ComfyUI prompt failed: {json.dumps(messages, indent=2)}"
                )

            if status.get("completed", False):
                if prompt_data.get("outputs"):
                    return prompt_data

                # Completed but no outputs: retry briefly for race condition
                empty_complete_retries += 1
                if empty_complete_retries >= max_empty_retries:
                    raise RuntimeError(
                        "ComfyUI prompt completed but produced no outputs. "
                        "This usually means a node failed silently (missing custom node or model). "
                        f"Status: {json.dumps(status, indent=2)}"
                    )

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    raise TimeoutError(f"ComfyUI prompt {prompt_id} did not complete within {timeout}s")


def download_output_audio(prompt_data: dict, output_dir: str) -> str:
    """Download the converted audio from ComfyUI.

    Uses the first ``audio`` entry found in any node's outputs and writes it
    to *output_dir* as ``converted_<filename>``.

    Returns:
        The name (not the full path) of the file written.

    Raises:
        RuntimeError: if no node output contains an ``audio`` entry.
        requests.HTTPError: if the download returns an error status.
    """
    _ensure_requests()
    outputs = prompt_data.get("outputs", {})
    for node_output in outputs.values():
        audio_list = node_output.get("audio") or []
        if not audio_list:
            continue

        audio_info = audio_list[0]
        filename = audio_info["filename"]

        resp = requests.get(
            f"{COMFYUI_API_URL}/view",
            params={
                "filename": filename,
                "subfolder": audio_info.get("subfolder", ""),
                "type": audio_info.get("type", "output"),
            },
            timeout=120,
        )
        resp.raise_for_status()

        # The name comes from the server: keep only its last path component
        # so the file always lands directly inside output_dir.
        out_filename = f"converted_{os.path.basename(filename)}"
        with open(os.path.join(output_dir, out_filename), "wb") as f:
            f.write(resp.content)

        return out_filename

    raise RuntimeError(
        f"No output audio found in ComfyUI response. Available outputs: {json.dumps(outputs, indent=2)}"
    )


def _read_float(inputs: dict, key: str, default: float) -> float:
    """Return ``inputs[key]`` as a float, or *default* when missing or null."""
    raw = inputs.get(key)
    if raw is None:
        return default
    try:
        return float(raw)
    except (TypeError, ValueError) as err:
        raise ValueError(f"Input '{key}' must be a number, got {raw!r}") from err


def _upload_input(name: str, label: str) -> str:
    """Upload the file *name* from INPUT_DIR and return its ComfyUI filename."""
    path = os.path.join(INPUT_DIR, name)
    if not os.path.isfile(path):
        raise FileNotFoundError(f"{label} not found: {path}")
    return upload_file_to_comfyui(path, detect_audio_mime(name))


def main():
    """Run one conversion described by the JSON document on stdin.

    Recognised ``inputs`` keys: ``source_audio`` and ``narrator_voice``
    (required, file names relative to INPUT_DIR), ``language`` (default
    "Italian"), ``exaggeration`` (0.0-1.0, default 0.8) and ``temperature``
    (0.0-1.5, default 0.8). Any failure is reported as a JSON object on
    stderr and the process exits with status 1.
    """
    try:
        execution_input = json.loads(sys.stdin.read())
        if not isinstance(execution_input, dict):
            raise ValueError("Execution input must be a JSON object")
        inputs = execution_input.get("inputs") or {}
        if not isinstance(inputs, dict):
            raise ValueError("'inputs' must be a JSON object")

        # Explicit JSON nulls are treated like missing keys.
        source_audio = inputs.get("source_audio") or ""
        narrator_voice = inputs.get("narrator_voice") or ""
        language = inputs.get("language") or "Italian"
        exaggeration = _read_float(inputs, "exaggeration", 0.8)
        temperature = _read_float(inputs, "temperature", 0.8)

        if not source_audio:
            raise ValueError("Source audio input is required")
        if not narrator_voice:
            raise ValueError("Narrator voice input is required")
        if not (0.0 <= exaggeration <= 1.0):
            raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")
        if not (0.0 <= temperature <= 1.5):
            raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        # Upload both audio files to ComfyUI
        source_name = _upload_input(source_audio, "Source audio")
        narrator_name = _upload_input(narrator_voice, "Narrator voice")

        # Build workflow, submit, wait, download
        workflow = build_workflow(source_name, narrator_name, language, exaggeration, temperature)
        prompt_id = submit_prompt(workflow)
        prompt_data = wait_for_result(prompt_id)
        out_filename = download_output_audio(prompt_data, OUTPUT_DIR)

        # Log metadata to stderr
        print(
            f"prompt_id={prompt_id}, language={language}, "
            f"exaggeration={exaggeration}, temperature={temperature}",
            file=sys.stderr,
        )

        # Flat output; keys are meant to match the node's OUTPUT_SCHEMA
        # (declared outside this file).
        output = {
            "audio": out_filename,
        }
        print(json.dumps(output, indent=2))

    except Exception as e:
        # Top-level boundary: report every failure in machine-readable form.
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()