$ cat node-template.py

Speech Creation Legacy

// Converts text to speech with support for 23 languages and optional voice cloning from a reference audio file. Adjustable expressiveness and generation temperature. Outputs an MP3 audio file.

Process
Audio
template.py
"""ComfyUI text-to-speech node template (ChatterBox 23-language engine).

Reads a JSON execution payload from stdin, builds and submits a TTS workflow
to a ComfyUI server, polls until completion, downloads the generated MP3
into OUTPUT_DIR, and prints a flat JSON result ({"audio": <filename>}) to
stdout. Failures are reported as JSON on stderr and exit with status 1.
"""

import copy
import json
import os
import random
import sys
import time
import traceback

# Bootstrap the only third-party dependency; the node container may start bare.
try:
    import requests
except ImportError:
    import subprocess

    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
    import requests

COMFYUI_API_URL = os.getenv("COMFYUI_API_URL", "http://192.168.1.39:8188")
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"

# ---------- ChatterBox generate workflow (EM_Chatterbox_generate_v1) ----------
WORKFLOW = {
    "47": {
        "inputs": {
            "text": ["65", 0],
            "narrator_voice": "none",
            "seed": 1552347952,
            "enable_chunking": True,
            "max_chars_per_chunk": 400,
            "chunk_combination_method": "auto",
            "silence_between_chunks_ms": 100,
            "enable_audio_cache": True,
            "batch_size": 0,
            "TTS_engine": ["74", 0],
            "opt_narrator": ["75", 0],
        },
        "class_type": "UnifiedTTSTextNode",
        "_meta": {"title": "\ud83c\udfa4 TTS Text"},
    },
    "65": {
        "inputs": {
            "value": "",
        },
        "class_type": "PrimitiveStringMultiline",
        "_meta": {"title": "String (Multiline)"},
    },
    "74": {
        "inputs": {
            "model_version": "v2",
            "language": "Italian",
            "device": "auto",
            "exaggeration": 0.8,
            "temperature": 0.8,
            "cfg_weight": 0.2,
            "repetition_penalty": 2,
            "min_p": 0.05,
            "top_p": 1,
        },
        "class_type": "ChatterBoxOfficial23LangEngineNode",
        "_meta": {"title": "\u2699\ufe0f ChatterBox Official 23-Lang Engine"},
    },
    "75": {
        "inputs": {
            "audio_file": "input/",
            "seek_seconds": 0,
            "duration": 0,
        },
        "class_type": "VHS_LoadAudio",
        "_meta": {"title": "Load Audio (Path)"},
    },
    "79": {
        "inputs": {
            "filename_prefix": "audio/ComfyUI",
            "quality": "V0",
            "audioUI": "",
            "audio": ["47", 0],
        },
        "class_type": "SaveAudioMP3",
        "_meta": {"title": "Save Audio (MP3)"},
    },
}


def upload_file_to_comfyui(local_path: str, content_type: str) -> str:
    """Upload a local file to ComfyUI and return the uploaded filename.

    Args:
        local_path: Path to the file on the local filesystem.
        content_type: MIME type to send with the multipart upload.

    Returns:
        The server-assigned filename (``name`` field of the JSON response).

    Raises:
        requests.HTTPError: If the upload endpoint returns an error status.
    """
    # ComfyUI's generic upload endpoint is /upload/image; it accepts audio too.
    with open(local_path, "rb") as f:
        resp = requests.post(
            f"{COMFYUI_API_URL}/upload/image",
            files={"image": (os.path.basename(local_path), f, content_type)},
            timeout=30,
        )
    resp.raise_for_status()
    data = resp.json()
    return data["name"]


def detect_audio_mime(filename: str) -> str:
    """Detect MIME type from an audio file's extension.

    Falls back to ``application/octet-stream`` for unknown extensions.
    """
    ext = os.path.splitext(filename)[1].lower()
    mime_map = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }
    return mime_map.get(ext, "application/octet-stream")


def build_workflow(
    prompt: str,
    language: str,
    voice_reference_name: str | None,
    exaggeration: float,
    temperature: float,
) -> dict:
    """Build a speech generation workflow with the given parameters.

    Args:
        prompt: Text to synthesize (node 65).
        language: Target language for the ChatterBox engine (node 74).
        voice_reference_name: Uploaded reference-audio filename for voice
            cloning, or None to disable cloning entirely.
        exaggeration: Expressiveness setting (node 74).
        temperature: Sampling temperature (node 74).

    Returns:
        A deep-copied workflow dict ready to POST to /prompt; the module-level
        WORKFLOW template is never mutated.
    """
    wf = copy.deepcopy(WORKFLOW)

    # Text prompt (node 65)
    wf["65"]["inputs"]["value"] = prompt

    # ChatterBox engine settings (node 74)
    wf["74"]["inputs"]["language"] = language
    wf["74"]["inputs"]["exaggeration"] = exaggeration
    wf["74"]["inputs"]["temperature"] = temperature

    # Voice reference (node 75): swap to LoadAudio for uploaded-file compatibility
    if voice_reference_name:
        wf["75"] = {
            "inputs": {"audio": voice_reference_name},
            "class_type": "LoadAudio",
            "_meta": {"title": "Load Audio"},
        }
    else:
        # No voice reference — remove audio node and disconnect from TTS
        del wf["75"]
        if "opt_narrator" in wf["47"]["inputs"]:
            del wf["47"]["inputs"]["opt_narrator"]

    # Seed (node 47): randomize within node's max (2^32 - 1).
    # (Previously capped at 2^31 - 1, silently halving the seed space.)
    wf["47"]["inputs"]["seed"] = random.randint(0, 2**32 - 1)

    # Output prefix (node 79)
    wf["79"]["inputs"]["filename_prefix"] = "audio/emblema-speech"

    return wf


def submit_prompt(workflow: dict) -> str:
    """Submit a workflow to ComfyUI and return the prompt_id.

    Raises:
        RuntimeError: On a non-200 response, or when the 200 response carries
            per-node validation errors (ComfyUI reports those with status 200).
    """
    resp = requests.post(
        f"{COMFYUI_API_URL}/prompt",
        json={"prompt": workflow},
        timeout=30,
    )
    if resp.status_code != 200:
        try:
            error_detail = resp.json()
        except Exception:
            error_detail = resp.text
        raise RuntimeError(
            f"ComfyUI /prompt returned {resp.status_code}: "
            f"{json.dumps(error_detail, indent=2) if isinstance(error_detail, dict) else error_detail}"
        )
    data = resp.json()

    # ComfyUI returns 200 even when nodes have validation errors
    node_errors = data.get("node_errors", {})
    if node_errors:
        raise RuntimeError(
            f"ComfyUI workflow has node errors: {json.dumps(node_errors, indent=2)}"
        )

    return data["prompt_id"]


def wait_for_result(prompt_id: str, timeout: int = 600, poll_interval: int = 3) -> dict:
    """Poll ComfyUI history until the prompt completes with outputs.

    Args:
        prompt_id: Identifier returned by submit_prompt().
        timeout: Overall deadline in seconds.
        poll_interval: Seconds to sleep between history polls.

    Returns:
        The history entry for the prompt, including its "outputs" mapping.

    Raises:
        RuntimeError: If the prompt errored, or completed repeatedly with no
            outputs (usually a silently-failed node: missing custom node/model).
        TimeoutError: If the deadline elapses without completion.
    """
    deadline = time.time() + timeout
    empty_complete_retries = 0
    max_empty_retries = 3  # grace period for output serialization lag

    while time.time() < deadline:
        resp = requests.get(
            f"{COMFYUI_API_URL}/history/{prompt_id}",
            timeout=10,
        )
        resp.raise_for_status()
        history = resp.json()

        if prompt_id in history:
            prompt_data = history[prompt_id]
            status = prompt_data.get("status", {})

            if status.get("status_str") == "error":
                messages = status.get("messages", [])
                raise RuntimeError(
                    f"ComfyUI prompt failed: {json.dumps(messages, indent=2)}"
                )

            if status.get("completed", False):
                if prompt_data.get("outputs"):
                    return prompt_data

                # Completed but no outputs — retry briefly for race condition
                empty_complete_retries += 1
                if empty_complete_retries >= max_empty_retries:
                    raise RuntimeError(
                        f"ComfyUI prompt completed but produced no outputs. "
                        f"This usually means a node failed silently (missing custom node or model). "
                        f"Status: {json.dumps(status, indent=2)}"
                    )

        time.sleep(poll_interval)

    raise TimeoutError(f"ComfyUI prompt {prompt_id} did not complete within {timeout}s")


def download_output_audio(prompt_data: dict, output_dir: str) -> str:
    """Download the first generated audio output from ComfyUI.

    Args:
        prompt_data: Completed history entry from wait_for_result().
        output_dir: Local directory to write the audio file into.

    Returns:
        The local filename (not full path) of the saved audio.

    Raises:
        RuntimeError: If no node output contains an "audio" entry.
    """
    outputs = prompt_data.get("outputs", {})
    for node_id, node_output in outputs.items():
        audio_list = node_output.get("audio") or []
        if audio_list:
            audio_info = audio_list[0]
            filename = audio_info["filename"]
            subfolder = audio_info.get("subfolder", "")
            audio_type = audio_info.get("type", "output")

            resp = requests.get(
                f"{COMFYUI_API_URL}/view",
                params={
                    "filename": filename,
                    "subfolder": subfolder,
                    "type": audio_type,
                },
                timeout=120,
            )
            resp.raise_for_status()

            # Preserve ComfyUI's filename so the local copy keeps its .mp3
            # extension. (The previous hard-coded "generated_(unknown)" was a
            # broken template substitution with no extension at all.)
            out_filename = f"generated_{filename}"
            out_path = os.path.join(output_dir, out_filename)
            with open(out_path, "wb") as f:
                f.write(resp.content)

            return out_filename

    raise RuntimeError(
        f"No output audio found in ComfyUI response. Available outputs: {json.dumps(outputs, indent=2)}"
    )


def main():
    """Entry point: read execution JSON from stdin, run TTS, emit result JSON."""
    try:
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)
        inputs = execution_input.get("inputs", {})

        prompt = inputs.get("prompt", "")
        voice_reference = inputs.get("voice_reference", "")
        language = inputs.get("language", "Italian")
        exaggeration = float(inputs.get("exaggeration", 0.8))
        temperature = float(inputs.get("temperature", 0.8))

        if not prompt:
            raise ValueError("Prompt input is required")
        if not (0.0 <= exaggeration <= 1.0):
            raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")
        if not (0.0 <= temperature <= 1.5):
            raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        # Upload voice reference to ComfyUI (optional)
        voice_reference_name = None
        if voice_reference:
            voice_path = os.path.join(INPUT_DIR, voice_reference)
            if not os.path.exists(voice_path):
                raise FileNotFoundError(f"Voice reference not found: {voice_path}")
            voice_mime = detect_audio_mime(voice_reference)
            voice_reference_name = upload_file_to_comfyui(voice_path, voice_mime)

        # Build workflow, submit, wait, download
        workflow = build_workflow(prompt, language, voice_reference_name, exaggeration, temperature)
        prompt_id = submit_prompt(workflow)
        prompt_data = wait_for_result(prompt_id)
        out_filename = download_output_audio(prompt_data, OUTPUT_DIR)

        # Log metadata to stderr
        print(
            f"prompt_id={prompt_id}, language={language}, "
            f"exaggeration={exaggeration}, temperature={temperature}, "
            f"voice_cloning={bool(voice_reference_name)}",
            file=sys.stderr,
        )

        # Flat output — keys match OUTPUT_SCHEMA
        output = {
            "audio": out_filename,
        }
        print(json.dumps(output, indent=2))

    except Exception as e:
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()