$ cat node-template.py
Speech Creation Legacy
// Converts text to speech with support for 23 languages and optional voice cloning from a reference audio file. Adjustable expressiveness and generation temperature. Outputs an MP3 audio file.
Process
Audio
template.py
1import os2import sys3import json4import time5import random6import traceback78try:9 import requests10except ImportError:11 import subprocess12 subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])13 import requests1415COMFYUI_API_URL = os.getenv("COMFYUI_API_URL", "http://192.168.1.39:8188")16INPUT_DIR = "/data/input"17OUTPUT_DIR = "/data/output"1819# ---------- ChatterBox generate workflow (EM_Chatterbox_generate_v1) ----------20WORKFLOW = {21 "47": {22 "inputs": {23 "text": ["65", 0],24 "narrator_voice": "none",25 "seed": 1552347952,26 "enable_chunking": True,27 "max_chars_per_chunk": 400,28 "chunk_combination_method": "auto",29 "silence_between_chunks_ms": 100,30 "enable_audio_cache": True,31 "batch_size": 0,32 "TTS_engine": ["74", 0],33 "opt_narrator": ["75", 0],34 },35 "class_type": "UnifiedTTSTextNode",36 "_meta": {"title": "\ud83c\udfa4 TTS Text"},37 },38 "65": {39 "inputs": {40 "value": "",41 },42 "class_type": "PrimitiveStringMultiline",43 "_meta": {"title": "String (Multiline)"},44 },45 "74": {46 "inputs": {47 "model_version": "v2",48 "language": "Italian",49 "device": "auto",50 "exaggeration": 0.8,51 "temperature": 0.8,52 "cfg_weight": 0.2,53 "repetition_penalty": 2,54 "min_p": 0.05,55 "top_p": 1,56 },57 "class_type": "ChatterBoxOfficial23LangEngineNode",58 "_meta": {"title": "\u2699\ufe0f ChatterBox Official 23-Lang Engine"},59 },60 "75": {61 "inputs": {62 "audio_file": "input/",63 "seek_seconds": 0,64 "duration": 0,65 },66 "class_type": "VHS_LoadAudio",67 "_meta": {"title": "Load Audio (Path)"},68 },69 "79": {70 "inputs": {71 "filename_prefix": "audio/ComfyUI",72 "quality": "V0",73 "audioUI": "",74 "audio": ["47", 0],75 },76 "class_type": "SaveAudioMP3",77 "_meta": {"title": "Save Audio (MP3)"},78 },79}808182def upload_file_to_comfyui(local_path: str, content_type: str) -> str:83 """Upload a local file to ComfyUI and return the uploaded filename."""84 with open(local_path, "rb") as f:85 resp = requests.post(86 f"{COMFYUI_API_URL}/upload/image",87 files={"image": (os.path.basename(local_path), f, content_type)},88 timeout=30,89 )90 resp.raise_for_status()91 data = resp.json()92 return data["name"]939495def detect_audio_mime(filename: str) -> str:96 """Detect MIME type from audio file extension."""97 ext = os.path.splitext(filename)[1].lower()98 mime_map = {99 ".mp3": "audio/mpeg",100 ".wav": "audio/wav",101 ".ogg": "audio/ogg",102 ".flac": "audio/flac",103 ".aac": "audio/aac",104 ".m4a": "audio/mp4",105 }106 return mime_map.get(ext, "application/octet-stream")107108109def build_workflow(110 prompt: str,111 language: str,112 voice_reference_name: str | None,113 exaggeration: float,114 temperature: float,115) -> dict:116 """Build a speech generation workflow with the given parameters."""117 import copy118119 wf = copy.deepcopy(WORKFLOW)120121 # Text prompt (node 65)122 wf["65"]["inputs"]["value"] = prompt123124 # ChatterBox engine settings (node 74)125 wf["74"]["inputs"]["language"] = language126 wf["74"]["inputs"]["exaggeration"] = exaggeration127 wf["74"]["inputs"]["temperature"] = temperature128129 # Voice reference (node 75): swap to LoadAudio for uploaded-file compatibility130 if voice_reference_name:131 wf["75"] = {132 "inputs": {"audio": voice_reference_name},133 "class_type": "LoadAudio",134 "_meta": {"title": "Load Audio"},135 }136 else:137 # No voice reference — remove audio node and disconnect from TTS138 del wf["75"]139 if "opt_narrator" in wf["47"]["inputs"]:140 del wf["47"]["inputs"]["opt_narrator"]141142 # Seed (node 47): randomize within node's max (2^32 - 1)143 wf["47"]["inputs"]["seed"] = random.randint(0, 2**31 - 1)144145 # Output prefix (node 79)146 wf["79"]["inputs"]["filename_prefix"] = "audio/emblema-speech"147148 return wf149150151def submit_prompt(workflow: dict) -> str:152 """Submit workflow to ComfyUI and return prompt_id."""153 resp = requests.post(154 f"{COMFYUI_API_URL}/prompt",155 json={"prompt": workflow},156 timeout=30,157 )158 if resp.status_code != 200:159 try:160 error_detail = resp.json()161 except Exception:162 error_detail = resp.text163 raise RuntimeError(164 f"ComfyUI /prompt returned {resp.status_code}: "165 f"{json.dumps(error_detail, indent=2) if isinstance(error_detail, dict) else error_detail}"166 )167 data = resp.json()168169 # ComfyUI returns 200 even when nodes have validation errors170 node_errors = data.get("node_errors", {})171 if node_errors:172 raise RuntimeError(173 f"ComfyUI workflow has node errors: {json.dumps(node_errors, indent=2)}"174 )175176 return data["prompt_id"]177178179def wait_for_result(prompt_id: str, timeout: int = 600, poll_interval: int = 3) -> dict:180 """Poll ComfyUI history until the prompt completes with outputs."""181 deadline = time.time() + timeout182 empty_complete_retries = 0183 max_empty_retries = 3 # grace period for output serialization lag184185 while time.time() < deadline:186 resp = requests.get(187 f"{COMFYUI_API_URL}/history/{prompt_id}",188 timeout=10,189 )190 resp.raise_for_status()191 history = resp.json()192193 if prompt_id in history:194 prompt_data = history[prompt_id]195 status = prompt_data.get("status", {})196197 if status.get("status_str") == "error":198 messages = status.get("messages", [])199 raise RuntimeError(200 f"ComfyUI prompt failed: {json.dumps(messages, indent=2)}"201 )202203 if status.get("completed", False):204 if prompt_data.get("outputs"):205 return prompt_data206207 # Completed but no outputs — retry briefly for race condition208 empty_complete_retries += 1209 if empty_complete_retries >= max_empty_retries:210 raise RuntimeError(211 f"ComfyUI prompt completed but produced no outputs. "212 f"This usually means a node failed silently (missing custom node or model). "213 f"Status: {json.dumps(status, indent=2)}"214 )215216 time.sleep(poll_interval)217218 raise TimeoutError(f"ComfyUI prompt {prompt_id} did not complete within {timeout}s")219220221def download_output_audio(prompt_data: dict, output_dir: str) -> str:222 """Download the generated audio from ComfyUI."""223 outputs = prompt_data.get("outputs", {})224 for node_id, node_output in outputs.items():225 audio_list = node_output.get("audio") or []226 if audio_list:227 audio_info = audio_list[0]228 filename = audio_info["filename"]229 subfolder = audio_info.get("subfolder", "")230 audio_type = audio_info.get("type", "output")231232 resp = requests.get(233 f"{COMFYUI_API_URL}/view",234 params={235 "filename": filename,236 "subfolder": subfolder,237 "type": audio_type,238 },239 timeout=120,240 )241 resp.raise_for_status()242243 out_filename = f"generated_{filename}"244 out_path = os.path.join(output_dir, out_filename)245 with open(out_path, "wb") as f:246 f.write(resp.content)247248 return out_filename249250 raise RuntimeError(251 f"No output audio found in ComfyUI response. Available outputs: {json.dumps(outputs, indent=2)}"252 )253254255def main():256 try:257 input_json = sys.stdin.read()258 execution_input = json.loads(input_json)259 inputs = execution_input.get("inputs", {})260261 prompt = inputs.get("prompt", "")262 voice_reference = inputs.get("voice_reference", "")263 language = inputs.get("language", "Italian")264 exaggeration = float(inputs.get("exaggeration", 0.8))265 temperature = float(inputs.get("temperature", 0.8))266267 if not prompt:268 raise ValueError("Prompt input is required")269 if not (0.0 <= exaggeration <= 1.0):270 raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")271 if not (0.0 <= temperature <= 1.5):272 raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")273274 os.makedirs(OUTPUT_DIR, exist_ok=True)275276 # Upload voice reference to ComfyUI (optional)277 voice_reference_name = None278 if voice_reference:279 voice_path = os.path.join(INPUT_DIR, voice_reference)280 if not os.path.exists(voice_path):281 raise FileNotFoundError(f"Voice reference not found: {voice_path}")282 voice_mime = detect_audio_mime(voice_reference)283 voice_reference_name = upload_file_to_comfyui(voice_path, voice_mime)284285 # Build workflow, submit, wait, download286 workflow = build_workflow(prompt, language, voice_reference_name, exaggeration, temperature)287 prompt_id = submit_prompt(workflow)288 prompt_data = wait_for_result(prompt_id)289 out_filename = download_output_audio(prompt_data, OUTPUT_DIR)290291 # Log metadata to stderr292 print(293 f"prompt_id={prompt_id}, language={language}, "294 f"exaggeration={exaggeration}, temperature={temperature}, "295 f"voice_cloning={bool(voice_reference_name)}",296 file=sys.stderr,297 )298299 # Flat output — keys match OUTPUT_SCHEMA300 output = {301 "audio": out_filename,302 }303 print(json.dumps(output, indent=2))304305 except Exception as e:306 error_output = {307 "error": str(e),308 "errorType": type(e).__name__,309 "traceback": traceback.format_exc(),310 }311 print(json.dumps(error_output), file=sys.stderr)312 sys.exit(1)313314315if __name__ == "__main__":316 main()