$ cat node-template.py
Speech Conversion Legacy
// Converts speech from one voice to another. Takes a source audio file and a target narrator voice reference, then re-synthesizes the speech in the target voice. Supports 23 languages and adjustable expressiveness. Outputs an MP3 audio file.
Process
Audio
template.py
"""Speech conversion node backed by a ComfyUI ChatterBox workflow.

Reads an execution payload (JSON) from stdin, uploads the source audio and the
narrator reference audio to ComfyUI, runs the voice-changer workflow, downloads
the produced audio into OUTPUT_DIR and prints ``{"audio": <filename>}`` on
stdout. On failure a JSON error object is written to stderr and the process
exits with status 1.
"""

import os
import sys
import json
import time
import copy
import traceback

try:
    import requests
except ImportError:
    # Resolved on first use by _ensure_requests(), so that a failed install is
    # reported through main()'s JSON error handler instead of crashing the
    # module at import time.
    requests = None

COMFYUI_API_URL = os.getenv("COMFYUI_API_URL", "http://192.168.1.39:8188")
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"

# ---------- ChatterBox convert workflow (EM_Chatterbox_convert_v1) ----------
WORKFLOW = {
    "48": {
        "inputs": {
            "refinement_passes": 1,
            "max_chunk_duration": 30,
            "chunk_method": "smart",
            "TTS_engine": ["74", 0],
            "source_audio": ["76", 0],
            "narrator_target": ["77", 0],
        },
        "class_type": "UnifiedVoiceChangerNode",
        "_meta": {"title": "\ud83d\udd04 Voice Changer"},
    },
    "74": {
        "inputs": {
            "model_version": "v2",
            "language": "Italian",
            "device": "auto",
            "exaggeration": 0.8,
            "temperature": 0.8,
            "cfg_weight": 0.2,
            "repetition_penalty": 2,
            "min_p": 0.05,
            "top_p": 1,
        },
        "class_type": "ChatterBoxOfficial23LangEngineNode",
        "_meta": {"title": "\u2699\ufe0f ChatterBox Official 23-Lang Engine"},
    },
    "75": {
        "inputs": {
            "filename_prefix": "audio/ComfyUI",
            "quality": "V0",
            "audioUI": "",
            "audio": ["48", 0],
        },
        "class_type": "SaveAudioMP3",
        "_meta": {"title": "Save Audio (MP3)"},
    },
    "76": {
        "inputs": {
            "audio_file": "input/",
            "seek_seconds": 0,
            "duration": 0,
        },
        "class_type": "VHS_LoadAudio",
        "_meta": {"title": "Load Audio (Path)\ud83c\udfa5\ud83c\udd65\ud83c\udd57\ud83c\udd62 (Source Voice)"},
    },
    "77": {
        "inputs": {
            "audio_file": "input/",
            "seek_seconds": 0,
            "duration": 0,
        },
        "class_type": "VHS_LoadAudio",
        "_meta": {"title": "Load Audio (Path)\ud83c\udfa5\ud83c\udd65\ud83c\udd57\ud83c\udd62 (Narrator Voice)"},
    },
}


def _ensure_requests():
    """Return the ``requests`` module, pip-installing it first if it is missing.

    Raises:
        subprocess.CalledProcessError: if the pip install command fails.
        ImportError: if ``requests`` still cannot be imported after installing.
    """
    global requests
    if requests is None:
        import importlib
        import subprocess

        subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
        # The package appeared after interpreter start-up; drop stale finder
        # caches so the fresh install is visible to the import system.
        importlib.invalidate_caches()
        requests = importlib.import_module("requests")
    return requests


def upload_file_to_comfyui(local_path: str, content_type: str) -> str:
    """Upload a local file to ComfyUI and return the uploaded filename.

    Args:
        local_path: Path of the file to send.
        content_type: MIME type declared for the multipart part.

    Returns:
        The ``name`` field of ComfyUI's JSON response.

    Raises:
        requests.HTTPError: if ComfyUI answers with an error status.
    """
    http = _ensure_requests()
    with open(local_path, "rb") as f:
        resp = http.post(
            f"{COMFYUI_API_URL}/upload/image",
            files={"image": (os.path.basename(local_path), f, content_type)},
            timeout=30,
        )
    resp.raise_for_status()
    data = resp.json()
    return data["name"]


def detect_audio_mime(filename: str) -> str:
    """Detect MIME type from audio file extension.

    The extension is matched case-insensitively; unknown extensions map to
    ``application/octet-stream``.
    """
    ext = os.path.splitext(filename)[1].lower()
    mime_map = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }
    return mime_map.get(ext, "application/octet-stream")


def build_workflow(
    source_name: str,
    narrator_name: str,
    language: str,
    exaggeration: float,
    temperature: float,
) -> dict:
    """Build a voice conversion workflow with the given parameters.

    Works on a deep copy of WORKFLOW, so the module-level template is never
    mutated.

    Args:
        source_name: Uploaded filename of the audio to convert.
        narrator_name: Uploaded filename of the target-voice reference.
        language: Value for the engine node's ``language`` input.
        exaggeration: Value for the engine node's ``exaggeration`` input.
        temperature: Value for the engine node's ``temperature`` input.

    Returns:
        A new workflow dict ready to be submitted to ComfyUI.
    """
    wf = copy.deepcopy(WORKFLOW)

    # Swap VHS_LoadAudio → LoadAudio for uploaded-file compatibility
    # Source audio (node 76)
    wf["76"] = {
        "inputs": {"audio": source_name},
        "class_type": "LoadAudio",
        "_meta": {"title": "Load Audio (Source Voice)"},
    }

    # Narrator/target voice (node 77)
    wf["77"] = {
        "inputs": {"audio": narrator_name},
        "class_type": "LoadAudio",
        "_meta": {"title": "Load Audio (Narrator Voice)"},
    }

    # ChatterBox engine settings (node 74)
    wf["74"]["inputs"]["language"] = language
    wf["74"]["inputs"]["exaggeration"] = exaggeration
    wf["74"]["inputs"]["temperature"] = temperature

    # Output prefix (node 75)
    wf["75"]["inputs"]["filename_prefix"] = "audio/emblema-speech-convert"

    return wf


def submit_prompt(workflow: dict) -> str:
    """Submit workflow to ComfyUI and return prompt_id.

    Raises:
        RuntimeError: if ComfyUI answers with a non-200 status, or answers 200
            but reports ``node_errors``.
    """
    http = _ensure_requests()
    resp = http.post(
        f"{COMFYUI_API_URL}/prompt",
        json={"prompt": workflow},
        timeout=30,
    )
    if resp.status_code != 200:
        try:
            error_detail = resp.json()
        except ValueError:
            # Body is not JSON; fall back to the raw text.
            error_detail = resp.text
        raise RuntimeError(
            f"ComfyUI /prompt returned {resp.status_code}: "
            f"{json.dumps(error_detail, indent=2) if isinstance(error_detail, dict) else error_detail}"
        )
    data = resp.json()

    # ComfyUI returns 200 even when nodes have validation errors
    node_errors = data.get("node_errors", {})
    if node_errors:
        raise RuntimeError(
            f"ComfyUI workflow has node errors: {json.dumps(node_errors, indent=2)}"
        )

    return data["prompt_id"]


def wait_for_result(prompt_id: str, timeout: int = 600, poll_interval: int = 3) -> dict:
    """Poll ComfyUI history until the prompt completes with outputs.

    Args:
        prompt_id: Identifier returned by submit_prompt().
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The history entry for ``prompt_id`` once it is completed and has outputs.

    Raises:
        RuntimeError: if the prompt reports an error status, or repeatedly
            reports completion without any outputs.
        TimeoutError: if the prompt does not complete within ``timeout`` seconds.
    """
    http = _ensure_requests()
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    empty_complete_retries = 0
    max_empty_retries = 3  # grace period for output serialization lag

    while time.monotonic() < deadline:
        resp = http.get(
            f"{COMFYUI_API_URL}/history/{prompt_id}",
            timeout=10,
        )
        resp.raise_for_status()
        history = resp.json()

        if prompt_id in history:
            prompt_data = history[prompt_id]
            status = prompt_data.get("status", {})

            if status.get("status_str") == "error":
                messages = status.get("messages", [])
                raise RuntimeError(
                    f"ComfyUI prompt failed: {json.dumps(messages, indent=2)}"
                )

            if status.get("completed", False):
                if prompt_data.get("outputs"):
                    return prompt_data

                # Completed but no outputs — retry briefly for race condition
                empty_complete_retries += 1
                if empty_complete_retries >= max_empty_retries:
                    raise RuntimeError(
                        f"ComfyUI prompt completed but produced no outputs. "
                        f"This usually means a node failed silently (missing custom node or model). "
                        f"Status: {json.dumps(status, indent=2)}"
                    )

        time.sleep(poll_interval)

    raise TimeoutError(f"ComfyUI prompt {prompt_id} did not complete within {timeout}s")


def download_output_audio(prompt_data: dict, output_dir: str) -> str:
    """Download the converted audio from ComfyUI.

    Takes the first ``audio`` entry found in any node's outputs, fetches it
    from the ``/view`` endpoint and writes it to ``output_dir``.

    Returns:
        The name (not the full path) of the file written inside ``output_dir``.

    Raises:
        RuntimeError: if no node output contains an ``audio`` entry.
    """
    outputs = prompt_data.get("outputs", {})
    for node_output in outputs.values():
        audio_list = node_output.get("audio") or []
        if not audio_list:
            continue

        audio_info = audio_list[0]
        filename = audio_info["filename"]
        subfolder = audio_info.get("subfolder", "")
        audio_type = audio_info.get("type", "output")

        resp = _ensure_requests().get(
            f"{COMFYUI_API_URL}/view",
            params={
                "filename": filename,
                "subfolder": subfolder,
                "type": audio_type,
            },
            timeout=120,
        )
        resp.raise_for_status()

        # The filename comes from the server; keep only its last component so
        # the write can never land outside output_dir.
        out_filename = f"converted_{os.path.basename(filename)}"
        out_path = os.path.join(output_dir, out_filename)
        with open(out_path, "wb") as f:
            f.write(resp.content)

        return out_filename

    raise RuntimeError(
        f"No output audio found in ComfyUI response. Available outputs: {json.dumps(outputs, indent=2)}"
    )


def _float_input(inputs: dict, key: str, default: float) -> float:
    """Read a numeric input, using *default* when the key is absent or null."""
    raw = inputs.get(key)
    if raw is None:
        return default
    try:
        return float(raw)
    except (TypeError, ValueError):
        raise ValueError(f"{key} must be a number, got {raw!r}") from None


def main():
    """Run one speech conversion described by the JSON payload on stdin.

    Prints ``{"audio": <filename>}`` to stdout on success. Any exception is
    reported as a JSON object on stderr and the process exits with status 1.
    """
    try:
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)
        # `or` also covers an explicit JSON null for "inputs" / "language".
        inputs = execution_input.get("inputs") or {}

        source_audio = inputs.get("source_audio", "")
        narrator_voice = inputs.get("narrator_voice", "")
        language = inputs.get("language") or "Italian"
        exaggeration = _float_input(inputs, "exaggeration", 0.8)
        temperature = _float_input(inputs, "temperature", 0.8)

        if not source_audio:
            raise ValueError("Source audio input is required")
        if not narrator_voice:
            raise ValueError("Narrator voice input is required")
        if not (0.0 <= exaggeration <= 1.0):
            raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")
        if not (0.0 <= temperature <= 1.5):
            raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        # Upload source audio to ComfyUI
        source_path = os.path.join(INPUT_DIR, source_audio)
        if not os.path.isfile(source_path):
            raise FileNotFoundError(f"Source audio not found: {source_path}")
        source_mime = detect_audio_mime(source_audio)
        source_name = upload_file_to_comfyui(source_path, source_mime)

        # Upload narrator voice to ComfyUI
        narrator_path = os.path.join(INPUT_DIR, narrator_voice)
        if not os.path.isfile(narrator_path):
            raise FileNotFoundError(f"Narrator voice not found: {narrator_path}")
        narrator_mime = detect_audio_mime(narrator_voice)
        narrator_name = upload_file_to_comfyui(narrator_path, narrator_mime)

        # Build workflow, submit, wait, download
        workflow = build_workflow(source_name, narrator_name, language, exaggeration, temperature)
        prompt_id = submit_prompt(workflow)
        prompt_data = wait_for_result(prompt_id)
        out_filename = download_output_audio(prompt_data, OUTPUT_DIR)

        # Log metadata to stderr
        print(
            f"prompt_id={prompt_id}, language={language}, "
            f"exaggeration={exaggeration}, temperature={temperature}",
            file=sys.stderr,
        )

        # Flat output — keys match OUTPUT_SCHEMA
        output = {
            "audio": out_filename,
        }
        print(json.dumps(output, indent=2))

    except Exception as e:
        # Top-level boundary: report every failure in a machine-readable form.
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()