$ cat node-template.py
Speech Conversion
// Converts speech from one voice to another using ChatterBox v2 via a native GPU service. Takes a source audio file and a reference recording of the target narrator voice, then re-synthesizes the speech in the target voice. Supports 23 languages and adjustable expressiveness. Outputs an MP3 audio file.
Process
Audio
template.py
"""Speech conversion node template.

Reads a JSON execution payload from stdin, starts the
native-speech-conversion-service container, posts the source audio and the
narrator voice reference to its ``/convert`` endpoint, writes the response
body to ``converted_speech.mp3`` in the output directory and prints the
output JSON on stdout. Diagnostics and errors go to stderr.
"""

import os
import sys
import json
import subprocess
import time
import traceback

try:
    import requests
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
    import requests

NATIVE_SPEECH_CONVERSION_SERVICE_URL = os.getenv(
    "NATIVE_SPEECH_CONVERSION_SERVICE_URL", "http://native-speech-conversion-service:8109"
)
_EMBLEMA_VERSION = os.getenv("EMBLEMA_VERSION", "dev")
NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE = os.getenv(
    "NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE",
    f"emblema/native-speech-conversion-service:{_EMBLEMA_VERSION}",
)
HF_CACHE_HOST_PATH = os.getenv("HF_CACHE_HOST_PATH", "/root/.cache/huggingface")
CONTAINER_NAME = "native-speech-conversion-service"
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"

# File extension -> MIME type sent with each multipart upload.
_AUDIO_MIME_TYPES = {
    ".mp3": "audio/mpeg",
    ".wav": "audio/wav",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
    ".aac": "audio/aac",
    ".m4a": "audio/mp4",
}


def start_container(timeout=180, interval=3):
    """Create and start native-speech-conversion-service, removing any stale container first.

    Args:
        timeout: Maximum wall-clock seconds to wait for the health endpoint.
        interval: Seconds to sleep between health probes.

    Raises:
        RuntimeError: If ``docker run`` fails or the service does not answer
            ``/health`` with HTTP 200 before ``timeout`` elapses.
    """
    # Best effort: a leftover container with the same name would make
    # `docker run --name` fail, so the result is intentionally ignored.
    subprocess.run(
        ["docker", "rm", "-f", CONTAINER_NAME],
        capture_output=True, text=True
    )

    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    print(f"Creating container {CONTAINER_NAME}...", file=sys.stderr)
    run_cmd = [
        "docker", "run", "-d",
        "--name", CONTAINER_NAME,
        "--network", "emblema",
        "--gpus", "all",
        "-e", "PORT=8109",
        "-e", "DEVICE=cuda",
        # NOTE(review): the token is passed on the docker command line and is
        # therefore visible in the process list while `docker run` executes.
        "-e", f"HF_TOKEN={hf_token}",
        "-v", f"{HF_CACHE_HOST_PATH}:/root/.cache/huggingface",
        NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE,
    ]
    result = subprocess.run(run_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"docker run failed (exit {result.returncode}): {result.stderr}", file=sys.stderr)
        raise RuntimeError(f"Failed to start container: {result.stderr}")

    # Poll the health endpoint against a wall-clock deadline so that time
    # spent inside slow requests counts toward the timeout as well.
    health_url = f"{NATIVE_SPEECH_CONVERSION_SERVICE_URL}/health"
    started = time.monotonic()
    deadline = started + timeout
    while time.monotonic() < deadline:
        try:
            r = requests.get(health_url, timeout=5)
            if r.status_code == 200:
                waited = int(time.monotonic() - started)
                print(f"Container healthy (waited {waited}s).", file=sys.stderr)
                return
        except requests.RequestException:
            # Refused connections and read/connect timeouts are both expected
            # while the service is still starting; keep polling.
            pass
        time.sleep(interval)

    raise RuntimeError(f"Container did not become healthy within {timeout}s")


def stop_container():
    """Remove the container.

    Never raises: failures are reported on stderr so that cleanup cannot mask
    the error that triggered it.
    """
    try:
        result = subprocess.run(
            ["docker", "rm", "-f", CONTAINER_NAME],
            capture_output=True, text=True, timeout=30
        )
    except Exception as e:
        print(f"Warning: failed to remove container: {e}", file=sys.stderr)
        return

    if result.returncode == 0:
        print(f"Container {CONTAINER_NAME} removed.", file=sys.stderr)
    else:
        # Only claim success when docker actually reported it.
        print(
            f"Warning: failed to remove container: {result.stderr.strip()}",
            file=sys.stderr,
        )


def detect_audio_mime(filename: str) -> str:
    """Detect MIME type from audio file extension.

    The extension is matched case-insensitively; unknown or missing
    extensions yield ``application/octet-stream``.
    """
    ext = os.path.splitext(filename)[1].lower()
    return _AUDIO_MIME_TYPES.get(ext, "application/octet-stream")


def _read_inputs(inputs):
    """Extract and validate the node inputs.

    Returns:
        Tuple ``(source_audio, narrator_voice, language, exaggeration,
        temperature)``.

    Raises:
        ValueError: If a required input is missing or a numeric input is
            malformed or out of range.
        TypeError: If a numeric input has a type ``float()`` cannot convert.
    """
    source_audio = inputs.get("source_audio", "")
    narrator_voice = inputs.get("narrator_voice", "")
    language = inputs.get("language", "Italian")
    exaggeration = float(inputs.get("exaggeration", 0.8))
    temperature = float(inputs.get("temperature", 0.8))

    if not source_audio:
        raise ValueError("Source audio input is required")
    if not narrator_voice:
        raise ValueError("Narrator voice input is required")
    if not (0.0 <= exaggeration <= 1.0):
        raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")
    if not (0.0 <= temperature <= 1.5):
        raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")

    return source_audio, narrator_voice, language, exaggeration, temperature


def _request_conversion(source_path, narrator_path, language, exaggeration, temperature):
    """POST both audio files to the service's ``/convert`` endpoint.

    Returns:
        The ``requests.Response`` of a successful (HTTP 200) conversion.

    Raises:
        RuntimeError: If the service answers with a non-200 status.
    """
    source_mime = detect_audio_mime(source_path)
    narrator_mime = detect_audio_mime(narrator_path)

    with open(source_path, "rb") as sf, open(narrator_path, "rb") as nf:
        resp = requests.post(
            f"{NATIVE_SPEECH_CONVERSION_SERVICE_URL}/convert",
            files={
                "source_audio": (os.path.basename(source_path), sf, source_mime),
                "narrator_voice": (os.path.basename(narrator_path), nf, narrator_mime),
            },
            data={
                "language": language,
                "exaggeration": str(exaggeration),
                "temperature": str(temperature),
            },
            timeout=600,
        )

    if resp.status_code != 200:
        # Prefer the structured error body, fall back to raw text.
        try:
            error_detail = resp.json()
        except Exception:
            error_detail = resp.text
        raise RuntimeError(
            f"Speech conversion service returned {resp.status_code}: {error_detail}"
        )
    return resp


def main():
    """Run one speech conversion described by the JSON payload on stdin.

    On success prints ``{"audio": "converted_speech.mp3"}`` on stdout. On any
    failure prints a JSON object with ``error``, ``errorType`` and
    ``traceback`` on stderr and exits with status 1.
    """
    try:
        execution_input = json.loads(sys.stdin.read())
        # Treat a missing or null "inputs" entry as an empty mapping so the
        # user sees the "input is required" error instead of AttributeError.
        inputs = execution_input.get("inputs") or {}

        (
            source_audio,
            narrator_voice,
            language,
            exaggeration,
            temperature,
        ) = _read_inputs(inputs)

        source_path = os.path.join(INPUT_DIR, source_audio)
        if not os.path.exists(source_path):
            raise FileNotFoundError(f"Source audio not found: {source_path}")

        narrator_path = os.path.join(INPUT_DIR, narrator_voice)
        if not os.path.exists(narrator_path):
            raise FileNotFoundError(f"Narrator voice not found: {narrator_path}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        try:
            # Started inside the try so a failed or timed-out startup still
            # reaches the finally clause and does not leak the container.
            start_container()

            resp = _request_conversion(
                source_path, narrator_path, language, exaggeration, temperature
            )

            # Save result
            out_filename = "converted_speech.mp3"
            out_path = os.path.join(OUTPUT_DIR, out_filename)
            with open(out_path, "wb") as f:
                f.write(resp.content)

            inference_time = resp.headers.get("X-Inference-Time-Ms", "unknown")
            print(
                f"Voice converted: time={inference_time}ms, language={language}, "
                f"exaggeration={exaggeration}, temperature={temperature}",
                file=sys.stderr,
            )

            # Flat output - keys match OUTPUT_SCHEMA
            output = {
                "audio": out_filename,
            }
            print(json.dumps(output, indent=2))

        finally:
            stop_container()

    except Exception as e:
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()