$ cat node-template.py

Speech Conversion

// Converts speech from one voice to another using ChatterBox v2 via a native GPU service. Takes a source audio and a target narrator voice reference, then re-synthesizes the speech in the target voice. Supports 23 languages and adjustable expressiveness. Outputs an MP3 audio file.

Process
Audio
template.py
import os
import sys
import json
import subprocess
import time
import traceback

try:
    import requests
except ImportError:
    # Fall back to installing requests when it is not importable.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
    import requests

NATIVE_SPEECH_CONVERSION_SERVICE_URL = os.getenv(
    "NATIVE_SPEECH_CONVERSION_SERVICE_URL", "http://native-speech-conversion-service:8109"
)
_EMBLEMA_VERSION = os.getenv("EMBLEMA_VERSION", "dev")
NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE = os.getenv(
    "NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE",
    f"emblema/native-speech-conversion-service:{_EMBLEMA_VERSION}",
)
HF_CACHE_HOST_PATH = os.getenv("HF_CACHE_HOST_PATH", "/root/.cache/huggingface")
CONTAINER_NAME = "native-speech-conversion-service"
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"


def start_container():
    """Create and start native-speech-conversion-service, removing any stale container first.

    Runs ``docker run -d`` for NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE and then
    polls ``<service url>/health`` until it answers with HTTP 200.

    Raises:
        RuntimeError: If ``docker run`` exits non-zero, or if the health
            endpoint does not return 200 within the timeout. The container is
            NOT removed here on failure; the caller is expected to call
            stop_container().
    """
    # Remove any container left over from a previous run; the result is
    # deliberately ignored because "no such container" is the normal case.
    subprocess.run(
        ["docker", "rm", "-f", CONTAINER_NAME],
        capture_output=True, text=True
    )

    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    print(f"Creating container {CONTAINER_NAME}...", file=sys.stderr)
    run_cmd = [
        "docker", "run", "-d",
        "--name", CONTAINER_NAME,
        "--network", "emblema",
        "--gpus", "all",
        "-e", "PORT=8109",
        "-e", "DEVICE=cuda",
        # Name-only form: docker copies the value from the CLI's own
        # environment, so the token never appears in the process arguments.
        "-e", "HF_TOKEN",
        "-v", f"{HF_CACHE_HOST_PATH}:/root/.cache/huggingface",
        NATIVE_SPEECH_CONVERSION_SERVICE_IMAGE,
    ]
    docker_env = {**os.environ, "HF_TOKEN": hf_token}
    result = subprocess.run(run_cmd, capture_output=True, text=True, env=docker_env)
    if result.returncode != 0:
        print(f"docker run failed (exit {result.returncode}): {result.stderr}", file=sys.stderr)
        raise RuntimeError(f"Failed to start container: {result.stderr}")

    # Poll health endpoint. A monotonic deadline is used so that time spent
    # inside slow or timed-out requests counts against the budget too.
    timeout = 180
    interval = 3
    health_url = f"{NATIVE_SPEECH_CONVERSION_SERVICE_URL}/health"
    started = time.monotonic()
    deadline = started + timeout
    while time.monotonic() < deadline:
        try:
            r = requests.get(health_url, timeout=5)
            if r.status_code == 200:
                elapsed = int(time.monotonic() - started)
                print(f"Container healthy (waited {elapsed}s).", file=sys.stderr)
                return
        except (requests.ConnectionError, requests.Timeout):
            # Service not accepting connections or not answering yet; a read
            # timeout is just as transient as a refused connection here.
            pass
        time.sleep(interval)

    raise RuntimeError(f"Container did not become healthy within {timeout}s")


def stop_container():
    """Remove the container.

    Best-effort: failures are reported on stderr as warnings and never raised,
    so this is safe to call from a ``finally`` block.
    """
    try:
        result = subprocess.run(
            ["docker", "rm", "-f", CONTAINER_NAME],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0:
            print(f"Container {CONTAINER_NAME} removed.", file=sys.stderr)
        else:
            print(
                f"Warning: failed to remove container: {result.stderr.strip()}",
                file=sys.stderr,
            )
    except Exception as e:
        print(f"Warning: failed to remove container: {e}", file=sys.stderr)


def detect_audio_mime(filename: str) -> str:
    """Detect MIME type from audio file extension.

    The extension is matched case-insensitively. Unknown or missing
    extensions yield "application/octet-stream".
    """
    ext = os.path.splitext(filename)[1].lower()
    mime_map = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }
    return mime_map.get(ext, "application/octet-stream")


def main():
    """Run one speech conversion described by a JSON document on stdin.

    The JSON document is expected to carry an "inputs" object with the keys
    "source_audio" and "narrator_voice" (file names resolved against
    INPUT_DIR) and the optional keys "language" (default "Italian"),
    "exaggeration" (0.0-1.0, default 0.8) and "temperature" (0.0-1.5,
    default 0.8).

    On success the converted audio is written to OUTPUT_DIR and a JSON object
    ``{"audio": <file name>}`` is printed to stdout. On any failure a JSON
    object with "error", "errorType" and "traceback" is printed to stderr and
    the process exits with status 1.
    """
    try:
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)
        inputs = execution_input.get("inputs", {})

        source_audio = inputs.get("source_audio", "")
        narrator_voice = inputs.get("narrator_voice", "")
        language = inputs.get("language", "Italian")
        exaggeration = float(inputs.get("exaggeration", 0.8))
        temperature = float(inputs.get("temperature", 0.8))

        if not source_audio:
            raise ValueError("Source audio input is required")
        if not narrator_voice:
            raise ValueError("Narrator voice input is required")
        if not (0.0 <= exaggeration <= 1.0):
            raise ValueError(f"Exaggeration must be between 0.0 and 1.0, got {exaggeration}")
        if not (0.0 <= temperature <= 1.5):
            raise ValueError(f"Temperature must be between 0.0 and 1.5, got {temperature}")

        # NOTE(review): the file names come from stdin and are joined without
        # normalisation; confirm the caller guarantees they stay in INPUT_DIR.
        source_path = os.path.join(INPUT_DIR, source_audio)
        if not os.path.exists(source_path):
            raise FileNotFoundError(f"Source audio not found: {source_path}")

        narrator_path = os.path.join(INPUT_DIR, narrator_voice)
        if not os.path.exists(narrator_path):
            raise FileNotFoundError(f"Narrator voice not found: {narrator_path}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        try:
            # Start the container inside the try block so that a failed
            # health check still reaches stop_container() and does not leave
            # the container running.
            start_container()

            # Send multipart form data to service
            source_mime = detect_audio_mime(source_audio)
            narrator_mime = detect_audio_mime(narrator_voice)

            with open(source_path, "rb") as sf, open(narrator_path, "rb") as nf:
                resp = requests.post(
                    f"{NATIVE_SPEECH_CONVERSION_SERVICE_URL}/convert",
                    files={
                        "source_audio": (os.path.basename(source_path), sf, source_mime),
                        "narrator_voice": (os.path.basename(narrator_path), nf, narrator_mime),
                    },
                    data={
                        "language": language,
                        "exaggeration": str(exaggeration),
                        "temperature": str(temperature),
                    },
                    timeout=600,
                )

            if resp.status_code != 200:
                # Prefer the structured error body; fall back to raw text.
                try:
                    error_detail = resp.json()
                except Exception:
                    error_detail = resp.text
                raise RuntimeError(
                    f"Speech conversion service returned {resp.status_code}: {error_detail}"
                )

            # Save result
            out_filename = "converted_speech.mp3"
            out_path = os.path.join(OUTPUT_DIR, out_filename)
            with open(out_path, "wb") as f:
                f.write(resp.content)

            inference_time = resp.headers.get("X-Inference-Time-Ms", "unknown")
            print(
                f"Voice converted: time={inference_time}ms, language={language}, "
                f"exaggeration={exaggeration}, temperature={temperature}",
                file=sys.stderr,
            )

            # Flat output - keys match OUTPUT_SCHEMA
            output = {
                "audio": out_filename,
            }
            print(json.dumps(output, indent=2))

        finally:
            stop_container()

    except Exception as e:
        # Top-level boundary: report every failure as JSON on stderr.
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()