$ cat node-template.py

Video Creation From Speech

// Generates a talking-head video by animating a portrait image driven by an audio clip. Uses the SONIC model via a native GPU service with face detection, Whisper audio conditioning, dual guidance, and RIFE frame interpolation.

Process
Video
template.py
import os
import sys
import json
import subprocess
import time
import traceback

try:
    import requests
except ImportError:
    # Install the single third-party dependency on the fly when it is missing.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
    import requests

NATIVE_VIDEO_SPEECH_SERVICE_URL = os.getenv(
    "NATIVE_VIDEO_SPEECH_SERVICE_URL", "http://native-video-speech-service:8106"
)
_EMBLEMA_VERSION = os.getenv("EMBLEMA_VERSION", "dev")
NATIVE_VIDEO_SPEECH_SERVICE_IMAGE = os.getenv(
    "NATIVE_VIDEO_SPEECH_SERVICE_IMAGE",
    f"emblema/native-video-speech-service:{_EMBLEMA_VERSION}",
)
HF_CACHE_HOST_PATH = os.getenv("HF_CACHE_HOST_PATH", "/root/.cache/huggingface")
CONTAINER_NAME = "native-video-speech-service"
INPUT_DIR = "/data/input"
OUTPUT_DIR = "/data/output"


def start_container():
    """Create and start native-video-speech-service, removing any stale container first.

    Runs ``docker run -d`` for NATIVE_VIDEO_SPEECH_SERVICE_IMAGE and then polls
    ``<NATIVE_VIDEO_SPEECH_SERVICE_URL>/health`` until it answers HTTP 200.

    Raises:
        RuntimeError: if ``docker run`` exits non-zero, or if the health
            endpoint does not return 200 within the timeout. The container is
            NOT removed here on failure; the caller is expected to call
            ``stop_container()``.
    """
    # Best effort: a leftover container with the same name would make
    # `docker run --name` fail, so drop it first and ignore the outcome.
    subprocess.run(
        ["docker", "rm", "-f", CONTAINER_NAME],
        capture_output=True, text=True, timeout=30
    )

    hf_token = os.getenv("HUGGINGFACE_TOKEN", "")
    print(f"Creating container {CONTAINER_NAME}...", file=sys.stderr)
    run_cmd = [
        "docker", "run", "-d",
        "--name", CONTAINER_NAME,
        "--network", "emblema",
        "--gpus", "all",
        "-e", "PORT=8106",
        "-e", "DEVICE=cuda",
        "-e", f"HF_TOKEN={hf_token}",
        "-e", f"SONIC_REPO_ID={os.getenv('SONIC_REPO_ID', 'LeonJoe13/Sonic')}",
        "-e", f"ENABLE_RIFE={os.getenv('ENABLE_RIFE', 'true')}",
        "-v", f"{HF_CACHE_HOST_PATH}:/root/.cache/huggingface",
        NATIVE_VIDEO_SPEECH_SERVICE_IMAGE,
    ]
    result = subprocess.run(run_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"docker run failed (exit {result.returncode}): {result.stderr}", file=sys.stderr)
        raise RuntimeError(f"Failed to start container: {result.stderr}")

    # Poll health endpoint
    timeout = 360
    interval = 3
    health_url = f"{NATIVE_VIDEO_SPEECH_SERVICE_URL}/health"

    # Use a wall-clock deadline: each probe may itself block for up to 5s, so
    # counting only the sleep intervals would let the wait run well past
    # `timeout`.
    started = time.monotonic()
    deadline = started + timeout
    while time.monotonic() < deadline:
        try:
            r = requests.get(health_url, timeout=5)
            if r.status_code == 200:
                waited = int(time.monotonic() - started)
                print(f"Container healthy (waited {waited}s).", file=sys.stderr)
                return
        except (requests.ConnectionError, requests.Timeout):
            # Service not listening yet, or too slow to answer: keep polling.
            # ReadTimeout is not a ConnectionError, hence the explicit Timeout.
            pass
        time.sleep(interval)

    raise RuntimeError(f"Container did not become healthy within {timeout}s")


def stop_container():
    """Remove the container.

    Best effort: never raises. A failed removal is reported on stderr only.
    """
    try:
        result = subprocess.run(
            ["docker", "rm", "-f", CONTAINER_NAME],
            capture_output=True, text=True, timeout=30
        )
    except Exception as e:
        print(f"Warning: failed to remove container: {e}", file=sys.stderr)
        return

    if result.returncode == 0:
        print(f"Container {CONTAINER_NAME} removed.", file=sys.stderr)
    else:
        # Do not claim success when docker reported an error.
        print(
            f"Warning: failed to remove container: {result.stderr.strip()}",
            file=sys.stderr,
        )


def main():
    """Generate a video from an image and an audio file via the speech service.

    Reads a JSON document from stdin and uses its ``"inputs"`` object:
    ``image`` and ``audio`` (file names relative to INPUT_DIR, both required)
    plus the optional ``resolution``, ``min_resolution``, ``dynamic_scale``,
    ``crop``, ``seed``, ``inference_steps`` and ``enable_rife`` settings, which
    are forwarded to the service's ``/generate`` endpoint as form fields.

    On success the response body is written to ``OUTPUT_DIR/speech_video.mp4``
    and ``{"video": "speech_video.mp4"}`` is printed to stdout as JSON.

    On any failure a JSON object with ``error``, ``errorType`` and
    ``traceback`` is printed to stderr and the process exits with status 1.
    """
    try:
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)
        inputs = execution_input.get("inputs", {})

        image = inputs.get("image", "")
        audio = inputs.get("audio", "")
        resolution = inputs.get("resolution", "768x512")
        min_resolution = inputs.get("min_resolution", 512)
        dynamic_scale = inputs.get("dynamic_scale", 1.0)
        crop = inputs.get("crop", False)
        seed = inputs.get("seed", 72589)
        inference_steps = inputs.get("inference_steps", 25)
        enable_rife = inputs.get("enable_rife", True)

        # Validate everything that can be checked locally before paying the
        # cost of starting the container.
        if not image:
            raise ValueError("Input image is required")
        if not audio:
            raise ValueError("Input audio is required")

        image_path = os.path.join(INPUT_DIR, image)
        audio_path = os.path.join(INPUT_DIR, audio)

        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Input image not found: {image_path}")
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Input audio not found: {audio_path}")

        os.makedirs(OUTPUT_DIR, exist_ok=True)

        try:
            # Start the container inside the guarded region so that a failed
            # or timed-out health check still reaches stop_container() and
            # does not leave the container running.
            start_container()

            # Send image and audio to service
            with open(image_path, "rb") as img_f, open(audio_path, "rb") as aud_f:
                resp = requests.post(
                    f"{NATIVE_VIDEO_SPEECH_SERVICE_URL}/generate",
                    files={
                        "image": (os.path.basename(image_path), img_f, "image/png"),
                        "audio": (os.path.basename(audio_path), aud_f, "audio/wav"),
                    },
                    data={
                        "resolution": str(resolution),
                        "min_resolution": str(min_resolution),
                        "dynamic_scale": str(dynamic_scale),
                        # Booleans are sent as lowercase "true"/"false".
                        "crop": str(crop).lower(),
                        "seed": str(seed),
                        "inference_steps": str(inference_steps),
                        "enable_rife": str(enable_rife).lower(),
                    },
                    timeout=2520,  # 42 minutes
                )

            if resp.status_code != 200:
                # Prefer the structured error body; fall back to raw text.
                try:
                    error_detail = resp.json()
                except Exception:
                    error_detail = resp.text
                raise RuntimeError(
                    f"Video speech service returned {resp.status_code}: {error_detail}"
                )

            # Save result
            out_filename = "speech_video.mp4"
            out_path = os.path.join(OUTPUT_DIR, out_filename)
            with open(out_path, "wb") as f:
                f.write(resp.content)

            inference_time = resp.headers.get("X-Inference-Time-Ms", "unknown")
            print(f"Video generated: time={inference_time}ms", file=sys.stderr)

            output = {
                "video": out_filename,
            }
            print(json.dumps(output, indent=2))

        finally:
            stop_container()

    except Exception as e:
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()