$ cat node-template.py

Image Vision Legacy

// Analyzes images using Florence-2 computer vision. Generates detailed captions, OCR text extraction, and image descriptions. Output is text that can be connected to prompt inputs of other nodes.

Process
Image
template.py
"""ComfyUI Florence-2 image-vision node.

Reads an execution request from stdin, uploads the referenced image to a
ComfyUI server, runs a Florence-2 vision workflow (captioning / OCR /
detection), polls the history endpoint until completion, and prints the
extracted text as JSON on stdout. Errors are reported as JSON on stderr
with a non-zero exit code.
"""

import copy
import json
import os
import random
import sys
import time
import traceback

try:
    import requests
except ImportError:
    # Best-effort bootstrap: the node runtime may not ship `requests`.
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
    import requests

COMFYUI_API_URL = os.getenv("COMFYUI_API_URL", "http://192.168.1.39:8188")
INPUT_DIR = "/data/input"

# Workflow template in ComfyUI API ("prompt") format. Node "7" is replaced
# with a LoadImage node at runtime by build_workflow(); node "2" runs the
# Florence-2 model and node "6" surfaces the resulting text.
WORKFLOW = {
    "2": {
        "inputs": {
            "text_input": "",
            "task": "more_detailed_caption",
            "fill_mask": True,
            "keep_model_loaded": False,
            "max_new_tokens": 1024,
            "num_beams": 3,
            "do_sample": True,
            "output_mask_select": "",
            "seed": 732209118079946,
            "image": ["7", 0],
            "florence2_model": ["3", 0],
        },
        "class_type": "Florence2Run",
        "_meta": {"title": "Florence2Run"},
    },
    "3": {
        "inputs": {
            "model": "gokaygokay/Florence-2-Flux-Large",
            "precision": "fp16",
            "attention": "sdpa",
            "convert_to_safetensors": False,
        },
        "class_type": "DownloadAndLoadFlorence2Model",
        "_meta": {"title": "DownloadAndLoadFlorence2Model"},
    },
    "7": {
        "inputs": {
            "directory": "",
            "image_load_cap": 0,
            "skip_first_images": 0,
            "select_every_nth": 1,
        },
        "class_type": "VHS_LoadImagesPath",
        "_meta": {"title": "Load Images (Path)"},
    },
    "6": {
        "inputs": {
            "text": ["2", 2],
        },
        "class_type": "ShowText|pysssss",
        "_meta": {"title": "Show Text"},
    },
}


def upload_image_to_comfyui(local_path: str) -> str:
    """Upload a local image to ComfyUI and return the uploaded filename.

    Args:
        local_path: Path of the image file on this host.

    Returns:
        The server-side name ComfyUI assigned to the upload (may differ
        from the local basename on collision).

    Raises:
        requests.HTTPError: if the upload endpoint returns an error status.
    """
    with open(local_path, "rb") as f:
        resp = requests.post(
            f"{COMFYUI_API_URL}/upload/image",
            files={"image": (os.path.basename(local_path), f, "image/png")},
            timeout=30,
        )
    resp.raise_for_status()
    data = resp.json()
    return data["name"]


def build_workflow(comfyui_image_name: str, task: str) -> dict:
    """Inject user inputs into a fresh copy of the workflow template.

    Args:
        comfyui_image_name: Server-side image name returned by the upload.
        task: Florence-2 task identifier (already validated by main()).

    Returns:
        A deep copy of WORKFLOW with the image, task, and a fresh random
        seed injected; the module-level template is never mutated.
    """
    wf = copy.deepcopy(WORKFLOW)

    # Set input image — swap VHS_LoadImagesPath to LoadImage, since the
    # image was uploaded as a single file rather than a directory.
    wf["7"] = {
        "inputs": {"image": comfyui_image_name},
        "class_type": "LoadImage",
        "_meta": {"title": "Load Image"},
    }

    # Set vision task
    wf["2"]["inputs"]["task"] = task

    # Randomize seed so repeated runs on the same image are not cached away.
    wf["2"]["inputs"]["seed"] = random.randint(0, 2**31 - 1)

    return wf


def submit_prompt(workflow: dict) -> str:
    """Submit a workflow to ComfyUI and return its prompt_id.

    Raises:
        RuntimeError: on a non-200 response, or when ComfyUI reports
            per-node validation errors (which it returns with HTTP 200).
    """
    resp = requests.post(
        f"{COMFYUI_API_URL}/prompt",
        json={"prompt": workflow},
        timeout=30,
    )
    if resp.status_code != 200:
        try:
            error_detail = resp.json()
        except Exception:
            error_detail = resp.text
        raise RuntimeError(
            f"ComfyUI /prompt returned {resp.status_code}: {json.dumps(error_detail, indent=2) if isinstance(error_detail, dict) else error_detail}"
        )
    data = resp.json()

    # ComfyUI returns 200 even when nodes have validation errors
    node_errors = data.get("node_errors", {})
    if node_errors:
        raise RuntimeError(
            f"ComfyUI workflow has node errors: {json.dumps(node_errors, indent=2)}"
        )

    return data["prompt_id"]


def wait_for_result(prompt_id: str, timeout: int = 600, poll_interval: int = 2) -> dict:
    """Poll ComfyUI history until the prompt completes with outputs.

    Args:
        prompt_id: Identifier returned by submit_prompt().
        timeout: Overall wait budget in seconds.
        poll_interval: Seconds between history polls.

    Returns:
        The history entry for the prompt, including its "outputs" mapping.

    Raises:
        RuntimeError: if the prompt errored, or completed repeatedly
            without producing any outputs (silent node failure).
        TimeoutError: if the prompt does not complete within `timeout`.
    """
    deadline = time.time() + timeout
    empty_complete_retries = 0
    max_empty_retries = 3  # grace period for output serialization lag

    while time.time() < deadline:
        resp = requests.get(
            f"{COMFYUI_API_URL}/history/{prompt_id}",
            timeout=10,
        )
        resp.raise_for_status()
        history = resp.json()

        if prompt_id in history:
            prompt_data = history[prompt_id]
            status = prompt_data.get("status", {})

            if status.get("status_str") == "error":
                messages = status.get("messages", [])
                raise RuntimeError(
                    f"ComfyUI prompt failed: {json.dumps(messages, indent=2)}"
                )

            if status.get("completed", False):
                if prompt_data.get("outputs"):
                    return prompt_data

                # Completed but no outputs — retry briefly for race condition
                empty_complete_retries += 1
                if empty_complete_retries >= max_empty_retries:
                    raise RuntimeError(
                        f"ComfyUI prompt completed but produced no outputs. "
                        f"This usually means a node failed silently (missing custom node or model). "
                        f"Status: {json.dumps(status, indent=2)}"
                    )

        time.sleep(poll_interval)

    raise TimeoutError(f"ComfyUI prompt {prompt_id} did not complete within {timeout}s")


def _coerce_text(value) -> str:
    """Normalize a node output value (string or list of fragments) to one string."""
    if isinstance(value, list):
        return "\n".join(str(item) for item in value)
    return str(value)


def extract_text_output(prompt_data: dict) -> str:
    """Extract text from ComfyUI history outputs.

    Preference order: the Florence2Run node "2", then the ShowText node
    "6" (under either a "text" or "string" key), then any node exposing a
    text-like key.

    Raises:
        RuntimeError: if no node output contains text.
    """
    outputs = prompt_data.get("outputs", {})

    # Try Florence2Run node "2" — may have text output
    node_2 = outputs.get("2", {})
    if "text" in node_2:
        return _coerce_text(node_2["text"])

    # Try ShowText node "6" — may have text/string output
    node_6 = outputs.get("6", {})
    for key in ("text", "string"):
        if key in node_6:
            return _coerce_text(node_6[key])

    # Fallback: search all node outputs for any text data
    for node_output in outputs.values():
        for key in ("text", "string", "TEXT", "STRING"):
            if key in node_output:
                return _coerce_text(node_output[key])

    raise RuntimeError(
        f"No text output found in ComfyUI response. Available outputs: {json.dumps(outputs, indent=2)}"
    )


def main():
    """Entry point: read inputs from stdin, run the workflow, print text JSON."""
    try:
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)
        inputs = execution_input.get("inputs", {})

        image = inputs.get("image", "")
        task = inputs.get("task", "more_detailed_caption")

        if not image:
            raise ValueError("Image is required")

        valid_tasks = [
            "more_detailed_caption", "caption", "ocr",
            "ocr_with_region", "object_detection", "dense_region_caption",
        ]
        if task not in valid_tasks:
            raise ValueError(f"Invalid task '{task}'. Must be one of: {valid_tasks}")

        # Upload input image to ComfyUI
        local_path = os.path.join(INPUT_DIR, image)
        if not os.path.exists(local_path):
            raise FileNotFoundError(f"Input image not found: {local_path}")
        comfyui_image_name = upload_image_to_comfyui(local_path)

        # Build and submit workflow
        workflow = build_workflow(comfyui_image_name, task)
        prompt_id = submit_prompt(workflow)

        # Wait for completion and extract text
        prompt_data = wait_for_result(prompt_id)
        result_text = extract_text_output(prompt_data)

        # Log metadata to stderr
        print(f"prompt_id={prompt_id}, task={task}", file=sys.stderr)

        # Flat output — keys match OUTPUT_SCHEMA
        # portType is "Text" so output the actual content, not a filename
        output = {
            "text": result_text,
        }
        print(json.dumps(output, indent=2))

    except Exception as e:
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()