$ cat node-template.py

Drive Document Reader

// Reads one or more documents from Drive and returns their merged content.

Input
Storage
template.py
"""Drive document reader node.

Reads a JSON execution payload from stdin, fetches the LLM-optimized text of
one or more Drive documents through the REST API, and prints a JSON object
holding the merged ``content`` to stdout. Failures are reported as a JSON
object on stderr and the process exits with status 1.
"""
import os
import sys
import json
import traceback
from urllib.parse import quote

import requests

# Environment variables automatically set by workspace executor
EMBLEMA_API_BASE_URL = os.getenv("EMBLEMA_API_BASE_URL")
USER_TOKEN = os.getenv("USER_TOKEN")

if not EMBLEMA_API_BASE_URL:
    raise ValueError("EMBLEMA_API_BASE_URL environment variable not set")

# Limit to prevent memory/timeout issues
MAX_DOCUMENTS = 100

# Default timeout (seconds) applied when the caller does not pass one.
# requests waits indefinitely without it, which would hang the node.
REQUEST_TIMEOUT_SECONDS = 60

# Text inserted between documents in the merged output.
DOCUMENT_SEPARATOR = "\n\n--- Document Separator ---\n\n"


def api_request_text(method, path, **kwargs):
    """Make authenticated API request and return plain text response.

    Args:
        method: HTTP method (GET, POST, PUT, DELETE, etc.)
        path: API path (e.g., "/api/v2/drive/items/{id}/llms.txt")
        **kwargs: Additional arguments for requests.request(). A ``headers``
            mapping is merged with the Authorization header (which always
            wins); ``timeout`` defaults to REQUEST_TIMEOUT_SECONDS.

    Returns:
        The response body as text.

    Raises:
        ValueError: If USER_TOKEN is not set.
        requests.HTTPError: If the response status is 4xx/5xx.
    """
    if not USER_TOKEN:
        raise ValueError("USER_TOKEN environment variable not set")

    # Merge caller headers instead of passing ``headers`` twice (TypeError).
    headers = dict(kwargs.pop("headers", None) or {})
    headers["Authorization"] = f"Bearer {USER_TOKEN}"
    kwargs.setdefault("timeout", REQUEST_TIMEOUT_SECONDS)

    url = f"{EMBLEMA_API_BASE_URL}{path}"
    response = requests.request(method, url, headers=headers, **kwargs)
    response.raise_for_status()
    return response.text


def fetch_document_content(drive_item_id_input) -> str:
    """Fetch LLM-optimized content from one or multiple documents via REST API.

    Args:
        drive_item_id_input: Single document ID (string) or array of document IDs

    Returns:
        Merged document content with separators for multiple documents

    Raises:
        ValueError: If the input is neither a string nor a list, contains no
            usable IDs, contains more than MAX_DOCUMENTS unique IDs, or no
            document yields any content.
    """
    # Normalize input to list
    if isinstance(drive_item_id_input, str):
        drive_item_ids = [drive_item_id_input]
    elif isinstance(drive_item_id_input, list):
        drive_item_ids = drive_item_id_input
    else:
        raise ValueError(f"driveItemId must be a string or array, got {type(drive_item_id_input).__name__}")

    # Drop falsy entries and deduplicate while preserving first-seen order
    unique_ids = list(dict.fromkeys(item_id for item_id in drive_item_ids if item_id))

    if not unique_ids:
        raise ValueError("No valid document IDs provided")

    if len(unique_ids) > MAX_DOCUMENTS:
        raise ValueError(f"Too many documents: {len(unique_ids)} (max {MAX_DOCUMENTS})")

    # Fetch LLM-optimized text for each document
    all_contents = []
    skipped = []
    for item_id in unique_ids:
        # Percent-encode the ID so characters such as "/", "?" or "#" cannot
        # redirect the request to a different API path.
        encoded_id = quote(str(item_id), safe="")
        try:
            doc_content = api_request_text("GET", f"/api/v2/drive/items/{encoded_id}/llms.txt")
        except requests.HTTPError as e:
            # Skip items that fail (e.g., folders, unprocessed docs, missing files)
            # but continue with remaining documents
            skipped.append({"id": item_id, "status": e.response.status_code, "detail": e.response.text[:200]})
            continue

        stripped = doc_content.strip()
        if stripped:  # Only add non-empty content
            all_contents.append(stripped)

    if skipped:
        print(json.dumps({"warning": f"Skipped {len(skipped)} item(s)", "skipped": skipped}), file=sys.stderr)

    if not all_contents:
        skipped_ids = [s["id"] for s in skipped] if skipped else unique_ids
        raise ValueError(f"No documents could be read. Failed items: {skipped_ids}")

    # Joining a single element returns it unchanged, so no separator appears
    # unless at least two documents produced content.
    return DOCUMENT_SEPARATOR.join(all_contents)


def main():
    """Read the execution input from stdin and print the merged content.

    Writes ``{"content": ...}`` as JSON to stdout on success. On any error,
    writes a JSON error object to stderr and exits with status 1.
    """
    try:
        # Read execution input from stdin
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)

        # Extract inputs (from connections); tolerate an explicit null
        inputs = execution_input.get("inputs") or {}
        document_id = inputs.get("driveItemId")

        if not document_id:
            raise ValueError("Required input 'driveItemId' not provided")

        # Fetch document content
        # API automatically applies user permissions via PAT
        content = fetch_document_content(document_id)

        # Prepare output matching OUTPUT_SCHEMA exactly (KISS principle)
        # OUTPUT_SCHEMA declares only "content" field
        output = {
            "content": content,
        }

        # Write output to stdout
        print(json.dumps(output, indent=2))

    except requests.HTTPError as e:
        # HTTP error (401, 403, 404, etc.)
        error_output = {
            "error": f"API request failed: {e}",
            "errorType": "HTTPError",
            "status": e.response.status_code,
            "detail": e.response.text,
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Other errors
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()