$ cat node-template.py
Drive Document Reader
// Reads one or more documents from Drive and returns their merged content.
Input
Storage
template.py
"""Drive Document Reader node.

Reads a JSON execution payload from stdin, fetches the LLM-optimized text of
one or more Drive documents through the REST API, and prints
``{"content": ...}`` as JSON on stdout. Failures are reported as a JSON object
on stderr and the process exits with status 1.
"""

import json
import os
import sys
import traceback
from urllib.parse import quote

import requests

# Environment variables automatically set by workspace executor
EMBLEMA_API_BASE_URL = os.getenv("EMBLEMA_API_BASE_URL")
USER_TOKEN = os.getenv("USER_TOKEN")

if not EMBLEMA_API_BASE_URL:
    raise ValueError("EMBLEMA_API_BASE_URL environment variable not set")

# requests applies no timeout unless one is given, so a stalled connection
# would block the node forever. (connect, read) in seconds; callers of
# api_request_text() can override it with their own ``timeout=``.
DEFAULT_TIMEOUT = (10, 120)

# Limit to prevent memory/timeout issues
MAX_DOCUMENTS = 100

DOCUMENT_SEPARATOR = "\n\n--- Document Separator ---\n\n"


def api_request_text(method, path, **kwargs):
    """Make an authenticated API request and return the plain-text response.

    Args:
        method: HTTP method (GET, POST, PUT, DELETE, etc.)
        path: API path (e.g., "/api/v2/drive/items/{id}/llms.txt")
        **kwargs: Additional arguments for requests.request(). A ``headers``
            mapping is merged with the Authorization header (which always
            wins); ``timeout`` defaults to DEFAULT_TIMEOUT when omitted.

    Returns:
        The response body as text.

    Raises:
        ValueError: If USER_TOKEN is not set.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    if not USER_TOKEN:
        raise ValueError("USER_TOKEN environment variable not set")

    # Merge caller headers rather than passing ``headers`` twice, which would
    # raise "got multiple values for keyword argument 'headers'".
    headers = dict(kwargs.pop("headers", None) or {})
    headers["Authorization"] = f"Bearer {USER_TOKEN}"
    kwargs.setdefault("timeout", DEFAULT_TIMEOUT)

    url = f"{EMBLEMA_API_BASE_URL}{path}"
    response = requests.request(method, url, headers=headers, **kwargs)
    response.raise_for_status()
    return response.text


def _http_error_details(error, max_detail=None):
    """Return ``(status_code, body_text)`` for an HTTPError, tolerating a missing response."""
    response = error.response
    # Compare with None explicitly: a Response with an error status is falsy.
    if response is None:
        return None, ""
    text = response.text
    return response.status_code, text if max_detail is None else text[:max_detail]


def fetch_document_content(drive_item_id_input) -> str:
    """Fetch LLM-optimized content from one or multiple documents via REST API.

    Args:
        drive_item_id_input: Single document ID (string) or array of document IDs

    Returns:
        Document content; when several documents were read, their contents
        are joined with a "--- Document Separator ---" line.

    Raises:
        ValueError: If the input is neither a string nor a list, contains no
            usable IDs, exceeds MAX_DOCUMENTS, or no document yields content.
    """
    # Normalize input to list
    if isinstance(drive_item_id_input, str):
        drive_item_ids = [drive_item_id_input]
    elif isinstance(drive_item_id_input, list):
        drive_item_ids = drive_item_id_input
    else:
        raise ValueError(f"driveItemId must be a string or array, got {type(drive_item_id_input).__name__}")

    # Drop empty entries, then deduplicate while preserving order
    # (dict keys keep insertion order).
    unique_ids = list(dict.fromkeys(item_id for item_id in drive_item_ids if item_id))

    if not unique_ids:
        raise ValueError("No valid document IDs provided")

    if len(unique_ids) > MAX_DOCUMENTS:
        raise ValueError(f"Too many documents: {len(unique_ids)} (max {MAX_DOCUMENTS})")

    # Fetch LLM-optimized text for each document
    all_contents = []
    skipped = []
    for item_id in unique_ids:
        # Percent-encode the ID so characters such as "/", "?", "#" or ".."
        # cannot change which endpoint the authenticated request reaches.
        path = f"/api/v2/drive/items/{quote(str(item_id), safe='')}/llms.txt"
        try:
            doc_content = api_request_text("GET", path)
        except requests.HTTPError as e:
            # Skip items that fail (e.g., folders, unprocessed docs, missing files)
            # but continue with remaining documents
            status, detail = _http_error_details(e, max_detail=200)
            skipped.append({"id": item_id, "status": status, "detail": detail})
            continue

        stripped = doc_content.strip()
        if stripped:  # Only add non-empty content
            all_contents.append(stripped)

    if skipped:
        print(json.dumps({"warning": f"Skipped {len(skipped)} item(s)", "skipped": skipped}), file=sys.stderr)

    if not all_contents:
        skipped_ids = [s["id"] for s in skipped] if skipped else unique_ids
        raise ValueError(f"No documents could be read. Failed items: {skipped_ids}")

    # Joining a single element returns it unchanged, so one document gets no
    # separator and several documents are delimited.
    return DOCUMENT_SEPARATOR.join(all_contents)


def main():
    """Read the execution input from stdin, fetch the documents, print the result.

    Writes ``{"content": ...}`` to stdout on success. On any failure a JSON
    error object is written to stderr and the process exits with status 1.
    """
    try:
        # Read execution input from stdin
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)

        # Extract inputs (from connections); tolerate an explicit null.
        inputs = execution_input.get("inputs") or {}
        document_id = inputs.get("driveItemId")

        if not document_id:
            raise ValueError("Required input 'driveItemId' not provided")

        # Fetch document content
        # API automatically applies user permissions via PAT
        content = fetch_document_content(document_id)

        # Prepare output matching OUTPUT_SCHEMA exactly (KISS principle)
        # OUTPUT_SCHEMA declares only "content" field
        output = {
            "content": content,
        }

        # Write output to stdout
        print(json.dumps(output, indent=2))

    except requests.HTTPError as e:
        # HTTP error (401, 403, 404, etc.)
        status, detail = _http_error_details(e)
        error_output = {
            "error": f"API request failed: {e}",
            "errorType": "HTTPError",
            "status": status,
            "detail": detail,
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Other errors (top-level boundary: report everything as JSON)
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()