$ cat node-template.py
Text Summarizer
Summarizes large text content using an LLM. Automatically splits long documents into chunks and summarizes them hierarchically to produce a final summary.
Process
LLM
template.py
import sys
import json
import traceback
import asyncio
import time
from gais import Gais


def split_content_into_chunks(content: str, num_chunks: int, model: str) -> list:
    """
    Split content into roughly equal chunks by character count.

    Tries to split on paragraph boundaries when possible.

    Args:
        content: Text to split
        num_chunks: Number of chunks to create
        model: Model name for char limit calculation

    Returns:
        List of text chunks
    """
    safe_chars = Gais.llm.safe_input_chars(model)
    chunk_size = min(len(content) // num_chunks, safe_chars)

    chunks = []
    current_pos = 0

    while current_pos < len(content):
        # Calculate end position
        end_pos = min(current_pos + chunk_size, len(content))

        # Try to find a good break point (paragraph, sentence, space)
        if end_pos < len(content):
            # Look for paragraph break
            para_break = content.rfind("\n\n", current_pos, end_pos)
            if para_break > current_pos + chunk_size // 2:
                end_pos = para_break + 2
            else:
                # Look for sentence break
                sent_break = content.rfind(". ", current_pos, end_pos)
                if sent_break > current_pos + chunk_size // 2:
                    end_pos = sent_break + 2
                else:
                    # Look for any whitespace
                    space_break = content.rfind(" ", current_pos, end_pos)
                    if space_break > current_pos + chunk_size // 2:
                        end_pos = space_break + 1

        chunk = content[current_pos:end_pos].strip()
        if chunk:
            chunks.append(chunk)

        current_pos = end_pos

    return chunks
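
# Illustrative example (hypothetical document and model id, not part of the
# template): when num_chunks=3 and safe_input_chars(model) comfortably exceeds
# len(doc) // 3, each tentative end_pos lands near a paragraph boundary, and
# the rfind("\n\n", ...) pass snaps it back to that boundary (provided the
# boundary lies past the halfway point of the chunk), so a document of three
# similar-sized paragraphs tends to split one chunk per paragraph:
#
#   doc = "Alpha ...\n\nBravo ...\n\nCharlie ..."
#   split_content_into_chunks(doc, num_chunks=3, model="some-model-id")
#   # -> ["Alpha ...", "Bravo ...", "Charlie ..."]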


async def recursive_summarize(
    content: str,
    user_prompt: str,
    model: str,
    level: int = 0,
    max_levels: int = 10,
    max_concurrent: int = 5
) -> str:
    """
    Recursively summarize content using hierarchical reduction with parallel chunk processing.

    Algorithm:
    1. Estimate tokens in content
    2. If it fits in context → summarize directly
    3. If it exceeds → split into chunks, summarize in parallel, combine, recurse
    4. Safety: max 10 levels, then truncate

    Args:
        content: Text to summarize
        user_prompt: User's summarization instructions
        model: LLM model name
        level: Current recursion level (0-indexed)
        max_levels: Maximum recursion depth
        max_concurrent: Maximum concurrent LLM requests (default: 5)

    Returns:
        Summary text
    """
    estimated_tokens = Gais.llm.estimate_tokens(content, model)
    safe_chars = Gais.llm.safe_input_chars(model)

    # Log progress to stderr
    print(f"[Level {level}] Estimated tokens: {estimated_tokens:,}, Safe char limit: {safe_chars:,}", file=sys.stderr)

    # Base case: content fits in context
    if len(content) <= safe_chars:
        print(f"[Level {level}] Content fits in context, generating summary...", file=sys.stderr)

        system_prompt = (
            "You are an expert summarizer. Extract all key information, "
            "main points, and important details. Preserve critical facts and context."
        )

        user_message = (
            f"<user_prompt>{user_prompt}</user_prompt>\n\n"
            f"Content to summarize:\n\n"
            f"<content>{content}</content>"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        return (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

    # Safety limit: max recursion depth
    if level >= max_levels:
        print(f"[Level {level}] ⚠️ Max recursion depth reached, truncating content...", file=sys.stderr)

        # Truncate to safe char count
        truncated = content[:safe_chars]

        system_prompt = (
            "You are an expert summarizer. Extract all key information, "
            "main points, and important details. Preserve critical facts and context. "
            "Note: Content was truncated due to length."
        )

        user_message = (
            f"<user_prompt>{user_prompt}</user_prompt>\n\n"
            f"Content to summarize (truncated):\n\n"
            f"<content>{truncated}</content>"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        return (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

    # Recursive case: split and reduce with parallel processing
    reduction_needed = len(content) / safe_chars

    # Adaptive group size (from background-task tool.py pattern)
    if reduction_needed > 8:
        num_chunks = 5
    elif reduction_needed > 4:
        num_chunks = 4
    else:
        num_chunks = 3

    print(f"[Level {level}] Content exceeds limit ({reduction_needed:.2f}x), splitting into {num_chunks} chunks...", file=sys.stderr)

    # Split content
    chunks = split_content_into_chunks(content, num_chunks, model)
    print(f"[Level {level}] Split into {len(chunks)} chunks", file=sys.stderr)

    # Create semaphore for rate limiting
    semaphore = asyncio.Semaphore(max_concurrent)

    # Summarize each chunk in parallel
    async def summarize_chunk(i: int, chunk: str) -> str:
        """Summarize a single chunk with error handling"""
        try:
            start_time = time.time()
            timestamp = time.strftime("%H:%M:%S", time.localtime(start_time))
            print(f"[Level {level}] [{timestamp}] 🚀 STARTING chunk {i+1}/{len(chunks)} (task launched)", file=sys.stderr)

            summary_prompt = "Create a comprehensive summary that preserves all key information and important details."
            system_prompt = "You are an expert summarizer."

            user_message = (
                f"<summary_prompt>{summary_prompt}</summary_prompt>\n\n"
                f"Content:\n\n"
                f"<content>{chunk}</content>"
            )

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ]

            async with semaphore:
                # Log only after the semaphore is acquired, so "Sending" reflects
                # the actual dispatch rather than time spent waiting for a slot
                llm_start = time.time()
                llm_timestamp = time.strftime("%H:%M:%S", time.localtime(llm_start))
                print(f"[Level {level}] [{llm_timestamp}] 📡 Sending LLM request for chunk {i+1}/{len(chunks)}", file=sys.stderr)
                summary = (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

            end_time = time.time()
            end_timestamp = time.strftime("%H:%M:%S", time.localtime(end_time))
            duration = end_time - start_time
            print(f"[Level {level}] [{end_timestamp}] ✅ COMPLETED chunk {i+1}/{len(chunks)} (took {duration:.2f}s)", file=sys.stderr)
            return summary

        except Exception as e:
            error_time = time.time()
            error_timestamp = time.strftime("%H:%M:%S", time.localtime(error_time))
            print(f"[Level {level}] [{error_timestamp}] ⚠️ Error in chunk {i+1}: {e}", file=sys.stderr)
            # Return truncated chunk as fallback
            return f"[Partial] {chunk[:500]}..."

    batch_start = time.time()
    batch_timestamp = time.strftime("%H:%M:%S", time.localtime(batch_start))
    print(f"[Level {level}] [{batch_timestamp}] ⚡ LAUNCHING {len(chunks)} chunks in PARALLEL (max {max_concurrent} concurrent)...", file=sys.stderr)

    # Execute all chunk summaries in parallel
    chunk_summaries = await asyncio.gather(
        *[summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)],
        return_exceptions=False  # Errors are handled in summarize_chunk
    )

    batch_end = time.time()
    batch_end_timestamp = time.strftime("%H:%M:%S", time.localtime(batch_end))
    batch_duration = batch_end - batch_start
    print(f"[Level {level}] [{batch_end_timestamp}] 🎉 ALL {len(chunks)} chunks completed in {batch_duration:.2f}s (up to {min(len(chunks), max_concurrent)}x faster than sequential)", file=sys.stderr)

    # Combine summaries and recurse
    combined = "\n\n".join(chunk_summaries)
    combined_tokens = Gais.llm.estimate_tokens(combined, model)

    print(f"[Level {level}] Combined {len(chunk_summaries)} summaries ({combined_tokens:,} tokens), recursing to level {level+1}...", file=sys.stderr)

    return await recursive_summarize(
        combined,
        user_prompt,
        model,
        level=level + 1,
        max_levels=max_levels,
        max_concurrent=max_concurrent
    )
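
# Worked example (hypothetical numbers): suppose safe_input_chars(model)
# returns 100_000 and the input is 1_000_000 chars. Then reduction_needed
# = 10.0 (> 8), so num_chunks = 5. But split_content_into_chunks caps
# chunk_size at safe_chars = min(1_000_000 // 5, 100_000) = 100_000, so the
# splitter actually emits roughly 10 chunks of <= 100_000 chars, each of
# which fits the context window on its own. If each chunk summary comes
# back at ~5_000 chars, the combined text is ~50_000 chars <= safe_chars,
# so level 1 hits the base case and the whole run finishes in two levels.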


def main():
    """Main execution function"""
    try:
        # Read execution input from stdin
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)

        # Extract inputs
        inputs = execution_input.get("inputs", {})
        content = inputs.get("content")
        llm_model = inputs.get("llmModel")
        prompt = inputs.get("prompt")

        # Validate inputs
        if not content:
            raise ValueError("Required input 'content' not provided")
        if not llm_model:
            raise ValueError("Required input 'llmModel' not provided")
        if not prompt:
            raise ValueError("Required input 'prompt' not provided")

        print("=" * 80, file=sys.stderr)
        print("🚀 RECURSIVE SUMMARIZER v12 - Starting (GAIS LLM SDK)", file=sys.stderr)
        print("=" * 80, file=sys.stderr)
        print(f"📋 Selected Model ID: '{llm_model}'", file=sys.stderr)
        print(f"📄 Content length: {len(content):,} characters", file=sys.stderr)
        print(f"🔢 Estimated tokens: {Gais.llm.estimate_tokens(content, llm_model):,}", file=sys.stderr)
        print("⚡ Max concurrent requests: 5", file=sys.stderr)
        print("=" * 80, file=sys.stderr)

        # Perform recursive summarization with async execution
        summary = asyncio.run(recursive_summarize(content, prompt, llm_model, max_concurrent=5))

        print(f"✓ Summarization complete! Summary length: {len(summary):,} characters", file=sys.stderr)

        # Prepare output matching OUTPUT_SCHEMA
        output = {
            "summary": summary,
        }

        # Write output to stdout
        print(json.dumps(output, indent=2))

    except Exception as e:
        # Report any error as structured JSON on stderr and exit non-zero
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
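
A minimal invocation sketch (the model id "gpt-4o" and the document text are placeholders; substitute whatever your GAIS deployment accepts). The template reads the execution input as JSON on stdin, streams progress logs to stderr, and prints only the result JSON on stdout, so the output can be piped onward:

$ echo '{"inputs": {"content": "<long document text>", "llmModel": "gpt-4o", "prompt": "Summarize the key findings."}}' | python template.py
{
  "summary": "..."
}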
$ git log --oneline
v1.3.0 (HEAD)  2026-05-07
v1.0.0         2026-04-09