$ cat node-template.py


Text Summarizer

// Summarizes large text content using an LLM. Automatically splits long documents into chunks and summarizes them hierarchically to produce a final summary.
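As a quick illustration of the reduction arithmetic the template below applies (the numbers here are hypothetical, and safe_chars stands in for the SDK's Gais.llm.safe_input_chars(model)):

# Hypothetical numbers, for illustration only.
safe_chars = 100_000    # stand-in for Gais.llm.safe_input_chars(model)
content_len = 350_000   # document is 3.5x over the safe limit

reduction_needed = content_len / safe_chars   # 3.5
# Adaptive chunk count used by the template: >8x -> 5 chunks, >4x -> 4, else 3
num_chunks = 5 if reduction_needed > 8 else 4 if reduction_needed > 4 else 3
chunk_size = min(content_len // num_chunks, safe_chars)

print(num_chunks, chunk_size)  # 3 100000
# Because the splitter caps each chunk at safe_chars, a 350K document
# actually yields about four chunks of <=100K characters; their summaries
# are joined and the whole procedure recurses one level up on that text.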

import sys
import json
import traceback
import asyncio
import time
from gais import Gais


def split_content_into_chunks(content: str, num_chunks: int, model: str) -> list:
    """
    Split content into roughly equal chunks by character count.

    Tries to split on paragraph boundaries when possible.

    Args:
        content: Text to split
        num_chunks: Number of chunks to create
        model: Model name for char limit calculation

    Returns:
        List of text chunks
    """
    safe_chars = Gais.llm.safe_input_chars(model)
    # Guard against a zero chunk size, which would stall the loop below
    chunk_size = max(1, min(len(content) // num_chunks, safe_chars))

    chunks = []
    current_pos = 0

    while current_pos < len(content):
        # Calculate end position
        end_pos = min(current_pos + chunk_size, len(content))

        # Try to find a good break point (paragraph, sentence, space)
        if end_pos < len(content):
            # Look for paragraph break
            para_break = content.rfind("\n\n", current_pos, end_pos)
            if para_break > current_pos + chunk_size // 2:
                end_pos = para_break + 2
            else:
                # Look for sentence break
                sent_break = content.rfind(". ", current_pos, end_pos)
                if sent_break > current_pos + chunk_size // 2:
                    end_pos = sent_break + 2
                else:
                    # Look for any whitespace
                    space_break = content.rfind(" ", current_pos, end_pos)
                    if space_break > current_pos + chunk_size // 2:
                        end_pos = space_break + 1

        chunk = content[current_pos:end_pos].strip()
        if chunk:
            chunks.append(chunk)

        current_pos = end_pos

    return chunks


async def recursive_summarize(
    content: str,
    user_prompt: str,
    model: str,
    level: int = 0,
    max_levels: int = 10,
    max_concurrent: int = 5
) -> str:
    """
    Recursively summarize content using hierarchical reduction with parallel chunk processing.

    Algorithm:
    1. Estimate tokens in content
    2. If it fits in context → summarize directly
    3. If it exceeds the limit → split into chunks, summarize in parallel, combine, recurse
    4. Safety: max 10 levels, then truncate

    Args:
        content: Text to summarize
        user_prompt: User's summarization instructions
        model: LLM model name
        level: Current recursion level (0-indexed)
        max_levels: Maximum recursion depth
        max_concurrent: Maximum concurrent LLM requests (default: 5)

    Returns:
        Summary text
    """
    estimated_tokens = Gais.llm.estimate_tokens(content, model)
    safe_chars = Gais.llm.safe_input_chars(model)

    # Log progress to stderr
    print(f"[Level {level}] Estimated tokens: {estimated_tokens:,}, Safe char limit: {safe_chars:,}", file=sys.stderr)

    # Base case: content fits in context
    if len(content) <= safe_chars:
        print(f"[Level {level}] Content fits in context, generating summary...", file=sys.stderr)

        system_prompt = (
            "You are an expert summarizer. Extract all key information, "
            "main points, and important details. Preserve critical facts and context."
        )

        user_message = (
            f"<user_prompt>{user_prompt}</user_prompt>\n\n"
            f"Content to summarize:\n\n"
            f"<content>{content}</content>"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        return (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

    # Safety limit: max recursion depth
    if level >= max_levels:
        print(f"[Level {level}] ⚠️  Max recursion depth reached, truncating content...", file=sys.stderr)

        # Truncate to safe char count
        truncated = content[:safe_chars]

        system_prompt = (
            "You are an expert summarizer. Extract all key information, "
            "main points, and important details. Preserve critical facts and context. "
            "Note: Content was truncated due to length."
        )

        user_message = (
            f"<user_prompt>{user_prompt}</user_prompt>\n\n"
            f"Content to summarize (truncated):\n\n"
            f"<content>{truncated}</content>"
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        return (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

    # Recursive case: split and reduce with parallel processing
    reduction_needed = len(content) / safe_chars

    # Adaptive group size (from background-task tool.py pattern)
    if reduction_needed > 8:
        num_chunks = 5
    elif reduction_needed > 4:
        num_chunks = 4
    else:
        num_chunks = 3

    print(f"[Level {level}] Content exceeds limit ({reduction_needed:.2f}x), splitting into {num_chunks} chunks...", file=sys.stderr)

    # Split content
    chunks = split_content_into_chunks(content, num_chunks, model)
    print(f"[Level {level}] Split into {len(chunks)} chunks", file=sys.stderr)

    # Create semaphore for rate limiting
    semaphore = asyncio.Semaphore(max_concurrent)

    # Summarize each chunk in parallel
    async def summarize_chunk(i: int, chunk: str) -> str:
        """Summarize a single chunk with error handling"""
        try:
            start_time = time.time()
            timestamp = time.strftime("%H:%M:%S", time.localtime(start_time))
            print(f"[Level {level}] [{timestamp}] 🚀 STARTING chunk {i+1}/{len(chunks)} (task launched)", file=sys.stderr)

            summary_prompt = "Create a comprehensive summary that preserves all key information and important details."
            system_prompt = "You are an expert summarizer."

            user_message = (
                f"<summary_prompt>{summary_prompt}</summary_prompt>\n\n"
                f"Content:\n\n"
                f"<content>{chunk}</content>"
            )

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ]

            llm_start = time.time()
            llm_timestamp = time.strftime("%H:%M:%S", time.localtime(llm_start))
            print(f"[Level {level}] [{llm_timestamp}] 📡 Sending LLM request for chunk {i+1}/{len(chunks)}", file=sys.stderr)

            async with semaphore:
                summary = (await Gais.llm.chat_async(messages, model=model, temperature=0)).text

            end_time = time.time()
            end_timestamp = time.strftime("%H:%M:%S", time.localtime(end_time))
            duration = end_time - start_time
            print(f"[Level {level}] [{end_timestamp}] ✅ COMPLETED chunk {i+1}/{len(chunks)} (took {duration:.2f}s)", file=sys.stderr)
            return summary

        except Exception as e:
            error_time = time.time()
            error_timestamp = time.strftime("%H:%M:%S", time.localtime(error_time))
            print(f"[Level {level}] [{error_timestamp}] ⚠️  Error in chunk {i+1}: {e}", file=sys.stderr)
            # Return truncated chunk as fallback
            return f"[Partial] {chunk[:500]}..."

    batch_start = time.time()
    batch_timestamp = time.strftime("%H:%M:%S", time.localtime(batch_start))
    print(f"[Level {level}] [{batch_timestamp}] ⚡ LAUNCHING {len(chunks)} chunks in PARALLEL (max {max_concurrent} concurrent)...", file=sys.stderr)

    # Execute all chunk summaries in parallel
    chunk_summaries = await asyncio.gather(
        *[summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)],
        return_exceptions=False  # Errors are handled in summarize_chunk
    )

    batch_end = time.time()
    batch_end_timestamp = time.strftime("%H:%M:%S", time.localtime(batch_end))
    batch_duration = batch_end - batch_start
    print(f"[Level {level}] [{batch_end_timestamp}] 🎉 ALL {len(chunks)} chunks completed in {batch_duration:.2f}s (speedup: {len(chunks)}x vs sequential)", file=sys.stderr)

    # Combine summaries and recurse
    combined = "\n\n".join(chunk_summaries)
    combined_tokens = Gais.llm.estimate_tokens(combined, model)

    print(f"[Level {level}] Combined {len(chunk_summaries)} summaries ({combined_tokens:,} tokens), recursing to level {level+1}...", file=sys.stderr)

    return await recursive_summarize(
        combined,
        user_prompt,
        model,
        level=level + 1,
        max_levels=max_levels,
        max_concurrent=max_concurrent
    )


def main():
    """Main execution function"""
    try:
        # Read execution input from stdin
        input_json = sys.stdin.read()
        execution_input = json.loads(input_json)

        # Extract inputs
        inputs = execution_input.get("inputs", {})
        content = inputs.get("content")
        llm_model = inputs.get("llmModel")
        prompt = inputs.get("prompt")

        # Validate inputs
        if not content:
            raise ValueError("Required input 'content' not provided")
        if not llm_model:
            raise ValueError("Required input 'llmModel' not provided")
        if not prompt:
            raise ValueError("Required input 'prompt' not provided")

        print("="*80, file=sys.stderr)
        print("🚀 RECURSIVE SUMMARIZER v12 - Starting (GAIS LLM SDK)", file=sys.stderr)
        print("="*80, file=sys.stderr)
        print(f"📋 Selected Model ID: '{llm_model}'", file=sys.stderr)
        print(f"📄 Content length: {len(content):,} characters", file=sys.stderr)
        print(f"🔢 Estimated tokens: {Gais.llm.estimate_tokens(content, llm_model):,}", file=sys.stderr)
        print("⚡ Max concurrent requests: 5", file=sys.stderr)
        print("="*80, file=sys.stderr)

        # Perform recursive summarization with async execution
        summary = asyncio.run(recursive_summarize(content, prompt, llm_model, max_concurrent=5))

        print(f"✓ Summarization complete! Summary length: {len(summary):,} characters", file=sys.stderr)

        # Prepare output matching OUTPUT_SCHEMA
        output = {
            "summary": summary,
        }

        # Write output to stdout
        print(json.dumps(output, indent=2))

    except Exception as e:
        # Report any failure as structured JSON on stderr and exit non-zero
        error_output = {
            "error": str(e),
            "errorType": type(e).__name__,
            "traceback": traceback.format_exc(),
        }
        print(json.dumps(error_output), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
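To exercise the node locally, pipe an execution payload into the script. A minimal sketch, assuming the gais package is importable and with a placeholder model ID:

$ echo '{"inputs": {"content": "<large text>", "llmModel": "<your-model-id>", "prompt": "Summarize in three bullets."}}' | python node-template.py

All progress logging goes to stderr, so stdout carries only the final {"summary": ...} JSON object for downstream nodes to parse.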

$ git log --oneline

v1.3.0 (HEAD)  2026-05-07
v1.0.0         2026-04-09