
Reference

This section provides a comprehensive reference for all functions and classes in our project.

lampe

cli

commands

check_reviewed
check_reviewed(repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True), repo_full_name: str | None = typer.Option(None, help='Repository full name (e.g. owner/repo)'), output: str = typer.Option('auto', help='Output provider (auto|console|github|gitlab|bitbucket)'), pr_number: int | None = typer.Option(None, '--pr', help='Pull request number (required for non-console providers)'))

Check if the token user has already reviewed this PR.

Returns exit code 0 if reviewed, 1 if not reviewed.
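For example, a CI step can branch on the exit code. A minimal sketch using Python's subprocess module; the hyphenated command name lampe check-reviewed follows Typer's default naming and is an assumption, as is the PR number:

import subprocess

# Hedged sketch: gate a review job on whether the PR was already reviewed.
# Command name and PR number are placeholders; provider credentials
# (e.g. GITHUB_TOKEN, GITHUB_REPOSITORY) must be set in the environment.
result = subprocess.run(
    ["lampe", "check-reviewed", "--repo", ".", "--output", "github", "--pr", "42"]
)
if result.returncode == 0:
    print("PR already reviewed; skipping.")
else:
    print("PR not reviewed yet; running the review step.")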

Source code in packages/lampe-cli/src/lampe/cli/commands/check_reviewed.py
def check_reviewed(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True),
    repo_full_name: str | None = typer.Option(None, help="Repository full name (e.g. owner/repo)"),
    output: str = typer.Option("auto", help="Output provider (auto|console|github|gitlab|bitbucket)"),
    pr_number: int | None = typer.Option(None, "--pr", help="Pull request number (required for non-console providers)"),
):
    """Check if the token user has already reviewed this PR.

    Returns exit code 0 if reviewed, 1 if not reviewed.
    """
    initialize()
    repo_model = Repository(local_path=str(repo), full_name=repo_full_name)
    pr_model = PullRequest(
        number=pr_number or 0,
        title="",
        body=None,
        base_commit_hash="",
        base_branch_name="",
        head_commit_hash="",
        head_branch_name="",
    )

    try:
        provider = Provider.create_provider(provider_name=output, repository=repo_model, pull_request=pr_model)
    except ValueError as e:
        if "required" in str(e).lower() and "pr" in str(e).lower():
            print(f"❌ Error: PR number is required for {output} provider. Use --pr <number>", file=sys.stderr)
            sys.exit(1)
        raise

    try:
        has_reviewed = provider.has_reviewed()
        if has_reviewed:
            print("✅ PR has already been reviewed by the token user")
            sys.exit(0)
        else:
            print("❌ PR has not been reviewed by the token user yet")
            sys.exit(1)
    except Exception as e:
        print(f"❌ Error checking if PR has been reviewed: {e}", file=sys.stderr)
        sys.exit(1)
describe
describe(repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True), repo_full_name: str | None = typer.Option(None, help='Repository full name (e.g. owner/repo)'), base: str = typer.Option(..., help='Base commit SHA'), head: str = typer.Option(..., help='Head commit SHA'), title: str = typer.Option('Pull Request', help='PR title (local runs)'), output: str = typer.Option('auto', help='Output provider (auto|console|github|gitlab|bitbucket)'), variant: str = typer.Option('default', help='default|agentic'), files_exclude: list[str] | None = typer.Option(None, '--exclude'), files_reinclude: list[str] | None = typer.Option(None, '--reinclude'), truncation_tokens: int = typer.Option(DEFAULT_MAX_TOKENS, '--max-tokens'), timeout: int | None = typer.Option(None, '--timeout-seconds'), verbose: bool = typer.Option(False, '--verbose/--no-verbose'))

Generate a PR description and deliver it to the specified output provider.
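A local run can target the console provider, which prints the generated description instead of calling a hosting API. A hedged sketch; the command name and commit SHAs are placeholders:

import subprocess

# Hedged sketch: describe a local commit range and print to stdout.
subprocess.run(
    [
        "lampe", "describe",
        "--repo", ".",
        "--base", "abc123",      # placeholder base commit SHA
        "--head", "def456",      # placeholder head commit SHA
        "--output", "console",
        "--exclude", "*.lock",   # optional: drop lockfiles from the diff
    ],
    check=True,
)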

Source code in packages/lampe-cli/src/lampe/cli/commands/describe.py
def describe(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True),
    repo_full_name: str | None = typer.Option(None, help="Repository full name (e.g. owner/repo)"),
    base: str = typer.Option(..., help="Base commit SHA"),
    head: str = typer.Option(..., help="Head commit SHA"),
    title: str = typer.Option("Pull Request", help="PR title (local runs)"),
    output: str = typer.Option("auto", help="Output provider (auto|console|github|gitlab|bitbucket)"),
    variant: str = typer.Option("default", help="default|agentic"),
    files_exclude: list[str] | None = typer.Option(None, "--exclude"),
    files_reinclude: list[str] | None = typer.Option(None, "--reinclude"),
    truncation_tokens: int = typer.Option(DEFAULT_MAX_TOKENS, "--max-tokens"),
    timeout: int | None = typer.Option(None, "--timeout-seconds"),
    verbose: bool = typer.Option(False, "--verbose/--no-verbose"),
):
    """Generate a PR description and deliver it to the specified output provider."""
    initialize()
    repo_model = Repository(local_path=str(repo), full_name=repo_full_name)
    pr_model = PullRequest(
        number=0,
        title=title,
        body=None,
        base_commit_hash=base,
        base_branch_name="",
        head_commit_hash=head,
        head_branch_name="",
    )

    provider = Provider.create_provider(provider_name=output, repository=repo_model, pull_request=pr_model)

    generator = DefaultGeneratorAdapter() if variant == "default" else AgenticGeneratorAdapter()
    pr_cfg = PRDescriptionConfig(
        files_exclude_patterns=list(files_exclude) if files_exclude else None,
        files_reinclude_patterns=list(files_reinclude) if files_reinclude else None,
        truncation_tokens=truncation_tokens,
        timeout=timeout,
        verbose=verbose,
    )

    async def _run():
        workflow_task = PRDescriptionOrchestratorWorkflow(
            provider=provider, generator=generator, timeout=timeout, verbose=verbose
        )
        await workflow_task.run(
            start_event=PRDescriptionStart(repository=repo_model, pull_request=pr_model, config=pr_cfg)
        )

    asyncio.run(_run())
healthcheck
healthcheck() -> None

Check if the CLI is healthy and can connect to the configured provider.

Source code in packages/lampe-cli/src/lampe/cli/commands/healthcheck.py
def healthcheck() -> None:
    """Check if the CLI is healthy and can connect to the configured provider."""
    logger.info("🔍 Checking CLI health...")
    initialize()
    # Create dummy repository and pull request objects for testing
    repo = Repository(local_path=".", full_name="test/repo")
    pr = PullRequest(
        number=1,
        title="Test PR",
        base_commit_hash="test-base",
        base_branch_name="main",
        head_commit_hash="test-head",
        head_branch_name="feature/test",
    )

    # Initialize provider and run healthcheck
    try:
        provider: Provider = Provider.create_provider("auto", repository=repo, pull_request=pr)
        provider.healthcheck()

        # Check LLM API keys
        logger.info("🔑 Checking LLM API keys...")
        openai_key = os.getenv("OPENAI_API_KEY")
        anthropic_key = os.getenv("ANTHROPIC_API_KEY")

        if not openai_key and not anthropic_key:
            logger.info("❌ No LLM API keys found")
            logger.info("   Set at least one of:")
            logger.info("   - OPENAI_API_KEY for OpenAI models")
            logger.info("   - ANTHROPIC_API_KEY for Anthropic models")
            sys.exit(1)

        if openai_key:
            logger.info("✅ OPENAI_API_KEY is set")
        if anthropic_key:
            logger.info("✅ ANTHROPIC_API_KEY is set")

        logger.info("\n🎉 All health checks passed! CLI is ready to use.")

    except Exception as e:
        logger.exception(f"❌ Health check failed: {e}")
        sys.exit(1)
review
review(repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True), repo_full_name: str | None = typer.Option(None, help='Repository full name (e.g. owner/repo)'), base: str = typer.Option(..., help='Base commit SHA'), head: str = typer.Option(..., help='Head commit SHA'), title: str = typer.Option('Pull Request', help='PR title (local runs)'), output: str = typer.Option('auto', help='Output provider (auto|console|github|gitlab|bitbucket)'), review_depth: ReviewDepth = typer.Option(ReviewDepth.STANDARD, help='Review depth (basic|standard|comprehensive)'), variant: str = typer.Option('multi-agent', help='Review variant (multi-agent|diff-by-diff)'), guidelines: list[str] | None = typer.Option(None, '--guideline', help='Custom review guidelines (can be repeated)'), files_exclude: list[str] | None = typer.Option(None, '--exclude'), timeout: int | None = typer.Option(None, '--timeout-seconds'), verbose: bool = typer.Option(False, '--verbose/--no-verbose'))

Generate a PR code review and deliver it to the specified output provider.

Model selection is automatic based on review_depth:

- basic: gpt-5-nano
- standard: gpt-5
- comprehensive: gpt-5.1
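A hedged invocation sketch; per the option declarations above, --guideline may be repeated, and the --review-depth flag spelling is assumed from Typer's default parameter naming:

import subprocess

# Hedged sketch: comprehensive review with two custom guidelines.
subprocess.run(
    [
        "lampe", "review",
        "--repo", ".",
        "--base", "abc123",                # placeholder SHAs
        "--head", "def456",
        "--review-depth", "comprehensive",
        "--guideline", "Flag SQL built by string concatenation",
        "--guideline", "Require explicit timeouts on network calls",
        "--output", "console",
    ],
    check=True,
)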

Source code in packages/lampe-cli/src/lampe/cli/commands/review.py
def review(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True),
    repo_full_name: str | None = typer.Option(None, help="Repository full name (e.g. owner/repo)"),
    base: str = typer.Option(..., help="Base commit SHA"),
    head: str = typer.Option(..., help="Head commit SHA"),
    title: str = typer.Option("Pull Request", help="PR title (local runs)"),
    output: str = typer.Option("auto", help="Output provider (auto|console|github|gitlab|bitbucket)"),
    review_depth: ReviewDepth = typer.Option(ReviewDepth.STANDARD, help="Review depth (basic|standard|comprehensive)"),
    variant: str = typer.Option("multi-agent", help="Review variant (multi-agent|diff-by-diff)"),
    guidelines: list[str] | None = typer.Option(None, "--guideline", help="Custom review guidelines (can be repeated)"),
    files_exclude: list[str] | None = typer.Option(None, "--exclude"),
    timeout: int | None = typer.Option(None, "--timeout-seconds"),
    verbose: bool = typer.Option(False, "--verbose/--no-verbose"),
):
    """Generate a PR code review and deliver it to the specified output provider.

    Model selection is automatic based on review_depth:
    - basic: gpt-5-nano
    - standard: gpt-5
    - comprehensive: gpt-5.1
    """
    initialize()
    repo_model = Repository(local_path=str(repo), full_name=repo_full_name)
    pr_model = PullRequest(
        number=0,
        title=title,
        body=None,
        base_commit_hash=base,
        base_branch_name="",
        head_commit_hash=head,
        head_branch_name="",
    )

    provider = Provider.create_provider(provider_name=output, repository=repo_model, pull_request=pr_model)

    generator = DiffByDiffReviewAdapter() if variant == "diff-by-diff" else AgenticReviewAdapter()
    pr_cfg = PRReviewConfig(
        review_depth=review_depth,
        custom_guidelines=guidelines,
        files_exclude_patterns=files_exclude,
        agents_required=[DefaultAgent],
        timeout=timeout,
        verbose=verbose,
    )

    async def _run():
        workflow_task = PRReviewOrchestratorWorkflow(
            provider=provider, generator=generator, timeout=timeout, verbose=verbose
        )
        await workflow_task.run(start_event=PRReviewStart(repository=repo_model, pull_request=pr_model, config=pr_cfg))

    asyncio.run(_run())

entrypoint

version
version() -> None

Show version information.

Source code in packages/lampe-cli/src/lampe/cli/entrypoint.py
@app.command()
def version() -> None:
    """Show version information."""
    import importlib.metadata

    version = importlib.metadata.version("lampe-cli")
    logger.info(f"🔦 Lampe CLI v{version}")
    logger.info("   Put some light on your codebase! ✨")

providers

base
Provider(repository: Repository, pull_request: PullRequest)

Bases: ABC

Abstract provider for delivering workflow outputs.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    self.repository = repository
    self.pull_request = pull_request
create_provider(provider_name: ProviderType | str, repository: Repository, pull_request: PullRequest) -> 'Provider' staticmethod

Create a provider instance based on the specified type.
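A construction sketch grounded in the call sites shown elsewhere on this page; the data-model import path is inferred from the data_models section below and is an assumption:

from lampe.cli.providers.base import Provider
from lampe.core.data_models import PullRequest, Repository  # assumed import path

# Field names mirror the CLI call sites above.
repo = Repository(local_path=".", full_name="owner/repo")
pr = PullRequest(
    number=1,
    title="Example PR",
    base_commit_hash="abc123",
    base_branch_name="main",
    head_commit_hash="def456",
    head_branch_name="feature/example",
)

# "console" needs no credentials; "auto" defers to detect_provider_type().
provider = Provider.create_provider(provider_name="console", repository=repo, pull_request=pr)
provider.healthcheck()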

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@staticmethod
def create_provider(
    provider_name: ProviderType | str, repository: Repository, pull_request: PullRequest
) -> "Provider":
    """Create a provider instance based on the specified type."""
    if isinstance(provider_name, str):
        # Handle "auto" detection
        if provider_name == "auto":
            provider_name = Provider.detect_provider_type()
        else:
            provider_name = ProviderType(provider_name)

    if provider_name == ProviderType.CONSOLE:
        from lampe.cli.providers.console import ConsoleProvider

        return ConsoleProvider(repository=repository, pull_request=pull_request)
    elif provider_name == ProviderType.GITHUB:
        from lampe.cli.providers.github import GitHubProvider

        return GitHubProvider(repository=repository, pull_request=pull_request)
    elif provider_name == ProviderType.BITBUCKET:
        from lampe.cli.providers.bitbucket import BitbucketProvider

        return BitbucketProvider(repository=repository, pull_request=pull_request)
    else:
        raise ValueError(f"Provider type {provider_name} not yet implemented")
deliver_pr_description(payload: PRDescriptionPayload) -> None abstractmethod

Deliver a PR description to the configured destination.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@abstractmethod
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Deliver a PR description to the configured destination."""
    ...
deliver_pr_review(payload: PRReviewPayload) -> None abstractmethod

Deliver a PR review to the configured destination.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@abstractmethod
def deliver_pr_review(self, payload: PRReviewPayload) -> None:
    """Deliver a PR review to the configured destination."""
    ...
detect_provider_type() -> ProviderType staticmethod

Detect the appropriate provider type based on available environment variables.
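Detection priority is encoded by dict insertion order (guaranteed since Python 3.7), so GitHub variables win over GitLab and Bitbucket ones. A hedged sketch of the behavior:

import os

from lampe.cli.providers.base import Provider, ProviderType  # locations per this page

# Clear every variable the mapping consults, then check the fallback.
for var in (
    "GITHUB_API_TOKEN", "GITHUB_TOKEN", "LAMPE_GITHUB_TOKEN",
    "LAMPE_GITHUB_APP_ID", "LAMPE_GITHUB_APP_PRIVATE_KEY",
    "GITLAB_API_TOKEN", "LAMPE_BITBUCKET_TOKEN",
    "LAMPE_BITBUCKET_APP_KEY", "BITBUCKET_WORKSPACE",
):
    os.environ.pop(var, None)
assert Provider.detect_provider_type() is ProviderType.CONSOLE

# Earlier entries in the mapping take priority over later ones.
os.environ["GITHUB_TOKEN"] = "ghp_example"
assert Provider.detect_provider_type() is ProviderType.GITHUB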

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@staticmethod
def detect_provider_type() -> ProviderType:
    """Detect the appropriate provider type based on available environment variables."""
    # Priority order for provider detection
    env_var_mapping = {
        "GITHUB_API_TOKEN": ProviderType.GITHUB,
        "GITHUB_TOKEN": ProviderType.GITHUB,
        "LAMPE_GITHUB_TOKEN": ProviderType.GITHUB,
        "LAMPE_GITHUB_APP_ID": ProviderType.GITHUB,
        "LAMPE_GITHUB_APP_PRIVATE_KEY": ProviderType.GITHUB,
        "GITLAB_API_TOKEN": ProviderType.GITLAB,
        "LAMPE_BITBUCKET_TOKEN": ProviderType.BITBUCKET,
        "LAMPE_BITBUCKET_APP_KEY": ProviderType.BITBUCKET,
        "BITBUCKET_WORKSPACE": ProviderType.BITBUCKET,
    }

    for env_var, provider_type in env_var_mapping.items():
        if os.getenv(env_var):
            return provider_type

    # Fallback to console if no API tokens are found
    return ProviderType.CONSOLE
has_reviewed() -> bool abstractmethod

Check if the token user has already reviewed this PR.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@abstractmethod
def has_reviewed(self) -> bool:
    """Check if the token user has already reviewed this PR."""
    ...
healthcheck() -> None abstractmethod

Check if the provider is healthy and can connect to the service.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@abstractmethod
def healthcheck(self) -> None:
    """Check if the provider is healthy and can connect to the service."""
    ...
ProviderType

Bases: StrEnum

Available provider types.
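No source is rendered for this enum, but the member names referenced in create_provider and the output option help (auto|console|github|gitlab|bitbucket) suggest a shape like the following sketch; the exact value strings are assumptions:

from enum import StrEnum

class ProviderType(StrEnum):
    # Sketch only: member names appear in create_provider above;
    # the exact value strings are assumptions.
    CONSOLE = "console"
    GITHUB = "github"
    GITLAB = "gitlab"
    BITBUCKET = "bitbucket"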

update_or_add_text_between_tags(text: str, new_text: str, feature: str) -> str

Update the text between the tags [](lampe-sdk-{feature}-start) and [](lampe-sdk-{feature}-end) with new_text. If the tags don't exist, add them at the bottom of the text. The tags and new_text are preserved in the output.
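A usage sketch based on the source below: the first call appends a tagged block, and a second call replaces only the text between the tags:

body = "Original PR body."
first = update_or_add_text_between_tags(body, "Generated description v1", "description")
# first now ends with:
#   [](lampe-sdk-description-start)
#   Generated description v1
#   [](lampe-sdk-description-end)

second = update_or_add_text_between_tags(first, "Generated description v2", "description")
assert "Generated description v2" in second
assert "Generated description v1" not in second
assert second.count("lampe-sdk-description-start") == 1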

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
def update_or_add_text_between_tags(text: str, new_text: str, feature: str) -> str:
    """
    Update the text between the tags [](lampe-sdk-{feature}-start) and [](lampe-sdk-{feature}-end)
    with new_text. If the tags don't exist, add them at the bottom of the text.
    The tags and new_text are preserved in the output.
    """
    identifier = f"lampe-sdk-{feature}-start"
    start_tag = rf"\[\]\(lampe-sdk-{feature}-start\)"
    end_tag = rf"\[\]\(lampe-sdk-{feature}-end\)"

    pattern = re.compile(rf"({start_tag})(.*?|\s*?){end_tag}", re.DOTALL)

    def replacer(match):
        return f"{match.group(1)}\n{new_text}\n[]({identifier.replace('-start', '')}-end)"

    # Try to replace the first occurrence
    result, count = pattern.subn(replacer, text, count=1)

    # If no tags were found, add them at the bottom
    if count == 0:
        result = f"{text}\n\n[]({identifier})\n{new_text}\n[]({identifier.replace('-start', '')}-end)"

    return result
bitbucket
BitbucketProvider(repository: Repository, pull_request: PullRequest)

Bases: Provider

Bitbucket provider for delivering PR descriptions to Bitbucket Cloud API.

Source code in packages/lampe-cli/src/lampe/cli/providers/bitbucket.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    if pull_request.number == 0:
        # Try Bitbucket Pipelines environment variable first, then fallback to PR_NUMBER
        pr_number = os.getenv("BITBUCKET_PR_ID") or os.getenv("PR_NUMBER")
        if not pr_number:
            raise ValueError("BITBUCKET_PR_ID or PR_NUMBER environment variable is required for Bitbucket provider")
        pull_request.number = int(pr_number)

    super().__init__(repository, pull_request)

    # Extract workspace and repository from environment variables
    self.workspace = os.getenv("BITBUCKET_WORKSPACE")
    self.repo_slug = os.getenv("BITBUCKET_REPO_SLUG")

    if not self.workspace or not self.repo_slug:
        raise ValueError(
            "BITBUCKET_WORKSPACE and BITBUCKET_REPO_SLUG environment variables are required for Bitbucket provider"
        )

    # Initialize Bitbucket client with appropriate authentication
    self.base_url, self.auth_headers = self._initialize_bitbucket_client()
deliver_pr_description(payload: PRDescriptionPayload) -> None

Update the PR description on Bitbucket.

Source code in packages/lampe-cli/src/lampe/cli/providers/bitbucket.py
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Update the PR description on Bitbucket."""
    if self.pull_request.number == 0:
        raise ValueError("Cannot update Bitbucket PR description for local run")

    try:
        # Get current PR details
        pr_url = (
            f"{self.base_url}/2.0/repositories/{self.workspace}/"
            f"{self.repo_slug}/pullrequests/{self.pull_request.number}"
        )

        # Fetch current PR to get existing description
        response = requests.get(pr_url, headers=self.auth_headers)
        response.raise_for_status()
        pr_data = response.json()

        # Update description with new content
        current_description = pr_data.get("description", "") or ""
        new_description = update_or_add_text_between_tags(
            current_description, payload.description_with_title, "description"
        )

        # Update the PR
        update_data = {"description": new_description}
        update_response = requests.put(pr_url, json=update_data, headers=self.auth_headers)
        update_response.raise_for_status()

        logger.info(f"✅ Successfully updated PR #{self.pull_request.number} description on Bitbucket")
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Failed to update Bitbucket PR: {e}")
        # Fallback to console output
        logger.info("Description:")
        logger.info(payload.description)
    except Exception as e:
        logger.error(f"❌ Unexpected error updating Bitbucket PR: {e}")
        # Fallback to console output
        logger.info("Description:")
        logger.info(payload.description)
deliver_pr_review(payload: PRReviewPayload) -> None

Post PR review comments on Bitbucket.

Source code in packages/lampe-cli/src/lampe/cli/providers/bitbucket.py
def deliver_pr_review(self, payload: PRReviewPayload) -> None:
    """Post PR review comments on Bitbucket."""
    if self.pull_request.number == 0:
        raise ValueError("Cannot post Bitbucket PR review for local run")

    try:
        # Post review comments for each agent review
        for agent_review in payload.reviews:
            # Post agent summary comment
            if agent_review.summary:
                try:
                    comment_url = (
                        f"{self.base_url}/2.0/repositories/{self.workspace}/"
                        f"{self.repo_slug}/pullrequests/{self.pull_request.number}/comments"
                    )
                    comment_data = {
                        "content": {
                            "raw": f"## {agent_review.agent_name}\n\n"
                            f"**Focus Areas:** {', '.join(agent_review.focus_areas)}\n\n"
                            f"{agent_review.summary}"
                        }
                    }
                    response = requests.post(comment_url, json=comment_data, headers=self.auth_headers)
                    response.raise_for_status()
                except Exception as e:
                    logger.warning(f"Failed to post agent summary for {agent_review.agent_name}: {e}")

            # Post file-specific comments
            for file_review in agent_review.reviews:
                if file_review.line_comments:
                    # Create review comments for specific lines
                    for line, comment in file_review.line_comments.items():
                        try:
                            line_number = int(line)
                        except ValueError:
                            match = re.match(r"\D*(\d+)", str(line))
                            if match:
                                line_number = int(match.group(1))
                            else:
                                line_number = 0
                        try:
                            # Post a comment on the PR
                            comment_url = (
                                f"{self.base_url}/2.0/repositories/{self.workspace}/"
                                f"{self.repo_slug}/pullrequests/{self.pull_request.number}/comments"
                            )
                            comment_data = {
                                "content": {"raw": f"## 🔦🐛\n{comment}"},
                                "inline": {
                                    "from": line_number - 1 if line_number != 0 else 0,
                                    "to": line_number,
                                    "start_from": line_number - 1 if line_number != 0 else 0,
                                    "start_to": line_number,
                                    "path": file_review.file_path,
                                },
                            }
                            response = requests.post(comment_url, json=comment_data, headers=self.auth_headers)
                            response.raise_for_status()
                        except Exception as e:
                            logger.warning(f"Failed to post comment for {file_review.file_path}:{line}: {e}")

                # Post file summary comment if no line comments
                if not file_review.line_comments and file_review.summary:
                    try:
                        comment_url = (
                            f"{self.base_url}/2.0/repositories/{self.workspace}/"
                            f"{self.repo_slug}/pullrequests/{self.pull_request.number}/comments"
                        )
                        comment_data = {"content": {"raw": f"**{file_review.file_path}:** {file_review.summary}"}}
                        response = requests.post(comment_url, json=comment_data, headers=self.auth_headers)
                        response.raise_for_status()
                    except Exception as e:
                        logger.warning(f"Failed to post summary for {file_review.file_path}: {e}")

        logger.info(f"✅ Successfully posted PR #{self.pull_request.number} review comments on Bitbucket")
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Failed to post Bitbucket PR review: {e}")
        # Fallback to console output
        logger.info("Review:")
        logger.info(payload.review_markdown)
    except Exception as e:
        logger.error(f"❌ Unexpected error posting Bitbucket PR review: {e}")
        # Fallback to console output
        logger.info("Review:")
        logger.info(payload.review_markdown)
has_reviewed() -> bool

Check if the token user has already reviewed this PR.

Source code in packages/lampe-cli/src/lampe/cli/providers/bitbucket.py
def has_reviewed(self) -> bool:
    """Check if the token user has already reviewed this PR."""
    if self.pull_request.number == 0:
        return False

    try:
        # Get PR comments
        comments_url = (
            f"{self.base_url}/2.0/repositories/{self.workspace}/"
            f"{self.repo_slug}/pullrequests/{self.pull_request.number}/comments"
        )
        comments_response = requests.get(comments_url, headers=self.auth_headers)
        comments_response.raise_for_status()
        comments_data = comments_response.json()

        # Try to get the current authenticated user (token owner)
        # This works for user tokens but may fail for repository/workspace tokens
        token_user_uuid = None
        token_username = None
        try:
            user_info_response = requests.get(f"{self.base_url}/2.0/user", headers=self.auth_headers)
            user_info_response.raise_for_status()
            user_info = user_info_response.json()
            token_user_uuid = user_info.get("uuid") or user_info.get("account_id")
            if not token_user_uuid:
                token_username = user_info.get("username") or user_info.get("nickname")
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 401:
                # Repository/workspace tokens can't access /2.0/user
                # Fall back to pattern-based detection
                logger.debug("Token doesn't have access to /2.0/user endpoint, using pattern-based detection")
            else:
                raise

        # Check for comments by the token user (if we have user identity)
        if token_user_uuid or token_username:
            for comment in comments_data.get("values", []):
                user = comment.get("user", {})
                if token_user_uuid:
                    if user.get("uuid") == token_user_uuid or user.get("account_id") == token_user_uuid:
                        return True
                elif token_username:
                    if user.get("username") == token_username or user.get("nickname") == token_username:
                        return True

        # Fallback: Check for review comments by pattern (for repository/workspace tokens)
        # Look for comments that match Lampe review format:
        # - Comments starting with "## " (agent name headers)
        # - Comments containing "Focus Areas:"
        # - Comments containing "🔦🐛" (line comment marker)
        review_patterns = [
            r"^##\s+\w+",  # Agent name header (e.g., "## SecurityAgent")
            r"\*\*Focus Areas:\*\*",  # Focus areas marker
            r"##\s*🔦🐛",  # Line comment marker
        ]

        for comment in comments_data.get("values", []):
            content = comment.get("content", {}).get("raw", "") or comment.get("content", {}).get("markup", "")
            if content:
                for pattern in review_patterns:
                    if re.search(pattern, content, re.IGNORECASE | re.MULTILINE):
                        return True

        return False
    except requests.exceptions.RequestException as e:
        logger.warning(f"Failed to check if PR has been reviewed: {e}")
        return False
    except Exception as e:
        logger.warning(f"Unexpected error checking if PR has been reviewed: {e}")
        return False
healthcheck() -> None

Check if the Bitbucket provider is healthy and can connect to Bitbucket.

Source code in packages/lampe-cli/src/lampe/cli/providers/bitbucket.py
def healthcheck(self) -> None:
    """Check if the Bitbucket provider is healthy and can connect to Bitbucket."""
    logger.info("🔍 Checking Bitbucket provider health...")

    # Check Bitbucket environment variables
    workspace = os.getenv("BITBUCKET_WORKSPACE")
    repo_slug = os.getenv("BITBUCKET_REPO_SLUG")

    if not workspace or not repo_slug:
        logger.info("❌ Bitbucket environment variables not set")
        logger.info("   Set both:")
        logger.info("   - BITBUCKET_WORKSPACE (e.g., 'my-workspace')")
        logger.info("   - BITBUCKET_REPO_SLUG (e.g., 'my-repo')")
        raise ValueError("BITBUCKET_WORKSPACE and BITBUCKET_REPO_SLUG environment variables are required")

    logger.info(f"✅ BITBUCKET_WORKSPACE set to: {workspace}")
    logger.info(f"✅ BITBUCKET_REPO_SLUG set to: {repo_slug}")

    # Check authentication environment variables
    token = os.getenv("LAMPE_BITBUCKET_TOKEN")
    app_key = os.getenv("LAMPE_BITBUCKET_APP_KEY")
    app_secret = os.getenv("LAMPE_BITBUCKET_APP_SECRET")

    auth_method = None
    if token:
        auth_method = "Token"
        logger.info("✅ Bitbucket token authentication detected")
    elif app_key and app_secret:
        auth_method = "App"
        logger.info("✅ Bitbucket App authentication detected")
    else:
        logger.info("❌ No Bitbucket authentication found")
        logger.info("   Set either:")
        logger.info("   - LAMPE_BITBUCKET_TOKEN for token authentication")
        logger.info("   - LAMPE_BITBUCKET_APP_KEY and LAMPE_BITBUCKET_APP_SECRET for app authentication")
        raise ValueError("No Bitbucket authentication found")

    # Test Bitbucket connection
    try:
        # Test API access by getting repository info
        repo_url = f"{self.base_url}/2.0/repositories/{workspace}/{repo_slug}"
        response = requests.get(repo_url, headers=self.auth_headers)
        response.raise_for_status()
        repo_data = response.json()

        logger.info(f"✅ Repository access confirmed: {repo_data.get('full_name', f'{workspace}/{repo_slug}')}")
        logger.info(f"   Description: {repo_data.get('description') or 'No description'}")
        logger.info(f"   Private: {repo_data.get('is_private', 'Unknown')}")
        logger.info(f"✅ Bitbucket {auth_method} authentication successful")

    except requests.exceptions.RequestException as e:
        logger.info(f"❌ Bitbucket connection failed: {e}")
        logger.info("\nTroubleshooting tips:")
        if auth_method == "Token":
            logger.info("- Verify LAMPE_BITBUCKET_TOKEN is valid and has appropriate permissions")
            logger.info("- Ensure the token has 'repositories:read' scope")
        else:
            logger.info("- Verify LAMPE_BITBUCKET_APP_KEY and LAMPE_BITBUCKET_APP_SECRET are correct")
            logger.info("- Ensure the Bitbucket App is installed on the workspace")
        raise
    except Exception as e:
        logger.info(f"❌ Unexpected error during Bitbucket healthcheck: {e}")
        raise
console
ConsoleProvider(repository: Repository, pull_request: PullRequest)

Bases: Provider

Console provider for delivering PR descriptions to stdout.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    super().__init__(repository, pull_request)
deliver_pr_description(payload: PRDescriptionPayload) -> None

Print the PR description to console.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Print the PR description to console."""
    print(payload.description)
deliver_pr_review(payload: PRReviewPayload) -> None

Print the PR review to console.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def deliver_pr_review(self, payload: PRReviewPayload) -> None:
    """Print the PR review to console."""
    print(payload.review_markdown)
has_reviewed() -> bool

Check if the token user has already reviewed this PR.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def has_reviewed(self) -> bool:
    """Check if the token user has already reviewed this PR."""
    # Console provider cannot check for existing reviews
    return False
healthcheck() -> None

Check if the console provider is healthy and can connect to the service.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def healthcheck(self) -> None:
    """Check if the console provider is healthy and can connect to the service."""
    logger.info("✅ Console provider is healthy")
github
GitHubProvider(repository: Repository, pull_request: PullRequest)

Bases: Provider

GitHub provider for delivering PR descriptions to GitHub API.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    if pull_request.number == 0:
        pr_number = os.getenv("PR_NUMBER")
        if not pr_number:
            raise ValueError("PR_NUMBER environment variable is required for GitHub provider")
        pull_request.number = int(pr_number)

    super().__init__(repository, pull_request)

    # github action has many default environment variables, including the repository full name:
    # https://docs.github.com/en/actions/reference/workflows-and-actions/variables#default-environment-variables
    if repo_name := os.getenv("GITHUB_REPOSITORY"):
        self.owner, self.repo_name = repo_name.split("/")
    else:
        raise ValueError("GITHUB_REPOSITORY environment variable is required for GitHub provider")

    # Initialize GitHub client with appropriate authentication
    self.github_client = self._initialize_github_client()
deliver_pr_description(payload: PRDescriptionPayload) -> None

Update the PR description on GitHub.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Update the PR description on GitHub."""
    if self.pull_request.number == 0:
        raise ValueError("Cannot update GitHub PR description for local run")

    try:
        repo = self.github_client.get_repo(f"{self.owner}/{self.repo_name}")
        pull_request = repo.get_pull(self.pull_request.number)
        new_description = update_or_add_text_between_tags(
            pull_request.body or "", payload.description_with_title, "description"
        )
        pull_request.edit(body=new_description)
        logger.info(f"✅ Successfully updated PR #{self.pull_request.number} description on GitHub")
    except Exception as e:
        logger.info(f"❌ Failed to update GitHub PR: {e}")
        # Fallback to console output
        logger.info("Description:")
        logger.info(payload.description)
deliver_pr_review(payload: PRReviewPayload) -> None

Post PR review comments on GitHub.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def deliver_pr_review(self, payload: PRReviewPayload) -> None:
    """Post PR review comments on GitHub."""
    if self.pull_request.number == 0:
        raise ValueError("Cannot post GitHub PR review for local run")

    try:
        repo = self.github_client.get_repo(f"{self.owner}/{self.repo_name}")
        pull_request = repo.get_pull(self.pull_request.number)

        # Post review comments for each agent review
        for agent_review in payload.reviews:
            # Post agent summary comment
            if agent_review.summary:
                try:
                    pull_request.create_issue_comment(
                        f"## {agent_review.agent_name}\n\n"
                        f"**Focus Areas:** {', '.join(agent_review.focus_areas)}\n\n"
                        f"{agent_review.summary}"
                    )
                except Exception as e:
                    logger.warning(f"Failed to post agent summary for {agent_review.agent_name}: {e}")

            # Post file-specific comments
            for file_review in agent_review.reviews:
                if file_review.line_comments:
                    # Create review comments for specific lines
                    for line, comment in file_review.line_comments.items():
                        try:
                            # Post a review comment
                            pull_request.create_review_comment(
                                body=f"## 🔦🐛\n{comment}",
                                commit=pull_request.head.sha,
                                path=file_review.file_path,
                                line=int(line),
                            )
                        except Exception as e:
                            logger.warning(f"Failed to post comment for {file_review.file_path}:{line}: {e}")
                            # Fallback: post as general comment
                            pull_request.create_issue_comment(
                                f"**{file_review.file_path} (Line {line}):** {comment}"
                            )

                # Post summary comment if no line comments
                if not file_review.line_comments and file_review.summary:
                    pull_request.create_issue_comment(f"**{file_review.file_path}:** {file_review.summary}")

        logger.info(f"✅ Successfully posted PR #{self.pull_request.number} review comments on GitHub")
    except Exception as e:
        logger.info(f"❌ Failed to post GitHub PR review: {e}")
        # Fallback to console output
        logger.info("Review:")
        logger.info(payload.review_markdown)
has_reviewed() -> bool

Check if the token user has already reviewed this PR.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def has_reviewed(self) -> bool:
    """Check if the token user has already reviewed this PR."""
    if self.pull_request.number == 0:
        return False

    try:
        repo = self.github_client.get_repo(f"{self.owner}/{self.repo_name}")
        pull_request = repo.get_pull(self.pull_request.number)

        # Get the authenticated user
        authenticated_user = self.github_client.get_user()

        # Check issue comments (where reviews are posted)
        comments = pull_request.get_issue_comments()
        for comment in comments:
            if comment.user.login == authenticated_user.login:
                return True

        # Also check review comments (inline comments)
        review_comments = pull_request.get_review_comments()
        for comment in review_comments:
            if comment.user.login == authenticated_user.login:
                return True

        return False
    except Exception as e:
        logger.warning(f"Failed to check if PR has been reviewed: {e}")
        return False
healthcheck() -> None

Check if the GitHub provider is healthy and can connect to GitHub.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def healthcheck(self) -> None:
    """Check if the GitHub provider is healthy and can connect to GitHub."""
    logger.info("🔍 Checking GitHub provider health...")

    # Check GitHub repository environment variable
    github_repo = os.getenv("GITHUB_REPOSITORY")
    if not github_repo or len(github_repo.split("/")) != 2:
        logger.info("❌ GITHUB_REPOSITORY environment variable not set")
        logger.info("   Set it to 'owner/repo' format (e.g., 'montagne-dev/lampe')")
        raise ValueError("GITHUB_REPOSITORY environment variable not set")
    logger.info(f"✅ GITHUB_REPOSITORY set to: {github_repo}")

    # Check authentication environment variables
    app_id = os.getenv("LAMPE_GITHUB_APP_ID")
    private_key = os.getenv("LAMPE_GITHUB_APP_PRIVATE_KEY")
    token = os.getenv("LAMPE_GITHUB_TOKEN")

    auth_method = None
    if app_id and private_key:
        auth_method = "GitHub App"
        logger.info(f"✅ GitHub App authentication detected (App ID: {app_id})")
    elif token:
        auth_method = "User Token"
        logger.info("✅ User token authentication detected")
    else:
        logger.info("❌ No GitHub authentication found")
        logger.info("   Set either:")
        logger.info("   - LAMPE_GITHUB_APP_ID and LAMPE_GITHUB_APP_PRIVATE_KEY for GitHub App")
        logger.info("   - LAMPE_GITHUB_TOKEN for user token authentication")
        raise ValueError("No GitHub authentication found")

    # Test GitHub connection
    try:
        # Test API access by getting repository info
        repo_info = self.github_client.get_repo(github_repo)
        logger.info(f"✅ Repository access confirmed: {repo_info.full_name}")
        logger.info(f"   Description: {repo_info.description or 'No description'}")
        logger.info(f"   Private: {repo_info.private}")
        logger.info(f"✅ GitHub {auth_method} authentication successful")

    except Exception as e:
        logger.info(f"❌ GitHub connection failed: {e}")
        logger.info("\nTroubleshooting tips:")
        if auth_method == "GitHub App":
            logger.info("- Verify LAMPE_GITHUB_APP_ID and LAMPE_GITHUB_APP_PRIVATE_KEY are correct")
            logger.info("- Ensure the GitHub App is installed on the repository")
            logger.info("- Check that the private key is properly formatted")
        else:
            logger.info("- Verify LAMPE_GITHUB_TOKEN is valid and has appropriate permissions")
            logger.info("- Ensure the token has 'repo' scope for private repositories")
        raise

core

data_models

Issue

Bases: BaseModel

Individual issue to be resolved.

PullRequest

Bases: BaseModel

Pull request information.

Repository

Bases: BaseModel

Repository information.

issue
Issue

Bases: BaseModel

Individual issue to be resolved.

pull_request
PullRequest

Bases: BaseModel

Pull request information.

repository
Repository

Bases: BaseModel

Repository information.

gitconfig

init_git()

Initialize Git configuration and check version requirements.

Source code in src/lampe/core/gitconfig.py
def init_git():
    """Initialize Git configuration and check version requirements."""
    logger.debug("Initializing Git configuration...")
    valid_git_version_available()
valid_git_version_available() -> bool

Check if the installed Git version meets the minimum requirement.

Returns:

- bool: True if Git version meets requirement, False otherwise
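The normalization step is what makes platform-suffixed versions comparable. A hedged illustration of the parsing done in the source below; the minimum version used here is illustrative, not the real MINIMUM_GIT_VERSION:

from packaging import version

raw = "git version 2.39.0.windows.1".split()[2]  # "2.39.0.windows.1"
semver = ".".join(raw.split(".")[:3])            # keep only "2.39.0"
assert version.parse(semver) >= version.parse("2.30.0")  # illustrative minimum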

Source code in src/lampe/core/gitconfig.py
def valid_git_version_available() -> bool:
    """
    Check if the installed Git version meets the minimum requirement.

    Returns
    -------
    :
        True if Git version meets requirement, False otherwise
    """
    try:
        version_line = git.Git().version().strip()
        if not version_line:
            logger.critical("Unable to determine Git version from output.")
            return False

        # Extract version number from output like "git version 2.39.0"
        version_parts = version_line.split()
        if len(version_parts) < 3:
            logger.critical(f"Unexpected Git version output format: {version_line}")
            return False

        current_version = version_parts[2]

        # Handle version strings with additional info (e.g., "2.39.0.windows.1")
        # Take only the semantic version part
        current_version = current_version.split(".")[0:3]
        current_version = ".".join(current_version)

        if version.parse(current_version) >= version.parse(MINIMUM_GIT_VERSION):
            logger.debug(f"Git version {current_version} meets requirement ({MINIMUM_GIT_VERSION}+)")
            return True
        else:
            logger.critical(
                f"CRITICAL: Git version {current_version} does not meet the minimum requirement "
                f"({MINIMUM_GIT_VERSION}+). The lampe-sdk requires Git {MINIMUM_GIT_VERSION} or higher "
                f"for proper functionality. Git operations may fail or behave unexpectedly. "
                f"Please upgrade your Git installation. See the README for installation instructions."
            )
            return False
    except Exception as e:
        logger.critical(f"Unexpected error while checking Git version: {e}")
        return False

parsers

MarkdownCodeBlockRemoverOutputParser

Bases: BaseOutputParser

Output parser that extracts and returns the content of markdown code blocks marked with 'md' or 'markdown'.

This parser is designed to process LLM outputs or other text that may contain markdown code blocks.
It specifically targets code blocks with the language tag 'md' or 'markdown', removing the code block
markers and returning only the inner content. If no such block is found, it falls back to extracting
a generic code block (```). If the result still contains any other code block (with a language tag),
it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace)
is returned.
Edge Cases:
- If the input is an empty string, returns an empty string.
- If the input contains a code block with a language other than 'md' or 'markdown', it is preserved.
- If the input contains text before or after a markdown code block, only the content inside the block is returned.
- If the input contains an incomplete code block, returns the input with the trailing backticks removed if present.
Examples
>>> parser = MarkdownCodeBlockRemoverOutputParser()
>>> text = '''```md
... This is inside md block.
... ```'''
>>> parser.parse(text)
'This is inside md block.'

>>> text = '''```python
... Multiple lines
... are here.
... ```'''
>>> parser.parse(text)
'```python\nMultiple lines\nare here.\n```'

>>> text = 'No code block here.'
>>> parser.parse(text)
'No code block here.'
parse(output: str) -> str

Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

If the input contains a markdown code block with language tag 'md' or 'markdown', the content inside that block is returned, with the code block markers removed. If no such block is found, but a generic code block (```) is present, its content is returned. If the result still contains any other code block (with a language tag), it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.

Source code in src/lampe/core/parsers/markdown_code_block_remover_output.py
def parse(self, output: str) -> str:
    """
    Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

    If the input contains a markdown code block with language tag 'md' or 'markdown',
    the content inside that block is returned, with the code block markers removed.
    If no such block is found, but a generic code block (```) is present, its content is returned.
    If the result still contains any other code block (with a language tag), it is preserved as-is.
    If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.
    """
    if output == "":
        return output
    # Try to extract content from markdown code blocks with specific languages
    content = (
        extract_md_code_block(output, "md")
        or extract_md_code_block(output, "markdown")
        or extract_md_code_block(output, "")
    ) or output.strip()

    if extract_md_code_block(content, match_any_language=True) is not None:
        # if there is any other remaining code block, we don't want to remove triple backticks
        return content

    if content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content
YAMLPydanticOutputParser

Bases: PydanticOutputParser[Model], Generic[Model]

A parser that extracts and validates YAML content using Pydantic models.

Parameters:

- output_cls: Pydantic output class used for validation (required)
- excluded_schema_keys_from_format: Schema keys to exclude from format string; by default None
- pydantic_format_tmpl: Template for format string; by default PYDANTIC_FORMAT_TMPL
Notes

This parser extracts YAML content from markdown code blocks, validates the structure using a Pydantic model, and returns the validated data. It first looks for YAML-specific code blocks, then falls back to any code block if needed.
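A minimal usage sketch, assuming the parser can be constructed with just output_cls (this page does not make the other constructor arguments' requirements clear); the output model is hypothetical:

from pydantic import BaseModel

class ReviewVerdict(BaseModel):
    # Hypothetical output model for illustration.
    verdict: str
    issues: list[str]

parser = YAMLPydanticOutputParser(output_cls=ReviewVerdict)  # other ctor args omitted (assumption)

llm_output = """Here is the result:
```yaml
verdict: approve
issues:
  - tighten the regex in parse()
```"""

assert parser.parse(llm_output).verdict == "approve"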

format_string: str property

Get the format string that instructs the LLM how to output YAML.

This method will provide a format string that includes the Pydantic model's JSON schema converted to a YAML example, helping the LLM understand the expected output structure.

Returns:

- str: Format string with YAML schema example

Raises:

- NotImplementedError: The method is not yet implemented

parse(text: str) -> Model

Extract, parse and validate YAML content using the configured Pydantic model.

Parameters:

- text (str): Raw text containing YAML content in markdown code blocks (required)

Returns:

- Model: Validated data matching the Pydantic model structure

Raises:

- YAMLParsingError: If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
- ValidationError: If the data does not match the Pydantic model schema

Source code in src/lampe/core/parsers/yaml_pydantic_output.py
def parse(self, text: str) -> Model:
    """
    Extract, parse and validate YAML content using the configured Pydantic model.

    Parameters
    ----------
    text
        Raw text containing YAML content in markdown code blocks

    Returns
    -------
    :
        Validated data matching the Pydantic model structure

    Raises
    ------
    YAMLParsingError
        If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
    ValidationError
        If the data does not match the Pydantic model schema
    """
    if not text:
        raise YAMLParsingError("No text provided")

    yaml_block = extract_md_code_block(text, "yaml")
    if not yaml_block:
        logger.warning("No YAML block found, attempting to parse generic code block")
        yaml_block = extract_md_code_block(text)
    if not yaml_block:
        yaml_block = text
    try:
        data = yaml.safe_load(yaml_block)
    except yaml.YAMLError as e:
        raise YAMLParsingError(f"Invalid YAML syntax: {e}") from e

    return self.output_cls.model_validate(data)
markdown_code_block_remover_output
MarkdownCodeBlockRemoverOutputParser

Bases: BaseOutputParser

Output parser that extracts and returns the content of markdown code blocks marked with 'md' or 'markdown'.

This parser is designed to process LLM outputs or other text that may contain markdown code blocks.
It specifically targets code blocks with the language tag 'md' or 'markdown', removing the code block
markers and returning only the inner content. If no such block is found, it falls back to extracting
a generic code block (```). If the result still contains any other code block (with a language tag),
it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace)
is returned.
Edge Cases:
- If the input is an empty string, returns an empty string.
- If the input contains a code block with a language other than 'md' or 'markdown', it is preserved.
- If the input contains text before or after a markdown code block, only the content inside the block is returned.
- If the input contains an incomplete code block, returns the input with the trailing backticks removed if present.
Examples
>>> parser = MarkdownCodeBlockRemoverOutputParser()
>>> text = '''```md
... This is inside md block.
... ```'''
>>> parser.parse(text)
'This is inside md block.'

>>> text = '''```python
... Multiple lines
... are here.
... ```'''
>>> parser.parse(text)
'```python\nMultiple lines\nare here.\n```'

>>> text = 'No code block here.'
>>> parser.parse(text)
'No code block here.'
parse(output: str) -> str

Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

If the input contains a markdown code block with language tag 'md' or 'markdown', the content inside that block is returned, with the code block markers removed. If no such block is found, but a generic code block (```) is present, its content is returned. If the result still contains any other code block (with a language tag), it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.

Source code in src/lampe/core/parsers/markdown_code_block_remover_output.py
def parse(self, output: str) -> str:
    """
    Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

    If the input contains a markdown code block with language tag 'md' or 'markdown',
    the content inside that block is returned, with the code block markers removed.
    If no such block is found, but a generic code block (```) is present, its content is returned.
    If the result still contains any other code block (with a language tag), it is preserved as-is.
    If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.
    """
    if output == "":
        return output
    # Try to extract content from markdown code blocks with specific languages
    content = (
        extract_md_code_block(output, "md")
        or extract_md_code_block(output, "markdown")
        or extract_md_code_block(output, "")
    ) or output.strip()

    if extract_md_code_block(content, match_any_language=True) is not None:
        # if there is any other remaining code block, we don't want to remove triple backticks
        return content

    if content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content
utils
extract_md_code_block(output: str, language: str = '', match_any_language: bool = False) -> str | None

Extract markdown code block content from a string, handling nested code blocks.

Parameters:

- output (str, required): The string to extract code block content from.
- language (str, default ''): The language identifier for the code block (e.g., 'yaml', 'python', 'json').
- match_any_language (bool, default False): If True, the language of the code block is optional and the function will return the first code block found.

Returns:

- str | None: The extracted code block content, or None if no matching code block is found.

Notes

This function extracts content between ```{language} tags, preserving any nested code blocks within the content. The regex pattern handles:
- Optional text before the code block
- Nested code blocks (e.g. ```json, ```python, ``` inside the main block)
- Proper indentation of nested content
- Case-insensitive language tag matching

Examples:

>>> text = '''
... Some text
... ```yaml
... key: value
... nested: |
...   ```python
...   print("Hello")
...   ```
... ```
... '''
>>> result = extract_md_code_block(text, 'yaml')
>>> print(result)
key: value
nested: |
  ```python
  print("Hello")
  ```
Source code in src/lampe/core/parsers/utils.py
def extract_md_code_block(output: str, language: str = "", match_any_language: bool = False) -> str | None:
    """Extract markdown code block content from a string, handling nested code blocks.

    Parameters
    ----------
    output : str
        The string to extract code block content from.
    language : str
        The language identifier for the code block (e.g., 'yaml', 'python', 'json').
    match_any_language : bool
        If True, the language of the code block is optional and the function will return the first code block found.
    Returns
    -------
    :
        The extracted code block content, or None if no matching code block is found.

    Notes
    -----
    This function extracts content between ```{language} tags, preserving any nested
    code blocks within the content. The regex pattern handles:
    - Optional text before the code block
    - Nested code blocks (e.g. ```json, ```python, ``` inside the main block)
    - Proper indentation of nested content
    - Case-insensitive language tag matching

    Examples
    --------
    >>> text = '''
    ... Some text
    ... ```yaml
    ... key: value
    ... nested: |
    ...   ```python
    ...   print("Hello")
    ...   ```
    ... ```
    ... '''
    >>> result = extract_md_code_block(text, 'yaml')
    >>> print(result)
    key: value
    nested: |
      ```python
      print("Hello")
      ```
    """

    if match_any_language:
        code_block_pattern = MARKDOWN_CODE_BLOCK_PATTERN.format(language=MARKDOWN_CODE_BLOCK_MATCH_ANY_LANGUAGE_PATTERN)
    else:
        code_block_pattern = MARKDOWN_CODE_BLOCK_PATTERN.format(language=language)

    result = re.search(code_block_pattern, output, re.MULTILINE | re.IGNORECASE | re.DOTALL)
    if result:
        return result.group(1)
    return None
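A short illustrative sketch of the fallback behaviors, assuming the function is importable as lampe.core.parsers.utils (mirroring the source path above):

from lampe.core.parsers.utils import extract_md_code_block

text = 'Intro\n```json\n{"a": 1}\n```\n'

# No ```yaml block is present, so a language-specific lookup finds nothing
print(extract_md_code_block(text, "yaml"))  # None

# match_any_language=True returns the first block regardless of its tag
print(extract_md_code_block(text, match_any_language=True))  # inner content: {"a": 1}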
yaml_pydantic_output
YAMLParsingError

Bases: Exception

Raised when YAML parsing or validation fails.

YAMLPydanticOutputParser

Bases: PydanticOutputParser[Model], Generic[Model]

A parser that extracts and validates YAML content using Pydantic models.

Parameters:

- output_cls: Pydantic output class used for validation (required)
- excluded_schema_keys_from_format: Schema keys to exclude from the format string, by default None
- pydantic_format_tmpl: Template for the format string, by default PYDANTIC_FORMAT_TMPL
Notes

This parser extracts YAML content from markdown code blocks, validates the structure using a Pydantic model, and returns the validated data. It first looks for YAML-specific code blocks, then falls back to any code block if needed.

format_string: str property

Get the format string that instructs the LLM how to output YAML.

This method will provide a format string that includes the Pydantic model's JSON schema converted to a YAML example, helping the LLM understand the expected output structure.

Returns:

- str: Format string with YAML schema example

Raises:

- NotImplementedError: The method is not yet implemented

parse(text: str) -> Model

Extract, parse and validate YAML content using the configured Pydantic model.

Parameters:

- text (str, required): Raw text containing YAML content in markdown code blocks

Returns:

- Model: Validated data matching the Pydantic model structure

Raises:

- YAMLParsingError: If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
- ValidationError: If the data does not match the Pydantic model schema

Source code in src/lampe/core/parsers/yaml_pydantic_output.py
def parse(self, text: str) -> Model:
    """
    Extract, parse and validate YAML content using the configured Pydantic model.

    Parameters
    ----------
    text
        Raw text containing YAML content in markdown code blocks

    Returns
    -------
    :
        Validated data matching the Pydantic model structure

    Raises
    ------
    YAMLParsingError
        If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
    ValidationError
        If the data does not match the Pydantic model schema
    """
    if not text:
        raise YAMLParsingError("No text provided")

    yaml_block = extract_md_code_block(text, "yaml")
    if not yaml_block:
        logger.warning("No YAML block found, attempting to parse generic code block")
        yaml_block = extract_md_code_block(text)
    if not yaml_block:
        yaml_block = text
    try:
        data = yaml.safe_load(yaml_block)
    except yaml.YAMLError as e:
        raise YAMLParsingError(f"Invalid YAML syntax: {e}") from e

    return self.output_cls.model_validate(data)
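A minimal end-to-end sketch. The import path and the output_cls keyword are assumed from the class documentation above; the Review model is hypothetical:

from pydantic import BaseModel

from lampe.core.parsers.yaml_pydantic_output import YAMLPydanticOutputParser

class Review(BaseModel):  # hypothetical output model
    summary: str
    score: int

llm_output = """Here is my assessment:
```yaml
summary: Solid change, minor nits
score: 8
```"""

parser = YAMLPydanticOutputParser(output_cls=Review)
review = parser.parse(llm_output)  # extracts the ```yaml block and validates it
print(review.score)  # 8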

tools

TempGitRepository(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True)

Context manager for cloning and cleaning up a local clone of a repository.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content. Upon exit, it will attempt to delete the cloned repository.

Attributes:

- repo_url: Repository URL to clone
- head_ref: Optional head ref to check out
- base_ref: Optional base ref to fetch for diff computation
- folder_name: Optional name prefix for temp directory
- sparse: Enable sparse checkout mode to avoid populating all files initially
- shallow: Enable shallow clone (depth=1) to fetch only the target commit
- blob_filter: Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
- remove_existing: Remove existing directory if it exists

Raises:

- RuntimeError: If Git version check fails
- GitCommandError: If clone operation fails
- UnableToDeleteError: If unable to delete the cloned repository

Source code in src/lampe/core/tools/repository/management.py
def __init__(
    self,
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
):
    self.repo_url = repo_url
    self.head_ref = head_ref
    self.base_ref = base_ref
    self.folder_name = folder_name
    self.sparse = sparse
    self.shallow = shallow
    self.blob_filter = blob_filter
    self.remove_existing = remove_existing
    self.path_to_local_repo = None
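A sketch of typical usage, assuming the class is importable from lampe.core.tools.repository.management (the source path above) and that entering the context yields the local clone path, as the path_to_local_repo attribute suggests; the URL and refs are placeholders:

from lampe.core.tools.repository.management import TempGitRepository

with TempGitRepository(
    "https://example.com/owner/repo.git",  # placeholder URL
    head_ref="feature-branch",
    base_ref="main",
) as repo_path:
    print(f"clone available at {repo_path}")
# On exit the temporary clone is deleted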
clone_repo(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True) -> str

Clone a repository optimized for PR review.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content.

Parameters:

- repo_url (str, required): Repository URL to clone
- head_ref (str | None, default None): Head ref to checkout
- base_ref (str | None, default None): Base ref to fetch for diff computation
- folder_name (str | None, default None): Optional name prefix for temp directory
- sparse (bool, default True): Enable sparse checkout mode to avoid populating all files initially
- shallow (bool, default True): Enable shallow clone (depth=1) to fetch only the target commit
- blob_filter (bool, default True): Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
- remove_existing (bool, default True): Remove existing directory if it exists

Returns:

- str: Path to the cloned repository

Raises:

- RuntimeError: If Git version check fails
- GitCommandError: If clone operation fails

Source code in src/lampe/core/tools/repository/management.py
def clone_repo(
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
) -> str:
    """Clone a repository optimized for PR review.

    Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering
    to efficiently fetch only required content.

    Parameters
    ----------
    repo_url
        Repository URL to clone
    head_ref
        Head ref to checkout
    base_ref
        Base ref to fetch for diff computation
    folder_name
        Optional name prefix for temp directory
    sparse
        Enable sparse checkout mode to avoid populating all files initially
    shallow
        Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter
        Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
    remove_existing
        Remove existing directory if it exists

    Returns
    -------
    :
        Path to the cloned repository

    Raises
    ------
    RuntimeError
        If Git version check fails
    GitCommandError
        If clone operation fails
    """
    if not valid_git_version_available():
        raise RuntimeError("Git version check failed. Please upgrade Git to the minimum required version.")

    tmp_dir = f"/tmp/{folder_name}" if folder_name else mkdtemp(prefix=str(uuid.uuid4()))
    logger.info(f"Cloning repo (sparse={sparse}, shallow={shallow}, blob_filter={blob_filter}) to {tmp_dir}")

    if os.path.exists(tmp_dir):
        if remove_existing:
            logger.info(f"Removing existing directory {tmp_dir}")
            shutil.rmtree(tmp_dir)
        else:
            return tmp_dir

    clone_args = []
    if shallow:
        clone_args.extend(["--depth", "1"])
    if sparse:
        clone_args.append("--sparse")
    if blob_filter:
        clone_args.extend(["--filter", "blob:none"])
    if head_ref:
        clone_args.extend(["--revision", head_ref])

    try:
        repository_path = ""
        repo = Repo.clone_from(repo_url, tmp_dir, multi_options=clone_args)
        repository_path = _repo_to_path(repo)
        if sparse and blob_filter:
            logger.info("Partial clone ready - file contents will be fetched on-demand during git operations")
        if base_ref:
            fetch_commit_ref(repository_path, base_ref)
    except GitCommandError as e:
        logger.exception(f"Clone failed: {e}\nClone arguments used: {clone_args}")
        raise e

    return repository_path
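The function-call equivalent of the context manager above; a sketch with placeholder URL and refs, assuming the same module path:

from lampe.core.tools.repository.management import clone_repo

path = clone_repo(
    "https://example.com/owner/repo.git",  # placeholder URL
    head_ref="abc1234",          # commit to check out
    base_ref="main",             # fetched as well, for later diff computation
    folder_name="review-clone",  # clone lands in /tmp/review-clone
)
print(path)
# Unlike TempGitRepository, cleanup is the caller's responsibility here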
get_diff_between_commits(base_hash: str, head_hash: str = 'HEAD', files_exclude_patterns: list[str] | None = None, files_include_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, batch_size: int = 50, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Get the diff between two commits, optionally filtering files by glob patterns.

The filtering is done in a specific order to ensure correct pattern application:

1. First, if include patterns are provided, only files matching those patterns are kept
2. Then, exclude patterns are applied to filter out matching files
3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

This order ensures that reinclude patterns only affect files that were actually excluded, preventing the reinclude of files that weren't matched by include patterns in the first place.

Parameters:

- base_hash (str, required): Base commit hash to compare from
- head_hash (str, default 'HEAD'): Head commit hash to compare to. If not provided, uses HEAD
- files_exclude_patterns (list[str] | None, default None): List of glob patterns to exclude from the diff (relative to repo root). These patterns take precedence over include patterns.
- files_include_patterns (list[str] | None, default None): List of glob patterns to include in the diff (relative to repo root). Note that exclude patterns will override these if there are conflicts.
- files_reinclude_patterns (list[str] | None, default None): List of glob patterns to re-include files that were excluded by the exclude patterns. These patterns will only affect files that were previously excluded.
- batch_size (int, default 50): Number of files to process in each batch
- include_line_numbers (bool, default False): Whether to include line numbers in diff output
- repo_path (str, default '/tmp/'): Path to the git repository

Returns:

- str: Diff as a string

Raises:

- DiffNotFoundError: If there is an unexpected git error

Source code in src/lampe/core/tools/repository/diff.py
def get_diff_between_commits(
    base_hash: str,
    head_hash: str = "HEAD",
    files_exclude_patterns: list[str] | None = None,
    files_include_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    batch_size: int = 50,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Get the diff between two commits, optionally filtering files by glob patterns.

    The filtering is done in a specific order to ensure correct pattern application:
    1. First, if include patterns are provided, only files matching those patterns are kept
    2. Then, exclude patterns are applied to filter out matching files
    3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

    This order ensures that reinclude patterns only affect files that were actually excluded,
    preventing the reinclude of files that weren't matched by include patterns in the first place.

    Parameters
    ----------
    base_hash
        Base commit hash to compare from
    head_hash
        Head commit hash to compare to. If not provided, uses HEAD
    files_exclude_patterns
        List of glob patterns to exclude from the diff (relative to repo root).
        These patterns take precedence over include patterns.
    files_include_patterns
        List of glob patterns to include in the diff (relative to repo root).
        Note that exclude patterns will override these if there are conflicts.
    files_reinclude_patterns
        List of glob patterns to re-include files that were excluded by the exclude patterns.
        These patterns will only affect files that were previously excluded.
    repo_path
        Path to the git repository
    batch_size
        Number of files to process in each batch.
    include_line_numbers
        Whether to include line numbers in diff output (default: False)
    Returns
    -------
    :
        Diff as a string

    Raises
    ------
    DiffNotFoundError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        changed_files = ""
        with LocalCommitsAvailability(repo_path, [base_hash, head_hash]):
            changed_files = repo.git.diff(base_hash, head_hash, "--name-only")

        if files_include_patterns and files_exclude_patterns:
            include_patterns = set(files_include_patterns)
            exclude_patterns = set(files_exclude_patterns)
            overlap = include_patterns & exclude_patterns
            if overlap:
                logger.warning(
                    f"Overlapping patterns found in include and exclude patterns: {overlap}. "
                    "Exclude patterns will take precedence as per git pathspec documentation."
                )

        filtered_files = []
        for f in changed_files.splitlines():
            if files_include_patterns and not any(fnmatch(f, pat) for pat in files_include_patterns):
                continue
            if files_exclude_patterns and any(fnmatch(f, pat) for pat in files_exclude_patterns):
                if not (files_reinclude_patterns and any(fnmatch(f, pat) for pat in files_reinclude_patterns)):
                    continue
            filtered_files.append(f)

        diffs = []
        for batch in batched(filtered_files, batch_size):
            diff = repo.git.diff(base_hash, head_hash, "--", *batch)
            if diff:
                diffs.append(sanitize_utf8(diff))
            elif include_line_numbers:
                # Git diff already includes line numbers in the @@ -X,Y +A,B @@ format
                # and shows line numbers in the context, so we don't need to modify it
                pass
        return "\n".join(diffs)
    except GitCommandError as e:
        logger.exception(f"Unexpected error getting diff: {e}")
        raise DiffNotFoundError(f"Diff not found for commits {base_hash} and {head_hash}") from e
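A sketch of the exclude/reinclude interplay described above, assuming the function is importable from lampe.core.tools.repository.diff and that a clone already exists at the placeholder path:

from lampe.core.tools.repository.diff import get_diff_between_commits

diff = get_diff_between_commits(
    base_hash="main",
    head_hash="HEAD",
    files_exclude_patterns=["docs/*"],         # drop documentation changes...
    files_reinclude_patterns=["docs/api.md"],  # ...except this one file
    repo_path="/tmp/review-clone",             # placeholder clone path
)
print(diff)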
view_file(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:

- commit_hash (str, required): Commit reference (e.g., "main", commit hash)
- file_path (str, required): Path to the file within the repository
- line_start (int | None, default None): Line range start index (0-based) of head_content to extract content from
- line_end (int | None, default None): Line range end index (0-based) of head_content to extract content to
- include_line_numbers (bool, default False): Whether to prefix each line with its line number
- repo_path (str, default '/tmp/'): Path to the git repository

Returns:

- str: File content as a string; empty string if the file doesn't exist or the line range is invalid

Raises:

- GitCommandError: If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of head_content to extract content from
    line_end
        Line range end index (0-based) of head_content to extract content to
    include_line_numbers
        Whether to prefix each line with its line number (default: False)
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
            blob = sanitize_utf8(blob)
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])

        if include_line_numbers:
            lines = blob.splitlines()
            numbered_lines = []
            start_line = 0 if line_start is None else line_start
            for i, line in enumerate(lines):
                line_number = start_line + i
                numbered_lines.append(f"{line_number:>6}| {line}")
            blob = "\n".join(numbered_lines)

        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
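A sketch of reading a numbered line range, assuming the function is importable from lampe.core.tools.repository.content; note the 0-based, inclusive indices:

from lampe.core.tools.repository.content import get_file_content_at_commit

snippet = get_file_content_at_commit(
    commit_hash="HEAD",
    file_path="src/app.py",   # placeholder path
    line_start=0,             # first line (0-based)
    line_end=9,               # tenth line, inclusive
    include_line_numbers=True,
    repo_path="/tmp/review-clone",
)
print(snippet)  # each line prefixed like '     0| ...'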
repository
FileDiffInfo

Bases: BaseModel

Information about a single file diff.

LocalCommitsAvailability(repo_path: str, commits: list[str])

Context manager to check if commits are available locally before git operations.

Checks if specified commits exist locally using git fsck --root and fetches them if they're not present. This is useful for ensuring all required commits are available before performing git operations that depend on them.

Attributes:

- repo_path: Path to the git repository
- commits: List of commit references to check and fetch if needed

Source code in src/lampe/core/tools/repository/management.py
def __init__(self, repo_path: str, commits: list[str]):
    self.repo_path = repo_path
    self.commits = commits
    self.repo = Repo(path=repo_path)
    self._fetched_commits = []
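A sketch of guarding a diff so that both commits are fetched if missing locally, assuming the class is importable from lampe.core.tools.repository.management:

from git import Repo

from lampe.core.tools.repository.management import LocalCommitsAvailability

repo_path = "/tmp/review-clone"  # placeholder clone path
with LocalCommitsAvailability(repo_path, ["main", "HEAD"]):
    # Both refs are available locally inside the block
    print(Repo(repo_path).git.diff("main", "HEAD", "--name-only"))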
fetch_commit_ref(repo_path: str, commit_ref: str) -> None

Fetch a base reference from the remote repository.

Parameters:

- repo_path (str, required): Path to the git repository
- commit_ref (str, required): Commit reference to fetch (e.g., branch name, commit hash)

Raises:

- GitCommandError: If the fetch operation fails

Source code in src/lampe/core/tools/repository/management.py
def fetch_commit_ref(repo_path: str, commit_ref: str) -> None:
    """Fetch a base reference from the remote repository.

    Parameters
    ----------
    repo_path
        Path to the git repository
    commit_ref
        Commit reference to fetch (e.g., branch name, commit hash)

    Raises
    ------
    GitCommandError
        If the fetch operation fails
    """
    repo = Repo(path=repo_path)

    repo.git.fetch("--no-tags", "--depth=1", "--filter=blob:none", "origin", commit_ref)
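A one-line sketch with placeholder arguments, assuming the same module path as above:

from lampe.core.tools.repository.management import fetch_commit_ref

# Shallow, blob-filtered fetch of the base branch from origin
fetch_commit_ref("/tmp/review-clone", "main")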
find_files_by_pattern(pattern: str, repo_path: str = '/tmp/') -> str

Search for files using git ls-files and pattern matching.

Parameters:

- pattern (str, required): Pattern to search for (e.g. "*.py", "src/**/*.md")
- repo_path (str, default '/tmp/'): Path to git repository

Returns:

- str: Formatted string containing matching file paths

Source code in src/lampe/core/tools/repository/search.py
def find_files_by_pattern(pattern: str, repo_path: str = "/tmp/") -> str:
    """Search for files using git ls-files and pattern matching.

    Parameters
    ----------
    pattern
        Pattern to search for (e.g. "*.py", "src/**/*.md")
    repo_path
        Path to git repository

    Returns
    -------
    str
        Formatted string containing matching file paths
    """
    repo = Repo(path=repo_path)
    try:
        # Filter files matching pattern using git's pathspec matching
        ls_output = repo.git.ls_files("--", pattern)
        ls_output = sanitize_utf8(ls_output)
        matching = ls_output.splitlines()

        if not matching:
            return "No files found"

        return f"```shell\n{'\n'.join(matching)}\n```"

    except GitCommandError as e:
        logger.exception(f"Error finding files: {e}")
        return f"Error: {str(e)}"
get_diff_for_files(base_reference: str, file_paths: list[str] | None = None, head_reference: str = 'HEAD', repo_path: str = '/tmp/', batch_size: int = 50) -> str

Get the diff between two commits, optionally for specific files.

Parameters:

- base_reference (str, required): Base commit reference (e.g., "main", commit hash)
- file_paths (list[str] | None, default None): List of file paths to get diff for
- head_reference (str, default 'HEAD'): Head commit reference (e.g., "feature", commit hash)
- repo_path (str, default '/tmp/'): Path to git repository
- batch_size (int, default 50): Number of files to process in each batch

Returns:

- str: Formatted string containing diffs for specified files or all changed files

Source code in src/lampe/core/tools/repository/diff.py
def get_diff_for_files(
    base_reference: str,
    file_paths: list[str] | None = None,
    head_reference: str = "HEAD",
    repo_path: str = "/tmp/",
    batch_size: int = 50,
) -> str:
    """Get the diff between two commits, optionally for specific files.

    Parameters
    ----------
    base_reference
        Base commit reference (e.g., "main", commit hash)
    file_paths
        List of file paths to get diff for
    head_reference
        Head commit reference (e.g., "feature", commit hash). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"
    batch_size
        Number of files to process in each batch.

    Returns
    -------
    str
        Formatted string containing diffs for specified files or all changed files
    """
    repo = Repo(path=repo_path)
    with LocalCommitsAvailability(repo_path, [base_reference, head_reference]):
        if file_paths:
            # Get diff for specific files
            diffs = []
            for batch_file_paths in batched(iterable=file_paths, n=batch_size):
                try:
                    diff = repo.git.diff(base_reference, head_reference, "--", *batch_file_paths)
                    if diff:
                        diffs.append(sanitize_utf8(diff))
                except GitCommandError:
                    # Skip files that don't exist or can't be diffed
                    logger.debug(f"Files {batch_file_paths} not found or can't be diffed in get_diff_for_files")
                    continue
            return "\n".join(diffs)
        else:
            # Get diff for all changed files
            diff = repo.git.diff(base_reference, head_reference)
            return sanitize_utf8(diff)
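A sketch limiting the diff to two placeholder files, assuming the function is importable from lampe.core.tools.repository.diff:

from lampe.core.tools.repository.diff import get_diff_for_files

diff = get_diff_for_files(
    base_reference="main",
    file_paths=["src/app.py", "README.md"],  # placeholder paths
    repo_path="/tmp/review-clone",
)
print(diff)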
get_file_content_at_commit(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:

- commit_hash (str, required): Commit reference (e.g., "main", commit hash)
- file_path (str, required): Path to the file within the repository
- line_start (int | None, default None): Line range start index (0-based) of head_content to extract content from
- line_end (int | None, default None): Line range end index (0-based) of head_content to extract content to
- include_line_numbers (bool, default False): Whether to prefix each line with its line number
- repo_path (str, default '/tmp/'): Path to the git repository

Returns:

- str: File content as a string; empty string if the file doesn't exist or the line range is invalid

Raises:

- GitCommandError: If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of head_content to extract content from
    line_end
        Line range end index (0-based) of head_content to extract content to
    include_line_numbers
        Whether to prefix each line with its line number (default: False)
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
            blob = sanitize_utf8(blob)
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])

        if include_line_numbers:
            lines = blob.splitlines()
            numbered_lines = []
            start_line = 0 if line_start is None else line_start
            for i, line in enumerate(lines):
                line_number = start_line + i
                numbered_lines.append(f"{line_number:>6}| {line}")
            blob = "\n".join(numbered_lines)

        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
is_sparse_clone(repo_path: str) -> bool

Check if a repository is a sparse clone.

A sparse clone is detected by checking multiple indicators:

1. If core.sparseCheckout is enabled
2. If .git/info/sparse-checkout file exists and has content

Parameters:

- repo_path (str, required): Path to the git repository

Returns:

- bool: True if the repository appears to be a sparse clone, False otherwise

Raises:

- GitCommandError: If git commands fail

Source code in src/lampe/core/tools/repository/management.py
def is_sparse_clone(repo_path: str) -> bool:
    """Check if a repository is a sparse clone.

    A sparse clone is detected by checking multiple indicators:
    1. If core.sparseCheckout is enabled
    2. If .git/info/sparse-checkout file exists and has content

    Parameters
    ----------
    repo_path
        Path to the git repository

    Returns
    -------
    bool
        True if the repository appears to be a sparse clone, False otherwise

    Raises
    ------
    GitCommandError
        If git commands fail
    """
    try:
        repo = Repo(path=repo_path)

        # Check if sparse checkout is enabled
        try:
            sparse_checkout = repo.git.config("core.sparseCheckout")
            if sparse_checkout.strip().lower() == "true":
                logger.debug(f"Sparse checkout enabled in {repo_path}")
                return True
        except GitCommandError:
            # core.sparseCheckout not set, continue with other checks
            pass

        # Check if .git/info/sparse-checkout file exists and has content
        sparse_checkout_file = Path(repo_path) / ".git" / "info" / "sparse-checkout"
        if sparse_checkout_file.exists():
            with open(sparse_checkout_file, "r") as f:
                content = f.read().strip()
                if content:
                    logger.debug(f"Sparse checkout file found with content in {repo_path}")
                    return True

        logger.debug(f"No sparse clone indicators found in {repo_path}")
        return False

    except Exception as e:
        logger.exception(f"Error checking if repository is sparse clone: {e}")
        return False
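A short sketch, assuming the function is importable from lampe.core.tools.repository.management:

from lampe.core.tools.repository.management import is_sparse_clone

if is_sparse_clone("/tmp/review-clone"):  # placeholder clone path
    print("sparse clone: file contents are fetched on demand")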
list_changed_files(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> str

List files changed between base reference and HEAD, with change stats.

Parameters:

- base_reference (str, required): Git reference (commit hash, branch name, etc.) to compare against HEAD
- head_reference (str, default 'HEAD'): Git reference (commit hash, branch name, etc.) to compare against the base reference
- repo_path (str, default '/tmp/'): Path to git repository

Returns:

- str: Formatted string listing changed files with status, additions/deletions and size.
  Format: "[STATUS] filepath | +additions -deletions | sizeKB"
  STATUS is one of: A (added), D (deleted), M (modified)

Raises:

- GitCommandError: If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py
def list_changed_files(base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/") -> str:
    """List files changed between base reference and HEAD, with change stats.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string listing changed files with status, additions/deletions and size
        Format: "[STATUS] filepath | +additions -deletions | sizeKB"
        STATUS is one of: A (added), D (deleted), M (modified)

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, "--numstat")
    status_output = repo.git.diff(base_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path)
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(f"[{status}] {file_path} | +{additions} -{deletions} | {size_kb}KB")

    return "\n".join(sorted(result))
list_changed_files_as_objects(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> list[FileDiffInfo]

List files changed between base reference and HEAD as structured objects.

Parameters:

- base_reference (str, required): Git reference (commit hash, branch name, etc.) to compare against HEAD
- head_reference (str, default 'HEAD'): Git reference (commit hash, branch name, etc.) to compare against the base reference
- repo_path (str, default '/tmp/'): Path to git repository

Returns:

- list[FileDiffInfo]: List of FileDiffInfo objects for each changed file

Raises:

- GitCommandError: If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py
def list_changed_files_as_objects(
    base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/"
) -> list[FileDiffInfo]:
    """List files changed between base reference and HEAD as structured objects.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    list[FileDiffInfo]
        List of FileDiffInfo objects for each changed file

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, head_reference, "--numstat")
    status_output = repo.git.diff(base_reference, head_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path)
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files_as_objects, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(
                FileDiffInfo(
                    file_path=file_path, status=status, additions=additions, deletions=deletions, size_kb=size_kb
                )
            )

    return sorted(result, key=lambda x: x.file_path)
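The structured counterpart to list_changed_files; a sketch assuming the same module path, with FileDiffInfo field names taken from the constructor call in the source above:

from lampe.core.tools.repository.diff import list_changed_files_as_objects

for info in list_changed_files_as_objects("main", repo_path="/tmp/review-clone"):
    if info.status == "A":  # added files only
        print(f"new file: {info.file_path} (+{info.additions}, {info.size_kb}KB)")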
search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Search for a pattern in files within a directory at a specific commit.

Parameters:

- pattern (str, required): Pattern to search for
- relative_dir_path (str, required): Directory path to search in
- commit_reference (str, required): Commit reference to search at
- include_line_numbers (bool, default False): Whether to include line numbers in search results
- repo_path (str, default '/tmp/'): Path to the git repository

Returns:

- str: Search results as a string

Source code in src/lampe/core/tools/repository/search.py
def search_in_files(
    pattern: str,
    relative_dir_path: str,
    commit_reference: str,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Search for a pattern in files within a directory at a specific commit.

    Parameters
    ----------
    pattern
        Pattern to search for
    relative_dir_path
        Directory path to search in
    commit_reference
        Commit reference to search at
    include_line_numbers
        Whether to include line numbers in search results (default: False)
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    str
        Search results as a string
    """
    try:
        repo = Repo(path=repo_path)
        commit_reference_path = f"{commit_reference}:{relative_dir_path if relative_dir_path else '.'}"
        if include_line_numbers:
            grep_output = repo.git.grep("-n", pattern, commit_reference_path)
        else:
            grep_output = repo.git.grep(pattern, commit_reference_path)
        if grep_output:
            grep_output = sanitize_utf8(grep_output)
            return f"```grep\n{grep_output}\n```"
        return "No matches found"
    except GitCommandError as e:
        if e.status == 128:
            return "No matches found"
        return f"Error executing git grep: {str(e)}"
show_commit(commit_reference: str, repo_path: str = '/tmp/') -> str

Show the contents of a commit.

This function shows the contents of a commit, including the commit details and diffs.

Parameters:

- commit_reference (str, required): Commit reference (e.g., "main", commit hash)
- repo_path (str, default '/tmp/'): Path to git repository

Returns:

- str: Formatted string containing commit details and diffs

Source code in src/lampe/core/tools/repository/history.py
def show_commit(commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Show the contents of a commit.

    This function shows the contents of a commit, including the commit details and diffs.

    Parameters
    ----------
    commit_reference
        Commit reference (e.g., "main", commit hash)
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and diffs
    """
    repo = Repo(path=repo_path)
    commit = repo.commit(commit_reference)
    output = [
        f"Commit: {commit.hexsha}\n"
        f"Author: {commit.author}\n"
        f"Date: {commit.authored_datetime}\n"
        f"Message: {commit.message}\n"
        f"Files: {len(commit.stats.files)} files changed\n"
        f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
        f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
    ]
    if commit.parents:
        parent = commit.parents[0]
        diff = parent.diff(commit, create_patch=True)
    else:
        diff = commit.diff(None, create_patch=True)
    for d in diff:
        output.append(f"\n--- {d.a_path}\n+++ {d.b_path}\n")
        if d.diff:
            diff_str = str(d.diff)
            output.append(sanitize_utf8(diff_str))
    return "".join(output)
content
file_exists(file_path: str, commit_hash: str = 'HEAD', repo_path: str = '/tmp/') -> bool

Check if a file exists in a specific commit.

Parameters:
    file_path (str, required): Path to the file within the repository
    commit_hash (str, default 'HEAD'): Commit reference to check (e.g., commit hash, branch name, tag)
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    bool: True if file exists in the commit, False otherwise

Raises:
    GitCommandError: If there is an unexpected git error

Source code in src/lampe/core/tools/repository/content.py
def file_exists(file_path: str, commit_hash: str = "HEAD", repo_path: str = "/tmp/") -> bool:
    """Check if a file exists in a specific commit.

    Parameters
    ----------
    file_path
        Path to the file within the repository
    commit_hash
        Commit reference to check (e.g., commit hash, branch name, tag). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    bool
        True if file exists in the commit, False otherwise

    Raises
    ------
    GitCommandError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            repo.git.cat_file("-e", f"{commit_hash}:{file_path}")
        return True
    except GitCommandError as e:
        if e.status == 128:
            return False
        logger.exception(f"Unexpected error checking if file exists: {e}")
        raise
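
A minimal usage sketch (the import path follows the source location shown above; the repository path and file name are illustrative):

from lampe.core.tools.repository.content import file_exists

if file_exists("pyproject.toml", commit_hash="HEAD", repo_path="/tmp/my-clone"):
    print("File present at HEAD")
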
get_file_content_at_commit(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:
    commit_hash (str, required): Commit reference (e.g., "main", commit hash)
    file_path (str, required): Path to the file within the repository
    line_start (int | None, default None): Line range start index (0-based) of the file content to extract from
    line_end (int | None, default None): Line range end index (0-based) of the file content to extract to
    include_line_numbers (bool, default False): Whether to prefix each line with its line number
    repo_path (str, default '/tmp/'): Path to the git repository

Returns:
    str: File content as a string; empty string if the file doesn't exist or the line range is invalid

Raises:
    GitCommandError: If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of the file content to extract from
    line_end
        Line range end index (0-based) of the file content to extract to
    include_line_numbers
        Whether to prefix each line with its line number (default: False)
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
            blob = sanitize_utf8(blob)
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])

        if include_line_numbers:
            lines = blob.splitlines()
            numbered_lines = []
            start_line = 0 if line_start is None else line_start
            for i, line in enumerate(lines):
                line_number = start_line + i
                numbered_lines.append(f"{line_number:>6}| {line}")
            blob = "\n".join(numbered_lines)

        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
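
A short usage sketch of the line-range and numbering options (paths are illustrative):

from lampe.core.tools.repository.content import get_file_content_at_commit

# First ten lines of README.md at HEAD, each prefixed with its 0-based line number
snippet = get_file_content_at_commit(
    "HEAD",
    "README.md",
    line_start=0,
    line_end=9,
    include_line_numbers=True,
    repo_path="/tmp/my-clone",
)
print(snippet)
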
get_file_size_at_commit(file_path: str, commit_hash: str = 'HEAD', repo_path: str = '/tmp/') -> int

Get the size of a file at a specific commit.

Parameters:
    file_path (str, required): Path to the file within the repository
    commit_hash (str, default 'HEAD'): Commit reference (e.g., "main", commit hash)
    repo_path (str, default '/tmp/'): Path to the git repository

Returns:
    int: Size of the file in bytes

Source code in src/lampe/core/tools/repository/content.py
def get_file_size_at_commit(file_path: str, commit_hash: str = "HEAD", repo_path: str = "/tmp/") -> int:
    """Get the size of a file at a specific commit.

    Parameters
    ----------
    file_path
        Path to the file within the repository
    commit_hash
        Commit reference (e.g., "main", commit hash). Defaults to "HEAD"
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        Size of the file in bytes
    """
    repo = Repo(path=repo_path)
    with LocalCommitsAvailability(repo_path, [commit_hash]):
        tree = repo.commit(rev=commit_hash).tree
    try:
        git_obj = tree[file_path]
        return git_obj.size
    except KeyError:
        return 0
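
A quick usage sketch (note the return value is a size in bytes; 0 means the file is absent from the commit tree):

from lampe.core.tools.repository.content import get_file_size_at_commit

size = get_file_size_at_commit("README.md", commit_hash="HEAD", repo_path="/tmp/my-clone")
print(f"README.md is {size} bytes at HEAD")
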
diff
FileDiffInfo

Bases: BaseModel

Information about a single file diff.

get_diff_between_commits(base_hash: str, head_hash: str = 'HEAD', files_exclude_patterns: list[str] | None = None, files_include_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, batch_size: int = 50, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Get the diff between two commits, optionally filtering files by glob patterns.

The filtering is done in a specific order to ensure correct pattern application:
1. First, if include patterns are provided, only files matching those patterns are kept
2. Then, exclude patterns are applied to filter out matching files
3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

This order ensures that reinclude patterns only affect files that were actually excluded, preventing the reinclude of files that weren't matched by include patterns in the first place.
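
The ordering matters in practice. Below is a standalone sketch of the same three-stage filter using fnmatch (the helper name and file list are illustrative, not part of the library):

from fnmatch import fnmatch

def filter_paths(
    paths: list[str],
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    reinclude: list[str] | None = None,
) -> list[str]:
    kept = []
    for path in paths:
        # 1. Include filter: if given, the path must match at least one include pattern
        if include and not any(fnmatch(path, pat) for pat in include):
            continue
        # 2. Exclude filter: drop matching paths...
        if exclude and any(fnmatch(path, pat) for pat in exclude):
            # 3. ...unless a reinclude pattern brings the path back
            if not (reinclude and any(fnmatch(path, pat) for pat in reinclude)):
                continue
        kept.append(path)
    return kept

paths = ["src/app.py", "docs/readme.txt", "docs/notes.txt"]
# "*.txt" excludes both txt files; "docs/readme.txt" is then re-included
print(filter_paths(paths, exclude=["*.txt"], reinclude=["docs/readme.txt"]))
# -> ['src/app.py', 'docs/readme.txt']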

Parameters:
    base_hash (str, required): Base commit hash to compare from
    head_hash (str, default 'HEAD'): Head commit hash to compare to
    files_exclude_patterns (list[str] | None, default None): Glob patterns to exclude from the diff (relative to repo root); these take precedence over include patterns
    files_include_patterns (list[str] | None, default None): Glob patterns to include in the diff (relative to repo root); exclude patterns override these on conflict
    files_reinclude_patterns (list[str] | None, default None): Glob patterns to re-include files that were excluded by the exclude patterns; these only affect previously excluded files
    batch_size (int, default 50): Number of files to process in each batch
    include_line_numbers (bool, default False): Whether to include line numbers in the diff output
    repo_path (str, default '/tmp/'): Path to the git repository

Returns:
    str: Diff as a string

Raises:
    DiffNotFoundError: If there is an unexpected git error

Source code in src/lampe/core/tools/repository/diff.py
def get_diff_between_commits(
    base_hash: str,
    head_hash: str = "HEAD",
    files_exclude_patterns: list[str] | None = None,
    files_include_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    batch_size: int = 50,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Get the diff between two commits, optionally filtering files by glob patterns.

    The filtering is done in a specific order to ensure correct pattern application:
    1. First, if include patterns are provided, only files matching those patterns are kept
    2. Then, exclude patterns are applied to filter out matching files
    3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

    This order ensures that reinclude patterns only affect files that were actually excluded,
    preventing the reinclude of files that weren't matched by include patterns in the first place.

    Parameters
    ----------
    base_hash
        Base commit hash to compare from
    head_hash
        Head commit hash to compare to. If not provided, uses HEAD
    files_exclude_patterns
        List of glob patterns to exclude from the diff (relative to repo root).
        These patterns take precedence over include patterns.
    files_include_patterns
        List of glob patterns to include in the diff (relative to repo root).
        Note that exclude patterns will override these if there are conflicts.
    files_reinclude_patterns
        List of glob patterns to re-include files that were excluded by the exclude patterns.
        These patterns will only affect files that were previously excluded.
    repo_path
        Path to the git repository
    batch_size
        Number of files to process in each batch.
    include_line_numbers
        Whether to include line numbers in diff output (default: False)

    Returns
    -------
    :
        Diff as a string

    Raises
    ------
    DiffNotFoundError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        changed_files = ""
        with LocalCommitsAvailability(repo_path, [base_hash, head_hash]):
            changed_files = repo.git.diff(base_hash, head_hash, "--name-only")

        if files_include_patterns and files_exclude_patterns:
            include_patterns = set(files_include_patterns)
            exclude_patterns = set(files_exclude_patterns)
            overlap = include_patterns & exclude_patterns
            if overlap:
                logger.warning(
                    f"Overlapping patterns found in include and exclude patterns: {overlap}. "
                    "Exclude patterns will take precedence as per git pathspec documentation."
                )

        filtered_files = []
        for f in changed_files.splitlines():
            if files_include_patterns and not any(fnmatch(f, pat) for pat in files_include_patterns):
                continue
            if files_exclude_patterns and any(fnmatch(f, pat) for pat in files_exclude_patterns):
                if not (files_reinclude_patterns and any(fnmatch(f, pat) for pat in files_reinclude_patterns)):
                    continue
            filtered_files.append(f)

        diffs = []
        for batch in batched(filtered_files, batch_size):
            diff = repo.git.diff(base_hash, head_hash, "--", *batch)
            if diff:
                diffs.append(sanitize_utf8(diff))
            elif include_line_numbers:
                # Git diff already includes line numbers in the @@ -X,Y +A,B @@ format
                # and shows line numbers in the context, so we don't need to modify it
                pass
        return "\n".join(diffs)
    except GitCommandError as e:
        logger.exception(f"Unexpected error getting diff: {e}")
        raise DiffNotFoundError(f"Diff not found for commits {base_hash} and {head_hash}") from e
get_diff_for_files(base_reference: str, file_paths: list[str] | None = None, head_reference: str = 'HEAD', repo_path: str = '/tmp/', batch_size: int = 50) -> str

Get the diff between two commits, optionally for specific files.

Parameters:
    base_reference (str, required): Base commit reference (e.g., "main", commit hash)
    file_paths (list[str] | None, default None): List of file paths to get the diff for
    head_reference (str, default 'HEAD'): Head commit reference (e.g., "feature", commit hash)
    repo_path (str, default '/tmp/'): Path to git repository
    batch_size (int, default 50): Number of files to process in each batch

Returns:
    str: Formatted string containing diffs for the specified files, or for all changed files

Source code in src/lampe/core/tools/repository/diff.py
def get_diff_for_files(
    base_reference: str,
    file_paths: list[str] | None = None,
    head_reference: str = "HEAD",
    repo_path: str = "/tmp/",
    batch_size: int = 50,
) -> str:
    """Get the diff between two commits, optionally for specific files.

    Parameters
    ----------
    base_reference
        Base commit reference (e.g., "main", commit hash)
    file_paths
        List of file paths to get diff for
    head_reference
        Head commit reference (e.g., "feature", commit hash). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"
    batch_size
        Number of files to process in each batch.

    Returns
    -------
    str
        Formatted string containing diffs for specified files or all changed files
    """
    repo = Repo(path=repo_path)
    with LocalCommitsAvailability(repo_path, [base_reference, head_reference]):
        if file_paths:
            # Get diff for specific files
            diffs = []
            for batch_file_paths in batched(iterable=file_paths, n=batch_size):
                try:
                    diff = repo.git.diff(base_reference, head_reference, "--", *batch_file_paths)
                    if diff:
                        diffs.append(sanitize_utf8(diff))
                except GitCommandError:
                    # Skip files that don't exist or can't be diffed
                    logger.debug(f"Files {batch_file_paths} not found or can't be diffed in get_diff_for_files")
                    continue
            return "\n".join(diffs)
        else:
            # Get diff for all changed files
            diff = repo.git.diff(base_reference, head_reference)
            return sanitize_utf8(diff)
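
A brief usage sketch (references and paths are illustrative):

from lampe.core.tools.repository.diff import get_diff_for_files

# Diff only two specific files between main and HEAD
patch = get_diff_for_files("main", file_paths=["src/app.py", "README.md"], repo_path="/tmp/my-clone")
print(patch)
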
list_changed_files(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> str

List files changed between the base and head references, with change stats.

Parameters:
    base_reference (str, required): Git reference (commit hash, branch name, etc.) to compare against the head reference
    head_reference (str, default 'HEAD'): Git reference (commit hash, branch name, etc.) to compare against the base reference
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    str: Formatted string listing changed files with status, additions/deletions, and size, one line per file in the form "[STATUS] filepath | +additions -deletions | sizeKB", where STATUS is A (added), D (deleted), or M (modified)

Raises:
    GitCommandError: If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py
def list_changed_files(base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/") -> str:
    """List files changed between base reference and HEAD, with change stats.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string listing changed files with status, additions/deletions and size
        Format: "[STATUS] filepath | +additions -deletions | sizeKB"
        STATUS is one of: A (added), D (deleted), M (modified)

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, head_reference, "--numstat")
    status_output = repo.git.diff(base_reference, head_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                # get_file_size_at_commit returns a size in bytes; convert to KB for the "sizeKB" column
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path) // 1024
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(f"[{status}] {file_path} | +{additions} -{deletions} | {size_kb}KB")

    return "\n".join(sorted(result))
list_changed_files_as_objects(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> list[FileDiffInfo]

List files changed between the base and head references as structured objects.

Parameters:
    base_reference (str, required): Git reference (commit hash, branch name, etc.) to compare against the head reference
    head_reference (str, default 'HEAD'): Git reference (commit hash, branch name, etc.) to compare against the base reference
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    list[FileDiffInfo]: List of FileDiffInfo objects, one per changed file

Raises:
    GitCommandError: If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py
def list_changed_files_as_objects(
    base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/"
) -> list[FileDiffInfo]:
    """List files changed between base reference and HEAD as structured objects.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    list[FileDiffInfo]
        List of FileDiffInfo objects for each changed file

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, head_reference, "--numstat")
    status_output = repo.git.diff(base_reference, head_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                # get_file_size_at_commit returns a size in bytes; convert to KB for FileDiffInfo.size_kb
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path) // 1024
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files_as_objects, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(
                FileDiffInfo(
                    file_path=file_path, status=status, additions=additions, deletions=deletions, size_kb=size_kb
                )
            )

    return sorted(result, key=lambda x: x.file_path)
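
The structured variant returns FileDiffInfo models instead of formatted text, which is convenient for programmatic filtering (a sketch; the fields are those set in the source above):

from lampe.core.tools.repository.diff import list_changed_files_as_objects

for info in list_changed_files_as_objects("main", repo_path="/tmp/my-clone"):
    if info.status != "D" and info.additions + info.deletions > 100:
        print(f"Large change: {info.file_path} (+{info.additions} -{info.deletions})")
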
encoding

Encoding utilities for git command outputs.

sanitize_utf8(text: str) -> str

Sanitize a string to ensure it contains only valid UTF-8 characters.

This function handles surrogate pairs and other invalid UTF-8 sequences that can occur when processing file content from git commands. Surrogate pairs are common in binary files or files with incorrect encoding.

The function uses 'replace' error handling which replaces invalid sequences with the Unicode replacement character (U+FFFD).

Parameters:

Name Type Description Default
text str

The text to sanitize (may contain surrogate pairs or invalid UTF-8)

required

Returns:

Type Description
str

Sanitized text containing only valid UTF-8 characters

Examples:

>>> sanitize_utf8("Valid text")
'Valid text'
>>> sanitize_utf8("Text with surrogates: \udcff\udcfe")
'Text with surrogates:'
Source code in src/lampe/core/tools/repository/encoding.py
def sanitize_utf8(text: str) -> str:
    """
    Sanitize a string to ensure it contains only valid UTF-8 characters.

    This function handles surrogate pairs and other invalid UTF-8 sequences
    that can occur when processing file content from git commands. Surrogate
    pairs are common in binary files or files with incorrect encoding.

    The function uses 'replace' error handling which replaces invalid sequences
    with the Unicode replacement character (U+FFFD).

    Parameters
    ----------
    text : str
        The text to sanitize (may contain surrogate pairs or invalid UTF-8)

    Returns
    -------
    str
        Sanitized text containing only valid UTF-8 characters

    Examples
    --------
    >>> sanitize_utf8("Valid text")
    'Valid text'
    >>> sanitize_utf8("Text with surrogates: \\udcff\\udcfe")
    'Text with surrogates:'
    """
    if not text:
        return text

    # Encode to UTF-8 with 'replace' to handle surrogates, then decode back
    # This effectively replaces any invalid UTF-8 sequences (including surrogates)
    # with the replacement character (U+FFFD)
    return text.encode("utf-8", errors="replace").decode("utf-8", errors="replace")
history
get_commit_log(max_count: int, repo_path: str = '/tmp/') -> str

Get the log of commits for a repository.

This function gets the log of commits for a repository, including the commit details and the list of file paths that were changed.

Parameters:
    max_count (int, required): Maximum number of commits to return
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    str: Formatted string containing commit details and the list of files that were changed

Source code in src/lampe/core/tools/repository/history.py
def get_commit_log(max_count: int, repo_path: str = "/tmp/") -> str:
    """Get the log of commits for a repository.

    This function gets the log of commits for a repository, including the commit details
    and the list of file paths that were changed.

    Parameters
    ----------
    max_count
        Maximum number of commits to return
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and list of files that were changed
    """
    repo = Repo(path=repo_path)
    commits = list(repo.iter_commits(max_count=max_count))
    log = []
    for commit in commits:
        log.append(
            f"Commit: {commit.hexsha}\n"
            f"Author: {commit.author}\n"
            f"Date: {commit.authored_datetime}\n"
            f"Message: {commit.message}\n"
            f"Files: {len(commit.stats.files)} files changed\n"
            f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
            f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
        )
    return "\n".join(log)
show_commit(commit_reference: str, repo_path: str = '/tmp/') -> str

Show the contents of a commit.

This function shows the contents of a commit, including the commit details and diffs.

Parameters:
    commit_reference (str, required): Commit reference (e.g., "main", commit hash)
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    str: Formatted string containing commit details and diffs

Source code in src/lampe/core/tools/repository/history.py
def show_commit(commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Show the contents of a commit.

    This function shows the contents of a commit, including the commit details and diffs.

    Parameters
    ----------
    commit_reference
        Commit reference (e.g., "main", commit hash)
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and diffs
    """
    repo = Repo(path=repo_path)
    commit = repo.commit(commit_reference)
    output = [
        f"Commit: {commit.hexsha}\n"
        f"Author: {commit.author}\n"
        f"Date: {commit.authored_datetime}\n"
        f"Message: {commit.message}\n"
        f"Files: {len(commit.stats.files)} files changed\n"
        f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
        f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
    ]
    if commit.parents:
        parent = commit.parents[0]
        diff = parent.diff(commit, create_patch=True)
    else:
        diff = commit.diff(None, create_patch=True)
    for d in diff:
        output.append(f"\n--- {d.a_path}\n+++ {d.b_path}\n")
        if d.diff:
            # d.diff is bytes when create_patch=True; decode it rather than using str(),
            # which would yield a b'...' repr instead of the patch text
            diff_str = d.diff.decode("utf-8", errors="replace") if isinstance(d.diff, bytes) else str(d.diff)
            output.append(sanitize_utf8(diff_str))
    return "".join(output)
management
LocalCommitsAvailability(repo_path: str, commits: list[str])

Context manager to check if commits are available locally before git operations.

Checks if specified commits exist locally using git fsck --root and fetches them if they're not present. This is useful for ensuring all required commits are available before performing git operations that depend on them.

Attributes:
    repo_path: Path to the git repository
    commits: List of commit references to check and fetch if needed

Source code in src/lampe/core/tools/repository/management.py
def __init__(self, repo_path: str, commits: list[str]):
    self.repo_path = repo_path
    self.commits = commits
    self.repo = Repo(path=repo_path)
    self._fetched_commits = []
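
A usage sketch mirroring how the content and diff helpers above use this context manager: wrap any git operation that needs specific commits present locally (paths and refs are illustrative):

from git import Repo
from lampe.core.tools.repository.management import LocalCommitsAvailability

repo_path = "/tmp/my-clone"
repo = Repo(path=repo_path)
# Ensure both commits are available locally (fetching them if needed) before diffing
with LocalCommitsAvailability(repo_path, ["main", "HEAD"]):
    print(repo.git.diff("main", "HEAD", "--name-only"))
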
TempGitRepository(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True)

Context manager for cloning and cleaning up a local clone of a repository.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content. Upon exit, will attempt to delete the cloned repository.

Attributes:
    repo_url: Repository URL to clone
    head_ref: Optional head ref to check out
    folder_name: Optional name prefix for the temp directory
    sparse: Enable sparse checkout mode to avoid populating all files initially
    shallow: Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter: Enable blob filtering (--filter=blob:none) to fetch file contents on demand
    remove_existing: Remove existing directory if it exists

Raises:
    RuntimeError: If the Git version check fails
    GitCommandError: If the clone operation fails
    UnableToDeleteError: If unable to delete the cloned repository

Source code in src/lampe/core/tools/repository/management.py
def __init__(
    self,
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
):
    self.repo_url = repo_url
    self.head_ref = head_ref
    self.base_ref = base_ref
    self.folder_name = folder_name
    self.sparse = sparse
    self.shallow = shallow
    self.blob_filter = blob_filter
    self.remove_existing = remove_existing
    self.path_to_local_repo = None
clone_repo(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True) -> str

Clone a repository optimized for PR review.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content.

Parameters:
    repo_url (str, required): Repository URL to clone
    head_ref (str | None, default None): Head ref to check out
    base_ref (str | None, default None): Base ref to fetch for diff computation
    folder_name (str | None, default None): Optional name prefix for the temp directory
    sparse (bool, default True): Enable sparse checkout mode to avoid populating all files initially
    shallow (bool, default True): Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter (bool, default True): Enable blob filtering (--filter=blob:none) to fetch file contents on demand
    remove_existing (bool, default True): Remove existing directory if it exists

Returns:
    str: Path to the cloned repository

Raises:
    RuntimeError: If the Git version check fails
    GitCommandError: If the clone operation fails

Source code in src/lampe/core/tools/repository/management.py
def clone_repo(
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
) -> str:
    """Clone a repository optimized for PR review.

    Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering
    to efficiently fetch only required content.

    Parameters
    ----------
    repo_url
        Repository URL to clone
    head_ref
        Head ref to checkout
    base_ref
        Base ref to fetch for diff computation
    folder_name
        Optional name prefix for temp directory
    sparse
        Enable sparse checkout mode to avoid populating all files initially
    shallow
        Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter
        Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
    remove_existing
        Remove existing directory if it exists

    Returns
    -------
    :
        Path to the cloned repository

    Raises
    ------
    RuntimeError
        If Git version check fails
    GitCommandError
        If clone operation fails
    """
    if not valid_git_version_available():
        raise RuntimeError("Git version check failed. Please upgrade Git to the minimum required version.")

    tmp_dir = f"/tmp/{folder_name}" if folder_name else mkdtemp(prefix=str(uuid.uuid4()))
    logger.info(f"Cloning repo (sparse={sparse}, shallow={shallow}, blob_filter={blob_filter}) to {tmp_dir}")

    if os.path.exists(tmp_dir):
        if remove_existing:
            logger.info(f"Removing existing directory {tmp_dir}")
            shutil.rmtree(tmp_dir)
        else:
            return tmp_dir

    clone_args = []
    if shallow:
        clone_args.extend(["--depth", "1"])
    if sparse:
        clone_args.append("--sparse")
    if blob_filter:
        clone_args.extend(["--filter", "blob:none"])
    if head_ref:
        clone_args.extend(["--revision", head_ref])

    try:
        repository_path = ""
        repo = Repo.clone_from(repo_url, tmp_dir, multi_options=clone_args)
        repository_path = _repo_to_path(repo)
        if sparse and blob_filter:
            logger.info("Partial clone ready - file contents will be fetched on-demand during git operations")
        if base_ref:
            fetch_commit_ref(repository_path, base_ref)
    except GitCommandError as e:
        logger.exception(f"Clone failed: {e}\nClone arguments used: {clone_args}")
        raise e

    return repository_path
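
A usage sketch; the URL and refs are illustrative. With the defaults, the clone is shallow, sparse, and blob-filtered, so file contents are fetched lazily by later git operations:

from lampe.core.tools.repository.management import clone_repo

repo_path = clone_repo(
    "https://github.com/example/project.git",
    head_ref="abc1234",   # commit to check out
    base_ref="def5678",   # also fetched, so diffs against it work
)
print(f"Cloned to {repo_path}")
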
fetch_commit_ref(repo_path: str, commit_ref: str) -> None

Fetch a base reference from the remote repository.

Parameters:
    repo_path (str, required): Path to the git repository
    commit_ref (str, required): Commit reference to fetch (e.g., branch name, commit hash)

Raises:
    GitCommandError: If the fetch operation fails

Source code in src/lampe/core/tools/repository/management.py
def fetch_commit_ref(repo_path: str, commit_ref: str) -> None:
    """Fetch a base reference from the remote repository.

    Parameters
    ----------
    repo_path
        Path to the git repository
    commit_ref
        Commit reference to fetch (e.g., branch name, commit hash)

    Raises
    ------
    GitCommandError
        If the fetch operation fails
    """
    repo = Repo(path=repo_path)

    repo.git.fetch("--no-tags", "--depth=1", "--filter=blob:none", "origin", commit_ref)
is_sparse_clone(repo_path: str) -> bool

Check if a repository is a sparse clone.

A sparse clone is detected by checking multiple indicators:
1. If core.sparseCheckout is enabled
2. If the .git/info/sparse-checkout file exists and has content

Parameters:
    repo_path (str, required): Path to the git repository

Returns:
    bool: True if the repository appears to be a sparse clone, False otherwise

Raises:
    GitCommandError: If git commands fail

Source code in src/lampe/core/tools/repository/management.py
def is_sparse_clone(repo_path: str) -> bool:
    """Check if a repository is a sparse clone.

    A sparse clone is detected by checking multiple indicators:
    1. If core.sparseCheckout is enabled
    2. If .git/info/sparse-checkout file exists and has content

    Parameters
    ----------
    repo_path
        Path to the git repository

    Returns
    -------
    bool
        True if the repository appears to be a sparse clone, False otherwise

    Raises
    ------
    GitCommandError
        If git commands fail
    """
    try:
        repo = Repo(path=repo_path)

        # Check if sparse checkout is enabled
        try:
            sparse_checkout = repo.git.config("core.sparseCheckout")
            if sparse_checkout.strip().lower() == "true":
                logger.debug(f"Sparse checkout enabled in {repo_path}")
                return True
        except GitCommandError:
            # core.sparseCheckout not set, continue with other checks
            pass

        # Check if .git/info/sparse-checkout file exists and has content
        sparse_checkout_file = Path(repo_path) / ".git" / "info" / "sparse-checkout"
        if sparse_checkout_file.exists():
            with open(sparse_checkout_file, "r") as f:
                content = f.read().strip()
                if content:
                    logger.debug(f"Sparse checkout file found with content in {repo_path}")
                    return True

        logger.debug(f"No sparse clone indicators found in {repo_path}")
        return False

    except Exception as e:
        logger.exception(f"Error checking if repository is sparse clone: {e}")
        return False
search
find_files_by_pattern(pattern: str, repo_path: str = '/tmp/') -> str

Search for files using git ls-files and pattern matching.

Parameters:
    pattern (str, required): Pattern to search for (e.g. "*.py", "src/**/*.md")
    repo_path (str, default '/tmp/'): Path to git repository

Returns:
    str: Formatted string containing matching file paths

Source code in src/lampe/core/tools/repository/search.py
def find_files_by_pattern(pattern: str, repo_path: str = "/tmp/") -> str:
    """Search for files using git ls-files and pattern matching.

    Parameters
    ----------
    pattern
        Pattern to search for (e.g. "*.py", "src/**/*.md")
    repo_path
        Path to git repository

    Returns
    -------
    str
        Formatted string containing matching file paths
    """
    repo = Repo(path=repo_path)
    try:
        # Filter files matching pattern using git's pathspec matching
        ls_output = repo.git.ls_files("--", pattern)
        ls_output = sanitize_utf8(ls_output)
        matching = ls_output.splitlines()

        if not matching:
            return "No files found"

        return f"```shell\n{'\n'.join(matching)}\n```"

    except GitCommandError as e:
        logger.exception(f"Error finding files: {e}")
        return f"Error: {str(e)}"
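
A quick usage sketch; the pattern is matched with git's pathspec rules via ls-files:

from lampe.core.tools.repository.search import find_files_by_pattern

# List all Markdown files tracked in the repository
print(find_files_by_pattern("*.md", repo_path="/tmp/my-clone"))
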
search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, include_line_numbers: bool = False, repo_path: str = '/tmp/') -> str

Search for a pattern in files within a directory at a specific commit.

Parameters:
    pattern (str, required): Pattern to search for
    relative_dir_path (str, required): Directory path to search in
    commit_reference (str, required): Commit reference to search at
    include_line_numbers (bool, default False): Whether to include line numbers in search results
    repo_path (str, default '/tmp/'): Path to the git repository

Returns:
    str: Search results as a string

Source code in src/lampe/core/tools/repository/search.py
def search_in_files(
    pattern: str,
    relative_dir_path: str,
    commit_reference: str,
    include_line_numbers: bool = False,
    repo_path: str = "/tmp/",
) -> str:
    """Search for a pattern in files within a directory at a specific commit.

    Parameters
    ----------
    pattern
        Pattern to search for
    relative_dir_path
        Directory path to search in
    commit_reference
        Commit reference to search at
    include_line_numbers
        Whether to include line numbers in search results (default: False)
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    str
        Search results as a string
    """
    try:
        repo = Repo(path=repo_path)
        commit_reference_path = f"{commit_reference}:{relative_dir_path if relative_dir_path else '.'}"
        if include_line_numbers:
            grep_output = repo.git.grep("-n", pattern, commit_reference_path)
        else:
            grep_output = repo.git.grep(pattern, commit_reference_path)
        if grep_output:
            grep_output = sanitize_utf8(grep_output)
            return f"```grep\n{grep_output}\n```"
        return "No matches found"
    except GitCommandError as e:
        if e.status == 128:
            return "No matches found"
        return f"Error executing git grep: {str(e)}"
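
A quick usage sketch; results come back wrapped in a grep code block, or "No matches found":

from lampe.core.tools.repository.search import search_in_files

print(search_in_files("TODO", "src", "HEAD", include_line_numbers=True, repo_path="/tmp/my-clone"))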

utils

truncate_to_token_limit(content: str, max_tokens: int) -> str

Truncate the content to the maximum number of tokens. If the content is too long, it is first truncated to 200000 characters (roughly 3-4 characters per token) before encoding, for performance reasons. The endoftext special token is allowed to be encoded, since we encountered tokenizer issues with it in the past.

Args:
    content (str): The content to truncate.
    max_tokens (int): The maximum number of tokens to keep.

Returns:
    str: The truncated content.

Source code in src/lampe/core/utils/token.py
def truncate_to_token_limit(content: str, max_tokens: int) -> str:
    """Truncate the content to the maximum number of tokens.
    If the content is too long, truncate it to 200000 characters (3-4 characters per token)
    before encoding for performance reasons.
    We allow `endoftext` token to be encoded, since in the past we encountered issues with the tokenizer.

    Args:
        content (str): The content to truncate.
        max_tokens (int): The maximum number of tokens to keep.

    Returns:
        str: The truncated content.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if len(content) >= CHARACTER_TRUNCATION_THRESHOLD:
        logger.warning(
            f"Truncating content to {CHARACTER_TRUNCATION_THRESHOLD} characters before encoding "
            f"for performance reasons. Content length: {len(content)}"
        )
        content = safe_truncate(content, CHARACTER_TRUNCATION_THRESHOLD)
    tokens = encoder.encode(
        content,
        disallowed_special=(),
    )
    truncated = encoder.decode(tokens[:max_tokens])
    return truncated
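
The same encode-slice-decode pattern can be reproduced with tiktoken directly (a sketch; the project's actual encoder and CHARACTER_TRUNCATION_THRESHOLD are module-level globals not shown here, and the encoding name below is an assumption):

import tiktoken

encoder = tiktoken.get_encoding("cl100k_base")  # assumption: the project may pin a different encoding

def truncate(content: str, max_tokens: int) -> str:
    # Encode, keep the first max_tokens tokens, decode back to text
    tokens = encoder.encode(content, disallowed_special=())
    return encoder.decode(tokens[:max_tokens])

print(truncate("one two three four five six", 3))
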
token
truncate_to_token_limit(content: str, max_tokens: int) -> str

Truncate the content to the maximum number of tokens. If the content is too long, it is first truncated to 200000 characters (roughly 3-4 characters per token) before encoding, for performance reasons. The endoftext special token is allowed to be encoded, since we encountered tokenizer issues with it in the past.

Args:
    content (str): The content to truncate.
    max_tokens (int): The maximum number of tokens to keep.

Returns:
    str: The truncated content.

Source code in src/lampe/core/utils/token.py
def truncate_to_token_limit(content: str, max_tokens: int) -> str:
    """Truncate the content to the maximum number of tokens.
    If the content is too long, truncate it to 200000 characters (3-4 characters per token)
    before encoding for performance reasons.
    We allow `endoftext` token to be encoded, since in the past we encountered issues with the tokenizer.

    Args:
        content (str): The content to truncate.
        max_tokens (int): The maximum number of tokens to keep.

    Returns:
        str: The truncated content.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if len(content) >= CHARACTER_TRUNCATION_THRESHOLD:
        logger.warning(
            f"Truncating content to {CHARACTER_TRUNCATION_THRESHOLD} characters before encoding "
            f"for performance reasons. Content length: {len(content)}"
        )
        content = safe_truncate(content, CHARACTER_TRUNCATION_THRESHOLD)
    tokens = encoder.encode(
        content,
        disallowed_special=(),
    )
    truncated = encoder.decode(tokens[:max_tokens])
    return truncated

describe

PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, *args, **kwargs)

Bases: Workflow

A workflow that generates a PR description.

Based on the pull request's diff, it generates a clear, concise description explaining what changes are being made and why.

Parameters:
    truncation_tokens (default MAX_TOKENS): Maximum number of tokens to use for the diff content
Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
def __init__(self, truncation_tokens=MAX_TOKENS, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.llm = LiteLLM(model=MODELS.GPT_5_NANO_2025_08_07, temperature=1.0)
    self.truncation_tokens = truncation_tokens
    self.output_parser = MarkdownCodeBlockRemoverOutputParser()
generate_description(ev: PRDescriptionPromptEvent) -> StopEvent async

Generate a PR description.

This step generates a PR description using the LLM. It uses the truncated diff of all the changes between 2 commits.

Parameters:
    ev (PRDescriptionPromptEvent, required): The prompt event containing the prepared diff and prompt.

Returns:
    StopEvent: The stop event containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def generate_description(self, ev: PRDescriptionPromptEvent) -> StopEvent:
    """Generate a PR description.

    This step generates a PR description using the LLM.
    It uses the truncated diff of all the changes between 2 commits.

    Parameters
    ----------
    ev
        The prompt event containing the prepared diff and prompt.

    Returns
    -------
    :
        The stop event containing the generated description.
    """
    response = await self.llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PR_DESCRIPTION_MESSAGE),
            ChatMessage(role=MessageRole.USER, content=ev.formatted_prompt),
        ]
    )

    description = self.output_parser.parse(response.message.content or "")
    return StopEvent(result=PRDescriptionOutput(description=description))
prepare_diff_and_prompt(ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent async

Prepare the diff and prompt for the LLM.

This step prepares the diff and prompt for the LLM. It truncates the diff to the maximum number of tokens and formats the prompt. The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns. The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like "!readme.txt" that should override "*.txt" exclusions.

Parameters:
    ev (PRDescriptionStartEvent, required): The start event containing the PR details.

Returns:
    PRDescriptionPromptEvent: The prompt event containing the prepared diff and prompt.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def prepare_diff_and_prompt(self, ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent:
    """Prepare the diff and prompt for the LLM.

    This step prepares the diff and prompt for the LLM.
    It truncates the diff to the maximum number of tokens and formats the prompt.
    The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns.
    The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like
    "!readme.txt" that should override "*.txt" exclusions.

    Parameters
    ----------
    ev
        The start event containing the PR details.

    Returns
    -------
    :
        The prompt event containing the prepared diff and prompt.
    """
    repo_path = ev.repository.local_path
    base_hash = ev.pull_request.base_commit_hash
    head_hash = ev.pull_request.head_commit_hash
    diff = get_diff_between_commits(
        base_hash, head_hash, files_exclude_patterns=ev.files_exclude_patterns, repo_path=repo_path
    )
    diff = truncate_to_token_limit(diff, self.truncation_tokens)
    formatted_prompt = USER_PR_DESCRIPTION_MESSAGE.format(
        pr_title=ev.pr_title,
        pull_request_diff=diff,
    )
    return PRDescriptionPromptEvent(formatted_prompt=formatted_prompt)

generate_pr_description(repository: Repository, pull_request: PullRequest, files_exclude_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, truncation_tokens: int = MAX_TOKENS, timeout: int | None = None, verbose: bool = False, metadata: dict | None = None) -> PRDescriptionOutput async

Generate a PR description.

This function generates a PR description for a given pull request. It uses the PRDescriptionWorkflow to generate the description.

Parameters:
    repository (Repository, required): The repository to generate the PR description for.
    pull_request (PullRequest, required): The pull request to generate the PR description for.
    files_exclude_patterns (list[str] | None, default None): The glob matching patterns to exclude from the diff.
    files_reinclude_patterns (list[str] | None, default None): The glob matching patterns to re-include in the diff.
    truncation_tokens (int, default MAX_TOKENS): The maximum number of tokens to use for the diff content.
    timeout (int | None, default None): The timeout for the workflow.
    verbose (bool, default False): Whether to print verbose output.
    metadata (dict | None, default None): The metadata to use for the workflow.

Returns:
    PRDescriptionOutput: The output containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
async def generate_pr_description(
    repository: Repository,
    pull_request: PullRequest,
    files_exclude_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    truncation_tokens: int = MAX_TOKENS,
    timeout: int | None = None,
    verbose: bool = False,
    metadata: dict | None = None,
) -> PRDescriptionOutput:
    """Generate a PR description.

    This function generates a PR description for a given pull request.
    It uses the PRDescriptionWorkflow to generate the description.

    Parameters
    ----------
    repository
        The repository to generate the PR description for.
    pull_request
        The pull request to generate the PR description for.
    files_exclude_patterns
        The glob matching patterns to exclude from the diff, by default None
    files_reinclude_patterns
        The glob matching patterns to re-include in the diff, by default None
    truncation_tokens
        The maximum number of tokens to use for the diff content, by default MAX_TOKENS
    timeout
        The timeout for the workflow, by default None
    verbose
        Whether to print verbose output, by default False
    metadata
        The metadata to use for the workflow, by default None

    Returns
    -------
    :
        The output containing the generated description.
    """
    if files_exclude_patterns is None:
        files_exclude_patterns = []
    workflow = PRDescriptionWorkflow(truncation_tokens=truncation_tokens, timeout=timeout, verbose=verbose)
    result = await workflow.run(
        start_event=PRDescriptionStartEvent(
            pr_title=pull_request.title,
            repository=repository,
            pull_request=pull_request,
            files_exclude_patterns=files_exclude_patterns,
        )
    )
    return result
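
Because the function is async, a caller drives it with asyncio. A hedged sketch: the Repository and PullRequest field names mirror the models used elsewhere in this reference, the import path for them is an assumption, and all values are illustrative:

import asyncio

from lampe.core.models import Repository, PullRequest  # assumption: actual import path may differ
from lampe.describe.workflows.pr_description.generation import generate_pr_description

repo = Repository(local_path="/tmp/my-clone", full_name="example/project")
pr = PullRequest(
    number=42,
    title="Add retry logic",
    body=None,
    base_commit_hash="def5678",
    base_branch_name="main",
    head_commit_hash="abc1234",
    head_branch_name="feature/retry",
)

output = asyncio.run(
    generate_pr_description(repository=repo, pull_request=pr, files_exclude_patterns=["*.lock"])
)
print(output.description)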

workflows

PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, *args, **kwargs)

Bases: Workflow

A workflow that generates a PR description.

Based on the pull request's diff, it generates a clear, concise description explaining what changes are being made and why.

Parameters:
    truncation_tokens (default MAX_TOKENS): Maximum number of tokens to use for the diff content
Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
def __init__(self, truncation_tokens=MAX_TOKENS, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.llm = LiteLLM(model=MODELS.GPT_5_NANO_2025_08_07, temperature=1.0)
    self.truncation_tokens = truncation_tokens
    self.output_parser = MarkdownCodeBlockRemoverOutputParser()
generate_description(ev: PRDescriptionPromptEvent) -> StopEvent async

Generate a PR description.

This step generates a PR description using the LLM. It uses the truncated diff of all the changes between 2 commits.

Parameters:
    ev (PRDescriptionPromptEvent, required): The prompt event containing the prepared diff and prompt.

Returns:
    StopEvent: The stop event containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def generate_description(self, ev: PRDescriptionPromptEvent) -> StopEvent:
    """Generate a PR description.

    This step generates a PR description using the LLM.
    It uses the truncated diff of all the changes between 2 commits.

    Parameters
    ----------
    ev
        The prompt event containing the prepared diff and prompt.

    Returns
    -------
    :
        The stop event containing the generated description.
    """
    response = await self.llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PR_DESCRIPTION_MESSAGE),
            ChatMessage(role=MessageRole.USER, content=ev.formatted_prompt),
        ]
    )

    description = self.output_parser.parse(response.message.content or "")
    return StopEvent(result=PRDescriptionOutput(description=description))
prepare_diff_and_prompt(ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent async

Prepare the diff and prompt for the LLM.

This step prepares the diff and prompt for the LLM. It truncates the diff to the maximum number of tokens and formats the prompt. The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns. The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like "!readme.txt" that should override "*.txt" exclusions.

Parameters:
    ev (PRDescriptionStartEvent, required): The start event containing the PR details.

Returns:
    PRDescriptionPromptEvent: The prompt event containing the prepared diff and prompt.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def prepare_diff_and_prompt(self, ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent:
    """Prepare the diff and prompt for the LLM.

    This step prepares the diff and prompt for the LLM.
    It truncates the diff to the maximum number of tokens and formats the prompt.
    The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns.
    The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like
    "!readme.txt" that should override "*.txt" exclusions.

    Parameters
    ----------
    ev
        The start event containing the PR details.

    Returns
    -------
    :
        The prompt event containing the prepared diff and prompt.
    """
    repo_path = ev.repository.local_path
    base_hash = ev.pull_request.base_commit_hash
    head_hash = ev.pull_request.head_commit_hash
    diff = get_diff_between_commits(
        base_hash, head_hash, files_exclude_patterns=ev.files_exclude_patterns, repo_path=repo_path
    )
    diff = truncate_to_token_limit(diff, self.truncation_tokens)
    formatted_prompt = USER_PR_DESCRIPTION_MESSAGE.format(
        pr_title=ev.pr_title,
        pull_request_diff=diff,
    )
    return PRDescriptionPromptEvent(formatted_prompt=formatted_prompt)
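
To make the exclude/re-include semantics concrete, here is a standalone sketch of the filtering rule (fnmatch-based, mirroring the filtering used by the diff-by-diff review pipeline later in this reference; the helper name is illustrative):

from fnmatch import fnmatch

def keep_file(path: str, exclude: list[str], reinclude: list[str]) -> bool:
    """Illustrative filter: an excluded file survives only if re-included."""
    if not any(fnmatch(path, pattern) for pattern in exclude):
        return True
    return any(fnmatch(path, pattern) for pattern in reinclude)

keep_file("notes.txt", ["*.txt"], [])                 # False: matched by *.txt
keep_file("readme.txt", ["*.txt"], ["readme.txt"])    # True: re-included
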
pr_description
PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, *args, **kwargs)

Bases: Workflow

A workflow that generates a PR description.

Based on the pull request's diff, generates a clear, concise description explaining what changes are being made and why.

Parameters:

truncation_tokens (default: MAX_TOKENS)
    Maximum number of tokens to use for the diff content.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
def __init__(self, truncation_tokens=MAX_TOKENS, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.llm = LiteLLM(model=MODELS.GPT_5_NANO_2025_08_07, temperature=1.0)
    self.truncation_tokens = truncation_tokens
    self.output_parser = MarkdownCodeBlockRemoverOutputParser()
generate_description(ev: PRDescriptionPromptEvent) -> StopEvent async

Generate a PR description.

This step generates a PR description using the LLM. It uses the truncated diff of all the changes between 2 commits.

Parameters:

ev : PRDescriptionPromptEvent (required)
    The prompt event containing the prepared diff and prompt.

Returns:

StopEvent
    The stop event containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def generate_description(self, ev: PRDescriptionPromptEvent) -> StopEvent:
    """Generate a PR description.

    This step generates a PR description using the LLM.
    It uses the truncated diff of all the changes between 2 commits.

    Parameters
    ----------
    ev
        The prompt event containing the prepared diff and prompt.

    Returns
    -------
    :
        The stop event containing the generated description.
    """
    response = await self.llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PR_DESCRIPTION_MESSAGE),
            ChatMessage(role=MessageRole.USER, content=ev.formatted_prompt),
        ]
    )

    description = self.output_parser.parse(response.message.content or "")
    return StopEvent(result=PRDescriptionOutput(description=description))
prepare_diff_and_prompt(ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent async

Prepare the diff and prompt for the LLM.

This step prepares the diff and prompt for the LLM. It truncates the diff to the maximum number of tokens and formats the prompt. The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns. The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like "!readme.txt" that should override "*.txt" exclusions.

Parameters:

ev : PRDescriptionStartEvent (required)
    The start event containing the PR details.

Returns:

PRDescriptionPromptEvent
    The prompt event containing the prepared diff and prompt.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def prepare_diff_and_prompt(self, ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent:
    """Prepare the diff and prompt for the LLM.

    This step prepares the diff and prompt for the LLM.
    It truncates the diff to the maximum number of tokens and formats the prompt.
    The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns.
    The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like
    "!readme.txt" that should override "*.txt" exclusions.

    Parameters
    ----------
    ev
        The start event containing the PR details.

    Returns
    -------
    :
        The prompt event containing the prepared diff and prompt.
    """
    repo_path = ev.repository.local_path
    base_hash = ev.pull_request.base_commit_hash
    head_hash = ev.pull_request.head_commit_hash
    diff = get_diff_between_commits(
        base_hash, head_hash, files_exclude_patterns=ev.files_exclude_patterns, repo_path=repo_path
    )
    diff = truncate_to_token_limit(diff, self.truncation_tokens)
    formatted_prompt = USER_PR_DESCRIPTION_MESSAGE.format(
        pr_title=ev.pr_title,
        pull_request_diff=diff,
    )
    return PRDescriptionPromptEvent(formatted_prompt=formatted_prompt)
data_models
PRDescriptionInput

Bases: BaseModel

Input for PR description generation workflow.

generation
PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, *args, **kwargs)

Bases: Workflow

A workflow that generates a PR description.

Based on the pull request's diff, generates a clear, concise description explaining what changes are being made and why.

Parameters:

truncation_tokens (default: MAX_TOKENS)
    Maximum number of tokens to use for the diff content.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
def __init__(self, truncation_tokens=MAX_TOKENS, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.llm = LiteLLM(model=MODELS.GPT_5_NANO_2025_08_07, temperature=1.0)
    self.truncation_tokens = truncation_tokens
    self.output_parser = MarkdownCodeBlockRemoverOutputParser()
generate_description(ev: PRDescriptionPromptEvent) -> StopEvent async

Generate a PR description.

This step generates a PR description using the LLM. It uses the truncated diff of all the changes between 2 commits.

Parameters:

ev : PRDescriptionPromptEvent (required)
    The prompt event containing the prepared diff and prompt.

Returns:

StopEvent
    The stop event containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def generate_description(self, ev: PRDescriptionPromptEvent) -> StopEvent:
    """Generate a PR description.

    This step generates a PR description using the LLM.
    It uses the truncated diff of all the changes between 2 commits.

    Parameters
    ----------
    ev
        The prompt event containing the prepared diff and prompt.

    Returns
    -------
    :
        The stop event containing the generated description.
    """
    response = await self.llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PR_DESCRIPTION_MESSAGE),
            ChatMessage(role=MessageRole.USER, content=ev.formatted_prompt),
        ]
    )

    description = self.output_parser.parse(response.message.content or "")
    return StopEvent(result=PRDescriptionOutput(description=description))
prepare_diff_and_prompt(ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent async

Prepare the diff and prompt for the LLM.

This step prepares the diff and prompt for the LLM. It truncates the diff to the maximum number of tokens and formats the prompt. The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns. The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like "!readme.txt" that should override "*.txt" exclusions.

Parameters:

ev : PRDescriptionStartEvent (required)
    The start event containing the PR details.

Returns:

PRDescriptionPromptEvent
    The prompt event containing the prepared diff and prompt.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
@step
async def prepare_diff_and_prompt(self, ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent:
    """Prepare the diff and prompt for the LLM.

    This step prepares the diff and prompt for the LLM.
    It truncates the diff to the maximum number of tokens and formats the prompt.
    The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns.
    The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like
    "!readme.txt" that should override "*.txt" exclusions.

    Parameters
    ----------
    ev
        The start event containing the PR details.

    Returns
    -------
    :
        The prompt event containing the prepared diff and prompt.
    """
    repo_path = ev.repository.local_path
    base_hash = ev.pull_request.base_commit_hash
    head_hash = ev.pull_request.head_commit_hash
    diff = get_diff_between_commits(
        base_hash, head_hash, files_exclude_patterns=ev.files_exclude_patterns, repo_path=repo_path
    )
    diff = truncate_to_token_limit(diff, self.truncation_tokens)
    formatted_prompt = USER_PR_DESCRIPTION_MESSAGE.format(
        pr_title=ev.pr_title,
        pull_request_diff=diff,
    )
    return PRDescriptionPromptEvent(formatted_prompt=formatted_prompt)
generate_pr_description(repository: Repository, pull_request: PullRequest, files_exclude_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, truncation_tokens: int = MAX_TOKENS, timeout: int | None = None, verbose: bool = False, metadata: dict | None = None) -> PRDescriptionOutput async

Generate a PR description.

This function generates a PR description for a given pull request. It uses the PRDescriptionWorkflow to generate the description.

Parameters:

repository : Repository (required)
    The repository to generate the PR description for.
pull_request : PullRequest (required)
    The pull request to generate the PR description for.
files_exclude_patterns : list[str] | None (default: None)
    The glob matching patterns to exclude from the diff.
files_reinclude_patterns : list[str] | None (default: None)
    The glob matching patterns to re-include in the diff.
truncation_tokens : int (default: MAX_TOKENS)
    The maximum number of tokens to use for the diff content.
timeout : int | None (default: None)
    The timeout for the workflow.
verbose : bool (default: False)
    Whether to print verbose output.
metadata : dict | None (default: None)
    The metadata to use for the workflow.

Returns:

PRDescriptionOutput
    The output containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py
async def generate_pr_description(
    repository: Repository,
    pull_request: PullRequest,
    files_exclude_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    truncation_tokens: int = MAX_TOKENS,
    timeout: int | None = None,
    verbose: bool = False,
    metadata: dict | None = None,
) -> PRDescriptionOutput:
    """Generate a PR description.

    This function generates a PR description for a given pull request.
    It uses the PRDescriptionWorkflow to generate the description.

    Parameters
    ----------
    repository
        The repository to generate the PR description for.
    pull_request
        The pull request to generate the PR description for.
    files_exclude_patterns
        The glob matching patterns to exclude from the diff, by default None
    files_reinclude_patterns
        The glob matching patterns to re-include in the diff, by default None
    truncation_tokens
        The maximum number of tokens to use for the diff content, by default MAX_TOKENS
    timeout
        The timeout for the workflow, by default None
    verbose
        Whether to print verbose output, by default False
    metadata
        The metadata to use for the workflow, by default None

    Returns
    -------
    :
        The output containing the generated description.
    """
    if files_exclude_patterns is None:
        files_exclude_patterns = []
    workflow = PRDescriptionWorkflow(truncation_tokens=truncation_tokens, timeout=timeout, verbose=verbose)
    result = await workflow.run(
        start_event=PRDescriptionStartEvent(
            pr_title=pull_request.title,
            repository=repository,
            pull_request=pull_request,
            files_exclude_patterns=files_exclude_patterns,
        )
    )
    return result
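
A minimal usage sketch (the import path for the data models is an assumption inferred from this reference; the repository path, PR fields, and commit hashes are placeholders):

import asyncio

from lampe.core.data_models import PullRequest, Repository  # assumed import path
from lampe.describe.workflows.pr_description.generation import generate_pr_description

repo = Repository(local_path="/tmp/checkout", full_name="owner/repo")
pr = PullRequest(
    number=123,
    title="Add retry logic",
    body=None,
    base_commit_hash="abc123",
    base_branch_name="main",
    head_commit_hash="def456",
    head_branch_name="feature/retries",
)

output = asyncio.run(generate_pr_description(repo, pr, files_exclude_patterns=["*.lock"]))
print(output.description)
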

review

generate_multi_agent_pr_review(repository: Repository, pull_request: PullRequest, review_depth: ReviewDepth = ReviewDepth.STANDARD, custom_guidelines: list[str] | None = None, files_exclude_patterns: list[str] | None = None, timeout: int | None = None, verbose: bool = False, agents_required: list[type[SpecializedReviewAgent]] | None = None) -> PRReviewComplete async

Generate a PR review using the multi-agent LlamaIndex workflow.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
async def generate_multi_agent_pr_review(
    repository: Repository,
    pull_request: PullRequest,
    review_depth: ReviewDepth = ReviewDepth.STANDARD,
    custom_guidelines: list[str] | None = None,
    files_exclude_patterns: list[str] | None = None,
    timeout: int | None = None,
    verbose: bool = False,
    agents_required: list[type[SpecializedReviewAgent]] | None = None,
) -> PRReviewComplete:
    """Generate a PR review using the multi-agent LlamaIndex workflow."""
    if files_exclude_patterns is None:
        files_exclude_patterns = []
    agents = []
    if agents_required:
        agents = [agent(timeout=timeout, verbose=verbose) for agent in agents_required]
    # Create the LlamaIndex workflow
    workflow = MultiAgentPipelineWorkflow(agents=agents, timeout=timeout, verbose=verbose)

    # Create input data
    input_data = PRReviewInput(
        repository=repository,
        pull_request=pull_request,
        review_depth=review_depth,
        custom_guidelines=custom_guidelines,
        files_exclude_patterns=files_exclude_patterns,
        use_multi_agent=True,
    )

    # Run the workflow
    result: PRReviewComplete = await workflow.run(start_event=PRReviewStart(input=input_data))

    return result
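
A usage sketch with an explicit agent roster (note that agents_required takes agent classes, which the function instantiates itself; the import paths are assumptions, and repo and pr are built as in the describe example above):

import asyncio

from lampe.review.workflows.pr_review.agents import SecurityAgent, TestingAgent  # assumed
from lampe.review.workflows.pr_review.data_models import ReviewDepth  # assumed import path
from lampe.review.workflows.pr_review.multi_agent_pipeline import generate_multi_agent_pr_review

result = asyncio.run(
    generate_multi_agent_pr_review(
        repository=repo,
        pull_request=pr,
        review_depth=ReviewDepth.STANDARD,
        agents_required=[SecurityAgent, TestingAgent],  # classes, not instances
    )
)
for agent_review in result.output:
    print(agent_review.agent_name, len(agent_review.reviews))
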

workflows

pr_review
MultiAgentPipelineWorkflow(agents: list[SpecializedReviewAgent], timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any)

Bases: Workflow

LlamaIndex Workflow for multi-agent PR review pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
def __init__(
    self,
    agents: list[SpecializedReviewAgent],
    timeout: int | None = None,
    verbose: bool = False,
    *args: Any,
    **kwargs: Any,
):
    super().__init__(*args, timeout=timeout, verbose=verbose, **kwargs)
    self.verbose = verbose
    self.aggregator = ReviewAggregator()

    # Initialize all specialized agents
    self.agents = agents or [
        # SecurityAgent(timeout=timeout, verbose=verbose),
        # APIUsageAgent(timeout=timeout, verbose=verbose),
        DesignPatternAgent(timeout=timeout, verbose=verbose),
        # PerformanceAgent(timeout=timeout, verbose=verbose),
        # CodeQualityAgent(timeout=timeout, verbose=verbose),
        # TestingAgent(timeout=timeout, verbose=verbose),
    ]
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
execute_pipeline(ctx: Context, ev: PRReviewStart) -> PRReviewComplete async

Execute the complete multi-agent review pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
@step
async def execute_pipeline(self, ctx: Context, ev: PRReviewStart) -> PRReviewComplete:
    """Execute the complete multi-agent review pipeline."""
    # Get list of changed files
    files_changed = list_changed_files(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        repo_path=ev.input.repository.local_path,
    )

    # Collect reviews from all agents
    all_agent_reviews: list[AgentReviewOutput] = []

    for agent in self.agents:
        if self.verbose:
            self.logger.debug(f"Running {agent.agent_name} review...")

        # Create input for this agent
        agent_input = AgentReviewInput(
            repository=ev.input.repository,
            pull_request=ev.input.pull_request,
            files_changed=files_changed,
            review_depth=ev.input.review_depth,
            custom_guidelines=ev.input.custom_guidelines,
        )

        try:
            # Run the agent
            agent_output: SpecializedAgentComplete = await agent.run(
                start_event=SpecializedAgentStart(input=agent_input)
            )
            all_agent_reviews.append(agent_output.review_output)

            if self.verbose:
                self.logger.debug(f"✓ {agent.agent_name} completed with {len(agent_output.reviews)} file reviews")

        except Exception as e:
            if self.verbose:
                self.logger.debug(f"✗ {agent.agent_name} failed: {e}")
            self.logger.exception(e)
            # Continue with other agents even if one fails
            continue

    # Aggregate all reviews
    if self.verbose:
        self.logger.debug("Aggregating reviews from all agents...")

    # aggregated_reviews = self.aggregator.aggregate_reviews(all_agent_reviews)

    # if self.verbose:
    #     self.logger.debug(f"✓ Pipeline completed with {len(aggregated_reviews)} aggregated reviews")

    return PRReviewComplete(output=all_agent_reviews)
agents

Specialized review agents for the multi-agent PR review system.

APIUsageAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in validating API usage and library integration.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/api_usage_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="API Usage Expert",
        focus_areas=[
            "API method validation",
            "Library integration",
            "Error handling",
            "Resource management",
            "Version compatibility",
            "Deprecation warnings",
        ],
        system_prompt=API_USAGE_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
CodeQualityAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in code quality, readability, and maintainability.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/code_quality_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Code Quality Expert",
        focus_areas=[
            "Code readability",
            "Naming conventions",
            "Code organization",
            "Documentation quality",
            "Error handling",
            "Maintainability",
        ],
        system_prompt=CODE_QUALITY_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
DefaultAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

General-purpose review agent covering code quality, readability, organization, maintainability, and performance.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/default_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Default Agent",
        focus_areas=[
            "Code quality",
            "Code readability",
            "Code organization",
            "Code maintainability",
            "Code performance",
        ],
        system_prompt=DEFAULT_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
setup_query_and_tools(ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent async

Setup the query and tools for the specialized agent.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/default_agent.py
@step
async def setup_query_and_tools(self, ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent:
    """Setup the query and tools for the specialized agent."""
    query = PR_REVIEW_USER_PROMPT.format(
        pull_request=ev.input.pull_request,
        working_dir=ev.input.repository.local_path,
        files_changed=ev.input.files_changed,
        custom_guidelines_section="",
    )
    self.update_tools(partial_params={"repo_path": ev.input.repository.local_path, "include_line_numbers": True})
    return UserInputEvent(input=query)
DesignPatternAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in validating design patterns and architectural consistency.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/design_pattern_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Architecture Expert",
        focus_areas=[
            "SOLID principles",
            "Design patterns",
            "Architectural consistency",
            "Separation of concerns",
            "Dependency management",
            "Code organization",
        ],
        system_prompt=DESIGN_PATTERN_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
DiffFocusedAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent that focuses on reviewing one specific diff to find bugs introduced by that change.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/diff_focused_agent.py
def __init__(self, *args, **kwargs):
    # Default model, will be updated based on review_depth in setup_query_and_tools
    llm = LiteLLM(model=MODELS.GPT_5_2025_08_07, temperature=1.0, reasoning_effort="high")
    super().__init__(
        agent_name="Diff-Focused Bug Finder",
        focus_areas=[
            "Bug detection",
            "Regression identification",
            "Integration issues",
            "Logic errors",
            "Runtime errors",
        ],
        system_prompt=DIFF_FOCUSED_AGENT_SYSTEM_PROMPT,
        llm=llm,
        *args,
        **kwargs,
    )
setup_query_and_tools(ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent async

Setup the query and tools for the diff-focused agent.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/diff_focused_agent.py
@step
async def setup_query_and_tools(self, ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent:
    """Setup the query and tools for the diff-focused agent."""
    if not ev.input.target_file_path:
        raise ValueError("DiffFocusedAgent requires target_file_path in AgentReviewInput")

    # Update LLM based on review depth
    model = self._get_model_for_review_depth(ev.input.review_depth)
    self.llm = LiteLLM(model=model, temperature=1.0, reasoning_effort="high")

    # Get the diff for the target file
    target_file_diff = get_diff_for_files(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        file_paths=[ev.input.target_file_path],
        repo_path=ev.input.repository.local_path,
    )

    # If diff is empty, provide a message
    if not target_file_diff.strip():
        target_file_diff = (
            f"(No diff content found for {ev.input.target_file_path} - file may be newly added or deleted)"
        )

    custom_guidelines_section = self._get_custom_guidelines_section(ev.input.custom_guidelines)

    query = DIFF_FOCUSED_USER_PROMPT.format(
        pull_request_number=ev.input.pull_request.number,
        pull_request_title=ev.input.pull_request.title,
        base_commit_hash=ev.input.pull_request.base_commit_hash,
        head_commit_hash=ev.input.pull_request.head_commit_hash,
        target_file_path=ev.input.target_file_path,
        target_file_diff=target_file_diff,
        custom_guidelines_section=custom_guidelines_section,
        files_changed=ev.input.files_changed,
    )

    self.update_tools(partial_params={"repo_path": ev.input.repository.local_path, "include_line_numbers": True})
    return UserInputEvent(input=query)
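
Unlike the broader agents, DiffFocusedAgent must be pointed at a single file. A sketch of driving it directly from an async context (field names follow AgentReviewInput as used above; values are placeholders):

agent = DiffFocusedAgent(timeout=120, verbose=True)
agent_input = AgentReviewInput(
    repository=repo,
    pull_request=pr,
    files_changed=files_changed,          # formatted summary of changed files
    review_depth=ReviewDepth.STANDARD,
    custom_guidelines=None,
    target_file_path="src/app/main.py",   # required, otherwise setup raises ValueError
)
# Inside an async function:
result = await agent.run(start_event=SpecializedAgentStart(input=agent_input))
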
PerformanceAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in identifying performance issues and optimization opportunities.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/performance_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Performance Expert",
        focus_areas=[
            "Algorithmic complexity",
            "Memory optimization",
            "Database performance",
            "Resource efficiency",
            "Caching strategies",
            "Async operations",
        ],
        system_prompt=PERFORMANCE_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
SecurityAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in identifying security vulnerabilities and issues.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/security_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Security Expert",
        focus_areas=[
            "SQL injection prevention",
            "XSS/CSRF protection",
            "Authentication security",
            "Secret management",
            "Input validation",
            "Secure coding practices",
        ],
        system_prompt=SECURITY_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
TestingAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in testing strategy, coverage, and test quality.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/testing_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Testing Expert",
        focus_areas=[
            "Test coverage",
            "Test quality",
            "Edge case testing",
            "Integration testing",
            "Test organization",
            "Test maintainability",
        ],
        system_prompt=TESTING_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
api_usage_agent

API and library usage validation agent.

APIUsageAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in validating API usage and library integration.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/api_usage_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="API Usage Expert",
        focus_areas=[
            "API method validation",
            "Library integration",
            "Error handling",
            "Resource management",
            "Version compatibility",
            "Deprecation warnings",
        ],
        system_prompt=API_USAGE_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
api_usage_agent_prompt

API usage agent prompt for validating API usage and library integration.

code_quality_agent

Code quality and maintainability agent.

CodeQualityAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in code quality, readability, and maintainability.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/code_quality_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Code Quality Expert",
        focus_areas=[
            "Code readability",
            "Naming conventions",
            "Code organization",
            "Documentation quality",
            "Error handling",
            "Maintainability",
        ],
        system_prompt=CODE_QUALITY_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
code_quality_agent_prompt

Code quality agent prompt for ensuring code quality, readability, and maintainability.

default_agent

Default general-purpose review agent.

DefaultAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

General-purpose review agent covering code quality, readability, organization, maintainability, and performance.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/default_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Default Agent",
        focus_areas=[
            "Code quality",
            "Code readability",
            "Code organization",
            "Code maintainability",
            "Code performance",
        ],
        system_prompt=DEFAULT_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
setup_query_and_tools(ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent async

Setup the query and tools for the specialized agent.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/default_agent.py
@step
async def setup_query_and_tools(self, ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent:
    """Setup the query and tools for the specialized agent."""
    query = PR_REVIEW_USER_PROMPT.format(
        pull_request=ev.input.pull_request,
        working_dir=ev.input.repository.local_path,
        files_changed=ev.input.files_changed,
        custom_guidelines_section="",
    )
    self.update_tools(partial_params={"repo_path": ev.input.repository.local_path, "include_line_numbers": True})
    return UserInputEvent(input=query)
design_pattern_agent

Design pattern and architectural consistency agent.

DesignPatternAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in validating design patterns and architectural consistency.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/design_pattern_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Architecture Expert",
        focus_areas=[
            "SOLID principles",
            "Design patterns",
            "Architectural consistency",
            "Separation of concerns",
            "Dependency management",
            "Code organization",
        ],
        system_prompt=DESIGN_PATTERN_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
design_pattern_agent_prompt

Design pattern agent prompt for validating design patterns and architectural consistency.

diff_focused_agent

Diff-focused agent that reviews one specific diff at a time to find bugs.

DiffFocusedAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent that focuses on reviewing one specific diff to find bugs introduced by that change.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/diff_focused_agent.py
def __init__(self, *args, **kwargs):
    # Default model, will be updated based on review_depth in setup_query_and_tools
    llm = LiteLLM(model=MODELS.GPT_5_2025_08_07, temperature=1.0, reasoning_effort="high")
    super().__init__(
        agent_name="Diff-Focused Bug Finder",
        focus_areas=[
            "Bug detection",
            "Regression identification",
            "Integration issues",
            "Logic errors",
            "Runtime errors",
        ],
        system_prompt=DIFF_FOCUSED_AGENT_SYSTEM_PROMPT,
        llm=llm,
        *args,
        **kwargs,
    )
setup_query_and_tools(ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent async

Setup the query and tools for the diff-focused agent.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/diff_focused_agent.py
@step
async def setup_query_and_tools(self, ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent:
    """Setup the query and tools for the diff-focused agent."""
    if not ev.input.target_file_path:
        raise ValueError("DiffFocusedAgent requires target_file_path in AgentReviewInput")

    # Update LLM based on review depth
    model = self._get_model_for_review_depth(ev.input.review_depth)
    self.llm = LiteLLM(model=model, temperature=1.0, reasoning_effort="high")

    # Get the diff for the target file
    target_file_diff = get_diff_for_files(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        file_paths=[ev.input.target_file_path],
        repo_path=ev.input.repository.local_path,
    )

    # If diff is empty, provide a message
    if not target_file_diff.strip():
        target_file_diff = (
            f"(No diff content found for {ev.input.target_file_path} - file may be newly added or deleted)"
        )

    custom_guidelines_section = self._get_custom_guidelines_section(ev.input.custom_guidelines)

    query = DIFF_FOCUSED_USER_PROMPT.format(
        pull_request_number=ev.input.pull_request.number,
        pull_request_title=ev.input.pull_request.title,
        base_commit_hash=ev.input.pull_request.base_commit_hash,
        head_commit_hash=ev.input.pull_request.head_commit_hash,
        target_file_path=ev.input.target_file_path,
        target_file_diff=target_file_diff,
        custom_guidelines_section=custom_guidelines_section,
        files_changed=ev.input.files_changed,
    )

    self.update_tools(partial_params={"repo_path": ev.input.repository.local_path, "include_line_numbers": True})
    return UserInputEvent(input=query)
diff_focused_agent_prompt

Prompt for diff-focused agent that reviews one specific diff at a time.

performance_agent

Performance analysis and optimization agent.

PerformanceAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in identifying performance issues and optimization opportunities.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/performance_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Performance Expert",
        focus_areas=[
            "Algorithmic complexity",
            "Memory optimization",
            "Database performance",
            "Resource efficiency",
            "Caching strategies",
            "Async operations",
        ],
        system_prompt=PERFORMANCE_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
performance_agent_prompt

Performance agent prompt for identifying performance issues and optimization opportunities.

security_agent

Security-focused review agent for identifying security vulnerabilities.

SecurityAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in identifying security vulnerabilities and issues.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/security_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Security Expert",
        focus_areas=[
            "SQL injection prevention",
            "XSS/CSRF protection",
            "Authentication security",
            "Secret management",
            "Input validation",
            "Secure coding practices",
        ],
        system_prompt=SECURITY_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
security_agent_prompt

Security agent prompt for identifying security vulnerabilities.

specialized_agent_base
SpecializedAgentComplete

Bases: StopEvent

Stop event for specialized review agent.

SpecializedAgentStart

Bases: StartEvent

Start event for specialized review agent.

SpecializedReviewAgent(agent_name: str = '', focus_areas: list[str] | None = None, system_prompt: str = '', *args: Any, **kwargs: Any)

Bases: FunctionCallingAgent

Base class for specialized review agents.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/specialized_agent_base.py
def __init__(
    self,
    agent_name: str = "",
    focus_areas: list[str] | None = None,
    system_prompt: str = "",
    *args: Any,
    **kwargs: Any,
) -> None:
    # Initialize with tools that have line numbers enabled
    tools = git_tools_gpt_5_nano_agent_prompt
    super().__init__(
        *args,
        tools=tools,
        system_prompt=system_prompt,
        **kwargs,
    )
    self.agent_name = agent_name
    self.focus_areas = focus_areas or []
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
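
Every concrete agent above follows the same construction pattern, so adding a new specialized agent amounts to supplying a name, focus areas, and a system prompt. A sketch (the agent and its prompt text are placeholders, not part of the package):

DOCS_AGENT_SYSTEM_PROMPT = "You are a documentation review expert..."  # placeholder

class DocumentationAgent(SpecializedReviewAgent):
    """Illustrative agent specialized in reviewing documentation changes."""

    def __init__(self, *args, **kwargs):
        super().__init__(
            agent_name="Documentation Expert",
            focus_areas=[
                "Docstring accuracy",
                "Changelog updates",
                "README consistency",
            ],
            system_prompt=DOCS_AGENT_SYSTEM_PROMPT,
            *args,
            **kwargs,
        )
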
handle_agent_completion(ctx: Context, ev: AgentCompleteEvent) -> SpecializedAgentComplete async

Handle agent completion and generate structured output.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/specialized_agent_base.py
@step
async def handle_agent_completion(self, ctx: Context, ev: AgentCompleteEvent) -> SpecializedAgentComplete:
    """Handle agent completion and generate structured output."""

    reviews = self._parse_agent_response(ev.output or "")
    result = AgentReviewOutput(
        agent_name=self.agent_name,
        focus_areas=self.focus_areas,
        reviews=reviews,
        sources=ev.sources,
        summary="",  # TODO: Add a llm generated summary of the review
    )

    return SpecializedAgentComplete(review_output=result)
setup_query_and_tools(ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent async

Setup the query and tools for the specialized agent.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/specialized_agent_base.py
@step
async def setup_query_and_tools(self, ctx: Context, ev: SpecializedAgentStart) -> UserInputEvent:
    """Setup the query and tools for the specialized agent."""
    review_depth_guidelines = self._get_review_depth_guidelines(ev.input.review_depth)
    query = AGENT_PROMPT_TEMPLATE.format(
        agent_name=self.agent_name,
        focus_areas=", ".join(self.focus_areas),
        pull_request_number=ev.input.pull_request.number,
        pull_request_title=ev.input.pull_request.title,
        files_changed=ev.input.files_changed,
        review_depth=ev.input.review_depth.value,
        review_depth_guidelines=review_depth_guidelines,
    )
    self.update_tools(partial_params={"repo_path": ev.input.repository.local_path, "include_line_numbers": True})
    return UserInputEvent(input=query)
specialized_agent_base_prompt

Specialized agent base prompts for review depth guidelines and agent prompt template.

testing_agent

Testing strategy and coverage agent.

TestingAgent(*args, **kwargs)

Bases: SpecializedReviewAgent

Agent specialized in testing strategy, coverage, and test quality.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/agents/testing_agent.py
def __init__(self, *args, **kwargs):
    super().__init__(
        agent_name="Testing Expert",
        focus_areas=[
            "Test coverage",
            "Test quality",
            "Edge case testing",
            "Integration testing",
            "Test organization",
            "Test maintainability",
        ],
        system_prompt=TESTING_AGENT_SYSTEM_PROMPT,
        *args,
        **kwargs,
    )
testing_agent_prompt

Testing agent prompt for testing strategy, coverage, and test quality.

aggregator

Review aggregator for merging and deduplicating agent findings.

ReviewAggregator

Aggregates reviews from multiple agents and deduplicates findings.

aggregate_reviews(agent_reviews: list[AgentReviewOutput]) -> list[FileReview]

Aggregate reviews from all agents into a cohesive output.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/aggregator.py
def aggregate_reviews(self, agent_reviews: list[AgentReviewOutput]) -> list[FileReview]:
    """Aggregate reviews from all agents into a cohesive output."""
    # TODO: Keep the metadata sources from all agent linked to the issue
    # Group reviews by file path
    file_reviews = defaultdict(list)

    for agent_output in agent_reviews:
        for file_review in agent_output.reviews:
            file_reviews[file_review.file_path].append(file_review)

    # Merge reviews for each file
    aggregated_reviews = []

    for file_path, reviews in file_reviews.items():
        merged_review = self._merge_file_reviews(file_path, reviews)
        aggregated_reviews.append(merged_review)

    return aggregated_reviews
data_models
AgentResponseModel

Bases: BaseModel

Pydantic model for agent JSON response parsing.

AgentReviewInput

Bases: BaseModel

Input for individual specialized agents.

AgentReviewOutput

Bases: BaseModel

Output from individual specialized agents.

FileReview

Bases: BaseModel

Review for a specific file with inline comments.

PRReivewAggregatorOutput

Bases: BaseModel

Output model for PR review aggregation.

PRReviewInput

Bases: BaseModel

Input for PR review generation workflow.

ReviewComment

Bases: BaseModel

Structured comment with metadata.

ReviewDepth

Bases: str, Enum

Review depth levels for PR reviews.

ReviewType

Bases: str, Enum

Review strategy types for PR reviews.

diff_by_diff_pipeline

Diff-by-diff parallel review pipeline using BaseParallelWorkflow.

DiffByDiffPipelineWorkflow(timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any)

Bases: Workflow

Workflow that reviews each file diff in parallel, then aggregates the results with an LLM.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
def __init__(
    self,
    timeout: int | None = None,
    verbose: bool = False,
    *args: Any,
    **kwargs: Any,
):
    super().__init__(*args, timeout=timeout, verbose=verbose, **kwargs)
    self.verbose = verbose
    self.timeout = timeout
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
    self.aggregation_workflow = LLMAggregationWorkflow(timeout=timeout, verbose=verbose)
    # Create parallel workflow with wrapper as inner workflow
    self.parallel_workflow = BaseParallelWorkflow(
        inner=DiffReviewWrapperWorkflow(timeout=timeout, verbose=verbose), timeout=timeout, verbose=verbose
    )
aggregate_reviews(ctx: Context, ev: ParallelDiffReviewsCompleteEvent) -> PRReviewComplete async

Aggregate and clean reviews using LLM workflow.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
@step
async def aggregate_reviews(self, ctx: Context, ev: ParallelDiffReviewsCompleteEvent) -> PRReviewComplete:
    """Aggregate and clean reviews using LLM workflow."""
    if not ev.agent_reviews:
        if self.verbose:
            self.logger.debug("No reviews to aggregate")
        return PRReviewComplete(output=[])

    # Run aggregation workflow
    aggregation_result: LLMAggregationCompleteEvent = await self.aggregation_workflow.run(
        start_event=LLMAggregationStartEvent(
            agent_reviews=ev.agent_reviews,
            files_changed=ev.files_changed,
        )
    )

    if self.verbose:
        self.logger.debug(f"Pipeline complete with {len(aggregation_result.aggregated_reviews)} aggregated reviews")

    return PRReviewComplete(output=aggregation_result.aggregated_reviews)
start_pipeline(ctx: Context, ev: DiffByDiffStartEvent) -> ParallelDiffReviewsCompleteEvent | None async

Start the diff-by-diff review pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
@step
async def start_pipeline(self, ctx: Context, ev: DiffByDiffStartEvent) -> ParallelDiffReviewsCompleteEvent | None:
    """Start the diff-by-diff review pipeline."""
    # Get list of changed files as objects
    file_diffs: list[FileDiffInfo] = list_changed_files_as_objects(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        repo_path=ev.input.repository.local_path,
    )

    # Apply file exclusion patterns if provided
    if ev.input.files_exclude_patterns:
        from fnmatch import fnmatch

        filtered_file_diffs = []
        for file_diff in file_diffs:
            if not any(fnmatch(file_diff.file_path, pattern) for pattern in ev.input.files_exclude_patterns):
                filtered_file_diffs.append(file_diff)
            elif ev.input.files_reinclude_patterns and any(
                fnmatch(file_diff.file_path, pattern) for pattern in ev.input.files_reinclude_patterns
            ):
                filtered_file_diffs.append(file_diff)
        file_diffs = filtered_file_diffs

    if not file_diffs:
        if self.verbose:
            self.logger.debug("No files to review after filtering")
        return ParallelDiffReviewsCompleteEvent(agent_reviews=[], files_changed="")

    if self.verbose:
        self.logger.debug(f"Starting parallel review of {len(file_diffs)} file diffs...")

    # Get formatted files changed string
    files_changed = list_changed_files(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        repo_path=ev.input.repository.local_path,
    )

    # Create agent start events for each file diff
    agent_start_events: list[SpecializedAgentStart] = []
    for file_diff in file_diffs:
        agent_input = AgentReviewInput(
            repository=ev.input.repository,
            pull_request=ev.input.pull_request,
            files_changed=files_changed,
            review_depth=ev.input.review_depth,
            custom_guidelines=ev.input.custom_guidelines,
            target_file_path=file_diff.file_path,
        )
        agent_start_events.append(SpecializedAgentStart(input=agent_input))

    # Run parallel workflow
    # Cast to list[Event] since SpecializedAgentStart extends Event
    inner_events: list[Event] = agent_start_events  # type: ignore[assignment]
    parallel_results = await self.parallel_workflow.run(start_event=ParallelStartEvent(inner_events=inner_events))

    # Extract results (filter out None for failed reviews)
    # parallel_results is a list of StopEvent results from the wrapper workflow
    agent_reviews: list[AgentReviewOutput] = [
        result for result in parallel_results if result is not None and isinstance(result, AgentReviewOutput)
    ]

    if self.verbose:
        self.logger.debug(f"Completed {len(agent_reviews)} of {len(file_diffs)} parallel reviews")

    return ParallelDiffReviewsCompleteEvent(agent_reviews=agent_reviews, files_changed=files_changed)
DiffByDiffStartEvent

Bases: StartEvent

Start event for diff-by-diff pipeline workflow.

DiffReviewWrapperWorkflow(timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any)

Bases: Workflow

Wrapper workflow to run a single diff-focused agent review.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
def __init__(self, timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any):
    super().__init__(*args, timeout=timeout, verbose=verbose, **kwargs)
    self.timeout = timeout
    self.verbose = verbose
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
run_agent_review(ev: SpecializedAgentStart) -> StopEvent async

Run a single agent review and return the result.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
@step
async def run_agent_review(self, ev: SpecializedAgentStart) -> StopEvent:
    """Run a single agent review and return the result."""
    agent = DiffFocusedAgent(timeout=self.timeout, verbose=self.verbose)
    try:
        agent_output: SpecializedAgentComplete = await agent.run(start_event=ev)
        return StopEvent(result=agent_output.review_output)
    except Exception as e:
        self.logger.exception(f"Failed to run agent review: {e}")
        # Return None to indicate failure
        return StopEvent(result=None)
ParallelDiffReviewsCompleteEvent

Bases: Event

Event when all parallel diff reviews are complete.

generate_diff_by_diff_pr_review(repository: Repository, pull_request: PullRequest, review_depth: ReviewDepth = ReviewDepth.STANDARD, custom_guidelines: list[str] | None = None, files_exclude_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, timeout: int | None = None, verbose: bool = False) -> PRReviewComplete async

Generate a PR review using the diff-by-diff parallel pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/diff_by_diff_pipeline.py
async def generate_diff_by_diff_pr_review(
    repository: Repository,
    pull_request: PullRequest,
    review_depth: ReviewDepth = ReviewDepth.STANDARD,
    custom_guidelines: list[str] | None = None,
    files_exclude_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    timeout: int | None = None,
    verbose: bool = False,
) -> PRReviewComplete:
    """Generate a PR review using the diff-by-diff parallel pipeline."""
    if files_exclude_patterns is None:
        files_exclude_patterns = []

    # Create the workflow
    workflow = DiffByDiffPipelineWorkflow(timeout=timeout, verbose=verbose)

    # Create input data
    input_data = PRReviewInput(
        repository=repository,
        pull_request=pull_request,
        review_depth=review_depth,
        custom_guidelines=custom_guidelines,
        files_exclude_patterns=files_exclude_patterns,
        files_reinclude_patterns=files_reinclude_patterns,
        use_multi_agent=False,  # This is a different pipeline
    )

    # Run the workflow
    result: PRReviewComplete = await workflow.run(start_event=DiffByDiffStartEvent(input=input_data))

    return result
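
Usage mirrors the other entry points, with re-include patterns layered on top of the exclusions (a sketch; the import path is an assumption, and repo and pr are built as in the earlier examples):

import asyncio

from lampe.review.workflows.pr_review.diff_by_diff_pipeline import generate_diff_by_diff_pr_review

result = asyncio.run(
    generate_diff_by_diff_pr_review(
        repository=repo,
        pull_request=pr,
        files_exclude_patterns=["*.txt", "*.lock"],
        files_reinclude_patterns=["readme.txt"],  # wins over the *.txt exclusion
        timeout=600,
    )
)
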
llm_aggregation_prompt

Prompt for LLM-based review aggregation step.

llm_aggregation_step

LLM-based aggregation workflow for cleaning and deduplicating review comments.

AggregatedReviewsModel

Bases: BaseModel

Pydantic model for LLM aggregation output.

LLMAggregationCompleteEvent

Bases: StopEvent

Complete event for LLM aggregation workflow.

LLMAggregationStartEvent

Bases: StartEvent

Start event for LLM aggregation workflow.

LLMAggregationWorkflow(timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any)

Bases: Workflow

Workflow for aggregating and cleaning review comments using an LLM.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/llm_aggregation_step.py
def __init__(
    self,
    timeout: int | None = None,
    verbose: bool = False,
    *args: Any,
    **kwargs: Any,
):
    super().__init__(*args, timeout=timeout, verbose=verbose, **kwargs)
    self.verbose = verbose
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
    self.llm = LiteLLM(model=MODELS.GPT_5_2025_08_07, temperature=1, reasoning_effort="high")
aggregate_reviews(ctx: Context, ev: LLMAggregationStartEvent) -> LLMAggregationCompleteEvent async

Aggregate and clean reviews using LLM.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/llm_aggregation_step.py
@step
async def aggregate_reviews(self, ctx: Context, ev: LLMAggregationStartEvent) -> LLMAggregationCompleteEvent:
    """Aggregate and clean reviews using LLM."""
    if not ev.agent_reviews:
        if self.verbose:
            self.logger.debug("No agent reviews to aggregate")
        return LLMAggregationCompleteEvent(aggregated_reviews=[])

    if self.verbose:
        self.logger.debug(f"Aggregating {len(ev.agent_reviews)} agent reviews...")

    # Prepare agent reviews as JSON
    agent_reviews_dict = [review.model_dump() for review in ev.agent_reviews]
    agent_reviews_json = json.dumps(agent_reviews_dict, indent=2)

    # Create prompt
    user_prompt = LLM_AGGREGATION_USER_PROMPT.format(
        files_changed=ev.files_changed, agent_reviews_json=agent_reviews_json
    )

    # Call LLM
    response = await self.llm.achat(
        messages=[
            ChatMessage(role="system", content=LLM_AGGREGATION_SYSTEM_PROMPT),
            ChatMessage(role="user", content=user_prompt),
        ]
    )

    # Parse response
    try:
        parser = PydanticOutputParser(output_cls=AggregatedReviewsModel)
        parsed_data = parser.parse(response.message.content or "")

        # Convert back to AgentReviewOutput objects
        aggregated_reviews = []
        for agent_output_dict in parsed_data.agent_outputs:
            try:
                aggregated_reviews.append(AgentReviewOutput.model_validate(agent_output_dict))
            except Exception as e:
                self.logger.exception(f"Failed to parse agent output: {e}")
                continue

        if self.verbose:
            self.logger.debug(f"Aggregation complete: {len(aggregated_reviews)} cleaned reviews")

        return LLMAggregationCompleteEvent(aggregated_reviews=aggregated_reviews)

    except Exception as e:
        self.logger.exception(f"Failed to parse aggregation response: {e}")
        # Fallback: return original reviews if aggregation fails
        if self.verbose:
            self.logger.debug("Falling back to original reviews due to aggregation failure")
        return LLMAggregationCompleteEvent(aggregated_reviews=ev.agent_reviews)
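
The error handling in this step is the part worth imitating: parse strictly, but never let a malformed LLM response drop the underlying reviews. A distilled, standalone version of the pattern (assuming the llama_index import path these workflows use; ToyOutput is a stand-in model):

from llama_index.core.output_parsers import PydanticOutputParser
from pydantic import BaseModel

class ToyOutput(BaseModel):
    items: list[str]

def parse_or_fallback(raw: str, fallback: ToyOutput) -> ToyOutput:
    """Strict parse with a safe fallback, mirroring aggregate_reviews above."""
    try:
        return PydanticOutputParser(output_cls=ToyOutput).parse(raw)
    except Exception:
        return fallback  # keep the original data rather than losing it
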
multi_agent_pipeline

Multi-agent sequential pipeline for PR review using LlamaIndex Workflow.

AgentReviewEvent

Bases: Event

Event containing agent review results.

AggregationEvent

Bases: Event

Event containing aggregated reviews.

FilesChangedEvent

Bases: Event

Event containing the list of changed files.

MultiAgentPipelineWorkflow(agents: list[SpecializedReviewAgent], timeout: int | None = None, verbose: bool = False, *args: Any, **kwargs: Any)

Bases: Workflow

LlamaIndex Workflow for multi-agent PR review pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
def __init__(
    self,
    agents: list[SpecializedReviewAgent],
    timeout: int | None = None,
    verbose: bool = False,
    *args: Any,
    **kwargs: Any,
):
    super().__init__(*args, timeout=timeout, verbose=verbose, **kwargs)
    self.verbose = verbose
    self.aggregator = ReviewAggregator()

    # Initialize all specialized agents
    self.agents = agents or [
        # SecurityAgent(timeout=timeout, verbose=verbose),
        # APIUsageAgent(timeout=timeout, verbose=verbose),
        DesignPatternAgent(timeout=timeout, verbose=verbose),
        # PerformanceAgent(timeout=timeout, verbose=verbose),
        # CodeQualityAgent(timeout=timeout, verbose=verbose),
        # TestingAgent(timeout=timeout, verbose=verbose),
    ]
    self.logger = logging.getLogger(name=LAMPE_LOGGER_NAME)
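
As the constructor shows, an empty agents argument falls back to a default roster in which only DesignPatternAgent is currently enabled; the other specialized agents are commented out in the source. Callers can pass an explicit list instead. A construction sketch (the timeout values are illustrative):

workflow = MultiAgentPipelineWorkflow(
    agents=[DesignPatternAgent(timeout=120, verbose=True)],
    timeout=600,
    verbose=True,
)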
execute_pipeline(ctx: Context, ev: PRReviewStart) -> PRReviewComplete async

Execute the complete multi-agent review pipeline.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
@step
async def execute_pipeline(self, ctx: Context, ev: PRReviewStart) -> PRReviewComplete:
    """Execute the complete multi-agent review pipeline."""
    # Get list of changed files
    files_changed = list_changed_files(
        base_reference=ev.input.pull_request.base_commit_hash,
        head_reference=ev.input.pull_request.head_commit_hash,
        repo_path=ev.input.repository.local_path,
    )

    # Collect reviews from all agents
    all_agent_reviews: list[AgentReviewOutput] = []

    for agent in self.agents:
        if self.verbose:
            self.logger.debug(f"Running {agent.agent_name} review...")

        # Create input for this agent
        agent_input = AgentReviewInput(
            repository=ev.input.repository,
            pull_request=ev.input.pull_request,
            files_changed=files_changed,
            review_depth=ev.input.review_depth,
            custom_guidelines=ev.input.custom_guidelines,
        )

        try:
            # Run the agent
            agent_output: SpecializedAgentComplete = await agent.run(
                start_event=SpecializedAgentStart(input=agent_input)
            )
            all_agent_reviews.append(agent_output.review_output)

            if self.verbose:
                self.logger.debug(f"✓ {agent.agent_name} completed with {len(agent_output.reviews)} file reviews")

        except Exception as e:
            if self.verbose:
                self.logger.debug(f"✗ {agent.agent_name} failed: {e}")
            self.logger.exception(e)
            # Continue with other agents even if one fails
            continue

    # Aggregate all reviews
    if self.verbose:
        self.logger.debug("Aggregating reviews from all agents...")

    # aggregated_reviews = self.aggregator.aggregate_reviews(all_agent_reviews)

    # if self.verbose:
    #     self.logger.debug(f"✓ Pipeline completed with {len(aggregated_reviews)} aggregated reviews")

    return PRReviewComplete(output=all_agent_reviews)
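
The changed-file list is computed once with list_changed_files and shared by every agent, so all agents review an identical diff scope, and a failing agent is logged and skipped rather than aborting the run. A standalone call to the helper, with placeholder SHA and path values:

files_changed = list_changed_files(
    base_reference="<base-sha>",
    head_reference="<head-sha>",
    repo_path="/path/to/checkout",
)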
PRReviewComplete

Bases: StopEvent

Complete event for PR review workflow.

PRReviewStart

Bases: StartEvent

Start event for PR review workflow.

generate_multi_agent_pr_review(repository: Repository, pull_request: PullRequest, review_depth: ReviewDepth = ReviewDepth.STANDARD, custom_guidelines: list[str] | None = None, files_exclude_patterns: list[str] | None = None, timeout: int | None = None, verbose: bool = False, agents_required: list[type[SpecializedReviewAgent]] | None = None) -> PRReviewComplete async

Generate a PR review using the multi-agent LlamaIndex workflow.

Source code in packages/lampe-review/src/lampe/review/workflows/pr_review/multi_agent_pipeline.py
async def generate_multi_agent_pr_review(
    repository: Repository,
    pull_request: PullRequest,
    review_depth: ReviewDepth = ReviewDepth.STANDARD,
    custom_guidelines: list[str] | None = None,
    files_exclude_patterns: list[str] | None = None,
    timeout: int | None = None,
    verbose: bool = False,
    agents_required: list[type[SpecializedReviewAgent]] | None = None,
) -> PRReviewComplete:
    """Generate a PR review using the multi-agent LlamaIndex workflow."""
    if files_exclude_patterns is None:
        files_exclude_patterns = []
    agents = []
    if agents_required:
        agents = [agent(timeout=timeout, verbose=verbose) for agent in agents_required]
    # Create the LlamaIndex workflow
    workflow = MultiAgentPipelineWorkflow(agents=agents, timeout=timeout, verbose=verbose)

    # Create input data
    input_data = PRReviewInput(
        repository=repository,
        pull_request=pull_request,
        review_depth=review_depth,
        custom_guidelines=custom_guidelines,
        files_exclude_patterns=files_exclude_patterns,
        use_multi_agent=True,
    )

    # Run the workflow
    result: PRReviewComplete = await workflow.run(start_event=PRReviewStart(input=input_data))

    return result
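
A usage sketch for this entry point, assuming Repository and PullRequest are filled the same way the CLI commands fill them; all field values below are placeholders:

import asyncio

repo = Repository(local_path="/path/to/checkout", full_name="owner/repo")
pr = PullRequest(
    number=42,
    title="Example PR",
    body=None,
    base_commit_hash="<base-sha>",
    base_branch_name="main",
    head_commit_hash="<head-sha>",
    head_branch_name="feature",
)

result = asyncio.run(
    generate_multi_agent_pr_review(
        repository=repo,
        pull_request=pr,
        review_depth=ReviewDepth.STANDARD,
        agents_required=[DesignPatternAgent],
        verbose=True,
    )
)
print(result.output)  # list of AgentReviewOutput objects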

template

TemplateWorkflow

Bases: Workflow

A template workflow that demonstrates basic workflow structure.

This workflow is responsible for:

- Demonstrating the basic workflow pattern
- Showing how to handle events
- Providing a template for new workflows

template_workflow

TemplateWorkflow

Bases: Workflow

A template workflow that demonstrates basic workflow structure.

This workflow is responsible for:

- Demonstrating the basic workflow pattern
- Showing how to handle events
- Providing a template for new workflows
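
The template's source is not reproduced here; a minimal LlamaIndex workflow of the kind it demonstrates could look like the following sketch (class, step, and result names are illustrative, not the actual template code):

from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step

class MinimalWorkflow(Workflow):
    @step
    async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent:
        # One step consumes the StartEvent and stops the workflow.
        return StopEvent(result="done")

Running it follows the same pattern as the pipeline above: result = await MinimalWorkflow(timeout=10).run().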