Reference

This section provides a comprehensive reference for all functions and classes in our project.

lampe

cli

commands

describe
describe(repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True), repo_full_name: str | None = typer.Option(None, help='Repository full name (e.g. owner/repo)'), base: str = typer.Option(..., help='Base commit SHA'), head: str = typer.Option(..., help='Head commit SHA'), title: str = typer.Option('Pull Request', help='PR title (local runs)'), output: str = typer.Option('auto', help='Output provider (auto|console|github|gitlab|bitbucket)'), variant: str = typer.Option('default', help='default|agentic'), files_exclude: list[str] | None = typer.Option(None, '--exclude'), files_reinclude: list[str] | None = typer.Option(None, '--reinclude'), truncation_tokens: int = typer.Option(DEFAULT_MAX_TOKENS, '--max-tokens'), timeout: int | None = typer.Option(None, '--timeout-seconds'), verbose: bool = typer.Option(False, '--verbose/--no-verbose'))

Generate a PR description and deliver it to the specified output provider.

Source code in packages/lampe-cli/src/lampe/cli/commands/describe.py
def describe(
    repo: Path = typer.Option(..., exists=True, file_okay=False, dir_okay=True, readable=True),
    repo_full_name: str | None = typer.Option(None, help="Repository full name (e.g. owner/repo)"),
    base: str = typer.Option(..., help="Base commit SHA"),
    head: str = typer.Option(..., help="Head commit SHA"),
    title: str = typer.Option("Pull Request", help="PR title (local runs)"),
    output: str = typer.Option("auto", help="Output provider (auto|console|github|gitlab|bitbucket)"),
    variant: str = typer.Option("default", help="default|agentic"),
    files_exclude: list[str] | None = typer.Option(None, "--exclude"),
    files_reinclude: list[str] | None = typer.Option(None, "--reinclude"),
    truncation_tokens: int = typer.Option(DEFAULT_MAX_TOKENS, "--max-tokens"),
    timeout: int | None = typer.Option(None, "--timeout-seconds"),
    verbose: bool = typer.Option(False, "--verbose/--no-verbose"),
):
    """Generate a PR description and deliver it to the specified output provider."""
    initialize()
    repo_model = Repository(local_path=str(repo), full_name=repo_full_name)
    pr_model = PullRequest(
        number=0,
        title=title,
        body=None,
        base_commit_hash=base,
        base_branch_name="",
        head_commit_hash=head,
        head_branch_name="",
    )

    provider = Provider.create_provider(provider_name=output, repository=repo_model, pull_request=pr_model)

    generator = DefaultGeneratorAdapter() if variant == "default" else AgenticGeneratorAdapter()
    pr_cfg = PRDescriptionConfig(
        files_exclude_patterns=list(files_exclude) if files_exclude else None,
        files_reinclude_patterns=list(files_reinclude) if files_reinclude else None,
        truncation_tokens=truncation_tokens,
        timeout=timeout,
        verbose=verbose,
    )

    async def _run():
        workflow_task = PRDescriptionOrchestratorWorkflow(
            provider=provider, generator=generator, timeout=timeout, verbose=verbose
        )
        await workflow_task.run(
            start_event=PRDescriptionStart(repository=repo_model, pull_request=pr_model, config=pr_cfg)
        )

    asyncio.run(_run())
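
A minimal sketch of driving the describe command in-process with Typer's test runner. The location of the Typer app object and the registered command name are assumptions based on this reference, not confirmed API; paths and SHAs are placeholders.

```python
from typer.testing import CliRunner

from lampe.cli.entrypoint import app  # assumed location of the Typer app

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "describe",
        "--repo", "/path/to/local/clone",
        "--base", "0a1b2c3",      # base commit SHA (placeholder)
        "--head", "9f8e7d6",      # head commit SHA (placeholder)
        "--output", "console",    # print the description instead of posting it
        "--exclude", "*.lock",    # optional: drop lockfiles from the diff
    ],
)
print(result.output)
```
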
healthcheck
healthcheck() -> None

Check if the CLI is healthy and can connect to GitHub.

Source code in packages/lampe-cli/src/lampe/cli/commands/healthcheck.py
def healthcheck() -> None:
    """Check if the CLI is healthy and can connect to GitHub."""
    logger.info("🔍 Checking CLI health...")
    initialize()
    # Check GitHub repository environment variable
    github_repo = os.getenv("GITHUB_REPOSITORY")
    if not github_repo or len(github_repo.split("/")) != 2:
        logger.info("❌ GITHUB_REPOSITORY environment variable not set")
        logger.info("   Set it to 'owner/repo' format (e.g., 'montagne-dev/lampe')")
        sys.exit(1)
    logger.info(f"✅ GITHUB_REPOSITORY set to: {github_repo}")

    # Check authentication environment variables
    app_id = os.getenv("LAMPE_GITHUB_APP_ID")
    private_key = os.getenv("LAMPE_GITHUB_APP_PRIVATE_KEY")
    token = os.getenv("LAMPE_GITHUB_TOKEN")

    auth_method = None
    if app_id and private_key:
        auth_method = "GitHub App"
        logger.info(f"✅ GitHub App authentication detected (App ID: {app_id})")
    elif token:
        auth_method = "User Token"
        logger.info("✅ User token authentication detected")
    else:
        logger.info("❌ No GitHub authentication found")
        logger.info("   Set either:")
        logger.info("   - LAMPE_GITHUB_APP_ID and LAMPE_GITHUB_APP_PRIVATE_KEY for GitHub App")
        logger.info("   - LAMPE_GITHUB_TOKEN for user token authentication")
        sys.exit(1)

    # Test GitHub connection
    try:
        # Create dummy repository and pull request objects for testing
        repo = Repository(local_path=".", full_name=github_repo)
        pr = PullRequest(
            number=1,
            title="Test PR",
            base_commit_hash="test-base",
            base_branch_name="main",
            head_commit_hash="test-head",
            head_branch_name="feature/test",
        )

        # Initialize GitHub provider to test authentication
        provider = GitHubProvider(repository=repo, pull_request=pr)
        logger.info(f"✅ GitHub {auth_method} authentication successful")

        # Test API access by getting repository info
        repo_info = provider.github_client.get_repo(github_repo)
        logger.info(f"✅ Repository access confirmed: {repo_info.full_name}")
        logger.info(f"   Description: {repo_info.description or 'No description'}")
        logger.info(f"   Private: {repo_info.private}")

        # Check LLM API keys
        logger.info("\n🔑 Checking LLM API keys...")
        openai_key = os.getenv("OPENAI_API_KEY")
        anthropic_key = os.getenv("ANTHROPIC_API_KEY")

        if not openai_key and not anthropic_key:
            logger.info("❌ No LLM API keys found")
            logger.info("   Set at least one of:")
            logger.info("   - OPENAI_API_KEY for OpenAI models")
            logger.info("   - ANTHROPIC_API_KEY for Anthropic models")
            sys.exit(1)

        if openai_key:
            logger.info("✅ OPENAI_API_KEY is set")
        if anthropic_key:
            logger.info("✅ ANTHROPIC_API_KEY is set")

        logger.info("\n🎉 All health checks passed! CLI is ready to use.")

    except Exception as e:
        logger.info(f"❌ GitHub connection failed: {e}")
        logger.info("\nTroubleshooting tips:")
        if auth_method == "GitHub App":
            logger.info("- Verify LAMPE_GITHUB_APP_ID and LAMPE_GITHUB_APP_PRIVATE_KEY are correct")
            logger.info("- Ensure the GitHub App is installed on the repository")
            logger.info("- Check that the private key is properly formatted")
        else:
            logger.info("- Verify LAMPE_GITHUB_TOKEN is valid and has appropriate permissions")
            logger.info("- Ensure the token has 'repo' scope for private repositories")
        sys.exit(1)
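
A sketch of the minimal environment the checks above look for, assuming token-based authentication; values are placeholders.

```python
import os

# Variable names are taken from the checks above; values are placeholders.
os.environ["GITHUB_REPOSITORY"] = "montagne-dev/lampe"        # 'owner/repo' format
os.environ["LAMPE_GITHUB_TOKEN"] = "<personal-access-token>"  # or LAMPE_GITHUB_APP_ID + LAMPE_GITHUB_APP_PRIVATE_KEY
os.environ["OPENAI_API_KEY"] = "<openai-key>"                 # at least one LLM key is required
```
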

entrypoint

version() -> None

Show version information.

Source code in packages/lampe-cli/src/lampe/cli/entrypoint.py
@app.command()
def version() -> None:
    """Show version information."""
    import importlib.metadata

    version = importlib.metadata.version("lampe-cli")
    logger.info(f"🔦 Lampe CLI v{version}")
    logger.info("   Put some light on your codebase! ✨")

providers

base
Provider(repository: Repository, pull_request: PullRequest)

Bases: ABC

Abstract provider for delivering workflow outputs.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    self.repository = repository
    self.pull_request = pull_request
create_provider(provider_name: ProviderType | str, repository: Repository, pull_request: PullRequest) -> 'Provider' staticmethod

Create a provider instance based on the specified type.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@staticmethod
def create_provider(
    provider_name: ProviderType | str, repository: Repository, pull_request: PullRequest
) -> "Provider":
    """Create a provider instance based on the specified type."""
    if isinstance(provider_name, str):
        # Handle "auto" detection
        if provider_name == "auto":
            provider_name = Provider.detect_provider_type()
        else:
            provider_name = ProviderType(provider_name)

    if provider_name == ProviderType.CONSOLE:
        from lampe.cli.providers.console import ConsoleProvider

        return ConsoleProvider(repository=repository, pull_request=pull_request)
    elif provider_name == ProviderType.GITHUB:
        from lampe.cli.providers.github import GitHubProvider

        return GitHubProvider(repository=repository, pull_request=pull_request)
    else:
        raise ValueError(f"Provider type {provider_name} not yet implemented")
deliver_pr_description(payload: PRDescriptionPayload) -> None abstractmethod

Deliver a PR description to the configured destination.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@abstractmethod
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Deliver a PR description to the configured destination."""
    ...
detect_provider_type() -> ProviderType staticmethod

Detect the appropriate provider type based on available environment variables.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
@staticmethod
def detect_provider_type() -> ProviderType:
    """Detect the appropriate provider type based on available environment variables."""
    # Priority order for provider detection
    env_var_mapping = {
        "GITHUB_API_TOKEN": ProviderType.GITHUB,
        "GITHUB_TOKEN": ProviderType.GITHUB,
        "LAMPE_GITHUB_TOKEN": ProviderType.GITHUB,
        "LAMPE_GITHUB_APP_ID": ProviderType.GITHUB,
        "LAMPE_GITHUB_APP_PRIVATE_KEY": ProviderType.GITHUB,
        "GITLAB_API_TOKEN": ProviderType.GITLAB,
        "BITBUCKET_API_TOKEN": ProviderType.BITBUCKET,
    }

    for env_var, provider_type in env_var_mapping.items():
        if os.getenv(env_var):
            return provider_type

    # Fallback to console if no API tokens are found
    return ProviderType.CONSOLE
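
A sketch of how detection reacts to the environment, following the priority mapping above; the assertions restate the documented behaviour rather than a test from the project.

```python
import os

from lampe.cli.providers.base import Provider, ProviderType  # assumed import path

os.environ["LAMPE_GITHUB_TOKEN"] = "<token>"   # any of the GitHub-related variables above
assert Provider.detect_provider_type() == ProviderType.GITHUB

for var in ("GITHUB_API_TOKEN", "GITHUB_TOKEN", "LAMPE_GITHUB_TOKEN",
            "LAMPE_GITHUB_APP_ID", "LAMPE_GITHUB_APP_PRIVATE_KEY",
            "GITLAB_API_TOKEN", "BITBUCKET_API_TOKEN"):
    os.environ.pop(var, None)
assert Provider.detect_provider_type() == ProviderType.CONSOLE  # fallback when no tokens are set
```
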
ProviderType

Bases: StrEnum

Available provider types.

update_or_add_text_between_tags(text: str, new_text: str, feature: str) -> str

Update the text between the tags [](lampe-sdk-{feature}-start) and [](lampe-sdk-{feature}-end) with new_text. If the tags don't exist, add them at the bottom of the text. The tags and new_text are preserved in the output.

Source code in packages/lampe-cli/src/lampe/cli/providers/base.py
def update_or_add_text_between_tags(text: str, new_text: str, feature: str) -> str:
    """
    Update the text between the tags [](lampe-sdk-{feature}-start) and [](lampe-sdk-{feature}-end)
    with new_text. If the tags don't exist, add them at the bottom of the text.
    The tags and new_text are preserved in the output.
    """
    identifier = f"lampe-sdk-{feature}-start"
    start_tag = rf"\[\]\(lampe-sdk-{feature}-start\)"
    end_tag = rf"\[\]\(lampe-sdk-{feature}-end\)"

    pattern = re.compile(rf"({start_tag})(.*?|\s*?){end_tag}", re.DOTALL)

    def replacer(match):
        return f"{match.group(1)}\n{new_text}\n[]({identifier.replace('-start', '')}-end)"

    # Try to replace the first occurrence
    result, count = pattern.subn(replacer, text, count=1)

    # If no tags were found, add them at the bottom
    if count == 0:
        result = f"{text}\n\n[]({identifier})\n{new_text}\n[]({identifier.replace('-start', '')}-end)"

    return result
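
A small sketch of the tag behaviour described above, using the "description" feature name; the expected result is shown as comments.

```python
body = "Some human-written PR body."
updated = update_or_add_text_between_tags(body, "Auto-generated summary.", feature="description")
# Since no tags were present, they are appended at the bottom:
#
#   Some human-written PR body.
#
#   [](lampe-sdk-description-start)
#   Auto-generated summary.
#   [](lampe-sdk-description-end)
#
# Running the call again replaces only the text between the tags.
```
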
console
ConsoleProvider(repository: Repository, pull_request: PullRequest)

Bases: Provider

Console provider for delivering PR descriptions to stdout.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    super().__init__(repository, pull_request)
deliver_pr_description(payload: PRDescriptionPayload) -> None

Print the PR description to console.

Source code in packages/lampe-cli/src/lampe/cli/providers/console.py
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Print the PR description to console."""
    print(payload.description)
github
GitHubProvider(repository: Repository, pull_request: PullRequest)

Bases: Provider

GitHub provider for delivering PR descriptions to GitHub API.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def __init__(self, repository: Repository, pull_request: PullRequest) -> None:
    if pull_request.number == 0:
        pr_number = os.getenv("PR_NUMBER")
        if not pr_number:
            raise ValueError("PR_NUMBER environment variable is required for GitHub provider")
        pull_request.number = int(pr_number)

    super().__init__(repository, pull_request)

    # github action has many default environment variables, including the repository full name:
    # https://docs.github.com/en/actions/reference/workflows-and-actions/variables#default-environment-variables
    if repo_name := os.getenv("GITHUB_REPOSITORY"):
        self.owner, self.repo_name = repo_name.split("/")
    else:
        raise ValueError("GITHUB_REPOSITORY environment variable is required for GitHub provider")

    # Initialize GitHub client with appropriate authentication
    self.github_client = self._initialize_github_client()
deliver_pr_description(payload: PRDescriptionPayload) -> None

Update the PR description on GitHub.

Source code in packages/lampe-cli/src/lampe/cli/providers/github.py
def deliver_pr_description(self, payload: PRDescriptionPayload) -> None:
    """Update the PR description on GitHub."""
    if self.pull_request.number == 0:
        raise ValueError("Cannot update GitHub PR description for local run")

    try:
        repo = self.github_client.get_repo(f"{self.owner}/{self.repo_name}")
        pull_request = repo.get_pull(self.pull_request.number)
        new_description = update_or_add_text_between_tags(
            pull_request.body or "", payload.description_with_title, "description"
        )
        pull_request.edit(body=new_description)
        logger.info(f"✅ Successfully updated PR #{self.pull_request.number} description on GitHub")
    except Exception as e:
        logger.info(f"❌ Failed to update GitHub PR: {e}")
        # Fallback to console output
        logger.info("Description:")
        logger.info(payload.description)
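
A hedged sketch of delivering a description through the GitHub provider. PRDescriptionPayload's constructor is not shown in this reference, so the field names below are assumptions inferred from the attributes used above; environment values are placeholders.

```python
import os

os.environ["GITHUB_REPOSITORY"] = "owner/repo"
os.environ["PR_NUMBER"] = "42"                 # required when pull_request.number == 0
os.environ["LAMPE_GITHUB_TOKEN"] = "<token>"   # or the GitHub App credentials

provider = GitHubProvider(repository=repo, pull_request=pr)  # models built as in the earlier provider sketch
payload = PRDescriptionPayload(                # field names assumed from the attribute access above
    description="## Summary ...",
    description_with_title="# Example PR ...",
)
provider.deliver_pr_description(payload)       # rewrites the text between the lampe-sdk tags in the PR body
```
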

core

data_models

Issue

Bases: BaseModel

Individual issue to be resolved.

PullRequest

Bases: BaseModel

Pull request information.

Repository

Bases: BaseModel

Repository information.

issue
Issue

Bases: BaseModel

Individual issue to be resolved.

pull_request
PullRequest

Bases: BaseModel

Pull request information.

repository
Repository

Bases: BaseModel

Repository information.

gitconfig

init_git()

Initialize Git configuration and check version requirements.

Source code in src/lampe/core/gitconfig.py
def init_git():
    """Initialize Git configuration and check version requirements."""
    logger.debug("Initializing Git configuration...")
    valid_git_version_available()
valid_git_version_available() -> bool

Check if the installed Git version meets the minimum requirement.

Returns:

Type Description
bool

True if Git version meets requirement, False otherwise

Source code in src/lampe/core/gitconfig.py
def valid_git_version_available() -> bool:
    """
    Check if the installed Git version meets the minimum requirement.

    Returns
    -------
    :
        True if Git version meets requirement, False otherwise
    """
    try:
        version_line = git.Git().version().strip()
        if not version_line:
            logger.critical("Unable to determine Git version from output.")
            return False

        # Extract version number from output like "git version 2.39.0"
        version_parts = version_line.split()
        if len(version_parts) < 3:
            logger.critical(f"Unexpected Git version output format: {version_line}")
            return False

        current_version = version_parts[2]

        # Handle version strings with additional info (e.g., "2.39.0.windows.1")
        # Take only the semantic version part
        current_version = current_version.split(".")[0:3]
        current_version = ".".join(current_version)

        if version.parse(current_version) >= version.parse(MINIMUM_GIT_VERSION):
            logger.debug(f"Git version {current_version} meets requirement ({MINIMUM_GIT_VERSION}+)")
            return True
        else:
            logger.critical(
                f"CRITICAL: Git version {current_version} does not meet the minimum requirement "
                f"({MINIMUM_GIT_VERSION}+). The lampe-sdk requires Git {MINIMUM_GIT_VERSION} or higher "
                f"for proper functionality. Git operations may fail or behave unexpectedly. "
                f"Please upgrade your Git installation. See the README for installation instructions."
            )
            return False
    except Exception as e:
        logger.critical(f"Unexpected error while checking Git version: {e}")
        return False
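
A minimal sketch of gating work on the version check above; the import path is assumed from the "Source code in" location.

```python
from lampe.core.gitconfig import init_git, valid_git_version_available  # assumed import path

init_git()  # logs and runs the version check
if not valid_git_version_available():
    raise SystemExit("Git is too old for the lampe-sdk; please upgrade.")
```
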

parsers

MarkdownCodeBlockRemoverOutputParser

Bases: BaseOutputParser

Output parser that extracts and returns the content of markdown code blocks marked with 'md' or 'markdown'.

This parser is designed to process LLM outputs or other text that may contain markdown code blocks.
It specifically targets code blocks with the language tag 'md' or 'markdown', removing the code block
markers and returning only the inner content. If no such block is found, it falls back to extracting
a generic code block (```). If the result still contains any other code block (with a language tag),
it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace)
is returned.
Edge Cases:
- If the input is an empty string, returns an empty string.
- If the input contains a code block with a language other than 'md' or 'markdown', it is preserved.
- If the input contains text before or after a markdown code block, only the content inside the block is returned.
- If the input contains an incomplete code block, returns the input with the trailing backticks removed if present.
Examples
>>> parser = MarkdownCodeBlockRemoverOutputParser()
>>> text = '''```md
... This is inside md block.
... ```'''
>>> parser.parse(text)
'This is inside md block.'

>>> text = '''```python
... Multiple lines
... are here.
... ```'''
>>> parser.parse(text)
'```python\nMultiple lines\nare here.\n```'

>>> text = 'No code block here.'
>>> parser.parse(text)
'No code block here.'
parse(output: str) -> str

Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

If the input contains a markdown code block with language tag 'md' or 'markdown', the content inside that block is returned, with the code block markers removed. If no such block is found, but a generic code block (```) is present, its content is returned. If the result still contains any other code block (with a language tag), it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.

Source code in src/lampe/core/parsers/markdown_code_block_remover_output.py
def parse(self, output: str) -> str:
    """
    Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

    If the input contains a markdown code block with language tag 'md' or 'markdown',
    the content inside that block is returned, with the code block markers removed.
    If no such block is found, but a generic code block (```) is present, its content is returned.
    If the result still contains any other code block (with a language tag), it is preserved as-is.
    If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.
    """
    if output == "":
        return output
    # Try to extract content from markdown code blocks with specific languages
    content = (
        extract_md_code_block(output, "md")
        or extract_md_code_block(output, "markdown")
        or extract_md_code_block(output, "")
    ) or output.strip()

    if extract_md_code_block(content, match_any_language=True) is not None:
        # if there is any other remaining code block, we don't want to remove triple backticks
        return content

    if content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content
YAMLPydanticOutputParser

Bases: PydanticOutputParser[Model], Generic[Model]

A parser that extracts and validates YAML content using Pydantic models.

Parameters:

Name Type Description Default
output_cls

Pydantic output class used for validation

required
excluded_schema_keys_from_format

Schema keys to exclude from format string, by default None

required
pydantic_format_tmpl

Template for format string, by default PYDANTIC_FORMAT_TMPL

required
Notes

This parser extracts YAML content from markdown code blocks, validates the structure using a Pydantic model, and returns the validated data. It first looks for YAML-specific code blocks, then falls back to any code block if needed.

format_string: str property

Get the format string that instructs the LLM how to output YAML.

This method will provide a format string that includes the Pydantic model's JSON schema converted to a YAML example, helping the LLM understand the expected output structure.

Returns:

Type Description
str

Format string with YAML schema example

Raises:

Type Description
NotImplementedError

The method is not yet implemented

parse(text: str) -> Model

Extract, parse and validate YAML content using the configured Pydantic model.

Parameters:

Name Type Description Default
text str

Raw text containing YAML content in markdown code blocks

required

Returns:

Type Description
Model

Validated data matching the Pydantic model structure

Raises:

Type Description
YAMLParsingError

If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors

ValidationError

If the data does not match the Pydantic model schema

Source code in src/lampe/core/parsers/yaml_pydantic_output.py
def parse(self, text: str) -> Model:
    """
    Extract, parse and validate YAML content using the configured Pydantic model.

    Parameters
    ----------
    text
        Raw text containing YAML content in markdown code blocks

    Returns
    -------
    :
        Validated data matching the Pydantic model structure

    Raises
    ------
    YAMLParsingError
        If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
    ValidationError
        If the data does not match the Pydantic model schema
    """
    if not text:
        raise YAMLParsingError("No text provided")

    yaml_block = extract_md_code_block(text, "yaml")
    if not yaml_block:
        logger.warning("No YAML block found, attempting to parse generic code block")
        yaml_block = extract_md_code_block(text)
    if not yaml_block:
        yaml_block = text
    try:
        data = yaml.safe_load(yaml_block)
    except yaml.YAMLError as e:
        raise YAMLParsingError(f"Invalid YAML syntax: {e}") from e

    return self.output_cls.model_validate(data)
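
A sketch of parsing an LLM reply with a hypothetical Pydantic model. Only the output_cls argument is shown; the keyword name is taken from the parameter table above, and the other constructor parameters are left untouched.

```python
from pydantic import BaseModel


class ChangeSummary(BaseModel):  # hypothetical output model
    title: str
    highlights: list[str]


parser = YAMLPydanticOutputParser(output_cls=ChangeSummary)

llm_reply = "```yaml\ntitle: Add retry logic\nhighlights:\n  - wrap API calls in exponential backoff\n```"

summary = parser.parse(llm_reply)   # raises YAMLParsingError / ValidationError on bad input
print(summary.title)                # -> "Add retry logic"
```
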
markdown_code_block_remover_output
MarkdownCodeBlockRemoverOutputParser

Bases: BaseOutputParser

Output parser that extracts and returns the content of markdown code blocks marked with 'md' or 'markdown'.

This parser is designed to process LLM outputs or other text that may contain markdown code blocks.
It specifically targets code blocks with the language tag 'md' or 'markdown', removing the code block
markers and returning only the inner content. If no such block is found, it falls back to extracting
a generic code block (```). If the result still contains any other code block (with a language tag),
it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace)
is returned.
Edge Cases:
- If the input is an empty string, returns an empty string.
- If the input contains a code block with a language other than 'md' or 'markdown', it is preserved.
- If the input contains text before or after a markdown code block, only the content inside the block is returned.
- If the input contains an incomplete code block, returns the input with the trailing backticks removed if present.
Examples
>>> parser = MarkdownCodeBlockRemoverOutputParser()
>>> text = '''```md
... This is inside md block.
... ```'''
>>> parser.parse(text)
'This is inside md block.'

>>> text = '''```python
... Multiple lines
... are here.
... ```'''
>>> parser.parse(text)
'```python\nMultiple lines\nare here.\n```'

>>> text = 'No code block here.'
>>> parser.parse(text)
'No code block here.'
parse(output: str) -> str

Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

If the input contains a markdown code block with language tag 'md' or 'markdown', the content inside that block is returned, with the code block markers removed. If no such block is found, but a generic code block (```) is present, its content is returned. If the result still contains any other code block (with a language tag), it is preserved as-is. If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.

Source code in src/lampe/core/parsers/markdown_code_block_remover_output.py
def parse(self, output: str) -> str:
    """
    Extracts and returns the content of a markdown code block marked with ```md or ```markdown from the input text.

    If the input contains a markdown code block with language tag 'md' or 'markdown',
    the content inside that block is returned, with the code block markers removed.
    If no such block is found, but a generic code block (```) is present, its content is returned.
    If the result still contains any other code block (with a language tag), it is preserved as-is.
    If no code block is found, the original text (stripped of leading/trailing whitespace) is returned.
    """
    if output == "":
        return output
    # Try to extract content from markdown code blocks with specific languages
    content = (
        extract_md_code_block(output, "md")
        or extract_md_code_block(output, "markdown")
        or extract_md_code_block(output, "")
    ) or output.strip()

    if extract_md_code_block(content, match_any_language=True) is not None:
        # if there is any other remaining code block, we don't want to remove triple backticks
        return content

    if content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content
utils
extract_md_code_block(output: str, language: str = '', match_any_language: bool = False) -> str | None

Extract markdown code block content from a string, handling nested code blocks.

Parameters:

Name Type Description Default
output str

The string to extract code block content from.

required
language str

The language identifier for the code block (e.g., 'yaml', 'python', 'json').

''
match_any_language bool

If True, the language of the code block is optional and the function will return the first code block found.

False

Returns:

Type Description
str | None

The extracted code block content, or the entire input if no language is specified or no matching code block is found.

Notes

This function extracts content between ```{language} tags, preserving any nested code blocks within the content. The regex pattern handles:
- Optional text before the code block
- Nested code blocks (e.g. ```json, ```python, ``` inside the main block)
- Proper indentation of nested content
- Case-insensitive language tag matching

Examples:

>>> text = '''
... Some text
... ```yaml
... key: value
... nested: |
...   ```python
...   print("Hello")
...   ```
... ```
... '''
>>> result = extract_md_code_block(text, 'yaml')
>>> print(result)
key: value
nested: |
  ```python
  print("Hello")
  ```
Source code in src/lampe/core/parsers/utils.py
def extract_md_code_block(output: str, language: str = "", match_any_language: bool = False) -> str | None:
    """Extract markdown code block content from a string, handling nested code blocks.

    Parameters
    ----------
    output : str
        The string to extract code block content from.
    language : str
        The language identifier for the code block (e.g., 'yaml', 'python', 'json').
    match_any_language : bool
        If True, the language of the code block is optional and the function will return the first code block found.
    Returns
    -------
    :
        The extracted code block content, or the entire input if no language is specified
        or no matching code block is found.

    Notes
    -----
    This function extracts content between ```{language} tags, preserving any nested
    code blocks within the content. The regex pattern handles:
    - Optional text before the code block
    - Nested code blocks (e.g. ```json, ```python, ``` inside the main block)
    - Proper indentation of nested content
    - Case-insensitive language tag matching

    Examples
    --------
    >>> text = '''
    ... Some text
    ... ```yaml
    ... key: value
    ... nested: |
    ...   ```python
    ...   print("Hello")
    ...   ```
    ... ```
    ... '''
    >>> result = extract_md_code_block(text, 'yaml')
    >>> print(result)
    key: value
    nested: |
      ```python
      print("Hello")
      ```
    """

    if match_any_language:
        code_block_pattern = MARKDOWN_CODE_BLOCK_PATTERN.format(language=MARKDOWN_CODE_BLOCK_MATCH_ANY_LANGUAGE_PATTERN)
    else:
        code_block_pattern = MARKDOWN_CODE_BLOCK_PATTERN.format(language=language)

    result = re.search(code_block_pattern, output, re.MULTILINE | re.IGNORECASE | re.DOTALL)
    if result:
        return result.group(1)
    return None
yaml_pydantic_output
YAMLParsingError

Bases: Exception

Raised when YAML parsing or validation fails.

YAMLPydanticOutputParser

Bases: PydanticOutputParser[Model], Generic[Model]

A parser that extracts and validates YAML content using Pydantic models.

Parameters:

Name Type Description Default
output_cls

Pydantic output class used for validation

required
excluded_schema_keys_from_format

Schema keys to exclude from format string, by default None

required
pydantic_format_tmpl

Template for format string, by default PYDANTIC_FORMAT_TMPL

required
Notes

This parser extracts YAML content from markdown code blocks, validates the structure using a Pydantic model, and returns the validated data. It first looks for YAML-specific code blocks, then falls back to any code block if needed.

format_string: str property

Get the format string that instructs the LLM how to output YAML.

This method will provide a format string that includes the Pydantic model's JSON schema converted to a YAML example, helping the LLM understand the expected output structure.

Returns:

Type Description
str

Format string with YAML schema example

Raises:

Type Description
NotImplementedError

The method is not yet implemented

parse(text: str) -> Model

Extract, parse and validate YAML content using the configured Pydantic model.

Parameters:

Name Type Description Default
text str

Raw text containing YAML content in markdown code blocks

required

Returns:

Type Description
Model

Validated data matching the Pydantic model structure

Raises:

Type Description
YAMLParsingError

If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors

ValidationError

If the data does not match the Pydantic model schema

Source code in src/lampe/core/parsers/yaml_pydantic_output.py
def parse(self, text: str) -> Model:
    """
    Extract, parse and validate YAML content using the configured Pydantic model.

    Parameters
    ----------
    text
        Raw text containing YAML content in markdown code blocks

    Returns
    -------
    :
        Validated data matching the Pydantic model structure

    Raises
    ------
    YAMLParsingError
        If no valid YAML content is found in the text or if the YAML parsing fails due to syntax errors
    ValidationError
        If the data does not match the Pydantic model schema
    """
    if not text:
        raise YAMLParsingError("No text provided")

    yaml_block = extract_md_code_block(text, "yaml")
    if not yaml_block:
        logger.warning("No YAML block found, attempting to parse generic code block")
        yaml_block = extract_md_code_block(text)
    if not yaml_block:
        yaml_block = text
    try:
        data = yaml.safe_load(yaml_block)
    except yaml.YAMLError as e:
        raise YAMLParsingError(f"Invalid YAML syntax: {e}") from e

    return self.output_cls.model_validate(data)

tools

TempGitRepository(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True)

Context Manager for cloning and cleaning up a local clone of a repository

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content. Upon exit, will attempt to delete the cloned repository.

Attributes:

Name Type Description
repo_url

Repository URL to clone

head_ref

Optional head ref to check out.

folder_name

Optional name prefix for temp directory

sparse

Enable sparse checkout mode to avoid populating all files initially.

shallow

Enable shallow clone (depth=1) to fetch only the target commit.

blob_filter

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

remove_existing

Remove existing directory if it exists

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

UnableToDeleteError

If unable to delete the cloned repository

Source code in src/lampe/core/tools/repository/management.py
def __init__(
    self,
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
):
    self.repo_url = repo_url
    self.head_ref = head_ref
    self.base_ref = base_ref
    self.folder_name = folder_name
    self.sparse = sparse
    self.shallow = shallow
    self.blob_filter = blob_filter
    self.remove_existing = remove_existing
    self.path_to_local_repo = None
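
A hedged sketch of the context manager. What __enter__ yields is not shown in this excerpt, so the sketch assumes it exposes the path to the temporary clone (the path_to_local_repo attribute above); the URL and refs are placeholders.

```python
with TempGitRepository(
    "https://github.com/owner/repo.git",   # placeholder URL
    head_ref="feature/my-change",
    base_ref="main",
) as tmp:
    # assumption: the yielded value (or tmp.path_to_local_repo) points at the clone
    ...  # run the diff / content tools against the clone here
# on exit the clone is deleted; UnableToDeleteError is raised if cleanup fails
```
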
clone_repo(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True) -> str

Clone a repository optimized for PR review.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content.

Parameters:

Name Type Description Default
repo_url str

Repository URL to clone

required
head_ref str | None

Head ref to checkout

None
base_ref str | None

Base ref to fetch for diff computation

None
folder_name str | None

Optional name prefix for temp directory

None
sparse bool

Enable sparse checkout mode to avoid populating all files initially

True
shallow bool

Enable shallow clone (depth=1) to fetch only the target commit

True
blob_filter bool

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

True
remove_existing bool

Remove existing directory if it exists

True

Returns:

Type Description
str

Path to the cloned repository

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

Source code in src/lampe/core/tools/repository/management.py
def clone_repo(
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
) -> str:
    """Clone a repository optimized for PR review.

    Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering
    to efficiently fetch only required content.

    Parameters
    ----------
    repo_url
        Repository URL to clone
    head_ref
        Head ref to checkout
    base_ref
        Base ref to fetch for diff computation
    folder_name
        Optional name prefix for temp directory
    sparse
        Enable sparse checkout mode to avoid populating all files initially
    shallow
        Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter
        Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
    remove_existing
        Remove existing directory if it exists

    Returns
    -------
    :
        Path to the cloned repository

    Raises
    ------
    RuntimeError
        If Git version check fails
    GitCommandError
        If clone operation fails
    """
    if not valid_git_version_available():
        raise RuntimeError("Git version check failed. Please upgrade Git to the minimum required version.")

    tmp_dir = f"/tmp/{folder_name}" if folder_name else mkdtemp(prefix=str(uuid.uuid4()))
    logger.info(f"Cloning repo (sparse={sparse}, shallow={shallow}, blob_filter={blob_filter}) to {tmp_dir}")

    if os.path.exists(tmp_dir):
        if remove_existing:
            logger.info(f"Removing existing directory {tmp_dir}")
            shutil.rmtree(tmp_dir)
        else:
            return tmp_dir

    clone_args = []
    if shallow:
        clone_args.extend(["--depth", "1"])
    if sparse:
        clone_args.append("--sparse")
    if blob_filter:
        clone_args.extend(["--filter", "blob:none"])
    if head_ref:
        clone_args.extend(["--revision", head_ref])

    try:
        repository_path = ""
        repo = Repo.clone_from(repo_url, tmp_dir, multi_options=clone_args)
        repository_path = _repo_to_path(repo)
        if sparse and blob_filter:
            logger.info("Partial clone ready - file contents will be fetched on-demand during git operations")
        if base_ref:
            fetch_commit_ref(repository_path, base_ref)
    except GitCommandError as e:
        logger.exception(f"Clone failed: {e}\nClone arguments used: {clone_args}")
        raise e

    return repository_path
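
A sketch of a partial clone for PR review using the defaults documented above; the URL, refs, and folder name are placeholders.

```python
repo_path = clone_repo(
    "https://github.com/owner/repo.git",
    head_ref="feature/my-change",    # checked out via the partial-clone options
    base_ref="main",                 # fetched afterwards so diffs against it work
    folder_name="owner-repo-pr-42",  # optional fixed location under /tmp
)
```
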
get_diff_between_commits(base_hash: str, head_hash: str = 'HEAD', files_exclude_patterns: list[str] | None = None, files_include_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, batch_size: int = 50, repo_path: str = '/tmp/') -> str

Get the diff between two commits, optionally filtering files by glob patterns.

The filtering is done in a specific order to ensure correct pattern application:

1. First, if include patterns are provided, only files matching those patterns are kept
2. Then, exclude patterns are applied to filter out matching files
3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

This order ensures that reinclude patterns only affect files that were actually excluded, preventing the reinclude of files that weren't matched by include patterns in the first place.

Parameters:

Name Type Description Default
base_hash str

Base commit hash to compare from

required
head_hash str

Head commit hash to compare to. If not provided, uses HEAD

'HEAD'
files_exclude_patterns list[str] | None

List of glob patterns to exclude from the diff (relative to repo root). These patterns take precedence over include patterns.

None
files_include_patterns list[str] | None

List of glob patterns to include in the diff (relative to repo root). Note that exclude patterns will override these if there are conflicts.

None
files_reinclude_patterns list[str] | None

List of glob patterns to re-include files that were excluded by the exclude patterns. These patterns will only affect files that were previously excluded.

None
repo_path str

Path to the git repository

'/tmp/'
batch_size int

Number of files to process in each batch.

50

Returns:

Type Description
str

Diff as a string

Raises:

Type Description
DiffNotFoundError

If there is an unexpected git error

Source code in src/lampe/core/tools/repository/diff.py
def get_diff_between_commits(
    base_hash: str,
    head_hash: str = "HEAD",
    files_exclude_patterns: list[str] | None = None,
    files_include_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    batch_size: int = 50,
    repo_path: str = "/tmp/",
) -> str:
    """Get the diff between two commits, optionally filtering files by glob patterns.

    The filtering is done in a specific order to ensure correct pattern application:
    1. First, if include patterns are provided, only files matching those patterns are kept
    2. Then, exclude patterns are applied to filter out matching files
    3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

    This order ensures that reinclude patterns only affect files that were actually excluded,
    preventing the reinclude of files that weren't matched by include patterns in the first place.

    Parameters
    ----------
    base_hash
        Base commit hash to compare from
    head_hash
        Head commit hash to compare to. If not provided, uses HEAD
    files_exclude_patterns
        List of glob patterns to exclude from the diff (relative to repo root).
        These patterns take precedence over include patterns.
    files_include_patterns
        List of glob patterns to include in the diff (relative to repo root).
        Note that exclude patterns will override these if there are conflicts.
    files_reinclude_patterns
        List of glob patterns to re-include files that were excluded by the exclude patterns.
        These patterns will only affect files that were previously excluded.
    repo_path
        Path to the git repository
    batch_size
        Number of files to process in each batch.
    Returns
    -------
    :
        Diff as a string

    Raises
    ------
    DiffNotFoundError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        changed_files = ""
        with LocalCommitsAvailability(repo_path, [base_hash, head_hash]):
            changed_files = repo.git.diff(base_hash, head_hash, "--name-only")

        if files_include_patterns and files_exclude_patterns:
            include_patterns = set(files_include_patterns)
            exclude_patterns = set(files_exclude_patterns)
            overlap = include_patterns & exclude_patterns
            if overlap:
                logger.warning(
                    f"Overlapping patterns found in include and exclude patterns: {overlap}. "
                    "Exclude patterns will take precedence as per git pathspec documentation."
                )

        filtered_files = []
        for f in changed_files.splitlines():
            if files_include_patterns and not any(fnmatch(f, pat) for pat in files_include_patterns):
                continue
            if files_exclude_patterns and any(fnmatch(f, pat) for pat in files_exclude_patterns):
                if not (files_reinclude_patterns and any(fnmatch(f, pat) for pat in files_reinclude_patterns)):
                    continue
            filtered_files.append(f)

        diffs = []
        for batch in batched(filtered_files, batch_size):
            diffs.append(repo.git.diff(base_hash, head_hash, "--", *batch))
        return "\n".join(diffs)
    except GitCommandError as e:
        logger.exception(f"Unexpected error getting diff: {e}")
        raise DiffNotFoundError(f"Diff not found for commits {base_hash} and {head_hash}") from e
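
A sketch of a filtered diff over an existing local clone; the patterns, refs, and path are placeholders.

```python
diff = get_diff_between_commits(
    base_hash="main",
    head_hash="HEAD",
    files_exclude_patterns=["*.lock", "dist/*"],      # drop noisy files
    files_reinclude_patterns=["dist/CHANGELOG.md"],   # but keep this one
    repo_path="/tmp/owner-repo-pr-42",                # e.g. the path returned by clone_repo above
)
print(diff[:500])
```
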
view_file(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:

Name Type Description Default
commit_hash str

Commit reference (e.g., "main", commit hash)

required
file_path str

Path to the file within the repository

required
line_start int | None

Line range start index (0-based) of head_content to extract content from

None
line_end int | None

Line range end index (0-based) of head_content to extract content to

None
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

File content as a string, empty string if file doesn't exist or line range is invalid

Raises:

Type Description
GitCommandError

If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of head_content to extract content from
    line_end
        Line range end index (0-based) of head_content to extract content to
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])
        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
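
A sketch of reading a slice of one file at a given commit; the file path, range, and repository path are placeholders.

```python
snippet = get_file_content_at_commit(
    commit_hash="main",
    file_path="src/lampe/core/gitconfig.py",
    line_start=0,    # 0-based, inclusive
    line_end=20,     # 0-based, inclusive
    repo_path="/tmp/owner-repo-pr-42",
)
print(snippet)
```
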
repository
LocalCommitsAvailability(repo_path: str, commits: list[str])

Context manager to check if commits are available locally before git operations.

Checks if specified commits exist locally using git fsck --root and fetches them if they're not present. This is useful for ensuring all required commits are available before performing git operations that depend on them.

Attributes:

Name Type Description
repo_path

Path to the git repository

commits

List of commit references to check and fetch if needed

Source code in src/lampe/core/tools/repository/management.py
def __init__(self, repo_path: str, commits: list[str]):
    self.repo_path = repo_path
    self.commits = commits
    self.repo = Repo(path=repo_path)
    self._fetched_commits = []
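
A sketch of guarding a git operation with the availability check; the SHAs and path are placeholders.

```python
from git import Repo

with LocalCommitsAvailability("/tmp/owner-repo-pr-42", ["0a1b2c3", "9f8e7d6"]):
    # both commits are fetched if missing, so the diff below cannot fail on absent objects
    names = Repo(path="/tmp/owner-repo-pr-42").git.diff("0a1b2c3", "9f8e7d6", "--name-only")
```
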
TempGitRepository(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True)

Context Manager for cloning and cleaning up a local clone of a repository

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content. Upon exit, will attempt to delete the cloned repository.

Attributes:

Name Type Description
repo_url

Repository URL to clone

head_ref

Optional head ref to check out.

folder_name

Optional name prefix for temp directory

sparse

Enable sparse checkout mode to avoid populating all files initially.

shallow

Enable shallow clone (depth=1) to fetch only the target commit.

blob_filter

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

remove_existing

Remove existing directory if it exists

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

UnableToDeleteError

If unable to delete the cloned repository

Source code in src/lampe/core/tools/repository/management.py
def __init__(
    self,
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
):
    self.repo_url = repo_url
    self.head_ref = head_ref
    self.base_ref = base_ref
    self.folder_name = folder_name
    self.sparse = sparse
    self.shallow = shallow
    self.blob_filter = blob_filter
    self.remove_existing = remove_existing
    self.path_to_local_repo = None
clone_repo(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True) -> str

Clone a repository optimized for PR review.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content.

Parameters:

Name Type Description Default
repo_url str

Repository URL to clone

required
head_ref str | None

Head ref to checkout

None
base_ref str | None

Base ref to fetch for diff computation

None
folder_name str | None

Optional name prefix for temp directory

None
sparse bool

Enable sparse checkout mode to avoid populating all files initially

True
shallow bool

Enable shallow clone (depth=1) to fetch only the target commit

True
blob_filter bool

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

True
remove_existing bool

Remove existing directory if it exists

True

Returns:

Type Description
str

Path to the cloned repository

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

Source code in src/lampe/core/tools/repository/management.py
def clone_repo(
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
) -> str:
    """Clone a repository optimized for PR review.

    Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering
    to efficiently fetch only required content.

    Parameters
    ----------
    repo_url
        Repository URL to clone
    head_ref
        Head ref to checkout
    base_ref
        Base ref to fetch for diff computation
    folder_name
        Optional name prefix for temp directory
    sparse
        Enable sparse checkout mode to avoid populating all files initially
    shallow
        Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter
        Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
    remove_existing
        Remove existing directory if it exists

    Returns
    -------
    :
        Path to the cloned repository

    Raises
    ------
    RuntimeError
        If Git version check fails
    GitCommandError
        If clone operation fails
    """
    if not valid_git_version_available():
        raise RuntimeError("Git version check failed. Please upgrade Git to the minimum required version.")

    tmp_dir = f"/tmp/{folder_name}" if folder_name else mkdtemp(prefix=str(uuid.uuid4()))
    logger.info(f"Cloning repo (sparse={sparse}, shallow={shallow}, blob_filter={blob_filter}) to {tmp_dir}")

    if os.path.exists(tmp_dir):
        if remove_existing:
            logger.info(f"Removing existing directory {tmp_dir}")
            shutil.rmtree(tmp_dir)
        else:
            return tmp_dir

    clone_args = []
    if shallow:
        clone_args.extend(["--depth", "1"])
    if sparse:
        clone_args.append("--sparse")
    if blob_filter:
        clone_args.extend(["--filter", "blob:none"])
    if head_ref:
        clone_args.extend(["--revision", head_ref])

    try:
        repository_path = ""
        repo = Repo.clone_from(repo_url, tmp_dir, multi_options=clone_args)
        repository_path = _repo_to_path(repo)
        if sparse and blob_filter:
            logger.info("Partial clone ready - file contents will be fetched on-demand during git operations")
        if base_ref:
            fetch_commit_ref(repository_path, base_ref)
    except GitCommandError as e:
        logger.exception(f"Clone failed: {e}\nClone arguments used: {clone_args}")
        raise e

    return repository_path
fetch_commit_ref(repo_path: str, commit_ref: str) -> None

Fetch a base reference from the remote repository.

Parameters:

Name Type Description Default
repo_path str

Path to the git repository

required
commit_ref str

Commit reference to fetch (e.g., branch name, commit hash)

required

Raises:

Type Description
GitCommandError

If the fetch operation fails

Source code in src/lampe/core/tools/repository/management.py
def fetch_commit_ref(repo_path: str, commit_ref: str) -> None:
    """Fetch a base reference from the remote repository.

    Parameters
    ----------
    repo_path
        Path to the git repository
    commit_ref
        Commit reference to fetch (e.g., branch name, commit hash)

    Raises
    ------
    GitCommandError
        If the fetch operation fails
    """
    repo = Repo(path=repo_path)

    repo.git.fetch("--no-tags", "--depth=1", "--filter=blob:none", "origin", commit_ref)
find_files_by_pattern(pattern: str, repo_path: str = '/tmp/') -> str

Search for files using git ls-files and pattern matching.

Parameters:

Name Type Description Default
pattern str

Pattern to search for (e.g. "*.py", "src/**/*.md")

required
repo_path str

Path to git repository

'/tmp/'

Returns:

Type Description
str

Formatted string containing matching file paths

Source code in src/lampe/core/tools/repository/search.py (lines 41-68)
def find_files_by_pattern(pattern: str, repo_path: str = "/tmp/") -> str:
    """Search for files using git ls-files and pattern matching.

    Parameters
    ----------
    pattern
        Pattern to search for (e.g. "*.py", "src/**/*.md")
    repo_path
        Path to git repository

    Returns
    -------
    str
        Formatted string containing matching file paths
    """
    repo = Repo(path=repo_path)
    try:
        # Filter files matching pattern using git's pathspec matching
        matching = repo.git.ls_files("--", pattern).splitlines()

        if not matching:
            return "No files found"

        return f"```shell\n{'\n'.join(matching)}\n```"

    except GitCommandError as e:
        logger.exception(f"Error finding files: {e}")
        return f"Error: {str(e)}"
get_diff_between_commits(base_hash: str, head_hash: str = 'HEAD', files_exclude_patterns: list[str] | None = None, files_include_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, batch_size: int = 50, repo_path: str = '/tmp/') -> str

Get the diff between two commits, optionally filtering files by glob patterns.

The filtering is done in a specific order to ensure correct pattern application:

1. First, if include patterns are provided, only files matching those patterns are kept
2. Then, exclude patterns are applied to filter out matching files
3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

This order ensures that reinclude patterns only affect files that were actually excluded, preventing the reinclude of files that weren't matched by include patterns in the first place.

Parameters:

Name Type Description Default
base_hash str

Base commit hash to compare from

required
head_hash str

Head commit hash to compare to. If not provided, uses HEAD

'HEAD'
files_exclude_patterns list[str] | None

List of glob patterns to exclude from the diff (relative to repo root). These patterns take precedence over include patterns.

None
files_include_patterns list[str] | None

List of glob patterns to include in the diff (relative to repo root). Note that exclude patterns will override these if there are conflicts.

None
files_reinclude_patterns list[str] | None

List of glob patterns to re-include files that were excluded by the exclude patterns. These patterns will only affect files that were previously excluded.

None
repo_path str

Path to the git repository

'/tmp/'
batch_size int

Number of files to process in each batch.

50

Returns:

Type Description
str

Diff as a string

Raises:

Type Description
DiffNotFoundError

If there is an unexpected git error

Source code in src/lampe/core/tools/repository/diff.py (lines 77-156)
def get_diff_between_commits(
    base_hash: str,
    head_hash: str = "HEAD",
    files_exclude_patterns: list[str] | None = None,
    files_include_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    batch_size: int = 50,
    repo_path: str = "/tmp/",
) -> str:
    """Get the diff between two commits, optionally filtering files by glob patterns.

    The filtering is done in a specific order to ensure correct pattern application:
    1. First, if include patterns are provided, only files matching those patterns are kept
    2. Then, exclude patterns are applied to filter out matching files
    3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

    This order ensures that reinclude patterns only affect files that were actually excluded,
    preventing the reinclude of files that weren't matched by include patterns in the first place.

    Parameters
    ----------
    base_hash
        Base commit hash to compare from
    head_hash
        Head commit hash to compare to. If not provided, uses HEAD
    files_exclude_patterns
        List of glob patterns to exclude from the diff (relative to repo root).
        These patterns take precedence over include patterns.
    files_include_patterns
        List of glob patterns to include in the diff (relative to repo root).
        Note that exclude patterns will override these if there are conflicts.
    files_reinclude_patterns
        List of glob patterns to re-include files that were excluded by the exclude patterns.
        These patterns will only affect files that were previously excluded.
    repo_path
        Path to the git repository
    batch_size
        Number of files to process in each batch.
    Returns
    -------
    :
        Diff as a string

    Raises
    ------
    DiffNotFoundError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        changed_files = ""
        with LocalCommitsAvailability(repo_path, [base_hash, head_hash]):
            changed_files = repo.git.diff(base_hash, head_hash, "--name-only")

        if files_include_patterns and files_exclude_patterns:
            include_patterns = set(files_include_patterns)
            exclude_patterns = set(files_exclude_patterns)
            overlap = include_patterns & exclude_patterns
            if overlap:
                logger.warning(
                    f"Overlapping patterns found in include and exclude patterns: {overlap}. "
                    "Exclude patterns will take precedence as per git pathspec documentation."
                )

        filtered_files = []
        for f in changed_files.splitlines():
            if files_include_patterns and not any(fnmatch(f, pat) for pat in files_include_patterns):
                continue
            if files_exclude_patterns and any(fnmatch(f, pat) for pat in files_exclude_patterns):
                if not (files_reinclude_patterns and any(fnmatch(f, pat) for pat in files_reinclude_patterns)):
                    continue
            filtered_files.append(f)

        diffs = []
        for batch in batched(filtered_files, batch_size):
            diffs.append(repo.git.diff(base_hash, head_hash, "--", *batch))
        return "\n".join(diffs)
    except GitCommandError as e:
        logger.exception(f"Unexpected error getting diff: {e}")
        raise DiffNotFoundError(f"Diff not found for commits {base_hash} and {head_hash}") from e
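
A hedged sketch of the exclude/reinclude interplay described above, using placeholder hashes, patterns, and repository path; the import path is inferred from the source layout.

# Sketch only.
from lampe.core.tools.repository.diff import get_diff_between_commits

diff = get_diff_between_commits(
    base_hash="def456",
    head_hash="abc123",
    files_exclude_patterns=["*.lock", "docs/*"],      # drop noisy files
    files_reinclude_patterns=["docs/CHANGELOG.md"],   # but keep this excluded file
    repo_path="/tmp/example-review",
)
print(diff[:500])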
get_diff_for_files(base_reference: str, file_paths: list[str] | None = None, head_reference: str = 'HEAD', repo_path: str = '/tmp/', batch_size: int = 50) -> str

Get the diff between two commits, optionally for specific files.

Parameters:

Name Type Description Default
base_reference str

Base commit reference (e.g., "main", commit hash)

required
file_paths list[str] | None

List of file paths to get diff for

None
head_reference str

Head commit reference (e.g., "main", commit hash). Defaults to "HEAD"

'HEAD'
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string containing diffs for specified files or all changed files

Source code in src/lampe/core/tools/repository/diff.py (lines 159-200)
def get_diff_for_files(
    base_reference: str,
    file_paths: list[str] | None = None,
    head_reference: str = "HEAD",
    repo_path: str = "/tmp/",
    batch_size: int = 50,
) -> str:
    """Get the diff between two commits, optionally for specific files.

    Parameters
    ----------
    base_reference
        Base commit reference (e.g., "main", commit hash)
    file_paths
        List of file paths to get diff for
    head_reference
        Head commit reference (e.g., "main", commit hash). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing diffs for specified files or all changed files
    """
    repo = Repo(path=repo_path)
    if file_paths:
        # Get diff for specific files
        diffs = []
        for batch_file_paths in batched(iterable=file_paths, n=batch_size):
            try:
                diff = repo.git.diff(base_reference, head_reference, "--", *batch_file_paths)
                if diff:
                    diffs.append(diff)
            except GitCommandError:
                # Skip files that don't exist or can't be diffed
                logger.debug(f"Files {batch_file_paths} not found or can't be diffed in get_diff_for_files")
                continue
        return "\n".join(diffs)
    else:
        # Get diff for all changed files
        return repo.git.diff(base_reference, head_reference)
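
A hedged sketch restricting the diff to two placeholder files; references, paths, and the import path are assumptions.

# Sketch only.
from lampe.core.tools.repository.diff import get_diff_for_files

diff = get_diff_for_files(
    base_reference="main",
    head_reference="HEAD",
    file_paths=["src/app.py", "README.md"],  # placeholder paths
    repo_path="/tmp/example-review",
)
print(diff)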
get_file_content_at_commit(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:

Name Type Description Default
commit_hash str

Commit reference (e.g., "main", commit hash)

required
file_path str

Path to the file within the repository

required
line_start int | None

Line range start index (0-based) of head_content to extract content from

None
line_end int | None

Line range end index (0-based) of head_content to extract content to

None
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

File content as a string, empty string if file doesn't exist or line range is invalid

Raises:

Type Description
GitCommandError

If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py (lines 45-87)
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of head_content to extract content from
    line_end
        Line range end index (0-based) of head_content to extract content to
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])
        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
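
A hedged sketch fetching a slice of a file at a commit (placeholder file path and commit hash). Note the 0-based, inclusive line indices.

# Sketch only.
from lampe.core.tools.repository.content import get_file_content_at_commit

# Lines 10-20 (0-based, inclusive) of a placeholder file at a placeholder commit.
snippet = get_file_content_at_commit(
    commit_hash="abc123",
    file_path="src/app.py",
    line_start=10,
    line_end=20,
    repo_path="/tmp/example-review",
)
print(snippet)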
is_sparse_clone(repo_path: str) -> bool

Check if a repository is a sparse clone.

A sparse clone is detected by checking multiple indicators:

1. If core.sparseCheckout is enabled
2. If .git/info/sparse-checkout file exists and has content

Parameters:

Name Type Description Default
repo_path str

Path to the git repository

required

Returns:

Type Description
bool

True if the repository appears to be a sparse clone, False otherwise

Raises:

Type Description
GitCommandError

If git commands fail

Source code in src/lampe/core/tools/repository/management.py (lines 274-323)
def is_sparse_clone(repo_path: str) -> bool:
    """Check if a repository is a sparse clone.

    A sparse clone is detected by checking multiple indicators:
    1. If core.sparseCheckout is enabled
    2. If .git/info/sparse-checkout file exists and has content

    Parameters
    ----------
    repo_path
        Path to the git repository

    Returns
    -------
    bool
        True if the repository appears to be a sparse clone, False otherwise

    Raises
    ------
    GitCommandError
        If git commands fail
    """
    try:
        repo = Repo(path=repo_path)

        # Check if sparse checkout is enabled
        try:
            sparse_checkout = repo.git.config("core.sparseCheckout")
            if sparse_checkout.strip().lower() == "true":
                logger.debug(f"Sparse checkout enabled in {repo_path}")
                return True
        except GitCommandError:
            # core.sparseCheckout not set, continue with other checks
            pass

        # Check if .git/info/sparse-checkout file exists and has content
        sparse_checkout_file = Path(repo_path) / ".git" / "info" / "sparse-checkout"
        if sparse_checkout_file.exists():
            with open(sparse_checkout_file, "r") as f:
                content = f.read().strip()
                if content:
                    logger.debug(f"Sparse checkout file found with content in {repo_path}")
                    return True

        logger.debug(f"No sparse clone indicators found in {repo_path}")
        return False

    except Exception as e:
        logger.exception(f"Error checking if repository is sparse clone: {e}")
        return False
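
A hedged sketch; the path is a placeholder and the import path is inferred from the source layout.

# Sketch only.
from lampe.core.tools.repository.management import is_sparse_clone

if is_sparse_clone("/tmp/example-review"):
    print("Partial/sparse clone - file contents may be fetched on demand")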
list_changed_files(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> str

List files changed between base reference and HEAD, with change stats.

Parameters:

Name Type Description Default
base_reference str

Git reference (commit hash, branch name, etc.) to compare against HEAD

required
head_reference str

Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"

'HEAD'
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string listing changed files with status, additions/deletions and size.
Format: "[STATUS] filepath | +additions -deletions | sizeKB"
STATUS is one of: A (added), D (deleted), M (modified)

Raises:

Type Description
GitCommandError

If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py (lines 15-74)
def list_changed_files(base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/") -> str:
    """List files changed between base reference and HEAD, with change stats.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string listing changed files with status, additions/deletions and size
        Format: "[STATUS] filepath | +additions -deletions | sizeKB"
        STATUS is one of: A (added), D (deleted), M (modified)

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, "--numstat")
    status_output = repo.git.diff(base_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path)
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(f"[{status}] {file_path} | +{additions} -{deletions} | {size_kb}KB")

    return "\n".join(sorted(result))
search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, repo_path: str = '/tmp/') -> str

Search for a pattern in files within a directory at a specific commit.

Parameters:

Name Type Description Default
pattern str

Pattern to search for

required
relative_dir_path str

Directory path to search in

required
commit_reference str

Commit reference to search at

required
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Search results as a string

Source code in src/lampe/core/tools/repository/search.py (lines 10-38)
def search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Search for a pattern in files within a directory at a specific commit.

    Parameters
    ----------
    pattern
        Pattern to search for
    relative_dir_path
        Directory path to search in
    commit_reference
        Commit reference to search at
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    str
        Search results as a string
    """
    try:
        repo = Repo(path=repo_path)
        grep_output = repo.git.grep("-n", pattern, f"{commit_reference}:{relative_dir_path}")
        if grep_output:
            return f"```grep\n{grep_output}\n```"
        return "No matches found"
    except GitCommandError as e:
        if e.status == 128:
            return "No matches found"
        return f"Error executing git grep: {str(e)}"
show_commit(commit_reference: str, repo_path: str = '/tmp/') -> str

Show the contents of a commit.

This function shows the contents of a commit, including the commit details and diffs.

Parameters:

Name Type Description Default
commit_reference str

Commit reference (e.g., "main", commit hash)

required
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string containing commit details and diffs

Source code in src/lampe/core/tools/repository/history.py (lines 10-47)
def show_commit(commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Show the contents of a commit.

    This function shows the contents of a commit, including the commit details and diffs.

    Parameters
    ----------
    commit_reference
        Commit reference (e.g., "main", commit hash)
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and diffs
    """
    repo = Repo(path=repo_path)
    commit = repo.commit(commit_reference)
    output = [
        f"Commit: {commit.hexsha}\n"
        f"Author: {commit.author}\n"
        f"Date: {commit.authored_datetime}\n"
        f"Message: {commit.message}\n"
        f"Files: {len(commit.stats.files)} files changed\n"
        f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
        f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
    ]
    if commit.parents:
        parent = commit.parents[0]
        diff = parent.diff(commit, create_patch=True)
    else:
        diff = commit.diff(None, create_patch=True)
    for d in diff:
        output.append(f"\n--- {d.a_path}\n+++ {d.b_path}\n")
        if d.diff:
            output.append(str(d.diff))
    return "".join(output)
content
file_exists(file_path: str, commit_hash: str = 'HEAD', repo_path: str = '/tmp/') -> bool

Check if a file exists in a specific commit.

Parameters:

Name Type Description Default
file_path str

Path to the file within the repository

required
commit_hash str

Commit reference to check (e.g., commit hash, branch name, tag). Defaults to "HEAD"

'HEAD'
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
bool

True if file exists in the commit, False otherwise

Raises:

Type Description
GitCommandError

If there is an unexpected git error

Source code in src/lampe/core/tools/repository/content.py (lines 11-42)
def file_exists(file_path: str, commit_hash: str = "HEAD", repo_path: str = "/tmp/") -> bool:
    """Check if a file exists in a specific commit.

    Parameters
    ----------
    file_path
        Path to the file within the repository
    commit_hash
        Commit reference to check (e.g., commit hash, branch name, tag). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    bool
        True if file exists in the commit, False otherwise

    Raises
    ------
    GitCommandError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            repo.git.cat_file("-e", f"{commit_hash}:{file_path}")
        return True
    except GitCommandError as e:
        if e.status == 128:
            return False
        logger.exception(f"Unexpected error checking if file exists: {e}")
        raise
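
A hedged sketch guarding a content lookup with file_exists (placeholder file path, commit, and repository path).

# Sketch only.
from lampe.core.tools.repository.content import file_exists, get_file_content_at_commit

if file_exists("src/app.py", commit_hash="abc123", repo_path="/tmp/example-review"):
    content = get_file_content_at_commit(
        commit_hash="abc123", file_path="src/app.py", repo_path="/tmp/example-review"
    )
    print(content)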
get_file_content_at_commit(commit_hash: str, file_path: str, line_start: int | None = None, line_end: int | None = None, repo_path: str = '/tmp/') -> str

Get file content from a specific commit.

Parameters:

Name Type Description Default
commit_hash str

Commit reference (e.g., "main", commit hash)

required
file_path str

Path to the file within the repository

required
line_start int | None

Line range start index (0-based) of head_content to extract content from

None
line_end int | None

Line range end index (0-based) of head_content to extract content to

None
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

File content as a string, empty string if file doesn't exist or line range is invalid

Raises:

Type Description
GitCommandError

If the file doesn't exist or any other git error occurs

Source code in src/lampe/core/tools/repository/content.py (lines 45-87)
def get_file_content_at_commit(
    commit_hash: str,
    file_path: str,
    line_start: int | None = None,
    line_end: int | None = None,
    repo_path: str = "/tmp/",
) -> str:
    """Get file content from a specific commit.

    Parameters
    ----------
    commit_hash
        Commit reference (e.g., "main", commit hash)
    file_path
        Path to the file within the repository
    line_start
        Line range start index (0-based) of head_content to extract content from
    line_end
        Line range end index (0-based) of head_content to extract content to
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        File content as a string, empty string if file doesn't exist or line range is invalid

    Raises
    ------
    GitCommandError
        If the file doesn't exist or any other git error occurs
    """
    try:
        blob = ""
        repo = Repo(path=repo_path)
        with LocalCommitsAvailability(repo_path, [commit_hash]):
            blob = repo.git.show(f"{commit_hash}:{file_path}")
        if line_start is not None and line_end is not None:
            blob = "\n".join(blob.splitlines()[line_start : line_end + 1])
        return blob
    except GitCommandError as e:
        logger.exception(f"Error getting file content: {e}")
        raise
get_file_size_at_commit(file_path: str, commit_hash: str = 'HEAD', repo_path: str = '/tmp/') -> int

Get the size of a file at a specific commit.

Parameters:

Name Type Description Default
file_path str

Path to the file within the repository

required
commit_hash str

Commit reference (e.g., "main", commit hash). Defaults to "HEAD"

'HEAD'
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
int

Size of the file in bytes

Source code in src/lampe/core/tools/repository/content.py (lines 90-114)
def get_file_size_at_commit(file_path: str, commit_hash: str = "HEAD", repo_path: str = "/tmp/") -> int:
    """Get the size of a file at a specific commit.

    Parameters
    ----------
    file_path
        Path to the file within the repository
    commit_hash
        Commit reference (e.g., "main", commit hash). Defaults to "HEAD"
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    :
        Size of the file in bytes
    """
    repo = Repo(path=repo_path)
    with LocalCommitsAvailability(repo_path, [commit_hash]):
        tree = repo.commit(rev=commit_hash).tree
    try:
        git_obj = tree[file_path]
        return git_obj.size
    except KeyError:
        return 0
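
A hedged sketch with a placeholder file and commit; the size is whatever the underlying git object reports (see the Returns section above).

# Sketch only.
from lampe.core.tools.repository.content import get_file_size_at_commit

size = get_file_size_at_commit("src/app.py", commit_hash="abc123", repo_path="/tmp/example-review")
print(size)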
diff
get_diff_between_commits(base_hash: str, head_hash: str = 'HEAD', files_exclude_patterns: list[str] | None = None, files_include_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, batch_size: int = 50, repo_path: str = '/tmp/') -> str

Get the diff between two commits, optionally filtering files by glob patterns.

The filtering is done in a specific order to ensure correct pattern application:

1. First, if include patterns are provided, only files matching those patterns are kept
2. Then, exclude patterns are applied to filter out matching files
3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

This order ensures that reinclude patterns only affect files that were actually excluded, preventing the reinclude of files that weren't matched by include patterns in the first place.

Parameters:

Name Type Description Default
base_hash str

Base commit hash to compare from

required
head_hash str

Head commit hash to compare to. If not provided, uses HEAD

'HEAD'
files_exclude_patterns list[str] | None

List of glob patterns to exclude from the diff (relative to repo root). These patterns take precedence over include patterns.

None
files_include_patterns list[str] | None

List of glob patterns to include in the diff (relative to repo root). Note that exclude patterns will override these if there are conflicts.

None
files_reinclude_patterns list[str] | None

List of glob patterns to re-include files that were excluded by the exclude patterns. These patterns will only affect files that were previously excluded.

None
repo_path str

Path to the git repository

'/tmp/'
batch_size int

Number of files to process in each batch.

50

Returns:

Type Description
str

Diff as a string

Raises:

Type Description
DiffNotFoundError

If there is an unexpected git error

Source code in src/lampe/core/tools/repository/diff.py (lines 77-156)
def get_diff_between_commits(
    base_hash: str,
    head_hash: str = "HEAD",
    files_exclude_patterns: list[str] | None = None,
    files_include_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    batch_size: int = 50,
    repo_path: str = "/tmp/",
) -> str:
    """Get the diff between two commits, optionally filtering files by glob patterns.

    The filtering is done in a specific order to ensure correct pattern application:
    1. First, if include patterns are provided, only files matching those patterns are kept
    2. Then, exclude patterns are applied to filter out matching files
    3. Finally, reinclude patterns can override the exclude patterns to bring back specific files

    This order ensures that reinclude patterns only affect files that were actually excluded,
    preventing the reinclude of files that weren't matched by include patterns in the first place.

    Parameters
    ----------
    base_hash
        Base commit hash to compare from
    head_hash
        Head commit hash to compare to. If not provided, uses HEAD
    files_exclude_patterns
        List of glob patterns to exclude from the diff (relative to repo root).
        These patterns take precedence over include patterns.
    files_include_patterns
        List of glob patterns to include in the diff (relative to repo root).
        Note that exclude patterns will override these if there are conflicts.
    files_reinclude_patterns
        List of glob patterns to re-include files that were excluded by the exclude patterns.
        These patterns will only affect files that were previously excluded.
    repo_path
        Path to the git repository
    batch_size
        Number of files to process in each batch.
    Returns
    -------
    :
        Diff as a string

    Raises
    ------
    DiffNotFoundError
        If there is an unexpected git error
    """
    try:
        repo = Repo(path=repo_path)
        changed_files = ""
        with LocalCommitsAvailability(repo_path, [base_hash, head_hash]):
            changed_files = repo.git.diff(base_hash, head_hash, "--name-only")

        if files_include_patterns and files_exclude_patterns:
            include_patterns = set(files_include_patterns)
            exclude_patterns = set(files_exclude_patterns)
            overlap = include_patterns & exclude_patterns
            if overlap:
                logger.warning(
                    f"Overlapping patterns found in include and exclude patterns: {overlap}. "
                    "Exclude patterns will take precedence as per git pathspec documentation."
                )

        filtered_files = []
        for f in changed_files.splitlines():
            if files_include_patterns and not any(fnmatch(f, pat) for pat in files_include_patterns):
                continue
            if files_exclude_patterns and any(fnmatch(f, pat) for pat in files_exclude_patterns):
                if not (files_reinclude_patterns and any(fnmatch(f, pat) for pat in files_reinclude_patterns)):
                    continue
            filtered_files.append(f)

        diffs = []
        for batch in batched(filtered_files, batch_size):
            diffs.append(repo.git.diff(base_hash, head_hash, "--", *batch))
        return "\n".join(diffs)
    except GitCommandError as e:
        logger.exception(f"Unexpected error getting diff: {e}")
        raise DiffNotFoundError(f"Diff not found for commits {base_hash} and {head_hash}") from e
get_diff_for_files(base_reference: str, file_paths: list[str] | None = None, head_reference: str = 'HEAD', repo_path: str = '/tmp/', batch_size: int = 50) -> str

Get the diff between two commits, optionally for specific files.

Parameters:

Name Type Description Default
base_reference str

Base commit reference (e.g., "main", commit hash)

required
file_paths list[str] | None

List of file paths to get diff for

None
head_reference str

Head commit reference (e.g., "main", commit hash). Defaults to "HEAD"

'HEAD'
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string containing diffs for specified files or all changed files

Source code in src/lampe/core/tools/repository/diff.py (lines 159-200)
def get_diff_for_files(
    base_reference: str,
    file_paths: list[str] | None = None,
    head_reference: str = "HEAD",
    repo_path: str = "/tmp/",
    batch_size: int = 50,
) -> str:
    """Get the diff between two commits, optionally for specific files.

    Parameters
    ----------
    base_reference
        Base commit reference (e.g., "main", commit hash)
    file_paths
        List of file paths to get diff for
    head_reference
        Head commit reference (e.g., "main", commit hash). Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing diffs for specified files or all changed files
    """
    repo = Repo(path=repo_path)
    if file_paths:
        # Get diff for specific files
        diffs = []
        for batch_file_paths in batched(iterable=file_paths, n=batch_size):
            try:
                diff = repo.git.diff(base_reference, head_reference, "--", *batch_file_paths)
                if diff:
                    diffs.append(diff)
            except GitCommandError:
                # Skip files that don't exist or can't be diffed
                logger.debug(f"Files {batch_file_paths} not found or can't be diffed in get_diff_for_files")
                continue
        return "\n".join(diffs)
    else:
        # Get diff for all changed files
        return repo.git.diff(base_reference, head_reference)
list_changed_files(base_reference: str, head_reference: str = 'HEAD', repo_path: str = '/tmp/') -> str

List files changed between base reference and HEAD, with change stats.

Parameters:

Name Type Description Default
base_reference str

Git reference (commit hash, branch name, etc.) to compare against HEAD

required
head_reference str

Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"

'HEAD'
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string listing changed files with status, additions/deletions and size.
Format: "[STATUS] filepath | +additions -deletions | sizeKB"
STATUS is one of: A (added), D (deleted), M (modified)

Raises:

Type Description
GitCommandError

If there is an error executing git commands

Source code in src/lampe/core/tools/repository/diff.py (lines 15-74)
def list_changed_files(base_reference: str, head_reference: str = "HEAD", repo_path: str = "/tmp/") -> str:
    """List files changed between base reference and HEAD, with change stats.

    Parameters
    ----------
    base_reference
        Git reference (commit hash, branch name, etc.) to compare against HEAD
    head_reference
        Git reference (commit hash, branch name, etc.) to compare against base reference. Defaults to "HEAD"
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string listing changed files with status, additions/deletions and size
        Format: "[STATUS] filepath | +additions -deletions | sizeKB"
        STATUS is one of: A (added), D (deleted), M (modified)

    Raises
    ------
    GitCommandError
        If there is an error executing git commands
    """
    repo = Repo(path=repo_path)
    numstat = repo.git.diff(base_reference, "--numstat")
    status_output = repo.git.diff(base_reference, "--name-status")

    status_map = {}
    for line in status_output.splitlines():
        if line:
            parts = line.split("\t")
            if len(parts) >= 2:
                status, path = parts[0], parts[-1]
                status_map[path] = "A" if status == "A" else "D" if status == "D" else "M"

    result = []
    for line in numstat.splitlines():
        parts = line.split("\t")
        if len(parts) == 3:
            additions, deletions, file_path = parts
            try:
                additions = int(additions)
            except ValueError:
                additions = 0
            try:
                deletions = int(deletions)
            except ValueError:
                deletions = 0
            try:
                size_kb = get_file_size_at_commit(file_path, head_reference, repo_path)
            except GitCommandError as e:
                size_kb = 0
                logger.exception(f"During list_changed_files, error getting file size: {e}, continuing...")

            status = status_map.get(file_path, "M")

            result.append(f"[{status}] {file_path} | +{additions} -{deletions} | {size_kb}KB")

    return "\n".join(sorted(result))
history
get_commit_log(max_count: int, repo_path: str = '/tmp/') -> str

Get the log of commits for a repository.

This function gets the log of commits for a repository, including the commit details and the list of file paths that were changed.

Parameters:

Name Type Description Default
max_count int

Maximum number of commits to return

required
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string containing commit details and list of files that were changed

Source code in src/lampe/core/tools/repository/history.py (lines 50-81)
def get_commit_log(max_count: int, repo_path: str = "/tmp/") -> str:
    """Get the log of commits for a repository.

    This function gets the log of commits for a repository, including the commit details
    and the list of files path that were changed.

    Parameters
    ----------
    max_count
        Maximum number of commits to return
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and list of files that were changed
    """
    repo = Repo(path=repo_path)
    commits = list(repo.iter_commits(max_count=max_count))
    log = []
    for commit in commits:
        log.append(
            f"Commit: {commit.hexsha}\n"
            f"Author: {commit.author}\n"
            f"Date: {commit.authored_datetime}\n"
            f"Message: {commit.message}\n"
            f"Files: {len(commit.stats.files)} files changed\n"
            f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
            f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
        )
    return "\n".join(log)
show_commit(commit_reference: str, repo_path: str = '/tmp/') -> str

Show the contents of a commit.

This function shows the contents of a commit, including the commit details and diffs.

Parameters:

Name Type Description Default
commit_reference str

Commit reference (e.g., "main", commit hash)

required
repo_path str

Path to git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Formatted string containing commit details and diffs

Source code in src/lampe/core/tools/repository/history.py (lines 10-47)
def show_commit(commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Show the contents of a commit.

    This function shows the contents of a commit, including the commit details and diffs.

    Parameters
    ----------
    commit_reference
        Commit reference (e.g., "main", commit hash)
    repo_path
        Path to git repository, by default "/tmp/"

    Returns
    -------
    str
        Formatted string containing commit details and diffs
    """
    repo = Repo(path=repo_path)
    commit = repo.commit(commit_reference)
    output = [
        f"Commit: {commit.hexsha}\n"
        f"Author: {commit.author}\n"
        f"Date: {commit.authored_datetime}\n"
        f"Message: {commit.message}\n"
        f"Files: {len(commit.stats.files)} files changed\n"
        f"Changes: +{commit.stats.total['insertions']} -{commit.stats.total['deletions']}\n"
        f"Modified files:\n" + "\n".join(f"  - {f}" for f in commit.stats.files)
    ]
    if commit.parents:
        parent = commit.parents[0]
        diff = parent.diff(commit, create_patch=True)
    else:
        diff = commit.diff(None, create_patch=True)
    for d in diff:
        output.append(f"\n--- {d.a_path}\n+++ {d.b_path}\n")
        if d.diff:
            output.append(str(d.diff))
    return "".join(output)
management
LocalCommitsAvailability(repo_path: str, commits: list[str])

Context manager to check if commits are available locally before git operations.

Checks if specified commits exist locally using git fsck --root and fetches them if they're not present. This is useful for ensuring all required commits are available before performing git operations that depend on them.

Attributes:

Name Type Description
repo_path

Path to the git repository

commits

List of commit references to check and fetch if needed

Source code in src/lampe/core/tools/repository/management.py (lines 219-223)
def __init__(self, repo_path: str, commits: list[str]):
    self.repo_path = repo_path
    self.commits = commits
    self.repo = Repo(path=repo_path)
    self._fetched_commits = []
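
A hedged sketch mirroring how the diff and content helpers above use this context manager; the commits and repository path are placeholders, and the import path is inferred from the source layout.

# Sketch only.
from git import Repo
from lampe.core.tools.repository.management import LocalCommitsAvailability

repo_path = "/tmp/example-review"
with LocalCommitsAvailability(repo_path, ["def456", "abc123"]):
    # Both commits are available locally inside this block (fetched if needed).
    print(Repo(repo_path).git.diff("def456", "abc123", "--stat"))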
TempGitRepository(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True)

Context Manager for cloning and cleaning up a local clone of a repository

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content. Upon exit, will attempt to delete the cloned repository.

Attributes:

Name Type Description
repo_url

Repository URL to clone

head_ref

Optional head ref to check out.

folder_name

Optional name prefix for temp directory

sparse

Enable sparse checkout mode to avoid populating all files initially.

shallow

Enable shallow clone (depth=1) to fetch only the target commit.

blob_filter

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

remove_existing

Remove existing directory if it exists

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

UnableToDeleteError

If unable to delete the cloned repository

Source code in src/lampe/core/tools/repository/management.py (lines 54-73)
def __init__(
    self,
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
):
    self.repo_url = repo_url
    self.head_ref = head_ref
    self.base_ref = base_ref
    self.folder_name = folder_name
    self.sparse = sparse
    self.shallow = shallow
    self.blob_filter = blob_filter
    self.remove_existing = remove_existing
    self.path_to_local_repo = None
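
A hedged sketch of the context-manager form, with a placeholder URL and refs; per the class docstring the temporary clone is deleted on exit. It assumes entering the context yields the clone path (path_to_local_repo), which is not shown in the excerpt above.

# Sketch only: assumes __enter__ returns the clone path.
from lampe.core.tools.repository.management import TempGitRepository
from lampe.core.tools.repository.diff import list_changed_files

with TempGitRepository(
    repo_url="https://github.com/acme/example.git",
    head_ref="abc123",
    base_ref="def456",
) as repo_path:
    print(list_changed_files("def456", "abc123", repo_path=repo_path))
# The temporary clone is cleaned up here.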
clone_repo(repo_url: str, head_ref: str | None = None, base_ref: str | None = None, folder_name: str | None = None, sparse: bool = True, shallow: bool = True, blob_filter: bool = True, remove_existing: bool = True) -> str

Clone a repository optimized for PR review.

Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering to efficiently fetch only required content.

Parameters:

Name Type Description Default
repo_url str

Repository URL to clone

required
head_ref str | None

Head ref to checkout

None
base_ref str | None

Base ref to fetch for diff computation

None
folder_name str | None

Optional name prefix for temp directory

None
sparse bool

Enable sparse checkout mode to avoid populating all files initially

True
shallow bool

Enable shallow clone (depth=1) to fetch only the target commit

True
blob_filter bool

Enable blob filtering (--filter=blob:none) to fetch file contents on-demand

True
remove_existing bool

Remove existing directory if it exists

True

Returns:

Type Description
str

Path to the cloned repository

Raises:

Type Description
RuntimeError

If Git version check fails

GitCommandError

If clone operation fails

Source code in src/lampe/core/tools/repository/management.py (lines 100-181)
def clone_repo(
    repo_url: str,
    head_ref: str | None = None,
    base_ref: str | None = None,
    folder_name: str | None = None,
    sparse: bool = True,
    shallow: bool = True,
    blob_filter: bool = True,
    remove_existing: bool = True,
) -> str:
    """Clone a repository optimized for PR review.

    Uses partial clone optimizations including shallow clone, sparse checkout, and blob filtering
    to efficiently fetch only required content.

    Parameters
    ----------
    repo_url
        Repository URL to clone
    head_ref
        Head ref to checkout
    base_ref
        Base ref to fetch for diff computation
    folder_name
        Optional name prefix for temp directory
    sparse
        Enable sparse checkout mode to avoid populating all files initially
    shallow
        Enable shallow clone (depth=1) to fetch only the target commit
    blob_filter
        Enable blob filtering (--filter=blob:none) to fetch file contents on-demand
    remove_existing
        Remove existing directory if it exists

    Returns
    -------
    :
        Path to the cloned repository

    Raises
    ------
    RuntimeError
        If Git version check fails
    GitCommandError
        If clone operation fails
    """
    if not valid_git_version_available():
        raise RuntimeError("Git version check failed. Please upgrade Git to the minimum required version.")

    tmp_dir = f"/tmp/{folder_name}" if folder_name else mkdtemp(prefix=str(uuid.uuid4()))
    logger.info(f"Cloning repo (sparse={sparse}, shallow={shallow}, blob_filter={blob_filter}) to {tmp_dir}")

    if os.path.exists(tmp_dir):
        if remove_existing:
            logger.info(f"Removing existing directory {tmp_dir}")
            shutil.rmtree(tmp_dir)
        else:
            return tmp_dir

    clone_args = []
    if shallow:
        clone_args.extend(["--depth", "1"])
    if sparse:
        clone_args.append("--sparse")
    if blob_filter:
        clone_args.extend(["--filter", "blob:none"])
    if head_ref:
        clone_args.extend(["--revision", head_ref])

    try:
        repository_path = ""
        repo = Repo.clone_from(repo_url, tmp_dir, multi_options=clone_args)
        repository_path = _repo_to_path(repo)
        if sparse and blob_filter:
            logger.info("Partial clone ready - file contents will be fetched on-demand during git operations")
        if base_ref:
            fetch_commit_ref(repository_path, base_ref)
    except GitCommandError as e:
        logger.exception(f"Clone failed: {e}\nClone arguments used: {clone_args}")
        raise e

    return repository_path
fetch_commit_ref(repo_path: str, commit_ref: str) -> None

Fetch a base reference from the remote repository.

Parameters:

Name Type Description Default
repo_path str

Path to the git repository

required
commit_ref str

Commit reference to fetch (e.g., branch name, commit hash)

required

Raises:

Type Description
GitCommandError

If the fetch operation fails

Source code in src/lampe/core/tools/repository/management.py (lines 184-201)
def fetch_commit_ref(repo_path: str, commit_ref: str) -> None:
    """Fetch a base reference from the remote repository.

    Parameters
    ----------
    repo_path
        Path to the git repository
    commit_ref
        Commit reference to fetch (e.g., branch name, commit hash)

    Raises
    ------
    GitCommandError
        If the fetch operation fails
    """
    repo = Repo(path=repo_path)

    repo.git.fetch("--no-tags", "--depth=1", "--filter=blob:none", "origin", commit_ref)
is_sparse_clone(repo_path: str) -> bool

Check if a repository is a sparse clone.

A sparse clone is detected by checking multiple indicators:

1. If core.sparseCheckout is enabled
2. If .git/info/sparse-checkout file exists and has content

Parameters:

Name Type Description Default
repo_path str

Path to the git repository

required

Returns:

Type Description
bool

True if the repository appears to be a sparse clone, False otherwise

Raises:

Type Description
GitCommandError

If git commands fail

Source code in src/lampe/core/tools/repository/management.py (lines 274-323)
def is_sparse_clone(repo_path: str) -> bool:
    """Check if a repository is a sparse clone.

    A sparse clone is detected by checking multiple indicators:
    1. If core.sparseCheckout is enabled
    2. If .git/info/sparse-checkout file exists and has content

    Parameters
    ----------
    repo_path
        Path to the git repository

    Returns
    -------
    bool
        True if the repository appears to be a sparse clone, False otherwise

    Raises
    ------
    GitCommandError
        If git commands fail
    """
    try:
        repo = Repo(path=repo_path)

        # Check if sparse checkout is enabled
        try:
            sparse_checkout = repo.git.config("core.sparseCheckout")
            if sparse_checkout.strip().lower() == "true":
                logger.debug(f"Sparse checkout enabled in {repo_path}")
                return True
        except GitCommandError:
            # core.sparseCheckout not set, continue with other checks
            pass

        # Check if .git/info/sparse-checkout file exists and has content
        sparse_checkout_file = Path(repo_path) / ".git" / "info" / "sparse-checkout"
        if sparse_checkout_file.exists():
            with open(sparse_checkout_file, "r") as f:
                content = f.read().strip()
                if content:
                    logger.debug(f"Sparse checkout file found with content in {repo_path}")
                    return True

        logger.debug(f"No sparse clone indicators found in {repo_path}")
        return False

    except Exception as e:
        logger.exception(f"Error checking if repository is sparse clone: {e}")
        return False
search
find_files_by_pattern(pattern: str, repo_path: str = '/tmp/') -> str

Search for files using git ls-files and pattern matching.

Parameters:

Name Type Description Default
pattern str

Pattern to search for (e.g. ".py", "src//.md")

required
repo_path str

Path to git repository

'/tmp/'

Returns:

Type Description
str

Formatted string containing matching file paths

Source code in src/lampe/core/tools/repository/search.py (lines 41-68)
def find_files_by_pattern(pattern: str, repo_path: str = "/tmp/") -> str:
    """Search for files using git ls-files and pattern matching.

    Parameters
    ----------
    pattern
        Pattern to search for (e.g. "*.py", "src/**/*.md")
    repo_path
        Path to git repository

    Returns
    -------
    str
        Formatted string containing matching file paths
    """
    repo = Repo(path=repo_path)
    try:
        # Filter files matching pattern using git's pathspec matching
        matching = repo.git.ls_files("--", pattern).splitlines()

        if not matching:
            return "No files found"

        return f"```shell\n{'\n'.join(matching)}\n```"

    except GitCommandError as e:
        logger.exception(f"Error finding files: {e}")
        return f"Error: {str(e)}"
search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, repo_path: str = '/tmp/') -> str

Search for a pattern in files within a directory at a specific commit.

Parameters:

Name Type Description Default
pattern str

Pattern to search for

required
relative_dir_path str

Directory path to search in

required
commit_reference str

Commit reference to search at

required
repo_path str

Path to the git repository, by default "/tmp/"

'/tmp/'

Returns:

Type Description
str

Search results as a string

Source code in src/lampe/core/tools/repository/search.py (lines 10-38)
def search_in_files(pattern: str, relative_dir_path: str, commit_reference: str, repo_path: str = "/tmp/") -> str:
    """Search for a pattern in files within a directory at a specific commit.

    Parameters
    ----------
    pattern
        Pattern to search for
    relative_dir_path
        Directory path to search in
    commit_reference
        Commit reference to search at
    repo_path
        Path to the git repository, by default "/tmp/"

    Returns
    -------
    str
        Search results as a string
    """
    try:
        repo = Repo(path=repo_path)
        grep_output = repo.git.grep("-n", pattern, f"{commit_reference}:{relative_dir_path}")
        if grep_output:
            return f"```grep\n{grep_output}\n```"
        return "No matches found"
    except GitCommandError as e:
        if e.status == 128:
            return "No matches found"
        return f"Error executing git grep: {str(e)}"

utils

truncate_to_token_limit(content: str, max_tokens: int) -> str

Truncate the content to the maximum number of tokens. If the content is too long, it is first truncated to 200000 characters (roughly 3-4 characters per token) before encoding, for performance reasons. We allow the endoftext token to be encoded, since in the past we encountered issues with the tokenizer.

Args:
    content (str): The content to truncate.
    max_tokens (int): The maximum number of tokens to keep.

Returns:
    str: The truncated content.

Source code in src/lampe/core/utils/token.py (lines 19-45)
def truncate_to_token_limit(content: str, max_tokens: int) -> str:
    """Truncate the content to the maximum number of tokens.
    If the content is too long, truncate it to 200000 characters (3-4 characters per token)
    before encoding for performance reasons.
    We allow `endoftext` token to be encoded, since in the past we encountered issues with the tokenizer.

    Args:
        content (str): The content to truncate.
        max_tokens (int): The maximum number of tokens to keep.

    Returns:
        str: The truncated content.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if len(content) >= CHARACTER_TRUNCATION_THRESHOLD:
        logger.warning(
            f"Truncating content to {CHARACTER_TRUNCATION_THRESHOLD} characters before encoding "
            f"for performance reasons. Content length: {len(content)}"
        )
        content = safe_truncate(content, CHARACTER_TRUNCATION_THRESHOLD)
    tokens = encoder.encode(
        content,
        disallowed_special=(),
    )
    truncated = encoder.decode(tokens[:max_tokens])
    return truncated
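
A hedged sketch; the import path is inferred from the source layout (lampe.core.utils.token) and the input is synthetic.

# Sketch only.
from lampe.core.utils.token import truncate_to_token_limit

long_diff = "+ added line\n" * 50_000
short_diff = truncate_to_token_limit(long_diff, max_tokens=4_000)
print(len(short_diff) < len(long_diff))  # True: content was truncated to the token budget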
token
truncate_to_token_limit(content: str, max_tokens: int) -> str

Truncate the content to the maximum number of tokens. If the content is too long, it is first truncated to 200000 characters (roughly 3-4 characters per token) before encoding, for performance reasons. We allow the endoftext token to be encoded, since in the past we encountered issues with the tokenizer.

Args:
    content (str): The content to truncate.
    max_tokens (int): The maximum number of tokens to keep.

Returns:
    str: The truncated content.

Source code in src/lampe/core/utils/token.py (lines 19-45)
def truncate_to_token_limit(content: str, max_tokens: int) -> str:
    """Truncate the content to the maximum number of tokens.
    If the content is too long, truncate it to 200000 characters (3-4 characters per token)
    before encoding for performance reasons.
    We allow `endoftext` token to be encoded, since in the past we encountered issues with the tokenizer.

    Args:
        content (str): The content to truncate.
        max_tokens (int): The maximum number of tokens to keep.

    Returns:
        str: The truncated content.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if len(content) >= CHARACTER_TRUNCATION_THRESHOLD:
        logger.warning(
            f"Truncating content to {CHARACTER_TRUNCATION_THRESHOLD} characters before encoding "
            f"for performance reasons. Content length: {len(content)}"
        )
        content = safe_truncate(content, CHARACTER_TRUNCATION_THRESHOLD)
    tokens = encoder.encode(
        content,
        disallowed_special=(),
    )
    truncated = encoder.decode(tokens[:max_tokens])
    return truncated

describe

PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, *args, **kwargs)

Bases: Workflow

A workflow that generates a PR description.

Based on the pull request's diff, it generates a clear, concise description explaining what changes are being made and why.

Parameters:

truncation_tokens (default: MAX_TOKENS): Maximum number of tokens to use for the diff content.
Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py, lines 47-51
def __init__(self, truncation_tokens=MAX_TOKENS, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.llm = LiteLLM(model=MODELS.GPT_5_NANO_2025_08_07, temperature=1.0)
    self.truncation_tokens = truncation_tokens
    self.output_parser = MarkdownCodeBlockRemoverOutputParser()
generate_description(ev: PRDescriptionPromptEvent) -> StopEvent async

Generate a PR description.

This step generates a PR description using the LLM, based on the truncated diff of all changes between the two commits.

Parameters:

ev (PRDescriptionPromptEvent, required): The prompt event containing the prepared diff and prompt.

Returns:

StopEvent: The stop event containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py, lines 86-111
@step
async def generate_description(self, ev: PRDescriptionPromptEvent) -> StopEvent:
    """Generate a PR description.

    This step generates a PR description using the LLM.
    It uses the truncated diff of all the changes between 2 commits.

    Parameters
    ----------
    ev
        The prompt event containing the prepared diff and prompt.

    Returns
    -------
    :
        The stop event containing the generated description.
    """
    response = await self.llm.achat(
        messages=[
            ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PR_DESCRIPTION_MESSAGE),
            ChatMessage(role=MessageRole.USER, content=ev.formatted_prompt),
        ]
    )

    description = self.output_parser.parse(response.message.content or "")
    return StopEvent(result=PRDescriptionOutput(description=description))
prepare_diff_and_prompt(ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent async

Prepare the diff and prompt for the LLM.

This step prepares the diff and prompt for the LLM: it truncates the diff to the maximum number of tokens and formats the prompt. The diff is filtered using files_exclude_patterns, files_include_patterns, and files_reinclude_patterns. The files_reinclude_patterns override files_exclude_patterns, which is useful for patterns like "!readme.txt" that should take precedence over a "*.txt" exclusion.

Parameters:

ev (PRDescriptionStartEvent, required): The start event containing the PR details.

Returns:

PRDescriptionPromptEvent: The prompt event containing the prepared diff and prompt.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py, lines 53-84
@step
async def prepare_diff_and_prompt(self, ev: PRDescriptionStartEvent) -> PRDescriptionPromptEvent:
    """Prepare the diff and prompt for the LLM.

    This step prepares the diff and prompt for the LLM.
    It truncates the diff to the maximum number of tokens and formats the prompt.
    The diff is filtered using files_exclude_patterns, files_include_patterns and files_reinclude_patterns.
    The files_reinclude_patterns allow overriding files_exclude_patterns, which is useful for patterns like
    "!readme.txt" that should override "*.txt" exclusions.

    Parameters
    ----------
    ev
        The start event containing the PR details.

    Returns
    -------
    :
        The prompt event containing the prepared diff and prompt.
    """
    repo_path = ev.repository.local_path
    base_hash = ev.pull_request.base_commit_hash
    head_hash = ev.pull_request.head_commit_hash
    diff = get_diff_between_commits(
        base_hash, head_hash, files_exclude_patterns=ev.files_exclude_patterns, repo_path=repo_path
    )
    diff = truncate_to_token_limit(diff, self.truncation_tokens)
    formatted_prompt = USER_PR_DESCRIPTION_MESSAGE.format(
        pr_title=ev.pr_title,
        pull_request_diff=diff,
    )
    return PRDescriptionPromptEvent(formatted_prompt=formatted_prompt)
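
For illustration, the workflow can also be run directly with a start event. This is only a sketch: it assumes repository and pull_request are already-populated Repository and PullRequest models for a locally available repository, the exclusion pattern shown is hypothetical, and imports of the lampe names are omitted.

import asyncio

async def describe_locally(repository, pull_request):
    # Sketch only: run the generation workflow directly with a start event.
    workflow = PRDescriptionWorkflow(truncation_tokens=MAX_TOKENS, timeout=120, verbose=True)
    result = await workflow.run(
        start_event=PRDescriptionStartEvent(
            pr_title=pull_request.title,
            repository=repository,
            pull_request=pull_request,
            files_exclude_patterns=["*.lock"],  # hypothetical exclusion pattern
        )
    )
    return result

# asyncio.run(describe_locally(repository, pull_request))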

generate_pr_description(repository: Repository, pull_request: PullRequest, files_exclude_patterns: list[str] | None = None, files_reinclude_patterns: list[str] | None = None, truncation_tokens: int = MAX_TOKENS, timeout: int | None = None, verbose: bool = False, metadata: dict | None = None) -> PRDescriptionOutput async

Generate a PR description.

This function generates a PR description for the given pull request by running the PRDescriptionWorkflow.

Parameters:

repository (Repository, required): The repository to generate the PR description for.
pull_request (PullRequest, required): The pull request to generate the PR description for.
files_exclude_patterns (list[str] | None, default: None): The glob matching patterns to exclude from the diff.
files_reinclude_patterns (list[str] | None, default: None): The glob matching patterns to re-include in the diff.
truncation_tokens (int, default: MAX_TOKENS): The maximum number of tokens to use for the diff content.
timeout (int | None, default: None): The timeout for the workflow.
verbose (bool, default: False): Whether to print verbose output.
metadata (dict | None, default: None): The metadata to use for the workflow.

Returns:

PRDescriptionOutput: The output containing the generated description.

Source code in packages/lampe-describe/src/lampe/describe/workflows/pr_description/generation.py, lines 114-164
async def generate_pr_description(
    repository: Repository,
    pull_request: PullRequest,
    files_exclude_patterns: list[str] | None = None,
    files_reinclude_patterns: list[str] | None = None,
    truncation_tokens: int = MAX_TOKENS,
    timeout: int | None = None,
    verbose: bool = False,
    metadata: dict | None = None,
) -> PRDescriptionOutput:
    """Generate a PR description.

    This function generates a PR description for a given pull request.
    It uses the PRDescriptionWorkflow to generate the description.

    Parameters
    ----------
    repository
        The repository to generate the PR description for.
    pull_request
        The pull request to generate the PR description for.
    files_exclude_patterns
        The glob matching patterns to exclude from the diff, by default None
    files_reinclude_patterns
        The glob matching patterns to re-include in the diff, by default None
    truncation_tokens
        The maximum number of tokens to use for the diff content, by default MAX_TOKENS
    timeout
        The timeout for the workflow, by default None
    verbose
        Whether to print verbose output, by default False
    metadata
        The metadata to use for the workflow, by default None

    Returns
    -------
    :
        The output containing the generated description.
    """
    if files_exclude_patterns is None:
        files_exclude_patterns = []
    workflow = PRDescriptionWorkflow(truncation_tokens=truncation_tokens, timeout=timeout, verbose=verbose)
    result = await workflow.run(
        start_event=PRDescriptionStartEvent(
            pr_title=pull_request.title,
            repository=repository,
            pull_request=pull_request,
            files_exclude_patterns=files_exclude_patterns,
        )
    )
    return result
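
A minimal calling sketch, assuming repository and pull_request are existing Repository and PullRequest models; the pattern values and timeout are only illustrative, and imports of the lampe names are omitted.

import asyncio

async def main(repository, pull_request):
    # Hypothetical patterns: drop lock and minified files from the diff,
    # but re-include one specific minified file.
    output = await generate_pr_description(
        repository=repository,
        pull_request=pull_request,
        files_exclude_patterns=["*.lock", "*.min.js"],
        files_reinclude_patterns=["!important.min.js"],
        timeout=300,
        verbose=True,
    )
    print(output.description)

# asyncio.run(main(repository, pull_request))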

workflows

pr_description
data_models
PRDescriptionInput

Bases: BaseModel

Input for PR description generation workflow.

generation

template

TemplateWorkflow

Bases: Workflow

A template workflow that demonstrates basic workflow structure.

This workflow is responsible for:

- Demonstrating the basic workflow pattern
- Showing how to handle events
- Providing a template for new workflows
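
As a rough sketch of the pattern such a template demonstrates (this is not the actual module contents, and the import path for the workflow primitives is an assumption):

# Sketch only: illustrates the general Workflow / step / event pattern.
# The import path is assumed, not taken from this project's source.
from llama_index.core.workflow import StartEvent, StopEvent, Workflow, step

class GreetingWorkflow(Workflow):
    """Minimal example: a single step that turns the start event into a result."""

    @step
    async def greet(self, ev: StartEvent) -> StopEvent:
        # Read an optional "name" field from the start event and stop with a result.
        name = getattr(ev, "name", "world")
        return StopEvent(result=f"Hello, {name}!")

# Usage sketch (inside an async context):
# result = await GreetingWorkflow(timeout=10).run(name="Lampe")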

template_workflow
