From db4a164fecdb17eb22abcfb96648173249ea559b Mon Sep 17 00:00:00 2001 From: Lex Herbert <4326548+lexh@users.noreply.github.com> Date: Sun, 8 Sep 2024 22:19:59 -0700 Subject: [PATCH] Add `--cxml` flag (#16) Refs #15 --------- Co-authored-by: Simon Willison --- .gitignore | 1 + README.md | 23 ++++++++++ files_to_prompt/cli.py | 83 +++++++++++++++++++++++++++-------- tests/test_files_to_prompt.py | 55 +++++++++++++++++++++++ 4 files changed, 144 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 53605b7..3ff5e13 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ venv .pytest_cache *.egg-info .DS_Store +build/ diff --git a/README.md b/README.md index 5d2359a..b8bf40e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau ## Installation Install this tool using `pip`: + ```bash pip install files-to-prompt ``` @@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela ### Options - `--include-hidden`: Include files and folders starting with `.` (hidden files and directories). + ```bash files-to-prompt path/to/directory --include-hidden ``` - `--ignore-gitignore`: Ignore `.gitignore` files and include all files. + ```bash files-to-prompt path/to/directory --ignore-gitignore ``` @@ -101,6 +104,25 @@ Contents of file3.txt --- ``` +### XML Output + +Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. + +To structure the output in this way, use the optional `--cxml` flag, which will produce output like this: + +```xml +<documents> +<document path="path/to/file1.txt"> +Contents of file1.txt +</document> + +<document path="path/to/file2.txt"> +Contents of file2.txt +</document> +... +</documents> +``` + ## Development To contribute to this tool, first checkout the code. 
Then create a new virtual environment: @@ -118,6 +140,7 @@ pip install -e '.[test]' ``` To run the tests: + ```bash pytest ``` diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py index afba856..379a065 100644 --- a/files_to_prompt/cli.py +++ b/files_to_prompt/cli.py @@ -1,7 +1,8 @@ import os -import click from fnmatch import fnmatch +import click + def should_ignore(path, gitignore_rules): for rule in gitignore_rules: @@ -22,18 +23,39 @@ def read_gitignore(path): return [] +def print_path(path, content, xml): + if xml: + print_as_xml(path, content) + else: + print_default(path, content) + + +def print_default(path, content): + click.echo(path) + click.echo("---") + click.echo(content) + click.echo() + click.echo("---") + + +def print_as_xml(path, content): + click.echo(f'<document path="{path}">') + click.echo(content) + click.echo("</document>") + + def process_path( - path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns + path, + include_hidden, + ignore_gitignore, + gitignore_rules, + ignore_patterns, + claude_xml, ): if os.path.isfile(path): try: with open(path, "r") as f: - file_contents = f.read() - click.echo(path) - click.echo("---") - click.echo(file_contents) - click.echo() - click.echo("---") + print_path(path, f.read(), claude_xml) except UnicodeDecodeError: warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError" click.echo(click.style(warning_message, fg="red"), err=True) @@ -63,17 +85,11 @@ def process_path( if not any(fnmatch(f, pattern) for pattern in ignore_patterns) ] - for file in files: + for file in sorted(files): file_path = os.path.join(root, file) try: with open(file_path, "r") as f: - file_contents = f.read() - - click.echo(file_path) - click.echo("---") - click.echo(file_contents) - click.echo() - click.echo("---") + print_path(file_path, f.read(), claude_xml) except UnicodeDecodeError: warning_message = ( f"Warning: Skipping file {file_path} due to UnicodeDecodeError" @@ -100,8 +116,15 @@ def process_path( default=[], 
help="List of patterns to ignore", ) +@click.option( + "claude_xml", + "-c", + "--cxml", + is_flag=True, + help="Output in XML-ish format suitable for Claude's long context window.", +) @click.version_option() -def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): +def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml): """ Takes one or more paths to files or directories and outputs every file, recursively, each one preceded with its filename like this: @@ -114,6 +137,19 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): path/to/file2.py --- ... + + If the `--cxml` flag is provided, the output will be structured as follows: + + <documents> + <document path="path/to/file1.txt"> + Contents of file1.txt + </document> + + <document path="path/to/file2.txt"> + Contents of file2.txt + </document> + ... + </documents> """ gitignore_rules = [] for path in paths: @@ -121,6 +157,17 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): raise click.BadArgumentUsage(f"Path does not exist: {path}") if not ignore_gitignore: gitignore_rules.extend(read_gitignore(os.path.dirname(path))) + if claude_xml and path == paths[0]: + click.echo("<documents>") + process_path( - path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns + path, + include_hidden, + ignore_gitignore, + gitignore_rules, + ignore_patterns, + claude_xml, ) + + if claude_xml: + click.echo("</documents>") diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index 5e20af1..cf37549 100644 --- a/tests/test_files_to_prompt.py +++ b/tests/test_files_to_prompt.py @@ -1,5 +1,7 @@ import os + from click.testing import CliRunner + from files_to_prompt.cli import cli @@ -186,3 +188,56 @@ def test_binary_file_warning(tmpdir): "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError" in stderr ) + + +def test_xml_format_dir(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + os.makedirs("test_dir") + with 
open("test_dir/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir/file2.txt", "w") as f: + f.write("Contents of 
file2") + + result = runner.invoke(cli, ["test_dir", "--cxml"]) + assert result.exit_code == 0 + actual = result.output + expected = """ +<documents> +<document path="test_dir/file1.txt"> +Contents of file1 +</document> +<document path="test_dir/file2.txt"> +Contents of file2 +</document> +</documents> +""" + assert expected.strip() == actual.strip() + + +def test_cxml_format_multiple_paths(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + os.makedirs("test_dir") + with open("test_dir/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir/file2.txt", "w") as f: + f.write("Contents of file2") + + result = runner.invoke( + cli, ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"] + ) + + assert result.exit_code == 0 + actual = result.output + expected = """ +<documents> +<document path="test_dir/file1.txt"> +Contents of file1 +</document> +<document path="test_dir/file2.txt"> +Contents of file2 +</document> +</documents> +""" + assert expected.strip() == actual.strip()