mirror of
https://github.com/simonw/files-to-prompt.git
synced 2025-10-23 00:02:47 +03:00
Add --cxml flag (#16)
Refs #15 --------- Co-authored-by: Simon Willison <swillison@gmail.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,3 +7,4 @@ venv
|
||||
.pytest_cache
|
||||
*.egg-info
|
||||
.DS_Store
|
||||
build/
|
||||
|
||||
23
README.md
23
README.md
@@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau
|
||||
## Installation
|
||||
|
||||
Install this tool using `pip`:
|
||||
|
||||
```bash
|
||||
pip install files-to-prompt
|
||||
```
|
||||
@@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela
|
||||
### Options
|
||||
|
||||
- `--include-hidden`: Include files and folders starting with `.` (hidden files and directories).
|
||||
|
||||
```bash
|
||||
files-to-prompt path/to/directory --include-hidden
|
||||
```
|
||||
|
||||
- `--ignore-gitignore`: Ignore `.gitignore` files and include all files.
|
||||
|
||||
```bash
|
||||
files-to-prompt path/to/directory --ignore-gitignore
|
||||
```
|
||||
@@ -101,6 +104,25 @@ Contents of file3.txt
|
||||
---
|
||||
```
|
||||
|
||||
### XML Output
|
||||
|
||||
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
|
||||
|
||||
To structure the output in this way, use the optional `--cxml` flag, which will produce output like this:
|
||||
|
||||
```xml
|
||||
<documents>
|
||||
<document path="my_directory/file1.txt">
|
||||
Contents of file1.txt
|
||||
</document>
|
||||
|
||||
<document path="my_directory/file2.txt">
|
||||
Contents of file2.txt
|
||||
</document>
|
||||
...
|
||||
</documents>
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
To contribute to this tool, first check out the code. Then create a new virtual environment:
|
||||
@@ -118,6 +140,7 @@ pip install -e '.[test]'
|
||||
```
|
||||
|
||||
To run the tests:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import os
|
||||
import click
|
||||
from fnmatch import fnmatch
|
||||
|
||||
import click
|
||||
|
||||
|
||||
def should_ignore(path, gitignore_rules):
|
||||
for rule in gitignore_rules:
|
||||
@@ -22,18 +23,39 @@ def read_gitignore(path):
|
||||
return []
|
||||
|
||||
|
||||
def print_path(path, content, xml):
    """Print one file's path and content in the selected output format.

    Args:
        path: Path of the file being printed.
        content: Text content of the file.
        xml: When truthy, delegate to ``print_as_xml``; otherwise delegate
            to ``print_default``.
    """
    emit = print_as_xml if xml else print_default
    emit(path, content)
|
||||
|
||||
|
||||
def print_default(path, content):
    """Print a file in the default plain-text layout.

    The output is the path, a ``---`` separator, the content, an empty
    line, and a closing ``---`` separator, each written with ``click.echo``.

    Args:
        path: Path of the file being printed.
        content: Text content of the file.
    """
    separator = "---"
    for chunk in (path, separator, content):
        click.echo(chunk)
    # A bare echo emits the blank line that precedes the closing separator.
    click.echo()
    click.echo(separator)
|
||||
|
||||
|
||||
def print_as_xml(path, content):
    """Print a file wrapped in a ``<document path="...">`` element.

    The path is escaped for use inside the double-quoted ``path`` attribute,
    so a file name containing ``"``, ``&``, ``<`` or ``>`` cannot terminate
    the attribute early or produce a malformed opening tag. Paths without
    those characters are printed exactly as before. The content is written
    verbatim between the tags and is not escaped.

    Args:
        path: Path of the file, used as the value of the ``path`` attribute.
        content: Text content of the file, echoed unchanged.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    from xml.sax.saxutils import escape

    # escape() handles &, < and > by default; the extra entity covers the
    # double quote that delimits the attribute value.
    safe_path = escape(str(path), {'"': "&quot;"})
    click.echo(f'<document path="{safe_path}">')
    click.echo(content)
    click.echo("</document>")
|
||||
|
||||
|
||||
def process_path(
|
||||
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
|
||||
path,
|
||||
include_hidden,
|
||||
ignore_gitignore,
|
||||
gitignore_rules,
|
||||
ignore_patterns,
|
||||
claude_xml,
|
||||
):
|
||||
if os.path.isfile(path):
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
file_contents = f.read()
|
||||
click.echo(path)
|
||||
click.echo("---")
|
||||
click.echo(file_contents)
|
||||
click.echo()
|
||||
click.echo("---")
|
||||
print_path(path, f.read(), claude_xml)
|
||||
except UnicodeDecodeError:
|
||||
warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError"
|
||||
click.echo(click.style(warning_message, fg="red"), err=True)
|
||||
@@ -63,17 +85,11 @@ def process_path(
|
||||
if not any(fnmatch(f, pattern) for pattern in ignore_patterns)
|
||||
]
|
||||
|
||||
for file in files:
|
||||
for file in sorted(files):
|
||||
file_path = os.path.join(root, file)
|
||||
try:
|
||||
with open(file_path, "r") as f:
|
||||
file_contents = f.read()
|
||||
|
||||
click.echo(file_path)
|
||||
click.echo("---")
|
||||
click.echo(file_contents)
|
||||
click.echo()
|
||||
click.echo("---")
|
||||
print_path(file_path, f.read(), claude_xml)
|
||||
except UnicodeDecodeError:
|
||||
warning_message = (
|
||||
f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
|
||||
@@ -100,8 +116,15 @@ def process_path(
|
||||
default=[],
|
||||
help="List of patterns to ignore",
|
||||
)
|
||||
@click.option(
|
||||
"claude_xml",
|
||||
"-c",
|
||||
"--cxml",
|
||||
is_flag=True,
|
||||
help="Output in XML-ish format suitable for Claude's long context window.",
|
||||
)
|
||||
@click.version_option()
|
||||
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
|
||||
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
|
||||
"""
|
||||
Takes one or more paths to files or directories and outputs every file,
|
||||
recursively, each one preceded with its filename like this:
|
||||
@@ -114,6 +137,19 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
|
||||
path/to/file2.py
|
||||
---
|
||||
...
|
||||
|
||||
If the `--cxml` flag is provided, the output will be structured as follows:
|
||||
|
||||
<documents>
|
||||
<document path="path/to/file1.txt">
|
||||
Contents of file1.txt
|
||||
</document>
|
||||
|
||||
<document path="path/to/file2.txt">
|
||||
Contents of file2.txt
|
||||
</document>
|
||||
...
|
||||
</documents>
|
||||
"""
|
||||
gitignore_rules = []
|
||||
for path in paths:
|
||||
@@ -121,6 +157,17 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
|
||||
raise click.BadArgumentUsage(f"Path does not exist: {path}")
|
||||
if not ignore_gitignore:
|
||||
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
|
||||
if claude_xml and path == paths[0]:
|
||||
click.echo("<documents>")
|
||||
|
||||
process_path(
|
||||
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
|
||||
path,
|
||||
include_hidden,
|
||||
ignore_gitignore,
|
||||
gitignore_rules,
|
||||
ignore_patterns,
|
||||
claude_xml,
|
||||
)
|
||||
|
||||
if claude_xml:
|
||||
click.echo("</documents>")
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import os
|
||||
|
||||
from click.testing import CliRunner
|
||||
|
||||
from files_to_prompt.cli import cli
|
||||
|
||||
|
||||
@@ -186,3 +188,56 @@ def test_binary_file_warning(tmpdir):
|
||||
"Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
|
||||
in stderr
|
||||
)
|
||||
|
||||
|
||||
def test_xml_format_dir(tmpdir):
    """Check ``--cxml`` output when the CLI is given a directory.

    Creates two text files under ``test_dir``, runs the CLI on that
    directory with ``--cxml``, and compares the stripped output with the
    expected ``<documents>`` wrapper containing one ``<document>`` per file.
    """
    fixtures = (
        ("test_dir/file1.txt", "Contents of file1"),
        ("test_dir/file2.txt", "Contents of file2"),
    )
    with tmpdir.as_cwd():
        os.makedirs("test_dir")
        for file_name, text in fixtures:
            with open(file_name, "w") as handle:
                handle.write(text)

        outcome = CliRunner().invoke(cli, ["test_dir", "--cxml"])
        assert outcome.exit_code == 0
        expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
        assert expected.strip() == outcome.output.strip()
|
||||
|
||||
|
||||
def test_cxml_format_multiple_paths(tmpdir):
    """Check ``--cxml`` output when the CLI is given several file paths.

    Creates two text files, passes both paths to the CLI with ``--cxml``,
    and verifies that the stripped output is a single ``<documents>``
    wrapper holding both ``<document>`` elements.
    """
    fixtures = (
        ("test_dir/file1.txt", "Contents of file1"),
        ("test_dir/file2.txt", "Contents of file2"),
    )
    with tmpdir.as_cwd():
        os.makedirs("test_dir")
        for file_name, text in fixtures:
            with open(file_name, "w") as handle:
                handle.write(text)

        cli_args = ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"]
        outcome = CliRunner().invoke(cli, cli_args)

        assert outcome.exit_code == 0
        expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
        assert expected.strip() == outcome.output.strip()
|
||||
|
||||
Reference in New Issue
Block a user