Switch to more recent Claude XML format

Refs #15

Refs https://github.com/simonw/files-to-prompt/pull/16#discussion_r1744298522
This commit is contained in:
Simon Willison
2024-09-08 22:33:07 -07:00
parent db4a164fec
commit d016523c22
3 changed files with 38 additions and 45 deletions

View File

@@ -104,7 +104,7 @@ Contents of file3.txt
---
```
### XML Output
### Claude XML Output
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
@@ -112,14 +112,18 @@ To structure the output in this way, use the optional `--cxml` flag, which will
```xml
<documents>
<document path="my_directory/file1.txt">
<document index="1">
<source>my_directory/file1.txt</source>
<document_content>
Contents of file1.txt
</document_content>
</document>
<document path="my_directory/file2.txt">
<document index="2">
<source>my_directory/file2.txt</source>
<document_content>
Contents of file2.txt
</document_content>
</document>
...
</documents>
```

View File

@@ -3,6 +3,8 @@ from fnmatch import fnmatch
import click
global_index = 1
def should_ignore(path, gitignore_rules):
for rule in gitignore_rules:
@@ -39,9 +41,14 @@ def print_default(path, content):
def print_as_xml(path, content):
click.echo(f'<document path="{path}">')
global global_index
click.echo(f'<document index="{global_index}">')
click.echo(f"<source>{path}</source>")
click.echo("<document_content>")
click.echo(content)
click.echo("</document_content>")
click.echo("</document>")
global_index += 1
def process_path(
@@ -151,6 +158,9 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
...
</documents>
"""
# Reset global_index for pytest
global global_index
global_index = 1
gitignore_rules = []
for path in paths:
if not os.path.exists(path):
@@ -159,7 +169,6 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if claude_xml and path == paths[0]:
click.echo("<documents>")
process_path(
path,
include_hidden,
@@ -168,6 +177,5 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
ignore_patterns,
claude_xml,
)
if claude_xml:
click.echo("</documents>")

View File

@@ -1,4 +1,5 @@
import os
import pytest
from click.testing import CliRunner
@@ -190,53 +191,33 @@ def test_binary_file_warning(tmpdir):
)
def test_xml_format_dir(tmpdir):
@pytest.mark.parametrize(
"args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
)
def test_xml_format_dir(tmpdir, args):
runner = CliRunner()
with tmpdir.as_cwd():
os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1")
f.write("Contents of file1.txt")
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")
result = runner.invoke(cli, ["test_dir", "--cxml"])
f.write("Contents of file2.txt")
result = runner.invoke(cli, args + ["--cxml"])
assert result.exit_code == 0
actual = result.output
expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
<document index="1">
<source>test_dir/file1.txt</source>
<document_content>
Contents of file1.txt
</document_content>
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
assert expected.strip() == actual.strip()
def test_cxml_format_multiple_paths(tmpdir):
runner = CliRunner()
with tmpdir.as_cwd():
os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1")
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")
result = runner.invoke(
cli, ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"]
)
assert result.exit_code == 0
actual = result.output
expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
<document index="2">
<source>test_dir/file2.txt</source>
<document_content>
Contents of file2.txt
</document_content>
</document>
</documents>
"""