Switch to more recent Claude XML format

Refs #15

Refs https://github.com/simonw/files-to-prompt/pull/16#discussion_r1744298522
This commit is contained in:
Simon Willison
2024-09-08 22:33:07 -07:00
parent db4a164fec
commit d016523c22
3 changed files with 38 additions and 45 deletions

View File

@@ -104,7 +104,7 @@ Contents of file3.txt
--- ---
``` ```
### XML Output ### Claude XML Output
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
@@ -112,14 +112,18 @@ To structure the output in this way, use the optional `--cxml` flag, which will
```xml ```xml
<documents> <documents>
<document path="my_directory/file1.txt"> <document index="1">
<source>my_directory/file1.txt</source>
<document_content>
Contents of file1.txt Contents of file1.txt
</document_content>
</document> </document>
<document index="2">
<document path="my_directory/file2.txt"> <source>my_directory/file2.txt</source>
<document_content>
Contents of file2.txt Contents of file2.txt
</document_content>
</document> </document>
...
</documents> </documents>
``` ```

View File

@@ -3,6 +3,8 @@ from fnmatch import fnmatch
import click import click
global_index = 1
def should_ignore(path, gitignore_rules): def should_ignore(path, gitignore_rules):
for rule in gitignore_rules: for rule in gitignore_rules:
@@ -39,9 +41,14 @@ def print_default(path, content):
def print_as_xml(path, content): def print_as_xml(path, content):
click.echo(f'<document path="{path}">') global global_index
click.echo(f'<document index="{global_index}">')
click.echo(f"<source>{path}</source>")
click.echo("<document_content>")
click.echo(content) click.echo(content)
click.echo("</document_content>")
click.echo("</document>") click.echo("</document>")
global_index += 1
def process_path( def process_path(
@@ -151,6 +158,9 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
... ...
</documents> </documents>
""" """
# Reset global_index so every cli() invocation numbers documents from 1 (pytest invokes cli repeatedly in one process)
global global_index
global_index = 1
gitignore_rules = [] gitignore_rules = []
for path in paths: for path in paths:
if not os.path.exists(path): if not os.path.exists(path):
@@ -159,7 +169,6 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
gitignore_rules.extend(read_gitignore(os.path.dirname(path))) gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if claude_xml and path == paths[0]: if claude_xml and path == paths[0]:
click.echo("<documents>") click.echo("<documents>")
process_path( process_path(
path, path,
include_hidden, include_hidden,
@@ -168,6 +177,5 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
ignore_patterns, ignore_patterns,
claude_xml, claude_xml,
) )
if claude_xml: if claude_xml:
click.echo("</documents>") click.echo("</documents>")

View File

@@ -1,4 +1,5 @@
import os import os
import pytest
from click.testing import CliRunner from click.testing import CliRunner
@@ -190,53 +191,33 @@ def test_binary_file_warning(tmpdir):
) )
def test_xml_format_dir(tmpdir): @pytest.mark.parametrize(
"args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
)
def test_xml_format_dir(tmpdir, args):
runner = CliRunner() runner = CliRunner()
with tmpdir.as_cwd(): with tmpdir.as_cwd():
os.makedirs("test_dir") os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f: with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1") f.write("Contents of file1.txt")
with open("test_dir/file2.txt", "w") as f: with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2") f.write("Contents of file2.txt")
result = runner.invoke(cli, args + ["--cxml"])
result = runner.invoke(cli, ["test_dir", "--cxml"])
assert result.exit_code == 0 assert result.exit_code == 0
actual = result.output actual = result.output
expected = """ expected = """
<documents> <documents>
<document path="test_dir/file1.txt"> <document index="1">
Contents of file1 <source>test_dir/file1.txt</source>
<document_content>
Contents of file1.txt
</document_content>
</document> </document>
<document path="test_dir/file2.txt"> <document index="2">
Contents of file2 <source>test_dir/file2.txt</source>
</document> <document_content>
</documents> Contents of file2.txt
""" </document_content>
assert expected.strip() == actual.strip()
def test_cxml_format_multiple_paths(tmpdir):
runner = CliRunner()
with tmpdir.as_cwd():
os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1")
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")
result = runner.invoke(
cli, ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"]
)
assert result.exit_code == 0
actual = result.output
expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document> </document>
</documents> </documents>
""" """