mirror of
https://github.com/simonw/files-to-prompt.git
synced 2025-10-23 00:02:47 +03:00
@@ -1,7 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
from files_to_prompt.utils import allowed_by_gitignore
|
||||||
|
import pathlib
|
||||||
import click
|
import click
|
||||||
|
|
||||||
global_index = 1
|
global_index = 1
|
||||||
@@ -24,25 +25,6 @@ EXT_TO_LANG = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def should_ignore(path, gitignore_rules):
    """
    Tell whether ``path`` is matched by any of the given ignore patterns.

    Only the final path component is compared, using ``fnmatch`` glob
    semantics. A pattern may also match the component with a trailing
    slash appended, but only when ``path`` is an existing directory.

    Parameters:
        path: Filesystem path of the candidate file or directory.
        gitignore_rules: Iterable of glob patterns to test against.

    Returns:
        bool: True if some pattern matches, otherwise False.
    """
    name = os.path.basename(path)

    # Plain match on the last path component.
    if any(fnmatch(name, pattern) for pattern in gitignore_rules):
        return True

    # Directory-style match ("name/"); the filesystem is only consulted
    # when a pattern actually matches the slash-suffixed name.
    name_with_slash = name + "/"
    matches_as_dir = any(
        fnmatch(name_with_slash, pattern) for pattern in gitignore_rules
    )
    return matches_as_dir and os.path.isdir(path)
|
|
||||||
|
|
||||||
|
|
||||||
def read_gitignore(path):
    """
    Load the ignore patterns from the ``.gitignore`` file in ``path``.

    Blank lines and lines beginning with ``#`` are dropped; surviving
    lines are returned with surrounding whitespace stripped.

    Parameters:
        path: Directory that may contain a ``.gitignore`` file.

    Returns:
        list[str]: The patterns in file order, or an empty list when the
        directory has no ``.gitignore`` file.
    """
    gitignore_path = os.path.join(path, ".gitignore")
    if not os.path.isfile(gitignore_path):
        return []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(gitignore_path, "r", encoding="utf-8") as f:
        return [
            line.strip() for line in f if line.strip() and not line.startswith("#")
        ]
|
|
||||||
|
|
||||||
|
|
||||||
def add_line_numbers(content):
|
def add_line_numbers(content):
|
||||||
lines = content.splitlines()
|
lines = content.splitlines()
|
||||||
|
|
||||||
@@ -104,7 +86,6 @@ def process_path(
|
|||||||
include_hidden,
|
include_hidden,
|
||||||
ignore_files_only,
|
ignore_files_only,
|
||||||
ignore_gitignore,
|
ignore_gitignore,
|
||||||
gitignore_rules,
|
|
||||||
ignore_patterns,
|
ignore_patterns,
|
||||||
writer,
|
writer,
|
||||||
claude_xml,
|
claude_xml,
|
||||||
@@ -124,17 +105,13 @@ def process_path(
|
|||||||
dirs[:] = [d for d in dirs if not d.startswith(".")]
|
dirs[:] = [d for d in dirs if not d.startswith(".")]
|
||||||
files = [f for f in files if not f.startswith(".")]
|
files = [f for f in files if not f.startswith(".")]
|
||||||
|
|
||||||
|
root_path = pathlib.Path(root)
|
||||||
if not ignore_gitignore:
|
if not ignore_gitignore:
|
||||||
gitignore_rules.extend(read_gitignore(root))
|
|
||||||
dirs[:] = [
|
dirs[:] = [
|
||||||
d
|
d for d in dirs if allowed_by_gitignore(root_path, root_path / d)
|
||||||
for d in dirs
|
|
||||||
if not should_ignore(os.path.join(root, d), gitignore_rules)
|
|
||||||
]
|
]
|
||||||
files = [
|
files = [
|
||||||
f
|
f for f in files if allowed_by_gitignore(root_path, root_path / f)
|
||||||
for f in files
|
|
||||||
if not should_ignore(os.path.join(root, f), gitignore_rules)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
if ignore_patterns:
|
if ignore_patterns:
|
||||||
@@ -302,7 +279,6 @@ def cli(
|
|||||||
# Combine paths from arguments and stdin
|
# Combine paths from arguments and stdin
|
||||||
paths = [*paths, *stdin_paths]
|
paths = [*paths, *stdin_paths]
|
||||||
|
|
||||||
gitignore_rules = []
|
|
||||||
writer = click.echo
|
writer = click.echo
|
||||||
fp = None
|
fp = None
|
||||||
if output_file:
|
if output_file:
|
||||||
@@ -311,8 +287,6 @@ def cli(
|
|||||||
for path in paths:
|
for path in paths:
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
raise click.BadArgumentUsage(f"Path does not exist: {path}")
|
raise click.BadArgumentUsage(f"Path does not exist: {path}")
|
||||||
if not ignore_gitignore:
|
|
||||||
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
|
|
||||||
if claude_xml and path == paths[0]:
|
if claude_xml and path == paths[0]:
|
||||||
writer("<documents>")
|
writer("<documents>")
|
||||||
process_path(
|
process_path(
|
||||||
@@ -321,7 +295,6 @@ def cli(
|
|||||||
include_hidden,
|
include_hidden,
|
||||||
ignore_files_only,
|
ignore_files_only,
|
||||||
ignore_gitignore,
|
ignore_gitignore,
|
||||||
gitignore_rules,
|
|
||||||
ignore_patterns,
|
ignore_patterns,
|
||||||
writer,
|
writer,
|
||||||
claude_xml,
|
claude_xml,
|
||||||
|
|||||||
75
files_to_prompt/utils.py
Normal file
75
files_to_prompt/utils.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from pathspec.gitignore import GitIgnoreSpec
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_by_gitignore(root: Path, file_path: Path) -> bool:
    """
    Check whether ``file_path`` should be included (i.e. not ignored) based
    on the .gitignore files found from ``root`` down to the directory that
    contains ``file_path``.

    Each .gitignore is evaluated against the path relative to its own
    directory. Files are processed from ``root`` downwards and the most
    recent file that yields a match overrides earlier decisions, so deeper
    .gitignore files take precedence.

    Parameters:
        root (Path): The root directory under which .gitignore files are searched.
        file_path (Path): The file or directory to be checked.

    Returns:
        bool: True if the path should be included (not ignored); False if
        it should be ignored. ``root`` itself is always included.

    Raises:
        ValueError: If ``file_path`` does not resolve to a location under ``root``.
    """
    # Local import keeps this block self-contained; sys is only needed for
    # the warning emitted when a .gitignore cannot be read.
    import sys

    # Resolve absolute paths.
    abs_root = root.resolve()
    abs_file = file_path.resolve()

    # Ensure file is under the provided root.
    try:
        rel_to_root = abs_file.relative_to(abs_root)
    except ValueError:
        raise ValueError(
            f"File {abs_file!r} is not under the root {abs_root!r}."
        ) from None

    # The root itself has no path relative to any .gitignore beneath it, so
    # nothing can exclude it.
    if not rel_to_root.parts:
        return True

    # Directory-only patterns such as "build/" are expected to match only
    # paths spelled with a trailing slash, so remember whether to add one.
    is_directory = abs_file.is_dir()

    # Build a list of directories from the root to the file's directory.
    directories = [abs_root]
    for part in rel_to_root.parts[:-1]:
        directories.append(directories[-1] / part)

    # The decision will be updated by any matching .gitignore rule encountered.
    decision = None

    # Process each directory (from root to file's directory).
    for directory in directories:
        gitignore_file = directory / ".gitignore"
        if not gitignore_file.is_file():
            continue

        try:
            text = gitignore_file.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # Best effort: skip an unreadable file, and warn on stderr so the
            # message never mixes into output written to stdout.
            print(f"Could not read {gitignore_file}: {e}", file=sys.stderr)
            continue

        # Keep nonempty lines only; splitlines() already drops line endings.
        lines = [line for line in text.splitlines() if line.strip()]

        # Compile a GitIgnoreSpec for the rules in the current directory.
        spec = GitIgnoreSpec.from_lines(lines)

        # .gitignore patterns are relative to the directory they are in.
        # Compute the file path relative to this directory in POSIX format.
        rel_file = abs_file.relative_to(directory).as_posix()
        if is_directory:
            rel_file += "/"

        # Check the path against these rules.
        result = spec.check_file(rel_file)

        # If a rule from this .gitignore file applied, update the decision.
        if result.include is not None:
            decision = result.include

    # If no .gitignore rule matched, the file is included by default.
    if decision is None:
        return True

    # Interpretation:
    #   decision == True  --> a normal ignore rule matched (file should be ignored)
    #   decision == False --> a negation rule matched (file re-included)
    # So, we return not decision.
    return not decision
|
||||||
@@ -10,7 +10,8 @@ classifiers = [
|
|||||||
"License :: OSI Approved :: Apache Software License"
|
"License :: OSI Approved :: Apache Software License"
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"click"
|
"click",
|
||||||
|
"pathspec",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
|||||||
54
tests/test_allowed_by_gitignore.py
Normal file
54
tests/test_allowed_by_gitignore.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
from files_to_prompt.utils import allowed_by_gitignore
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def test_allowed_by_gitignore(tmpdir):
    """
    Exercise allowed_by_gitignore against a small repository layout with a
    top-level .gitignore (ignoring "build/") and a nested src/.gitignore
    (ignoring "temp.txt").
    """
    # Lay out the directory skeleton: repo/, repo/build/, repo/src/.
    repo = Path(tmpdir) / "repo"
    build_dir = repo / "build"
    src_dir = repo / "src"
    for folder in (repo, build_dir, src_dir):
        folder.mkdir()

    output_file = build_dir / "output.txt"
    main_file = src_dir / "main.py"
    temp_file = src_dir / "temp.txt"
    keep_file = src_dir / "keep.txt"
    root_file = repo / "README.md"

    # Populate every file, including the two .gitignore files.
    file_contents = {
        repo / ".gitignore": "build/\n",
        output_file: "dummy build output",
        src_dir / ".gitignore": "temp.txt\n",
        main_file: "print('Hello')",
        temp_file: "should be ignored",
        keep_file: "keep this file",
        root_file: "# Repo README",
    }
    for target, text in file_contents.items():
        target.write_text(text, encoding="utf-8")

    # (path, expected result, failure message), in the order:
    # 1. file under the ignored "build" directory,
    # 2. file ignored by src/.gitignore,
    # 3. files in "src" that no rule mentions,
    # 4. file at the repo root that no rule mentions.
    expectations = [
        (output_file, False, "build/output.txt should be ignored"),
        (temp_file, False, "src/temp.txt should be ignored"),
        (main_file, True, "src/main.py should be included"),
        (keep_file, True, "src/keep.txt should be included"),
        (root_file, True, "repo/README.md should be included"),
    ]
    for candidate, expected, message in expectations:
        assert allowed_by_gitignore(repo, candidate) is expected, message
|
||||||
@@ -21,7 +21,7 @@ def test_basic_functionality(tmpdir):
|
|||||||
with open("test_dir/file2.txt", "w") as f:
|
with open("test_dir/file2.txt", "w") as f:
|
||||||
f.write("Contents of file2")
|
f.write("Contents of file2")
|
||||||
|
|
||||||
result = runner.invoke(cli, ["test_dir"])
|
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "test_dir/file1.txt" in result.output
|
assert "test_dir/file1.txt" in result.output
|
||||||
assert "Contents of file1" in result.output
|
assert "Contents of file1" in result.output
|
||||||
@@ -36,7 +36,7 @@ def test_include_hidden(tmpdir):
|
|||||||
with open("test_dir/.hidden.txt", "w") as f:
|
with open("test_dir/.hidden.txt", "w") as f:
|
||||||
f.write("Contents of hidden file")
|
f.write("Contents of hidden file")
|
||||||
|
|
||||||
result = runner.invoke(cli, ["test_dir"])
|
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "test_dir/.hidden.txt" not in result.output
|
assert "test_dir/.hidden.txt" not in result.output
|
||||||
|
|
||||||
@@ -61,11 +61,9 @@ def test_ignore_gitignore(tmpdir):
|
|||||||
with open("test_dir/nested_include/included2.txt", "w") as f:
|
with open("test_dir/nested_include/included2.txt", "w") as f:
|
||||||
f.write("This nested file should be included")
|
f.write("This nested file should be included")
|
||||||
with open("test_dir/nested_ignore/.gitignore", "w") as f:
|
with open("test_dir/nested_ignore/.gitignore", "w") as f:
|
||||||
f.write("nested_ignore.txt")
|
f.write("*")
|
||||||
with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
|
with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
|
||||||
f.write("This nested file should not be included")
|
f.write("This nested file should not be included")
|
||||||
with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
|
|
||||||
f.write("This nested file should actually be included")
|
|
||||||
|
|
||||||
result = runner.invoke(cli, ["test_dir", "-c"])
|
result = runner.invoke(cli, ["test_dir", "-c"])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
@@ -74,7 +72,6 @@ def test_ignore_gitignore(tmpdir):
|
|||||||
assert filenames == {
|
assert filenames == {
|
||||||
"test_dir/included.txt",
|
"test_dir/included.txt",
|
||||||
"test_dir/nested_include/included2.txt",
|
"test_dir/nested_include/included2.txt",
|
||||||
"test_dir/nested_ignore/actually_include.txt",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
|
result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
|
||||||
@@ -86,7 +83,6 @@ def test_ignore_gitignore(tmpdir):
|
|||||||
"test_dir/ignored.txt",
|
"test_dir/ignored.txt",
|
||||||
"test_dir/nested_include/included2.txt",
|
"test_dir/nested_include/included2.txt",
|
||||||
"test_dir/nested_ignore/nested_ignore.txt",
|
"test_dir/nested_ignore/nested_ignore.txt",
|
||||||
"test_dir/nested_ignore/actually_include.txt",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -243,7 +239,7 @@ def test_binary_file_warning(tmpdir):
|
|||||||
with open("test_dir/text_file.txt", "w") as f:
|
with open("test_dir/text_file.txt", "w") as f:
|
||||||
f.write("This is a text file")
|
f.write("This is a text file")
|
||||||
|
|
||||||
result = runner.invoke(cli, ["test_dir"])
|
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
stdout = result.stdout
|
stdout = result.stdout
|
||||||
@@ -331,7 +327,7 @@ def test_line_numbers(tmpdir):
|
|||||||
with open("test_dir/multiline.txt", "w") as f:
|
with open("test_dir/multiline.txt", "w") as f:
|
||||||
f.write(test_content)
|
f.write(test_content)
|
||||||
|
|
||||||
result = runner.invoke(cli, ["test_dir"])
|
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "1 First line" not in result.output
|
assert "1 First line" not in result.output
|
||||||
assert test_content in result.output
|
assert test_content in result.output
|
||||||
|
|||||||
Reference in New Issue
Block a user