mirror of
https://github.com/simonw/files-to-prompt.git
synced 2025-10-23 00:02:47 +03:00
@@ -1,7 +1,8 @@
|
||||
import os
|
||||
import sys
|
||||
from fnmatch import fnmatch
|
||||
|
||||
from files_to_prompt.utils import allowed_by_gitignore
|
||||
import pathlib
|
||||
import click
|
||||
|
||||
global_index = 1
|
||||
@@ -24,25 +25,6 @@ EXT_TO_LANG = {
|
||||
}
|
||||
|
||||
|
||||
def should_ignore(path, gitignore_rules):
    """
    Return True if the last component of ``path`` matches any ignore rule.

    Matching is done with ``fnmatch`` against the basename only. A rule is
    also tried against the basename with a trailing "/" when ``path`` is an
    existing directory, so directory-style rules such as "build/" apply.

    Parameters:
        path: Filesystem path of the file or directory to test.
        gitignore_rules: Iterable of fnmatch-style pattern strings.

    Returns:
        bool: True if any rule matches, False otherwise.
    """
    # The basename does not depend on the rule, so compute it once.
    name = os.path.basename(path)
    if any(fnmatch(name, rule) for rule in gitignore_rules):
        return True

    # Only stat the path when a directory-style match could still succeed,
    # and do it once instead of once per rule.
    if not gitignore_rules or not os.path.isdir(path):
        return False

    dir_name = name + "/"
    return any(fnmatch(dir_name, rule) for rule in gitignore_rules)
|
||||
|
||||
|
||||
def read_gitignore(path):
    """
    Read the ignore rules from the ``.gitignore`` file inside ``path``.

    Each line is stripped of surrounding whitespace; blank lines and
    comment lines (those whose stripped text starts with "#") are dropped.

    Parameters:
        path: Directory that may contain a ``.gitignore`` file.

    Returns:
        list[str]: The remaining rule strings in file order, or an empty
        list when ``path`` has no ``.gitignore`` file.
    """
    gitignore_path = os.path.join(path, ".gitignore")
    if not os.path.isfile(gitignore_path):
        return []

    with open(gitignore_path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        # Test the stripped text so an indented comment is not returned
        # as a rule beginning with "#".
        return [
            rule for rule in stripped_lines if rule and not rule.startswith("#")
        ]
|
||||
|
||||
|
||||
def add_line_numbers(content):
|
||||
lines = content.splitlines()
|
||||
|
||||
@@ -104,7 +86,6 @@ def process_path(
|
||||
include_hidden,
|
||||
ignore_files_only,
|
||||
ignore_gitignore,
|
||||
gitignore_rules,
|
||||
ignore_patterns,
|
||||
writer,
|
||||
claude_xml,
|
||||
@@ -124,17 +105,13 @@ def process_path(
|
||||
dirs[:] = [d for d in dirs if not d.startswith(".")]
|
||||
files = [f for f in files if not f.startswith(".")]
|
||||
|
||||
root_path = pathlib.Path(root)
|
||||
if not ignore_gitignore:
|
||||
gitignore_rules.extend(read_gitignore(root))
|
||||
dirs[:] = [
|
||||
d
|
||||
for d in dirs
|
||||
if not should_ignore(os.path.join(root, d), gitignore_rules)
|
||||
d for d in dirs if allowed_by_gitignore(root_path, root_path / d)
|
||||
]
|
||||
files = [
|
||||
f
|
||||
for f in files
|
||||
if not should_ignore(os.path.join(root, f), gitignore_rules)
|
||||
f for f in files if allowed_by_gitignore(root_path, root_path / f)
|
||||
]
|
||||
|
||||
if ignore_patterns:
|
||||
@@ -302,7 +279,6 @@ def cli(
|
||||
# Combine paths from arguments and stdin
|
||||
paths = [*paths, *stdin_paths]
|
||||
|
||||
gitignore_rules = []
|
||||
writer = click.echo
|
||||
fp = None
|
||||
if output_file:
|
||||
@@ -311,8 +287,6 @@ def cli(
|
||||
for path in paths:
|
||||
if not os.path.exists(path):
|
||||
raise click.BadArgumentUsage(f"Path does not exist: {path}")
|
||||
if not ignore_gitignore:
|
||||
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
|
||||
if claude_xml and path == paths[0]:
|
||||
writer("<documents>")
|
||||
process_path(
|
||||
@@ -321,7 +295,6 @@ def cli(
|
||||
include_hidden,
|
||||
ignore_files_only,
|
||||
ignore_gitignore,
|
||||
gitignore_rules,
|
||||
ignore_patterns,
|
||||
writer,
|
||||
claude_xml,
|
||||
|
||||
75
files_to_prompt/utils.py
Normal file
75
files_to_prompt/utils.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from pathlib import Path
|
||||
from pathspec.gitignore import GitIgnoreSpec
|
||||
|
||||
|
||||
def allowed_by_gitignore(root: Path, file_path: Path) -> bool:
    """
    Check whether file_path should be included (i.e. is not ignored) based
    on the .gitignore files found in each directory from root down to the
    directory that contains file_path.

    Rules are applied from the root downwards; the last .gitignore file
    with a matching rule determines the outcome.

    Parameters:
        root (Path): The root directory under which .gitignore files are searched.
        file_path (Path): The file or directory to be checked.

    Returns:
        bool: True if the path should be included (not ignored); False if it
        should be ignored.

    Raises:
        ValueError: If file_path does not resolve to a location under root.
    """
    # Imported locally: only needed for the read-failure warning below.
    import sys

    # Resolve absolute paths.
    abs_root = root.resolve()
    abs_file = file_path.resolve()

    # Ensure the path is under the provided root.
    try:
        abs_file.relative_to(abs_root)
    except ValueError as err:
        raise ValueError(
            f"File {abs_file!r} is not under the root {abs_root!r}."
        ) from err

    # Build the list of directories from the root to the path's parent.
    directories = [abs_root]
    rel_dir = abs_file.parent.relative_to(abs_root)
    for part in rel_dir.parts:
        directories.append(directories[-1] / part)

    # Directory-only patterns such as "build/" only match paths that end
    # with a slash, so remember whether the path is a directory.
    is_directory = abs_file.is_dir()

    # Updated by each .gitignore file that has a matching rule.
    decision = None

    for directory in directories:
        gitignore_file = directory / ".gitignore"
        if not gitignore_file.is_file():
            continue

        try:
            text = gitignore_file.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # Best effort: skip an unreadable file, and warn on stderr so
            # the message does not end up in the generated output.
            print(f"Could not read {gitignore_file}: {e}", file=sys.stderr)
            continue

        # Keep non-blank lines; splitlines() already removed the newlines.
        lines = [line for line in text.splitlines() if line.strip()]

        # Compile a GitIgnoreSpec for the rules in the current directory.
        spec = GitIgnoreSpec.from_lines(lines)

        # .gitignore patterns are relative to the directory they are in,
        # so express the path relative to this directory in POSIX format.
        rel_file = abs_file.relative_to(directory).as_posix()
        if is_directory:
            rel_file += "/"

        result = spec.check_file(rel_file)

        # If a rule from this .gitignore file applied, update the decision.
        if result.include is not None:
            decision = result.include

    # If no .gitignore rule matched, the path is included by default.
    if decision is None:
        return True

    # decision == True  --> a normal ignore rule matched (path is ignored)
    # decision == False --> a negation rule matched (path is re-included)
    return not decision
|
||||
@@ -10,7 +10,8 @@ classifiers = [
|
||||
"License :: OSI Approved :: Apache Software License"
|
||||
]
|
||||
dependencies = [
|
||||
"click"
|
||||
"click",
|
||||
"pathspec",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
54
tests/test_allowed_by_gitignore.py
Normal file
54
tests/test_allowed_by_gitignore.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from files_to_prompt.utils import allowed_by_gitignore
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_allowed_by_gitignore(tmpdir):
    """Check allowed_by_gitignore against a root-level and a nested .gitignore."""
    # Lay out the directory skeleton: repo/, repo/build/ and repo/src/.
    repo = Path(tmpdir) / "repo"
    build_dir = repo / "build"
    src_dir = repo / "src"
    for directory in (repo, build_dir, src_dir):
        directory.mkdir()

    output_file = build_dir / "output.txt"
    main_file = src_dir / "main.py"
    temp_file = src_dir / "temp.txt"
    keep_file = src_dir / "keep.txt"
    root_file = repo / "README.md"

    # The root .gitignore ignores the "build/" directory; src/ has its own
    # .gitignore that ignores "temp.txt".
    contents = {
        repo / ".gitignore": "build/\n",
        output_file: "dummy build output",
        src_dir / ".gitignore": "temp.txt\n",
        main_file: "print('Hello')",
        temp_file: "should be ignored",
        keep_file: "keep this file",
        root_file: "# Repo README",
    }
    for target, text in contents.items():
        target.write_text(text, encoding="utf-8")

    # (path, expected result, failure message)
    expectations = [
        # 1. File in "build" should be ignored.
        (output_file, False, "build/output.txt should be ignored"),
        # 2. File in "src" that is ignored per src/.gitignore.
        (temp_file, False, "src/temp.txt should be ignored"),
        # 3. Files in "src" not mentioned in .gitignore should be included.
        (main_file, True, "src/main.py should be included"),
        (keep_file, True, "src/keep.txt should be included"),
        # 4. File at the repo root not mentioned in .gitignore.
        (root_file, True, "repo/README.md should be included"),
    ]
    for path, expected, message in expectations:
        assert allowed_by_gitignore(repo, path) is expected, message
|
||||
@@ -21,7 +21,7 @@ def test_basic_functionality(tmpdir):
|
||||
with open("test_dir/file2.txt", "w") as f:
|
||||
f.write("Contents of file2")
|
||||
|
||||
result = runner.invoke(cli, ["test_dir"])
|
||||
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||
assert result.exit_code == 0
|
||||
assert "test_dir/file1.txt" in result.output
|
||||
assert "Contents of file1" in result.output
|
||||
@@ -36,7 +36,7 @@ def test_include_hidden(tmpdir):
|
||||
with open("test_dir/.hidden.txt", "w") as f:
|
||||
f.write("Contents of hidden file")
|
||||
|
||||
result = runner.invoke(cli, ["test_dir"])
|
||||
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||
assert result.exit_code == 0
|
||||
assert "test_dir/.hidden.txt" not in result.output
|
||||
|
||||
@@ -61,11 +61,9 @@ def test_ignore_gitignore(tmpdir):
|
||||
with open("test_dir/nested_include/included2.txt", "w") as f:
|
||||
f.write("This nested file should be included")
|
||||
with open("test_dir/nested_ignore/.gitignore", "w") as f:
|
||||
f.write("nested_ignore.txt")
|
||||
f.write("*")
|
||||
with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f:
|
||||
f.write("This nested file should not be included")
|
||||
with open("test_dir/nested_ignore/actually_include.txt", "w") as f:
|
||||
f.write("This nested file should actually be included")
|
||||
|
||||
result = runner.invoke(cli, ["test_dir", "-c"])
|
||||
assert result.exit_code == 0
|
||||
@@ -74,7 +72,6 @@ def test_ignore_gitignore(tmpdir):
|
||||
assert filenames == {
|
||||
"test_dir/included.txt",
|
||||
"test_dir/nested_include/included2.txt",
|
||||
"test_dir/nested_ignore/actually_include.txt",
|
||||
}
|
||||
|
||||
result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"])
|
||||
@@ -86,7 +83,6 @@ def test_ignore_gitignore(tmpdir):
|
||||
"test_dir/ignored.txt",
|
||||
"test_dir/nested_include/included2.txt",
|
||||
"test_dir/nested_ignore/nested_ignore.txt",
|
||||
"test_dir/nested_ignore/actually_include.txt",
|
||||
}
|
||||
|
||||
|
||||
@@ -243,7 +239,7 @@ def test_binary_file_warning(tmpdir):
|
||||
with open("test_dir/text_file.txt", "w") as f:
|
||||
f.write("This is a text file")
|
||||
|
||||
result = runner.invoke(cli, ["test_dir"])
|
||||
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||
assert result.exit_code == 0
|
||||
|
||||
stdout = result.stdout
|
||||
@@ -331,7 +327,7 @@ def test_line_numbers(tmpdir):
|
||||
with open("test_dir/multiline.txt", "w") as f:
|
||||
f.write(test_content)
|
||||
|
||||
result = runner.invoke(cli, ["test_dir"])
|
||||
result = runner.invoke(cli, ["test_dir"], catch_exceptions=False)
|
||||
assert result.exit_code == 0
|
||||
assert "1 First line" not in result.output
|
||||
assert test_content in result.output
|
||||
|
||||
Reference in New Issue
Block a user