From 2a431845803adcded25746d24d6d093c4f4076cb Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Fri, 1 Aug 2025 22:39:50 -0300 Subject: [PATCH] feat: grep should support gitignore/crushignore (#428) * feat: support .crushignore as well as .gitignore * docs: update * refactor: simplify * chore: fmt * feat: grep should support gitignore/crushignore * fix: small fixes * fix: small fixes * fix: ripgrep * fix: rg * fix: tst * test: fixes * refactor: organized code a bit * fix: try * fix: temp * chore: lint --------- Signed-off-by: Carlos Alexandro Becker --- CRUSH.md | 3 +- internal/fsext/fileutil.go | 57 +------------- internal/llm/tools/glob.go | 2 +- internal/llm/tools/grep.go | 19 ++++- internal/llm/tools/grep_test.go | 114 +++++++++++++++++++++++++++ internal/llm/tools/rg.go | 53 +++++++++++++ internal/shell/command_block_test.go | 9 +-- 7 files changed, 193 insertions(+), 64 deletions(-) create mode 100644 internal/llm/tools/rg.go diff --git a/CRUSH.md b/CRUSH.md index a49f8958..69132723 100644 --- a/CRUSH.md +++ b/CRUSH.md @@ -20,7 +20,8 @@ - **Structs**: Use struct embedding for composition, group related fields - **Constants**: Use typed constants with iota for enums, group in const blocks - **Testing**: Use testify's `require` package, parallel tests with `t.Parallel()`, - `t.SetEnv()` to set environment variables. + `t.SetEnv()` to set environment variables. Always use `t.Tempdir()` when in + need of a temporary directory. This directory does not need to be removed. - **JSON tags**: Use snake_case for JSON field names - **File permissions**: Use octal notation (0o755, 0o644) for file permissions - **Comments**: End comments in periods unless comments are at the end of the line. diff --git a/internal/fsext/fileutil.go b/internal/fsext/fileutil.go index 5fea26c4..27b5e48f 100644 --- a/internal/fsext/fileutil.go +++ b/internal/fsext/fileutil.go @@ -1,11 +1,8 @@ package fsext import ( - "context" "fmt" - "log/slog" "os" - "os/exec" "path/filepath" "sort" "strings" @@ -13,55 +10,10 @@ import ( "github.com/bmatcuk/doublestar/v4" "github.com/charlievieth/fastwalk" - "github.com/charmbracelet/crush/internal/log" ignore "github.com/sabhiram/go-gitignore" ) -var rgPath string - -func init() { - var err error - rgPath, err = exec.LookPath("rg") - if err != nil { - if log.Initialized() { - slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.") - } - } -} - -func GetRgCmd(ctx context.Context, globPattern string) *exec.Cmd { - if rgPath == "" { - return nil - } - rgArgs := []string{ - "--files", - "-L", - "--null", - } - if globPattern != "" { - if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") { - globPattern = "/" + globPattern - } - rgArgs = append(rgArgs, "--glob", globPattern) - } - return exec.CommandContext(ctx, rgPath, rgArgs...) -} - -func GetRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd { - if rgPath == "" { - return nil - } - // Use -n to show line numbers and include the matched line - args := []string{"-H", "-n", pattern} - if include != "" { - args = append(args, "--glob", include) - } - args = append(args, path) - - return exec.CommandContext(ctx, rgPath, args...) -} - type FileInfo struct { Path string ModTime time.Time @@ -89,8 +41,6 @@ func SkipHidden(path string) bool { "obj": true, "out": true, "coverage": true, - "tmp": true, - "temp": true, "logs": true, "generated": true, "bower_components": true, @@ -137,7 +87,8 @@ func NewFastGlobWalker(searchPath string) *FastGlobWalker { return walker } -func (w *FastGlobWalker) shouldSkip(path string) bool { +// ShouldSkip checks if a path should be skipped based on gitignore, crushignore, and hidden file rules +func (w *FastGlobWalker) ShouldSkip(path string) bool { if SkipHidden(path) { return true } @@ -177,13 +128,13 @@ func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool, } if d.IsDir() { - if walker.shouldSkip(path) { + if walker.ShouldSkip(path) { return filepath.SkipDir } return nil } - if walker.shouldSkip(path) { + if walker.ShouldSkip(path) { return nil } diff --git a/internal/llm/tools/glob.go b/internal/llm/tools/glob.go index 3fe71fa3..1d558cc7 100644 --- a/internal/llm/tools/glob.go +++ b/internal/llm/tools/glob.go @@ -139,7 +139,7 @@ func (g *globTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) } func globFiles(ctx context.Context, pattern, searchPath string, limit int) ([]string, bool, error) { - cmdRg := fsext.GetRgCmd(ctx, pattern) + cmdRg := getRgCmd(ctx, pattern) if cmdRg != nil { cmdRg.Dir = searchPath matches, err := runRipgrep(cmdRg, searchPath, limit) diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index b8af7cb2..4d0fbd75 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -125,6 +125,11 @@ LIMITATIONS: - Very large binary files may be skipped - Hidden files (starting with '.') are skipped +IGNORE FILE SUPPORT: +- Respects .gitignore patterns to skip ignored files and directories +- Respects .crushignore patterns for additional ignore rules +- Both ignore files are automatically detected in the search root directory + CROSS-PLATFORM NOTES: - Uses ripgrep (rg) command if available for better performance - Falls back to built-in Go implementation if ripgrep is not available @@ -269,11 +274,17 @@ func searchFiles(ctx context.Context, pattern, rootPath, include string, limit i } func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) { - cmd := fsext.GetRgSearchCmd(ctx, pattern, path, include) + cmd := getRgSearchCmd(ctx, pattern, path, include) if cmd == nil { return nil, fmt.Errorf("ripgrep not found in $PATH") } + cmd.Args = append( + cmd.Args, + "--ignore-file", filepath.Join(path, ".gitignore"), + "--ignore-file", filepath.Join(path, ".crushignore"), + ) + output, err := cmd.Output() if err != nil { if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 { @@ -337,6 +348,9 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error } } + // Create walker with gitignore and crushignore support + walker := fsext.NewFastGlobWalker(rootPath) + err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error { if err != nil { return nil // Skip errors @@ -346,7 +360,8 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error return nil // Skip directories } - if fsext.SkipHidden(path) { + // Use walker's shouldSkip method instead of just SkipHidden + if walker.ShouldSkip(path) { return nil } diff --git a/internal/llm/tools/grep_test.go b/internal/llm/tools/grep_test.go index 22680fd0..cb16a610 100644 --- a/internal/llm/tools/grep_test.go +++ b/internal/llm/tools/grep_test.go @@ -1,8 +1,14 @@ package tools import ( + "context" + "encoding/json" + "os" + "path/filepath" "regexp" "testing" + + "github.com/stretchr/testify/require" ) func TestRegexCache(t *testing.T) { @@ -52,6 +58,114 @@ func TestGlobToRegexCaching(t *testing.T) { } } +func TestGrepWithIgnoreFiles(t *testing.T) { + tempDir := t.TempDir() + + // Create test files + testFiles := map[string]string{ + "file1.txt": "hello world", + "file2.txt": "hello world", + "ignored/file3.txt": "hello world", + "node_modules/lib.js": "hello world", + "secret.key": "hello world", + } + + for path, content := range testFiles { + fullPath := filepath.Join(tempDir, path) + require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755)) + require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644)) + } + + // Create .gitignore file + gitignoreContent := "ignored/\n*.key\n" + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644)) + + // Create .crushignore file + crushignoreContent := "node_modules/\n" + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644)) + + // Create grep tool + grepTool := NewGrepTool(tempDir) + + // Create grep parameters + params := GrepParams{ + Pattern: "hello world", + Path: tempDir, + } + paramsJSON, err := json.Marshal(params) + require.NoError(t, err) + + // Run grep + call := ToolCall{Input: string(paramsJSON)} + response, err := grepTool.Run(context.Background(), call) + require.NoError(t, err) + + // Check results - should only find file1.txt and file2.txt + // ignored/file3.txt should be ignored by .gitignore + // node_modules/lib.js should be ignored by .crushignore + // secret.key should be ignored by .gitignore + result := response.Content + require.Contains(t, result, "file1.txt") + require.Contains(t, result, "file2.txt") + require.NotContains(t, result, "file3.txt") + require.NotContains(t, result, "lib.js") + require.NotContains(t, result, "secret.key") +} + +func TestSearchImplementations(t *testing.T) { + t.Parallel() + tempDir := t.TempDir() + + for path, content := range map[string]string{ + "file1.go": "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}", + "file2.js": "console.log('hello world');", + "file3.txt": "hello world from text file", + "binary.exe": "\x00\x01\x02\x03", + "empty.txt": "", + "subdir/nested.go": "package nested\n// hello world comment", + ".hidden.txt": "hello world in hidden file", + "file4.txt": "hello world from a banana", + "file5.txt": "hello world from a grape", + } { + fullPath := filepath.Join(tempDir, path) + require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755)) + require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644)) + } + + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644)) + + for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){ + "regex": searchFilesWithRegex, + "rg": func(pattern, path, include string) ([]grepMatch, error) { + return searchWithRipgrep(t.Context(), pattern, path, include) + }, + } { + t.Run(name, func(t *testing.T) { + t.Parallel() + + if name == "rg" && getRg() == "" { + t.Skip("rg is not in $PATH") + } + + matches, err := fn("hello world", tempDir, "") + require.NoError(t, err) + + require.Equal(t, len(matches), 4) + for _, match := range matches { + require.NotEmpty(t, match.path) + require.NotZero(t, match.lineNum) + require.NotEmpty(t, match.lineText) + require.NotZero(t, match.modTime) + require.NotContains(t, match.path, ".hidden.txt") + require.NotContains(t, match.path, "file4.txt") + require.NotContains(t, match.path, "file5.txt") + require.NotContains(t, match.path, "binary.exe") + } + }) + } +} + // Benchmark to show performance improvement func BenchmarkRegexCacheVsCompile(b *testing.B) { cache := newRegexCache() diff --git a/internal/llm/tools/rg.go b/internal/llm/tools/rg.go new file mode 100644 index 00000000..40ab7f2f --- /dev/null +++ b/internal/llm/tools/rg.go @@ -0,0 +1,53 @@ +package tools + +import ( + "context" + "log/slog" + "os/exec" + "path/filepath" + "strings" + "sync" + + "github.com/charmbracelet/crush/internal/log" +) + +var getRg = sync.OnceValue(func() string { + path, err := exec.LookPath("rg") + if err != nil { + if log.Initialized() { + slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.") + } + return "" + } + return path +}) + +func getRgCmd(ctx context.Context, globPattern string) *exec.Cmd { + name := getRg() + if name == "" { + return nil + } + args := []string{"--files", "-L", "--null"} + if globPattern != "" { + if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") { + globPattern = "/" + globPattern + } + args = append(args, "--glob", globPattern) + } + return exec.CommandContext(ctx, name, args...) +} + +func getRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd { + name := getRg() + if name == "" { + return nil + } + // Use -n to show line numbers and include the matched line + args := []string{"-H", "-n", pattern} + if include != "" { + args = append(args, "--glob", include) + } + args = append(args, path) + + return exec.CommandContext(ctx, name, args...) +} diff --git a/internal/shell/command_block_test.go b/internal/shell/command_block_test.go index fd7c46bc..0d29b61e 100644 --- a/internal/shell/command_block_test.go +++ b/internal/shell/command_block_test.go @@ -2,7 +2,6 @@ package shell import ( "context" - "os" "strings" "testing" ) @@ -92,18 +91,14 @@ func TestCommandBlocking(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Create a temporary directory for each test - tmpDir, err := os.MkdirTemp("", "shell-test-*") - if err != nil { - t.Fatalf("Failed to create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) + tmpDir := t.TempDir() shell := NewShell(&Options{ WorkingDir: tmpDir, BlockFuncs: tt.blockFuncs, }) - _, _, err = shell.Exec(context.Background(), tt.command) + _, _, err := shell.Exec(context.Background(), tt.command) if tt.shouldBlock { if err == nil {