feat: grep should support gitignore/crushignore (#428)

* feat: support .crushignore as well as .gitignore
* docs: update
* refactor: simplify
* chore: fmt
* feat: grep should support gitignore/crushignore
* fix: small fixes
* fix: small fixes
* fix: ripgrep
* fix: rg
* fix: tst
* test: fixes
* refactor: organized code a bit
* fix: try
* fix: temp
* chore: lint

---------

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>
This commit is contained in:
Carlos Alexandro Becker
2025-08-01 22:39:50 -03:00
committed by GitHub
parent ab7a093c8c
commit 2a43184580
7 changed files with 193 additions and 64 deletions

View File

@@ -20,7 +20,8 @@
- **Structs**: Use struct embedding for composition, group related fields
- **Constants**: Use typed constants with iota for enums, group in const blocks
- **Testing**: Use testify's `require` package, parallel tests with `t.Parallel()`,
`t.SetEnv()` to set environment variables.
`t.SetEnv()` to set environment variables. Always use `t.Tempdir()` when in
need of a temporary directory. This directory does not need to be removed.
- **JSON tags**: Use snake_case for JSON field names
- **File permissions**: Use octal notation (0o755, 0o644) for file permissions
- **Comments**: End comments in periods unless comments are at the end of the line.

View File

@@ -1,11 +1,8 @@
package fsext
import (
"context"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
@@ -13,55 +10,10 @@ import (
"github.com/bmatcuk/doublestar/v4"
"github.com/charlievieth/fastwalk"
"github.com/charmbracelet/crush/internal/log"
ignore "github.com/sabhiram/go-gitignore"
)
var rgPath string
func init() {
var err error
rgPath, err = exec.LookPath("rg")
if err != nil {
if log.Initialized() {
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
}
}
}
func GetRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
if rgPath == "" {
return nil
}
rgArgs := []string{
"--files",
"-L",
"--null",
}
if globPattern != "" {
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
globPattern = "/" + globPattern
}
rgArgs = append(rgArgs, "--glob", globPattern)
}
return exec.CommandContext(ctx, rgPath, rgArgs...)
}
func GetRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
if rgPath == "" {
return nil
}
// Use -n to show line numbers and include the matched line
args := []string{"-H", "-n", pattern}
if include != "" {
args = append(args, "--glob", include)
}
args = append(args, path)
return exec.CommandContext(ctx, rgPath, args...)
}
type FileInfo struct {
Path string
ModTime time.Time
@@ -89,8 +41,6 @@ func SkipHidden(path string) bool {
"obj": true,
"out": true,
"coverage": true,
"tmp": true,
"temp": true,
"logs": true,
"generated": true,
"bower_components": true,
@@ -137,7 +87,8 @@ func NewFastGlobWalker(searchPath string) *FastGlobWalker {
return walker
}
func (w *FastGlobWalker) shouldSkip(path string) bool {
// ShouldSkip checks if a path should be skipped based on gitignore, crushignore, and hidden file rules
func (w *FastGlobWalker) ShouldSkip(path string) bool {
if SkipHidden(path) {
return true
}
@@ -177,13 +128,13 @@ func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool,
}
if d.IsDir() {
if walker.shouldSkip(path) {
if walker.ShouldSkip(path) {
return filepath.SkipDir
}
return nil
}
if walker.shouldSkip(path) {
if walker.ShouldSkip(path) {
return nil
}

View File

@@ -139,7 +139,7 @@ func (g *globTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error)
}
func globFiles(ctx context.Context, pattern, searchPath string, limit int) ([]string, bool, error) {
cmdRg := fsext.GetRgCmd(ctx, pattern)
cmdRg := getRgCmd(ctx, pattern)
if cmdRg != nil {
cmdRg.Dir = searchPath
matches, err := runRipgrep(cmdRg, searchPath, limit)

View File

@@ -125,6 +125,11 @@ LIMITATIONS:
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped
IGNORE FILE SUPPORT:
- Respects .gitignore patterns to skip ignored files and directories
- Respects .crushignore patterns for additional ignore rules
- Both ignore files are automatically detected in the search root directory
CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
@@ -269,11 +274,17 @@ func searchFiles(ctx context.Context, pattern, rootPath, include string, limit i
}
func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
cmd := fsext.GetRgSearchCmd(ctx, pattern, path, include)
cmd := getRgSearchCmd(ctx, pattern, path, include)
if cmd == nil {
return nil, fmt.Errorf("ripgrep not found in $PATH")
}
cmd.Args = append(
cmd.Args,
"--ignore-file", filepath.Join(path, ".gitignore"),
"--ignore-file", filepath.Join(path, ".crushignore"),
)
output, err := cmd.Output()
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
@@ -337,6 +348,9 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
}
}
// Create walker with gitignore and crushignore support
walker := fsext.NewFastGlobWalker(rootPath)
err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil // Skip errors
@@ -346,7 +360,8 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
return nil // Skip directories
}
if fsext.SkipHidden(path) {
// Use walker's shouldSkip method instead of just SkipHidden
if walker.ShouldSkip(path) {
return nil
}

View File

@@ -1,8 +1,14 @@
package tools
import (
"context"
"encoding/json"
"os"
"path/filepath"
"regexp"
"testing"
"github.com/stretchr/testify/require"
)
func TestRegexCache(t *testing.T) {
@@ -52,6 +58,114 @@ func TestGlobToRegexCaching(t *testing.T) {
}
}
func TestGrepWithIgnoreFiles(t *testing.T) {
tempDir := t.TempDir()
// Create test files
testFiles := map[string]string{
"file1.txt": "hello world",
"file2.txt": "hello world",
"ignored/file3.txt": "hello world",
"node_modules/lib.js": "hello world",
"secret.key": "hello world",
}
for path, content := range testFiles {
fullPath := filepath.Join(tempDir, path)
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
}
// Create .gitignore file
gitignoreContent := "ignored/\n*.key\n"
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644))
// Create .crushignore file
crushignoreContent := "node_modules/\n"
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
// Create grep tool
grepTool := NewGrepTool(tempDir)
// Create grep parameters
params := GrepParams{
Pattern: "hello world",
Path: tempDir,
}
paramsJSON, err := json.Marshal(params)
require.NoError(t, err)
// Run grep
call := ToolCall{Input: string(paramsJSON)}
response, err := grepTool.Run(context.Background(), call)
require.NoError(t, err)
// Check results - should only find file1.txt and file2.txt
// ignored/file3.txt should be ignored by .gitignore
// node_modules/lib.js should be ignored by .crushignore
// secret.key should be ignored by .gitignore
result := response.Content
require.Contains(t, result, "file1.txt")
require.Contains(t, result, "file2.txt")
require.NotContains(t, result, "file3.txt")
require.NotContains(t, result, "lib.js")
require.NotContains(t, result, "secret.key")
}
func TestSearchImplementations(t *testing.T) {
t.Parallel()
tempDir := t.TempDir()
for path, content := range map[string]string{
"file1.go": "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}",
"file2.js": "console.log('hello world');",
"file3.txt": "hello world from text file",
"binary.exe": "\x00\x01\x02\x03",
"empty.txt": "",
"subdir/nested.go": "package nested\n// hello world comment",
".hidden.txt": "hello world in hidden file",
"file4.txt": "hello world from a banana",
"file5.txt": "hello world from a grape",
} {
fullPath := filepath.Join(tempDir, path)
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
}
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644))
for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
"regex": searchFilesWithRegex,
"rg": func(pattern, path, include string) ([]grepMatch, error) {
return searchWithRipgrep(t.Context(), pattern, path, include)
},
} {
t.Run(name, func(t *testing.T) {
t.Parallel()
if name == "rg" && getRg() == "" {
t.Skip("rg is not in $PATH")
}
matches, err := fn("hello world", tempDir, "")
require.NoError(t, err)
require.Equal(t, len(matches), 4)
for _, match := range matches {
require.NotEmpty(t, match.path)
require.NotZero(t, match.lineNum)
require.NotEmpty(t, match.lineText)
require.NotZero(t, match.modTime)
require.NotContains(t, match.path, ".hidden.txt")
require.NotContains(t, match.path, "file4.txt")
require.NotContains(t, match.path, "file5.txt")
require.NotContains(t, match.path, "binary.exe")
}
})
}
}
// Benchmark to show performance improvement
func BenchmarkRegexCacheVsCompile(b *testing.B) {
cache := newRegexCache()

53
internal/llm/tools/rg.go Normal file
View File

@@ -0,0 +1,53 @@
package tools
import (
"context"
"log/slog"
"os/exec"
"path/filepath"
"strings"
"sync"
"github.com/charmbracelet/crush/internal/log"
)
var getRg = sync.OnceValue(func() string {
path, err := exec.LookPath("rg")
if err != nil {
if log.Initialized() {
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
}
return ""
}
return path
})
func getRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
name := getRg()
if name == "" {
return nil
}
args := []string{"--files", "-L", "--null"}
if globPattern != "" {
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
globPattern = "/" + globPattern
}
args = append(args, "--glob", globPattern)
}
return exec.CommandContext(ctx, name, args...)
}
func getRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
name := getRg()
if name == "" {
return nil
}
// Use -n to show line numbers and include the matched line
args := []string{"-H", "-n", pattern}
if include != "" {
args = append(args, "--glob", include)
}
args = append(args, path)
return exec.CommandContext(ctx, name, args...)
}

View File

@@ -2,7 +2,6 @@ package shell
import (
"context"
"os"
"strings"
"testing"
)
@@ -92,18 +91,14 @@ func TestCommandBlocking(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a temporary directory for each test
tmpDir, err := os.MkdirTemp("", "shell-test-*")
if err != nil {
t.Fatalf("Failed to create temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
tmpDir := t.TempDir()
shell := NewShell(&Options{
WorkingDir: tmpDir,
BlockFuncs: tt.blockFuncs,
})
_, _, err = shell.Exec(context.Background(), tt.command)
_, _, err := shell.Exec(context.Background(), tt.command)
if tt.shouldBlock {
if err == nil {