mirror of
https://github.com/charmbracelet/crush.git
synced 2025-08-02 05:20:46 +03:00
feat: grep should support gitignore/crushignore (#428)
* feat: support .crushignore as well as .gitignore * docs: update * refactor: simplify * chore: fmt * feat: grep should support gitignore/crushignore * fix: small fixes * fix: small fixes * fix: ripgrep * fix: rg * fix: tst * test: fixes * refactor: organized code a bit * fix: try * fix: temp * chore: lint --------- Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ab7a093c8c
commit
2a43184580
3
CRUSH.md
3
CRUSH.md
@@ -20,7 +20,8 @@
|
||||
- **Structs**: Use struct embedding for composition, group related fields
|
||||
- **Constants**: Use typed constants with iota for enums, group in const blocks
|
||||
- **Testing**: Use testify's `require` package, parallel tests with `t.Parallel()`,
|
||||
`t.SetEnv()` to set environment variables.
|
||||
`t.SetEnv()` to set environment variables. Always use `t.Tempdir()` when in
|
||||
need of a temporary directory. This directory does not need to be removed.
|
||||
- **JSON tags**: Use snake_case for JSON field names
|
||||
- **File permissions**: Use octal notation (0o755, 0o644) for file permissions
|
||||
- **Comments**: End comments in periods unless comments are at the end of the line.
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
package fsext
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -13,55 +10,10 @@ import (
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/charlievieth/fastwalk"
|
||||
"github.com/charmbracelet/crush/internal/log"
|
||||
|
||||
ignore "github.com/sabhiram/go-gitignore"
|
||||
)
|
||||
|
||||
var rgPath string
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
rgPath, err = exec.LookPath("rg")
|
||||
if err != nil {
|
||||
if log.Initialized() {
|
||||
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func GetRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
|
||||
if rgPath == "" {
|
||||
return nil
|
||||
}
|
||||
rgArgs := []string{
|
||||
"--files",
|
||||
"-L",
|
||||
"--null",
|
||||
}
|
||||
if globPattern != "" {
|
||||
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
|
||||
globPattern = "/" + globPattern
|
||||
}
|
||||
rgArgs = append(rgArgs, "--glob", globPattern)
|
||||
}
|
||||
return exec.CommandContext(ctx, rgPath, rgArgs...)
|
||||
}
|
||||
|
||||
func GetRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
|
||||
if rgPath == "" {
|
||||
return nil
|
||||
}
|
||||
// Use -n to show line numbers and include the matched line
|
||||
args := []string{"-H", "-n", pattern}
|
||||
if include != "" {
|
||||
args = append(args, "--glob", include)
|
||||
}
|
||||
args = append(args, path)
|
||||
|
||||
return exec.CommandContext(ctx, rgPath, args...)
|
||||
}
|
||||
|
||||
type FileInfo struct {
|
||||
Path string
|
||||
ModTime time.Time
|
||||
@@ -89,8 +41,6 @@ func SkipHidden(path string) bool {
|
||||
"obj": true,
|
||||
"out": true,
|
||||
"coverage": true,
|
||||
"tmp": true,
|
||||
"temp": true,
|
||||
"logs": true,
|
||||
"generated": true,
|
||||
"bower_components": true,
|
||||
@@ -137,7 +87,8 @@ func NewFastGlobWalker(searchPath string) *FastGlobWalker {
|
||||
return walker
|
||||
}
|
||||
|
||||
func (w *FastGlobWalker) shouldSkip(path string) bool {
|
||||
// ShouldSkip checks if a path should be skipped based on gitignore, crushignore, and hidden file rules
|
||||
func (w *FastGlobWalker) ShouldSkip(path string) bool {
|
||||
if SkipHidden(path) {
|
||||
return true
|
||||
}
|
||||
@@ -177,13 +128,13 @@ func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool,
|
||||
}
|
||||
|
||||
if d.IsDir() {
|
||||
if walker.shouldSkip(path) {
|
||||
if walker.ShouldSkip(path) {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if walker.shouldSkip(path) {
|
||||
if walker.ShouldSkip(path) {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -139,7 +139,7 @@ func (g *globTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error)
|
||||
}
|
||||
|
||||
func globFiles(ctx context.Context, pattern, searchPath string, limit int) ([]string, bool, error) {
|
||||
cmdRg := fsext.GetRgCmd(ctx, pattern)
|
||||
cmdRg := getRgCmd(ctx, pattern)
|
||||
if cmdRg != nil {
|
||||
cmdRg.Dir = searchPath
|
||||
matches, err := runRipgrep(cmdRg, searchPath, limit)
|
||||
|
||||
@@ -125,6 +125,11 @@ LIMITATIONS:
|
||||
- Very large binary files may be skipped
|
||||
- Hidden files (starting with '.') are skipped
|
||||
|
||||
IGNORE FILE SUPPORT:
|
||||
- Respects .gitignore patterns to skip ignored files and directories
|
||||
- Respects .crushignore patterns for additional ignore rules
|
||||
- Both ignore files are automatically detected in the search root directory
|
||||
|
||||
CROSS-PLATFORM NOTES:
|
||||
- Uses ripgrep (rg) command if available for better performance
|
||||
- Falls back to built-in Go implementation if ripgrep is not available
|
||||
@@ -269,11 +274,17 @@ func searchFiles(ctx context.Context, pattern, rootPath, include string, limit i
|
||||
}
|
||||
|
||||
func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
|
||||
cmd := fsext.GetRgSearchCmd(ctx, pattern, path, include)
|
||||
cmd := getRgSearchCmd(ctx, pattern, path, include)
|
||||
if cmd == nil {
|
||||
return nil, fmt.Errorf("ripgrep not found in $PATH")
|
||||
}
|
||||
|
||||
cmd.Args = append(
|
||||
cmd.Args,
|
||||
"--ignore-file", filepath.Join(path, ".gitignore"),
|
||||
"--ignore-file", filepath.Join(path, ".crushignore"),
|
||||
)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
|
||||
@@ -337,6 +348,9 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
|
||||
}
|
||||
}
|
||||
|
||||
// Create walker with gitignore and crushignore support
|
||||
walker := fsext.NewFastGlobWalker(rootPath)
|
||||
|
||||
err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return nil // Skip errors
|
||||
@@ -346,7 +360,8 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
|
||||
return nil // Skip directories
|
||||
}
|
||||
|
||||
if fsext.SkipHidden(path) {
|
||||
// Use walker's shouldSkip method instead of just SkipHidden
|
||||
if walker.ShouldSkip(path) {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestRegexCache(t *testing.T) {
|
||||
@@ -52,6 +58,114 @@ func TestGlobToRegexCaching(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrepWithIgnoreFiles(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// Create test files
|
||||
testFiles := map[string]string{
|
||||
"file1.txt": "hello world",
|
||||
"file2.txt": "hello world",
|
||||
"ignored/file3.txt": "hello world",
|
||||
"node_modules/lib.js": "hello world",
|
||||
"secret.key": "hello world",
|
||||
}
|
||||
|
||||
for path, content := range testFiles {
|
||||
fullPath := filepath.Join(tempDir, path)
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
|
||||
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
|
||||
}
|
||||
|
||||
// Create .gitignore file
|
||||
gitignoreContent := "ignored/\n*.key\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644))
|
||||
|
||||
// Create .crushignore file
|
||||
crushignoreContent := "node_modules/\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
|
||||
|
||||
// Create grep tool
|
||||
grepTool := NewGrepTool(tempDir)
|
||||
|
||||
// Create grep parameters
|
||||
params := GrepParams{
|
||||
Pattern: "hello world",
|
||||
Path: tempDir,
|
||||
}
|
||||
paramsJSON, err := json.Marshal(params)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Run grep
|
||||
call := ToolCall{Input: string(paramsJSON)}
|
||||
response, err := grepTool.Run(context.Background(), call)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check results - should only find file1.txt and file2.txt
|
||||
// ignored/file3.txt should be ignored by .gitignore
|
||||
// node_modules/lib.js should be ignored by .crushignore
|
||||
// secret.key should be ignored by .gitignore
|
||||
result := response.Content
|
||||
require.Contains(t, result, "file1.txt")
|
||||
require.Contains(t, result, "file2.txt")
|
||||
require.NotContains(t, result, "file3.txt")
|
||||
require.NotContains(t, result, "lib.js")
|
||||
require.NotContains(t, result, "secret.key")
|
||||
}
|
||||
|
||||
func TestSearchImplementations(t *testing.T) {
|
||||
t.Parallel()
|
||||
tempDir := t.TempDir()
|
||||
|
||||
for path, content := range map[string]string{
|
||||
"file1.go": "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}",
|
||||
"file2.js": "console.log('hello world');",
|
||||
"file3.txt": "hello world from text file",
|
||||
"binary.exe": "\x00\x01\x02\x03",
|
||||
"empty.txt": "",
|
||||
"subdir/nested.go": "package nested\n// hello world comment",
|
||||
".hidden.txt": "hello world in hidden file",
|
||||
"file4.txt": "hello world from a banana",
|
||||
"file5.txt": "hello world from a grape",
|
||||
} {
|
||||
fullPath := filepath.Join(tempDir, path)
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
|
||||
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
|
||||
}
|
||||
|
||||
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644))
|
||||
|
||||
for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
|
||||
"regex": searchFilesWithRegex,
|
||||
"rg": func(pattern, path, include string) ([]grepMatch, error) {
|
||||
return searchWithRipgrep(t.Context(), pattern, path, include)
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if name == "rg" && getRg() == "" {
|
||||
t.Skip("rg is not in $PATH")
|
||||
}
|
||||
|
||||
matches, err := fn("hello world", tempDir, "")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, len(matches), 4)
|
||||
for _, match := range matches {
|
||||
require.NotEmpty(t, match.path)
|
||||
require.NotZero(t, match.lineNum)
|
||||
require.NotEmpty(t, match.lineText)
|
||||
require.NotZero(t, match.modTime)
|
||||
require.NotContains(t, match.path, ".hidden.txt")
|
||||
require.NotContains(t, match.path, "file4.txt")
|
||||
require.NotContains(t, match.path, "file5.txt")
|
||||
require.NotContains(t, match.path, "binary.exe")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark to show performance improvement
|
||||
func BenchmarkRegexCacheVsCompile(b *testing.B) {
|
||||
cache := newRegexCache()
|
||||
|
||||
53
internal/llm/tools/rg.go
Normal file
53
internal/llm/tools/rg.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/charmbracelet/crush/internal/log"
|
||||
)
|
||||
|
||||
var getRg = sync.OnceValue(func() string {
|
||||
path, err := exec.LookPath("rg")
|
||||
if err != nil {
|
||||
if log.Initialized() {
|
||||
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return path
|
||||
})
|
||||
|
||||
func getRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
|
||||
name := getRg()
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
args := []string{"--files", "-L", "--null"}
|
||||
if globPattern != "" {
|
||||
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
|
||||
globPattern = "/" + globPattern
|
||||
}
|
||||
args = append(args, "--glob", globPattern)
|
||||
}
|
||||
return exec.CommandContext(ctx, name, args...)
|
||||
}
|
||||
|
||||
func getRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
|
||||
name := getRg()
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
// Use -n to show line numbers and include the matched line
|
||||
args := []string{"-H", "-n", pattern}
|
||||
if include != "" {
|
||||
args = append(args, "--glob", include)
|
||||
}
|
||||
args = append(args, path)
|
||||
|
||||
return exec.CommandContext(ctx, name, args...)
|
||||
}
|
||||
@@ -2,7 +2,6 @@ package shell
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
@@ -92,18 +91,14 @@ func TestCommandBlocking(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create a temporary directory for each test
|
||||
tmpDir, err := os.MkdirTemp("", "shell-test-*")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp dir: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
shell := NewShell(&Options{
|
||||
WorkingDir: tmpDir,
|
||||
BlockFuncs: tt.blockFuncs,
|
||||
})
|
||||
|
||||
_, _, err = shell.Exec(context.Background(), tt.command)
|
||||
_, _, err := shell.Exec(context.Background(), tt.command)
|
||||
|
||||
if tt.shouldBlock {
|
||||
if err == nil {
|
||||
|
||||
Reference in New Issue
Block a user