mirror of
https://github.com/charmbracelet/crush.git
synced 2025-08-02 05:20:46 +03:00
feat: grep should support gitignore/crushignore (#428)
* feat: support .crushignore as well as .gitignore * docs: update * refactor: simplify * chore: fmt * feat: grep should support gitignore/crushignore * fix: small fixes * fix: small fixes * fix: ripgrep * fix: rg * fix: tst * test: fixes * refactor: organized code a bit * fix: try * fix: temp * chore: lint --------- Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ab7a093c8c
commit
2a43184580
3
CRUSH.md
3
CRUSH.md
@@ -20,7 +20,8 @@
|
|||||||
- **Structs**: Use struct embedding for composition, group related fields
|
- **Structs**: Use struct embedding for composition, group related fields
|
||||||
- **Constants**: Use typed constants with iota for enums, group in const blocks
|
- **Constants**: Use typed constants with iota for enums, group in const blocks
|
||||||
- **Testing**: Use testify's `require` package, parallel tests with `t.Parallel()`,
|
- **Testing**: Use testify's `require` package, parallel tests with `t.Parallel()`,
|
||||||
`t.SetEnv()` to set environment variables.
|
`t.Setenv()` to set environment variables. Always use `t.TempDir()` when in
|
||||||
|
need of a temporary directory. This directory does not need to be removed.
|
||||||
- **JSON tags**: Use snake_case for JSON field names
|
- **JSON tags**: Use snake_case for JSON field names
|
||||||
- **File permissions**: Use octal notation (0o755, 0o644) for file permissions
|
- **File permissions**: Use octal notation (0o755, 0o644) for file permissions
|
||||||
- **Comments**: End comments in periods unless comments are at the end of the line.
|
- **Comments**: End comments in periods unless comments are at the end of the line.
|
||||||
|
|||||||
@@ -1,11 +1,8 @@
|
|||||||
package fsext
|
package fsext
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -13,55 +10,10 @@ import (
|
|||||||
|
|
||||||
"github.com/bmatcuk/doublestar/v4"
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
"github.com/charlievieth/fastwalk"
|
"github.com/charlievieth/fastwalk"
|
||||||
"github.com/charmbracelet/crush/internal/log"
|
|
||||||
|
|
||||||
ignore "github.com/sabhiram/go-gitignore"
|
ignore "github.com/sabhiram/go-gitignore"
|
||||||
)
|
)
|
||||||
|
|
||||||
var rgPath string
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
var err error
|
|
||||||
rgPath, err = exec.LookPath("rg")
|
|
||||||
if err != nil {
|
|
||||||
if log.Initialized() {
|
|
||||||
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
|
|
||||||
if rgPath == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
rgArgs := []string{
|
|
||||||
"--files",
|
|
||||||
"-L",
|
|
||||||
"--null",
|
|
||||||
}
|
|
||||||
if globPattern != "" {
|
|
||||||
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
|
|
||||||
globPattern = "/" + globPattern
|
|
||||||
}
|
|
||||||
rgArgs = append(rgArgs, "--glob", globPattern)
|
|
||||||
}
|
|
||||||
return exec.CommandContext(ctx, rgPath, rgArgs...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
|
|
||||||
if rgPath == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Use -n to show line numbers and include the matched line
|
|
||||||
args := []string{"-H", "-n", pattern}
|
|
||||||
if include != "" {
|
|
||||||
args = append(args, "--glob", include)
|
|
||||||
}
|
|
||||||
args = append(args, path)
|
|
||||||
|
|
||||||
return exec.CommandContext(ctx, rgPath, args...)
|
|
||||||
}
|
|
||||||
|
|
||||||
type FileInfo struct {
|
type FileInfo struct {
|
||||||
Path string
|
Path string
|
||||||
ModTime time.Time
|
ModTime time.Time
|
||||||
@@ -89,8 +41,6 @@ func SkipHidden(path string) bool {
|
|||||||
"obj": true,
|
"obj": true,
|
||||||
"out": true,
|
"out": true,
|
||||||
"coverage": true,
|
"coverage": true,
|
||||||
"tmp": true,
|
|
||||||
"temp": true,
|
|
||||||
"logs": true,
|
"logs": true,
|
||||||
"generated": true,
|
"generated": true,
|
||||||
"bower_components": true,
|
"bower_components": true,
|
||||||
@@ -137,7 +87,8 @@ func NewFastGlobWalker(searchPath string) *FastGlobWalker {
|
|||||||
return walker
|
return walker
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *FastGlobWalker) shouldSkip(path string) bool {
|
// ShouldSkip checks if a path should be skipped based on gitignore, crushignore, and hidden file rules.
|
||||||
|
func (w *FastGlobWalker) ShouldSkip(path string) bool {
|
||||||
if SkipHidden(path) {
|
if SkipHidden(path) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@@ -177,13 +128,13 @@ func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if d.IsDir() {
|
if d.IsDir() {
|
||||||
if walker.shouldSkip(path) {
|
if walker.ShouldSkip(path) {
|
||||||
return filepath.SkipDir
|
return filepath.SkipDir
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if walker.shouldSkip(path) {
|
if walker.ShouldSkip(path) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ func (g *globTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func globFiles(ctx context.Context, pattern, searchPath string, limit int) ([]string, bool, error) {
|
func globFiles(ctx context.Context, pattern, searchPath string, limit int) ([]string, bool, error) {
|
||||||
cmdRg := fsext.GetRgCmd(ctx, pattern)
|
cmdRg := getRgCmd(ctx, pattern)
|
||||||
if cmdRg != nil {
|
if cmdRg != nil {
|
||||||
cmdRg.Dir = searchPath
|
cmdRg.Dir = searchPath
|
||||||
matches, err := runRipgrep(cmdRg, searchPath, limit)
|
matches, err := runRipgrep(cmdRg, searchPath, limit)
|
||||||
|
|||||||
@@ -125,6 +125,11 @@ LIMITATIONS:
|
|||||||
- Very large binary files may be skipped
|
- Very large binary files may be skipped
|
||||||
- Hidden files (starting with '.') are skipped
|
- Hidden files (starting with '.') are skipped
|
||||||
|
|
||||||
|
IGNORE FILE SUPPORT:
|
||||||
|
- Respects .gitignore patterns to skip ignored files and directories
|
||||||
|
- Respects .crushignore patterns for additional ignore rules
|
||||||
|
- Both ignore files are automatically detected in the search root directory
|
||||||
|
|
||||||
CROSS-PLATFORM NOTES:
|
CROSS-PLATFORM NOTES:
|
||||||
- Uses ripgrep (rg) command if available for better performance
|
- Uses ripgrep (rg) command if available for better performance
|
||||||
- Falls back to built-in Go implementation if ripgrep is not available
|
- Falls back to built-in Go implementation if ripgrep is not available
|
||||||
@@ -269,11 +274,17 @@ func searchFiles(ctx context.Context, pattern, rootPath, include string, limit i
|
|||||||
}
|
}
|
||||||
|
|
||||||
func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
|
func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
|
||||||
cmd := fsext.GetRgSearchCmd(ctx, pattern, path, include)
|
cmd := getRgSearchCmd(ctx, pattern, path, include)
|
||||||
if cmd == nil {
|
if cmd == nil {
|
||||||
return nil, fmt.Errorf("ripgrep not found in $PATH")
|
return nil, fmt.Errorf("ripgrep not found in $PATH")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cmd.Args = append(
|
||||||
|
cmd.Args,
|
||||||
|
"--ignore-file", filepath.Join(path, ".gitignore"),
|
||||||
|
"--ignore-file", filepath.Join(path, ".crushignore"),
|
||||||
|
)
|
||||||
|
|
||||||
output, err := cmd.Output()
|
output, err := cmd.Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
|
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
|
||||||
@@ -337,6 +348,9 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create walker with gitignore and crushignore support
|
||||||
|
walker := fsext.NewFastGlobWalker(rootPath)
|
||||||
|
|
||||||
err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
|
err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil // Skip errors
|
return nil // Skip errors
|
||||||
@@ -346,7 +360,8 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
|
|||||||
return nil // Skip directories
|
return nil // Skip directories
|
||||||
}
|
}
|
||||||
|
|
||||||
if fsext.SkipHidden(path) {
|
// Use the walker's ShouldSkip method instead of just SkipHidden.
|
||||||
|
if walker.ShouldSkip(path) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,14 @@
|
|||||||
package tools
|
package tools
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRegexCache(t *testing.T) {
|
func TestRegexCache(t *testing.T) {
|
||||||
@@ -52,6 +58,114 @@ func TestGlobToRegexCaching(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGrepWithIgnoreFiles(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
|
// Create test files
|
||||||
|
testFiles := map[string]string{
|
||||||
|
"file1.txt": "hello world",
|
||||||
|
"file2.txt": "hello world",
|
||||||
|
"ignored/file3.txt": "hello world",
|
||||||
|
"node_modules/lib.js": "hello world",
|
||||||
|
"secret.key": "hello world",
|
||||||
|
}
|
||||||
|
|
||||||
|
for path, content := range testFiles {
|
||||||
|
fullPath := filepath.Join(tempDir, path)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create .gitignore file
|
||||||
|
gitignoreContent := "ignored/\n*.key\n"
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644))
|
||||||
|
|
||||||
|
// Create .crushignore file
|
||||||
|
crushignoreContent := "node_modules/\n"
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
|
||||||
|
|
||||||
|
// Create grep tool
|
||||||
|
grepTool := NewGrepTool(tempDir)
|
||||||
|
|
||||||
|
// Create grep parameters
|
||||||
|
params := GrepParams{
|
||||||
|
Pattern: "hello world",
|
||||||
|
Path: tempDir,
|
||||||
|
}
|
||||||
|
paramsJSON, err := json.Marshal(params)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Run grep
|
||||||
|
call := ToolCall{Input: string(paramsJSON)}
|
||||||
|
response, err := grepTool.Run(context.Background(), call)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Check results - should only find file1.txt and file2.txt
|
||||||
|
// ignored/file3.txt should be ignored by .gitignore
|
||||||
|
// node_modules/lib.js should be ignored by .crushignore
|
||||||
|
// secret.key should be ignored by .gitignore
|
||||||
|
result := response.Content
|
||||||
|
require.Contains(t, result, "file1.txt")
|
||||||
|
require.Contains(t, result, "file2.txt")
|
||||||
|
require.NotContains(t, result, "file3.txt")
|
||||||
|
require.NotContains(t, result, "lib.js")
|
||||||
|
require.NotContains(t, result, "secret.key")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchImplementations(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
|
for path, content := range map[string]string{
|
||||||
|
"file1.go": "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}",
|
||||||
|
"file2.js": "console.log('hello world');",
|
||||||
|
"file3.txt": "hello world from text file",
|
||||||
|
"binary.exe": "\x00\x01\x02\x03",
|
||||||
|
"empty.txt": "",
|
||||||
|
"subdir/nested.go": "package nested\n// hello world comment",
|
||||||
|
".hidden.txt": "hello world in hidden file",
|
||||||
|
"file4.txt": "hello world from a banana",
|
||||||
|
"file5.txt": "hello world from a grape",
|
||||||
|
} {
|
||||||
|
fullPath := filepath.Join(tempDir, path)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644))
|
||||||
|
|
||||||
|
for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
|
||||||
|
"regex": searchFilesWithRegex,
|
||||||
|
"rg": func(pattern, path, include string) ([]grepMatch, error) {
|
||||||
|
return searchWithRipgrep(t.Context(), pattern, path, include)
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
if name == "rg" && getRg() == "" {
|
||||||
|
t.Skip("rg is not in $PATH")
|
||||||
|
}
|
||||||
|
|
||||||
|
matches, err := fn("hello world", tempDir, "")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Equal(t, len(matches), 4)
|
||||||
|
for _, match := range matches {
|
||||||
|
require.NotEmpty(t, match.path)
|
||||||
|
require.NotZero(t, match.lineNum)
|
||||||
|
require.NotEmpty(t, match.lineText)
|
||||||
|
require.NotZero(t, match.modTime)
|
||||||
|
require.NotContains(t, match.path, ".hidden.txt")
|
||||||
|
require.NotContains(t, match.path, "file4.txt")
|
||||||
|
require.NotContains(t, match.path, "file5.txt")
|
||||||
|
require.NotContains(t, match.path, "binary.exe")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Benchmark to show performance improvement
|
// Benchmark to show performance improvement
|
||||||
func BenchmarkRegexCacheVsCompile(b *testing.B) {
|
func BenchmarkRegexCacheVsCompile(b *testing.B) {
|
||||||
cache := newRegexCache()
|
cache := newRegexCache()
|
||||||
|
|||||||
53
internal/llm/tools/rg.go
Normal file
53
internal/llm/tools/rg.go
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/charmbracelet/crush/internal/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
var getRg = sync.OnceValue(func() string {
|
||||||
|
path, err := exec.LookPath("rg")
|
||||||
|
if err != nil {
|
||||||
|
if log.Initialized() {
|
||||||
|
slog.Warn("Ripgrep (rg) not found in $PATH. Some grep features might be limited or slower.")
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return path
|
||||||
|
})
|
||||||
|
|
||||||
|
func getRgCmd(ctx context.Context, globPattern string) *exec.Cmd {
|
||||||
|
name := getRg()
|
||||||
|
if name == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
args := []string{"--files", "-L", "--null"}
|
||||||
|
if globPattern != "" {
|
||||||
|
if !filepath.IsAbs(globPattern) && !strings.HasPrefix(globPattern, "/") {
|
||||||
|
globPattern = "/" + globPattern
|
||||||
|
}
|
||||||
|
args = append(args, "--glob", globPattern)
|
||||||
|
}
|
||||||
|
return exec.CommandContext(ctx, name, args...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getRgSearchCmd(ctx context.Context, pattern, path, include string) *exec.Cmd {
|
||||||
|
name := getRg()
|
||||||
|
if name == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Use -n to show line numbers and include the matched line
|
||||||
|
args := []string{"-H", "-n", pattern}
|
||||||
|
if include != "" {
|
||||||
|
args = append(args, "--glob", include)
|
||||||
|
}
|
||||||
|
args = append(args, path)
|
||||||
|
|
||||||
|
return exec.CommandContext(ctx, name, args...)
|
||||||
|
}
|
||||||
@@ -2,7 +2,6 @@ package shell
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@@ -92,18 +91,14 @@ func TestCommandBlocking(t *testing.T) {
|
|||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
// Create a temporary directory for each test
|
// Create a temporary directory for each test
|
||||||
tmpDir, err := os.MkdirTemp("", "shell-test-*")
|
tmpDir := t.TempDir()
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to create temp dir: %v", err)
|
|
||||||
}
|
|
||||||
defer os.RemoveAll(tmpDir)
|
|
||||||
|
|
||||||
shell := NewShell(&Options{
|
shell := NewShell(&Options{
|
||||||
WorkingDir: tmpDir,
|
WorkingDir: tmpDir,
|
||||||
BlockFuncs: tt.blockFuncs,
|
BlockFuncs: tt.blockFuncs,
|
||||||
})
|
})
|
||||||
|
|
||||||
_, _, err = shell.Exec(context.Background(), tt.command)
|
_, _, err := shell.Exec(context.Background(), tt.command)
|
||||||
|
|
||||||
if tt.shouldBlock {
|
if tt.shouldBlock {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user