Skip to content

Commit

Permalink
Only search files that seem to contain text
Browse files Browse the repository at this point in the history
  • Loading branch information
hrs committed May 14, 2023
1 parent 144d473 commit 08612d3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
18 changes: 18 additions & 0 deletions document.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@ package main

import (
"bufio"
"fmt"
"math"
"mime"
"os"
"path/filepath"
"regexp"
"strings"

"golang.org/x/tools/godoc/util"
"golang.org/x/tools/godoc/vfs"
)

type TermMap map[string]float64
Expand All @@ -27,6 +33,11 @@ func NewDocument(path string) (*Document, error) {
}
defer file.Close()

// Ensure that this is a text file
if !isTextFile(path) {
return nil, fmt.Errorf("not a text file, skipping: %s", path)
}

// Create a scanner from the file
scanner := bufio.NewScanner(file)

Expand Down Expand Up @@ -87,3 +98,10 @@ func (doc *Document) NormalizeTfIdf(invDocFreq TermMap) {

doc.Norm = math.Sqrt(norm)
}

// isTextFile reports whether the file at path appears to contain text.
//
// The extension is checked first: when the mime package knows the extension,
// the file counts as text only if its MIME type starts with "text/". When the
// extension is unknown (empty MIME type), the decision is delegated to
// util.IsTextFile from golang.org/x/tools/godoc/util.
func isTextFile(path string) bool {
	mimeType := mime.TypeByExtension(filepath.Ext(path))
	if mimeType != "" {
		return strings.HasPrefix(mimeType, "text/")
	}

	// vfs.OS resolves file names relative to its root and cleans them so they
	// cannot escape it. Rooting at "." would therefore mis-resolve absolute
	// paths and paths containing ".."; root the filesystem at the file's own
	// directory and ask for the base name instead.
	dir, name := filepath.Dir(path), filepath.Base(path)

	return util.IsTextFile(vfs.OS(dir), name)
}
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module github.com/hrs/docsim

go 1.19

require github.com/reiver/go-porterstemmer v1.0.1 // indirect
require (
github.com/reiver/go-porterstemmer v1.0.1 // indirect
golang.org/x/tools v0.9.1
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
github.com/reiver/go-porterstemmer v1.0.1 h1:WyERBkASXgoXrTwq/IQ6wyNj/YG7j/ZURvTuMCoud5w=
github.com/reiver/go-porterstemmer v1.0.1/go.mod h1:Z8uL/f/7UEwaeAJNwx1sO8kbqXiEuQieNuD735hLrSU=
golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=

0 comments on commit 08612d3

Please sign in to comment.