From 2ca928dcad48f7a7c78034c2934ba52024f95fc3 Mon Sep 17 00:00:00 2001 From: carlospolop Date: Mon, 2 Oct 2023 00:50:54 +0200 Subject: [PATCH] rex-client-impr --- clients/go/regexFinder.go | 273 +++++++++++++++++++++----------------- 1 file changed, 148 insertions(+), 125 deletions(-) diff --git a/clients/go/regexFinder.go b/clients/go/regexFinder.go index d034e80..f232d6f 100644 --- a/clients/go/regexFinder.go +++ b/clients/go/regexFinder.go @@ -1,22 +1,24 @@ package main import ( + "bufio" + "encoding/json" "fmt" + "io" "io/ioutil" "os" + "os/exec" "path/filepath" "regexp" "strings" - "os/exec" - "encoding/json" "gopkg.in/yaml.v2" ) type Regex struct { - Name string `yaml:"name"` - Regex string `yaml:"regex"` - Example string `yaml:"example"` + Name string `yaml:"name"` + Regex string `yaml:"regex"` + Example string `yaml:"example"` FalsePositives string `yaml:"falsePositives"` } @@ -29,7 +31,6 @@ type Config struct { RegularExpressions []Pattern `yaml:"regular_expresions"` } - type Match struct { Regex string `json:"regex"` Match string `json:"match"` @@ -38,35 +39,57 @@ type Match struct { } func main() { - // Read YAML config file - configFile, err := ioutil.ReadFile("../../regex.yaml") + configPath := "" + usage := "Usage: -r [-d regex-search | -g | -gs ] [-c]" + + args := os.Args[1:] + + // Check for -r parameter for the regex.yaml path + for i, arg := range args { + if arg == "-r" && i+1 < len(args) { + configPath = args[i+1] + // Remove the -r and its value from args + args = append(args[:i], args[i+2:]...) + break + } + } + + // If the configPath hasn't changed from the default, it means -r wasn't provided. + if configPath == "" { + fmt.Println("You must provide the '-r' parameter followed by the path to regex.yaml.") + fmt.Println(usage) + return + } + + // Read YAML config file using the determined path + configFile, err := ioutil.ReadFile(configPath) if err != nil { panic(err) } + var config Config var isFaslePos bool - - args := os.Args[1:] + if len(args) == 0 { - fmt.Println("Usage: -d | -r | -rs ") + fmt.Println(usage) return } isFaslePos = checkIfFaslePos(args) - if(isFaslePos){ + if isFaslePos { // Parse YAML config file err = yaml.Unmarshal(configFile, &config) if err != nil { panic(err) } - }else{ + } else { // Parse YAML config file err = yaml.Unmarshal(configFile, &config) if err != nil { panic(err) } - + // Remove the regexes with falsePositives set to true for i, pattern := range config.RegularExpressions { filteredRegexes := make([]Regex, 0, len(pattern.Regexes)) @@ -80,14 +103,14 @@ func main() { } switch arg := args[0]; arg { case "-h": - fmt.Println("Usage: -d regex-search | -r | -rs [-c]") + fmt.Println(usage) case "-d": - if(isFaslePos){ + if isFaslePos { if len(args) < 3 { fmt.Println("Usage: regex-search -d -c") return } - }else{ + } else { if len(args) < 2 { fmt.Println("Usage: regex-search -d ") return @@ -95,58 +118,58 @@ func main() { } dir := args[1] searchRegexInDir(dir, config, "") - case "-r": - if(isFaslePos){ + case "-g": + if isFaslePos { if len(args) < 3 { - fmt.Println("Usage: regex-search -r -c") + fmt.Println("Usage: regex-search -g -c") return } - }else{ + } else { if len(args) < 2 { - fmt.Println("Usage: regex-search -r ") + fmt.Println("Usage: regex-search -g ") return } } repoUrl := args[1] searchRegexInRepoGithub(repoUrl, config) - - case "-rs": - if(isFaslePos){ + + case "-gs": + if isFaslePos { if len(args) < 3 { - fmt.Println("Usage: regex-search -rs -c") + fmt.Println("Usage: regex-search -gs -c") return } if len(args) == 3 { - fmt.Println("Usage: regex-search -rs -c. Add more repos or change to -r") + fmt.Println("Usage: regex-search -gs -c. Add more repos or use -g") return } for i := 2; i < len(os.Args)-1; i++ { - repoUrl := os.Args[i]; - searchRegexInRepoGithub(repoUrl,config) + repoUrl := os.Args[i] + searchRegexInRepoGithub(repoUrl, config) } - }else{ + } else { if len(args) < 2 { - fmt.Println("Usage: regex-search -rs ") + fmt.Println("Usage: regex-search -gs ") return } if len(args) == 2 { - fmt.Println("Usage: regex-search -rs . Add more repos or change to -r") + fmt.Println("Usage: regex-search -gs . Add more repos or use -r") return } for i := 2; i < len(os.Args); i++ { - repoUrl := os.Args[i]; - searchRegexInRepoGithub(repoUrl,config) + repoUrl := os.Args[i] + searchRegexInRepoGithub(repoUrl, config) } } default: - fmt.Println("Usage: -d regex-search | -r | -rs optional[-c]") + fmt.Println(usage) } } -func checkIfFaslePos(args []string) bool{ +func checkIfFaslePos(args []string) bool { var isFaslePos bool = false for i := 0; i < len(args); i++ { - if args[i] == "-c" { + if args[i] == "-c" { isFaslePos := true return isFaslePos } @@ -154,126 +177,126 @@ func checkIfFaslePos(args []string) bool{ return isFaslePos } -func searchRegexInDir(dir string, config Config, repoName string){ - - // Loop over all files in directory +func searchRegexInDir(dir string, config Config, repoName string) { + + // Loop over all files in the directory filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { - // Skip directories and hidden files - if info.IsDir() || filepath.Base(path)[0] == '.' { - return nil - } + // Skip directories and hidden files + if info.IsDir() || filepath.Base(path)[0] == '.' { + return nil + } - // Read file content - content, err := ioutil.ReadFile(path) - if err != nil { - fmt.Printf("Error reading file %s: %v\n", path, err) - return nil - } + // Read file content + content, err := ioutil.ReadFile(path) + if err != nil { + fmt.Printf("Error reading file %s: %v\n", path, err) + return nil + } - if err := os.MkdirAll("matches", 0777); err != nil { - panic(err) - } + // Check file content against each regex pattern + for _, pattern := range config.RegularExpressions { + for _, regex := range pattern.Regexes { + rex := strings.Replace(regex.Regex, "\n", "", -1) + re := regexp.MustCompile(rex) + foundMatches := re.FindAllString(string(content), -1) + for _, foundMatch := range foundMatches { + if foundMatch != "" { + // Truncate match if it's longer than 500 chars + if len(foundMatch) < 500 { + match := Match{ + RegexName: regex.Name, + Regex: rex, + Match: foundMatch, + File: filepath.Base(path), + } + jsonData, err := json.Marshal(match) + if err != nil { + fmt.Println("Error marshaling match to JSON:", err) + continue + } - matches := make([]Match, 0) - - // Check file content against each regex pattern - for _, pattern := range config.RegularExpressions { - for _, regex := range pattern.Regexes { - rex := strings.Replace(regex.Regex, "\n", "", -1) - re := regexp.MustCompile(rex) - foundMatches := re.FindAllString(string(content), -1) - for _, foundMatch := range foundMatches { - if foundMatch != "" { - match := Match{ - RegexName: regex.Name, - Regex: rex, - Match: foundMatch, - File: strings.ReplaceAll(path,"\\","\\\\"), + fmt.Println(string(jsonData)) + } } - matches = append(matches, match) } - } + } } - } - jsonData, err := json.MarshalIndent(matches, "", " ") - if err != nil { - fmt.Println("Error marshaling matches to JSON:", err) + return nil - } + }) +} - fmt.Println(string(jsonData)) +func searchRegexInRepoGithub(repoUrl string, config Config) { + // Download the github repo and split the github log file in chunks of 5MB + // Then call the filesystem analysis - //Getting name for matches.json - _, direc := filepath.Split(dir) + const chunkSize = 5 * 1024 * 1024 // 5MB in bytes - //Create the json file - jsonName := direc + "-" + repoName + ".json" - jsonDir := "matches/" + jsonName - jsonFile, err := os.Create(jsonDir) + // Create a unique temporary directory + tempDir, err := ioutil.TempDir(os.TempDir(), "repoClone_") if err != nil { panic(err) } - defer jsonFile.Close() - if _, err := jsonFile.Write(jsonData); err != nil { - panic(err) - } + defer os.RemoveAll(tempDir) // Ensure removal of temp directory upon function exit - return nil -}) -} - -func searchRegexInRepoGithub(repoUrl string, config Config) { - - // Create the log directory if it doesn't exist - if err := os.MkdirAll("tmp", 0777); err != nil { - panic(err) - } // Clone the repository - if err := os.RemoveAll("repo"); err != nil { - panic(err) - } - cmd := exec.Command("git", "clone", repoUrl, "repo") + repoPath := fmt.Sprintf("%s/repo", tempDir) + cmd := exec.Command("git", "clone", repoUrl, repoPath) if err := cmd.Run(); err != nil { panic(err) } - - // Change to repository directory - if err := os.Chdir("repo"); err != nil { - panic(err) - } - - // Generate git log - logCmd := exec.Command("git", "log", "-p") - logOutput, err := logCmd.Output() - if err != nil { - panic(err) - } - - // Change back to original directory - if err := os.Chdir("../"); err != nil { - panic(err) - } - //Name the gitlog-file + // Name the gitlog-file rmGitUrl := strings.Replace(repoUrl, "https://github.com/", "", 1) logUrl := strings.Replace(rmGitUrl, "/", "-", 1) - logDir := "tmp/gitlog-" + logUrl +".txt" - logFile, err := os.Create(logDir) + + // Generate git log and write it directly to logFile + logCmd := exec.Command("git", "-C", repoPath, "log", "-p") + output, err := logCmd.StdoutPipe() if err != nil { panic(err) } - if _, err := logFile.Write(logOutput); err != nil { + if err := logCmd.Start(); err != nil { panic(err) } - searchRegexInDir("tmp", config, logUrl); + reader := bufio.NewReader(output) + chunkCount := 0 + + for { + chunk := make([]byte, chunkSize) + _, err := io.ReadFull(reader, chunk) + if err == io.EOF { + break + } + if err != nil && err != io.ErrUnexpectedEOF { + panic(err) + } + + logDir := fmt.Sprintf("%s/gitlog-%s-part%d.txt", tempDir, logUrl, chunkCount) + logFile, err := os.Create(logDir) + if err != nil { + panic(err) + } + + _, err = logFile.Write(chunk) + if err != nil { + logFile.Close() + panic(err) + } + logFile.Close() + chunkCount++ + } - // Remove the cloned repository - if err := os.RemoveAll("repo"); err != nil { + if err := logCmd.Wait(); err != nil { panic(err) } - if err := os.RemoveAll("tmp"); err != nil { + + // Remove the cloned repository as it's no longer needed + if err := os.RemoveAll(repoPath); err != nil { panic(err) } + + searchRegexInDir(tempDir, config, logUrl) }