From 575a09f6f4a55ae2eb3546b2e8789d400952b49b Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Tue, 1 Nov 2022 07:56:47 +1030 Subject: [PATCH 1/2] docs: explain Go symbol hash Put an explicit description of the Go symbol hash algorithm in the godoc and in the README. --- README.md | 7 ++++++- toutoumomoma.go | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f974ad1..702f5f9 100644 --- a/README.md +++ b/README.md @@ -5,4 +5,9 @@ - `Stripped`: scan files that may be executable and report whether they are a Go executable that has had its symbols stripped. - `ImportHash`: calculate the [imphash](https://www.fireeye.com/blog/threat-research/2014/01/tracking-malware-import-hashing.html) of an executable with dynamic imports. - `GoSymbolHash`: calculate an imphash analogue for Go executables compiled by the gc-compiler. -- `Sections`: provide section size statistics for an executable. + + The `GoSymbolHash` algorithm is analogous to the algorithm described for `ImportHash` with the exception that Go's static symbols are used in place of the dynamic import symbols used by `ImportHash`. + + The list of symbols referenced by the executable is obtained and the MD5 hash of the ordered list of symbols, separated by commas, is calculated. + The fully qualified import path of each symbol is included and while symbols used by `ImportHash` are canonicalised to lowercase, `GoSymbolHash` retains the case of the original symbol. `GoSymbolHash` may be calculated including or excluding standard library imports. +- `Sections`: provide section size and entropy statistics for an executable. 
diff --git a/toutoumomoma.go b/toutoumomoma.go index b742af9..01954d9 100644 --- a/toutoumomoma.go +++ b/toutoumomoma.go @@ -155,6 +155,10 @@ func (f *File) Stripped() (sneaky bool, err error) { // Darwin imports are the list of symbols without a library prefix and is equivalent // to the Anomali SymHash https://www.anomali.com/blog/symhash. // +// The algorithm obtains the list of imported function names and converts them to all +// lowercase. Any file extension is removed and then the MD5 hash of the ordered list of +// symbols, separated by commas, is calculated. +// // Darwin: // ___error // __exit @@ -202,8 +206,15 @@ func (f *File) ImportHash() (hash []byte, imports []string, err error) { // from the Go standard library are included, otherwise only third-party symbols // are considered. // -// If the file at is an executable, but not a gc-compiled Go executable, -// ErrNotGoExecutable will be returned. +// The algorithm is analogous to the algorithm described for ImportHash with the exception +// that Go's static symbols are used in place of the dynamic import symbols used by the +// ImportHash. The list of symbols referenced by the executable is obtained and the MD5 hash +// of the ordered list of symbols, separated by commas, is calculated. The fully qualified +// import path of each symbol is included and while symbols used by ImportHash are +// canonicalised to lowercase, GoSymbolHash retains the case of the original symbol. +// +// If the file is an executable, but not a gc-compiled Go executable, ErrNotGoExecutable +// will be returned. 
func (f *File) GoSymbolHash(stdlib bool) (hash []byte, imports []string, err error) { ok, err := f.isGoExecutable() if !ok || err != nil { From 8a1410aee5af7aa12aa0a91ee7ab92a47e48ebb4 Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Tue, 1 Nov 2022 09:43:33 +1030 Subject: [PATCH 2/2] address pr comment --- README.md | 1 + toutoumomoma.go | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 702f5f9..d4f5422 100644 --- a/README.md +++ b/README.md @@ -9,5 +9,6 @@ The `GoSymbolHash` algorithm is analogous to the algorithm described for `ImportHash` with the exception that Go's static symbols are used in place of the dynamic import symbols used by `ImportHash`. The list of symbols referenced by the executable is obtained and the MD5 hash of the ordered list of symbols, separated by commas, is calculated. + The order of the symbols is as exists in the executable and returned by the Go standard library debug packages. The fully qualified import path of each symbol is included and while symbols used by `ImportHash` are canonicalised to lowercase, `GoSymbolHash` retains the case of the original symbol. `GoSymbolHash` may be calculated including or excluding standard library imports. - `Sections`: provide section size and entropy statistics for an executable. diff --git a/toutoumomoma.go b/toutoumomoma.go index 01954d9..a20d31a 100644 --- a/toutoumomoma.go +++ b/toutoumomoma.go @@ -209,9 +209,11 @@ func (f *File) ImportHash() (hash []byte, imports []string, err error) { // The algorithm is analogous to the algorithm described for ImportHash with the exception // that Go's static symbols are used in place of the dynamic import symbols used by the // ImportHash. The list of symbols referenced by the executable is obtained and the MD5 hash -// of the ordered list of symbols, separated by commas, is calculated. 
The fully qualified -// import path of each symbol is included and while symbols used by ImportHash are -// canonicalised to lowercase, GoSymbolHash retains the case of the original symbol. +// of the ordered list of symbols, separated by commas, is calculated. The order of the +// symbols is as exists in the executable and returned by the standard library debug packages. +// The fully qualified import path of each symbol is included and while symbols used by +// ImportHash are canonicalised to lowercase, GoSymbolHash retains the case of the original +// symbol. // // If the file is an executable, but not a gc-compiled Go executable, ErrNotGoExecutable // will be returned.