diff --git a/Cargo.lock b/Cargo.lock
index 8ecefc9fd..1a9430a9e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1705,6 +1705,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
+[[package]]
+name = "md5"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
+
 [[package]]
 name = "memchr"
 version = "2.6.3"
@@ -2178,6 +2184,12 @@ dependencies = [
  "siphasher",
 ]
 
+[[package]]
+name = "pin-project-lite"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
+
 [[package]]
 name = "pkg-config"
 version = "0.3.27"
@@ -2731,6 +2743,17 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest 0.10.7",
+]
+
 [[package]]
 name = "sha2"
 version = "0.9.9"
@@ -2755,6 +2778,19 @@ dependencies = [
  "digest 0.10.7",
 ]
 
+[[package]]
+name = "sha256"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7895c8ae88588ccead14ff438b939b0c569cd619116f14b4d13fdff7b8333386"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "hex",
+ "sha2 0.10.7",
+ "tokio",
+]
+
 [[package]]
 name = "shlex"
 version = "1.1.0"
@@ -3132,6 +3168,17 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
+[[package]]
+name = "tokio"
+version = "1.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9"
+dependencies = [
+ "backtrace",
+ "bytes",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "toml"
 version = "0.5.11"
@@ -4014,6 +4061,7 @@ dependencies = [
  "lingua",
  "linkme",
  "log",
+ "md5",
  "memchr",
  "memx",
  "num",
@@ -4026,6 +4074,8 @@ dependencies = [
  "rustc-hash",
  "serde",
  "serde_json",
+ "sha1",
+ "sha256",
  "smallvec",
  "thiserror",
  "walrus",
diff --git a/Cargo.toml b/Cargo.toml
index dce7bf854..6148c59c7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,9 @@ lazy_static = "1.4.0"
 line-span = "0.1.3"
 linkme = "0.3"
 log = "0.4"
+md5 = "0.7.0"
+sha1 = "0.10.6"
+sha256 = "1.4.0"
 memchr = "2.6.3"
 memx = "0.1.28"
 num = "0.4.0"
diff --git a/yara-x/Cargo.toml b/yara-x/Cargo.toml
index e754ddbb8..8b85b8ffc 100644
--- a/yara-x/Cargo.toml
+++ b/yara-x/Cargo.toml
@@ -40,11 +40,18 @@ text-module = [
 # The Time module allows you to retrieve epoch in seconds that can
 # be used in conditions of a rule to check againts other epoch time.
 time-module = []
+# The hash module provides functions for computing md5, sha1 and sha-256 hashes
+hash-module = [
+    "dep:md5",
+    "dep:sha1",
+    "dep:sha256",
+]
 
 # Features that are enabled by default.
 default = [
     "constant-folding",
     "fast-regexp",
+    "hash-module",
     "time-module",
     "test_proto2-module",
     "test_proto3-module",
@@ -67,6 +74,9 @@ itertools = { workspace = true }
 lazy_static = { workspace = true }
 linkme = { workspace = true }
 log = { workspace = true, optional = true }
+md5 = { workspace = true, optional = true }
+sha1 = { workspace = true, optional = true }
+sha256 = { workspace = true, optional = true }
 memchr = { workspace = true }
 memx = { workspace = true }
 num = { workspace = true }
diff --git a/yara-x/src/modules/hash.rs b/yara-x/src/modules/hash.rs
new file mode 100644
index 000000000..e55c37f99
--- /dev/null
+++ b/yara-x/src/modules/hash.rs
@@ -0,0 +1,179 @@
+use std::cell::RefCell;
+
+use md5 as md5_hash;
+use rustc_hash::FxHashMap;
+use sha1 as sha1_hash;
+use sha1::Digest;
+use sha256::digest as sha256_digest;
+
+use crate::modules::prelude::*;
+use crate::modules::protos::hash::*;
+
+// Per-thread caches mapping an `(offset, size)` range of the scanned data to
+// the hex digest already computed for it, one cache per hash function.
+thread_local!(
+    static SHA256_CACHE: RefCell<FxHashMap<(i64, i64), String>> =
+        RefCell::new(FxHashMap::default());
+
+    static SHA1_CACHE: RefCell<FxHashMap<(i64, i64), String>> =
+        RefCell::new(FxHashMap::default());
+
+    static MD5_CACHE: RefCell<FxHashMap<(i64, i64), String>> =
+        RefCell::new(FxHashMap::default());
+);
+
+/// Clears the digest caches and returns an empty `Hash` message.
+#[module_main]
+fn main(_ctx: &ScanContext) -> Hash {
+    // With every scanned file the cache must be cleared.
+    SHA256_CACHE.with(|cache| cache.borrow_mut().clear());
+    SHA1_CACHE.with(|cache| cache.borrow_mut().clear());
+    MD5_CACHE.with(|cache| cache.borrow_mut().clear());
+
+    Hash::new()
+}
+
+/// Returns the lowercase hex MD5 digest of the `size` bytes of scanned data
+/// starting at `offset`, caching the result per `(offset, size)` pair.
+///
+/// Returns `None` when `offset` or `offset + size` is negative, the sum
+/// overflows, or the range does not lie within the scanned data.
+#[module_export(name = "md5")]
+fn md5(
+    ctx: &mut ScanContext,
+    offset: i64,
+    size: i64,
+) -> Option<RuntimeString> {
+    let cached = MD5_CACHE.with(|cache| -> Option<RuntimeString> {
+        Some(RuntimeString::from_bytes(
+            ctx,
+            cache.borrow().get(&(offset, size))?,
+        ))
+    });
+
+    if cached.is_some() {
+        return cached;
+    }
+
+    // `checked_add` turns an overflowing `offset + size` into `None` instead
+    // of a panic (debug builds) or a silent wrap-around (release builds).
+    let end = offset.checked_add(size)?;
+    let range = offset.try_into().ok()?..end.try_into().ok()?;
+    let data = ctx.scanned_data().get(range)?;
+    let digest = format!("{:x}", md5_hash::compute(data));
+    let result = RuntimeString::from_bytes(ctx, digest.as_bytes());
+
+    MD5_CACHE.with(|cache| {
+        cache.borrow_mut().insert((offset, size), digest);
+    });
+
+    Some(result)
+}
+
+/// Returns the lowercase hex MD5 digest of the string `s`.
+#[module_export(name = "md5")]
+fn md5_str(ctx: &mut ScanContext, s: RuntimeString) -> Option<RuntimeString> {
+    Some(RuntimeString::from_bytes(
+        ctx,
+        format!("{:x}", md5_hash::compute(s.as_bstr(ctx))),
+    ))
+}
+
+/// Returns the lowercase hex SHA-1 digest of the `size` bytes of scanned data
+/// starting at `offset`, caching the result per `(offset, size)` pair.
+///
+/// Returns `None` when `offset` or `offset + size` is negative, the sum
+/// overflows, or the range does not lie within the scanned data.
+#[module_export(name = "sha1")]
+fn sha1(
+    ctx: &mut ScanContext,
+    offset: i64,
+    size: i64,
+) -> Option<RuntimeString> {
+    let cached = SHA1_CACHE.with(|cache| -> Option<RuntimeString> {
+        Some(RuntimeString::from_bytes(
+            ctx,
+            cache.borrow().get(&(offset, size))?,
+        ))
+    });
+
+    if cached.is_some() {
+        return cached;
+    }
+
+    // `checked_add` turns an overflowing `offset + size` into `None` instead
+    // of a panic (debug builds) or a silent wrap-around (release builds).
+    let end = offset.checked_add(size)?;
+    let range = offset.try_into().ok()?..end.try_into().ok()?;
+    let data = ctx.scanned_data().get(range)?;
+    let mut hasher = sha1_hash::Sha1::new();
+
+    hasher.update(data);
+
+    let digest = format!("{:x}", hasher.finalize());
+    let result = RuntimeString::from_bytes(ctx, digest.as_bytes());
+
+    SHA1_CACHE.with(|cache| {
+        cache.borrow_mut().insert((offset, size), digest);
+    });
+
+    Some(result)
+}
+
+/// Returns the lowercase hex SHA-1 digest of the string `s`.
+#[module_export(name = "sha1")]
+fn sha1_str(ctx: &mut ScanContext, s: RuntimeString) -> Option<RuntimeString> {
+    let mut hasher = sha1_hash::Sha1::new();
+    hasher.update(s.as_bstr(ctx));
+
+    Some(RuntimeString::from_bytes(ctx, format!("{:x}", hasher.finalize())))
+}
+
+/// Returns the lowercase hex SHA-256 digest of the `size` bytes of scanned
+/// data starting at `offset`, caching the result per `(offset, size)` pair.
+///
+/// Returns `None` when `offset` or `offset + size` is negative, the sum
+/// overflows, or the range does not lie within the scanned data.
+#[module_export(name = "sha256")]
+fn sha256(
+    ctx: &mut ScanContext,
+    offset: i64,
+    size: i64,
+) -> Option<RuntimeString> {
+    let cached = SHA256_CACHE.with(|cache| -> Option<RuntimeString> {
+        Some(RuntimeString::from_bytes(
+            ctx,
+            cache.borrow().get(&(offset, size))?,
+        ))
+    });
+
+    if cached.is_some() {
+        return cached;
+    }
+
+    // `checked_add` turns an overflowing `offset + size` into `None` instead
+    // of a panic (debug builds) or a silent wrap-around (release builds).
+    let end = offset.checked_add(size)?;
+    let range = offset.try_into().ok()?..end.try_into().ok()?;
+    let data = ctx.scanned_data().get(range)?;
+    let digest = sha256_digest(data);
+    let result = RuntimeString::from_bytes(ctx, digest.as_bytes());
+
+    SHA256_CACHE.with(|cache| {
+        cache.borrow_mut().insert((offset, size), digest);
+    });
+
+    Some(result)
+}
+
+/// Returns the lowercase hex SHA-256 digest of the string `s`.
+#[module_export(name = "sha256")]
+fn sha256_str(
+    ctx: &mut ScanContext,
+    s: RuntimeString,
+) -> Option<RuntimeString> {
+    Some(RuntimeString::from_bytes(
+        ctx,
+        sha256_digest(s.as_bstr(ctx).as_bytes()),
+    ))
+}
diff --git a/yara-x/src/modules/modules.rs b/yara-x/src/modules/modules.rs
index 8dfb8359a..0854f5dc1 100644
--- a/yara-x/src/modules/modules.rs
+++ b/yara-x/src/modules/modules.rs
@@ -1,6 +1,8 @@
 // File generated automatically by build.rs. Do not edit.
 #[cfg(feature = "text-module")]
 pub mod text;
+#[cfg(feature = "hash-module")]
+pub mod hash;
 #[cfg(feature = "test_proto2-module")]
 pub mod test_proto2;
 #[cfg(feature = "time-module")]
diff --git a/yara-x/src/modules/protos/hash.proto b/yara-x/src/modules/protos/hash.proto
new file mode 100644
index 000000000..294663739
--- /dev/null
+++ b/yara-x/src/modules/protos/hash.proto
@@ -0,0 +1,13 @@
+syntax = "proto2";
+
+import "yara.proto";
+
+option (yara.module_options) = {
+  name : "hash"
+  root_message: "Hash"
+  rust_module: "hash"
+};
+
+message Hash {
+  // This module contains only exported functions, and doesn't return any data
+}
\ No newline at end of file
diff --git a/yara-x/src/tests/mod.rs b/yara-x/src/tests/mod.rs
index 644b8d7cd..dab08c087 100644
--- a/yara-x/src/tests/mod.rs
+++ b/yara-x/src/tests/mod.rs
@@ -2801,6 +2801,52 @@ fn test_defined_1() {
     condition_false!(r#"defined true and false"#);
 }
 
+#[test]
+#[cfg(feature = "hash-module")]
+fn test_hash_module() {
+    rule_true!(
+        r#"
+        import "hash"
+        rule test {
+          condition:
+            hash.md5(0, filesize) == "6df23dc03f9b54cc38a0fc1483df6e21" and
+            hash.md5(3, 3) == "37b51d194a7513e45b56f6524f2d51f2" and
+            hash.md5(0, filesize) == hash.md5("foobarbaz") and
+            hash.md5(3, 3) == hash.md5("bar")
+        }
+        "#,
+        b"foobarbaz"
+    );
+
+    rule_true!(
+        r#"
+        import "hash"
+        rule test {
+          condition:
+            hash.sha1(0, filesize) == "5f5513f8822fdbe5145af33b64d8d970dcf95c6e" and
+            hash.sha1(3, 3) == "62cdb7020ff920e5aa642c3d4066950dd1f01f4d" and
+            hash.sha1(0, filesize) == hash.sha1("foobarbaz") and
+            hash.sha1(3, 3) == hash.sha1("bar")
+        }
+        "#,
+        b"foobarbaz"
+    );
+
+    rule_true!(
+        r#"
+        import "hash"
+        rule test {
+          condition:
+            hash.sha256(0, filesize) == "97df3588b5a3f24babc3851b372f0ba71a9dcdded43b14b9d06961bfc1707d9d" and
+            hash.sha256(3, 3) == "fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9" and
+            hash.sha256(0, filesize) == hash.sha256("foobarbaz") and
+            hash.sha256(3, 3) == hash.sha256("bar")
+        }
+        "#,
+        b"foobarbaz"
+    );
+}
+
 #[test]
 #[cfg(feature = "test_proto2-module")]
 fn test_defined_2() {