From 9e44d57cb845d259c1eee136ca6832c6e3d69daa Mon Sep 17 00:00:00 2001 From: simonjiao Date: Tue, 23 Apr 2024 15:47:17 +0800 Subject: [PATCH] refactor module loading/checking functions --- Cargo.lock | 1 + language/move-vm/runtime/Cargo.toml | 1 + language/move-vm/runtime/src/data_cache.rs | 77 ++++++++++ language/move-vm/runtime/src/lib.rs | 1 + language/move-vm/runtime/src/loader.rs | 139 +++++++++++++++++- .../move-vm/runtime/src/module_traversal.rs | 50 +++++++ language/move-vm/runtime/src/session.rs | 54 +++++++ 7 files changed, 317 insertions(+), 6 deletions(-) create mode 100644 language/move-vm/runtime/src/module_traversal.rs diff --git a/Cargo.lock b/Cargo.lock index 05eec6cc99..e3dc46dfc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3890,6 +3890,7 @@ dependencies = [ "proptest", "sha3 0.9.1", "tracing", + "typed-arena", ] [[package]] diff --git a/language/move-vm/runtime/Cargo.toml b/language/move-vm/runtime/Cargo.toml index fb28ca5964..884c591e3d 100644 --- a/language/move-vm/runtime/Cargo.toml +++ b/language/move-vm/runtime/Cargo.toml @@ -23,6 +23,7 @@ move-bytecode-verifier = { path = "../../move-bytecode-verifier" } move-core-types = { path = "../../move-core/types" } move-vm-types = { path = "../types" } move-binary-format = { path = "../../move-binary-format" } +typed-arena = "2.0.2" [dev-dependencies] anyhow = "1.0.52" diff --git a/language/move-vm/runtime/src/data_cache.rs b/language/move-vm/runtime/src/data_cache.rs index 23f80b01e7..3d43fc5394 100644 --- a/language/move-vm/runtime/src/data_cache.rs +++ b/language/move-vm/runtime/src/data_cache.rs @@ -3,8 +3,11 @@ // SPDX-License-Identifier: Apache-2.0 use crate::loader::Loader; +use std::collections::btree_map; use move_binary_format::errors::*; +use move_binary_format::file_format::CompiledScript; +use move_binary_format::CompiledModule; use move_core_types::language_storage::StructTag; use move_core_types::{ account_address::AccountAddress, @@ -21,7 +24,9 @@ use move_vm_types::{ loaded_data::runtime_types::Type, values::{GlobalValue, Value}, }; +use sha3::{Digest, Sha3_256}; use std::collections::btree_map::BTreeMap; +use std::sync::Arc; pub struct AccountDataCache { data_map: BTreeMap, @@ -55,6 +60,10 @@ pub(crate) struct TransactionDataCache<'r, 'l, S> { loader: &'l Loader, account_map: BTreeMap, event_data: Vec<(Vec, u64, Type, MoveTypeLayout, Value)>, + + // Caches to help avoid duplicate deserialization calls. + compiled_scripts: BTreeMap<[u8; 32], Arc>, + compiled_modules: BTreeMap, usize, [u8; 32])>, } impl<'r, 'l, S: MoveResolver> TransactionDataCache<'r, 'l, S> { @@ -66,6 +75,9 @@ impl<'r, 'l, S: MoveResolver> TransactionDataCache<'r, 'l, S> { loader, account_map: BTreeMap::new(), event_data: vec![], + + compiled_scripts: BTreeMap::new(), + compiled_modules: BTreeMap::new(), } } @@ -154,6 +166,71 @@ impl<'r, 'l, S: MoveResolver> TransactionDataCache<'r, 'l, S> { } map.get_mut(k).unwrap() } + + pub(crate) fn load_compiled_script_to_cache( + &mut self, + script_blob: &[u8], + hash_value: [u8; 32], + ) -> VMResult> { + let cache = &mut self.compiled_scripts; + match cache.entry(hash_value) { + btree_map::Entry::Occupied(entry) => Ok(entry.get().clone()), + btree_map::Entry::Vacant(entry) => { + let script = match CompiledScript::deserialize(script_blob) { + Ok(script) => script, + Err(err) => { + let msg = format!("[VM] deserializer for script returned error: {:?}", err); + return Err(PartialVMError::new(StatusCode::CODE_DESERIALIZATION_ERROR) + .with_message(msg) + .finish(Location::Script)); + } + }; + Ok(entry.insert(Arc::new(script)).clone()) + } + } + } + + pub(crate) fn load_compiled_module_to_cache( + &mut self, + id: ModuleId, + allow_loading_failure: bool, + ) -> VMResult<(Arc, usize, [u8; 32])> { + let cache = &mut self.compiled_modules; + match cache.entry(id) { + btree_map::Entry::Occupied(entry) => Ok(entry.get().clone()), + btree_map::Entry::Vacant(entry) => { + // bytes fetching, allow loading to fail if the flag is set + let bytes = match load_moule_impl(self.remote, &self.account_map, entry.key()) + .map_err(|err| err.finish(Location::Undefined)) + { + Ok(bytes) => bytes, + Err(err) if allow_loading_failure => return Err(err), + Err(err) => { + return Err(expect_no_verification_errors(err)); + } + }; + + let mut sha3_256 = Sha3_256::new(); + sha3_256.update(&bytes); + let hash_value: [u8; 32] = sha3_256.finalize().into(); + + // for bytes obtained from the data store, they should always deserialize and verify. + // It is an invariant violation if they don't. + let module = CompiledModule::deserialize(&bytes) + .map_err(|err| { + let msg = format!("Deserialization error: {:?}", err); + PartialVMError::new(StatusCode::CODE_DESERIALIZATION_ERROR) + .with_message(msg) + .finish(Location::Module(entry.key().clone())) + }) + .map_err(expect_no_verification_errors)?; + + Ok(entry + .insert((Arc::new(module), bytes.len(), hash_value)) + .clone()) + } + } + } } // `DataStore` implementation for the `TransactionDataCache` diff --git a/language/move-vm/runtime/src/lib.rs b/language/move-vm/runtime/src/lib.rs index e9567ca7e8..b65f5102cb 100644 --- a/language/move-vm/runtime/src/lib.rs +++ b/language/move-vm/runtime/src/lib.rs @@ -27,5 +27,6 @@ pub mod config; #[cfg(any(debug_assertions, feature = "debugging"))] mod debug; +pub mod module_traversal; #[cfg(test)] mod unit_tests; diff --git a/language/move-vm/runtime/src/loader.rs b/language/move-vm/runtime/src/loader.rs index 987a7cf709..f01c98499d 100644 --- a/language/move-vm/runtime/src/loader.rs +++ b/language/move-vm/runtime/src/loader.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::data_cache::TransactionDataCache; +use crate::module_traversal::TraversalContext; use crate::{ config::VMConfig, logging::expect_no_verification_errors, @@ -24,6 +25,7 @@ use move_binary_format::{ }; use move_bytecode_verifier::{self, cyclic_dependencies, dependencies}; use move_core_types::account_address::AccountAddress; +use move_core_types::gas_algebra::NumBytes; use move_core_types::resolver::MoveResolver; use move_core_types::{ identifier::{IdentStr, Identifier}, @@ -32,6 +34,7 @@ use move_core_types::{ value::{MoveFieldLayout, MoveStructLayout, MoveTypeLayout}, vm_status::StatusCode, }; +use move_vm_types::gas::GasMeter; use move_vm_types::{ data_store::DataStore, loaded_data::runtime_types::{CachedStructIndex, DepthFormula, StructType, Type}, @@ -46,6 +49,7 @@ use std::{ sync::Arc, }; use tracing::error; +use typed_arena::Arena; type ScriptHash = [u8; 32]; @@ -173,6 +177,7 @@ impl ModuleCache { &mut self, natives: &NativeFunctions, id: ModuleId, + module_size: usize, module: CompiledModule, ) -> VMResult> { if let Some(cached) = self.module_at(&id) { @@ -183,7 +188,7 @@ impl ModuleCache { // leave a clean state let sig_cache = self.add_module(natives, &module)?; - match Module::new(&sig_cache, module, self) { + match Module::new(&sig_cache, module_size, module, self) { Ok(module) => Ok(Arc::clone(self.modules.insert(id, module))), Err((err, module)) => { // remove all structs and functions that have been pushed @@ -1268,7 +1273,7 @@ impl Loader { id: &ModuleId, data_store: &impl DataStore, allow_loading_failure: bool, - ) -> VMResult { + ) -> VMResult<(CompiledModule, usize)> { // bytes fetching, allow loading to fail if the flag is set let bytes = match data_store.load_module(id) { Ok(bytes) => bytes, @@ -1308,7 +1313,7 @@ impl Loader { .map_err(expect_no_verification_errors)?; self.check_natives(&module) .map_err(expect_no_verification_errors)?; - Ok(module) + Ok((module, bytes.len())) } // Everything in `load_and_verify_module` and also recursively load and verify all the @@ -1330,7 +1335,8 @@ impl Loader { } // module self-check - let module = self.load_and_verify_module(id, data_store, allow_module_loading_failure)?; + let (module, module_size) = + self.load_and_verify_module(id, data_store, allow_module_loading_failure)?; visited.insert(id.clone()); friends_discovered.extend(module.immediate_friends()); @@ -1348,7 +1354,7 @@ impl Loader { // if linking goes well, insert the module to the code cache let mut locked_cache = self.module_cache.write(); - let module_ref = locked_cache.insert(&self.natives, id.clone(), module)?; + let module_ref = locked_cache.insert(&self.natives, id.clone(), module_size, module)?; drop(locked_cache); // explicit unlock Ok(module_ref) @@ -1612,6 +1618,123 @@ impl Loader { } } } + + // + // Script verification and loading + // + + pub(crate) fn check_script_dependencies_and_check_gas( + &self, + data_store: &mut TransactionDataCache, + gas_meter: &mut impl GasMeter, + traversal_context: &mut TraversalContext, + script_blob: &[u8], + ) -> VMResult<()> { + let mut sha3_256 = Sha3_256::new(); + sha3_256.update(script_blob); + let hash_value: [u8; 32] = sha3_256.finalize().into(); + + let script = data_store.load_compiled_script_to_cache(script_blob, hash_value)?; + let script = traversal_context.referenced_scripts.alloc(script); + + // TODO(Gas): Should we charge dependency gas for the script itself? + self.check_dependencies_and_charge_gas( + data_store, + gas_meter, + &mut traversal_context.visited, + traversal_context.referenced_modules, + script.immediate_dependencies_iter(), + )?; + + Ok(()) + } + + /// Traverses the whole transitive closure of dependencies, starting from the specified + /// modules and performs gas metering. + /// + /// The traversal follows a depth-first order, with the module itself being visited first, + /// followed by its dependencies, and finally its friends. + /// DO NOT CHANGE THE ORDER unless you have a good reason, or otherwise this could introduce + /// a breaking change to the gas semantics. + /// + /// This will result in the shallow-loading of the modules -- they will be read from the + /// storage as bytes and then deserialized, but NOT converted into the runtime representation. + /// + /// It should also be noted that this is implemented in a way that avoids the cloning of + /// `ModuleId`, a.k.a. heap allocations, as much as possible, which is critical for + /// performance. + /// + /// TODO: Revisit the order of traversal. Consider switching to alphabetical order. + pub(crate) fn check_dependencies_and_charge_gas<'a, S, I>( + &self, + data_store: &mut TransactionDataCache, + gas_meter: &mut impl GasMeter, + visited: &mut BTreeMap<(&'a AccountAddress, &'a IdentStr), ()>, + referenced_modules: &'a Arena>, + ids: I, + ) -> VMResult<()> + where + I: IntoIterator, + I::IntoIter: DoubleEndedIterator, + { + // Initialize the work list (stack) and the map of visited modules. + // + // TODO: Determine the reserved capacity based on the max number of dependencies allowed. + let mut stack = Vec::with_capacity(512); + + for (addr, name) in ids.into_iter().rev() { + // TODO: Allow the check of special addresses to be customized. + if !addr.is_special() && visited.insert((addr, name), ()).is_none() { + stack.push((addr, name, true)); + } + } + + while let Some((addr, name, allow_loading_failure)) = stack.pop() { + // Load and deserialize the module only if it has not been cached by the loader. + // Otherwise this will cause a significant regression in performance. + let (module, size) = match self + .module_cache + .read() + .module_at(&ModuleId::new(addr.clone(), name.into())) + { + Some(module) => (module.module.clone(), module.size), + None => { + let (module, size, _) = data_store.load_compiled_module_to_cache( + ModuleId::new(*addr, name.to_owned()), + allow_loading_failure, + )?; + (module, size) + } + }; + + // Extend the lifetime of the module to the remainder of the function body + // by storing it in an arena. + // + // This is needed because we need to store references derived from it in the + // work list. + let module = referenced_modules.alloc(module); + + gas_meter + .charge_dependency(false, addr, name, NumBytes::new(size as u64)) + .map_err(|err| { + err.finish(Location::Module(ModuleId::new(*addr, name.to_owned()))) + })?; + + // Explore all dependencies and friends that have been visited yet. + for (addr, name) in module + .immediate_dependencies_iter() + .chain(module.immediate_friends_iter()) + .rev() + { + // TODO: Allow the check of special addresses to be customized. + if !addr.is_special() && visited.insert((addr, name), ()).is_none() { + stack.push((addr, name, false)); + } + } + } + + Ok(()) + } } // @@ -1936,8 +2059,10 @@ impl<'a> Resolver<'a> { pub struct Module { #[allow(dead_code)] id: ModuleId, + // module size in bytes + pub(crate) size: usize, // primitive pools - module: Arc, + pub(crate) module: Arc, // // types as indexes into the Loader type list @@ -1984,6 +2109,7 @@ pub struct Module { impl Module { fn new( sig_cache: &[Vec], + module_size: usize, module: CompiledModule, cache: &ModuleCache, ) -> Result { @@ -2148,6 +2274,7 @@ impl Module { match create() { Ok(_) => Ok(Self { id, + size: module_size, module: Arc::new(module), struct_refs, structs, diff --git a/language/move-vm/runtime/src/module_traversal.rs b/language/move-vm/runtime/src/module_traversal.rs new file mode 100644 index 0000000000..ae1a431268 --- /dev/null +++ b/language/move-vm/runtime/src/module_traversal.rs @@ -0,0 +1,50 @@ +// Copyright (c) The Move Contributors +// SPDX-License-Identifier: Apache-2.0 + +use move_binary_format::{file_format::CompiledScript, CompiledModule}; +use move_core_types::{ + account_address::AccountAddress, identifier::IdentStr, language_storage::ModuleId, +}; +use std::{collections::BTreeMap, sync::Arc}; +use typed_arena::Arena; + +pub struct TraversalStorage { + referenced_scripts: Arena>, + referenced_modules: Arena>, + referenced_module_ids: Arena, + referenced_module_bundles: Arena>, +} + +pub struct TraversalContext<'a> { + pub visited: BTreeMap<(&'a AccountAddress, &'a IdentStr), ()>, + + pub referenced_scripts: &'a Arena>, + pub referenced_modules: &'a Arena>, + pub referenced_module_ids: &'a Arena, + pub referenced_module_bundles: &'a Arena>, +} + +impl TraversalStorage { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + Self { + referenced_scripts: Arena::new(), + referenced_modules: Arena::new(), + referenced_module_ids: Arena::new(), + referenced_module_bundles: Arena::new(), + } + } +} + +impl<'a> TraversalContext<'a> { + pub fn new(storage: &'a TraversalStorage) -> Self { + Self { + visited: BTreeMap::new(), + + referenced_scripts: &storage.referenced_scripts, + referenced_modules: &storage.referenced_modules, + referenced_module_ids: &storage.referenced_module_ids, + referenced_module_bundles: &storage.referenced_module_bundles, + } + } +} diff --git a/language/move-vm/runtime/src/session.rs b/language/move-vm/runtime/src/session.rs index 4f1b22d1f2..c00dc8796b 100644 --- a/language/move-vm/runtime/src/session.rs +++ b/language/move-vm/runtime/src/session.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::loader::{Function, Module}; +use crate::module_traversal::TraversalContext; use crate::{ data_cache::TransactionDataCache, native_extensions::NativeContextExtensions, runtime::VMRuntime, @@ -421,6 +422,59 @@ impl<'r, 'l, S: MoveResolver> Session<'r, 'l, S> { .deserialize_args(arg_tys, serialized_args) .map_err(|err| err.finish(Location::Undefined)) } + pub fn check_dependencies_and_charge_gas<'a, I>( + &mut self, + gas_meter: &mut impl GasMeter, + traversal_context: &mut TraversalContext<'a>, + ids: I, + ) -> VMResult<()> + where + I: IntoIterator, + I::IntoIter: DoubleEndedIterator, + { + self.runtime.loader().check_dependencies_and_charge_gas( + &mut self.data_cache, + gas_meter, + &mut traversal_context.visited, + traversal_context.referenced_modules, + ids, + ) + } + + pub fn check_dependencies_and_charge_gas_non_recursive_optional<'a, I>( + &mut self, + gas_meter: &mut impl GasMeter, + traversal_context: &mut TraversalContext<'a>, + ids: I, + ) -> VMResult<()> + where + I: IntoIterator, + { + self.runtime + .loader() + .check_dependencies_and_charge_gas_non_recursive_optional( + &mut self.data_cache, + gas_meter, + &mut traversal_context.visited, + ids, + ) + } + + pub fn check_script_dependencies_and_check_gas( + &mut self, + gas_meter: &mut impl GasMeter, + traversal_context: &mut TraversalContext, + script: impl Borrow<[u8]>, + ) -> VMResult<()> { + self.runtime + .loader() + .check_script_dependencies_and_check_gas( + &mut self.data_cache, + gas_meter, + traversal_context, + script.borrow(), + ) + } } pub struct LoadedFunctionInstantiation {