Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

fix(hstr): Prevent memory leak for global stores #10047

Merged
merged 8 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/chatty-horses-invent.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
hstr: patch
swc_core: patch
---

fix(hstr): Prevent memory leak for global stores
100 changes: 84 additions & 16 deletions crates/hstr/src/dynamic.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::{
borrow::Cow,
cell::RefCell,
ffi::c_void,
hash::{BuildHasherDefault, Hash, Hasher},
mem::ManuallyDrop,
mem::{forget, ManuallyDrop},
ops::Deref,
ptr::NonNull,
ptr::{self, NonNull},
};

use rustc_hash::FxHasher;
Expand All @@ -15,13 +16,26 @@ use crate::{
Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
};

#[derive(PartialEq, Eq)]
pub(crate) struct Metadata {
pub hash: u64,
pub is_global: bool,
}

#[derive(Clone)]
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct Item(pub ThinArc<HeaderWithLength<Metadata>, u8>);

impl Item {
/// https://users.rust-lang.org/t/what-is-the-ideal-way-to-destruct-a-guard-type/25974/8
fn into_inner(self) -> ThinArc<HeaderWithLength<Metadata>, u8> {
unsafe {
let inner = ptr::read(&self.0);
forget(self);
inner
}
}
}

impl Deref for Item {
type Target = <ThinArc<HeaderWithLength<Metadata>, u8> as Deref>::Target;

Expand All @@ -30,9 +44,6 @@ impl Deref for Item {
}
}

/// TODO: Use real weak pointer
type WeakItem = Item;

impl Hash for Item {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.0.header.header.header.hash);
Expand All @@ -56,7 +67,7 @@ pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
///
/// # Merging [AtomStore]
pub struct AtomStore {
pub(crate) data: hashbrown::HashMap<WeakItem, (), BuildEntryHasher>,
pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
}

impl Default for AtomStore {
Expand All @@ -70,13 +81,42 @@ impl Default for AtomStore {
impl AtomStore {
#[inline(always)]
pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
new_atom(self, text.into())
atom_in(self, &text.into(), false)
}
}

impl Drop for Item {
fn drop(&mut self) {
// If we are going to drop the last reference, we need to remove the
// entry from the global store if it is a global atom
if self.0.header.header.header.is_global && ThinArc::strong_count(&self.0) == 2 {
let v = GLOBAL_DATA.try_with(|global| {
let mut store = global.borrow_mut();
store.data.remove_entry(self)
});

if let Ok(Some((v, _))) = v {
v.into_inner();
}
}
}
}

thread_local! {
static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}

pub(crate) fn global_atom(text: &str) -> Atom {
GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();

atom_in(&mut *store, text, true)
})
}

/// This can create any kind of [Atom], although this lives in the `dynamic`
/// module.
pub(crate) fn new_atom<S>(storage: S, text: Cow<str>) -> Atom
pub(crate) fn atom_in<S>(storage: S, text: &str, is_global: bool) -> Atom
where
S: Storage,
{
Expand All @@ -92,9 +132,9 @@ where
return Atom { unsafe_data };
}

let hash = calc_hash(&text);
let entry = storage.insert_entry(text, hash);
let entry = ThinArc::into_raw(entry.0) as *mut c_void;
let hash = calc_hash(text);
let entry = storage.insert_entry(text, hash, is_global);
let entry = ThinArc::into_raw(entry.into_inner()) as *mut c_void;

let ptr: NonNull<c_void> = unsafe {
// Safety: Arc::into_raw returns a non-null pointer
Expand All @@ -107,16 +147,16 @@ where
}

pub(crate) trait Storage {
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item;
fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item;
}

impl Storage for &'_ mut AtomStore {
#[inline(never)]
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item {
fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item {
// If the text is too long, interning is not worth it.
if text.len() > 512 {
return Item(ThinArc::from_header_and_slice(
HeaderWithLength::new(Metadata { hash }, text.len()),
HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
text.as_bytes(),
));
}
Expand All @@ -130,7 +170,7 @@ impl Storage for &'_ mut AtomStore {
.or_insert_with(move || {
(
Item(ThinArc::from_header_and_slice(
HeaderWithLength::new(Metadata { hash }, text.len()),
HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
text.as_bytes(),
)),
(),
Expand Down Expand Up @@ -187,3 +227,31 @@ impl Hasher for EntryHasher {
self.hash = val;
}
}

#[cfg(test)]
mod tests {
use crate::{dynamic::GLOBAL_DATA, Atom};

#[test]
fn global_ref_count_dynamic() {
let atom1 = Atom::new("Hello, world!");

let atom2 = Atom::new("Hello, world!");

// 2 for the two atoms, 1 for the global store
assert_eq!(atom1.ref_count(), 3);
assert_eq!(atom2.ref_count(), 3);

drop(atom1);

// 1 for the atom2, 1 for the global store
assert_eq!(atom2.ref_count(), 2);

drop(atom2);

GLOBAL_DATA.with(|global| {
let store = global.borrow();
assert_eq!(store.data.len(), 0);
});
}
}
20 changes: 4 additions & 16 deletions crates/hstr/src/global_store.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
use std::{borrow::Cow, cell::RefCell};
use std::borrow::Cow;

use crate::{Atom, AtomStore};

fn atom(text: Cow<str>) -> Atom {
thread_local! {
static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}

GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();

store.atom(text)
})
}
use crate::{dynamic::global_atom, Atom};

macro_rules! direct_from_impl {
($T:ty) => {
impl From<$T> for Atom {
fn from(s: $T) -> Self {
atom(s.into())
global_atom(&s)
}
}
};
Expand All @@ -30,6 +18,6 @@ direct_from_impl!(String);

impl From<Box<str>> for crate::Atom {
fn from(s: Box<str>) -> Self {
atom(Cow::Owned(String::from(s)))
global_atom(&s)
}
}
Loading