Skip to content

Commit

Permalink
Optimize init and update implementations.
Browse files Browse the repository at this point in the history
  • Loading branch information
jab committed Feb 14, 2024
1 parent ca4a9bd commit ddc2871
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 66 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ name: benchmark
env:
FORCE_COLOR: "1"
PYTHONHASHSEED: "42"
BASELINE_URL: https://github.com/jab/bidict/releases/download/microbenchmarks/GHA-linux-cachegrind-x86_64-CPython-3.12.1-baseline.json
BASELINE_URL: https://github.com/jab/bidict/releases/download/microbenchmarks/GHA-linux-cachegrind-x86_64-CPython-3.12.2-baseline.json

jobs:
benchmark:
Expand All @@ -34,7 +34,10 @@ jobs:
- name: set up Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
with:
python-version: '3.12'
# Pin to micro-release for better reproducibility.
# When upgrading to a new Python version, remember to upload new associated baseline
# benchmark results here: https://github.com/jab/bidict/releases/edit/microbenchmarks
python-version: '3.12.2'
cache: pip
cache-dependency-path: dev-deps/python3.12/test.txt
- name: install PyPI dependencies
Expand Down
9 changes: 7 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ please consider sponsoring bidict on GitHub.`

Primarily, this release simplifies bidict by removing minor features
that are no longer necessary or that have little to no apparent usage,
and it also includes some minor performance optimizations.
These changes will make it easier to maintain and improve bidict in the future,
and it also includes some performance optimizations.

Specifically, initializing or updating a bidict
is now up to 70% faster in microbenchmarks.

The changes in this release will also make it easier
to maintain and improve bidict in the future,
including further potential performance optimizations.

It also contains several other improvements.
Expand Down
84 changes: 37 additions & 47 deletions bidict/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@

OldKV: t.TypeAlias = t.Tuple[OKT[KT], OVT[VT]]
DedupResult: t.TypeAlias = t.Optional[OldKV[KT, VT]]
Write: t.TypeAlias = t.Callable[[], None]
Unwrite: t.TypeAlias = Write
WriteSpec: t.TypeAlias = t.Tuple[t.List[Write], t.List[Unwrite]]
Unwrite: t.TypeAlias = t.Callable[[], None]
BT = t.TypeVar('BT', bound='BidictBase[t.Any, t.Any]')


Expand Down Expand Up @@ -355,66 +353,63 @@ def _dedup(self, key: KT, val: VT, on_dup: OnDup) -> DedupResult[KT, VT]:
# else neither isdupkey nor isdupval.
return oldkey, oldval

def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrites: bool) -> WriteSpec:
"""Given (newkey, newval) to insert, return the operations necessary to perform the write.
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: list[Unwrite] | None) -> None:
"""Insert (newkey, newval), extending *unwrites* with associated inverse operations if provided.
*oldkey* and *oldval* are as returned by :meth:`_dedup`.
If *save_unwrites* is true, also include the inverse operations necessary to undo the write.
If *unwrites* is not None, it is extended with the inverse operations necessary to undo the write.
This design allows :meth:`_update` to roll back a partially applied update that fails part-way through
when necessary. This design also allows subclasses that require additional operations to complete
a write to easily extend this implementation. For example, :class:`bidict.OrderedBidictBase` calls this
inherited implementation, and then extends the list of ops returned with additional operations
needed to keep its internal linked list nodes consistent with its items' order as changes are made.
when necessary.
This design also allows subclasses that require additional operations to easily extend this implementation.
For example, :class:`bidict.OrderedBidictBase` calls this inherited implementation, and then extends *unwrites*
with additional operations needed to keep its internal linked list nodes consistent with its items' order
as changes are made.
"""
fwdm, invm = self._fwdm, self._invm
fwdm_set, invm_set = fwdm.__setitem__, invm.__setitem__
fwdm_del, invm_del = fwdm.__delitem__, invm.__delitem__
writes: list[Write] = [
partial(fwdm_set, newkey, newval),
partial(invm_set, newval, newkey),
]
unwrites: list[Unwrite] = []
# Always perform the following writes regardless of duplication.
fwdm_set(newkey, newval)
invm_set(newval, newkey)
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
if save_unwrites:
unwrites = [
if unwrites is not None:
unwrites.extend((
partial(fwdm_del, newkey),
partial(invm_del, newval),
]
))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
writes.extend((
partial(fwdm_del, oldkey),
partial(invm_del, oldval),
))
if save_unwrites:
unwrites = [
fwdm_del(oldkey)
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
]
))
elif oldval is not MISSING: # just key duplication
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
writes.append(partial(invm_del, oldval))
if save_unwrites:
unwrites = [
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(invm_del, newval),
]
))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
writes.append(partial(fwdm_del, oldkey))
if save_unwrites:
unwrites = [
fwdm_del(oldkey)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
partial(fwdm_del, newkey),
]
return writes, unwrites
))

def _update(
self,
Expand Down Expand Up @@ -449,28 +444,23 @@ def _update(
return

# In all other cases, benchmarking has indicated that the update is best implemented as follows:
# For each new item, perform a dup check (raising if necessary), compute the associated writes we need to
# perform on our backing _fwdm and _invm mappings, and apply the writes. If rollback is enabled, also compute
# the associated unwrites as we go. If the update results in a DuplicationError and rollback is enabled, apply
# the accumulated unwrites before raising to ensure we fail clean.
unwrites: list[Unwrite] = []
extend_unwrites = unwrites.extend
spec_write = self._spec_write
# For each new item, perform a dup check (raising if necessary), and apply the associated writes we need to
# perform on our backing _fwdm and _invm mappings. If rollback is enabled, also compute the associated unwrites
# as we go. If the update results in a DuplicationError and rollback is enabled, apply the accumulated unwrites
# before raising, to ensure that we fail clean.
write = self._write
unwrites: list[Unwrite] | None = [] if rollback else None
for key, val in iteritems(arg, **kw):
try:
dedup_result = self._dedup(key, val, on_dup)
except DuplicationError:
if rollback:
if unwrites is not None:
for unwrite in reversed(unwrites):
unwrite()
raise
if dedup_result is None: # no-op
continue
writes, new_unwrites = spec_write(key, val, *dedup_result, save_unwrites=rollback)
for write in writes:
write()
if rollback and new_unwrites: # save new unwrites in case we need them later
extend_unwrites(new_unwrites)
write(key, val, *dedup_result, unwrites=unwrites)

def __copy__(self: BT) -> BT:
"""Used for the copy protocol. See the :mod:`copy` module."""
Expand Down
27 changes: 12 additions & 15 deletions bidict/_orderedbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from weakref import ref as weakref

from ._base import BidictBase
from ._base import WriteSpec
from ._base import Unwrite
from ._bidict import bidict
from ._iter import iteritems
from ._typing import KT
Expand Down Expand Up @@ -167,16 +167,16 @@ def _init_from(self, other: MapOrItems[KT, VT]) -> None:
for k, v in iteritems(other):
korv_by_node_set(new_node(), k if bykey else v)

def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrites: bool) -> WriteSpec:
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: list[Unwrite] | None) -> None:
"""See :meth:`bidict.BidictBase._write`."""
writes, unwrites = super()._spec_write(newkey, newval, oldkey, oldval, save_unwrites)
super()._write(newkey, newval, oldkey, oldval, unwrites)
assoc, dissoc = self._assoc_node, self._dissoc_node
node_by_korv, bykey = self._node_by_korv, self._bykey
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
newnode = self._sntl.new_last_node()
writes.append(partial(assoc, newnode, newkey, newval))
if save_unwrites:
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.append(partial(dissoc, newnode))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
Expand All @@ -188,11 +188,9 @@ def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT],
else:
oldnode = node_by_korv[newval]
newnode = node_by_korv[oldval]
writes.extend((
partial(dissoc, oldnode),
partial(assoc, newnode, newkey, newval),
))
if save_unwrites:
dissoc(oldnode)
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.extend((
partial(assoc, newnode, newkey, oldval),
partial(assoc, oldnode, oldkey, newval),
Expand All @@ -202,18 +200,17 @@ def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT],
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
# oldkey: MISSING, oldval: 3, newkey: 2, newval: 4
node = node_by_korv[newkey if bykey else oldval]
writes.append(partial(assoc, node, newkey, newval))
if save_unwrites:
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append(partial(assoc, node, newkey, oldval))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
# oldkey: 2, oldval: MISSING, newkey: 4, newval: 3
node = node_by_korv[oldkey if bykey else newval]
writes.append(partial(assoc, node, newkey, newval))
if save_unwrites:
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append(partial(assoc, node, oldkey, newval))
return writes, unwrites

def __iter__(self) -> t.Iterator[KT]:
"""Iterator over the contained keys in insertion order."""
Expand Down

0 comments on commit ddc2871

Please sign in to comment.