Skip to content

Commit

Permalink
Optimize init and update implementations.
Browse files Browse the repository at this point in the history
  • Loading branch information
jab committed Feb 14, 2024
1 parent ca4a9bd commit ddc2871
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 66 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ name: benchmark
env:
FORCE_COLOR: "1"
PYTHONHASHSEED: "42"
BASELINE_URL: https://github.com/jab/bidict/releases/download/microbenchmarks/GHA-linux-cachegrind-x86_64-CPython-3.12.1-baseline.json
BASELINE_URL: https://github.com/jab/bidict/releases/download/microbenchmarks/GHA-linux-cachegrind-x86_64-CPython-3.12.2-baseline.json

jobs:
benchmark:
Expand All @@ -34,7 +34,10 @@ jobs:
- name: set up Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
with:
python-version: '3.12'
# Pin to micro-release for better reproducibility.
# When upgrading to a new Python version, remember to upload new associated baseline
# benchmark results here: https://github.com/jab/bidict/releases/edit/microbenchmarks
python-version: '3.12.2'
cache: pip
cache-dependency-path: dev-deps/python3.12/test.txt
- name: install PyPI dependencies
Expand Down
9 changes: 7 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ please consider sponsoring bidict on GitHub.`

Primarily, this release simplifies bidict by removing minor features
that are no longer necessary or that have little to no apparent usage,
and it also includes some minor performance optimizations.
These changes will make it easier to maintain and improve bidict in the future,
and it also includes some performance optimizations.

Specifically, initializing or updating a bidict
is now up to 70% faster in microbenchmarks.

The changes in this release will also make it easier
to maintain and improve bidict in the future,
including further potential performance optimizations.

It also contains several other improvements.
Expand Down
84 changes: 37 additions & 47 deletions bidict/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@

OldKV: t.TypeAlias = t.Tuple[OKT[KT], OVT[VT]]
DedupResult: t.TypeAlias = t.Optional[OldKV[KT, VT]]
Write: t.TypeAlias = t.Callable[[], None]
Unwrite: t.TypeAlias = Write
WriteSpec: t.TypeAlias = t.Tuple[t.List[Write], t.List[Unwrite]]
Unwrite: t.TypeAlias = t.Callable[[], None]
BT = t.TypeVar('BT', bound='BidictBase[t.Any, t.Any]')


Expand Down Expand Up @@ -355,66 +353,63 @@ def _dedup(self, key: KT, val: VT, on_dup: OnDup) -> DedupResult[KT, VT]:
# else neither isdupkey nor isdupval.
return oldkey, oldval

def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrites: bool) -> WriteSpec:
"""Given (newkey, newval) to insert, return the operations necessary to perform the write.
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: list[Unwrite] | None) -> None:
"""Insert (newkey, newval), extending *unwrites* with associated inverse operations if provided.
*oldkey* and *oldval* are as returned by :meth:`_dedup`.
If *save_unwrites* is true, also include the inverse operations necessary to undo the write.
If *unwrites* is not None, it is extended with the inverse operations necessary to undo the write.
This design allows :meth:`_update` to roll back a partially applied update that fails part-way through
when necessary. This design also allows subclasses that require additional operations to complete
a write to easily extend this implementation. For example, :class:`bidict.OrderedBidictBase` calls this
inherited implementation, and then extends the list of ops returned with additional operations
needed to keep its internal linked list nodes consistent with its items' order as changes are made.
when necessary.
This design also allows subclasses that require additional operations to easily extend this implementation.
For example, :class:`bidict.OrderedBidictBase` calls this inherited implementation, and then extends *unwrites*
with additional operations needed to keep its internal linked list nodes consistent with its items' order
as changes are made.
"""
fwdm, invm = self._fwdm, self._invm
fwdm_set, invm_set = fwdm.__setitem__, invm.__setitem__
fwdm_del, invm_del = fwdm.__delitem__, invm.__delitem__
writes: list[Write] = [
partial(fwdm_set, newkey, newval),
partial(invm_set, newval, newkey),
]
unwrites: list[Unwrite] = []
# Always perform the following writes regardless of duplication.
fwdm_set(newkey, newval)
invm_set(newval, newkey)
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
if save_unwrites:
unwrites = [
if unwrites is not None:
unwrites.extend((
partial(fwdm_del, newkey),
partial(invm_del, newval),
]
))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
writes.extend((
partial(fwdm_del, oldkey),
partial(invm_del, oldval),
))
if save_unwrites:
unwrites = [
fwdm_del(oldkey)
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
]
))
elif oldval is not MISSING: # just key duplication
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
writes.append(partial(invm_del, oldval))
if save_unwrites:
unwrites = [
invm_del(oldval)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(invm_del, newval),
]
))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
writes.append(partial(fwdm_del, oldkey))
if save_unwrites:
unwrites = [
fwdm_del(oldkey)
if unwrites is not None:
unwrites.extend((
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
partial(fwdm_del, newkey),
]
return writes, unwrites
))

def _update(
self,
Expand Down Expand Up @@ -449,28 +444,23 @@ def _update(
return

# In all other cases, benchmarking has indicated that the update is best implemented as follows:
# For each new item, perform a dup check (raising if necessary), compute the associated writes we need to
# perform on our backing _fwdm and _invm mappings, and apply the writes. If rollback is enabled, also compute
# the associated unwrites as we go. If the update results in a DuplicationError and rollback is enabled, apply
# the accumulated unwrites before raising to ensure we fail clean.
unwrites: list[Unwrite] = []
extend_unwrites = unwrites.extend
spec_write = self._spec_write
# For each new item, perform a dup check (raising if necessary), and apply the associated writes we need to
# perform on our backing _fwdm and _invm mappings. If rollback is enabled, also compute the associated unwrites
# as we go. If the update results in a DuplicationError and rollback is enabled, apply the accumulated unwrites
# before raising, to ensure that we fail clean.
write = self._write
unwrites: list[Unwrite] | None = [] if rollback else None
for key, val in iteritems(arg, **kw):
try:
dedup_result = self._dedup(key, val, on_dup)
except DuplicationError:
if rollback:
if unwrites is not None:
for unwrite in reversed(unwrites):
unwrite()
raise
if dedup_result is None: # no-op
continue
writes, new_unwrites = spec_write(key, val, *dedup_result, save_unwrites=rollback)
for write in writes:
write()
if rollback and new_unwrites: # save new unwrites in case we need them later
extend_unwrites(new_unwrites)
write(key, val, *dedup_result, unwrites=unwrites)

def __copy__(self: BT) -> BT:
"""Used for the copy protocol. See the :mod:`copy` module."""
Expand Down
27 changes: 12 additions & 15 deletions bidict/_orderedbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from weakref import ref as weakref

from ._base import BidictBase
from ._base import WriteSpec
from ._base import Unwrite
from ._bidict import bidict
from ._iter import iteritems
from ._typing import KT
Expand Down Expand Up @@ -167,16 +167,16 @@ def _init_from(self, other: MapOrItems[KT, VT]) -> None:
for k, v in iteritems(other):
korv_by_node_set(new_node(), k if bykey else v)

def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrites: bool) -> WriteSpec:
def _write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], unwrites: list[Unwrite] | None) -> None:
"""See :meth:`bidict.BidictBase._write`."""
writes, unwrites = super()._spec_write(newkey, newval, oldkey, oldval, save_unwrites)
super()._write(newkey, newval, oldkey, oldval, unwrites)
assoc, dissoc = self._assoc_node, self._dissoc_node
node_by_korv, bykey = self._node_by_korv, self._bykey
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
newnode = self._sntl.new_last_node()
writes.append(partial(assoc, newnode, newkey, newval))
if save_unwrites:
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.append(partial(dissoc, newnode))
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
Expand All @@ -188,11 +188,9 @@ def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT],
else:
oldnode = node_by_korv[newval]
newnode = node_by_korv[oldval]
writes.extend((
partial(dissoc, oldnode),
partial(assoc, newnode, newkey, newval),
))
if save_unwrites:
dissoc(oldnode)
assoc(newnode, newkey, newval)
if unwrites is not None:
unwrites.extend((
partial(assoc, newnode, newkey, oldval),
partial(assoc, oldnode, oldkey, newval),
Expand All @@ -202,18 +200,17 @@ def _spec_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT],
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
# oldkey: MISSING, oldval: 3, newkey: 2, newval: 4
node = node_by_korv[newkey if bykey else oldval]
writes.append(partial(assoc, node, newkey, newval))
if save_unwrites:
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append(partial(assoc, node, newkey, oldval))
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
# oldkey: 2, oldval: MISSING, newkey: 4, newval: 3
node = node_by_korv[oldkey if bykey else newval]
writes.append(partial(assoc, node, newkey, newval))
if save_unwrites:
assoc(node, newkey, newval)
if unwrites is not None:
unwrites.append(partial(assoc, node, oldkey, newval))
return writes, unwrites

def __iter__(self) -> t.Iterator[KT]:
"""Iterator over the contained keys in insertion order."""
Expand Down

0 comments on commit ddc2871

Please sign in to comment.