Skip to content

Commit fa75049

Browse files
timopollmeier and greenbonebot
authored and committed
Change: Reuse matches in CPE match strings API
When parsing the CPE match string JSON from the NVD API, the matching CPEs are cached and reused if identical ones appear in another match string. This can save memory when processing a large number of CPE match strings with duplicate matches.
1 parent b172bee commit fa75049

File tree

2 files changed

+60
-10
lines changed

2 files changed

+60
-10
lines changed

pontos/nvd/cpe_match/api.py

+32-9
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,6 @@
3434
MAX_CPE_MATCHES_PER_PAGE = 500
3535

3636

37-
def _result_iterator(data: JSON) -> Iterator[CPEMatchString]:
    """
    Creates an iterator of all the CPEMatchStrings in given API response JSON

    Args:
        data: The JSON response data to get the match strings from

    Returns:
        An iterator over the CPEMatchStrings
    """
    # A response without a "match_strings" key yields nothing.
    entries: list[dict[str, Any]] = data.get("match_strings", [])  # type: ignore
    # Each entry wraps the actual payload in a "match_string" key; both
    # stages stay lazy, so models are only built while iterating.
    payloads = (entry["match_string"] for entry in entries)
    return (CPEMatchString.from_dict(payload) for payload in payloads)
42-
43-
4437
class CPEMatchApi(NVDApi):
4538
"""
4639
API for querying the NIST NVD CPE match information.
@@ -62,6 +55,7 @@ def __init__(
6255
token: Optional[str] = None,
6356
timeout: Optional[Timeout] = DEFAULT_TIMEOUT_CONFIG,
6457
rate_limit: bool = True,
58+
cache_cpe_matches: bool = True,
6559
) -> None:
6660
"""
6761
Create a new instance of the CPE API.
@@ -76,13 +70,22 @@ def __init__(
7670
rolling 30 second window.
7771
See https://nvd.nist.gov/developers/start-here#divRateLimits
7872
Default: True.
73+
cache_cpe_matches: If set to True (the default) the entries in the
74+
lists of matching CPEs for each match string are cached and reused
75+
to use less memory.
76+
If set to False, a separate CPEMatch object is kept for each entry
77+
to avoid possible side effects when modifying the data.
7978
"""
8079
super().__init__(
8180
DEFAULT_NIST_NVD_CPE_MATCH_URL,
8281
token=token,
8382
timeout=timeout,
8483
rate_limit=rate_limit,
8584
)
85+
if cache_cpe_matches:
86+
self._cpe_match_cache = {}
87+
else:
88+
self._cpe_match_cache = None
8689

8790
def cpe_matches(
8891
self,
@@ -157,12 +160,30 @@ def cpe_matches(
157160
return NVDResults(
158161
self,
159162
params,
160-
_result_iterator,
163+
self._result_iterator,
161164
request_results=request_results,
162165
results_per_page=results_per_page,
163166
start_index=start_index,
164167
)
165168

169+
def _result_iterator(self, data: JSON) -> Iterator[CPEMatchString]:
    """
    Build a lazy iterator over the CPEMatchStrings of one API response.

    Each model is created via CPEMatchString.from_dict_with_cache using
    this instance's CPE match cache (self._cpe_match_cache).

    Args:
        data: The JSON response data to get the match strings from

    Returns:
        An iterator over the CPEMatchStrings
    """
    # A response without a "match_strings" key yields nothing.
    entries: list[dict[str, Any]] = data.get("match_strings", [])  # type: ignore

    def parse(entry: dict[str, Any]) -> CPEMatchString:
        # The cache attribute is read per entry, at iteration time.
        return CPEMatchString.from_dict_with_cache(
            entry["match_string"], self._cpe_match_cache
        )

    return (parse(entry) for entry in entries)
186+
166187
async def cpe_match(self, match_criteria_id: str) -> CPEMatchString:
167188
"""
168189
Returns a single CPE match for the given match criteria id.
@@ -201,7 +222,9 @@ async def cpe_match(self, match_criteria_id: str) -> CPEMatchString:
201222
)
202223

203224
match_string = match_strings[0]
204-
return CPEMatchString.from_dict(match_string["match_string"])
225+
return CPEMatchString.from_dict_with_cache(
226+
match_string["match_string"], self._cpe_match_cache
227+
)
205228

206229
async def __aenter__(self) -> "CPEMatchApi":
207230
await super().__aenter__()

pontos/nvd/models/cpe_match_string.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from dataclasses import dataclass, field
77
from datetime import datetime
8-
from typing import List, Optional
8+
from typing import Any, Dict, List, Optional
99
from uuid import UUID
1010

1111
from pontos.models import Model
@@ -55,3 +55,30 @@ class CPEMatchString(Model):
5555
version_start_excluding: Optional[str] = None
5656
version_end_including: Optional[str] = None
5757
version_end_excluding: Optional[str] = None
58+
59+
@classmethod
def from_dict_with_cache(
    cls,
    data: Dict[str, Any],
    cpe_match_cache: Optional[Dict[str, CPEMatch]],
) -> "CPEMatchString":
    """
    Create a CPEMatchString model from a dict, reusing duplicate
    CPEMatch objects to reduce memory usage if a cache dict is given.

    Args:
        data: The JSON dict to generate the model from
        cpe_match_cache: A dictionary to store CPE matches in, or None
            to not cache and reuse CPE matches. A given dictionary is
            updated in place with every match whose cpe_name_id it does
            not contain yet.

    Returns:
        The new CPEMatchString. If a cache is given, entries of its
        matches list may be objects shared with previously created
        match strings.
    """
    new_match_string = cls.from_dict(data)
    if cpe_match_cache is None:
        return new_match_string

    # NOTE(review): the cache is keyed by match.cpe_name_id; if that
    # field is a UUID rather than a str, the Dict key type in the
    # signature should be adjusted - confirm against CPEMatch.
    matches = new_match_string.matches
    for i, match in enumerate(matches):
        cached_match = cpe_match_cache.get(match.cpe_name_id)
        if cached_match is None:
            # First occurrence of this id: remember it for later reuse.
            cpe_match_cache[match.cpe_name_id] = match
        elif cached_match.cpe_name == match.cpe_name:
            # Identical entry seen before: share the cached object.
            matches[i] = cached_match
        # Otherwise the id is known but the name differs: keep the
        # freshly parsed match and leave the cache entry untouched.
    return new_match_string

0 commit comments

Comments
 (0)