From 21f3b335e6ec873eff5679e67e9f5eecf416a0ce Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 20 Jun 2023 16:43:17 -0400 Subject: [PATCH 1/9] cache sql columns and schemas --- singer_sdk/connectors/sql.py | 40 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index aecfbb0c1..f781f4253 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -53,6 +53,8 @@ def __init__( """ self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None + self._table_cols_cache: dict = {} + self._schema_cache: dict = {} @property def config(self) -> dict: @@ -565,8 +567,10 @@ def schema_exists(self, schema_name: str) -> bool: Returns: True if the database schema exists, False if not. """ - schema_names = sqlalchemy.inspect(self._engine).get_schema_names() - return schema_name in schema_names + if schema_name not in self._schema_cache: + self._schema_cache = sqlalchemy.inspect(self._engine).get_schema_names() + + return schema_name in self._schema_cache def get_table_columns( self, @@ -582,20 +586,24 @@ def get_table_columns( Returns: An ordered list of column objects. """ - _, schema_name, table_name = self.parse_full_table_name(full_table_name) - inspector = sqlalchemy.inspect(self._engine) - columns = inspector.get_columns(table_name, schema_name) - - return { - col_meta["name"]: sqlalchemy.Column( - col_meta["name"], - col_meta["type"], - nullable=col_meta.get("nullable", False), - ) - for col_meta in columns - if not column_names - or col_meta["name"].casefold() in {col.casefold() for col in column_names} - } + if full_table_name not in self._table_cols_cache: + _, schema_name, table_name = self.parse_full_table_name(full_table_name) + inspector = sqlalchemy.inspect(self._engine) + columns = inspector.get_columns(table_name, schema_name) + + self._table_cols_cache[full_table_name] = { + col_meta["name"]: sqlalchemy.Column( + col_meta["name"], + col_meta["type"], + nullable=col_meta.get("nullable", False), + ) + for col_meta in columns + if not column_names + or col_meta["name"].casefold() + in {col.casefold() for col in column_names} + } + + return self._table_cols_cache[full_table_name] def get_table( self, From a70efe6999b219ea56608928c942a206ed8feecf Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 20 Jun 2023 16:43:47 -0400 Subject: [PATCH 2/9] lint fix --- singer_sdk/connectors/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index f781f4253..b2a25a2c2 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -569,7 +569,7 @@ def schema_exists(self, schema_name: str) -> bool: """ if schema_name not in self._schema_cache: self._schema_cache = sqlalchemy.inspect(self._engine).get_schema_names() - + return schema_name in self._schema_cache def get_table_columns( From c75734206b220392f41ee84dcef95ee40e880366 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 20 Jun 2023 16:59:07 -0400 Subject: [PATCH 3/9] fix mypy typing --- singer_sdk/connectors/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index b2a25a2c2..09b4eee8d 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -53,8 +53,8 @@ def __init__( """ self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None - self._table_cols_cache: dict = {} - self._schema_cache: dict = {} + self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} + self._schema_cache: list = [] @property def config(self) -> dict: From 668832b7125f0cec971c35be744841577f9530cb Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Thu, 14 Sep 2023 16:57:16 -0400 Subject: [PATCH 4/9] use lru_cache instead of custom cache --- singer_sdk/connectors/sql.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index a16b3b7dd..e66434f7e 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -58,7 +58,6 @@ def __init__( """ self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None - self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} self._schema_cache: list = [] @property @@ -593,6 +592,7 @@ def schema_exists(self, schema_name: str) -> bool: return schema_name in self._schema_cache + @lru_cache() def get_table_columns( self, full_table_name: str, @@ -607,24 +607,20 @@ def get_table_columns( Returns: An ordered list of column objects. """ - if full_table_name not in self._table_cols_cache: - _, schema_name, table_name = self.parse_full_table_name(full_table_name) - inspector = sqlalchemy.inspect(self._engine) - columns = inspector.get_columns(table_name, schema_name) - - self._table_cols_cache[full_table_name] = { - col_meta["name"]: sqlalchemy.Column( - col_meta["name"], - col_meta["type"], - nullable=col_meta.get("nullable", False), - ) - for col_meta in columns - if not column_names - or col_meta["name"].casefold() - in {col.casefold() for col in column_names} - } - - return self._table_cols_cache[full_table_name] + _, schema_name, table_name = self.parse_full_table_name(full_table_name) + inspector = sqlalchemy.inspect(self._engine) + columns = inspector.get_columns(table_name, schema_name) + + return { + col_meta["name"]: sqlalchemy.Column( + col_meta["name"], + col_meta["type"], + nullable=col_meta.get("nullable", False), + ) + for col_meta in columns + if not column_names + or col_meta["name"].casefold() in {col.casefold() for col in column_names} + } def get_table( self, From d56d963f205b4186a6d9000f9dc9ec55948b11ea Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Thu, 14 Sep 2023 17:00:00 -0400 Subject: [PATCH 5/9] Revert "use lru_cache instead of custom cache" This reverts commit 668832b7125f0cec971c35be744841577f9530cb. --- singer_sdk/connectors/sql.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index e66434f7e..a16b3b7dd 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -58,6 +58,7 @@ def __init__( """ self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None + self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} self._schema_cache: list = [] @property @@ -592,7 +593,6 @@ def schema_exists(self, schema_name: str) -> bool: return schema_name in self._schema_cache - @lru_cache() def get_table_columns( self, full_table_name: str, @@ -607,20 +607,24 @@ def get_table_columns( Returns: An ordered list of column objects. """ - _, schema_name, table_name = self.parse_full_table_name(full_table_name) - inspector = sqlalchemy.inspect(self._engine) - columns = inspector.get_columns(table_name, schema_name) - - return { - col_meta["name"]: sqlalchemy.Column( - col_meta["name"], - col_meta["type"], - nullable=col_meta.get("nullable", False), - ) - for col_meta in columns - if not column_names - or col_meta["name"].casefold() in {col.casefold() for col in column_names} - } + if full_table_name not in self._table_cols_cache: + _, schema_name, table_name = self.parse_full_table_name(full_table_name) + inspector = sqlalchemy.inspect(self._engine) + columns = inspector.get_columns(table_name, schema_name) + + self._table_cols_cache[full_table_name] = { + col_meta["name"]: sqlalchemy.Column( + col_meta["name"], + col_meta["type"], + nullable=col_meta.get("nullable", False), + ) + for col_meta in columns + if not column_names + or col_meta["name"].casefold() + in {col.casefold() for col in column_names} + } + + return self._table_cols_cache[full_table_name] def get_table( self, From 14031f2f293f40771e47eb224dc46e2a67a06725 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 3 Oct 2023 13:27:09 -0400 Subject: [PATCH 6/9] use set for cache instead of list --- singer_sdk/connectors/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index b2565e8c0..fa08b73f6 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -59,7 +59,7 @@ def __init__( self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} - self._schema_cache: list = [] + self._schema_cache: set = {} @property def config(self) -> dict: @@ -587,7 +587,7 @@ def schema_exists(self, schema_name: str) -> bool: True if the database schema exists, False if not. """ if schema_name not in self._schema_cache: - self._schema_cache = sqlalchemy.inspect(self._engine).get_schema_names() + self._schema_cache = set(sqlalchemy.inspect(self._engine).get_schema_names()) return schema_name in self._schema_cache From 61413ee3577c1b5340c9a192cd92c401c03c69f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 17:27:35 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- singer_sdk/connectors/sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index fa08b73f6..b5f8eb757 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -587,7 +587,9 @@ def schema_exists(self, schema_name: str) -> bool: True if the database schema exists, False if not. """ if schema_name not in self._schema_cache: - self._schema_cache = set(sqlalchemy.inspect(self._engine).get_schema_names()) + self._schema_cache = set( + sqlalchemy.inspect(self._engine).get_schema_names() + ) return schema_name in self._schema_cache From e0451b6b5cd0195b8b71de99eec5e06eae194e45 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 3 Oct 2023 14:28:06 -0400 Subject: [PATCH 8/9] fix set typing --- singer_sdk/connectors/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index fa08b73f6..5d86f056a 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -59,7 +59,7 @@ def __init__( self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} - self._schema_cache: set = {} + self._schema_cache: t.Set[str] = set() @property def config(self) -> dict: From 0ef8701f5cc452886621aea390b0685b66068e9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 18:29:09 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- singer_sdk/connectors/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index 3174d8ef5..c6f957589 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -59,7 +59,7 @@ def __init__( self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} - self._schema_cache: t.Set[str] = set() + self._schema_cache: set[str] = set() @property def config(self) -> dict: @@ -588,7 +588,7 @@ def schema_exists(self, schema_name: str) -> bool: """ if schema_name not in self._schema_cache: self._schema_cache = set( - sqlalchemy.inspect(self._engine).get_schema_names() + sqlalchemy.inspect(self._engine).get_schema_names(), ) return schema_name in self._schema_cache