Skip to content

Commit

Permalink
docs: Added an example implementation of JSON schema validation that u…
Browse files Browse the repository at this point in the history
…ses `fastjsonschema` (#2231)

Supersedes #2066

Co-authored-by: Dan Norman <buzzcutnorman@gmail.com>
  • Loading branch information
edgarrmondragon and BuzzCutNorman committed Mar 6, 2024
1 parent 664076b commit 18427b1
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 1 deletion.
1 change: 1 addition & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"coverage[toml]",
"duckdb",
"duckdb-engine",
"fastjsonschema",
"pyarrow",
"pytest",
"pytest-benchmark",
Expand Down
16 changes: 15 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ coverage = {extras = ["toml"], version = ">=7.4"}
duckdb = { version = ">=0.8.0", python = "<3.12" }
duckdb-engine = { version = ">=0.9.4", python = "<3.12" }

fastjsonschema = ">=2.19.1"
mypy = ">=1.0"
pytest-benchmark = ">=4.0.0"
pytest-snapshot = ">=0.9.0"
Expand Down
25 changes: 25 additions & 0 deletions singer_sdk/sinks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,31 @@ def get_validator(self) -> BaseJSONSchemaValidator | None:
Returns:
An instance of a subclass of ``BaseJSONSchemaValidator``.
Example implementation using the `fastjsonschema`_ library:
.. code-block:: python
import fastjsonschema
class FastJSONSchemaValidator(BaseJSONSchemaValidator):
def __init__(self, schema: dict[str, t.Any]) -> None:
super().__init__(schema)
try:
self.validator = fastjsonschema.compile(self.schema)
except fastjsonschema.JsonSchemaDefinitionException as e:
error_message = "Schema Validation Error"
raise InvalidJSONSchema(error_message) from e
def validate(self, record: dict):
try:
self.validator(record)
except fastjsonschema.JsonSchemaValueException as e:
error_message = f"Record Message Validation Error: {e.message}"
raise InvalidRecord(error_message, record) from e
.. _fastjsonschema: https://pypi.org/project/fastjsonschema/
"""
if self.validate_schema:
return JSONSchemaValidator(
Expand Down
61 changes: 61 additions & 0 deletions tests/core/sinks/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,38 @@

import datetime
import itertools
import typing as t

import fastjsonschema
import pytest

from singer_sdk.exceptions import InvalidRecord
from singer_sdk.sinks.core import BaseJSONSchemaValidator, InvalidJSONSchema
from tests.conftest import BatchSinkMock, TargetMock


class FastJSONSchemaValidator(BaseJSONSchemaValidator):
    """JSON schema validator backed by the ``fastjsonschema`` library.

    The schema is compiled in ``__init__`` and the resulting callable is
    invoked for each record passed to ``validate``.
    """

    def __init__(self, schema: dict[str, t.Any]) -> None:
        """Compile the schema into a validation callable.

        Args:
            schema: The JSON schema handed to the base class initializer.

        Raises:
            InvalidJSONSchema: If ``fastjsonschema`` raises
                ``JsonSchemaDefinitionException`` while compiling the schema.
        """
        super().__init__(schema)
        try:
            compiled = fastjsonschema.compile(self.schema)
        except fastjsonschema.JsonSchemaDefinitionException as e:
            message = "Schema Validation Error"
            raise InvalidJSONSchema(message) from e
        # Only reached when compilation succeeded.
        self.validator = compiled

    def validate(self, record: dict):
        """Run the compiled validator against ``record``.

        Args:
            record: The record to check.

        Raises:
            InvalidRecord: If ``fastjsonschema`` raises
                ``JsonSchemaValueException``; the original exception is
                chained as the cause.
        """
        check = self.validator
        try:
            check(record)
        except fastjsonschema.JsonSchemaValueException as e:
            message = f"Record Message Validation Error: {e.message}"
            raise InvalidRecord(message, record) from e


class FastJSONSchemaSink(BatchSinkMock):
    """Batch sink mock that supplies a ``FastJSONSchemaValidator``."""

    def get_validator(self) -> BaseJSONSchemaValidator | None:
        """Return a ``FastJSONSchemaValidator`` built from ``self.schema``."""
        validator = FastJSONSchemaValidator(self.schema)
        return validator


def test_validate_record():
target = TargetMock()
sink = BatchSinkMock(
Expand Down Expand Up @@ -59,6 +84,42 @@ def test_validate_record():
assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999"


def test_validate_fastjsonschema():
    """Check that the fastjsonschema-backed sink rejects an invalid record.

    The raised ``InvalidRecord`` must carry the original
    ``fastjsonschema.JsonSchemaValueException`` as its ``__cause__``.
    """
    schema = {
        "type": "object",
        "properties": {
            "id": {"type": "integer"},
            "created_at": {"type": "string", "format": "date-time"},
            "created_at_date": {"type": "string", "format": "date"},
            "created_at_time": {"type": "string", "format": "time"},
            "invalid_datetime": {"type": "string", "format": "date-time"},
        },
    }
    key_properties = ["id"]
    sink = FastJSONSchemaSink(TargetMock(), "users", schema, key_properties)

    # ``missing_datetime`` is not declared in the schema's properties.
    # ``invalid_datetime`` is declared as ``date-time`` but holds free text;
    # presumably this is the value that triggers the validation failure.
    bad_record = {
        "id": 1,
        "created_at": "2021-01-01T00:00:00+00:00",
        "created_at_date": "2021-01-01",
        "created_at_time": "00:01:00+00:00",
        "missing_datetime": "2021-01-01T00:00:00+00:00",
        "invalid_datetime": "not a datetime",
    }

    expected_message = r"Record Message Validation Error"
    with pytest.raises(InvalidRecord, match=expected_message) as exc_info:
        sink._validator.validate(bad_record)

    cause = exc_info.value.__cause__
    assert isinstance(cause, fastjsonschema.JsonSchemaValueException)


@pytest.fixture
def draft7_sink_stop():
"""Return a sink object with Draft7 checks enabled."""
Expand Down

0 comments on commit 18427b1

Please sign in to comment.