Skip to content

Commit

Permalink
feat: Languages (#63)
Browse files Browse the repository at this point in the history
* Models

* Alembic

* DB

* GX

* Pretty

* Compound

* Add Primary

* Output

* Format

* Overlaps

* Add/Remove

* Lint

* Function

* Clarity

* Format

* Test Add

* Lint

* Warning

* Test More

* Remove I

* Remove II

* Clarity

* Remove III

* Whoops

* Query

* Test Update

* Format

* Version

* Format

* Coverage

* Coverage

* Test Query
  • Loading branch information
RickiJay-WMDE authored Jan 31, 2025
1 parent 13fdf3a commit bb210e5
Show file tree
Hide file tree
Showing 24 changed files with 1,004 additions and 3 deletions.
42 changes: 42 additions & 0 deletions alembic/versions/eeea3f16e54c_language_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Language Table
Revision ID: eeea3f16e54c
Revises: 0de663bb9c13
Create Date: 2025-01-28 10:07:19.177995
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# `revision` is this migration's own id; `down_revision` is the id of the
# migration it revises (matches "Revises" in the module docstring).
revision: str = "eeea3f16e54c"
down_revision: Union[str, None] = "0de663bb9c13"
# No branch labels or cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``wikibase_language`` table.

    The table has an auto-incrementing integer ``id`` primary key, a
    non-null ``wikibase_id`` foreign key to ``wikibase.id``, a non-null
    ``language`` string and a non-null boolean ``primary`` column. The
    pair ``(wikibase_id, language)`` is constrained to be unique.
    """
    # Column definitions, in the order they appear in the table.
    columns = (
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("wikibase_id", sa.Integer(), nullable=False),
        sa.Column("language", sa.String(), nullable=False),
        sa.Column("primary", sa.Boolean(), nullable=False),
    )

    # Table-level constraints.
    # NOTE(review): the foreign key is named "observation_wikibase", which
    # does not mention this table - presumably carried over from another
    # migration. It is kept unchanged because renaming it would alter the
    # schema this revision produces; confirm before changing.
    constraints = (
        sa.ForeignKeyConstraint(
            ["wikibase_id"], ["wikibase.id"], name="observation_wikibase"
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "wikibase_id", "language", name="unique_wikibase_language"
        ),
    )

    op.create_table("wikibase_language", *columns, *constraints)


def downgrade() -> None:
    """Drop the ``wikibase_language`` table created by ``upgrade``."""
    table_name = "wikibase_language"
    op.drop_table(table_name)
8 changes: 8 additions & 0 deletions data/gx/checkpoints/wikibase_checkpoint.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@
{
"id": "cc0fac73-9df0-4962-b78a-b28f033eb2d4",
"name": "wikibase_url_validation_definition"
},
{
"id": "2a80d595-c12e-4c56-ad29-290e800dfe72",
"name": "wikibase_language_validation_definition"
},
{
"id": "4bc400ce-a637-4699-9095-123cee9c018a",
"name": "wikibase_language_count_validation_definition"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"expectations": [
{
"id": "934889b2-7f84-4ca8-afef-dc0c573fc9a7",
"kwargs": { "column": "wikibase_id" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "671d3494-8e38-4b8e-8bf6-d5f587d1ed18",
"kwargs": { "column": "wikibase_id" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "572639d5-cf91-4a75-8fb9-b3fd40905848",
"kwargs": { "column": "wikibase_id" },
"meta": {},
"type": "expect_column_values_to_be_unique"
},
{
"id": "c1e34007-8f5a-4412-9499-a13245a91dbc",
"kwargs": { "column": "primary_language_count" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "27badfe5-7cd2-4c99-8d46-307320996874",
"kwargs": { "column": "primary_language_count" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "5625cfb2-4a1f-4dfd-ac8e-4ee0b08decb2",
"kwargs": { "column": "primary_language_count", "value_set": [1] },
"meta": {},
"type": "expect_column_distinct_values_to_be_in_set"
},
{
"id": "b34d5a6f-5f6e-4eae-8149-1b6e0afb27e2",
"kwargs": { "column": "additional_language_count" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "31397b9c-f7e0-4d69-a510-13e639235660",
"kwargs": { "column": "additional_language_count" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
}
],
"id": "fa88a358-28c5-4dfd-9ca3-aaa391d30e10",
"meta": { "great_expectations_version": "1.3.3" },
"name": "wikibase_language_count_expectation_suite",
"notes": null
}
88 changes: 88 additions & 0 deletions data/gx/expectations/wikibase_language_expectation_suite.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"expectations": [
{
"id": "e0fddf5c-80f9-4c05-909c-d3380280627f",
"kwargs": { "column": "id" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "1a65f1b0-1dbb-4473-9506-576b1ca344c0",
"kwargs": { "column": "id" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "701f7aae-f0f4-4c37-9500-dd8e54eb2396",
"kwargs": { "column": "id" },
"meta": {},
"type": "expect_column_values_to_be_unique"
},
{
"id": "92486feb-40b6-4b0b-8dab-ac4deec45130",
"kwargs": { "column": "wikibase_id" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "4c4fdcb7-a938-42bb-a57a-c6b30b157e6a",
"kwargs": { "column": "wikibase_id" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "7677ea79-f7fe-4335-97b3-6f7a20583a2e",
"kwargs": { "column": "language" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "0d92131d-d654-4825-b8df-f5ca15e68109",
"kwargs": { "column": "language" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "95fc7a1e-8c53-4415-9cbd-d9b5875ad698",
"kwargs": {
"column": "language",
"regex_list": [
"^[ \t\r\n]*$",
"^[ \t\r\n]+",
"[ \t\r\n]+$",
"[ \t\r\n]{2,}"
]
},
"meta": {},
"type": "expect_column_values_to_not_match_regex_list"
},
{
"id": "ad053b4c-9d8f-4c53-95b6-b39ffc52ede1",
"kwargs": { "column": "primary" },
"meta": {},
"type": "expect_column_to_exist"
},
{
"id": "72b92e2a-763a-4e77-9390-f0a07e9d6e9e",
"kwargs": { "column": "primary" },
"meta": {},
"type": "expect_column_values_to_not_be_null"
},
{
"id": "89b68f0e-f861-4071-a25e-ebec03f48e2f",
"kwargs": { "column": "primary", "value_set": [true, false] },
"meta": {},
"type": "expect_column_distinct_values_to_be_in_set"
},
{
"id": "022203b3-ac36-4c8a-99b0-29d9e256a0e2",
"kwargs": { "column_list": ["wikibase_id", "language"] },
"meta": {},
"type": "expect_compound_columns_to_be_unique"
}
],
"id": "5599fa3b-2819-4c5d-9b52-4882576b7832",
"meta": { "great_expectations_version": "1.3.3" },
"name": "wikibase_language_expectation_suite",
"notes": null
}
22 changes: 22 additions & 0 deletions data/gx/great_expectations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -389,5 +389,27 @@ fluent_datasources:
id: 17906408-5608-45ff-95fe-45ae29b85630
partitioner:
query: SELECT * FROM wikibase_software WHERE url IS NOT NULL
wikibase_language_table:
type: table
id: bf9ac0e5-2242-4350-b9b9-3bb9ffe09dd7
batch_metadata: {}
batch_definitions:
FULL_TABLE:
id: ef258a6b-943c-4e72-97d6-48205e388e01
partitioner:
table_name: wikibase_language
schema_name:
wikibase_language_count:
type: query
id: de236522-ceb5-4822-a550-388b53c8dfb6
batch_metadata: {}
batch_definitions:
FULL_TABLE:
id: 7948822f-014c-440e-a0b0-7823e2ab85d9
partitioner:
query:
"SELECT\n\twikibase_id,\n\tSUM(CASE WHEN \"primary\" THEN 1 ELSE 0
END) AS primary_language_count,\n\tSUM(CASE WHEN \"primary\" THEN 0 ELSE
1 END) AS additional_language_count\nFROM wikibase_language\nGROUP BY wikibase_id"
connection_string: sqlite:///data/wikibase-data.db
data_context_id: 14fd50bc-47c8-4033-9643-e60c471dc6c5
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"data": {
"asset": {
"id": "de236522-ceb5-4822-a550-388b53c8dfb6",
"name": "wikibase_language_count"
},
"batch_definition": {
"id": "7948822f-014c-440e-a0b0-7823e2ab85d9",
"name": "FULL_TABLE"
},
"datasource": {
"id": "4a58b404-1ee3-4e91-a373-78d1b365f987",
"name": "wikibase_datasource"
}
},
"id": "4bc400ce-a637-4699-9095-123cee9c018a",
"name": "wikibase_language_count_validation_definition",
"suite": {
"id": "fa88a358-28c5-4dfd-9ca3-aaa391d30e10",
"name": "wikibase_language_count_expectation_suite"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"data": {
"asset": {
"id": "bf9ac0e5-2242-4350-b9b9-3bb9ffe09dd7",
"name": "wikibase_language_table"
},
"batch_definition": {
"id": "ef258a6b-943c-4e72-97d6-48205e388e01",
"name": "FULL_TABLE"
},
"datasource": {
"id": "4a58b404-1ee3-4e91-a373-78d1b365f987",
"name": "wikibase_datasource"
}
},
"id": "2a80d595-c12e-4c56-ad29-290e800dfe72",
"name": "wikibase_language_validation_definition",
"suite": {
"id": "5599fa3b-2819-4c5d-9b52-4882576b7832",
"name": "wikibase_language_expectation_suite"
}
}
Binary file modified data/wikibase-data.db
Binary file not shown.
Binary file modified data/wikibase-test-data.db
Binary file not shown.
7 changes: 6 additions & 1 deletion fetch_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,9 @@
create_special_statistics_observation,
update_software_data,
)
from fetch_data.update_data import merge_software_by_id
from fetch_data.update_data import (
add_wikibase_language,
merge_software_by_id,
remove_wikibase_language,
update_wikibase_primary_language,
)
5 changes: 5 additions & 0 deletions fetch_data/update_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""Update Data"""

from fetch_data.update_data.merge_software import merge_software_by_id
from fetch_data.update_data.update_wikibase_language import (
add_wikibase_language,
remove_wikibase_language,
update_wikibase_primary_language,
)
Loading

0 comments on commit bb210e5

Please sign in to comment.