Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate staging tables after ingestion #778

Merged
10 changes: 9 additions & 1 deletion configuration/etl/etl.d/jobs_cloud_generic.json
Original file line number Diff line number Diff line change
@@ -177,6 +177,7 @@
"name": "GenericCloudStagingEventIngestor",
"description": "Generic staging data for cloud events",
"class": "DatabaseIngestor",
"truncate_destination": "true",
"definition_file": "cloud_generic/staging_event.json"
},
{
@@ -196,10 +197,17 @@
{
"#": "Instance data must be ingested after staging events",
"name": "GenericCloudEventIngestor",
"description": "Generic cloud event and instance data",
"description": "Generic cloud event",
"class": "DatabaseIngestor",
"definition_file": "cloud_generic/event.json"
},
{
"#": "Instance data must be ingested after events",
"name": "GenericCloudInstanceDataIngestor",
"description": "Generic cloud instance data",
"class": "DatabaseIngestor",
"definition_file": "cloud_generic/instance_data.json"
},
{
"#": "Asset data must be ingested after events",
"name": "GenericCloudEventAssetRootVolumeIngestor",
12 changes: 10 additions & 2 deletions configuration/etl/etl.d/jobs_cloud_openstack.json
Original file line number Diff line number Diff line change
@@ -147,6 +147,7 @@
"name": "OpenStackCloudStagingEventIngestor",
"description": "OpenStack staging data for cloud events",
"class": "DatabaseIngestor",
"truncate_destination": true,
"definition_file": "cloud_openstack/staging_event.json"
},
{
@@ -157,12 +158,19 @@
"definition_file": "cloud_openstack/root_volume.json"
},
{
"#": "Instance data must be ingested after staging events",
"#": "Events must be ingested after all other dimensions",
"name": "OpenStackCloudEventIngestor",
"description": "OpenStack cloud event and instance data",
"description": "OpenStack cloud event",
"class": "DatabaseIngestor",
"definition_file": "cloud_openstack/event.json"
},
{
"#": "Instance data must be ingested after events",
"name": "OpenStackInstanceDataIngestor",
"description": "OpenStack instance data",
"class": "DatabaseIngestor",
"definition_file": "cloud_openstack/instance_data.json"
},
{
"#": "Events must be ingested after all other dimensions",
"name": "OpenStackCloudEventAssetRootVolumeIngestor",
25 changes: 0 additions & 25 deletions configuration/etl/etl_action_defs.d/cloud_generic/event.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,17 @@
{
"table_definition": [
{
"$ref": "${table_definition_dir}/cloud_common/instance_data.json#/table_definition"
},
{
"$ref": "${table_definition_dir}/cloud_common/event.json#/table_definition"
}
],

"source_query": {
"records": {
"event_id": "staging.event_id",
"resource_id": "staging.resource_id",
"instance_id": "staging.instance_id",
"instance_type_id": "staging.instance_type_id",
"event_time_utc": "staging.event_time_utc",
"event_type_id": "staging.event_type_id",
"record_type_id": "staging.record_type_id",
"image_id": "staging.image_id",
"host_id": "staging.host_id"
},

@@ -28,24 +22,5 @@
"alias": "staging"
}
]
},

"destination_field_map": {
"instance_data": {
"resource_id": "resource_id",
"event_id": "event_id",
"instance_type_id": "instance_type_id",
"image_id": "image_id",
"host_id": "host_id"
},
"event": {
"event_id": "event_id",
"resource_id": "resource_id",
"instance_id": "instance_id",
"event_time_utc": "event_time_utc",
"event_type_id": "event_type_id",
"record_type_id": "record_type_id",
"host_id": "host_id"
}
}
}
18 changes: 11 additions & 7 deletions configuration/etl/etl_action_defs.d/cloud_generic/event_asset.json
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
{
"#": "Associate events with assets. Event logs specify an event_data key with an asset",
"#": "identifier that we can use to make the association.",
"#": "See also event_asset_root_volume.def.json",
"#": "Associate events with assets. Only volume assets are associated as they are",
"#": "the only assets that we have data for to associate with instances using the",
"#": "event_data field. See also event_asset_root_volume.def.json",

"table_definition": [
{
"$ref": "${table_definition_dir}/cloud_common/event_asset.json#/table_definition"
}
],

"#": "Also bring in root volumes",

"source_query": {
"records": {
"resource_id": "staging.resource_id",
"event_id": "staging.event_id",
"resource_id": "ev.resource_id",
"event_id": "ev.event_id",
"asset_id": "a.asset_id"
},

@@ -24,6 +22,12 @@
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
},
{
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "ev",
"on": "ev.resource_id = staging.resource_id AND ev.instance_id = staging.instance_id AND ev.event_time_utc = staging.event_time_utc AND ev.event_type_id = staging.event_type_id"
},
{
"name": "asset",
"schema": "${SOURCE_SCHEMA}",
Original file line number Diff line number Diff line change
@@ -13,34 +13,34 @@

"source_query": {
"records": {
"resource_id": "staging.resource_id",
"event_id": "staging.event_id",
"resource_id": "ev.resource_id",
"event_id": "ev.event_id",
"asset_id": "a.asset_id"
},

"joins": [
{
"name": "generic_cloud_staging_event",
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
"alias": "ev"
},
{
"name": "instance",
"schema": "${SOURCE_SCHEMA}",
"alias": "i",
"on": "i.instance_id = staging.instance_id AND i.resource_id = staging.resource_id"
"on": "i.instance_id = ev.instance_id AND i.resource_id = ev.resource_id"
},
{
"name": "asset",
"schema": "${SOURCE_SCHEMA}",
"alias": "a",
"on": "a.resource_id = staging.resource_id AND a.provider_identifier = CONCAT('root-vol-', i.provider_identifier)"
"on": "a.resource_id = ev.resource_id AND a.provider_identifier = CONCAT('root-vol-', i.provider_identifier)"
},
{
"name": "event_type",
"schema": "${SOURCE_SCHEMA}",
"alias": "etype",
"on": "etype.event_type_id = staging.event_type_id"
"on": "etype.event_type_id = ev.event_type_id"
}
],

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"table_definition": [
{
"$ref": "${table_definition_dir}/cloud_common/instance_data.json#/table_definition"
}
],

"source_query": {
"records": {
"event_id": "ev.event_id",
"resource_id": "staging.resource_id",
"instance_type_id": "staging.instance_type_id",
"host_id": "staging.host_id",
"image_id": "staging.image_id"
},

"joins": [
{
"name": "generic_cloud_staging_event",
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
},
{
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "ev",
"on": "ev.resource_id = staging.resource_id AND ev.instance_id = staging.instance_id AND ev.event_time_utc = staging.event_time_utc AND ev.event_type_id = staging.event_type_id"
}
]
}
}
26 changes: 0 additions & 26 deletions configuration/etl/etl_action_defs.d/cloud_openstack/event.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
{
"table_definition": [
{
"$ref": "${table_definition_dir}/cloud_common/instance_data.json#/table_definition"
},
{
"$ref": "${table_definition_dir}/cloud_common/event.json#/table_definition"
}
],

"source_query": {
"records": {
"event_id": "staging.event_id",
"resource_id": "staging.resource_id",
"instance_id": "staging.instance_id",
"instance_type_id": "staging.instance_type_id",
"event_time_utc": "staging.event_time_utc",
"event_type_id": "staging.event_type_id",
"record_type_id": "staging.record_type_id",
"host_id": "staging.host_id",
"image_id": "staging.image_id",
"submission_venue_id": 3
},

@@ -29,25 +23,5 @@
"alias": "staging"
}
]
},

"destination_field_map": {
"instance_data": {
"resource_id": "resource_id",
"event_id": "event_id",
"instance_type_id": "instance_type_id",
"host_id": "host_id",
"image_id": "image_id"
},
"event": {
"event_id": "event_id",
"resource_id": "resource_id",
"instance_id": "instance_id",
"event_time_utc": "event_time_utc",
"event_type_id": "event_type_id",
"record_type_id": "record_type_id",
"host_id": "host_id",
"submission_venue_id": "submission_venue_id"
}
}
}
Original file line number Diff line number Diff line change
@@ -13,8 +13,8 @@

"source_query": {
"records": {
"resource_id": "staging.resource_id",
"event_id": "staging.event_id",
"resource_id": "ev.resource_id",
"event_id": "ev.event_id",
"asset_id": "a.asset_id"
},

@@ -24,6 +24,12 @@
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
},
{
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "ev",
"on": "ev.resource_id = staging.resource_id AND ev.instance_id = staging.instance_id AND ev.event_time_utc = staging.event_time_utc AND ev.event_type_id = staging.event_type_id"
},
{
"name": "asset",
"schema": "${SOURCE_SCHEMA}",
Original file line number Diff line number Diff line change
@@ -13,34 +13,34 @@

"source_query": {
"records": {
"resource_id": "staging.resource_id",
"event_id": "staging.event_id",
"resource_id": "ev.resource_id",
"event_id": "ev.event_id",
"asset_id": "a.asset_id"
},

"joins": [
{
"name": "openstack_staging_event",
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
"alias": "ev"
},
{
"name": "instance",
"schema": "${SOURCE_SCHEMA}",
"alias": "i",
"on": "i.instance_id = staging.instance_id AND i.resource_id = staging.resource_id"
"on": "i.instance_id = ev.instance_id AND i.resource_id = ev.resource_id"
},
{
"name": "asset",
"schema": "${SOURCE_SCHEMA}",
"alias": "a",
"on": "a.resource_id = staging.resource_id AND a.provider_identifier = CONCAT('root-vol-', i.provider_identifier)"
"on": "a.resource_id = ev.resource_id AND a.provider_identifier = CONCAT('root-vol-', i.provider_identifier)"
},
{
"name": "event_type",
"schema": "${SOURCE_SCHEMA}",
"alias": "etype",
"on": "etype.event_type_id = staging.event_type_id"
"on": "etype.event_type_id = ev.event_type_id"
}
],

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"table_definition": [
{
"$ref": "${table_definition_dir}/cloud_common/instance_data.json#/table_definition"
}
],

"source_query": {
"records": {
"event_id": "ev.event_id",
"resource_id": "staging.resource_id",
"instance_type_id": "staging.instance_type_id",
"host_id": "staging.host_id",
"image_id": "staging.image_id"
},

"joins": [
{
"name": "openstack_staging_event",
"schema": "${SOURCE_SCHEMA}",
"alias": "staging"
},
{
"name": "event",
"schema": "${SOURCE_SCHEMA}",
"alias": "ev",
"on": "ev.resource_id = staging.resource_id AND ev.instance_id = staging.instance_id AND ev.event_time_utc = staging.event_time_utc AND ev.event_type_id = staging.event_type_id"
}
]
}
}
17 changes: 16 additions & 1 deletion configuration/etl/etl_tables.d/cloud_common/event.json
Original file line number Diff line number Diff line change
@@ -14,8 +14,9 @@
{
"name": "event_id",
"type": "bigint(20) unsigned",
"extra": "auto_increment",
"nullable": false,
"comment": "Generated during intermediate ingest, relative to the resource."
"comment": "Generated during ingest, relative to the resource."
},
{
"name": "instance_id",
@@ -70,6 +71,20 @@
],
"is_unique": true
},
{
"#": "For MyISAM tables, you can specify AUTO_INCREMENT on a secondary column in a",
"#": "multiple-column index. In this case, the generated value for the AUTO_INCREMENT column",
"#": "is calculated as MAX(auto_increment_column) + 1 WHERE prefix=given-prefix. This is",
"#": "useful when you want to put data into ordered groups.",
"#": "See [MyISAM Notes](https://dev.mysql.com/doc/refman/5.7/en/example-auto-increment.html)",

"name": "increment_key",
"columns": [
"resource_id",
"event_id"
],
"is_unique": true
},
{
"name": "fk_event_type",
"columns": [
Loading