Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not truncate aggregate tables on each ingest #841

Merged
merged 8 commits into from
Mar 18, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Do not truncate aggregate table
  • Loading branch information
rnchakraborty committed Mar 15, 2019
commit 55aed11b4d89c5f0e5bdfb3ef417009ae455fbc1
6 changes: 3 additions & 3 deletions bin/xdmod-ingestor
Original file line number Diff line number Diff line change
@@ -263,11 +263,11 @@ function main()
}

if($datatypeValue == 'openstack'){
$dwi->ingestCloudDataOpenStack();
$dwi->ingestCloudDataOpenStack($lastModifiedStartDate);
}

if($datatypeValue == 'genericcloud'){
$dwi->ingestCloudDataGeneric();
$dwi->ingestCloudDataGeneric($lastModifiedStartDate);
}

if ($datatypeValue == 'storage') {
@@ -293,7 +293,7 @@ function main()
}

if($realmToAggregate == 'cloud' || $realmToAggregate === false){
$dwi->aggregateCloudData();
$dwi->aggregateCloudData($lastModifiedStartDate);
}

if ($realmToAggregate == 'storage' || $realmToAggregate === false) {
Original file line number Diff line number Diff line change
@@ -58,7 +58,7 @@ public function __construct(aOptions $options, EtlConfiguration $etlConfig, Log
$this->_start_event_ids = array(self::START, self::RESUME, self::STATE_REPORT, self::UNSHELVE, self::UNPAUSE, self::UNSUSPEND, self::POWER_ON);
$this->_all_event_ids = array_merge($this->_start_event_ids, $this->_stop_event_ids);
$this->_end_time = $etlConfig->getVariableStore()->endDate ? date('Y-m-d H:i:s', strtotime($etlConfig->getVariableStore()->endDate)) : null;

Copy link
Member

@jpwhite4 jpwhite4 Mar 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should come up with a code parser rule that flags this class of whitespace change (and marks it as FAIL).

$this->resetInstance();
}

@@ -132,8 +132,9 @@ protected function getSourceQueryString()
// is lost. To work around this we add a dummy row filled with zeroes.
$colCount = count($this->etlSourceQuery->records);
$unionValues = array_fill(0, $colCount, 0);
$subSelect = "(SELECT DISTINCT instance_id from modw_cloud.event WHERE last_modified > \"" . $this->getEtlOverseerOptions()->getLastModifiedStartDate() . "\")";

$sql = "$sql WHERE event_type_id IN (" . implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC";
$sql = "$sql WHERE instance_id IN " . $subSelect . " AND event_type_id IN (" . implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC";

return $sql;
}
28 changes: 20 additions & 8 deletions classes/OpenXdmod/DataWarehouseInitializer.php
Original file line number Diff line number Diff line change
@@ -202,12 +202,16 @@ public function ingestAllHpcdb($startDate = null, $endDate = null)
* tables do not exist then catch the resulting exception and display a message
* saying that there is no OpenStack data to ingest.
*/
public function ingestCloudDataOpenStack()
public function ingestCloudDataOpenStack($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
try{
try {
$this->logger->notice('Ingesting OpenStack event log data');
Utilities::runEtlPipeline(array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'), $this->logger);
Utilities::runEtlPipeline(
array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
@@ -225,12 +229,16 @@ public function ingestCloudDataOpenStack()
* tables do not exist then catch the resulting exception and display a message
* saying that there is no generic cloud data to ingest.
*/
public function ingestCloudDataGeneric()
public function ingestCloudDataGeneric($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
try{
try {
$this->logger->notice('Ingesting generic cloud log files');
Utilities::runEtlPipeline(array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'), $this->logger);
Utilities::runEtlPipeline(
array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
@@ -273,11 +281,15 @@ public function ingestStorageData()
* catch the resulting exception and display a message saying that there
* is no cloud data to aggregate and cloud aggregation is being skipped.
*/
public function aggregateCloudData()
public function aggregateCloudData($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
$this->logger->notice('Aggregating Cloud data');
Utilities::runEtlPipeline(array('cloud-state-pipeline'), $this->logger);
Utilities::runEtlPipeline(
array('cloud-state-pipeline'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);

$filterListBuilder = new FilterListBuilder();
$filterListBuilder->setLogger($this->logger);
2 changes: 0 additions & 2 deletions configuration/etl/etl.d/cloud_state_machine.json
Original file line number Diff line number Diff line change
@@ -35,7 +35,6 @@
"name": "cloud-transient",
"class": "DatabaseIngestor",
"definition_file": "cloud_common/cloud_transient.json",
"truncate_destination": true,
"description": "Builds intermediate cloud event table"
},
{
@@ -46,7 +45,6 @@
"class": "SimpleAggregator",
"description": "Aggregate cloud records.",
"definition_file": "cloud_common/cloud_metrics_aggregation.json",
"truncate_destination": true,
"table_prefix": "cloudfact_by_",
"aggregation_units": [
"day", "month", "quarter", "year"
Original file line number Diff line number Diff line change
@@ -5,8 +5,8 @@
},
"aggregation_period_query": {
"overseer_restrictions": {
"#last_modified_start_date": "last_modified >= ${VALUE}",
"#last_modified_end_date": "last_modified <= ${VALUE}",
"last_modified_start_date": "last_modified >= ${VALUE}",
"last_modified_end_date": "last_modified <= ${VALUE}",
"include_only_resource_codes": "resource_id IN ${VALUE}",
"exclude_resource_codes": "resource_id NOT IN ${VALUE}"
}
17 changes: 15 additions & 2 deletions configuration/etl/etl_tables.d/cloud_common/cloud_transient.json
Original file line number Diff line number Diff line change
@@ -103,6 +103,13 @@
"name": "submission_venue_id",
"type": "int(5)",
"nullable": true
},
{
"name": "last_modified",
"type": "timestamp",
"nullable": false,
"default": "CURRENT_TIMESTAMP",
"extra": "on update CURRENT_TIMESTAMP"
}
],
"indexes": [
@@ -111,7 +118,7 @@
"columns": [
"resource_id",
"instance",
"start_time"
"start_time_ts"
],
"is_unique": true
},
@@ -120,9 +127,15 @@
"columns": [
"instance_id",
"resource_id",
"start_time"
"start_time_ts"
],
"is_unique": true
},
{
"name": "index_last_modified",
"columns": [
"last_modified"
]
}
]
}
13 changes: 13 additions & 0 deletions configuration/etl/etl_tables.d/cloud_common/event.json
Original file line number Diff line number Diff line change
@@ -68,6 +68,13 @@
"type": "int(5)",
"nullable": false,
"default": -1
},
{
"name": "last_modified",
"type": "timestamp",
"nullable": false,
"default": "CURRENT_TIMESTAMP",
"extra": "on update CURRENT_TIMESTAMP"
}
],
"indexes": [
@@ -110,6 +117,12 @@
"resource_id"
],
"is_unique": false
},
{
"name": "index_last_modified",
"columns": [
"last_modified"
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

namespace Controllers;

/**
 * Usage Explorer test cases for the Cloud realm session statistics, using the
 * expected artifacts stored under the "post_ingest" directory.
 */
class UsageExplorerCloudPostIngestTest extends UsageExplorerTest
{
    /**
     * Builds the CSV export test cases for the Cloud realm.
     *
     * Points the parent's artifact base directory at the post_ingest
     * artifacts, runs the parent's default setup, and then asks the parent
     * to generate tests for every combination of the settings below over
     * the single day 2018-05-19.
     *
     * @return mixed Whatever parent::generateTests() returns.
     */
    public function csvExportProvider()
    {
        // The base directory must be set before the default setup runs.
        parent::$baseDir = __DIR__ . '/../../../tests/artifacts/xdmod-test-artifacts/xdmod/post_ingest/';
        parent::defaultSetup();

        // Start and end of the tested range are the same day.
        $testDate = '2018-05-19';

        $settings = array(
            'realm' => array('Cloud'),
            'dataset_type' => array('aggregate', 'timeseries'),
            'statistic' => array(
                'cloud_num_sessions_ended',
                'cloud_num_sessions_running',
                'cloud_num_sessions_started',
            ),
            'group_by' => array(
                'none',
            ),
            'aggregation_unit' => array_keys($this->aggregationUnits)
        );

        return parent::generateTests($settings, $testDate, $testDate);
    }
}
36 changes: 36 additions & 0 deletions open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/sh
# Tests that cloud ETL works after upgrade + reingestion.
#
# Checks SUM(num_sessions_ended) in modw_cloud.cloudfact_by_month before and
# after shredding, ingesting and aggregating the openstack_upgrade reference
# data. Ingestion and aggregation are restricted to rows modified since the
# timestamp captured when this script starts.

# NOTE(review): BASEDIR is not referenced anywhere below; kept as-is.
BASEDIR=./open_xdmod/modules/xdmod/regression_tests
REF_DIR=/var/tmp/referencedata
last_modified_start_date=$(date +'%F %T')

# Prints SUM(num_sessions_ended) from the monthly cloud fact table for the
# month number given as $1.
sessions_ended_in_month()
{
    mysql -N -B -e "SELECT SUM(num_sessions_ended) FROM modw_cloud.cloudfact_by_month WHERE month = $1;"
}

# Exits the script with status 1 unless the actual count ($1) equals the
# expected count ($2).
#
# The operands are quoted and the equality test is negated so that an empty
# or non-numeric result (for example NULL, or no output from a failed query)
# is reported as a mismatch. With an unquoted `[ $count -ne N ]`, such a value
# makes `[` itself fail with a usage error, which `if` treats as false, and
# the check would be silently skipped.
assert_count()
{
    if ! [ "$1" -eq "$2" ] 2>/dev/null
    then
        echo " Count $1 did not match expected result of $2"
        exit 1
    fi
}

# Baseline before the new data is shredded and ingested.
oldCount=$(sessions_ended_in_month 4)
assert_count "$oldCount" 53

sudo -u xdmod xdmod-shredder -r openstack -d "$REF_DIR/openstack_upgrade" -f openstack
sudo -u xdmod xdmod-ingestor --datatype=openstack --last-modified-start-date "$last_modified_start_date"
sudo -u xdmod xdmod-ingestor --aggregate=cloud --last-modified-start-date "$last_modified_start_date"

# Month 4 is expected to change from 53 to 52 after reingestion.
newCount=$(sessions_ended_in_month 4)
assert_count "$newCount" 52

# Month 5 is expected to have 2 ended sessions after reingestion.
newRows=$(sessions_ended_in_month 5)
assert_count "$newRows" 2
1 change: 1 addition & 0 deletions shippable.yml
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ build:
- cp ./configuration/portal_settings.ini ./configuration/portal_settings.ini.old
- cp -f /etc/xdmod/portal_settings.ini ./configuration/portal_settings.ini
- ./open_xdmod/modules/xdmod/integration_tests/runtests.sh --junit-output-dir `pwd`/shippable/testresults/
- ./open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh --junit-output-dir `pwd`/shippable/testresults/
- ./open_xdmod/modules/xdmod/component_tests/runtests.sh --log-junit `pwd`/shippable/testresults/xdmod-component.xml
- ./open_xdmod/modules/xdmod/automated_tests/runtests.sh --headless --log-junit `pwd`/shippable/testresults
- ./open_xdmod/modules/xdmod/integration_tests/scripts/samlSetup.sh
Loading