Skip to content

Commit

Permalink
Merge pull request #1824 from aaronweeden/log-file-split-request
Browse files Browse the repository at this point in the history
Parse request method, URL, and protocol separately in web server logs.
  • Loading branch information
aaronweeden authored May 22, 2024
2 parents 72c19ab + edbfc29 commit 397d999
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ configuration/aggregation_meta/
# configuration/etl/*.json
# configuration/etl/**/*.json

# Include log files that are used for regression testing
!tests/artifacts/xdmod-test-artifacts/xdmod/referencedata/*.log
# Include log files that are used for CI testing
!tests/artifacts/**/*.log

# Include CCR Log Class
!classes/Log
Expand Down
7 changes: 7 additions & 0 deletions classes/ETL/DataEndpoint/WebServerLogFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ public function __construct(DataEndpointOptions $options, LoggerInterface $logge
$this->web_parser->addPattern('%u', '(?P<user>(?:-|[\w\-\.@]+))');

if (isset($options->log_format)) {
// Replace `%r` with `%m %U %H` so the request method, URL, and
// protocol can be parsed separately.
$options->log_format = str_replace(
'%r',
'%m %U %H',
$options->log_format
);
$this->web_parser->setFormat($options->log_format);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
127.0.0.0 - testuser1 [01/Jul/2021:03:17:06 -0500] "GET /pun/sys/dashboard/apps/icon/jupyter_quantum_chem/sys/sys?foo=bar HTTP/1.1" 200 381 "https://ondemand.ccr.buffalo.edu/pun/sys/dashboard/batch_connect/sessions" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
113 changes: 113 additions & 0 deletions tests/unit/lib/ETL/DataEndpoint/WebServerLogFileTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<?php

namespace UnitTests\ETL\DataEndpoint;

use CCR\Log;
use ETL\DataEndpoint;
use ETL\DataEndpoint\DataEndpointOptions;
use Psr\Log\LoggerInterface;

/**
 * Unit tests for parsing web server log files through the ETL data endpoint
 * infrastructure.
 *
 * Each data set points a `directoryscanner` endpoint with a `webserverlog`
 * handler at a log file artifact and compares every record produced by the
 * endpoint against the expected parsed values.
 */
class WebServerLogFileTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Relative path of the directory holding the input log file artifacts
     * (presumably resolved against the directory the tests are run from).
     */
    const TEST_ARTIFACT_INPUT_PATH = "./../artifacts/xdmod/etlv2/dataendpoint/input/webserverlogfile";

    /**
     * Logger handed to the data endpoint factory.
     *
     * @var LoggerInterface
     */
    private static $logger = null;

    /**
     * Create the logger shared by all tests in this class.
     *
     * File, database, and mail logging are switched off and the console level
     * is set to EMERG.
     */
    public static function setUpBeforeClass()
    {
        // Set up a logger so we can get warnings and error messages from the ETL
        // infrastructure
        $conf = [
            'file' => false,
            'db' => false,
            'mail' => false,
            'consoleLogLevel' => Log::EMERG
        ];

        self::$logger = Log::factory('PHPUnit', $conf);
    }

    /**
     * Parse a log file with the given format and check every record.
     *
     * @dataProvider provideWebServerLogFile
     *
     * @param string $filename name of the log file inside the artifact
     *     directory; it is embedded in the scanner's file pattern.
     * @param string $logFormat log format string passed to the handler.
     * @param array $expected list of expected records, in the order the
     *     endpoint is expected to produce them.
     */
    public function testWebServerLogFile($filename, $logFormat, $expected)
    {
        $config = [
            'type' => 'directoryscanner',
            'name' => 'Web Server Logs',
            'path' => self::TEST_ARTIFACT_INPUT_PATH,
            'file_pattern' => "/$filename/",
            'handler' => (object)[
                'type' => 'webserverlog',
                'record_separator' => "\n",
                'log_format' => $logFormat
            ]
        ];
        $options = new DataEndpointOptions($config);
        $endpoint = DataEndpoint::factory($options, self::$logger);
        $endpoint->verify();
        $endpoint->connect();

        $numIterations = 0;
        foreach ($endpoint as $record) {
            // Fail with a readable message instead of an undefined offset
            // error when the endpoint yields more records than expected.
            $this->assertArrayHasKey(
                $numIterations,
                $expected,
                'Parsed more records than expected.'
            );
            $this->assertSame($expected[$numIterations], $record);
            $numIterations++;
        }

        // Catches the opposite case: fewer records parsed than expected.
        $this->assertSame(
            count($expected),
            $numIterations,
            'Did not parse correct number of records.'
        );
    }

    /**
     * Provide one data set per log format.
     *
     * The same log file is parsed once with the combined `%r` request
     * directive and once with the separate `%m %U %H` directives; both are
     * expected to produce the same records.
     *
     * @return array list of [filename, log format, expected records].
     */
    public function provideWebServerLogFile()
    {
        $logFormats = [
            '%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i"',
            '%h %l %u %t "%m %U %H" %>s %b "%{Referer}i" "%{User-Agent}i"'
        ];

        // The expected output does not depend on the log format, so build it
        // once and reuse it for every data set.
        $expectedRecords = [
            [
                'host' => '127.0.0.0',
                'logname' => '-',
                'user' => 'testuser1',
                'stamp' => 1625127426,
                'time' => '01/Jul/2021:03:17:06 -0500',
                'requestMethod' => 'GET',
                'URL' => '/pun/sys/dashboard/apps/icon/jupyter_quantum_chem/sys/sys?foo=bar',
                'requestProtocol' => 'HTTP/1.1',
                'status' => '200',
                'responseBytes' => '381',
                'HeaderReferer' => 'https://ondemand.ccr.buffalo.edu/pun/sys/dashboard/batch_connect/sessions',
                'HeaderUserAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
                'ua_family' => 'Chrome',
                'ua_major' => '91',
                'ua_minor' => '0',
                'ua_patch' => '4472',
                'ua_os_family' => 'Windows',
                'ua_os_major' => '10',
                'ua_os_minor' => null,
                'ua_os_patch' => null,
                'ua_device_family' => 'Other',
                'ua_device_brand' => null,
                'ua_device_model' => null,
                'geo_city_name' => 'NA',
                'geo_subdivision' => 'NA',
                'geo_country' => 'NA'
            ]
        ];

        $tests = [];
        foreach ($logFormats as $logFormat) {
            $tests[] = ['test.log', $logFormat, $expectedRecords];
        }
        return $tests;
    }
}

0 comments on commit 397d999

Please sign in to comment.