Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse request method, URL, and protocol separately in web server logs. #1824

Merged
merged 1 commit into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ configuration/aggregation_meta/
# configuration/etl/*.json
# configuration/etl/**/*.json

# Include log files that are used for regression testing
!tests/artifacts/xdmod-test-artifacts/xdmod/referencedata/*.log
# Include log files that are used for CI testing
!tests/artifacts/**/*.log

# Include CCR Log Class
!classes/Log
Expand Down
7 changes: 7 additions & 0 deletions classes/ETL/DataEndpoint/WebServerLogFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ public function __construct(DataEndpointOptions $options, LoggerInterface $logge
$this->web_parser->addPattern('%u', '(?P<user>(?:-|[\w\-\.@]+))');

if (isset($options->log_format)) {
// Replace `%r` with `%m %U %H` so the request method, URL, and
// protocol can be parsed separately.
$options->log_format = str_replace(
'%r',
'%m %U %H',
$options->log_format
);
$this->web_parser->setFormat($options->log_format);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
127.0.0.0 - testuser1 [01/Jul/2021:03:17:06 -0500] "GET /pun/sys/dashboard/apps/icon/jupyter_quantum_chem/sys/sys?foo=bar HTTP/1.1" 200 381 "https://ondemand.ccr.buffalo.edu/pun/sys/dashboard/batch_connect/sessions" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
113 changes: 113 additions & 0 deletions tests/unit/lib/ETL/DataEndpoint/WebServerLogFileTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<?php

namespace UnitTests\ETL\DataEndpoint;

use CCR\Log;
use ETL\DataEndpoint;
use ETL\DataEndpoint\DataEndpointOptions;
use Psr\Log\LoggerInterface;

/**
 * Tests that a `directoryscanner` data endpoint using the `webserverlog`
 * handler parses web server log lines into the expected records.
 *
 * Each case is run with two log formats: one using the combined request
 * directive `%r` and one spelling it out as `%m %U %H`. Both are expected to
 * produce the same record, with the request method, URL, and protocol in
 * separate fields.
 */
class WebServerLogFileTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Directory that holds the input log files for these tests.
     *
     * NOTE(review): this is a relative path, so it presumably depends on the
     * working directory the test suite is started from — confirm.
     */
    const TEST_ARTIFACT_INPUT_PATH = "./../artifacts/xdmod/etlv2/dataendpoint/input/webserverlogfile";

    /**
     * Logger passed to the data endpoint factory.
     *
     * @var LoggerInterface
     */
    private static $logger = null;

    /**
     * Create the logger shared by all tests in this class.
     */
    public static function setUpBeforeClass()
    {
        // The ETL infrastructure requires a logger. File, database, and mail
        // output are disabled, and the console threshold is set to
        // Log::EMERG.
        $conf = [
            'file' => false,
            'db' => false,
            'mail' => false,
            'consoleLogLevel' => Log::EMERG
        ];

        self::$logger = Log::factory('PHPUnit', $conf);
    }

    /**
     * Parse a log file with the given format and compare every record that
     * the endpoint yields against the expected records, in order.
     *
     * @dataProvider provideWebServerLogFile
     *
     * @param string $filename  Name of the log file inside
     *                          TEST_ARTIFACT_INPUT_PATH; it is interpolated
     *                          into the endpoint's `file_pattern`.
     * @param string $logFormat Value for the handler's `log_format` option.
     * @param array  $expected  List of expected records, one per parsed
     *                          log line.
     */
    public function testWebServerLogFile($filename, $logFormat, $expected)
    {
        $config = [
            'type' => 'directoryscanner',
            'name' => 'Web Server Logs',
            'path' => self::TEST_ARTIFACT_INPUT_PATH,
            'file_pattern' => "/$filename/",
            'handler' => (object)[
                'type' => 'webserverlog',
                'record_separator' => "\n",
                'log_format' => $logFormat
            ]
        ];
        $options = new DataEndpointOptions($config);
        $endpoint = DataEndpoint::factory($options, self::$logger);
        $endpoint->verify();
        $endpoint->connect();

        $numIterations = 0;
        foreach ($endpoint as $record) {
            // Guard the lookup below: if the endpoint yields more records
            // than expected, fail with a clear assertion message instead of
            // an undefined-offset error.
            $this->assertArrayHasKey(
                $numIterations,
                $expected,
                'Parsed more records than expected.'
            );
            $this->assertSame($expected[$numIterations], $record);
            $numIterations++;
        }

        // Catches the opposite case, where fewer records were parsed than
        // expected (including none at all).
        $this->assertSame(
            count($expected),
            $numIterations,
            'Did not parse correct number of records.'
        );
    }

    /**
     * Provide one test case per log format. Every case uses the same input
     * file and the same expected record.
     *
     * @return array List of `[filename, logFormat, expectedRecords]` tuples.
     */
    public function provideWebServerLogFile()
    {
        $logFormats = [
            '%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i"',
            '%h %l %u %t "%m %U %H" %>s %b "%{Referer}i" "%{User-Agent}i"'
        ];

        // The single record expected from test.log, regardless of which of
        // the formats above is used.
        $expectedRecords = [
            [
                'host' => '127.0.0.0',
                'logname' => '-',
                'user' => 'testuser1',
                'stamp' => 1625127426,
                'time' => '01/Jul/2021:03:17:06 -0500',
                'requestMethod' => 'GET',
                'URL' => '/pun/sys/dashboard/apps/icon/jupyter_quantum_chem/sys/sys?foo=bar',
                'requestProtocol' => 'HTTP/1.1',
                'status' => '200',
                'responseBytes' => '381',
                'HeaderReferer' => 'https://ondemand.ccr.buffalo.edu/pun/sys/dashboard/batch_connect/sessions',
                'HeaderUserAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
                'ua_family' => 'Chrome',
                'ua_major' => '91',
                'ua_minor' => '0',
                'ua_patch' => '4472',
                'ua_os_family' => 'Windows',
                'ua_os_major' => '10',
                'ua_os_minor' => null,
                'ua_os_patch' => null,
                'ua_device_family' => 'Other',
                'ua_device_brand' => null,
                'ua_device_model' => null,
                'geo_city_name' => 'NA',
                'geo_subdivision' => 'NA',
                'geo_country' => 'NA'
            ]
        ];

        $tests = [];
        foreach ($logFormats as $logFormat) {
            $tests[] = ['test.log', $logFormat, $expectedRecords];
        }
        return $tests;
    }
}