-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclass.access_log_parser.php
164 lines (151 loc) · 5.04 KB
/
class.access_log_parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
<?php
/*
Reads an Apache access.log file line by line and turns each line into an
associative array that can then be stored in a database, sorted or searched.

Typical use:
    $parser = new apache_log_parser();
    if ($parser->open_log_file('/path/to/access.log')) {
        while (($line = $parser->get_line()) !== false) {
            $row = $parser->format_line($line);   // array, or false for a bad row
        }
        $parser->close_log_file();
    }
*/
class apache_log_parser
{
    // Number of lines that format_line() could not parse at all.
    public $bad_rows = 0;

    // Read handle set by open_log_file(); false/null when no file is open.
    public $fp;

    /*
    Pass 1 of 2: extract the fields that are ALWAYS present in a log line.

    Indexes of the returned match array that callers care about:
        [1]  = IP address
        [2]  = identity
        [3]  = user
        [5]  = date
        [6]  = time
        [7]  = server time zone
        [10] = request method (e.g. GET or HEAD), or the two characters -"
               when the client sent no request line
        [11] = remainder of the line, to be handed to pass 2a or 2b
    The other indexes hold the literal [ ] and " delimiters.

    Returns the match array, or null when the line does not match.
    Nothing is printed on failure; format_line() records it in $bad_rows.
    */
    public function format_log_line_pass1($line)
    {
        $regex1 = "/(\S+) (\S+) (\S+) (\[)([^:]+):(\d+:\d+:\d+) ([^\]]+)(\]) (\")(\S+) (.*)/";
        if (preg_match($regex1, $line, $result1)) {
            return $result1;
        }
        return null;
    }

    /*
    Pass 2a: used when the request method from pass 1 was NOT -" , so the
    remainder starts with a path and a protocol.

    Indexes of the returned match array:
        [1] = path
        [2] = protocol
        [3] = status
        [4] = bytes
        [5] = referer (surrounding double quotes are kept)
        [6] = agent   (surrounding double quotes are kept)

    Returns the match array, or null when the remainder does not match
    (for example when the line has no referer/agent fields).
    */
    public function format_log_line_pass2a($stringa)
    {
        $regex1 = "/(.*?) (\S+)\" (\S+) (\S+) (\".*?\") (\".*?\")$/";
        if (preg_match($regex1, $stringa, $result1)) {
            return $result1;
        }
        return null;
    }

    /*
    Pass 2b: used when the request method from pass 1 WAS -" , so the
    remainder has no path or protocol.

    Indexes of the returned match array:
        [1] = status
        [2] = bytes
        [3] = referer
        [4] = agent
        [5] = path     (always "N/A", filled in here)
        [6] = protocol (always "N/A", filled in here)

    Returns the match array, or null when the remainder does not match.
    */
    public function format_log_line_pass2b($stringa)
    {
        $regex1 = "/(\S+) (\S+) (\S+) (\S+)$/";
        if (preg_match($regex1, $stringa, $result1)) {
            // There is no request line, so insert placeholders for path and protocol.
            $result1[5] = "N/A";
            $result1[6] = "N/A";
            return $result1;
        }
        return null;
    }

    /*
    Main entry point: parse one line of access.log into an associative array.

    Returns an array with the keys
        ip, identity, user, date, time, timezone, method,
        status, bytes, referer, agent, path, protocol
    or false when the line does not look like a log entry at all, in which
    case $bad_rows is incremented.

    When pass 1 succeeds but the second half of the line cannot be parsed,
    the array is still returned and the last six keys are set to null.
    */
    public function format_line($line)
    {
        // The line is parsed in two steps because there are different types
        // of log entries; see format_log_line_pass1().
        $logs = $this->format_log_line_pass1($line);
        if (!isset($logs[0])) {
            // Must be the declared counter; any other property name leaves
            // $bad_rows stuck at its initial value.
            $this->bad_rows++;
            return false;
        }

        $formatted = array(
            'ip'       => $logs[1],
            'identity' => $logs[2],
            'user'     => $logs[3],
            'date'     => $logs[5],
            'time'     => $logs[6],
            'timezone' => $logs[7],
            'method'   => $logs[10],
        );
        $line_remainder = $logs[11];

        if ($logs[10] === '-"') {
            // No request line was sent: replace the bogus method and parse
            // the short form (see format_log_line_pass2b()).
            $formatted['method'] = "N/A";
            $rest = $this->format_log_line_pass2b($line_remainder);
            $index_of = array(
                'status' => 1, 'bytes' => 2, 'referer' => 3,
                'agent' => 4, 'path' => 5, 'protocol' => 6,
            );
        } else {
            // Normal request (see format_log_line_pass2a()).
            $rest = $this->format_log_line_pass2a($line_remainder);
            $index_of = array(
                'status' => 3, 'bytes' => 4, 'referer' => 5,
                'agent' => 6, 'path' => 1, 'protocol' => 2,
            );
        }

        // $rest is null when pass 2 did not match; isset() keeps that case
        // quiet and yields null values instead of array-offset warnings.
        foreach ($index_of as $key => $index) {
            $formatted[$key] = isset($rest[$index]) ? $rest[$index] : null;
        }

        return $formatted;
    }

    /*
    Open the log file for reading.

    The file is opened read-only: this class never writes through $fp, and
    the log is often not writable by the user running the parser.

    Returns true on success, false on failure.
    */
    public function open_log_file($file_name)
    {
        $this->fp = fopen($file_name, 'r');
        return $this->fp !== false;
    }

    /*
    Clear the log file (erase all data).

    Useful when a CRON job saves the parsed data into a database: clearing
    the file afterwards prevents saving duplicate data on the next run.

    A local handle is used so that a file currently opened through
    open_log_file() keeps its own handle in $fp.

    Returns true on success, false when the file could not be opened for writing.
    */
    public function clear_log_file($file_name)
    {
        $handle = fopen($file_name, 'w'); // mode 'w' truncates the file
        if ($handle === false) {
            return false;
        }
        return fclose($handle);
    }

    /*
    Close the file opened by open_log_file().

    Returns true on success, false on failure or when no file is open.
    */
    public function close_log_file()
    {
        if (!is_resource($this->fp)) {
            return false;
        }
        $closed = fclose($this->fp);
        $this->fp = null;
        return $closed;
    }

    /*
    Get the next line from the log file.

    Returns the line (including its line ending), or false at end of file
    or when no file is open.
    */
    public function get_line()
    {
        if (!is_resource($this->fp)) {
            return false;
        }
        return fgets($this->fp);
    }
}
?>