forked from muskette/nginx-access-log-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
59 lines (50 loc) · 1.49 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
__author__ = "Richard O'Dwyer"
__email__ = "richard@richard.do"
__license__ = "None"
import re
from collections import Counter
from operator import itemgetter
def process_log(log):
    """Count how often each requested file appears in an access log.

    `log` is handed to get_requests(), which calls ``read()`` on it, so it
    has to be a readable file-like object.  The parsed requests are reduced
    to their requested-file field by get_files() and tallied by
    file_occur(); that tally is returned as-is.
    """
    return file_occur(get_files(get_requests(log)))
def get_requests(f):
    """Extract the fields of every GET request found in an access log.

    Args:
        f: Object whose ``read()`` method returns the full log text.

    Returns:
        The result of ``find`` for the pattern below: for each matching log
        entry a tuple ``(ip, datetime, requested file, status, bandwidth,
        referrer, user agent)``.
    """
    log_text = f.read()
    # Every fragment carries its own ``r`` prefix: a prefix applies only to
    # the literal it is attached to, not to the literals concatenated after
    # it, and without it \d, \s, \[ and \w are invalid string escapes.
    pat = (
        r'(\d+\.\d+\.\d+\.\d+)\s-\s-\s'  # IP address (dots matched literally)
        r'\[(.+)\]\s'                    # datetime
        r'"GET\s(.+)\s\w+/.+"\s'         # requested file
        r'(\d+)\s'                       # status
        r'(\d+)\s'                       # bandwidth
        r'"(.+)"\s'                      # referrer
        r'"(.+)"'                        # user agent
    )
    return find(pat, log_text)
def find(pat, text):
    """Return all matches of `pat` in `text`, or False when there are none.

    The matches are exactly what re.findall() produces: a list of strings,
    or a list of tuples when the pattern has more than one group.
    """
    # An empty result list is falsy, so `or` swaps it for False.
    return re.findall(pat, text) or False
def get_files(requests):
    """Collect the requested-file field of every parsed request.

    Args:
        requests: Iterable of match tuples, or a falsy value (``find`` in
            this file returns ``False`` when nothing matched).

    Returns:
        A list holding element 2 of each request, in input order; an empty
        list when `requests` is falsy.
    """
    # A log without any matching line arrives here as ``False``; iterating
    # over that would raise TypeError, so treat it as "no requests".
    if not requests:
        return []
    # req[2] is the requested file; change the index to the field you want
    # to count totals for.
    return [req[2] for req in requests]
def file_occur(files):
    """Count how many times each entry occurs in `files`.

    Args:
        files: Iterable of hashable values (here: requested file paths).

    Returns:
        A plain dict mapping each distinct entry to its number of
        occurrences, with keys in order of first appearance.
    """
    # Counter does the tallying; convert back to dict so callers keep getting
    # the exact type the hand-written loop used to return.
    return dict(Counter(files))
if __name__ == '__main__':
    # nginx access log, standard format; the with-block closes the file as
    # soon as parsing is done, even if process_log raises
    with open('example.log', 'r') as log_file:
        # dict of requested files and their total request counts
        urls_with_counts = process_log(log_file)
    # sort them by total requests, descending
    sorted_by_count = sorted(urls_with_counts.items(), key=itemgetter(1), reverse=True)
    print(sorted_by_count)