From ada7328deaebcfb3de55a7187390a8e87304b670 Mon Sep 17 00:00:00 2001 From: Colin Newell Date: Thu, 19 Mar 2015 16:35:21 +0000 Subject: [PATCH 1/2] Extract contents of body from request as well as response --- pcap2har/har.py | 13 +++++++++++++ pcap2har/http/request.py | 22 +++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pcap2har/har.py b/pcap2har/har.py index 70d5df8..3435e94 100644 --- a/pcap2har/har.py +++ b/pcap2har/har.py @@ -34,6 +34,18 @@ def HTTPRequestJsonRepr(self): ''' self = http.Request ''' + content = { + 'size': self.body_length, + 'mimeType': self.mimeType + } + if self.compression_amount is not None: + content['compression'] = self.compression_amount + if self.text: + if self.encoding: + content['text'] = self.text + content['encoding'] = self.encoding + else: + content['text'] = self.text.encode('utf8') # must transcode to utf-8 return { 'method': self.msg.method, 'url': self.url, @@ -43,6 +55,7 @@ def HTTPRequestJsonRepr(self): 'headersSize': -1, 'headers': header_json_repr(self.msg.headers), 'bodySize': len(self.msg.body), + 'content': content, } http.Request.json_repr = HTTPRequestJsonRepr diff --git a/pcap2har/http/request.py b/pcap2har/http/request.py index 8117b79..adbaffe 100644 --- a/pcap2har/http/request.py +++ b/pcap2har/http/request.py @@ -2,10 +2,13 @@ # dpkt.http is buggy, so we use our modified replacement from .. import dpkt_http_replacement as dpkt_http +from .. import settings +from ..mediatype import MediaType import message as http +from response import Response -class Request(http.Message): +class Request(Response): ''' HTTP request. Parses higher-level info out of dpkt.http.Request Members: @@ -24,3 +27,20 @@ def __init__(self, tcpdir, pointer): self.fullurl = fullurl.geturl() self.url, frag = urlparse.urldefrag(self.fullurl) self.query = urlparse.parse_qs(uri.query, keep_blank_values=True) + + if 'content-type' in self.msg.headers: + self.mediaType = MediaType(self.msg.headers['content-type']) + else: + self.mediaType = MediaType('application/x-unknown-content-type') + self.mimeType = self.mediaType.mimeType() + self.body_length = len(self.msg.body) + self.compression_amount = None + self.text = None + # handle body stuff + if settings.drop_bodies: + self.clear_body() + else: + # uncompress body if necessary + self.handle_compression() + # try to get out unicode + self.handle_text() From 09a767888cad9997b44f041f3521b476031ee5bd Mon Sep 17 00:00:00 2001 From: Colin Newell Date: Thu, 19 Mar 2015 19:15:02 +0000 Subject: [PATCH 2/2] Started to make the data compliant with the standard --- pcap2har/har.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcap2har/har.py b/pcap2har/har.py index 3435e94..f0a7239 100644 --- a/pcap2har/har.py +++ b/pcap2har/har.py @@ -46,6 +46,8 @@ def HTTPRequestJsonRepr(self): content['encoding'] = self.encoding else: content['text'] = self.text.encode('utf8') # must transcode to utf-8 + # FIXME: should process postData further and replace text + # with params if URL encoded parameters return { 'method': self.msg.method, 'url': self.url, @@ -55,7 +57,7 @@ def HTTPRequestJsonRepr(self): 'headersSize': -1, 'headers': header_json_repr(self.msg.headers), 'bodySize': len(self.msg.body), - 'content': content, + 'postData': content, } http.Request.json_repr = HTTPRequestJsonRepr