diff --git a/src/outbackcdx/Capture.java b/src/outbackcdx/Capture.java index 7f5d0e6..e5e2b61 100644 --- a/src/outbackcdx/Capture.java +++ b/src/outbackcdx/Capture.java @@ -63,6 +63,9 @@ public class Capture { public long originalCompressedoffset = -1; public String originalFile = "-"; + // pywb adds this to the CDXJ for POST requests + public String method = "GET"; + protected static Pattern URLKEY_POSTDATA_REGEX = Pattern.compile("[?&](__wb_post_data|__warc_post_data)=([^&]+).*$", Pattern.CASE_INSENSITIVE); @@ -544,6 +547,9 @@ private void put(String field, Object value) { case "originalFilename": originalFile = coerceString(value); break; + case "method": + method = coerceString(value); + break; default: throw new IllegalArgumentException("no such capture field: " + field); } @@ -597,6 +603,8 @@ public Object get(String field) { } else { return "bytes=" + compressedoffset + "-" + (compressedoffset + length - 1); } + case "method": + return method; default: throw new IllegalArgumentException("no such capture field: " + field); } diff --git a/test/outbackcdx/CaptureTest.java b/test/outbackcdx/CaptureTest.java index d76241e..717f766 100644 --- a/test/outbackcdx/CaptureTest.java +++ b/test/outbackcdx/CaptureTest.java @@ -46,10 +46,11 @@ public void testCdxDigestScheme() { @Test public void testCdxj() { - Capture src = Capture.fromCdxLine("com,example)/robots.txt 20210203115119 {\"url\": \"https://example.org/robots.txt\", \"mime\": \"unk\", \"status\": \"400\", \"digest\": \"3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ\", \"length\": \"451\", \"offset\": \"90493\", \"filename\": \"example.warc.gz\"}", new UrlCanonicalizer()); + Capture src = Capture.fromCdxLine("com,example)/robots.txt 20210203115119 {\"url\": \"https://example.org/robots.txt\", \"mime\": \"unk\", \"status\": \"400\", \"digest\": \"3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ\", \"length\": \"451\", \"offset\": \"90493\", \"filename\": \"example.warc.gz\", \"method\": \"POST\"}", new UrlCanonicalizer()); Capture dst = new Capture(src.encodeKey(), src.encodeValue()); assertEquals(451, src.length); assertEquals(90493, src.compressedoffset); + assertEquals("POST", src.method); assertFieldsEqual(src, dst); assertEquals("com,example)/robots.txt 20210203115119 https://example.org/robots.txt unk 400 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 451 90493 example.warc.gz - - -", dst.toString()); }