diff --git a/main.go b/main.go index d7da999..393db0b 100644 --- a/main.go +++ b/main.go @@ -189,7 +189,16 @@ func main() { return } - resp, err := json.Marshal(data) + data2, err := reporting.GetReportCounts(ctx, service, *project, *dataset, *rv2Table) + if err != nil { + log.Errorw("error seen during reports get", zap.Error(err), "service", service) + http.Error(w, "processing error", 500) + return + } + + out := append(data, data2...) + + resp, err := json.Marshal(out) if err != nil { log.Errorw("error seen during reports marshal", zap.Error(err), "service", service) http.Error(w, "processing error", 500) @@ -332,7 +341,7 @@ func main() { bodyStr := buf.String() log.Infow("reporting recieved", "content-type", contentType, "service", service, "user-agent", r.UserAgent()) - reports, err := reporting.ParseReport(bodyStr) + reports, err := reporting.ParseReport(bodyStr, service) if err != nil { log.Errorw("error on parsing reporting data", zap.Error(err), "service", service, "content-type", contentType, "body", bodyStr) http.Error(w, "uploading error", 500) diff --git a/pkg/reporting/examples/csp.json b/pkg/reporting/examples/csp.json new file mode 100644 index 0000000..d30e12b --- /dev/null +++ b/pkg/reporting/examples/csp.json @@ -0,0 +1,17 @@ +{ + "type": "csp-violation", + "url": "http://127.0.0.1:9999/", + "body": { + "sourceFile": null, + "lineNumber": null, + "columnNumber": null, + "documentURL": "http://127.0.0.1:9999/", + "referrer": "", + "blockedURL": "https://apis.google.com/js/platform.js", + "effectiveDirective": "script-src-elem", + "originalPolicy": "default-src 'self';", + "sample": "", + "disposition": "enforce", + "statusCode": 200 + } +} \ No newline at end of file diff --git a/pkg/reporting/examples/deprecation.json b/pkg/reporting/examples/deprecation.json new file mode 100644 index 0000000..3e3b3a3 --- /dev/null +++ b/pkg/reporting/examples/deprecation.json @@ -0,0 +1,14 @@ +{ + "type": "deprecation", + "age": 10, + "url": "https://example.com/", + "user_agent": "BarBrowser/98.0 (Mozilla/5.0 compatiblish)", + "body": { + "id": "websql", + "anticipatedRemoval": "1/1/2020", + "message": "WebSQL is deprecated and will be removed in Chrome 97 around January 2020", + "sourceFile": "https://example.com/index.js", + "lineNumber": 1234, + "columnNumber": 42 + } +} diff --git a/pkg/reporting/examples/example.json b/pkg/reporting/examples/example.json deleted file mode 100644 index 25cc7fb..0000000 --- a/pkg/reporting/examples/example.json +++ /dev/null @@ -1,16 +0,0 @@ -[ - { - "type": "deprecation", - "age": 10, - "url": "https://example.com/", - "user_agent": "BarBrowser/98.0 (Mozilla/5.0 compatiblish)", - "body": { - "id": "websql", - "anticipatedRemoval": "1/1/2020", - "message": "WebSQL is deprecated and will be removed in Chrome 97 around January 2020", - "sourceFile": "https://example.com/index.js", - "lineNumber": 1234, - "columnNumber": 42 - } - } -] diff --git a/pkg/reporting/reporting.go b/pkg/reporting/reporting.go index 54dce00..adb1d3e 100644 --- a/pkg/reporting/reporting.go +++ b/pkg/reporting/reporting.go @@ -2,22 +2,23 @@ package reporting import ( "context" - "crypto/sha256" - "encoding/hex" "encoding/json" "fmt" "time" "cloud.google.com/go/bigquery" - "github.com/icco/gutil/logging" + "cloud.google.com/go/civil" + "github.com/icco/reportd/pkg/analytics" + "google.golang.org/api/iterator" ) -var ( - service = "reportd" - log = logging.Must(logging.NewLogger(service)) -) +type CSPReport struct { + Type string `json:"type"` + URL string `json:"url"` + Body CSPReportBody `json:"body"` +} -type CspReport struct { +type CSPReportBody struct { // The URI of the document in which the violation occurred. DocumentUri string `protobuf:"bytes,4,opt,name=document_uri,json=documentUri,proto3" json:"document_uri,omitempty"` // The referrer of the document in which the violation occurred. @@ -46,6 +47,12 @@ type CspReport struct { } type DeprecationReport struct { + Type string `json:"type"` + URL string `json:"url"` + Body CSPReportBody `json:"body"` +} + +type DeprecationReportBody struct { // name of API, e.g. websql Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` // YYYY-MM-DD date format, e.g. "2020-01-01" @@ -60,272 +67,104 @@ type DeprecationReport struct { ColumnNumber int32 `protobuf:"varint,6,opt,name=column_number,json=columnNumber,proto3" json:"column_number,omitempty"` } -// Policy disposition -type SecurityReport_Disposition int32 - -const ( - SecurityReport_DISPOSITION_UNKNOWN SecurityReport_Disposition = 0 - SecurityReport_REPORTING SecurityReport_Disposition = 1 - SecurityReport_ENFORCED SecurityReport_Disposition = 2 -) - -// Enum value maps for SecurityReport_Disposition. -var ( - SecurityReport_Disposition_name = map[int32]string{ - 0: "DISPOSITION_UNKNOWN", - 1: "REPORTING", - 2: "ENFORCED", - } - SecurityReport_Disposition_value = map[string]int32{ - "DISPOSITION_UNKNOWN": 0, - "REPORTING": 1, - "ENFORCED": 2, - } -) - -func (x SecurityReport_Disposition) Enum() *SecurityReport_Disposition { - p := new(SecurityReport_Disposition) - *p = x - return p -} - type SecurityReport struct { + Deprecation *DeprecationReport `bigquery:",nullable"` + CSP *CSPReport `bigquery:",nullable"` - // This report's checksum is computed according to the subtype of the - // report. Used for deduplication. - ReportChecksum string `protobuf:"bytes,1,opt,name=report_checksum,json=reportChecksum,proto3" json:"report_checksum,omitempty"` - // When was this report generated? (milliseconds) - ReportTime int64 `protobuf:"varint,2,opt,name=report_time,json=reportTime,proto3" json:"report_time,omitempty"` - // Number of times we saw this report (always 1 until aggregation happens) - ReportCount int64 `protobuf:"varint,3,opt,name=report_count,json=reportCount,proto3" json:"report_count,omitempty"` - // Unparsed UA + parsed browser name and major version - UserAgent string `protobuf:"bytes,4,opt,name=user_agent,json=userAgent,proto3" json:"user_agent,omitempty"` - BrowserName string `protobuf:"bytes,5,opt,name=browser_name,json=browserName,proto3" json:"browser_name,omitempty"` - BrowserMajorVersion int32 `protobuf:"varint,6,opt,name=browser_major_version,json=browserMajorVersion,proto3" json:"browser_major_version,omitempty"` - Disposition SecurityReport_Disposition `protobuf:"varint,7,opt,name=disposition,proto3,enum=securityreport.SecurityReport_Disposition" json:"disposition,omitempty"` - // this field will hold an extension of the base SecurityReport, - // only one extension can be set for any given request - // - // Types that are assignable to ReportExtension: - // - // *SecurityReport_CspReport - // *SecurityReport_DeprecationReport - ReportExtension isSecurityReport_ReportExtension `protobuf_oneof:"ReportExtension"` -} + // When we recorded this metric. + Time bigquery.NullDateTime -type isSecurityReport_ReportExtension interface { - isSecurityReport_ReportExtension() + // What service this is for. + Service bigquery.NullString } -type SecurityReport_CspReport struct { - CspReport *CspReport `protobuf:"bytes,8,opt,name=csp_report,json=cspReport,proto3,oneof"` -} - -type SecurityReport_DeprecationReport struct { - DeprecationReport *DeprecationReport `protobuf:"bytes,9,opt,name=deprecation_report,json=deprecationReport,proto3,oneof"` -} - -func (*SecurityReport_CspReport) isSecurityReport_ReportExtension() {} +func ParseReport(data, srv string) (*SecurityReport, error) { + sr := &SecurityReport{ + Time: bigquery.NullDateTime{DateTime: civil.DateTimeOf(time.Now()), Valid: true}, + Service: bigquery.NullString{StringVal: srv, Valid: true}, + } -func (*SecurityReport_DeprecationReport) isSecurityReport_ReportExtension() {} + tmp := struct { + Type string `json:"type"` + }{} -func ParseReport(data string) ([]*SecurityReport, error) { - var buf []map[string]interface{} - if err := json.Unmarshal([]byte(data), &buf); err != nil { + if err := json.Unmarshal([]byte(data), &tmp); err != nil { return nil, err } - var reports []*SecurityReport - for _, b := range buf { - r, err := mapToSecurityReport(b) - if err != nil { + switch tmp.Type { + case "csp-violation": + if err := json.Unmarshal([]byte(data), &sr.CSP); err != nil { + return nil, err + } + case "deprecation": + if err := json.Unmarshal([]byte(data), &sr.Deprecation); err != nil { return nil, err } - reports = append(reports, r) + default: + return nil, fmt.Errorf("unknown report type: %s", tmp.Type) } - return reports, nil + return sr, nil } -func (r *SecurityReport) Validate() error { - if r.ReportChecksum == "" { - return fmt.Errorf("report_checksum is required") - } - - if r.ReportTime == 0 { - return fmt.Errorf("report_time is required") - } - - if r.ReportCount == 0 { - return fmt.Errorf("report_count is required") - } - - if r.UserAgent == "" { - return fmt.Errorf("user_agent is required") - } - - if r.BrowserName == "" { - return fmt.Errorf("browser_name is required") - } - - if r.BrowserMajorVersion == 0 { - return fmt.Errorf("browser_major_version is required") +func WriteReportsToBigQuery(ctx context.Context, project, dataset, table string, report *SecurityReport) error { + bq, err := bigquery.NewClient(ctx, project) + if err != nil { + return fmt.Errorf("connecting to bq: %w", err) } - if r.Disposition == 0 { - return fmt.Errorf("disposition is required") + ins := bq.Dataset(dataset).Table(table).Inserter() + if err := ins.Put(ctx, report); err != nil { + return fmt.Errorf("uploading to bq: %w", err) } - return nil } -func mapToSHA256HexString(m map[string]interface{}) (string, error) { - deserialized, err := json.Marshal(m) +func GetReportCounts(ctx context.Context, site, project, dataset, table string) ([]*analytics.WebVitalSummary, error) { + client, err := bigquery.NewClient(ctx, project) if err != nil { - return "", err + return nil, fmt.Errorf("connecting to bq: %w", err) } - checksum := sha256.Sum256(deserialized) - return hex.EncodeToString(checksum[:]), nil -} -func mapToSecurityReport(m map[string]interface{}) (*SecurityReport, error) { - sr := &SecurityReport{} - now := time.Now().UnixMilli() - checksum, err := mapToSHA256HexString(m) + t := client.Dataset(dataset).Table(table) + tableID, err := t.Identifier(bigquery.StandardSQLID) + if err != nil { + return nil, fmt.Errorf("getting table id: %w", err) + } + query := fmt.Sprintf( + "SELECT DATE(Time) AS Day, Service, CAST(COUNT(*) as FLOAT64) AS Value "+ + "FROM `%s` "+ + "WHERE Service = @site AND Time >= DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH) "+ + "GROUP BY 1, 2 "+ + "ORDER BY Day DESC;", + tableID, + ) + + q := client.Query(query) + q.Parameters = []bigquery.QueryParameter{ + {Name: "site", Value: site}, + } + it, err := q.Read(ctx) if err != nil { return nil, err } - sr.ReportChecksum = checksum - sr.ReportCount = int64(1) - sr.Disposition = SecurityReport_DISPOSITION_UNKNOWN - - // the report has "age" field that is the offset from the report's timestamp. - // https://w3c.github.io/reporting/#serialize-reports - // - // NOTE: currently the report doesn't have "timestamp" field, so use server side - // current time. - if age, ok := m["age"].(float64); ok { - sr.ReportTime = now - int64(age) - } - if ua, ok := m["user_agent"].(string); ok { - sr.UserAgent = ua - } - var typ string - var body map[string]interface{} - var ok bool - if typ, ok = m["type"].(string); !ok { - return nil, fmt.Errorf("unexpected report type: %v", m) - } - if body, ok = m["body"].(map[string]interface{}); !ok { - return nil, fmt.Errorf("unexpected report type: %v", m) - } - switch typ { - case "csp-violation": - csp := &CspReport{} - if duri, ok := body["documentURL"].(string); ok { - csp.DocumentUri = duri - } else { - log.Warnf("unexpected documentURL: %#v", body["documentURL"]) - } - if ref, ok := body["referrer"].(string); ok { - csp.Referrer = ref - } else { - log.Warnf("unexpected referrer: %#v", body["referrer"]) - } - if buri, ok := body["blockedURL"].(string); ok { - csp.BlockedUri = buri - } else { - log.Warnf("unexpected blockedURL: %#v", body["blockedURL"]) - } - if vd, ok := body["violatedDirective"].(string); ok { - csp.ViolatedDirective = vd - } else { - log.Warnf("unexpected violatedDirective: %#v", body["violatedDirective"]) - } - if ed, ok := body["effectiveDirective"].(string); ok { - csp.EffectiveDirective = ed - } else { - log.Warnf("unexpected effectiveDirective: %#v", body["effectiveDirective"]) - } - if sf, ok := body["sourceFile"].(string); ok { - csp.SourceFile = sf - } else { - log.Warnf("unexpected sourceFile: %#v", body["sourceFile"]) - } - if ln, ok := body["lineNumber"].(float64); ok { - csp.LineNumber = int32(ln) - } else { - log.Warnf("unexpected lineNumber: %#v", body["lineNumber"]) - } - if cn, ok := body["columnNumber"].(float64); ok { - csp.ColumnNumber = int32(cn) - } else { - log.Warnf("unexpected columnNumber: %#v", body["columnNumber"]) - } - if ss, ok := body["scriptSample"].(string); ok { - csp.ScriptSample = ss - } else { - log.Warnf("unexpected scriptSample: %#v", body["scriptSample"]) - } - sr.ReportExtension = &SecurityReport_CspReport{CspReport: csp} - switch body["disposition"].(string) { - case "enforce": - sr.Disposition = SecurityReport_ENFORCED - case "report": - sr.Disposition = SecurityReport_REPORTING - default: - } - case "deprecation": - dep := &DeprecationReport{} - if id, ok := body["id"].(string); ok { - dep.Id = id - } else { - log.Warnf("unexpected id: %#v", body["id"]) - } - if ar, ok := body["anticipatedRemoval"].(string); ok { - dep.AnticipatedRemoval = ar - } else { - log.Warnf("unexpected anticipatedRemoval: %#v", body["anticipatedRemoval"]) - } - if ln, ok := body["lineNumber"].(float64); ok { - dep.LineNumber = int32(ln) - } else { - log.Warnf("unexpected lineNumber: %#v", body["lineNumber"]) + var ret []*analytics.WebVitalSummary + for { + var r analytics.WebVitalSummary + err := it.Next(&r) + if err == iterator.Done { + break } - if cn, ok := body["columnNumber"].(float64); ok { - dep.ColumnNumber = int32(cn) - } else { - log.Warnf("unexpected columnNumber: %#v", body["columnNumber"]) - } - if m, ok := body["message"].(string); ok { - dep.Message = m - } else { - log.Warnf("unexpected message: %#v", body["message"]) - } - if sf, ok := body["sourceFile"].(string); ok { - dep.SourceFile = sf - } else { - log.Warnf("unexpected sourceFile: %#v", body["sourceFile"]) + if err != nil { + return nil, fmt.Errorf("couldn't get WebVitalSummary: %w", err) } - sr.ReportExtension = &SecurityReport_DeprecationReport{DeprecationReport: dep} - } - return sr, nil -} + r.Name = "Endpoint Security Report Count" -func WriteReportsToBigQuery(ctx context.Context, project, dataset, table string, reports []*SecurityReport) error { - if len(reports) == 0 { - return nil - } - bq, err := bigquery.NewClient(ctx, project) - if err != nil { - return fmt.Errorf("connecting to bq: %w", err) + ret = append(ret, &r) } - ins := bq.Dataset(dataset).Table(table).Inserter() - if err := ins.Put(ctx, reports); err != nil { - return fmt.Errorf("uploading to bq: %w", err) - } - return nil + return ret, nil } diff --git a/pkg/reporting/reporting_test.go b/pkg/reporting/reporting_test.go index 61080a6..1798017 100644 --- a/pkg/reporting/reporting_test.go +++ b/pkg/reporting/reporting_test.go @@ -38,13 +38,19 @@ func TestParseReportParsesReportTo(t *testing.T) { tc := tc t.Run(tc.Name, func(t *testing.T) { t.Parallel() - data, err := ParseReport(tc.JSON) + data, err := ParseReport(tc.JSON, "test") if err != nil { t.Error(err) } if data == nil { t.Error("data should not be nil") + t.FailNow() + } + + if data.Service.String() != "test" { + t.Errorf("expected service to be test, got %s", data.Service.StringVal) + t.FailNow() } }) } diff --git a/pkg/reportto/reports.go b/pkg/reportto/reports.go index bb9bb1e..7543e21 100644 --- a/pkg/reportto/reports.go +++ b/pkg/reportto/reports.go @@ -236,7 +236,7 @@ func GetReportCounts(ctx context.Context, site, project, dataset, table string) return nil, fmt.Errorf("couldn't get WebVitalSummary: %w", err) } - r.Name = "count" + r.Name = "Report-To Report Count" ret = append(ret, &r) }