From 63104d35b5653bc45477170294dd26c6eb72514d Mon Sep 17 00:00:00 2001 From: Adrian Smith Date: Thu, 24 Aug 2023 14:05:34 -0700 Subject: [PATCH] Add alert routing (#149) --- .codeflow.yml => .codeflow.yaml | 0 .gitignore | 3 +- alerts-template.yaml | 27 ++ docs/alert-routing.md | 53 ++++ e2e/alert-routing-cfg.yaml | 30 +++ e2e/e2e.go | 42 ++-- e2e/e2e_test.go | 71 +++++- e2e/test_pagerduty_server.go | 30 ++- e2e/test_slack_server.go | 33 ++- internal/alert/manager.go | 155 +++++------- internal/alert/manager_test.go | 232 ++++++++++++++---- internal/alert/routing_directory.go | 93 +++++++ internal/alert/routing_directory_test.go | 155 ++++++++++++ internal/alert/store_test.go | 1 + .../alert/test_data/alert-routing-test.yaml | 30 +++ internal/app/app_test.go | 6 +- internal/app/init.go | 19 +- internal/client/alert.go | 37 +++ internal/client/alert_test.go | 32 +++ internal/client/pagerduty_client.go | 89 ++++--- internal/client/pagerduty_client_test.go | 29 +++ internal/client/slack_client.go | 54 ++-- internal/client/slack_client_test.go | 30 +++ internal/config/config.go | 45 ++-- internal/core/alert.go | 66 ++++- internal/core/alert_test.go | 31 +++ internal/core/core.go | 28 +++ internal/core/core_test.go | 23 ++ internal/metrics/metrics.go | 28 +-- internal/mocks/alert_client.go | 51 ++++ internal/mocks/client_map.go | 100 ++++++++ internal/mocks/pagerduty_client.go | 18 +- internal/mocks/routing_directory.go | 100 ++++++++ internal/mocks/slack_client.go | 28 ++- mocks/alert_client.go | 65 +++++ 35 files changed, 1555 insertions(+), 279 deletions(-) rename .codeflow.yml => .codeflow.yaml (100%) create mode 100644 alerts-template.yaml create mode 100644 docs/alert-routing.md create mode 100644 e2e/alert-routing-cfg.yaml create mode 100644 internal/alert/routing_directory.go create mode 100644 internal/alert/routing_directory_test.go create mode 100644 internal/alert/test_data/alert-routing-test.yaml create mode 100644 internal/client/alert.go create mode 100644 internal/client/alert_test.go create mode 100644 internal/client/pagerduty_client_test.go create mode 100644 internal/client/slack_client_test.go create mode 100644 internal/core/alert_test.go create mode 100644 internal/mocks/alert_client.go create mode 100644 internal/mocks/client_map.go create mode 100644 internal/mocks/routing_directory.go create mode 100644 mocks/alert_client.go diff --git a/.codeflow.yml b/.codeflow.yaml similarity index 100% rename from .codeflow.yml rename to .codeflow.yaml diff --git a/.gitignore b/.gitignore index fe21d5bc..623cbf35 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ config.env /.vscode/ /.idea -genesis.json \ No newline at end of file +genesis.json +alert-routing.yaml \ No newline at end of file diff --git a/alerts-template.yaml b/alerts-template.yaml new file mode 100644 index 00000000..cc2e4740 --- /dev/null +++ b/alerts-template.yaml @@ -0,0 +1,27 @@ +## This is a template for alert routing configuration. +alertRoutes: + low: + slack: + low_oncall: + url: "" + channel: "" + + medium: + slack: + medium_oncall: + url: "" + channel: "" + medium_oncall: + config: + integration_key: "" + + high: + slack: + high_oncall: + url: "" + channel: "" + pagerduty: + high_oncall: + integration_key: ${MY_INTEGRATION_KEY} + medium_oncall: + integration_key: "" diff --git a/docs/alert-routing.md b/docs/alert-routing.md new file mode 100644 index 00000000..869f6d34 --- /dev/null +++ b/docs/alert-routing.md @@ -0,0 +1,53 @@ +--- +layout: page +title: Alert Routing +permalink: /alert-routing +--- + + +## Overview +The alert routing feature enables users to define a number of alert destinations and then route alerts to those +destinations based on the alert's severity. For example, a user may want to send all alerts to Slack but only send high +severity alerts to PagerDuty. + + +## How it works +Alerts are routed to destinations based on the severity of the given heuristic. +When a heuristic is deployed, the user must specify the severity of the alert that the heuristic will produce. +When the heuristic is run, the alert is routed to the configured destinations based on the severity of the alert. +For example, if a heuristic is configured to produce a high severity alert, the alert will be routed to all configured +destinations that support high severity alerts. + +Each severity level is configured independently for each alert destination. A user can add any number of alert +configurations per severity. + +Located in the root directory you'll find a file named `alerts-template.yaml`. This file contains a template for +configuring alert routing. The template contains a few examples on how you might want to configure your alert routing. + +## Supported Alert Destinations +Pessimism currently supports the following alert destinations: + +| Name | Description | +|-----------|-------------------------------------| +| slack | Sends alerts to a Slack channel | +| pagerduty | Sends alerts to a PagerDuty service | + +## Alert Severity +Pessimism currently defines the following severities for alerts: + +| Severity | Description | +|----------|-----------------------------------------------------------------------------| +| low | Alerts that may not require immediate attention | +| medium | Alerts that could be hazardous, but may not be completely destructive | +| high | Alerts that require immediate attention and could result in a loss of funds | + + +## PagerDuty Severity Mapping +PagerDuty supports the following severities: `critical`, `error`, `warning`, and `info`. +Pessimism maps the Pessimism severities to [PagerDuty severities](https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event) as follows ([ref](../internal/core/alert.go)): + +| Pessimism | PagerDuty | +|-----------|-----------| +| low | warning | +| medium | error | +| high | critical | diff --git a/e2e/alert-routing-cfg.yaml b/e2e/alert-routing-cfg.yaml new file mode 100644 index 00000000..4d958b40 --- /dev/null +++ b/e2e/alert-routing-cfg.yaml @@ -0,0 +1,30 @@ +alertRoutes: + low: + slack: + config: + url: "http://127.0.0.1:7100" + channel: "#test-low" + + medium: + slack: + config: + url: "http://127.0.0.1:7100" + channel: "#test-medium" + pagerduty: + config: + integration_key: "test-medium" + + high: + slack: + config: + url: "http://127.0.0.1:7100" + channel: "#test-high" + config_2: + url: "http://127.0.0.1:7100" + channel: "#test-high-2" + + pagerduty: + config: + integration_key: "test-high-1" + config_2: + integration_key: "test-high-2" diff --git a/e2e/e2e.go b/e2e/e2e.go index 831bf89f..8c7c99b5 100644 --- a/e2e/e2e.go +++ b/e2e/e2e.go @@ -3,6 +3,7 @@ package e2e import ( "context" "errors" + "fmt" "testing" "time" @@ -24,6 +25,11 @@ import ( "github.com/ethereum/go-ethereum/ethclient" ) +const ( + SlackTestServerPort = 7100 + PagerDutyTestPort = 7200 +) + // SysTestSuite ... Stores all the information needed to run an e2e system test type SysTestSuite struct { t *testing.T @@ -69,16 +75,18 @@ func CreateL2TestSuite(t *testing.T) *L2TestSuite { appCfg := DefaultTestConfig() - slackServer := NewTestSlackServer() - appCfg.AlertConfig.SlackConfig.URL = slackServer.Server.URL + slackServer := NewTestSlackServer("127.0.0.1", SlackTestServerPort) + + pagerdutyServer := NewTestPagerDutyServer("127.0.0.1", PagerDutyTestPort) - pagerdutyServer := NewTestPagerDutyServer() - appCfg.AlertConfig.MediumPagerDutyCfg.AlertEventsURL = pagerdutyServer.Server.URL + appCfg.AlertConfig.RoutingCfgPath = "alert-routing-cfg.yaml" + appCfg.AlertConfig.PagerdutyAlertEventsURL = fmt.Sprintf("http://127.0.0.1:%d", PagerDutyTestPort) pess, kill, err := app.NewPessimismApp(ctx, appCfg) if err != nil { t.Fatal(err) } + if err := pess.Start(); err != nil { t.Fatal(err) } @@ -95,6 +103,8 @@ func CreateL2TestSuite(t *testing.T) *L2TestSuite { Close: func() { kill() node.Close() + slackServer.Close() + pagerdutyServer.Close() }, AppCfg: appCfg, TestSlackSvr: slackServer, @@ -126,16 +136,18 @@ func CreateSysTestSuite(t *testing.T) *SysTestSuite { appCfg := DefaultTestConfig() - slackServer := NewTestSlackServer() - appCfg.AlertConfig.SlackConfig.URL = slackServer.Server.URL + slackServer := NewTestSlackServer("127.0.0.1", SlackTestServerPort) + + pagerdutyServer := NewTestPagerDutyServer("127.0.0.1", PagerDutyTestPort) - pagerdutyServer := NewTestPagerDutyServer() - appCfg.AlertConfig.MediumPagerDutyCfg.AlertEventsURL = pagerdutyServer.Server.URL + appCfg.AlertConfig.RoutingCfgPath = "alert-routing-cfg.yaml" + appCfg.AlertConfig.PagerdutyAlertEventsURL = fmt.Sprintf("http://127.0.0.1:%d", PagerDutyTestPort) pess, kill, err := app.NewPessimismApp(ctx, appCfg) if err != nil { t.Fatal(err) } + if err := pess.Start(); err != nil { t.Fatal(err) } @@ -152,6 +164,8 @@ func CreateSysTestSuite(t *testing.T) *SysTestSuite { Close: func() { kill() sys.Close() + slackServer.Close() + pagerdutyServer.Close() }, AppCfg: appCfg, TestSlackSvr: slackServer, @@ -185,16 +199,8 @@ func DefaultTestConfig() *config.Config { Port: port, }, AlertConfig: &alert.Config{ - SlackConfig: &client.SlackConfig{ - URL: "", - Channel: "test", - }, - MediumPagerDutyCfg: &client.PagerDutyConfig{ - AlertEventsURL: "", - }, - HighPagerDutyCfg: &client.PagerDutyConfig{ - AlertEventsURL: "", - }, + PagerdutyAlertEventsURL: "", + RoutingCfgPath: "", }, } } diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 00dc42c4..7169fc35 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -48,7 +48,7 @@ func Test_Balance_Enforcement(t *testing.T) { StartHeight: nil, EndHeight: nil, AlertingParams: &core.AlertPolicy{ - Sev: core.HIGH.String(), + Sev: core.MEDIUM.String(), Msg: alertMsg, }, SessionParams: map[string]interface{}{ @@ -97,8 +97,8 @@ func Test_Balance_Enforcement(t *testing.T) { // Check that the balance enforcement was triggered using the mocked server cache. posts := ts.TestPagerDutyServer.PagerDutyAlerts() slackPosts := ts.TestSlackSvr.SlackAlerts() - assert.Greater(t, len(slackPosts), 0, "No balance enforcement alert was sent") - assert.Greater(t, len(posts), 0, "No balance enforcement alert was sent") + assert.Greater(t, len(slackPosts), 1, "No balance enforcement alert was sent") + assert.Greater(t, len(posts), 1, "No balance enforcement alert was sent") assert.Contains(t, posts[0].Payload.Summary, "balance_enforcement", "Balance enforcement alert was not sent") // Get Bobs's balance. @@ -155,7 +155,7 @@ func Test_Balance_Enforce_With_CoolDown(t *testing.T) { AlertingParams: &core.AlertPolicy{ // Set a cooldown of one minute. CoolDown: 60, - Dest: core.Slack.String(), + Sev: core.LOW.String(), Msg: alertMsg, }, SessionParams: map[string]interface{}{ @@ -202,7 +202,7 @@ func Test_Balance_Enforce_With_CoolDown(t *testing.T) { // Check that the balance enforcement was triggered using the mocked server cache. posts := ts.TestSlackSvr.SlackAlerts() - assert.Equal(t, len(posts), 1, "No balance enforcement alert was sent") + assert.Equal(t, 1, len(posts), "No balance enforcement alert was sent") assert.Contains(t, posts[0].Text, "balance_enforcement", "Balance enforcement alert was not sent") assert.Contains(t, posts[0].Text, alertMsg) @@ -275,6 +275,67 @@ func Test_Contract_Event(t *testing.T) { assert.Contains(t, posts[0].Text, alertMsg, "System contract event message was not propagated") } +// Test_Contract_Event_High_Priority ... Tests the E2E flow of a contract event heuristic with high priority alerts all +// necessary destinations +func Test_Contract_Event_High_Priority(t *testing.T) { + + ts := e2e.CreateSysTestSuite(t) + defer ts.Close() + + l1Client := ts.Sys.Clients["l1"] + + updateSig := "ConfigUpdate(uint256,uint8,bytes)" + alertMsg := "System config gas config updated" + + err := ts.App.BootStrap([]*models.SessionRequestParams{{ + Network: core.Layer1.String(), + PType: core.Live.String(), + HeuristicType: core.ContractEvent.String(), + StartHeight: nil, + EndHeight: nil, + AlertingParams: &core.AlertPolicy{ + Msg: alertMsg, + Sev: core.HIGH.String(), + }, + SessionParams: map[string]interface{}{ + "address": predeploys.DevSystemConfigAddr.String(), + "args": []interface{}{updateSig}, + }, + }}) + + assert.NoError(t, err, "Error bootstrapping heuristic session") + + sysCfg, err := bindings.NewSystemConfig(predeploys.DevSystemConfigAddr, l1Client) + assert.NoError(t, err, "Error getting system config") + + opts, err := bind.NewKeyedTransactorWithChainID(ts.Cfg.Secrets.SysCfgOwner, ts.Cfg.L1ChainIDBig()) + assert.NoError(t, err, "Error getting system config owner pk") + + overhead := big.NewInt(10000) + scalar := big.NewInt(1) + + tx, err := sysCfg.SetGasConfig(opts, overhead, scalar) + assert.NoError(t, err, "Error setting gas config") + + txTimeoutDuration := 10 * time.Duration(ts.Cfg.DeployConfig.L1BlockTime) * time.Second + receipt, err := e2e.WaitForTransaction(tx.Hash(), l1Client, txTimeoutDuration) + + assert.NoError(t, err, "Error waiting for transaction") + assert.Equal(t, receipt.Status, types.ReceiptStatusSuccessful, "transaction failed") + + time.Sleep(3 * time.Second) + slackPosts := ts.TestSlackSvr.SlackAlerts() + pdPosts := ts.TestPagerDutyServer.PagerDutyAlerts() + + // Expect 2 alerts to each destination as alert-routing-cfg.yaml has two slack and two pagerduty destinations + assert.Equal(t, 2, len(slackPosts), "Incorrect Number of slack posts sent") + assert.Equal(t, 2, len(pdPosts), "Incorrect Number of pagerduty posts sent") + assert.Contains(t, slackPosts[0].Text, "contract_event", "System contract event alert was not sent") + assert.Contains(t, slackPosts[1].Text, "contract_event", "System contract event alert was not sent") + assert.Contains(t, pdPosts[0].Payload.Summary, "contract_event", "System contract event alert was not sent") + assert.Contains(t, pdPosts[1].Payload.Summary, "contract_event", "System contract event alert was not sent") +} + // TestAccount defines an account for testing. type TestAccount struct { Key *ecdsa.PrivateKey diff --git a/e2e/test_pagerduty_server.go b/e2e/test_pagerduty_server.go index d15db928..d0855c49 100644 --- a/e2e/test_pagerduty_server.go +++ b/e2e/test_pagerduty_server.go @@ -2,6 +2,8 @@ package e2e import ( "encoding/json" + "fmt" + "net" "net/http" "net/http/httptest" "strings" @@ -19,21 +21,35 @@ type TestPagerDutyServer struct { } // NewTestPagerDutyServer ... Creates a new mock pagerduty server -func NewTestPagerDutyServer() *TestPagerDutyServer { - ts := &TestPagerDutyServer{ +func NewTestPagerDutyServer(url string, port int) *TestPagerDutyServer { //nolint:dupl //This will be addressed + l, err := net.Listen("tcp", fmt.Sprintf("%s:%d", url, port)) + if err != nil { + panic(err) + } + + pds := &TestPagerDutyServer{ Payloads: []*client.PagerDutyRequest{}, } - ts.Server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + pds.Server = httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch strings.TrimSpace(r.URL.Path) { case "/": - ts.mockPagerDutyPost(w, r) + pds.mockPagerDutyPost(w, r) default: http.NotFoundHandler().ServeHTTP(w, r) } })) - return ts + err = pds.Server.Listener.Close() + if err != nil { + panic(err) + } + pds.Server.Listener = l + pds.Server.Start() + + logging.NoContext().Info("Test pagerduty server started", zap.String("url", url), zap.Int("port", port)) + + return pds } // Close ... Closes the server @@ -47,14 +63,14 @@ func (svr *TestPagerDutyServer) mockPagerDutyPost(w http.ResponseWriter, r *http if err := json.NewDecoder(r.Body).Decode(&alert); err != nil { w.WriteHeader(http.StatusBadRequest) - _, _ = w.Write([]byte(`{"status":false, "message":"could not decode pagerduty payload"}`)) + _, _ = w.Write([]byte(`{"status":"failure"", "message":"could not decode pagerduty payload"}`)) return } svr.Payloads = append(svr.Payloads, alert) w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(`{"status":success, "message":""}`)) + _, _ = w.Write([]byte(`{"status":"success", "message":""}`)) } // PagerDutyAlerts ... Returns the pagerduty alerts diff --git a/e2e/test_slack_server.go b/e2e/test_slack_server.go index f1f3f15f..823dff0e 100644 --- a/e2e/test_slack_server.go +++ b/e2e/test_slack_server.go @@ -2,11 +2,16 @@ package e2e import ( "encoding/json" + "fmt" + "net" "net/http" "net/http/httptest" "strings" + "go.uber.org/zap" + "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/logging" ) // TestSlackServer ... Mock server for testing slack alerts @@ -16,21 +21,35 @@ type TestSlackServer struct { } // NewTestSlackServer ... Creates a new mock slack server -func NewTestSlackServer() *TestSlackServer { - ts := &TestSlackServer{ +func NewTestSlackServer(url string, port int) *TestSlackServer { //nolint:dupl //This will be addressed + l, err := net.Listen("tcp", fmt.Sprintf("%s:%d", url, port)) + if err != nil { + panic(err) + } + + ss := &TestSlackServer{ Payloads: []*client.SlackPayload{}, } - ts.Server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ss.Server = httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch strings.TrimSpace(r.URL.Path) { case "/": - ts.mockSlackPost(w, r) + ss.mockSlackPost(w, r) default: http.NotFoundHandler().ServeHTTP(w, r) } })) - return ts + err = ss.Server.Listener.Close() + if err != nil { + panic(err) + } + ss.Server.Listener = l + ss.Server.Start() + + logging.NoContext().Info("Test slack server started", zap.String("url", url), zap.Int("port", port)) + + return ss } // Close ... Closes the server @@ -44,14 +63,14 @@ func (svr *TestSlackServer) mockSlackPost(w http.ResponseWriter, r *http.Request if err := json.NewDecoder(r.Body).Decode(&alert); err != nil { w.WriteHeader(http.StatusBadRequest) - _, _ = w.Write([]byte(`{"ok":false, "error":"could not decode slack payload"}`)) + _, _ = w.Write([]byte(`{"message":"", "error":"could not decode slack payload"}`)) return } svr.Payloads = append(svr.Payloads, alert) w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(`{"ok":true, "error":""}`)) + _, _ = w.Write([]byte(`{"message":"ok", "error":""}`)) } // SlackAlerts ... Returns the slack alerts diff --git a/internal/alert/manager.go b/internal/alert/manager.go index fe0e8235..bc00a04a 100644 --- a/internal/alert/manager.go +++ b/internal/alert/manager.go @@ -14,18 +14,6 @@ import ( "go.uber.org/zap" ) -// NOTE - This should be user defined in the future -// with modularity in mind so that users can define -// their own independent alerting policies -func getSevMap() map[core.Severity][]core.AlertDestination { - return map[core.Severity][]core.AlertDestination{ - core.UNKNOWN: {core.AlertDestination(0)}, - core.LOW: {core.Slack}, - core.MEDIUM: {core.PagerDuty, core.Slack}, - core.HIGH: {core.PagerDuty, core.Slack}, - } -} - // Manager ... Interface for alert manager type Manager interface { AddSession(core.SUUID, *core.AlertPolicy) error @@ -36,30 +24,29 @@ type Manager interface { // Config ... Alert manager configuration type Config struct { - SlackConfig *client.SlackConfig - MediumPagerDutyCfg *client.PagerDutyConfig - HighPagerDutyCfg *client.PagerDutyConfig + RoutingCfgPath string + PagerdutyAlertEventsURL string + RoutingParams *core.AlertRoutingParams } // alertManager ... Alert manager implementation type alertManager struct { ctx context.Context cancel context.CancelFunc + cfg *Config store Store interpolator Interpolator cdHandler CoolDownHandler + cm RoutingDirectory + logger *zap.Logger metrics metrics.Metricer alertTransit chan core.Alert - pdcP0 client.PagerDutyClient - pdcP1 client.PagerDutyClient - sc client.SlackClient } // NewManager ... Instantiates a new alert manager -func NewManager(ctx context.Context, sc client.SlackClient, pdc client.PagerDutyClient, - hpdc client.PagerDutyClient) Manager { +func NewManager(ctx context.Context, cfg *Config, cm RoutingDirectory) Manager { // NOTE - Consider constructing dependencies in higher level // abstraction and passing them in @@ -68,19 +55,15 @@ func NewManager(ctx context.Context, sc client.SlackClient, pdc client.PagerDuty am := &alertManager{ ctx: ctx, cdHandler: NewCoolDownHandler(), - sc: sc, - - // NOTE - This is a major regression and is a quick hack to enable - // multi-service paging in the short-term. Going forward, all alerting - // configurations should be highly configurable and non-opinionated. - pdcP0: hpdc, - pdcP1: pdc, + cfg: cfg, + cm: cm, cancel: cancel, interpolator: NewInterpolator(), store: NewStore(), alertTransit: make(chan core.Alert), metrics: metrics.WithContext(ctx), + logger: logging.WithContext(ctx), } return am @@ -91,23 +74,37 @@ func (am *alertManager) AddSession(sUUID core.SUUID, policy *core.AlertPolicy) e return am.store.AddAlertPolicy(sUUID, policy) } -// TODO - Rename this to ingress() // Transit ... Returns inter-subsystem transit channel for receiving alerts +// TODO - Rename this to ingress() func (am *alertManager) Transit() chan core.Alert { return am.alertTransit } -// handleSlackPost ... Handles posting an alert to slack channel -func (am *alertManager) handleSlackPost(sUUID core.SUUID, content string, msg string) error { - slackMsg := am.interpolator.InterpolateSlackMessage(sUUID, content, msg) +// handleSlackPost ... Handles posting an alert to slack channels +func (am *alertManager) handleSlackPost(alert core.Alert, policy *core.AlertPolicy) error { + slackClients := am.cm.GetSlackClients(alert.Criticality) + if slackClients == nil { + am.logger.Warn("No slack clients defined for criticality", zap.Any("alert", alert)) + return nil + } - resp, err := am.sc.PostData(am.ctx, slackMsg) - if err != nil { - return err + // Create event trigger + event := &client.AlertEventTrigger{ + Message: am.interpolator.InterpolateSlackMessage(alert.SUUID, alert.Content, policy.Msg), + Severity: alert.Criticality, } - if !resp.Ok && resp.Err != "" { - return fmt.Errorf(resp.Err) + for _, sc := range slackClients { + resp, err := sc.PostEvent(am.ctx, event) + if err != nil { + return err + } + + if resp.Status != core.SuccessStatus { + return fmt.Errorf("client %s could not post to slack: %s", sc.GetName(), resp.Message) + } + am.logger.Debug("Successfully posted to Slack", zap.String("resp", resp.Message)) + am.metrics.RecordAlertGenerated(alert, core.Slack, sc.GetName()) } return nil @@ -115,27 +112,31 @@ func (am *alertManager) handleSlackPost(sUUID core.SUUID, content string, msg st // handlePagerDutyPost ... Handles posting an alert to pagerduty func (am *alertManager) handlePagerDutyPost(alert core.Alert) error { - clients := []client.PagerDutyClient{am.pdcP1} - if alert.Criticality == core.HIGH { - clients = append(clients, am.pdcP0) + pdClients := am.cm.GetPagerDutyClients(alert.Criticality) + + if pdClients == nil { + am.logger.Warn("No pagerduty clients defined for criticality", zap.Any("alert", alert)) + return nil } - pdMsg := am.interpolator.InterpolatePagerDutyMessage(alert.SUUID, alert.Content) + event := &client.AlertEventTrigger{ + Message: am.interpolator.InterpolatePagerDutyMessage(alert.SUUID, alert.Content), + DedupKey: alert.PUUID, + Severity: alert.Criticality, + } - for _, pdc := range clients { - resp, err := pdc.PostEvent(am.ctx, &client.PagerDutyEventTrigger{ - Message: pdMsg, - Action: client.Trigger, - Severity: client.Critical, - DedupKey: alert.SUUID.String(), - }) + for _, pdc := range pdClients { + resp, err := pdc.PostEvent(am.ctx, event) if err != nil { return err } - if resp.Status != string(client.SuccessStatus) { - return fmt.Errorf("could not post to pagerduty: %s", resp.Status) + if resp.Status != core.SuccessStatus { + return fmt.Errorf("client %s could not post to pagerduty: %s", pdc.GetName(), resp.Message) } + + am.logger.Debug("Successfully posted to ", zap.Any("resp", resp)) + am.metrics.RecordAlertGenerated(alert, core.PagerDuty, pdc.GetName()) } return nil @@ -143,9 +144,14 @@ func (am *alertManager) handlePagerDutyPost(alert core.Alert) error { // EventLoop ... Event loop for alert manager subsystem func (am *alertManager) EventLoop() error { - logger := logging.WithContext(am.ctx) ticker := time.NewTicker(time.Second * 1) + if am.cfg.RoutingParams == nil { + am.logger.Warn("No alert routing params defined") + } + + am.cm.InitializeRouting(am.cfg.RoutingParams) + for { select { case <-am.ctx.Done(): // Shutdown @@ -160,23 +166,22 @@ func (am *alertManager) EventLoop() error { // 1. Fetch alert policy policy, err := am.store.GetAlertPolicy(alert.SUUID) if err != nil { - logger.Error("Could not determine alerting destination", zap.Error(err)) + am.logger.Error("Could not determine alerting destination", zap.Error(err)) continue } // 2. Check if alert is in cool down if policy.HasCoolDown() && am.cdHandler.IsCoolDown(alert.SUUID) { - logger.Debug("Alert is in cool down", + am.logger.Debug("Alert is in cool down", zap.String(logging.SUUIDKey, alert.SUUID.String())) continue } // 3. Log & propagate alert - logger.Info("received alert", + am.logger.Info("received alert", zap.String(logging.SUUIDKey, alert.SUUID.String())) am.HandleAlert(alert, policy) - am.metrics.RecordAlertGenerated(alert) // 4. Add alert to cool down if applicable if policy.HasCoolDown() { @@ -188,48 +193,14 @@ func (am *alertManager) EventLoop() error { // HandleAlert ... Handles the alert propagation logic func (am *alertManager) HandleAlert(alert core.Alert, policy *core.AlertPolicy) { - locations := []core.AlertDestination{policy.Destination()} - - am.metrics.RecordAlertGenerated(alert) alert.Criticality = policy.Severity() - // Fetch alerting destinations if severity is provided - if policy.Severity() != core.UNKNOWN { - locations = getSevMap()[policy.Severity()] - } - - // Iterate over alerting destinations and propagate alert - for _, dest := range locations { - am.propagate(dest, alert, policy) + if err := am.handleSlackPost(alert, policy); err != nil { + am.logger.Error("could not post to slack", zap.Error(err)) } -} - -// propagate ... Propagates an alert to a destination -func (am *alertManager) propagate(dest core.AlertDestination, alert core.Alert, policy *core.AlertPolicy) { - logger := logging.WithContext(am.ctx) - - switch dest { - case core.Slack: // TODO: add more alert destinations - logger.Debug("Attempting to post alert to slack") - - err := am.handleSlackPost(alert.SUUID, alert.Content, policy.Message()) - if err != nil { - logger.Error("Could not post alert to slack", zap.Error(err)) - } - - case core.PagerDuty: - logger.Debug("Attempting to post alert to pagerduty") - err := am.handlePagerDutyPost(alert) - if err != nil { - logger.Error("Could not post alert to pagerduty", zap.Error(err)) - } - - case core.ThirdParty: - logger.Error("Attempting to post alert to third_party which is not yet supported") - default: - logger.Error("Attempting to post alert to unknown destination", - zap.String("destination", policy.Destination().String())) + if err := am.handlePagerDutyPost(alert); err != nil { + am.logger.Error("could not post to pagerduty", zap.Error(err)) } } diff --git a/internal/alert/manager_test.go b/internal/alert/manager_test.go index 264a53e0..b7373a51 100644 --- a/internal/alert/manager_test.go +++ b/internal/alert/manager_test.go @@ -2,66 +2,214 @@ package alert_test import ( "context" + "fmt" "testing" "time" "github.com/base-org/pessimism/internal/alert" "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/config" "github.com/base-org/pessimism/internal/core" "github.com/base-org/pessimism/internal/mocks" + "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" ) func Test_EventLoop(t *testing.T) { - sc := mocks.NewMockSlackClient(gomock.NewController(t)) - pdc := mocks.NewMockPagerDutyClient(gomock.NewController(t)) - - ctx := context.Background() - - am := alert.NewManager(ctx, sc, pdc, pdc) - go func() { - _ = am.EventLoop() - }() - - defer func() { - _ = am.Shutdown() - }() + cfg := &config.Config{ + AlertConfig: &alert.Config{ + RoutingCfgPath: "test_data/alert-routing-test.yaml", + PagerdutyAlertEventsURL: "test", + }, + } - ingress := am.Transit() + ctx := context.Background() - testAlert := core.Alert{ - Dest: core.Slack, - SUUID: core.NilSUUID(), + c := gomock.NewController(t) + + tests := []struct { + name string + description string + test func(t *testing.T) + }{ + { + name: "Test Low sev", + description: "Test low sev alert sends to slack", + test: func(t *testing.T) { + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + am := alert.NewManager(ctx, cfg.AlertConfig, cm) + + go func() { + _ = am.EventLoop() + }() + + defer func() { + _ = am.Shutdown() + }() + + ingress := am.Transit() + + cm.SetSlackClients([]client.SlackClient{mocks.NewMockSlackClient(c)}, core.LOW) + + alert := core.Alert{ + Criticality: core.LOW, + SUUID: core.NilSUUID(), + } + policy := &core.AlertPolicy{ + Sev: core.LOW.String(), + Msg: "test", + } + + err := am.AddSession(core.NilSUUID(), policy) + assert.Nil(t, err) + + for _, cli := range cm.GetSlackClients(core.LOW) { + sc, ok := cli.(*mocks.MockSlackClient) + assert.True(t, ok) + + sc.EXPECT().PostEvent(gomock.Any(), gomock.Any()).Return( + &client.AlertAPIResponse{ + Message: "test", + Status: core.SuccessStatus, + }, nil).Times(1) + } + + ingress <- alert + time.Sleep(1 * time.Second) + testid := core.MakeSUUID(1, 1, 1) + alert = core.Alert{ + Criticality: core.UNKNOWN, + SUUID: testid, + } + ingress <- alert + time.Sleep(1 * time.Second) + + }, + }, + { + name: "Test Medium sev", + description: "Test medium sev alert sends to just PagerDuty", + test: func(t *testing.T) { + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + am := alert.NewManager(ctx, cfg.AlertConfig, cm) + + go func() { + _ = am.EventLoop() + }() + + defer func() { + _ = am.Shutdown() + }() + + ingress := am.Transit() + + cm.SetPagerDutyClients([]client.PagerDutyClient{mocks.NewMockPagerDutyClient(c)}, core.MEDIUM) + + alert := core.Alert{ + Criticality: core.MEDIUM, + SUUID: core.NilSUUID(), + } + policy := &core.AlertPolicy{ + Sev: core.MEDIUM.String(), + Msg: "test", + } + + err := am.AddSession(core.NilSUUID(), policy) + assert.Nil(t, err) + + for _, cli := range cm.GetPagerDutyClients(core.MEDIUM) { + pdc, ok := cli.(*mocks.MockPagerDutyClient) + assert.True(t, ok) + + pdc.EXPECT().PostEvent(gomock.Any(), gomock.Any()).Return( + &client.AlertAPIResponse{ + Message: "test", + Status: core.SuccessStatus, + }, nil).Times(1) + } + + ingress <- alert + time.Sleep(1 * time.Second) + testid := core.MakeSUUID(1, 1, 1) + alert = core.Alert{ + Criticality: core.UNKNOWN, + SUUID: testid, + } + ingress <- alert + time.Sleep(1 * time.Second) + + }, + }, + { + name: "Test High sev", + description: "Test high sev alert sends to both slack and PagerDuty", + test: func(t *testing.T) { + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + am := alert.NewManager(ctx, cfg.AlertConfig, cm) + + go func() { + _ = am.EventLoop() + }() + + defer func() { + _ = am.Shutdown() + }() + + ingress := am.Transit() + + cm.SetSlackClients([]client.SlackClient{mocks.NewMockSlackClient(c), mocks.NewMockSlackClient(c)}, core.HIGH) + cm.SetPagerDutyClients([]client.PagerDutyClient{mocks.NewMockPagerDutyClient(c), mocks.NewMockPagerDutyClient(c)}, core.HIGH) + + alert := core.Alert{ + Criticality: core.HIGH, + SUUID: core.NilSUUID(), + } + policy := &core.AlertPolicy{ + Sev: core.HIGH.String(), + Msg: "test", + } + err := am.AddSession(core.NilSUUID(), policy) + assert.Nil(t, err) + + for _, cli := range cm.GetPagerDutyClients(core.HIGH) { + pdc, ok := cli.(*mocks.MockPagerDutyClient) + assert.True(t, ok) + + pdc.EXPECT().PostEvent(gomock.Any(), gomock.Any()).Return( + &client.AlertAPIResponse{ + Message: "test", + Status: core.SuccessStatus, + }, nil).Times(1) + } + + for _, cli := range cm.GetSlackClients(core.HIGH) { + sc, ok := cli.(*mocks.MockSlackClient) + assert.True(t, ok) + sc.EXPECT().PostEvent(gomock.Any(), gomock.Any()).Return( + &client.AlertAPIResponse{ + Message: "test", + Status: core.SuccessStatus, + }, nil).Times(1) + } + ingress <- alert + time.Sleep(1 * time.Second) + testid := core.MakeSUUID(1, 1, 1) + alert = core.Alert{ + Criticality: core.UNKNOWN, + SUUID: testid, + } + ingress <- alert + time.Sleep(1 * time.Second) + }, + }, } - err := am.AddSession(core.NilSUUID(), - &core.AlertPolicy{ - Dest: core.Slack.String(), - Msg: "test", + for i, test := range tests { + t.Run(fmt.Sprintf("%s:%d", test.name, i), func(t *testing.T) { + test.test(t) }) - assert.Nil(t, err) - - sc.EXPECT().PostData(gomock.Any(), gomock.Any()). - Return(&client.SlackAPIResponse{ - Ok: true, - Err: "", - }, nil). - Times(1) - - ingress <- testAlert - - time.Sleep(1 * time.Second) - - testID := core.MakeSUUID(1, 1, 1) - - testAlert = core.Alert{ - Dest: core.ThirdParty, - SUUID: testID, } - ingress <- testAlert - - time.Sleep(1 * time.Second) } diff --git a/internal/alert/routing_directory.go b/internal/alert/routing_directory.go new file mode 100644 index 00000000..748361e6 --- /dev/null +++ b/internal/alert/routing_directory.go @@ -0,0 +1,93 @@ +//go:generate mockgen -package mocks --destination ../mocks/routing_directory.go . RoutingDirectory + +package alert + +import ( + "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/core" +) + +// RoutingDirectory ... Interface for routing directory +type RoutingDirectory interface { + GetPagerDutyClients(sev core.Severity) []client.PagerDutyClient + GetSlackClients(sev core.Severity) []client.SlackClient + InitializeRouting(params *core.AlertRoutingParams) + SetPagerDutyClients([]client.PagerDutyClient, core.Severity) + SetSlackClients([]client.SlackClient, core.Severity) +} + +// routingDirectory ... Routing directory implementation +// NOTE: This implementation works for now, but if we add more routing clients in the future, +// we should consider refactoring this to be more generic +type routingDirectory struct { + pagerDutyClients map[core.Severity][]client.PagerDutyClient + slackClients map[core.Severity][]client.SlackClient + cfg *Config +} + +// NewRoutingDirectory ... Instantiates a new routing directory +func NewRoutingDirectory(cfg *Config) RoutingDirectory { + return &routingDirectory{ + cfg: cfg, + pagerDutyClients: make(map[core.Severity][]client.PagerDutyClient), + slackClients: make(map[core.Severity][]client.SlackClient), + } +} + +// GetPagerDutyClients ... Returns the pager duty clients for the given severity level +func (rd *routingDirectory) GetPagerDutyClients(sev core.Severity) []client.PagerDutyClient { + return rd.pagerDutyClients[sev] +} + +// GetSlackClients ... Returns the slack clients for the given severity level +func (rd *routingDirectory) GetSlackClients(sev core.Severity) []client.SlackClient { + return rd.slackClients[sev] +} + +// SetSlackClients ... Sets the slack clients for the given severity level +func (rd *routingDirectory) SetSlackClients(clients []client.SlackClient, sev core.Severity) { + copy(rd.slackClients[sev][0:], clients) +} + +// SetPagerDutyClients ... Sets the pager duty clients for the given severity level +func (rd *routingDirectory) SetPagerDutyClients(clients []client.PagerDutyClient, sev core.Severity) { + copy(rd.pagerDutyClients[sev][0:], clients) +} + +// InitializeRouting ... Parses alert routing parameters for each severity level +func (rd *routingDirectory) InitializeRouting(params *core.AlertRoutingParams) { + if params != nil { + rd.paramsToRouteDirectory(params.AlertRoutes.Low, core.LOW) + rd.paramsToRouteDirectory(params.AlertRoutes.Medium, core.MEDIUM) + rd.paramsToRouteDirectory(params.AlertRoutes.High, core.HIGH) + } +} + +// paramsToRouteDirectory ... Converts alert client config to an alert client map +func (rd *routingDirectory) paramsToRouteDirectory(acc *core.AlertClientCfg, sev core.Severity) { + if acc == nil { + return + } + + if acc.Slack != nil { + for name, cfg := range acc.Slack { + conf := &client.SlackConfig{ + URL: cfg.URL.String(), + Channel: cfg.Channel.String(), + } + client := client.NewSlackClient(conf, name) + rd.slackClients[sev] = append(rd.slackClients[sev], client) + } + } + + if acc.PagerDuty != nil { + for name, cfg := range acc.PagerDuty { + conf := &client.PagerDutyConfig{ + IntegrationKey: cfg.IntegrationKey.String(), + AlertEventsURL: rd.cfg.PagerdutyAlertEventsURL, + } + client := client.NewPagerDutyClient(conf, name) + rd.pagerDutyClients[sev] = append(rd.pagerDutyClients[sev], client) + } + } +} diff --git a/internal/alert/routing_directory_test.go b/internal/alert/routing_directory_test.go new file mode 100644 index 00000000..1df7ae5d --- /dev/null +++ b/internal/alert/routing_directory_test.go @@ -0,0 +1,155 @@ +package alert_test + +import ( + "fmt" + "testing" + + "github.com/base-org/pessimism/internal/alert" + "github.com/base-org/pessimism/internal/config" + "github.com/base-org/pessimism/internal/core" + "github.com/stretchr/testify/assert" +) + +func getCfg() *config.Config { + return &config.Config{ + AlertConfig: &alert.Config{ + RoutingParams: &core.AlertRoutingParams{ + AlertRoutes: &core.SeverityMap{ + Low: &core.AlertClientCfg{ + Slack: map[string]*core.Config{ + "test1": { + Channel: "test1", + URL: "test1", + }, + }, + }, + Medium: &core.AlertClientCfg{ + PagerDuty: map[string]*core.Config{ + "test1": { + IntegrationKey: "test1", + }, + }, + Slack: map[string]*core.Config{ + "test2": { + Channel: "test2", + URL: "test2", + }, + }, + }, + High: &core.AlertClientCfg{ + PagerDuty: map[string]*core.Config{ + "test1": { + IntegrationKey: "test1", + }, + "test2": { + IntegrationKey: "test2", + }, + }, + Slack: map[string]*core.Config{ + "test2": { + Channel: "test2", + URL: "test2", + }, + "test3": { + Channel: "test3", + URL: "test3", + }, + }, + }, + }, + }, + }, + } +} + +func Test_AlertClientCfgToClientMap(t *testing.T) { + tests := []struct { + name string + description string + testLogic func(t *testing.T) + }{ + { + name: "Test AlertClientCfgToClientMap Success", + description: "Test AlertClientCfgToClientMap successfully creates alert clients", + testLogic: func(t *testing.T) { + cfg := getCfg() + + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + + assert.NotNil(t, cm, "client map is nil") + cm.InitializeRouting(cfg.AlertConfig.RoutingParams) + + assert.Len(t, cm.GetSlackClients(core.LOW), 1) + assert.Len(t, cm.GetPagerDutyClients(core.LOW), 0) + assert.Len(t, cm.GetSlackClients(core.MEDIUM), 1) + assert.Len(t, cm.GetPagerDutyClients(core.MEDIUM), 1) + assert.Len(t, cm.GetSlackClients(core.HIGH), 2) + assert.Len(t, cm.GetPagerDutyClients(core.HIGH), 2) + }, + }, + { + name: "Test AlertClientCfgToClientMap Pagerduty Nil", + description: "Test AlertClientCfgToClientMap doesn't fail when pagerduty is nil", + testLogic: func(t *testing.T) { + cfg := getCfg() + cfg.AlertConfig.RoutingParams.AlertRoutes.Medium.PagerDuty = nil + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + assert.NotNil(t, cm, "client map is nil") + + cm.InitializeRouting(cfg.AlertConfig.RoutingParams) + assert.Len(t, cm.GetSlackClients(core.LOW), 1) + assert.Len(t, cm.GetPagerDutyClients(core.LOW), 0) + assert.Len(t, cm.GetSlackClients(core.MEDIUM), 1) + assert.Len(t, cm.GetPagerDutyClients(core.MEDIUM), 0) + assert.Len(t, cm.GetSlackClients(core.HIGH), 2) + assert.Len(t, cm.GetPagerDutyClients(core.HIGH), 2) + }, + }, + { + name: "Test AlertClientCfgToClientMap Nil Slack", + description: "Test AlertClientCfgToClientMap doesn't fail when slack is nil", + testLogic: func(t *testing.T) { + cfg := getCfg() + cfg.AlertConfig.RoutingParams.AlertRoutes.Medium.Slack = nil + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + assert.NotNil(t, cm, "client map is nil") + + cm.InitializeRouting(cfg.AlertConfig.RoutingParams) + assert.Len(t, cm.GetSlackClients(core.LOW), 1) + assert.Len(t, cm.GetPagerDutyClients(core.LOW), 0) + assert.Len(t, cm.GetSlackClients(core.MEDIUM), 0) + assert.Len(t, cm.GetPagerDutyClients(core.MEDIUM), 1) + assert.Len(t, cm.GetSlackClients(core.HIGH), 2) + assert.Len(t, cm.GetPagerDutyClients(core.HIGH), 2) + }, + }, + { + name: "Test AlertClientCfgToClientMap Nil Params", + description: "Test AlertClientCfgToClientMap doesn't fail when params are nil", + testLogic: func(t *testing.T) { + cfg := getCfg() + + cfg.AlertConfig.RoutingParams = nil + + cm := alert.NewRoutingDirectory(cfg.AlertConfig) + assert.NotNil(t, cm, "client map is nil") + + cm.InitializeRouting(cfg.AlertConfig.RoutingParams) + + assert.Len(t, cm.GetSlackClients(core.LOW), 0) + assert.Len(t, cm.GetPagerDutyClients(core.LOW), 0) + assert.Len(t, cm.GetSlackClients(core.MEDIUM), 0) + assert.Len(t, cm.GetPagerDutyClients(core.MEDIUM), 0) + assert.Len(t, cm.GetSlackClients(core.HIGH), 0) + assert.Len(t, cm.GetPagerDutyClients(core.HIGH), 0) + }, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d:%s", i, test.name), func(t *testing.T) { + test.testLogic(t) + }) + } + +} diff --git a/internal/alert/store_test.go b/internal/alert/store_test.go index 01de0549..694f3dcd 100644 --- a/internal/alert/store_test.go +++ b/internal/alert/store_test.go @@ -6,6 +6,7 @@ import ( "github.com/base-org/pessimism/internal/alert" "github.com/base-org/pessimism/internal/core" + "github.com/stretchr/testify/assert" ) diff --git a/internal/alert/test_data/alert-routing-test.yaml b/internal/alert/test_data/alert-routing-test.yaml new file mode 100644 index 00000000..ce4a046d --- /dev/null +++ b/internal/alert/test_data/alert-routing-test.yaml @@ -0,0 +1,30 @@ +alertRoutes: + low: + slack: + config: + url: "test-low" + channel: "#test-low" + + medium: + slack: + config: + url: "test-medium" + channel: "#test-medium" + pagerduty: + config: + integration_key: "test-medium" + + high: + slack: + config: + url: "test-high" + channel: "#test-high" + config_2: + url: "test-high-2" + channel: "#test-high-2" + + pagerduty: + config: + integration_key: "test-high-1" + config_2: + integration_key: "test-high-2" diff --git a/internal/app/app_test.go b/internal/app/app_test.go index 6a6b2029..e80c4898 100644 --- a/internal/app/app_test.go +++ b/internal/app/app_test.go @@ -7,7 +7,6 @@ import ( "github.com/base-org/pessimism/internal/alert" "github.com/base-org/pessimism/internal/api/server" "github.com/base-org/pessimism/internal/app" - "github.com/base-org/pessimism/internal/client" "github.com/base-org/pessimism/internal/config" "github.com/base-org/pessimism/internal/metrics" @@ -28,9 +27,8 @@ func Test_AppFlow(t *testing.T) { Enabled: false, }, AlertConfig: &alert.Config{ - SlackConfig: &client.SlackConfig{}, - MediumPagerDutyCfg: &client.PagerDutyConfig{}, - HighPagerDutyCfg: &client.PagerDutyConfig{}, + RoutingCfgPath: "../../e2e/alert-routing-cfg.yaml", + PagerdutyAlertEventsURL: "test", }, } diff --git a/internal/app/init.go b/internal/app/init.go index fb8b6521..a31e4361 100644 --- a/internal/app/init.go +++ b/internal/app/init.go @@ -18,6 +18,7 @@ import ( "github.com/base-org/pessimism/internal/metrics" "github.com/base-org/pessimism/internal/state" "github.com/base-org/pessimism/internal/subsystem" + "go.uber.org/zap" ) @@ -72,12 +73,14 @@ func InitializeServer(ctx context.Context, cfg *config.Config, m subsystem.Manag */ // InitializeAlerting ... Performs dependency injection to build alerting struct -func InitializeAlerting(ctx context.Context, cfg *config.Config) alert.Manager { - sc := client.NewSlackClient(cfg.AlertConfig.SlackConfig) - pdc := client.NewPagerDutyClient(cfg.AlertConfig.HighPagerDutyCfg) - medPdc := client.NewPagerDutyClient(cfg.AlertConfig.MediumPagerDutyCfg) +func InitializeAlerting(ctx context.Context, cfg *config.Config) (alert.Manager, error) { + if err := cfg.ParseAlertConfig(); err != nil { + return nil, err + } - return alert.NewManager(ctx, sc, medPdc, pdc) + clientMap := alert.NewRoutingDirectory(cfg.AlertConfig) + + return alert.NewManager(ctx, cfg.AlertConfig, clientMap), nil } // InitializeETL ... Performs dependency injection to build etl struct @@ -107,7 +110,11 @@ func NewPessimismApp(ctx context.Context, cfg *config.Config) (*Application, fun return nil, nil, err } - alerting := InitializeAlerting(ctx, cfg) + alerting, err := InitializeAlerting(ctx, cfg) + if err != nil { + return nil, nil, err + } + engine := InitializeEngine(ctx, alerting.Transit()) etl := InitializeETL(ctx, engine.Transit()) diff --git a/internal/client/alert.go b/internal/client/alert.go new file mode 100644 index 00000000..5ba32dc7 --- /dev/null +++ b/internal/client/alert.go @@ -0,0 +1,37 @@ +//go:generate mockgen -package mocks --destination ../../mocks/alert_client.go . AlertClient + +package client + +import ( + "context" + + "github.com/base-org/pessimism/internal/core" +) + +// AlertClient ... An interface for alert clients to implement +type AlertClient interface { + PostEvent(ctx context.Context, data *AlertEventTrigger) (*AlertAPIResponse, error) + GetName() string +} + +// AlertEventTrigger ... A standardized event trigger for alert clients +type AlertEventTrigger struct { + Message string + Severity core.Severity + DedupKey core.PUUID +} + +// AlertAPIResponse ... A standardized response for alert clients +type AlertAPIResponse struct { + Status core.AlertStatus + Message string +} + +// ToPagerdutyEvent ... Converts an AlertEventTrigger to a PagerDutyEventTrigger +func (a *AlertEventTrigger) ToPagerdutyEvent() *PagerDutyEventTrigger { + return &PagerDutyEventTrigger{ + DedupKey: a.DedupKey.String(), + Severity: a.Severity.ToPagerDutySev(), + Message: a.Message, + } +} diff --git a/internal/client/alert_test.go b/internal/client/alert_test.go new file mode 100644 index 00000000..611d92de --- /dev/null +++ b/internal/client/alert_test.go @@ -0,0 +1,32 @@ +package client_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/core" +) + +func TestToPagerDutyEvent(t *testing.T) { + alert := &client.AlertEventTrigger{ + Message: "test", + Severity: core.HIGH, + DedupKey: core.NilPUUID(), + } + + sPuuid := alert.DedupKey.String() + res := alert.ToPagerdutyEvent() + assert.Equal(t, core.Critical, res.Severity) + assert.Equal(t, "test", res.Message) + assert.Equal(t, sPuuid, res.DedupKey) + + alert.Severity = core.MEDIUM + res = alert.ToPagerdutyEvent() + assert.Equal(t, core.Error, res.Severity) + + alert.Severity = core.LOW + res = alert.ToPagerdutyEvent() + assert.Equal(t, core.Warning, res.Severity) +} diff --git a/internal/client/pagerduty_client.go b/internal/client/pagerduty_client.go index 4a1947b1..76d7f9fa 100644 --- a/internal/client/pagerduty_client.go +++ b/internal/client/pagerduty_client.go @@ -6,39 +6,29 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "net/http" "time" + "github.com/base-org/pessimism/internal/core" "github.com/base-org/pessimism/internal/logging" "go.uber.org/zap" ) -// PagerDutyAction represents the type of actions that can be triggered by an event +type PagerDutyClient interface { + AlertClient +} + type PagerDutyAction string const ( - Trigger PagerDutyAction = "trigger" - PagerDutyAckAction PagerDutyAction = "acknowledge" - PagerDutyResolveAction PagerDutyAction = "resolve" + Trigger PagerDutyAction = "trigger" ) -// PagerDutySeverity represents the severity of an event -type PagerDutySeverity string - const ( - Critical PagerDutySeverity = "critical" - Error PagerDutySeverity = "error" - Warning PagerDutySeverity = "warning" - Info PagerDutySeverity = "info" -) - -// PagerDutyResponseStatus is the response status of a PagerDuty API call -type PagerDutyResponseStatus string - -const ( - SuccessStatus PagerDutyResponseStatus = "success" + Source = "pessimism" ) // PagerDutyConfig ... Represents the configuration vars for a PagerDuty client @@ -48,13 +38,9 @@ type PagerDutyConfig struct { AlertEventsURL string } -// PagerDutyClient ... Interface for PagerDuty client -type PagerDutyClient interface { - PostEvent(ctx context.Context, event *PagerDutyEventTrigger) (*PagerDutyAPIResponse, error) -} - // pagerdutyClient ... PagerDuty client for making requests type pagerdutyClient struct { + name string integrationKey string changeEventsURL string alertEventsURL string @@ -62,12 +48,13 @@ type pagerdutyClient struct { } // NewPagerDutyClient ... Initializer for PagerDuty client -func NewPagerDutyClient(cfg *PagerDutyConfig) PagerDutyClient { +func NewPagerDutyClient(cfg *PagerDutyConfig, name string) PagerDutyClient { if cfg.IntegrationKey == "" { logging.NoContext().Warn("No PagerDuty integration key provided") } return &pagerdutyClient{ + name: name, integrationKey: cfg.IntegrationKey, changeEventsURL: cfg.ChangeEventsURL, alertEventsURL: cfg.AlertEventsURL, @@ -78,36 +65,35 @@ func NewPagerDutyClient(cfg *PagerDutyConfig) PagerDutyClient { // PagerDutyEventTrigger ... Represents caller specified fields for a PagerDuty event type PagerDutyEventTrigger struct { Message string - Action PagerDutyAction - Severity PagerDutySeverity + Severity core.PagerDutySeverity DedupKey string } // PagerDutyRequest ... Used to construct a PagerDuty api request type PagerDutyRequest struct { RoutingKey string `json:"routing_key"` - EventAction PagerDutyAction `json:"event_action"` DedupKey string `json:"dedup_key"` Payload PagerDutyPayload `json:"payload"` + EventAction PagerDutyAction `json:"event_action"` } // PagerDutyPayload ... Represents the payload of a PagerDuty event type PagerDutyPayload struct { - Summary string `json:"summary"` - Source string `json:"source"` - Severity PagerDutySeverity `json:"severity"` - Timestamp time.Time `json:"timestamp"` + Summary string `json:"summary"` + Source string `json:"source"` + Severity core.PagerDutySeverity `json:"severity"` + Timestamp time.Time `json:"timestamp"` } // newPagerDutyPayload ... Initializes a new PagerDuty payload given the integration key and event func newPagerDutyPayload(integrationKey string, event *PagerDutyEventTrigger) *PagerDutyRequest { return &PagerDutyRequest{ RoutingKey: integrationKey, - EventAction: event.Action, + EventAction: Trigger, DedupKey: event.DedupKey, Payload: PagerDutyPayload{ Summary: event.Message, - Source: "Pessimism", + Source: Source, Severity: event.Severity, Timestamp: time.Now(), }, @@ -126,16 +112,33 @@ func (req *PagerDutyRequest) marshal() ([]byte, error) { // PagerDutyAPIResponse ... Represents the structure of a PagerDuty API response type PagerDutyAPIResponse struct { - Status string `json:"status"` - Message string `json:"message"` - DedupKey string `json:"dedup_key"` + Status core.AlertStatus `json:"status"` + Message string `json:"message"` + DedupKey string `json:"dedup_key"` +} + +// ToAlertResponse ... Converts a PagerDuty API response to an AlertAPIResponse +func (p *PagerDutyAPIResponse) ToAlertResponse() *AlertAPIResponse { + status := core.SuccessStatus + if p.Status != core.SuccessStatus { + status = core.FailureStatus + } + + return &AlertAPIResponse{ + Status: status, + Message: p.Message, + } } // PostEvent ... Posts a new event to PagerDuty -func (pdc pagerdutyClient) PostEvent(ctx context.Context, event *PagerDutyEventTrigger) (*PagerDutyAPIResponse, error) { +func (pdc *pagerdutyClient) PostEvent(ctx context.Context, event *AlertEventTrigger) (*AlertAPIResponse, error) { // 1. Create and marshal payload into request object body - payload, err := newPagerDutyPayload(pdc.integrationKey, event).marshal() + if pdc.integrationKey == "" { + return nil, fmt.Errorf("no Pagerduty integration key provided") + } + + payload, err := newPagerDutyPayload(pdc.integrationKey, event.ToPagerdutyEvent()).marshal() if err != nil { return nil, err } @@ -155,6 +158,9 @@ func (pdc pagerdutyClient) PostEvent(ctx context.Context, event *PagerDutyEventT zap.Error(err)) } }() + if err != nil { + return nil, err + } bytes, err := io.ReadAll(resp.Body) if err != nil { @@ -166,5 +172,10 @@ func (pdc pagerdutyClient) PostEvent(ctx context.Context, event *PagerDutyEventT return nil, err } - return apiResp, nil + return apiResp.ToAlertResponse(), nil +} + +// GetName ... Returns the name of the PagerDuty client +func (pdc *pagerdutyClient) GetName() string { + return pdc.name } diff --git a/internal/client/pagerduty_client_test.go b/internal/client/pagerduty_client_test.go new file mode 100644 index 00000000..a65f8d67 --- /dev/null +++ b/internal/client/pagerduty_client_test.go @@ -0,0 +1,29 @@ +package client_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/core" +) + +func Test_PagerdutyAPIResponse_To_AlertAPIResponse(t *testing.T) { + + testPdSuccess := &client.PagerDutyAPIResponse{ + Status: "success", + Message: "test", + } + + testPdFailure := &client.PagerDutyAPIResponse{ + Status: "failure", + Message: "test", + } + + resSuc := testPdSuccess.ToAlertResponse() + resFail := testPdFailure.ToAlertResponse() + + assert.Equal(t, resSuc.Status, core.SuccessStatus) + assert.Equal(t, resFail.Status, core.FailureStatus) +} diff --git a/internal/client/slack_client.go b/internal/client/slack_client.go index 933cc7ad..65dd8d12 100644 --- a/internal/client/slack_client.go +++ b/internal/client/slack_client.go @@ -12,35 +12,39 @@ import ( "io" "net/http" - "github.com/base-org/pessimism/internal/logging" "go.uber.org/zap" + + "github.com/base-org/pessimism/internal/core" + "github.com/base-org/pessimism/internal/logging" ) -type SlackConfig struct { - Channel string - URL string +type SlackClient interface { + AlertClient } -// SlackClient ... Interface for slack client -type SlackClient interface { - PostData(context.Context, string) (*SlackAPIResponse, error) +type SlackConfig struct { + Channel string + URL string + Priority string } // slackClient ... Slack client type slackClient struct { + name string url string channel string client *http.Client } // NewSlackClient ... Initializer -func NewSlackClient(cfg *SlackConfig) SlackClient { +func NewSlackClient(cfg *SlackConfig, name string) SlackClient { if cfg.URL == "" { logging.NoContext().Warn("No Slack webhook URL not provided") } return &slackClient{ - url: cfg.URL, + url: cfg.URL, + name: name, // NOTE - This is a default client, we can add more configuration to it // when necessary channel: cfg.Channel, @@ -48,7 +52,7 @@ func NewSlackClient(cfg *SlackConfig) SlackClient { } } -// slackPayload represents the structure of a slack alert +// SlackPayload represents the structure of a slack alert type SlackPayload struct { Text interface{} `json:"text"` Channel string `json:"channel"` @@ -71,14 +75,27 @@ func (sp *SlackPayload) marshal() ([]byte, error) { // SlackAPIResponse ... represents the structure of a slack API response type SlackAPIResponse struct { - Ok bool `json:"ok"` - Err string `json:"error"` + Message string `json:"message"` + Err string `json:"error"` } -// PostAlert ... handles posting data to slack -func (sc slackClient) PostData(ctx context.Context, str string) (*SlackAPIResponse, error) { +// ToAlertResponse ... Converts a slack API response to an alert API response +func (a *SlackAPIResponse) ToAlertResponse() *AlertAPIResponse { + status := core.SuccessStatus + if a.Message != "ok" { + status = core.FailureStatus + } + + return &AlertAPIResponse{ + Status: status, + Message: a.Err, + } +} + +// PostEvent ... handles posting an event to slack +func (sc slackClient) PostEvent(ctx context.Context, event *AlertEventTrigger) (*AlertAPIResponse, error) { // 1. make & marshal payload into request object body - payload, err := newSlackPayload(str, sc.channel).marshal() + payload, err := newSlackPayload(event.Message, sc.channel).marshal() if err != nil { return nil, err } @@ -114,5 +131,10 @@ func (sc slackClient) PostData(ctx context.Context, str string) (*SlackAPIRespon return nil, err } - return apiResp, err + return apiResp.ToAlertResponse(), nil +} + +// GetName ... returns the name of the slack client +func (sc slackClient) GetName() string { + return sc.name } diff --git a/internal/client/slack_client_test.go b/internal/client/slack_client_test.go new file mode 100644 index 00000000..598700d6 --- /dev/null +++ b/internal/client/slack_client_test.go @@ -0,0 +1,30 @@ +package client_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/base-org/pessimism/internal/client" + "github.com/base-org/pessimism/internal/core" +) + +func TestSlackResponseToAlertResponse(t *testing.T) { + testSlackSuccess := &client.SlackAPIResponse{ + Message: "ok", + Err: "", + } + + testSlackFailure := &client.SlackAPIResponse{ + Message: "not ok", + Err: "error", + } + + resSuc := testSlackSuccess.ToAlertResponse() + resFail := testSlackFailure.ToAlertResponse() + + assert.Equal(t, core.SuccessStatus, resSuc.Status) + assert.Equal(t, "", resSuc.Message) + assert.Equal(t, core.FailureStatus, resFail.Status) + assert.Equal(t, "error", resFail.Message) +} diff --git a/internal/config/config.go b/internal/config/config.go index 96052497..48a9ebff 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,13 +1,14 @@ package config import ( + "fmt" "log" "os" + "path/filepath" "strconv" "github.com/base-org/pessimism/internal/alert" "github.com/base-org/pessimism/internal/api/server" - "github.com/base-org/pessimism/internal/client" "github.com/base-org/pessimism/internal/core" "github.com/base-org/pessimism/internal/logging" "github.com/base-org/pessimism/internal/metrics" @@ -15,6 +16,7 @@ import ( "github.com/joho/godotenv" "go.uber.org/zap" + "gopkg.in/yaml.v3" ) // TrueEnvVal ... Represents the encoded string value for true (ie. 1) @@ -48,21 +50,8 @@ func NewConfig(fileName core.FilePath) *Config { Environment: core.Env(getEnvStr("ENV")), AlertConfig: &alert.Config{ - SlackConfig: &client.SlackConfig{ - Channel: getEnvStrWithDefault("SLACK_CHANNEL", ""), - URL: getEnvStrWithDefault("SLACK_URL", ""), - }, - - HighPagerDutyCfg: &client.PagerDutyConfig{ - AlertEventsURL: getEnvStrWithDefault("P0_PAGERDUTY_ALERT_EVENTS_URL", ""), - ChangeEventsURL: getEnvStrWithDefault("P0_PAGERDUTY_CHANGE_EVENTS_URL", ""), - IntegrationKey: getEnvStrWithDefault("P0_PAGERDUTY_INTEGRATION_KEY", ""), - }, - MediumPagerDutyCfg: &client.PagerDutyConfig{ - AlertEventsURL: getEnvStrWithDefault("P1_PAGERDUTY_ALERT_EVENTS_URL", ""), - ChangeEventsURL: getEnvStrWithDefault("P1_PAGERDUTY_CHANGE_EVENTS_URL", ""), - IntegrationKey: getEnvStrWithDefault("P1_PAGERDUTY_INTEGRATION_KEY", ""), - }, + RoutingCfgPath: getEnvStrWithDefault("ALERT_ROUTE_CFG_PATH", ""), + PagerdutyAlertEventsURL: getEnvStrWithDefault("PAGERDUTY_ALERT_EVENTS_URL", ""), }, SystemConfig: &subsystem.Config{ @@ -110,6 +99,28 @@ func (cfg *Config) IsBootstrap() bool { return cfg.BootStrapPath != "" } +// ParseAlertConfig ... Parses the alert config +func (cfg *Config) ParseAlertConfig() error { + if cfg.AlertConfig.RoutingCfgPath == "" { + return fmt.Errorf("alert routing config path is empty") + } + + f, err := os.ReadFile(filepath.Clean(cfg.AlertConfig.RoutingCfgPath)) + if err != nil { + return err + } + + params := &core.AlertRoutingParams{} + err = yaml.Unmarshal(f, ¶ms) + + if err != nil { + return err + } + + cfg.AlertConfig.RoutingParams = params + return nil +} + // getEnvStr ... Reads env var from process environment, panics if not found func getEnvStr(key string) string { envVar, ok := os.LookupEnv(key) @@ -123,7 +134,7 @@ func getEnvStr(key string) string { } // getEnvStrWithDefault ... Reads env var from process environment, returns default if not found -func getEnvStrWithDefault(key string, defaultValue string) string { //nolint: unparam // empty str default ok +func getEnvStrWithDefault(key string, defaultValue string) string { envVar, ok := os.LookupEnv(key) // Not found diff --git a/internal/core/alert.go b/internal/core/alert.go index b55653ed..0703f193 100644 --- a/internal/core/alert.go +++ b/internal/core/alert.go @@ -1,6 +1,26 @@ package core -import "time" +import ( + "time" +) + +// PagerDutySeverity represents the severity of an event +type PagerDutySeverity string + +const ( + Critical PagerDutySeverity = "critical" + Error PagerDutySeverity = "error" + Warning PagerDutySeverity = "warning" + Info PagerDutySeverity = "info" +) + +// AlertStatus ... A standardized response status for alert clients +type AlertStatus string + +const ( + SuccessStatus AlertStatus = "success" + FailureStatus AlertStatus = "failure" +) // Severity ... The severity of an alert type Severity uint8 @@ -45,10 +65,27 @@ func (s Severity) String() string { } } +// ToPagerDutySev ... Converts a severity to a pagerduty severity. See docs/alert-routing.md for more on this +func (s Severity) ToPagerDutySev() PagerDutySeverity { + switch s { + case LOW: + return Warning + case MEDIUM: + return Error + case HIGH: + return Critical + + case UNKNOWN: + return Error + + default: + return Error + } +} + // Alert ... An alert type Alert struct { Criticality Severity - Dest AlertDestination PUUID PUUID SUUID SUUID Timestamp time.Time @@ -56,3 +93,28 @@ type Alert struct { Content string } + +// AlertRoutingParams ... The routing parameters for alerts +type AlertRoutingParams struct { + AlertRoutes *SeverityMap `yaml:"alertRoutes"` +} + +// SeverityMap ... A map of severity to alert client config +type SeverityMap struct { + Low *AlertClientCfg `yaml:"low"` + Medium *AlertClientCfg `yaml:"medium"` + High *AlertClientCfg `yaml:"high"` +} + +// AlertClientCfg ... The alert client config +type AlertClientCfg struct { + Slack map[string]*Config `yaml:"slack"` + PagerDuty map[string]*Config `yaml:"pagerduty"` +} + +// Config ... The config for an alert client +type Config struct { + URL StringFromEnv `yaml:"url"` + Channel StringFromEnv `yaml:"channel"` + IntegrationKey StringFromEnv `yaml:"integration_key"` +} diff --git a/internal/core/alert_test.go b/internal/core/alert_test.go new file mode 100644 index 00000000..5d710231 --- /dev/null +++ b/internal/core/alert_test.go @@ -0,0 +1,31 @@ +package core_test + +import ( + "testing" + + "github.com/base-org/pessimism/internal/core" + "github.com/stretchr/testify/assert" +) + +func TestStringToSev(t *testing.T) { + assert.Equal(t, core.StringToSev("low"), core.LOW) + assert.Equal(t, core.StringToSev("medium"), core.MEDIUM) + assert.Equal(t, core.StringToSev("high"), core.HIGH) + assert.Equal(t, core.StringToSev("unknown"), core.UNKNOWN) + assert.Equal(t, core.StringToSev(""), core.UNKNOWN) +} + +func TestSeverity_String(t *testing.T) { + assert.Equal(t, core.LOW.String(), "low") + assert.Equal(t, core.MEDIUM.String(), "medium") + assert.Equal(t, core.HIGH.String(), "high") + assert.Equal(t, core.UNKNOWN.String(), "unknown") +} + +func TestToPagerDutySev(t *testing.T) { + + assert.Equal(t, core.LOW.ToPagerDutySev(), core.PagerDutySeverity("warning")) + assert.Equal(t, core.MEDIUM.ToPagerDutySev(), core.PagerDutySeverity("error")) + assert.Equal(t, core.HIGH.ToPagerDutySev(), core.PagerDutySeverity("critical")) + +} diff --git a/internal/core/core.go b/internal/core/core.go index e0912ee4..8c181b30 100644 --- a/internal/core/core.go +++ b/internal/core/core.go @@ -3,8 +3,12 @@ package core import ( "encoding/json" "fmt" + "os" + "regexp" "time" + "gopkg.in/yaml.v3" + "github.com/ethereum/go-ethereum/common" ) @@ -193,3 +197,27 @@ const ( L2ToL1MessagePasser = "l2_to_l1_address" //#nosec G101: False positive, this isn't a credential L2OutputOracle = "l2_output_address" //#nosec G101: False positive, this isn't a credential ) + +// Regexp for parsing yaml files +var reVar = regexp.MustCompile(`^\${(\w+)}$`) + +type StringFromEnv string + +// UnmarshalYAML implements the yaml.Unmarshaler interface to allow parsing strings from env vars. +func (e *StringFromEnv) UnmarshalYAML(value *yaml.Node) error { + var s string + if err := value.Decode(&s); err != nil { + return err + } + if match := reVar.FindStringSubmatch(s); len(match) > 0 { + *e = StringFromEnv(os.Getenv(match[1])) + } else { + *e = StringFromEnv(s) + } + return nil +} + +// String returns the string value, implementing the flag.Value interface. +func (e *StringFromEnv) String() string { + return string(*e) +} diff --git a/internal/core/core_test.go b/internal/core/core_test.go index b1f54872..f877c0bf 100644 --- a/internal/core/core_test.go +++ b/internal/core/core_test.go @@ -3,6 +3,8 @@ package core_test import ( "testing" + "gopkg.in/yaml.v3" + "github.com/base-org/pessimism/internal/core" "github.com/ethereum/go-ethereum/common" "github.com/stretchr/testify/assert" @@ -60,3 +62,24 @@ func Test_SessionParams(t *testing.T) { assert.Equal(t, nestedArgs, []interface{}{"bland(1,2,3)"}, "NestedArgs should return the correct value") } + +func Test_UnmarshalYaml(t *testing.T) { + + type test struct { + TestKey core.StringFromEnv `yaml:"test_key"` + TestKey2 core.StringFromEnv `yaml:"test_key2"` + } + + t.Setenv("test_key", "test_value") + + yml := []byte(` +test_key: ${test_key} +test_key2: "test_value2" +`) + + t1 := &test{} + err := yaml.Unmarshal(yml, t1) + assert.Nil(t, err, "Unmarshal should not return an error") + assert.Equal(t, "test_value", t1.TestKey.String(), "Unmarshal should return the correct value") + assert.Equal(t, "test_value2", t1.TestKey2.String(), "Unmarshal should return the correct value") +} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 53245103..d1b663d0 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -35,7 +35,7 @@ type Metricer interface { DecActivePipelines(pipelineType core.PipelineType, network core.Network) RecordBlockLatency(network core.Network, latency float64) RecordHeuristicRun(heuristic heuristic.Heuristic) - RecordAlertGenerated(alert core.Alert) + RecordAlertGenerated(alert core.Alert, dest core.AlertDestination, clientName string) RecordNodeError(network core.Network) RecordPipelineLatency(pUUID core.PUUID, latency float64) RecordAssessmentError(h heuristic.Heuristic) @@ -117,7 +117,7 @@ func New(ctx context.Context, cfg *Config) (Metricer, func(), error) { Name: "alerts_generated_total", Help: "Number of total alerts generated for a given heuristic", Namespace: metricsNamespace, - }, []string{"network", "heuristic", "pipeline", "destination"}), + }, []string{"network", "heuristic", "pipeline", "severity", "destination", "client_name"}), NodeErrors: factory.NewCounterVec(prometheus.CounterOpts{ Name: "node_errors_total", @@ -206,12 +206,12 @@ func (m *Metrics) RecordHeuristicRun(h heuristic.Heuristic) { } // RecordAlertGenerated ... Records that an alert has been generated for a given heuristic -func (m *Metrics) RecordAlertGenerated(alert core.Alert) { +func (m *Metrics) RecordAlertGenerated(alert core.Alert, dest core.AlertDestination, clientName string) { net := alert.SUUID.PID.Network().String() h := alert.SUUID.PID.HeuristicType().String() pipeline := alert.Ptype.String() - dest := alert.Dest.String() - m.AlertsGenerated.WithLabelValues(net, h, pipeline, dest).Inc() + sev := alert.Criticality.String() + m.AlertsGenerated.WithLabelValues(net, h, pipeline, sev, dest.String(), clientName).Inc() } // RecordNodeError ... Records that an error has been caught for a given node @@ -246,15 +246,15 @@ var NoopMetrics Metricer = new(noopMetricer) func (n *noopMetricer) RecordUp() {} func (n *noopMetricer) IncActiveHeuristics(_ core.HeuristicType, _ core.Network, _ core.PipelineType) { } -func (n *noopMetricer) RecordInvExecutionTime(_ heuristic.Heuristic, _ float64) {} -func (n *noopMetricer) IncActivePipelines(_ core.PipelineType, _ core.Network) {} -func (n *noopMetricer) DecActivePipelines(_ core.PipelineType, _ core.Network) {} -func (n *noopMetricer) RecordHeuristicRun(_ heuristic.Heuristic) {} -func (n *noopMetricer) RecordAlertGenerated(_ core.Alert) {} -func (n *noopMetricer) RecordNodeError(_ core.Network) {} -func (n *noopMetricer) RecordBlockLatency(_ core.Network, _ float64) {} -func (n *noopMetricer) RecordPipelineLatency(_ core.PUUID, _ float64) {} -func (n *noopMetricer) RecordAssessmentError(_ heuristic.Heuristic) {} +func (n *noopMetricer) RecordInvExecutionTime(_ heuristic.Heuristic, _ float64) {} +func (n *noopMetricer) IncActivePipelines(_ core.PipelineType, _ core.Network) {} +func (n *noopMetricer) DecActivePipelines(_ core.PipelineType, _ core.Network) {} +func (n *noopMetricer) RecordHeuristicRun(_ heuristic.Heuristic) {} +func (n *noopMetricer) RecordAlertGenerated(_ core.Alert, _ core.AlertDestination, _ string) {} +func (n *noopMetricer) RecordNodeError(_ core.Network) {} +func (n *noopMetricer) RecordBlockLatency(_ core.Network, _ float64) {} +func (n *noopMetricer) RecordPipelineLatency(_ core.PUUID, _ float64) {} +func (n *noopMetricer) RecordAssessmentError(_ heuristic.Heuristic) {} func (n *noopMetricer) Shutdown(_ context.Context) error { return nil diff --git a/internal/mocks/alert_client.go b/internal/mocks/alert_client.go new file mode 100644 index 00000000..939026de --- /dev/null +++ b/internal/mocks/alert_client.go @@ -0,0 +1,51 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/base-org/pessimism/internal/client/alert_client (interfaces: AlertClient) + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + alert_client "github.com/base-org/pessimism/internal/client" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" +) + +// MockAlertClient is a mock of AlertClient interface. +type MockAlertClient struct { + ctrl *gomock.Controller + recorder *MockAlertClientMockRecorder +} + +// MockAlertClientMockRecorder is the mock recorder for MockAlertClient. +type MockAlertClientMockRecorder struct { + mock *MockAlertClient +} + +// NewMockAlertClient creates a new mock instance. +func NewMockAlertClient(ctrl *gomock.Controller) *MockAlertClient { + mock := &MockAlertClient{ctrl: ctrl} + mock.recorder = &MockAlertClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockAlertClient) EXPECT() *MockAlertClientMockRecorder { + return m.recorder +} + +// PostEvent mocks base method. +func (m *MockAlertClient) PostEvent(arg0 context.Context, arg1 *alert_client.AlertEventTrigger) (*alert_client.AlertAPIResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PostEvent", arg0, arg1) + ret0, _ := ret[0].(*alert_client.AlertAPIResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PostEvent indicates an expected call of PostEvent. +func (mr *MockAlertClientMockRecorder) PostEvent(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PostEvent", reflect.TypeOf((*MockAlertClient)(nil).PostEvent), arg0, arg1) +} diff --git a/internal/mocks/client_map.go b/internal/mocks/client_map.go new file mode 100644 index 00000000..3a880dc3 --- /dev/null +++ b/internal/mocks/client_map.go @@ -0,0 +1,100 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/base-org/pessimism/internal/alert (interfaces: ClientMap) + +// Package mocks is a generated GoMock package. +package mocks + +import ( + reflect "reflect" + + client "github.com/base-org/pessimism/internal/client" + core "github.com/base-org/pessimism/internal/core" + gomock "github.com/golang/mock/gomock" +) + +// MockClientMap is a mock of ClientMap interface. +type MockClientMap struct { + ctrl *gomock.Controller + recorder *MockClientMapMockRecorder +} + +// MockClientMapMockRecorder is the mock recorder for MockClientMap. +type MockClientMapMockRecorder struct { + mock *MockClientMap +} + +// NewMockClientMap creates a new mock instance. +func NewMockClientMap(ctrl *gomock.Controller) *MockClientMap { + mock := &MockClientMap{ctrl: ctrl} + mock.recorder = &MockClientMapMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockClientMap) EXPECT() *MockClientMapMockRecorder { + return m.recorder +} + +// GetPagerDutyClients mocks base method. +func (m *MockClientMap) GetPagerDutyClients(arg0 core.Severity) []client.PagerDutyClient { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPagerDutyClients", arg0) + ret0, _ := ret[0].([]client.PagerDutyClient) + return ret0 +} + +// GetPagerDutyClients indicates an expected call of GetPagerDutyClients. +func (mr *MockClientMapMockRecorder) GetPagerDutyClients(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPagerDutyClients", reflect.TypeOf((*MockClientMap)(nil).GetPagerDutyClients), arg0) +} + +// GetSlackClients mocks base method. +func (m *MockClientMap) GetSlackClients(arg0 core.Severity) []client.SlackClient { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetSlackClients", arg0) + ret0, _ := ret[0].([]client.SlackClient) + return ret0 +} + +// GetSlackClients indicates an expected call of GetSlackClients. +func (mr *MockClientMapMockRecorder) GetSlackClients(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSlackClients", reflect.TypeOf((*MockClientMap)(nil).GetSlackClients), arg0) +} + +// InitAlertClients mocks base method. +func (m *MockClientMap) InitializeRouting(arg0 *core.AlertRoutingParams) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InitializeRouting", arg0) +} + +// InitAlertClients indicates an expected call of InitAlertClients. +func (mr *MockClientMapMockRecorder) InitAlertClients(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InitializeRouting", reflect.TypeOf((*MockClientMap)(nil).InitializeRouting), arg0) +} + +// SetPagerDutyClients mocks base method. +func (m *MockClientMap) SetPagerDutyClients(arg0 []client.PagerDutyClient, arg1 core.Severity) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetPagerDutyClients", arg0, arg1) +} + +// SetPagerDutyClients indicates an expected call of SetPagerDutyClients. +func (mr *MockClientMapMockRecorder) SetPagerDutyClients(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPagerDutyClients", reflect.TypeOf((*MockClientMap)(nil).SetPagerDutyClients), arg0, arg1) +} + +// SetSlackClients mocks base method. +func (m *MockClientMap) SetSlackClients(arg0 []client.SlackClient, arg1 core.Severity) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetSlackClients", arg0, arg1) +} + +// SetSlackClients indicates an expected call of SetSlackClients. +func (mr *MockClientMapMockRecorder) SetSlackClients(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetSlackClients", reflect.TypeOf((*MockClientMap)(nil).SetSlackClients), arg0, arg1) +} diff --git a/internal/mocks/pagerduty_client.go b/internal/mocks/pagerduty_client.go index aa25d948..526a6cf3 100644 --- a/internal/mocks/pagerduty_client.go +++ b/internal/mocks/pagerduty_client.go @@ -35,11 +35,25 @@ func (m *MockPagerDutyClient) EXPECT() *MockPagerDutyClientMockRecorder { return m.recorder } +// GetName mocks base method. +func (m *MockPagerDutyClient) GetName() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetName") + ret0, _ := ret[0].(string) + return ret0 +} + +// GetName indicates an expected call of GetName. +func (mr *MockPagerDutyClientMockRecorder) GetName() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetName", reflect.TypeOf((*MockPagerDutyClient)(nil).GetName)) +} + // PostEvent mocks base method. -func (m *MockPagerDutyClient) PostEvent(arg0 context.Context, arg1 *client.PagerDutyEventTrigger) (*client.PagerDutyAPIResponse, error) { +func (m *MockPagerDutyClient) PostEvent(arg0 context.Context, arg1 *client.AlertEventTrigger) (*client.AlertAPIResponse, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PostEvent", arg0, arg1) - ret0, _ := ret[0].(*client.PagerDutyAPIResponse) + ret0, _ := ret[0].(*client.AlertAPIResponse) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/internal/mocks/routing_directory.go b/internal/mocks/routing_directory.go new file mode 100644 index 00000000..33a32e07 --- /dev/null +++ b/internal/mocks/routing_directory.go @@ -0,0 +1,100 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/base-org/pessimism/internal/alert (interfaces: RoutingDirectory) + +// Package mocks is a generated GoMock package. +package mocks + +import ( + reflect "reflect" + + client "github.com/base-org/pessimism/internal/client" + core "github.com/base-org/pessimism/internal/core" + gomock "github.com/golang/mock/gomock" +) + +// MockRoutingDirectory is a mock of RoutingDirectory interface. +type MockRoutingDirectory struct { + ctrl *gomock.Controller + recorder *MockRoutingDirectoryMockRecorder +} + +// MockRoutingDirectoryMockRecorder is the mock recorder for MockRoutingDirectory. +type MockRoutingDirectoryMockRecorder struct { + mock *MockRoutingDirectory +} + +// NewMockRoutingDirectory creates a new mock instance. +func NewMockRoutingDirectory(ctrl *gomock.Controller) *MockRoutingDirectory { + mock := &MockRoutingDirectory{ctrl: ctrl} + mock.recorder = &MockRoutingDirectoryMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockRoutingDirectory) EXPECT() *MockRoutingDirectoryMockRecorder { + return m.recorder +} + +// GetPagerDutyClients mocks base method. +func (m *MockRoutingDirectory) GetPagerDutyClients(arg0 core.Severity) []client.PagerDutyClient { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPagerDutyClients", arg0) + ret0, _ := ret[0].([]client.PagerDutyClient) + return ret0 +} + +// GetPagerDutyClients indicates an expected call of GetPagerDutyClients. +func (mr *MockRoutingDirectoryMockRecorder) GetPagerDutyClients(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPagerDutyClients", reflect.TypeOf((*MockRoutingDirectory)(nil).GetPagerDutyClients), arg0) +} + +// GetSlackClients mocks base method. +func (m *MockRoutingDirectory) GetSlackClients(arg0 core.Severity) []client.SlackClient { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetSlackClients", arg0) + ret0, _ := ret[0].([]client.SlackClient) + return ret0 +} + +// GetSlackClients indicates an expected call of GetSlackClients. +func (mr *MockRoutingDirectoryMockRecorder) GetSlackClients(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSlackClients", reflect.TypeOf((*MockRoutingDirectory)(nil).GetSlackClients), arg0) +} + +// InitializeRouting mocks base method. +func (m *MockRoutingDirectory) InitializeRouting(arg0 *core.AlertRoutingParams) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InitializeRouting", arg0) +} + +// InitializeRouting indicates an expected call of InitializeRouting. +func (mr *MockRoutingDirectoryMockRecorder) InitializeRouting(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InitializeRouting", reflect.TypeOf((*MockRoutingDirectory)(nil).InitializeRouting), arg0) +} + +// SetPagerDutyClients mocks base method. +func (m *MockRoutingDirectory) SetPagerDutyClients(arg0 []client.PagerDutyClient, arg1 core.Severity) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetPagerDutyClients", arg0, arg1) +} + +// SetPagerDutyClients indicates an expected call of SetPagerDutyClients. +func (mr *MockRoutingDirectoryMockRecorder) SetPagerDutyClients(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPagerDutyClients", reflect.TypeOf((*MockRoutingDirectory)(nil).SetPagerDutyClients), arg0, arg1) +} + +// SetSlackClients mocks base method. +func (m *MockRoutingDirectory) SetSlackClients(arg0 []client.SlackClient, arg1 core.Severity) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetSlackClients", arg0, arg1) +} + +// SetSlackClients indicates an expected call of SetSlackClients. +func (mr *MockRoutingDirectoryMockRecorder) SetSlackClients(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetSlackClients", reflect.TypeOf((*MockRoutingDirectory)(nil).SetSlackClients), arg0, arg1) +} diff --git a/internal/mocks/slack_client.go b/internal/mocks/slack_client.go index b4f57dd0..5f6b702f 100644 --- a/internal/mocks/slack_client.go +++ b/internal/mocks/slack_client.go @@ -35,17 +35,31 @@ func (m *MockSlackClient) EXPECT() *MockSlackClientMockRecorder { return m.recorder } -// PostData mocks base method. -func (m *MockSlackClient) PostData(arg0 context.Context, arg1 string) (*client.SlackAPIResponse, error) { +// GetName mocks base method. +func (m *MockSlackClient) GetName() string { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "PostData", arg0, arg1) - ret0, _ := ret[0].(*client.SlackAPIResponse) + ret := m.ctrl.Call(m, "GetName") + ret0, _ := ret[0].(string) + return ret0 +} + +// GetName indicates an expected call of GetName. +func (mr *MockSlackClientMockRecorder) GetName() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetName", reflect.TypeOf((*MockSlackClient)(nil).GetName)) +} + +// PostEvent mocks base method. +func (m *MockSlackClient) PostEvent(arg0 context.Context, arg1 *client.AlertEventTrigger) (*client.AlertAPIResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PostEvent", arg0, arg1) + ret0, _ := ret[0].(*client.AlertAPIResponse) ret1, _ := ret[1].(error) return ret0, ret1 } -// PostData indicates an expected call of PostData. -func (mr *MockSlackClientMockRecorder) PostData(arg0, arg1 interface{}) *gomock.Call { +// PostEvent indicates an expected call of PostEvent. +func (mr *MockSlackClientMockRecorder) PostEvent(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PostData", reflect.TypeOf((*MockSlackClient)(nil).PostData), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PostEvent", reflect.TypeOf((*MockSlackClient)(nil).PostEvent), arg0, arg1) } diff --git a/mocks/alert_client.go b/mocks/alert_client.go new file mode 100644 index 00000000..1aae3778 --- /dev/null +++ b/mocks/alert_client.go @@ -0,0 +1,65 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/base-org/pessimism/internal/client (interfaces: AlertClient) + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + reflect "reflect" + + client "github.com/base-org/pessimism/internal/client" + gomock "github.com/golang/mock/gomock" +) + +// MockAlertClient is a mock of AlertClient interface. +type MockAlertClient struct { + ctrl *gomock.Controller + recorder *MockAlertClientMockRecorder +} + +// MockAlertClientMockRecorder is the mock recorder for MockAlertClient. +type MockAlertClientMockRecorder struct { + mock *MockAlertClient +} + +// NewMockAlertClient creates a new mock instance. +func NewMockAlertClient(ctrl *gomock.Controller) *MockAlertClient { + mock := &MockAlertClient{ctrl: ctrl} + mock.recorder = &MockAlertClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockAlertClient) EXPECT() *MockAlertClientMockRecorder { + return m.recorder +} + +// GetName mocks base method. +func (m *MockAlertClient) GetName() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetName") + ret0, _ := ret[0].(string) + return ret0 +} + +// GetName indicates an expected call of GetName. +func (mr *MockAlertClientMockRecorder) GetName() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetName", reflect.TypeOf((*MockAlertClient)(nil).GetName)) +} + +// PostEvent mocks base method. +func (m *MockAlertClient) PostEvent(arg0 context.Context, arg1 *client.AlertEventTrigger) (*client.AlertAPIResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PostEvent", arg0, arg1) + ret0, _ := ret[0].(*client.AlertAPIResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PostEvent indicates an expected call of PostEvent. +func (mr *MockAlertClientMockRecorder) PostEvent(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PostEvent", reflect.TypeOf((*MockAlertClient)(nil).PostEvent), arg0, arg1) +}