diff --git a/changelog/fragments/1728652985-Dont-fail-checkin-if-upgrade-action-not-found.yaml b/changelog/fragments/1728652985-Dont-fail-checkin-if-upgrade-action-not-found.yaml new file mode 100644 index 000000000..78ed10f84 --- /dev/null +++ b/changelog/fragments/1728652985-Dont-fail-checkin-if-upgrade-action-not-found.yaml @@ -0,0 +1,35 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: Dont fail checkin if upgrade action not found + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +description: + On agent checkin, when an agent has upgrade details, the corresponding agent action is looked up and linked with APM spans. + However, there may be cases where such an action does not exist, e.g. when running `elastic-agent upgrade` for a Fleet-managed agent. + This change ensures that agent checkin does not fail if the agent has upgrade details but no corresponding action. + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/fleet-server/pull/3991 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/internal/pkg/api/handleCheckin.go b/internal/pkg/api/handleCheckin.go index 3e59a041d..9adbc4791 100644 --- a/internal/pkg/api/handleCheckin.go +++ b/internal/pkg/api/handleCheckin.go @@ -415,7 +415,8 @@ func (ct *CheckinT) processUpgradeDetails(ctx context.Context, agent *model.Agen } if len(actions) == 0 { vSpan.End() - return fmt.Errorf("upgrade_details no action for id %q found", details.ActionId) + zerolog.Ctx(vCtx).Warn().Msgf("upgrade_details no action for id %q found (agent id %q)", details.ActionId, agent.Agent.ID) + return nil } action = actions[0] ct.cache.SetAction(action) diff --git a/testing/e2e/api_version/client_api_current.go b/testing/e2e/api_version/client_api_current.go index 3580a4a5f..6009f6c81 100644 --- a/testing/e2e/api_version/client_api_current.go +++ b/testing/e2e/api_version/client_api_current.go @@ -350,6 +350,29 @@ func (tester *ClientAPITester) TestCheckinWithBadRequest() { tester.Require().Equal(http.StatusBadRequest, statusCode, "Expected status code 400 for bad request") } +func (tester *ClientAPITester) TestCheckinWithActionNotFound() { + ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) + defer cancel() + + // enroll agent + tester.T().Log("test enrollment") + agentID, agentKey := tester.Enroll(ctx, tester.enrollmentKey) + tester.VerifyAgentInKibana(ctx, agentID) + + tester.T().Logf("test checkin with no upgrade action: agent %s", agentID) + // checkin request with upgrade details + req := &api.AgentCheckinJSONRequestBody{ + Status: api.CheckinRequestStatusOnline, + Message: "test checkin", + UpgradeDetails: &api.UpgradeDetails{ + ActionId: "test-missing-id", + State: api.UpgradeDetailsStateUPGDOWNLOADING, + }, + } + _, _, statusCode := tester.Checkin(ctx, agentKey, agentID, nil, nil, req) + tester.Require().Equal(http.StatusOK, statusCode, "Expected status code 200 for successful checkin with action not found") +} + func (tester *ClientAPITester) TestFullFileUpload() { ctx, cancel := context.WithCancel(context.Background()) defer cancel()