Skip to content

Commit

Permalink
Feature/4890 detect fail early upgrade (#5864) (#5978)
Browse files Browse the repository at this point in the history
* feature(4890): added shouldUpgrade function in the upgrade cli file

* feature(4890): added shouldUpgrade check into the upgrade command

* feature(4890): ran gofmt

* feature(4890): added a "force" flag, marked it as hidden

* feature(4890): removed dpkg, rpm and container logic

* feature(4890): ran gofmt

* feature(4890): updated the function signature of the upgrade command, updated tests, added new tests

* feature(4890): update comments

* feature(4890): added changelog fragment

* feature(4890): added fatal log in case there is an error while marking force flag as hidden

* feature(4890): added error checks in tests

* feature(4890): updated the summary in the changelog fragment

* feature(4890): removed the shorthand flag for the force flag

* feature(4890): updated synchronization in the tests

* Update internal/pkg/agent/cmd/upgrade_test.go

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>

* feature(4890): using streams err output instead of defaulting to stderr

* feature(4890): use EXPECT instead of On

* feature(4890): moved unconfirmed upgrade error to a package var

* feature(4890): removed confirmation from upgrade check for when force flag is set

* Update internal/pkg/agent/cmd/upgrade.go

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>

* Update internal/pkg/agent/cmd/upgrade.go

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>

* feature(4890): fix errors

* Update internal/pkg/agent/cmd/upgrade.go

Co-authored-by: Craig MacKenzie <craig.mackenzie@elastic.co>

* feature(4890): update test

* feature(4890): replace agentinfo with state call

- updated agentinfo proto
- updated relevant generated code
- implemented state call in the upgrade cmd

* feature(4890): updated proto, client and server implementation

* feature(4890): fix struct tag

* feature(4890): added skip-verify checks

* feature(4890): ran addLicenseHeaders

* feature(4890): ran mage clean

* feature(4890): fix typo

* feature(4890): added timeout to connection

* feature(4890): changed condition check order

* feature(4890): fix unit tests

* feature(4890): refactored tests, using mock client

* Update internal/pkg/agent/cmd/upgrade.go

Co-authored-by: Andrzej Stencel <andrzej.stencel@elastic.co>

* feature(4890): use lower case "f" in error messages to be more consistent

* feature(4890): remove duplicate line

* feature(4890): ran mage controlProto with correct protoc version

---------

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>
Co-authored-by: Craig MacKenzie <craig.mackenzie@elastic.co>
Co-authored-by: Andrzej Stencel <andrzej.stencel@elastic.co>
(cherry picked from commit 8579474)

Co-authored-by: Kaan Yalti <kaan.yalti@elastic.co>
  • Loading branch information
mergify[bot] and kaanyalti authored Nov 13, 2024
1 parent 22ad8f7 commit 8e61536
Show file tree
Hide file tree
Showing 10 changed files with 556 additions and 216 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; an 80ish-character description of the change.
summary: Detect and fail-early cli upgrades if agent is fleet-managed

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: This change brings restrictions on the upgrade cli command. If an agent is fleet-managed and is running in unprivileged mode, users won't be able to upgrade the agent using the cli. If an agent is fleet-managed and is running privileged, users will only be able to upgrade the agent using the cli if they provide --force flag.

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: "elastic-agent"

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/5864
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
2 changes: 2 additions & 0 deletions control_v2.proto
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ message StateAgentInfo {
int32 pid = 6;
// True when running as unprivileged.
bool unprivileged = 7;
// True when agent is managed by fleet
bool isManaged = 8;
}

// StateResponse is the current state of Elastic Agent.
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/coordinator/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,7 @@ func (c *Coordinator) generateComponentModel() (err error) {
configInjector = c.monitorMgr.MonitoringConfig
}

var existingCompState = make(map[string]uint64, len(c.state.Components))
existingCompState := make(map[string]uint64, len(c.state.Components))
for _, comp := range c.state.Components {
existingCompState[comp.Component.ID] = comp.State.Pid
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ func (c *Coordinator) refreshState() {
// Coordinator state and sets stateNeedsRefresh.
// Must be called on the main Coordinator goroutine.
func (c *Coordinator) applyComponentState(state runtime.ComponentComponentState) {

// check for any component updates to the known PID, so we can update the component monitoring
found := false
for i, other := range c.state.Components {
Expand Down Expand Up @@ -168,7 +167,6 @@ func (c *Coordinator) applyComponentState(state runtime.ComponentComponentState)
}

c.stateNeedsRefresh = true

}

// generateReportableState aggregates the internal state of the Coordinator
Expand Down
8 changes: 5 additions & 3 deletions internal/pkg/agent/application/info/agent_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (
)

// defaultAgentConfigFile is a name of file used to store agent information
const agentInfoKey = "agent"
const defaultLogLevel = "info"
const maxRetriesloadAgentInfo = 5
const (
agentInfoKey = "agent"
defaultLogLevel = "info"
maxRetriesloadAgentInfo = 5
)

type persistentAgentInfo struct {
ID string `json:"id" yaml:"id" config:"id"`
Expand Down
120 changes: 112 additions & 8 deletions internal/pkg/agent/cmd/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"os"
"strings"
"time"

"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
Expand All @@ -31,6 +32,14 @@ const (
flagPGPBytes = "pgp"
flagPGPBytesPath = "pgp-path"
flagPGPBytesURI = "pgp-uri"
flagForce = "force"
)

// Sentinel errors returned by checkUpgradable when the upgrade command must be
// rejected.
var (
	// unsupportedUpgradeError is returned for a fleet managed agent when the
	// force flag is not set.
	unsupportedUpgradeError error = errors.New("this agent is fleet managed and must be upgraded using Fleet")
	// nonRootExecutionError is returned for a fleet managed agent when the
	// upgrade is forced but the command is not executed as root.
	nonRootExecutionError = errors.New("upgrade command needs to be executed as root for fleet managed agents")
	// skipVerifyNotAllowedError is returned for a fleet managed agent when the
	// upgrade is forced together with the skip-verify flag.
	skipVerifyNotAllowedError = errors.New(fmt.Sprintf("\"%s\" flag is not allowed when upgrading a fleet managed agent using the cli", flagSkipVerify))
	// skipVerifyNotRootError is returned for a standalone (not fleet managed)
	// agent when the skip-verify flag is used without root.
	skipVerifyNotRootError = errors.New(fmt.Sprintf("user needs to be root to use \"%s\" flag when upgrading standalone agents", flagSkipVerify))
)

func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command {
Expand All @@ -40,6 +49,7 @@ func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Comman
Long: "This command upgrades the currently installed Elastic Agent to the specified version.",
Args: cobra.ExactArgs(1),
Run: func(c *cobra.Command, args []string) {
c.SetContext(context.Background())
if err := upgradeCmd(streams, c, args); err != nil {
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
os.Exit(1)
Expand All @@ -53,24 +63,119 @@ func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Comman
cmd.Flags().String(flagPGPBytes, "", "PGP to use for package verification")
cmd.Flags().String(flagPGPBytesURI, "", "Path to a web location containing PGP to use for package verification")
cmd.Flags().String(flagPGPBytesPath, "", "Path to a file containing PGP to use for package verification")
cmd.Flags().BoolP(flagForce, "", false, "Advanced option to force an upgrade on a fleet managed agent")
err := cmd.Flags().MarkHidden(flagForce)
if err != nil {
fmt.Fprintf(streams.Err, "error while setting upgrade force flag attributes: %s", err.Error())
os.Exit(1)
}

return cmd
}

// upgradeInput bundles everything upgradeCmdWithClient needs to run an upgrade:
// the command and its arguments, the client talking to the running agent, and
// the facts collected up front about the agent and the invoking user.
type upgradeInput struct {
	// streams is where command output is written; the success message goes to
	// streams.Out.
	streams *cli.IOStreams
	// cmd is the cobra command whose flags are read during the upgrade.
	cmd *cobra.Command
	// args are the positional arguments; args[0] is the target version.
	args []string
	// c is the client used to communicate with the running agent.
	c client.Client
	// agentInfo is the Info part of the agent state; its IsManaged field tells
	// whether the agent is fleet managed.
	agentInfo client.AgentStateInfo
	// isRoot is the result of utils.HasRoot() for the invoking user.
	isRoot bool
}

// upgradeCmd is the entry point of the upgrade command. It connects to the
// running agent daemon, fetches the agent state, determines whether the
// invoking user has root privileges, and passes all of that to
// upgradeCmdWithClient.
//
// It returns an error when the daemon cannot be reached, when the agent state
// or the user privileges cannot be retrieved, or when upgradeCmdWithClient
// itself fails.
func upgradeCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error {
	c := client.New()

	// Limit the connection attempt to one minute.
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	if err := c.Connect(ctx); err != nil {
		return errors.New(err, "failed communicating to running daemon", errors.TypeNetwork, errors.M("socket", control.Address()))
	}
	defer c.Disconnect()

	// NOTE(review): the state request uses the command's own context, so the
	// one-minute timeout above does not apply to it — confirm this is intended.
	state, err := c.State(cmd.Context())
	if err != nil {
		return fmt.Errorf("error while trying to get agent state: %w", err)
	}

	isRoot, err := utils.HasRoot()
	if err != nil {
		return fmt.Errorf("error while retrieving user permission: %w", err)
	}

	// Keyed fields keep this literal correct even if upgradeInput's fields are
	// reordered or extended.
	return upgradeCmdWithClient(&upgradeInput{
		streams:   streams,
		cmd:       cmd,
		args:      args,
		c:         c,
		agentInfo: state.Info,
		isRoot:    isRoot,
	})
}

// upgradeCond holds the conditions checkUpgradable evaluates to decide whether
// an upgrade through the cli is allowed.
type upgradeCond struct {
	// isManaged is true when the agent is fleet managed.
	isManaged bool
	// force is the value of the force flag.
	force bool
	// isRoot is true when the command is executed with root privileges.
	isRoot bool
	// skipVerify is the value of the skip-verify flag.
	skipVerify bool
}

func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []string, c client.Client) error {
version := args[0]
// checkUpgradable reports whether an upgrade through the cli may proceed under
// the given conditions. It returns nil when the upgrade is allowed and one of
// the package-level sentinel errors otherwise.
//
// Standalone agents are only rejected when skip-verify is requested without
// root. Fleet managed agents must pass three checks, evaluated in this order:
// the force flag is set, skip-verify is not requested, and the command runs as
// root.
func checkUpgradable(cond upgradeCond) error {
	// Standalone agent.
	if !cond.isManaged {
		if cond.skipVerify && !cond.isRoot {
			return skipVerifyNotRootError
		}
		return nil
	}

	// Fleet managed agent: the first failing check decides the error.
	switch {
	case !cond.force:
		return unsupportedUpgradeError
	case cond.skipVerify:
		return skipVerifyNotAllowedError
	case !cond.isRoot:
		return nonRootExecutionError
	default:
		return nil
	}
}

func upgradeCmdWithClient(input *upgradeInput) error {
cmd := input.cmd
c := input.c
version := input.args[0]
sourceURI, _ := cmd.Flags().GetString(flagSourceURI)

err := c.Connect(context.Background())
force, err := cmd.Flags().GetBool(flagForce)
if err != nil {
return errors.New(err, "Failed communicating to running daemon", errors.TypeNetwork, errors.M("socket", control.Address()))
return fmt.Errorf("failed to retrieve command flag information while trying to upgrade the agent: %w", err)
}

skipVerification, err := cmd.Flags().GetBool(flagSkipVerify)
if err != nil {
return fmt.Errorf("failed to retrieve %s flag information while upgrading the agent: %w", flagSkipVerify, err)
}

err = checkUpgradable(upgradeCond{
isManaged: input.agentInfo.IsManaged,
force: force,
isRoot: input.isRoot,
skipVerify: skipVerification,
})
if err != nil {
return fmt.Errorf("aborting upgrade: %w", err)
}
defer c.Disconnect()

isBeingUpgraded, err := upgrade.IsInProgress(c, utils.GetWatcherPIDs)
if err != nil {
Expand All @@ -80,7 +185,6 @@ func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []str
return errors.New("an upgrade is already in progress; please try again later.")
}

skipVerification, _ := cmd.Flags().GetBool(flagSkipVerify)
var pgpChecks []string
if !skipVerification {
// get local PGP
Expand Down Expand Up @@ -122,6 +226,6 @@ func upgradeCmdWithClient(streams *cli.IOStreams, cmd *cobra.Command, args []str
return errors.New(err, "Failed trigger upgrade of daemon")
}
}
fmt.Fprintf(streams.Out, "Upgrade triggered to version %s, Elastic Agent is currently restarting\n", version)
fmt.Fprintf(input.streams.Out, "Upgrade triggered to version %s, Elastic Agent is currently restarting\n", version)
return nil
}
Loading

0 comments on commit 8e61536

Please sign in to comment.