Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Use px/agent_status_diagnostics script within px cli to detect missing kernel headers #2065

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/pixie_cli/pkg/cmd/collect_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/spf13/viper"

"px.dev/pixie/src/pixie_cli/pkg/utils"
"px.dev/pixie/src/utils/shared/k8s"
"px.dev/pixie/src/pixie_cli/pkg/vizier"
)

func init() {
Expand All @@ -42,7 +42,7 @@ var CollectLogsCmd = &cobra.Command{
viper.BindPFlag("namespace", cmd.Flags().Lookup("namespace"))
},
Run: func(cmd *cobra.Command, args []string) {
c := k8s.NewLogCollector()
c := vizier.NewLogCollector(mustCreateBundleReader(), viper.GetString("cloud_addr"))
fName := fmt.Sprintf("pixie_logs_%s.zip", time.Now().Format("20060102150405"))
err := c.CollectPixieLogs(fName)
if err != nil {
Expand Down
83 changes: 19 additions & 64 deletions src/pixie_cli/pkg/cmd/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"context"
"errors"
"fmt"
"io"
"os"
"strings"
"time"
Expand Down Expand Up @@ -72,6 +71,7 @@ var BlockListedLabels = []string{
}

func init() {
DeployCmd.Flags().StringP("bundle", "b", "", "Path/URL to bundle file")
DeployCmd.Flags().StringP("extract_yaml", "e", "", "Directory to extract the Pixie yamls to")
DeployCmd.Flags().StringP("vizier_version", "v", "", "Pixie version to deploy")
DeployCmd.Flags().BoolP("check", "c", true, "Check whether the cluster can run Pixie")
Expand Down Expand Up @@ -106,6 +106,7 @@ var DeployCmd = &cobra.Command{
Use: "deploy",
Short: "Deploys Pixie on the current K8s cluster",
PreRun: func(cmd *cobra.Command, args []string) {
viper.BindPFlag("bundle", cmd.Flags().Lookup("bundle"))
viper.BindPFlag("extract_yaml", cmd.Flags().Lookup("extract_yaml"))
viper.BindPFlag("vizier_version", cmd.Flags().Lookup("vizier_version"))
viper.BindPFlag("check", cmd.Flags().Lookup("check"))
Expand Down Expand Up @@ -604,61 +605,6 @@ func deploy(cloudConn *grpc.ClientConn, clientset *kubernetes.Clientset, vzClien
return clusterID
}

func runSimpleHealthCheckScript(cloudAddr string, clusterID uuid.UUID) error {
v, err := vizier.ConnectionToVizierByID(cloudAddr, clusterID)
br := mustCreateBundleReader()
if err != nil {
return err
}
execScript := br.MustGetScript(script.AgentStatusScript)

ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()

resp, err := v.ExecuteScriptStream(ctx, execScript, nil)
if err != nil {
return err
}

// TODO(zasgar): Make this use the Null output. We can't right now
// because of fatal message on vizier failure.
errCh := make(chan error)
// Eat all responses.
go func() {
for {
select {
case <-ctx.Done():
if ctx.Err() != nil {
errCh <- ctx.Err()
return
}
errCh <- nil
return
case msg := <-resp:
if msg == nil {
errCh <- nil
return
}
if msg.Err != nil {
if msg.Err == io.EOF {
errCh <- nil
return
}
errCh <- msg.Err
return
}
if msg.Resp.Status != nil && msg.Resp.Status.Code != 0 {
errCh <- errors.New(msg.Resp.Status.Message)
}
// Eat messages.
}
}
}()

err = <-errCh
return err
}

func waitForHealthCheckTaskGenerator(cloudAddr string, clusterID uuid.UUID) func() error {
return func() error {
timeout := time.NewTimer(5 * time.Minute)
Expand All @@ -668,10 +614,15 @@ func waitForHealthCheckTaskGenerator(cloudAddr string, clusterID uuid.UUID) func
case <-timeout.C:
return errors.New("timeout waiting for healthcheck (it is possible that Pixie stabilized after the healthcheck timeout. To check if Pixie successfully deployed, run `px debug pods`)")
default:
err := runSimpleHealthCheckScript(cloudAddr, clusterID)
_, err := vizier.RunSimpleHealthCheckScript(mustCreateBundleReader(), cloudAddr, clusterID)
if err == nil {
return nil
}
// The health check warning error indicates the cluster successfully deployed, but there are some warnings.
// Return the error to end the polling and show the warnings.
if _, ok := err.(*vizier.HealthCheckWarning); ok {
return err
}
time.Sleep(5 * time.Second)
}
}
Expand All @@ -691,13 +642,17 @@ func waitForHealthCheck(cloudAddr string, clusterID uuid.UUID, clientset *kubern
hc := utils.NewSerialTaskRunner(healthCheckJobs)
err := hc.RunAndMonitor()
if err != nil {
_ = pxanalytics.Client().Enqueue(&analytics.Track{
UserId: pxconfig.Cfg().UniqueClientID,
Event: "Deploy Healthcheck Failed",
Properties: analytics.NewProperties().
Set("err", err.Error()),
})
utils.WithError(err).Fatal("Failed Pixie healthcheck")
if _, ok := err.(*vizier.HealthCheckWarning); ok {
utils.WithError(err).Error("Pixie healthcheck detected the following warnings:")
} else {
_ = pxanalytics.Client().Enqueue(&analytics.Track{
UserId: pxconfig.Cfg().UniqueClientID,
Event: "Deploy Healthcheck Failed",
Properties: analytics.NewProperties().
Set("err", err.Error()),
})
utils.WithError(err).Fatal("Failed Pixie healthcheck")
}
}
_ = pxanalytics.Client().Enqueue(&analytics.Track{
UserId: pxconfig.Cfg().UniqueClientID,
Expand Down
5 changes: 2 additions & 3 deletions src/pixie_cli/pkg/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ var RootCmd = &cobra.Command{

// Name a variable to store a slice of commands that don't require cloudAddr
var cmdsCloudAddrNotReqd = []*cobra.Command{
CollectLogsCmd,
VersionCmd,
}

Expand Down Expand Up @@ -245,7 +244,7 @@ func checkAuthForCmd(c *cobra.Command) {
os.Exit(1)
}
switch c {
case DeployCmd, UpdateCmd, GetCmd, DeployKeyCmd, APIKeyCmd:
case CollectLogsCmd, DeployCmd, UpdateCmd, GetCmd, DeployKeyCmd, APIKeyCmd:
utils.Errorf("These commands are unsupported in Direct Vizier mode.")
os.Exit(1)
default:
Expand All @@ -254,7 +253,7 @@ func checkAuthForCmd(c *cobra.Command) {
}

switch c {
case DeployCmd, UpdateCmd, RunCmd, LiveCmd, GetCmd, ScriptCmd, DeployKeyCmd, APIKeyCmd:
case CollectLogsCmd, DeployCmd, UpdateCmd, RunCmd, LiveCmd, GetCmd, ScriptCmd, DeployKeyCmd, APIKeyCmd:
authenticated := auth.IsAuthenticated(viper.GetString("cloud_addr"))
if !authenticated {
utils.Errorf("Failed to authenticate. Please retry `px auth login`.")
Expand Down
1 change: 1 addition & 0 deletions src/pixie_cli/pkg/vizier/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ go_library(
"data_formatter.go",
"errors.go",
"lister.go",
"logs.go",
"script.go",
"stream_adapter.go",
"utils.go",
Expand Down
144 changes: 144 additions & 0 deletions src/pixie_cli/pkg/vizier/logs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2018- The Pixie Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

package vizier

import (
"archive/zip"
"context"
"errors"
"os"
"strings"

log "github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"px.dev/pixie/src/utils/script"
"px.dev/pixie/src/utils/shared/k8s"
)

// LogCollector collect logs for Pixie and cluster setup information.
type LogCollector struct {
k8sConfig *rest.Config
k8sClientSet *kubernetes.Clientset
cloudAddr string
br *script.BundleManager
k8s.LogCollector
}

// NewLogCollector creates a new log collector.
func NewLogCollector(br *script.BundleManager, cloudAddr string) *LogCollector {
cfg := k8s.GetConfig()
cs := k8s.GetClientset(cfg)
return &LogCollector{
cfg,
cs,
cloudAddr,
br,
*k8s.NewLogCollector(),
}
}

// CollectPixieLogs collects logs for all Pixie pods and write them to the zip file fName.
func (c *LogCollector) CollectPixieLogs(fName string) error {
if !strings.HasSuffix(fName, ".zip") {
return errors.New("fname must have .zip suffix")
}
f, err := os.Create(fName)
if err != nil {
return err
}
defer f.Close()

zf := zip.NewWriter(f)
defer zf.Close()

vls := k8s.VizierLabelSelector()
vizierLabelSelector := metav1.FormatLabelSelector(&vls)

// We check across all namespaces for the matching pixie pods.
vizierPodList, err := c.k8sClientSet.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{LabelSelector: vizierLabelSelector})
if err != nil {
return err
}

// We also need to get the logs the operator logs.
// As the LabelSelectors are ANDed, we need to make a new query and merge
// the results.
ols := k8s.OperatorLabelSelector()
operatorLabelSelector := metav1.FormatLabelSelector(&ols)

operatorPodList, err := c.k8sClientSet.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{LabelSelector: operatorLabelSelector})
if err != nil {
return err
}

// Merge the two pod lists
pods := append(vizierPodList.Items, operatorPodList.Items...)

for _, pod := range pods {
for _, containerStatus := range pod.Status.ContainerStatuses {
// Ignore prev logs, they might not exist.
_ = c.LogPodInfoToZipFile(zf, pod, containerStatus.Name, true)

err := c.LogPodInfoToZipFile(zf, pod, containerStatus.Name, false)
if err != nil {
log.WithError(err).Warnf("Failed to log pod: %s", pod.Name)
}
}
err = c.WritePodDescription(zf, pod)
if err != nil {
log.WithError(err).Warnf("failed to write pod description")
}
}

err = c.LogKubeCmd(zf, "nodes.log", "describe", "node")
if err != nil {
log.WithError(err).Warn("failed to log node info")
}

err = c.LogKubeCmd(zf, "services.log", "describe", "services", "--all-namespaces", "-l", vizierLabelSelector)
if err != nil {
log.WithError(err).Warnf("failed to log services")
}

// Describe vizier and write it to vizier.log
err = c.LogKubeCmd(zf, "vizier.log", "describe", "vizier", "--all-namespaces")
if err != nil {
log.WithError(err).Warnf("failed to log vizier crd")
}

clusterID, err := GetCurrentVizier(c.cloudAddr)
if err != nil {
log.WithError(err).Warnf("failed to get cluster ID")
}
outputCh, err := RunSimpleHealthCheckScript(c.br, c.cloudAddr, clusterID)

if err != nil {
entry := log.WithError(err)
if _, ok := err.(*HealthCheckWarning); ok {
entry.Warn("healthcheck script detected the following warnings:")
} else {
entry.Warn("failed to run healthcheck script")
}
}

return c.LogOutputToZipFile(zf, "px_agent_diagnostics.txt", <-outputCh)
}
Loading
Loading