@@ -16,10 +16,12 @@ package training
16
16
17
17
import (
18
18
"fmt"
19
+ "strings"
19
20
20
21
"github.com/kubeflow/arena/pkg/apis/types"
21
22
"github.com/kubeflow/arena/pkg/apis/utils"
22
23
podlogs "github.com/kubeflow/arena/pkg/podlogs"
24
+ v1 "k8s.io/api/core/v1"
23
25
)
24
26
25
27
// AcceptJobLog is used for arena-go-sdk
@@ -34,10 +36,13 @@ func AcceptJobLog(jobName string, trainingType types.TrainingJobType, args *type
34
36
if err != nil {
35
37
return err
36
38
}
37
- chiefPod := job .ChiefPod ()
38
39
// 3. if the instance name is not set, resolve it from the job's pods via getInstanceName
39
- if args .InstanceName == "" && chiefPod != nil {
40
- args .InstanceName = chiefPod .Name
40
+ if args .InstanceName == "" {
41
+ name , err := getInstanceName (job )
42
+ if err != nil {
43
+ return err
44
+ }
45
+ args .InstanceName = name
41
46
}
42
47
podStatuses := map [string ]string {}
43
48
for _ , pod := range job .AllPods () {
@@ -72,3 +77,31 @@ func getTrainingJobTypes() []string {
72
77
}
73
78
return jobTypes
74
79
}
80
+
81
+ func getInstanceName (job TrainingJob ) (string , error ) {
82
+ pods := job .AllPods ()
83
+ // if not found pods,return an error
84
+ if pods == nil || len (pods ) == 0 {
85
+ return "" , fmt .Errorf ("not found instances of the job %v" , job .Name ())
86
+ }
87
+ // if the job has only one pod,return its' name
88
+ if len (pods ) == 1 {
89
+ return pods [0 ].Name , nil
90
+ }
91
+ // if job has many pods and the chief pod name is existed,return it
92
+ if job .ChiefPod () != nil && job .ChiefPod ().Name != "" {
93
+ return job .ChiefPod ().Name , nil
94
+ }
95
+ // return an error
96
+ return "" , fmt .Errorf ("%v" , moreThanOneInstanceHelpInfo (pods ))
97
+ }
98
+
99
+ func moreThanOneInstanceHelpInfo (pods []* v1.Pod ) string {
100
+ header := fmt .Sprintf ("There is %d instances have been found:" , len (pods ))
101
+ lines := []string {}
102
+ footer := fmt .Sprintf ("please use '-i' or '--instance' to filter." )
103
+ for _ , p := range pods {
104
+ lines = append (lines , fmt .Sprintf ("%v" , p .Name ))
105
+ }
106
+ return fmt .Sprintf ("%s\n \n %s\n \n %s\n " , header , strings .Join (lines , "\n " ), footer )
107
+ }
0 commit comments