add e2e cases for preempt #892
The kind cluster config is bumped from three worker nodes to four:

```diff
@@ -1,12 +1,12 @@
 # this config file contains all config fields with comments
 kind: Cluster
 apiVersion: kind.x-k8s.io/v1alpha4
-# 1 control plane node and 3 workers
+# 1 control plane node and 4 workers
 nodes:
 # the control plane node config
 - role: control-plane
-# the three workers
+# the four workers
 - role: worker
 - role: worker
 - role: worker
+- role: worker
```
In the existing job e2e suite, the old `Preemption` and `Multiple Preemption` cases are removed; equivalent cases are added in the new dedicated test file below:

```diff
@@ -192,97 +192,6 @@ var _ = Describe("Job E2E Test", func() {
 		Expect(err).NotTo(HaveOccurred())
 	})
 
-	It("Preemption", func() {
-		ctx := initTestContext(options{
-			priorityClasses: map[string]int32{
-				masterPriority: masterPriorityValue,
-				workerPriority: workerPriorityValue,
-			},
-		})
-		defer cleanupTestContext(ctx)
-
-		slot := oneCPU
-		rep := clusterSize(ctx, slot)
-
-		job := &jobSpec{
-			tasks: []taskSpec{
-				{
-					img: defaultNginxImage,
-					req: slot,
-					min: 1,
-					rep: rep,
-				},
-			},
-		}
-
-		job.name = "preemptee-qj"
-		job.pri = workerPriority
-		job1 := createJob(ctx, job)
-		err := waitTasksReady(ctx, job1, int(rep))
-		Expect(err).NotTo(HaveOccurred())
-
-		job.name = "preemptor-qj"
-		job.pri = masterPriority
-		job.min = rep / 2
-		job2 := createJob(ctx, job)
-		err = waitTasksReady(ctx, job1, int(rep)/2)
-		Expect(err).NotTo(HaveOccurred())
-
-		err = waitTasksReady(ctx, job2, int(rep)/2)
-		Expect(err).NotTo(HaveOccurred())
-	})
-
-	It("Multiple Preemption", func() {
-		ctx := initTestContext(options{
-			priorityClasses: map[string]int32{
-				masterPriority: masterPriorityValue,
-				workerPriority: workerPriorityValue,
-			},
-		})
-		defer cleanupTestContext(ctx)
-
-		slot := oneCPU
-		rep := clusterSize(ctx, slot)
-
-		job := &jobSpec{
-			tasks: []taskSpec{
-				{
-					img: defaultNginxImage,
-					req: slot,
-					min: 1,
-					rep: rep,
-				},
-			},
-		}
-
-		job.name = "multipreemptee-qj"
-		job.pri = workerPriority
-		job1 := createJob(ctx, job)
-
-		err := waitTasksReady(ctx, job1, int(rep))
-		Expect(err).NotTo(HaveOccurred())
-
-		job.name = "multipreemptor-qj1"
-		job.pri = masterPriority
-		job.min = rep / 3
-		job2 := createJob(ctx, job)
-		Expect(err).NotTo(HaveOccurred())
-
-		job.name = "multipreemptor-qj2"
-		job.pri = masterPriority
-		job3 := createJob(ctx, job)
-		Expect(err).NotTo(HaveOccurred())
-
-		err = waitTasksReady(ctx, job1, int(rep)/3)
-		Expect(err).NotTo(HaveOccurred())
-
-		err = waitTasksReady(ctx, job2, int(rep)/3)
-		Expect(err).NotTo(HaveOccurred())
-
-		err = waitTasksReady(ctx, job3, int(rep)/3)
-		Expect(err).NotTo(HaveOccurred())
-	})
-
 	It("Schedule BestEffort Job", func() {
 		ctx := initTestContext(options{})
 		defer cleanupTestContext(ctx)
```
Further down in the same file, two `wait.Poll` timeouts are raised from one minute to two:

```diff
@@ -604,7 +513,7 @@ var _ = Describe("Job E2E Test", func() {
 		if expectPod%1 == 1 {
 			expectPod--
 		}
-		err = wait.Poll(100*time.Millisecond, oneMinute, func() (bool, error) {
+		err = wait.Poll(100*time.Millisecond, twoMinute, func() (bool, error) {
 			fsScheduledPod = 0
 			testScheduledPod = 0
```

```diff
@@ -700,7 +609,7 @@ var _ = Describe("Job E2E Test", func() {
 		if expectPod%1 == 1 {
 			expectPod--
 		}
-		err = wait.Poll(100*time.Millisecond, oneMinute, func() (bool, error) {
+		err = wait.Poll(100*time.Millisecond, twoMinute, func() (bool, error) {
 			q1ScheduledPod = 0
 			q2ScheduledPod = 0
```

Review comment (on the second change): same here — see the timeout discussion at the end of this review.

(Aside: the `expectPod%1 == 1` context lines can never be true, since any integer modulo 1 is 0; `%2` was presumably intended, but that code predates this change.)
A new e2e test file is added (206 lines, entirely new per the hunk header `@@ -0,0 +1,206 @@`) containing the preemption cases:

```go
package e2e

import (
	"context"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"

	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	schedulingv1beta1 "volcano.sh/volcano/pkg/apis/scheduling/v1beta1"
)

var _ = Describe("Job E2E Test", func() {
	It("schedule high priority job without preemption when resource is enough", func() {
		ctx := initTestContext(options{
			priorityClasses: map[string]int32{
				masterPriority: masterPriorityValue,
				workerPriority: workerPriorityValue,
			},
		})
		defer cleanupTestContext(ctx)

		slot := oneCPU

		job := &jobSpec{
			tasks: []taskSpec{
				{
					img: defaultNginxImage,
					req: slot,
					min: 1,
					rep: 1,
				},
			},
		}

		job.name = "preemptee"
		job.pri = workerPriority
		preempteeJob := createJob(ctx, job)
		err := waitTasksReady(ctx, preempteeJob, 1)
		Expect(err).NotTo(HaveOccurred())

		job.name = "preemptor"
		job.pri = masterPriority
		preemptorJob := createJob(ctx, job)
		err = waitTasksReady(ctx, preempteeJob, 1)
		Expect(err).NotTo(HaveOccurred())

		err = waitTasksReady(ctx, preemptorJob, 1)
		Expect(err).NotTo(HaveOccurred())
	})
```

Review thread (on the `masterPriority`/`workerPriority` names):

- Reviewer: Suggest fixing these in a separate PR; maybe call them high/lowPriority.
- Author: OK, I will fix the names in the next PR.
- Author: This has been fixed in #893.
```go
	It("schedule high priority job with preemption when resource is NOT enough", func() {
		ctx := initTestContext(options{
			priorityClasses: map[string]int32{
				masterPriority: masterPriorityValue,
				workerPriority: workerPriorityValue,
			},
		})
		defer cleanupTestContext(ctx)

		slot := oneCPU
		rep := clusterSize(ctx, slot)

		job := &jobSpec{
			tasks: []taskSpec{
				{
					img: defaultNginxImage,
					req: slot,
					min: 1,
					rep: rep,
				},
			},
		}

		job.name = "preemptee"
		job.pri = workerPriority
		preempteeJob := createJob(ctx, job)
		err := waitTasksReady(ctx, preempteeJob, int(rep))
		Expect(err).NotTo(HaveOccurred())

		job.name = "preemptor"
		job.pri = masterPriority
		job.min = rep / 2
		preemptorJob := createJob(ctx, job)
		err = waitTasksReady(ctx, preempteeJob, int(rep)/2)
		Expect(err).NotTo(HaveOccurred())

		err = waitTasksReady(ctx, preemptorJob, int(rep)/2)
		Expect(err).NotTo(HaveOccurred())
	})
```
```go
	It("preemption don't work when podgroup is pending", func() {
		ctx := initTestContext(options{
			priorityClasses: map[string]int32{
				masterPriority: masterPriorityValue,
				workerPriority: workerPriorityValue,
			},
		})
		defer cleanupTestContext(ctx)

		pgName := "pending-pg"
		pg := &schedulingv1beta1.PodGroup{
			ObjectMeta: v1.ObjectMeta{
				Namespace: ctx.namespace,
				Name:      pgName,
			},
			Spec: schedulingv1beta1.PodGroupSpec{
				MinMember:    1,
				MinResources: &thirtyCPU,
			},
```

Review thread (on the hard-coded `thirtyCPU`):

- Reviewer: Don't hard-code this; it should be calculated from the cluster resources.
- Author: Good advice. There is a lot of hard-coding in the e2e cases; we need a proper review of it later.
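A minimal sketch of what the reviewer suggests, assuming `oneCPU` is a `corev1.ResourceList` and `clusterSize` reports how many one-CPU tasks the cluster can hold, as both are used elsewhere in these tests; the computed quantity and the extra imports are illustrative assumptions, not part of this PR:

```go
// Sketch: derive the minimum resources from the cluster capacity instead of the
// hard-coded thirtyCPU, so the PodGroup stays Pending on a cluster of any size.
// Extra imports assumed: "fmt" and "k8s.io/apimachinery/pkg/api/resource".
slot := oneCPU
rep := clusterSize(ctx, slot) // number of one-CPU tasks the cluster can hold

// One CPU more than the cluster can ever provide; would replace &thirtyCPU above.
overCommit := corev1.ResourceList{
	corev1.ResourceCPU: resource.MustParse(fmt.Sprintf("%d", int(rep)+1)),
}
pg.Spec.MinResources = &overCommit
```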
The PodGroup definition and creation continue:

```go
			Status: schedulingv1beta1.PodGroupStatus{
				Phase: schedulingv1beta1.PodGroupPending,
			},
		}
		_, err := ctx.vcclient.SchedulingV1beta1().PodGroups(ctx.namespace).Create(context.TODO(), pg, v1.CreateOptions{})
		Expect(err).NotTo(HaveOccurred())
```

Review thread (on the `Status` block):

- Reviewer: Remove it; Create will ignore the status.
- Author: This has been fixed in #893.
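If a test ever did need the PodGroup to start in a specific phase, the status would have to be written after creation, since `Create` drops whatever is in `pg.Status`. A rough sketch, assuming the generated Volcano clientset exposes `UpdateStatus` for PodGroups (an assumption; this PR does not need it):

```go
// Sketch: set the phase through the status subresource after Create,
// because Create ignores the Status field on the submitted object.
created, err := ctx.vcclient.SchedulingV1beta1().PodGroups(ctx.namespace).
	Create(context.TODO(), pg, v1.CreateOptions{})
Expect(err).NotTo(HaveOccurred())

created.Status.Phase = schedulingv1beta1.PodGroupPending
_, err = ctx.vcclient.SchedulingV1beta1().PodGroups(ctx.namespace).
	UpdateStatus(context.TODO(), created, v1.UpdateOptions{})
Expect(err).NotTo(HaveOccurred())
```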
The rest of this case fills the cluster with a workerPriority job, then creates a preemptor pod bound to the pending PodGroup and expects the create call to fail:

```go
		slot := oneCPU
		rep := clusterSize(ctx, slot)
		job := &jobSpec{
			tasks: []taskSpec{
				{
					img: defaultNginxImage,
					req: slot,
					min: 1,
					rep: rep,
				},
			},
		}
		job.name = "preemptee"
		job.pri = workerPriority
		preempteeJob := createJob(ctx, job)
		err = waitTasksReady(ctx, preempteeJob, int(rep))
		Expect(err).NotTo(HaveOccurred())

		pod := &corev1.Pod{
			TypeMeta: v1.TypeMeta{
				APIVersion: "v1",
				Kind:       "Pod",
			},
			ObjectMeta: v1.ObjectMeta{
				Namespace:   ctx.namespace,
				Name:        "preemptor-pod",
				Annotations: map[string]string{schedulingv1beta1.KubeGroupNameAnnotationKey: pgName},
			},
			Spec: corev1.PodSpec{
				SchedulerName:     "volcano",
				Containers:        createContainers(defaultNginxImage, "", "", oneCPU, oneCPU, 0),
				PriorityClassName: masterPriority,
			},
		}
		_, err = ctx.kubeclient.CoreV1().Pods(ctx.namespace).Create(context.TODO(), pod, v1.CreateOptions{})
		Expect(err).To(HaveOccurred())
	})
```
```go
	It("preemption only works in the same queue", func() {
		ctx := initTestContext(options{
			queues: []string{"q1-preemption", "q2-reference"},
			priorityClasses: map[string]int32{
				masterPriority: masterPriorityValue,
				workerPriority: workerPriorityValue,
			},
		})
		defer cleanupTestContext(ctx)

		slot := oneCPU
		rep := clusterSize(ctx, slot)
		job := &jobSpec{
			tasks: []taskSpec{
				{
					img: defaultNginxImage,
					req: slot,
					min: 1,
					rep: rep / 2,
				},
			},
		}

		job.name = "j1-q1"
		job.pri = workerPriority
		job.queue = "q1-preemption"
		queue1Job := createJob(ctx, job)
		err := waitTasksReady(ctx, queue1Job, int(rep)/2)
		Expect(err).NotTo(HaveOccurred())

		job.name = "j2-q2"
		job.pri = workerPriority
		job.queue = "q2-reference"
		queue2Job := createJob(ctx, job)
		err = waitTasksReady(ctx, queue2Job, int(rep)/2)
		Expect(err).NotTo(HaveOccurred())

		job.name = "j3-q1"
		job.pri = masterPriority
		job.queue = "q1-preemption"
		job.tasks[0].rep = rep
		queue1Job3 := createJob(ctx, job)
		err = waitTasksReady(ctx, queue1Job3, 1)
		Expect(err).NotTo(HaveOccurred())
		err = waitTasksReady(ctx, queue1Job, 0)
		Expect(err).NotTo(HaveOccurred())
		err = waitTasksReady(ctx, queue2Job, int(rep)/2)
		Expect(err).NotTo(HaveOccurred())
	})
})
```
Review thread (on the `oneMinute` → `twoMinute` poll timeout change above):

- Reviewer: Why change this? How come the scheduling is so slow?
- Author: It seems to be related to the cluster environment at the time; the wait times out in rare cases. I've tried several times, and it works well after setting the timeout to two minutes.
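For context, `twoMinute` is presumably a duration constant defined next to `oneMinute` in the e2e utilities; its definition is not part of this diff, but it would look roughly like:

```go
// Assumed definitions; the actual constants in the e2e util package may differ.
const (
	oneMinute = 1 * time.Minute
	twoMinute = 2 * time.Minute
)
```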