Merge remote-tracking branch 'upstream/main' into credential-permissions-fix
yerzhan7 committed Feb 26, 2025
2 parents 2a18458 + 60aac2a commit 92978f4
Showing 13 changed files with 241 additions and 6 deletions.
73 changes: 73 additions & 0 deletions .github/workflows/delete-cluster.yaml
@@ -0,0 +1,73 @@
name: Manual cluster cleanup

on:
  workflow_dispatch:
    inputs:
      environment:
        description: "Github Environment"

concurrency:
  group: e2e-cluster-${{ inputs.environment }}

env:
  IMAGE_NAME: "s3-csi-driver"
  BENCHMARK_ARTIFACTS_FOLDER: ".github/artifacts"
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  KOPS_STATE_FILE: "s3://${{ vars.KOPS_STATE_FILE }}"
  BENCHMARK_BUCKET: "s3://${{ vars.BENCHMARK_BUCKET }}"
jobs:
  build_matrix:
    name: Build Matrix
    uses: ./.github/workflows/build_matrix.yaml
  delete_cluster:
    needs: [ "build_matrix" ]
    strategy:
      # Failing fast causes some resources created during the test to leak,
      # so we disable it to ensure all resources created during the test are properly cleaned up.
      fail-fast: false
      matrix: ${{ fromJson(needs.build_matrix.outputs.matrix) }}
    runs-on: ubuntu-latest
    environment: ${{ inputs.environment }}
    permissions:
      id-token: write
      contents: read
    env:
      AWS_REGION: "${{ vars.AWS_REGION }}"
      CLUSTER_TYPE: "${{ matrix.cluster-type }}"
      ARCH: "${{ matrix.arch }}"
      AMI_FAMILY: "${{ matrix.family }}"
      # envtest doesn't support all versions; K8S_VERSION here is a full version like 1.28.13,
      # and to get the latest version supported by envtest we trim it to 1.28.
      K8S_VERSION: "${{ matrix.kubernetes-version }}"
      ENVTEST_K8S_VERSION: "${K8S_VERSION%.*}"
      SELINUX_MODE: "${{ matrix.selinux-mode }}"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.ref }}
          persist-credentials: false
      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version-file: "go.mod"
      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ vars.IAM_ROLE }}
          aws-region: ${{ vars.AWS_REGION }}
          role-duration-seconds: 7200
      - name: Install tools
        env:
          ACTION: "install_tools"
        run: |
          tests/e2e-kubernetes/scripts/run.sh
      - name: Delete cluster
        env:
          ACTION: "delete_cluster"
          FORCE: "true"
        run: |
          tests/e2e-kubernetes/scripts/run.sh
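
The ENVTEST_K8S_VERSION entry above relies on shell suffix-stripping to turn a full Kubernetes version into a minor version. A minimal sketch of that expansion, plus a rough manual equivalent of the "Delete cluster" step (the version string and the manual invocation are illustrative, not taken from a workflow run):

    # "${VAR%.*}" removes the shortest trailing ".<something>" match.
    K8S_VERSION="1.28.13"
    ENVTEST_K8S_VERSION="${K8S_VERSION%.*}"
    echo "${ENVTEST_K8S_VERSION}"   # prints: 1.28

    # Roughly what the "Delete cluster" step runs, with the same environment variables set.
    ACTION="delete_cluster" FORCE="true" tests/e2e-kubernetes/scripts/run.sh
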
2 changes: 2 additions & 0 deletions charts/aws-mountpoint-s3-csi-driver/templates/controller.yaml
@@ -68,5 +68,7 @@ spec:
              value: {{ .Values.image.pullPolicy }}
            - name: MOUNTPOINT_NAMESPACE
              value: {{ .Values.mountpointPod.namespace }}
            - name: MOUNTPOINT_PRIORITY_CLASS_NAME
              value: {{ .Values.mountpointPod.priorityClassName }}

{{- end -}}
@@ -0,0 +1,15 @@
{{- if .Values.experimental.podMounter -}}

apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: mount-s3-critical
value: 1000000000
preemptionPolicy: Never
globalDefault: false
description: >-
  Default priority class for Mountpoint Pods.
  It has the highest possible value for non-builtin PriorityClasses to ensure Mountpoint Pods get scheduled quickly and are not evicted first in case of resource pressure.
  It will not cause other Pods to be preempted.
{{- end -}}
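
As a quick sanity check, the rendered PriorityClass can be inspected with helm template when the experimental pod mounter is enabled (the release name "s3-csi" is a placeholder; run from the repository root):

    # Render the chart with the pod mounter enabled and show the PriorityClass manifest.
    helm template s3-csi ./charts/aws-mountpoint-s3-csi-driver \
      --set experimental.podMounter=true \
      | grep -B 2 -A 8 'kind: PriorityClass'
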
1 change: 1 addition & 0 deletions charts/aws-mountpoint-s3-csi-driver/values.yaml
@@ -86,6 +86,7 @@ controller:

mountpointPod:
  namespace: mount-s3
  priorityClassName: mount-s3-critical

nameOverride: ""
fullnameOverride: ""
2 changes: 2 additions & 0 deletions cmd/aws-s3-csi-controller/main.go
@@ -24,6 +24,7 @@ import (

var mountpointNamespace = flag.String("mountpoint-namespace", os.Getenv("MOUNTPOINT_NAMESPACE"), "Namespace to spawn Mountpoint Pods in.")
var mountpointVersion = flag.String("mountpoint-version", os.Getenv("MOUNTPOINT_VERSION"), "Version of Mountpoint within the given Mountpoint image.")
var mountpointPriorityClassName = flag.String("mountpoint-priority-class-name", os.Getenv("MOUNTPOINT_PRIORITY_CLASS_NAME"), "Priority class name of the Mountpoint Pods.")
var mountpointImage = flag.String("mountpoint-image", os.Getenv("MOUNTPOINT_IMAGE"), "Image of Mountpoint to use in spawned Mountpoint Pods.")
var mountpointImagePullPolicy = flag.String("mountpoint-image-pull-policy", os.Getenv("MOUNTPOINT_IMAGE_PULL_POLICY"), "Pull policy of Mountpoint images.")
var mountpointContainerCommand = flag.String("mountpoint-container-command", "/bin/aws-s3-csi-mounter", "Entrypoint command of the Mountpoint Pods.")
@@ -44,6 +45,7 @@ func main() {
    err = csicontroller.NewReconciler(mgr.GetClient(), mppod.Config{
        Namespace:         *mountpointNamespace,
        MountpointVersion: *mountpointVersion,
        PriorityClassName: *mountpointPriorityClassName,
        Container: mppod.ContainerConfig{
            Command: *mountpointContainerCommand,
            Image:   *mountpointImage,
2 changes: 2 additions & 0 deletions pkg/podmounter/mppod/creator.go
@@ -27,6 +27,7 @@ type ContainerConfig struct {
type Config struct {
    Namespace         string
    MountpointVersion string
    PriorityClassName string
    Container         ContainerConfig
    CSIDriverVersion  string
}
@@ -84,6 +85,7 @@ func (c *Creator) Create(pod *corev1.Pod, pvc *corev1.PersistentVolumeClaim) *co
                },
            },
        }},
        PriorityClassName: c.config.PriorityClassName,
        Affinity: &corev1.Affinity{
            NodeAffinity: &corev1.NodeAffinity{
                // This is to make sure the Mountpoint Pod gets scheduled onto the same node as the Workload Pod
3 changes: 3 additions & 0 deletions pkg/podmounter/mppod/creator_test.go
@@ -18,6 +18,7 @@ func TestCreatingMountpointPods(t *testing.T) {
    image := "mp-image:latest"
    imagePullPolicy := corev1.PullAlways
    command := "/bin/aws-s3-csi-mounter"
    priorityClassName := "mount-s3-critical"

    // Test Pod values
    testNode := "test-node"
@@ -29,6 +30,7 @@ func TestCreatingMountpointPods(t *testing.T) {
    creator := mppod.NewCreator(mppod.Config{
        Namespace:         namespace,
        MountpointVersion: mountpointVersion,
        PriorityClassName: priorityClassName,
        Container: mppod.ContainerConfig{
            Image:           image,
            ImagePullPolicy: imagePullPolicy,
@@ -60,6 +62,7 @@ func TestCreatingMountpointPods(t *testing.T) {
        mppod.LabelCSIDriverVersion: csiDriverVersion,
    }, mpPod.Labels)

    assert.Equals(t, priorityClassName, mpPod.Spec.PriorityClassName)
    assert.Equals(t, corev1.RestartPolicyOnFailure, mpPod.Spec.RestartPolicy)
    assert.Equals(t, []corev1.Volume{
        {
1 change: 1 addition & 0 deletions tests/controller/controller_test.go
@@ -707,6 +707,7 @@ func verifyMountpointPodFor(pod *testPod, vol *testVolume, mountpointPod *testPo
            Operator: corev1.TolerationOpExists,
        },
    }))
    Expect(mountpointPod.Spec.PriorityClassName).To(Equal(mountpointPriorityClassName))

    Expect(mountpointPod.Spec.Containers[0].Image).To(Equal(mountpointImage))
    Expect(mountpointPod.Spec.Containers[0].ImagePullPolicy).To(Equal(mountpointImagePullPolicy))
15 changes: 15 additions & 0 deletions tests/controller/suite_test.go
@@ -9,6 +9,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"
"k8s.io/kubectl/pkg/scheme"
@@ -32,6 +33,7 @@ const defaultContainerImage = "public.ecr.aws/docker/library/busybox:stable-musl
// Configuration values passed for `mppod.Config` while creating a controller to use in tests.
const mountpointNamespace = "mount-s3"
const mountpointVersion = "1.10.0"
const mountpointPriorityClassName = "mount-s3-critical"
const mountpointContainerCommand = "/bin/aws-s3-csi-mounter"
const mountpointImage = "mp-image:latest"
const mountpointImagePullPolicy = corev1.PullNever
@@ -80,6 +82,7 @@ var _ = BeforeSuite(func() {
    err = csicontroller.NewReconciler(k8sManager.GetClient(), mppod.Config{
        Namespace:         mountpointNamespace,
        MountpointVersion: mountpointVersion,
        PriorityClassName: mountpointPriorityClassName,
        Container: mppod.ContainerConfig{
            Command: mountpointContainerCommand,
            Image:   mountpointImage,
@@ -96,6 +99,7 @@
    }()

    createMountpointNamespace()
    createMountpointPriorityClass()
})

var _ = AfterSuite(func() {
@@ -112,3 +116,14 @@ func createMountpointNamespace() {
    Expect(k8sClient.Create(ctx, namespace)).To(Succeed())
    waitForObject(namespace)
}

// createMountpointPriorityClass creates priority class for Mountpoint Pods.
func createMountpointPriorityClass() {
    By(fmt.Sprintf("Creating priority class %q for Mountpoint Pods", mountpointPriorityClassName))
    priorityClass := &schedulingv1.PriorityClass{
        ObjectMeta: metav1.ObjectMeta{Name: mountpointPriorityClassName},
        Value:      1000000,
    }
    Expect(k8sClient.Create(ctx, priorityClass)).To(Succeed())
    waitForObject(priorityClass)
}
63 changes: 60 additions & 3 deletions tests/e2e-kubernetes/scripts/eksctl.sh
@@ -2,6 +2,9 @@

set -euox pipefail

# If the cluster is not older than this, it will be re-used.
MAX_CLUSTER_AGE_SECONDS=$((3 * 24 * 60 * 60)) # 3 days

function eksctl_install() {
  INSTALL_PATH=${1}
  EKSCTL_VERSION=${2}
@@ -12,6 +15,37 @@ function eksctl_install() {
  fi
}

function eksctl_is_cluster_too_old() {
  CLUSTER_NAME=${1}
  REGION=${2}

  CREATED_TIME=$(aws eks describe-cluster --name "${CLUSTER_NAME}" --region "${REGION}" --query 'cluster.createdAt' --output text)
  CURRENT_TIME=$(date +%s)
  CLUSTER_TIME=$(date -d "${CREATED_TIME}" +%s)

  [ $((CURRENT_TIME - CLUSTER_TIME)) -gt ${MAX_CLUSTER_AGE_SECONDS} ]
  return $?
}

function eksctl_compute_cluster_spec_hash() {
  NODE_TYPE=${1}
  ZONES=${2}
  EKSCTL_PATCH_SELINUX_ENFORCING_FILE=${3}

  echo -n "${NODE_TYPE}-${ZONES}-${EKSCTL_PATCH_SELINUX_ENFORCING_FILE}" | sha256sum | cut -d' ' -f1
}

# Checks whether the existing cluster matches the expected specs to decide whether to re-use it.
function eksctl_cluster_matches_specs() {
  CLUSTER_NAME=${1}
  REGION=${2}
  DESIRED_HASH=${3}
  CURRENT_HASH=$(aws eks describe-cluster --name "${CLUSTER_NAME}" --region "${REGION}" --query 'cluster.tags.ClusterSpecHash' --output text)

  [ "${DESIRED_HASH}" = "${CURRENT_HASH}" ]
  return $?
}

function eksctl_create_cluster() {
  CLUSTER_NAME=${1}
  REGION=${2}
@@ -27,7 +61,19 @@ function eksctl_create_cluster() {
  K8S_VERSION=${12}
  EKSCTL_PATCH_SELINUX_ENFORCING_FILE=${13}

eksctl_delete_cluster "$BIN" "$CLUSTER_NAME" "$REGION"
CLUSTER_SPEC_HASH=$(eksctl_compute_cluster_spec_hash "${NODE_TYPE}" "${ZONES}" "${EKSCTL_PATCH_SELINUX_ENFORCING_FILE}")

# Check if cluster exists and matches our specs
if eksctl_cluster_exists "${BIN}" "${CLUSTER_NAME}"; then
if ! eksctl_is_cluster_too_old "${CLUSTER_NAME}" "${REGION}" && \
eksctl_cluster_matches_specs "${CLUSTER_NAME}" "${REGION}" "${CLUSTER_SPEC_HASH}"; then
echo "Reusing existing cluster ${CLUSTER_NAME} as it matches specifications and it is not too old"
return 0
fi

echo "Existing cluster ${CLUSTER_NAME} is either too old or doesn't match specifications. Re-creating..."
eksctl_delete_cluster "$BIN" "$CLUSTER_NAME" "$REGION" "true"
fi

  # CAUTION: this may fail with "the targeted availability zone, does not currently have sufficient capacity to support the cluster" error, we may require a fix for that
  ${BIN} create cluster \
@@ -38,6 +84,7 @@ function eksctl_create_cluster() {
    --with-oidc \
    --zones $ZONES \
    --version $K8S_VERSION \
    --tags ClusterSpecHash=${CLUSTER_SPEC_HASH} \
    --dry-run > $CLUSTER_FILE

  CLUSTER_FILE_TMP="${CLUSTER_FILE}.tmp"
@@ -62,9 +109,19 @@ function eksctl_delete_cluster() {
  BIN=${1}
  CLUSTER_NAME=${2}
  REGION=${3}
  if eksctl_cluster_exists "${BIN}" "${CLUSTER_NAME}"; then
    ${BIN} delete cluster "${CLUSTER_NAME}"
  FORCE=${4:-false}

  if ! eksctl_cluster_exists "${BIN}" "${CLUSTER_NAME}"; then
    return 0
  fi

  # Skip deletion if cluster is not too old and force flag is not set
  if [ "${FORCE}" != "true" ] && ! eksctl_is_cluster_too_old "${CLUSTER_NAME}" "${REGION}"; then
    echo "Skipping deletion of cluster ${CLUSTER_NAME} to re-use it"
    return 0
  fi

  ${BIN} delete cluster "${CLUSTER_NAME}"
  STACK_NAME="eksctl-${CLUSTER_NAME}-cluster"
  aws cloudformation delete-stack --region ${REGION} --stack-name ${STACK_NAME}

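
The three helpers added above feed a single reuse decision in eksctl_create_cluster: hash the requested specs, then keep an existing cluster only if it is not older than MAX_CLUSTER_AGE_SECONDS and carries a matching ClusterSpecHash tag. A condensed sketch of that decision with placeholder inputs (cluster name, region, node type, zones, and patch file below are illustrative; eksctl is assumed to be on PATH):

    CLUSTER_NAME="s3-csi-test-cluster"   # placeholder
    REGION="us-east-1"                   # placeholder
    DESIRED_HASH=$(eksctl_compute_cluster_spec_hash "m5.large" "us-east-1a,us-east-1b" "selinux-patch.json")

    if eksctl_cluster_exists "eksctl" "${CLUSTER_NAME}" \
       && ! eksctl_is_cluster_too_old "${CLUSTER_NAME}" "${REGION}" \
       && eksctl_cluster_matches_specs "${CLUSTER_NAME}" "${REGION}" "${DESIRED_HASH}"; then
      echo "Re-using ${CLUSTER_NAME}"
    else
      # The real function deletes any stale or mismatched cluster and creates a fresh one.
      echo "(Re)creating ${CLUSTER_NAME}"
    fi
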