From 4c30581dc59557d2de17c26bbf595017f2c1d262 Mon Sep 17 00:00:00 2001 From: Pulak Kanti Bhowmick Date: Sat, 14 Sep 2024 16:54:13 +0600 Subject: [PATCH] update aws role management and cleanup role on dataplane delation (#105) * update aws role management and cleanup Signed-off-by: Pulak Kanti Bhowmick * detach policy before deleting role Signed-off-by: Pulak Kanti Bhowmick --------- Signed-off-by: Pulak Kanti Bhowmick --- api/v1/types/awscloudinfra_types.go | 21 +- api/v1/types/zz_generated.deepcopy.go | 7 + chart/baaz/crds/baaz.dev_dataplanes.yaml | 4 + config/crd/bases/baaz.dev_dataplanes.yaml | 4 + internal/dataplane_controller/aws_eks.go | 48 ++++- .../dataplane_controller.go | 7 + internal/tenantinfra_controller/aws_eks.go | 9 +- pkg/aws/eks/interface.go | 4 +- pkg/aws/eks/roles.go | 181 ++++++++++-------- 9 files changed, 190 insertions(+), 95 deletions(-) diff --git a/api/v1/types/awscloudinfra_types.go b/api/v1/types/awscloudinfra_types.go index e699d57..489ccb1 100644 --- a/api/v1/types/awscloudinfra_types.go +++ b/api/v1/types/awscloudinfra_types.go @@ -24,16 +24,17 @@ type EksConfig struct { } type AwsCloudInfraConfigStatus struct { - Vpc string `json:"vpc,omitempty"` - SubnetIds []string `json:"subnetIds,omitempty"` - SecurityGroupIds []string `json:"securityGroupIds,omitempty"` - NATGatewayId string `json:"natGatewayId,omitempty"` - NATAttachedWithRT bool `json:"natAttchedWithRT,omitempty"` - SGInboundRuleAdded bool `json:"sgInboundRuleAdded,omitempty"` - InternetGatewayId string `json:"internetGatewayId,omitempty"` - PublicRTId string `json:"publicRTId,omitempty"` - LBArns []string `json:"lbArns,omitempty"` - EksStatus EksStatus `json:"eksStatus,omitempty"` + Vpc string `json:"vpc,omitempty"` + SubnetIds []string `json:"subnetIds,omitempty"` + SecurityGroupIds []string `json:"securityGroupIds,omitempty"` + NATGatewayId string `json:"natGatewayId,omitempty"` + NATAttachedWithRT bool `json:"natAttchedWithRT,omitempty"` + SGInboundRuleAdded bool `json:"sgInboundRuleAdded,omitempty"` + InternetGatewayId string `json:"internetGatewayId,omitempty"` + PublicRTId string `json:"publicRTId,omitempty"` + LBArns []string `json:"lbArns,omitempty"` + EksStatus EksStatus `json:"eksStatus,omitempty"` + Roles map[string]bool `json:"roles,omitempty"` } type EksStatus struct { diff --git a/api/v1/types/zz_generated.deepcopy.go b/api/v1/types/zz_generated.deepcopy.go index ad8dc9e..b5c148d 100644 --- a/api/v1/types/zz_generated.deepcopy.go +++ b/api/v1/types/zz_generated.deepcopy.go @@ -195,6 +195,13 @@ func (in *AwsCloudInfraConfigStatus) DeepCopyInto(out *AwsCloudInfraConfigStatus copy(*out, *in) } out.EksStatus = in.EksStatus + if in.Roles != nil { + in, out := &in.Roles, &out.Roles + *out = make(map[string]bool, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AwsCloudInfraConfigStatus. diff --git a/chart/baaz/crds/baaz.dev_dataplanes.yaml b/chart/baaz/crds/baaz.dev_dataplanes.yaml index b38cc74..7877d5c 100644 --- a/chart/baaz/crds/baaz.dev_dataplanes.yaml +++ b/chart/baaz/crds/baaz.dev_dataplanes.yaml @@ -164,6 +164,10 @@ spec: type: string publicRTId: type: string + roles: + additionalProperties: + type: boolean + type: object securityGroupIds: items: type: string diff --git a/config/crd/bases/baaz.dev_dataplanes.yaml b/config/crd/bases/baaz.dev_dataplanes.yaml index b38cc74..7877d5c 100644 --- a/config/crd/bases/baaz.dev_dataplanes.yaml +++ b/config/crd/bases/baaz.dev_dataplanes.yaml @@ -164,6 +164,10 @@ spec: type: string publicRTId: type: string + roles: + additionalProperties: + type: boolean + type: object securityGroupIds: items: type: string diff --git a/internal/dataplane_controller/aws_eks.go b/internal/dataplane_controller/aws_eks.go index 35e932f..8014fa6 100644 --- a/internal/dataplane_controller/aws_eks.go +++ b/internal/dataplane_controller/aws_eks.go @@ -255,6 +255,17 @@ func (ae *awsEnv) reconcileAwsEks() error { return fmt.Errorf("failed to create cluster iam role: %s", err.Error()) } + if _, _, err = utils.PatchStatus(ae.ctx, ae.client, ae.dp, func(obj client.Object) client.Object { + ob := obj.(*v1.DataPlanes) + if ob.Status.CloudInfraStatus.Roles == nil { + ob.Status.CloudInfraStatus.Roles = make(map[string]bool) + } + ob.Status.CloudInfraStatus.Roles[*clusterRoleOutput.Role.RoleName] = true + return ob + }); err != nil { + return err + } + klog.Infof("Cluster Role [%s] Created", *clusterRoleOutput.Role.RoleName) createEksResult := ae.eksIC.CreateEks() @@ -1084,6 +1095,17 @@ func (ae *awsEnv) reconcileSystemNodeGroup() error { return errors.New("node role is nil") } + if _, _, err = utils.PatchStatus(ae.ctx, ae.client, ae.dp, func(obj client.Object) client.Object { + ob := obj.(*v1.DataPlanes) + if ob.Status.CloudInfraStatus.Roles == nil { + ob.Status.CloudInfraStatus.Roles = make(map[string]bool) + } + ob.Status.CloudInfraStatus.Roles[*nodeRole.Role.RoleName] = true + return ob + }); err != nil { + return err + } + subnetIds := ae.dp.Spec.CloudInfra.AwsCloudInfraConfig.Eks.SubnetIds if ae.dp.Spec.CloudInfra.ProvisionNetwork { subnetIds = ae.dp.Status.CloudInfraStatus.SubnetIds @@ -1189,6 +1211,17 @@ func (ae *awsEnv) ReconcileDefaultAddons() error { return err } + if _, _, err = utils.PatchStatus(ae.ctx, ae.client, ae.dp, func(obj client.Object) client.Object { + ob := obj.(*v1.DataPlanes) + if ob.Status.CloudInfraStatus.Roles == nil { + ob.Status.CloudInfraStatus.Roles = make(map[string]bool) + } + ob.Status.CloudInfraStatus.Roles[*role.Role.RoleName] = true + return ob + }); err != nil { + return err + } + _, cErr := ae.eksIC.CreateAddon(ae.ctx, &awseks.CreateAddonInput{ AddonName: aws.String(awsEbsCsiDriver), ClusterName: aws.String(clusterName), @@ -1217,11 +1250,22 @@ func (ae *awsEnv) ReconcileDefaultAddons() error { var notFoundErr *types.ResourceNotFoundException if errors.As(err, ¬FoundErr) { klog.Info("Creating vpc cni addon") - _, arn, err := ae.eksIC.CreateVpcCniRole(ae.ctx) + role, arn, err := ae.eksIC.CreateVpcCniRole(ae.ctx) if err != nil { return err } + if _, _, err = utils.PatchStatus(ae.ctx, ae.client, ae.dp, func(obj client.Object) client.Object { + ob := obj.(*v1.DataPlanes) + if ob.Status.CloudInfraStatus.Roles == nil { + ob.Status.CloudInfraStatus.Roles = make(map[string]bool) + } + ob.Status.CloudInfraStatus.Roles[*role.Role.RoleName] = true + return ob + }); err != nil { + return err + } + v := `{"enableNetworkPolicy": "true"}` _, cErr := ae.eksIC.CreateAddon(ae.ctx, &awseks.CreateAddonInput{ @@ -1229,7 +1273,7 @@ func (ae *awsEnv) ReconcileDefaultAddons() error { ClusterName: aws.String(clusterName), ResolveConflicts: types.ResolveConflictsOverwrite, ServiceAccountRoleArn: aws.String(arn), - AddonVersion: aws.String("v1.15.0-eksbuild.2"), + AddonVersion: aws.String("v1.18.3-eksbuild.3"), ConfigurationValues: aws.String(v), }) if cErr != nil { diff --git a/internal/dataplane_controller/dataplane_controller.go b/internal/dataplane_controller/dataplane_controller.go index 90ed372..aeeca8d 100644 --- a/internal/dataplane_controller/dataplane_controller.go +++ b/internal/dataplane_controller/dataplane_controller.go @@ -321,6 +321,13 @@ func (r *DataPlaneReconciler) reconcileDelete(ae *awsEnv) (ctrl.Result, error) { return ctrl.Result{}, retryErr } + // delete created roles + for roleName, _ := range ae.dp.Status.CloudInfraStatus.Roles { + if err := ae.eksIC.DeleteRole(ae.ctx, roleName); err != nil { + return ctrl.Result{}, err + } + } + // update namespace level customerNs := &core.Namespace{} if err := ae.client.Get(ae.ctx, client.ObjectKey{Name: ae.dp.Namespace}, customerNs); err != nil { diff --git a/internal/tenantinfra_controller/aws_eks.go b/internal/tenantinfra_controller/aws_eks.go index fcdffbb..a39269b 100644 --- a/internal/tenantinfra_controller/aws_eks.go +++ b/internal/tenantinfra_controller/aws_eks.go @@ -11,10 +11,6 @@ import ( awseks "github.com/aws/aws-sdk-go-v2/service/eks" "github.com/aws/aws-sdk-go-v2/service/eks/types" "github.com/aws/aws-sdk-go/aws" - v1 "github.com/baazhq/baaz/api/v1/types" - "github.com/baazhq/baaz/pkg/aws/eks" - "github.com/baazhq/baaz/pkg/store" - "github.com/baazhq/baaz/pkg/utils" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -23,6 +19,11 @@ import ( "k8s.io/utils/strings/slices" "sigs.k8s.io/aws-iam-authenticator/pkg/token" "sigs.k8s.io/controller-runtime/pkg/client" + + v1 "github.com/baazhq/baaz/api/v1/types" + "github.com/baazhq/baaz/pkg/aws/eks" + "github.com/baazhq/baaz/pkg/store" + "github.com/baazhq/baaz/pkg/utils" ) type nodeGroupType string diff --git a/pkg/aws/eks/interface.go b/pkg/aws/eks/interface.go index d3faf6e..97516a5 100644 --- a/pkg/aws/eks/interface.go +++ b/pkg/aws/eks/interface.go @@ -7,9 +7,10 @@ import ( awseks "github.com/aws/aws-sdk-go-v2/service/eks" awsiam "github.com/aws/aws-sdk-go-v2/service/iam" awssts "github.com/aws/aws-sdk-go-v2/service/sts" - v1 "github.com/baazhq/baaz/api/v1/types" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" + + v1 "github.com/baazhq/baaz/api/v1/types" ) type Eks interface { @@ -44,6 +45,7 @@ type Eks interface { DescribeInstances(ctx context.Context, input *awsec2.DescribeInstancesInput) (*awsec2.DescribeInstancesOutput, error) CreateIAMPolicy(ctx context.Context, input *awsiam.CreatePolicyInput) (*awsiam.CreatePolicyOutput, error) AttachRolePolicy(ctx context.Context, input *awsiam.AttachRolePolicyInput) (*awsiam.AttachRolePolicyOutput, error) + DeleteRole(ctx context.Context, roleName string) error } type eks struct { diff --git a/pkg/aws/eks/roles.go b/pkg/aws/eks/roles.go index 4c73ba3..5d4d644 100644 --- a/pkg/aws/eks/roles.go +++ b/pkg/aws/eks/roles.go @@ -176,57 +176,58 @@ func (ec *eks) CreateEbsCSIRole(ctx context.Context) (*awsiam.CreateRoleOutput, roleName := MakeEBSCSIRoleName(ec.dp.Spec.CloudInfra.Region, ec.dp.Spec.CloudInfra.Eks.Name) - _, err := ec.awsIamClient.GetRole(ec.ctx, &awsiam.GetRoleInput{ + getRoleOutput, err := ec.awsIamClient.GetRole(ec.ctx, &awsiam.GetRoleInput{ RoleName: aws.String(roleName), }) + if err == nil { + return &awsiam.CreateRoleOutput{ + Role: getRoleOutput.Role, + }, nil + } + _, oidcProviderURL, found := strings.Cut(oidcProvider, "oidc-provider/") + if !found { + return nil, errors.New("invalid oidc provider arn") + } + accountID, err := ec.getAccountID() if err != nil { - _, oidcProviderURL, found := strings.Cut(oidcProvider, "oidc-provider/") - if !found { - return nil, errors.New("invalid oidc provider arn") - } - accountID, err := ec.getAccountID() - if err != nil { - return nil, err - } - - tmpl, err := template.New("ebs-template").Parse(ebsCSIRoleTrustJsonTemplate) - if err != nil { - return nil, err - } - var tmplOutput bytes.Buffer + return nil, err + } - if err := tmpl.Execute(&tmplOutput, genericRoleTemplateInput{ - AccountID: accountID, - OIDCProvider: oidcProviderURL, - }); err != nil { - return nil, err - } + tmpl, err := template.New("ebs-template").Parse(ebsCSIRoleTrustJsonTemplate) + if err != nil { + return nil, err + } + var tmplOutput bytes.Buffer - trustPolicy := tmplOutput.String() + if err := tmpl.Execute(&tmplOutput, genericRoleTemplateInput{ + AccountID: accountID, + OIDCProvider: oidcProviderURL, + }); err != nil { + return nil, err + } - roleInput := awsiam.CreateRoleInput{ - AssumeRolePolicyDocument: aws.String(strings.TrimSpace(trustPolicy)), - RoleName: &roleName, - } + trustPolicy := tmplOutput.String() - roleOutput, err := ec.awsIamClient.CreateRole(ctx, &roleInput) - if err != nil { - return nil, err - } + roleInput := awsiam.CreateRoleInput{ + AssumeRolePolicyDocument: aws.String(strings.TrimSpace(trustPolicy)), + RoleName: &roleName, + } - attachPolicyInput := awsiam.AttachRolePolicyInput{ - PolicyArn: &ebsCSIPolicyARN, - RoleName: roleOutput.Role.RoleName, - } + roleOutput, err := ec.awsIamClient.CreateRole(ctx, &roleInput) + if err != nil { + return nil, err + } - if _, err := ec.awsIamClient.AttachRolePolicy(ctx, &attachPolicyInput); err != nil { - return nil, err - } - return roleOutput, nil + attachPolicyInput := awsiam.AttachRolePolicyInput{ + PolicyArn: &ebsCSIPolicyARN, + RoleName: roleOutput.Role.RoleName, } - return &awsiam.CreateRoleOutput{}, nil + if _, err := ec.awsIamClient.AttachRolePolicy(ctx, &attachPolicyInput); err != nil { + return nil, err + } + return roleOutput, nil } func (ec *eks) CreateVpcCniRole(ctx context.Context) (roleOutput *awsiam.CreateRoleOutput, arn string, err error) { @@ -234,58 +235,59 @@ func (ec *eks) CreateVpcCniRole(ctx context.Context) (roleOutput *awsiam.CreateR roleName := MakeVpcCniRoleName(ec.dp.Spec.CloudInfra.Region, ec.dp.Spec.CloudInfra.Eks.Name) - _, err = ec.awsIamClient.GetRole(ec.ctx, &awsiam.GetRoleInput{ + getRoleOutput, err := ec.awsIamClient.GetRole(ec.ctx, &awsiam.GetRoleInput{ RoleName: aws.String(roleName), }) + if err == nil { + return &awsiam.CreateRoleOutput{ + Role: getRoleOutput.Role, + }, *getRoleOutput.Role.Arn, nil + } + _, oidcProviderURL, found := strings.Cut(oidcProvider, "oidc-provider/") + if !found { + return nil, "", errors.New("invalid oidc provider arn") + } + accountID, err := ec.getAccountID() if err != nil { - _, oidcProviderURL, found := strings.Cut(oidcProvider, "oidc-provider/") - if !found { - return nil, "", errors.New("invalid oidc provider arn") - } - accountID, err := ec.getAccountID() - if err != nil { - return nil, "", err - } - - tmpl, err := template.New("vpcni-template").Parse(vpcCniTrustPolicy) - if err != nil { - return nil, "", err - } - var tmplOutput bytes.Buffer + return nil, "", err + } - if err := tmpl.Execute(&tmplOutput, genericRoleTemplateInput{ - AccountID: accountID, - OIDCProvider: oidcProviderURL, - }); err != nil { - return nil, "", err - } + tmpl, err := template.New("vpcni-template").Parse(vpcCniTrustPolicy) + if err != nil { + return nil, "", err + } + var tmplOutput bytes.Buffer - trustPolicy := tmplOutput.String() + if err := tmpl.Execute(&tmplOutput, genericRoleTemplateInput{ + AccountID: accountID, + OIDCProvider: oidcProviderURL, + }); err != nil { + return nil, "", err + } - roleInput := awsiam.CreateRoleInput{ - AssumeRolePolicyDocument: aws.String(strings.TrimSpace(trustPolicy)), - RoleName: &roleName, - } + trustPolicy := tmplOutput.String() - roleOutput, err := ec.awsIamClient.CreateRole(ctx, &roleInput) - if err != nil { - return nil, "", err - } + roleInput := awsiam.CreateRoleInput{ + AssumeRolePolicyDocument: aws.String(strings.TrimSpace(trustPolicy)), + RoleName: &roleName, + } - attachPolicyInput := awsiam.AttachRolePolicyInput{ - PolicyArn: &vpcCNIPolicyARN, - RoleName: roleOutput.Role.RoleName, - } + roleOutput, err = ec.awsIamClient.CreateRole(ctx, &roleInput) + if err != nil { + return nil, "", err + } - if _, err := ec.awsIamClient.AttachRolePolicy(ctx, &attachPolicyInput); err != nil { - return nil, "", err - } + attachPolicyInput := awsiam.AttachRolePolicyInput{ + PolicyArn: &vpcCNIPolicyARN, + RoleName: roleOutput.Role.RoleName, + } - return roleOutput, fmt.Sprintf("arn:aws:iam::%s:role/%s", accountID, roleName), nil + if _, err := ec.awsIamClient.AttachRolePolicy(ctx, &attachPolicyInput); err != nil { + return nil, "", err } - return &awsiam.CreateRoleOutput{}, "", nil + return roleOutput, fmt.Sprintf("arn:aws:iam::%s:role/%s", accountID, roleName), nil } func (ec *eks) CreateIAMPolicy(ctx context.Context, input *iam.CreatePolicyInput) (*iam.CreatePolicyOutput, error) { @@ -295,3 +297,26 @@ func (ec *eks) CreateIAMPolicy(ctx context.Context, input *iam.CreatePolicyInput func (ec *eks) AttachRolePolicy(ctx context.Context, input *iam.AttachRolePolicyInput) (*iam.AttachRolePolicyOutput, error) { return ec.awsIamClient.AttachRolePolicy(ctx, input) } + +func (ec *eks) DeleteRole(ctx context.Context, roleName string) error { + policy, err := ec.awsIamClient.ListAttachedRolePolicies(ctx, &awsiam.ListAttachedRolePoliciesInput{ + RoleName: &roleName, + }) + if err != nil { + return err + } + + for _, p := range policy.AttachedPolicies { + if _, err := ec.awsIamClient.DetachRolePolicy(ctx, &awsiam.DetachRolePolicyInput{ + RoleName: &roleName, + PolicyArn: p.PolicyArn, + }); err != nil { + return err + } + } + + _, err = ec.awsIamClient.DeleteRole(ctx, &awsiam.DeleteRoleInput{ + RoleName: &roleName, + }) + return err +}