diff --git a/tests/assets/karpenter/controller-role-policy-document.json b/tests/assets/karpenter/controller-role-policy-document.json
new file mode 100644
index 00000000..18f11f94
--- /dev/null
+++ b/tests/assets/karpenter/controller-role-policy-document.json
@@ -0,0 +1,112 @@
+{
+    "Statement": [
+        {
+            "Action": [
+                "ssm:GetParameter",
+                "ec2:DescribeImages",
+                "ec2:RunInstances",
+                "ec2:DescribeSubnets",
+                "ec2:DescribeSecurityGroups",
+                "ec2:DescribeLaunchTemplates",
+                "ec2:DescribeInstances",
+                "ec2:DescribeInstanceTypes",
+                "ec2:DescribeInstanceTypeOfferings",
+                "ec2:DeleteLaunchTemplate",
+                "ec2:CreateTags",
+                "ec2:CreateLaunchTemplate",
+                "ec2:CreateFleet",
+                "ec2:DescribeSpotPriceHistory",
+                "pricing:GetProducts"
+            ],
+            "Effect": "Allow",
+            "Resource": "*",
+            "Sid": "Karpenter"
+        },
+        {
+            "Action": "ec2:TerminateInstances",
+            "Condition": {
+                "StringLike": {
+                    "ec2:ResourceTag/karpenter.sh/nodepool": "*"
+                }
+            },
+            "Effect": "Allow",
+            "Resource": "*",
+            "Sid": "ConditionalEC2Termination"
+        },
+        {
+            "Effect": "Allow",
+            "Action": "iam:PassRole",
+            "Resource": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}",
+            "Sid": "PassNodeIAMRole"
+        },
+        {
+            "Effect": "Allow",
+            "Action": "eks:DescribeCluster",
+            "Resource": "arn:${AWS_PARTITION}:eks:${AWS_REGION}:${AWS_ACCOUNT_ID}:cluster/${CLUSTER_NAME}",
+            "Sid": "EKSClusterEndpointLookup"
+        },
+        {
+            "Sid": "AllowScopedInstanceProfileCreationActions",
+            "Effect": "Allow",
+            "Resource": "*",
+            "Action": [
+                "iam:CreateInstanceProfile"
+            ],
+            "Condition": {
+                "StringEquals": {
+                    "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned",
+                    "aws:RequestTag/topology.kubernetes.io/region": "${AWS_REGION}"
+                },
+                "StringLike": {
+                    "aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*"
+                }
+            }
+        },
+        {
+            "Sid": "AllowScopedInstanceProfileTagActions",
+            "Effect": "Allow",
+            "Resource": "*",
+            "Action": [
+                "iam:TagInstanceProfile"
+            ],
+            "Condition": {
+                "StringEquals": {
+                    "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned",
+                    "aws:ResourceTag/topology.kubernetes.io/region": "${AWS_REGION}",
+                    "aws:RequestTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned",
+                    "aws:RequestTag/topology.kubernetes.io/region": "${AWS_REGION}"
+                },
+                "StringLike": {
+                    "aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*",
+                    "aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*"
+                }
+            }
+        },
+        {
+            "Sid": "AllowScopedInstanceProfileActions",
+            "Effect": "Allow",
+            "Resource": "*",
+            "Action": [
+                "iam:AddRoleToInstanceProfile",
+                "iam:RemoveRoleFromInstanceProfile",
+                "iam:DeleteInstanceProfile"
+            ],
+            "Condition": {
+                "StringEquals": {
+                    "aws:ResourceTag/kubernetes.io/cluster/${CLUSTER_NAME}": "owned",
+                    "aws:ResourceTag/topology.kubernetes.io/region": "${AWS_REGION}"
+                },
+                "StringLike": {
+                    "aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*"
+                }
+            }
+        },
+        {
+            "Sid": "AllowInstanceProfileReadActions",
+            "Effect": "Allow",
+            "Resource": "*",
+            "Action": "iam:GetInstanceProfile"
+        }
+    ],
+    "Version": "2012-10-17"
+}
\ No newline at end of file
diff --git a/tests/assets/karpenter/controller-role-trust-policy-document.json b/tests/assets/karpenter/controller-role-trust-policy-document.json
new file mode 100644
index 00000000..18b6e23b
--- /dev/null
+++ b/tests/assets/karpenter/controller-role-trust-policy-document.json
@@ -0,0 +1,18 @@
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Principal": {
+                "Federated": "arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_ENDPOINT}"
+            },
+            "Action": "sts:AssumeRoleWithWebIdentity",
+            "Condition": {
+                "StringEquals": {
+                    "${OIDC_ENDPOINT}:aud": "sts.amazonaws.com",
+                    "${OIDC_ENDPOINT}:sub": "system:serviceaccount:karpenter:karpenter"
+                }
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/assets/karpenter/node-role-policy-document.json b/tests/assets/karpenter/node-role-policy-document.json
new file mode 100644
index 00000000..19859682
--- /dev/null
+++ b/tests/assets/karpenter/node-role-policy-document.json
@@ -0,0 +1,12 @@
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Principal": {
+                "Service": "ec2.amazonaws.com"
+            },
+            "Action": "sts:AssumeRole"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/assets/karpenter/nodeclass.yaml b/tests/assets/karpenter/nodeclass.yaml
new file mode 100644
index 00000000..d11a1cf9
--- /dev/null
+++ b/tests/assets/karpenter/nodeclass.yaml
@@ -0,0 +1,60 @@
+apiVersion: karpenter.k8s.aws/v1
+kind: EC2NodeClass
+metadata:
+  name: default
+spec:
+  amiFamily: Custom
+  instanceProfile: "KarpenterNodeInstanceProfile-${CLUSTER_NAME}"
+  amiSelectorTerms:
+    - alias: "al2023@${ALIAS_VERSION}"
+  subnetSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: "${CLUSTER_NAME}"
+    - tags:
+        aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+  securityGroupSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: "${CLUSTER_NAME}"
+    - tags:
+        aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+    - tags:
+        kubernetes.io/cluster/${CLUSTER_NAME}: owned
+  kubelet:
+    maxPods: 110
+    systemReserved:
+      cpu: 100m
+      memory: 100Mi
+      ephemeral-storage: 1Gi
+    kubeReserved:
+      cpu: 100m
+      memory: 100Mi
+      ephemeral-storage: 1Gi
+    evictionHard:
+      memory.available: 5%
+      nodefs.available: 10%
+      nodefs.inodesFree: 10%
+  userData: |
+    MIME-Version: 1.0
+    Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+    --BOUNDARY
+    Content-Type: application/node.eks.aws
+
+    apiVersion: node.eks.aws/v1alpha1
+    kind: NodeConfig
+    spec:
+      cluster:
+        name: ${CLUSTER_NAME}
+        apiServerEndpoint: ${CLUSTER_ENDPOINT} # Using the actual cluster endpoint
+        certificateAuthority: ${CLUSTER_CA}
+        cidr: "172.20.0.0/16"
+      kubelet:
+        config:
+          nodeStatusReportFrequency: "60m"
+          nodeLeaseDurationSeconds: 60
+          maxPods: 110
+          clusterDNS: ["172.20.0.10"]
+        flags:
+          - --node-labels=karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=titan-pool
+          - --register-with-taints=karpenter.sh/unregistered:NoExecute
+    --BOUNDARY--
\ No newline at end of file
diff --git a/tests/assets/karpenter/nodepool.yaml b/tests/assets/karpenter/nodepool.yaml
new file mode 100644
index 00000000..0eacf187
--- /dev/null
+++ b/tests/assets/karpenter/nodepool.yaml
@@ -0,0 +1,47 @@
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: ${CLUSTER_NAME}-${AZ}
+spec:
+  disruption:
+    budgets:
+      - nodes: 5%
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmptyOrUnderutilized
+  replicas: 0
+  template:
+    spec:
+      expireAfter: 720h
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: default
+      requirements:
+        - key: topology.kubernetes.io/zone
+          operator: In
+          values:
+            - ${AZ}
+        - key: kubernetes.io/arch
+          operator: In
+          values:
+            - amd64
+        - key: kubernetes.io/os
+          operator: In
+          values:
+            - linux
+        - key: karpenter.sh/capacity-type
+          operator: In
+          values:
+            - on-demand
+        - key: node.kubernetes.io/instance-category
+          operator: In
+          values:
+            - c
+            - m
+            - r
+            - t
+        - key: karpenter.k8s.aws/instance-size
+          operator: In
+          values:
+            - medium
+            - large
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/generators/karpenter/kubectl-cluster-wait.yaml b/tests/tekton-resources/tasks/generators/karpenter/kubectl-cluster-wait.yaml
new file mode 100644
index 00000000..653e3757
--- /dev/null
+++ b/tests/tekton-resources/tasks/generators/karpenter/kubectl-cluster-wait.yaml
@@ -0,0 +1,49 @@
+---
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: node-condition-wait
+  namespace: scalability
+spec:
+  description: "waits for there to be no nodes with the specified condition"
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: endpoint
+      description: eks endpoint to use
+    - name: aws-region
+    - name: initial-delay
+      default: 30m
+    - name: condition
+      description: condition to check
+    - name: value
+      description: value of the condition to validate
+  steps:
+    - name: wait-node-condition
+      image: alpine/k8s:1.23.7
+      script: |
+        sleep $(params.initial-delay)
+        CHECK_INTERVAL=300
+        while true; do
+          aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+          echo "$(date): Checking node conditions..."
+          # Get nodes that still have the unwanted condition
+          nodes_with_condition=$(kubectl get nodes -o json | jq -r --arg type "$(params.condition)" --arg status "$(params.value)" '
+            .items[] |
+            select(.status.conditions[] | select(.type == $type and .status == $status)) |
+            .metadata.name
+          ')
+          if [ -z "$nodes_with_condition" ]; then
+            echo "$(date): All nodes are clear of condition $(params.condition)=$(params.value)"
+            echo "Condition check completed successfully!"
+            exit 0
+          else
+            echo "$(date): The following nodes still have $(params.condition)=$(params.value):"
+            echo "$nodes_with_condition"
+            echo "Waiting 5 minutes before next check..."
+            sleep $CHECK_INTERVAL
+          fi
+        done
+
+        exit 1
+
diff --git a/tests/tekton-resources/tasks/generators/karpenter/kubectl-drift.yaml b/tests/tekton-resources/tasks/generators/karpenter/kubectl-drift.yaml
new file mode 100644
index 00000000..30c70746
--- /dev/null
+++ b/tests/tekton-resources/tasks/generators/karpenter/kubectl-drift.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: nodepool-drift
+  namespace: scalability
+spec:
+  description: "drift the specified nodepool by adding a new label to its template"
+  params:
+    - name: nodepool
+      description: Name of the nodepool to drift
+    - name: cluster-name
+      description: The name of the cluster
+    - name: endpoint
+      description: eks endpoint to use
+    - name: aws-region
+  steps:
+    - name: drift-nodepool
+      image: alpine/k8s:1.23.7
+      script: |
+        aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+        kubectl patch nodepool $(params.nodepool) --type merge --patch '{"spec": {"template": {"metadata": {"labels": {"myLabel": "myValue"}}}}}'
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/generators/karpenter/kubectl-scale.yaml b/tests/tekton-resources/tasks/generators/karpenter/kubectl-scale.yaml
new file mode 100644
index 00000000..0738c994
--- /dev/null
+++ b/tests/tekton-resources/tasks/generators/karpenter/kubectl-scale.yaml
@@ -0,0 +1,23 @@
+---
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: nodepool-scale
+  namespace: scalability
+spec:
+  description: "scale the specified nodepool to the requested number of replicas"
+  params:
+    - name: replicas
+      description: Number of replicas to scale to
+    - name: nodepool
+      description: Name of the nodepool to scale
+    - name: cluster-name
+      description: The name of the cluster
+    - name: endpoint
+      description: eks endpoint to use
+  steps:
+    - name: scale-nodepool
+      image: alpine/k8s:1.23.7
+      script: |
+        aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+        kubectl scale nodepool $(params.nodepool) --replicas=$(params.replicas)
\ No newline at end of file
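Note (illustrative, not part of this change): these generator Tasks are intended to be chained from a Pipeline or run ad hoc. A hypothetical ad hoc invocation with the Tekton CLI, where every value is a made-up example, could look like:

    # Scale a nodepool up, then wait until no node reports Ready=False.
    tkn task start nodepool-scale -n scalability \
      -p nodepool=example-nodepool \
      -p replicas=100 \
      -p cluster-name=example-cluster \
      -p endpoint=https://example.eks.endpoint \
      --showlog
    tkn task start node-condition-wait -n scalability \
      -p cluster-name=example-cluster \
      -p endpoint=https://example.eks.endpoint \
      -p aws-region=us-west-2 \
      -p condition=Ready \
      -p value=False \
      --use-param-defaults --showlog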
"KarpenterControllerRole-$(params.cluster-name)" \ + --policy-name "KarpenterControllerPolicy-$(params.cluster-name)" \ + --policy-document file://$(workspaces.source.path)karpenter-controller-role-policy-url-modified.json \ No newline at end of file diff --git a/tests/tekton-resources/tasks/setup/karpenter/awscli-instanceprofiles.yaml b/tests/tekton-resources/tasks/setup/karpenter/awscli-instanceprofiles.yaml new file mode 100644 index 00000000..613dfb94 --- /dev/null +++ b/tests/tekton-resources/tasks/setup/karpenter/awscli-instanceprofiles.yaml @@ -0,0 +1,17 @@ +apiVersion: tekton.dev/v1beta1 +kind: Task +metadata: + name: awscli-instanceprofiles + namespace: scalability +spec: + description: | + Creates the karpenter instance profile + params: + - name: cluster-name + description: The name of the cluster + steps: + - name: create-role + image: alpine/k8s:1.23.7 + script: | + aws iam create-instance-profile --instance-profile-name "KarpenterNodeInstanceProfile-$(params.cluster-name)" + aws iam add-role-to-instance-profile --instance-profile-name "KarpenterNodeInstanceProfile-$(params.cluster-name)" --role-name "KarpenterNodeRole-$(params.cluster-name)" diff --git a/tests/tekton-resources/tasks/setup/karpenter/awscli-karpenter-cfn-stack.yaml b/tests/tekton-resources/tasks/setup/karpenter/awscli-karpenter-cfn-stack.yaml new file mode 100644 index 00000000..6083cd45 --- /dev/null +++ b/tests/tekton-resources/tasks/setup/karpenter/awscli-karpenter-cfn-stack.yaml @@ -0,0 +1,65 @@ +apiVersion: tekton.dev/v1beta1 +kind: Task +metadata: + name: awscli-karpenter-cfn-stack + namespace: scalability +spec: + description: | + Creates the karpenter instance roles and sqs interruption queue + params: + - name: cluster-name + description: The name of the cluster + - name: karpenter-version + description: Version of Karpenter to deploy + - name: endpoint + description: Endpoint to use with EKS + - name: region + default: us-west-2 + description: The region where the cluster is in. 
+ - name: account-id + description: The aws account the cluster is running in + workspaces: + - name: source + mountPath: /src/karpenter/ + steps: + - name: create-stack + image: alpine/k8s:1.23.7 + script: | + STACK_NAME=Karpenter-$(params.cluster-name) + STACK_STATUS=$(aws cloudformation describe-stacks --query 'Stacks[?StackName==`'${STACK_NAME}'`].StackStatus' --output text --region $(params.region)) + curl -fsSL https://raw.githubusercontent.com/aws/karpenter-provider-aws/$(params.karpenter-version)/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml -o $(workspaces.source.path)cloudformation.yaml + + cat /src/karpenter/cloudformation.yaml + + aws eks update-cluster-config --name $(params.cluster-name) --access-config authenticationMode=API_AND_CONFIG_MAP --endpoint $(params.endpoint) + + if [[ "$STACK_STATUS" == "" ]]; then + aws cloudformation deploy \ + --stack-name "Karpenter-$(params.cluster-name)" \ + --template-file $(workspaces.source.path)cloudformation.yaml \ + --capabilities CAPABILITY_NAMED_IAM \ + --parameter-overrides "ClusterName=$(params.cluster-name)" + + aws cloudformation wait stack-create-complete --stack-name $STACK_NAME --region $(params.region) + echo "CREATED_CFN_STACK=$STACK_NAME" + else + echo "$STACK_NAME Already exists" + fi + + eksctl create iamserviceaccount \ + --name karpenter \ + --namespace karpenter \ + --cluster "$(params.cluster-name)" \ + --attach-policy-arn "arn:aws:iam::$(params.account-id):role/$(params.cluster-name)-karpenter" \ + --approve \ + --override-existing-serviceaccounts + + export AWS_EKS_ENDPOINT=$(params.endpoint) + eksctl utils associate-iam-oidc-provider --cluster "$(params.cluster-name)" --approve + + aws eks create-access-entry \ + --cluster-name "$(params.cluster-name)" \ + --principal-arn "arn:aws:iam::$(params.account-id):role/KarpenterNodeRole-$(params.cluster-name)" \ + --endpoint $(params.endpoint) \ + --type EC2_LINUX + diff --git a/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml b/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml new file mode 100644 index 00000000..178a595b --- /dev/null +++ b/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml @@ -0,0 +1,47 @@ +apiVersion: tekton.dev/v1beta1 +kind: Task +metadata: + name: awscli-mng + namespace: scalability +spec: + description: | + Creates the karpenter MNG + params: + - name: cluster-name + description: The name of the cluster + - name: aws-account-id + description: id of the account + - name: endpoint + description: eks endpoint to use + - name: region + default: "us-west-2" + description: The region where the cluster is in. 
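Note (illustrative, not part of this change): before moving on to the chart install it can be worth confirming that the IAM pieces created by the tasks above exist; a small check using the same naming conventions (the cluster name is a made-up example):

    CLUSTER_NAME=scale-test   # example value
    aws iam get-role --role-name "KarpenterControllerRole-${CLUSTER_NAME}" --query 'Role.Arn' --output text
    aws iam get-role --role-name "KarpenterNodeRole-${CLUSTER_NAME}" --query 'Role.Arn' --output text
    aws iam get-instance-profile --instance-profile-name "KarpenterNodeInstanceProfile-${CLUSTER_NAME}" \
      --query 'InstanceProfile.Roles[].RoleName' --output text
    aws cloudformation describe-stacks --stack-name "Karpenter-${CLUSTER_NAME}" \
      --query 'Stacks[0].StackStatus' --output text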
diff --git a/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml b/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml
new file mode 100644
index 00000000..178a595b
--- /dev/null
+++ b/tests/tekton-resources/tasks/setup/karpenter/awscli-mng.yaml
@@ -0,0 +1,47 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: awscli-mng
+  namespace: scalability
+spec:
+  description: |
+    Creates the karpenter MNG
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: aws-account-id
+      description: id of the account
+    - name: endpoint
+      description: eks endpoint to use
+    - name: region
+      default: "us-west-2"
+      description: The region the cluster is in.
+  steps:
+    - name: create-mng
+      image: alpine/k8s:1.23.7
+      script: |
+        SUBNET_IDS=$(aws ec2 describe-subnets \
+          --filters "Name=tag:aws:cloudformation:stack-name,Values=$(params.cluster-name)" \
+          --query 'Subnets[*].SubnetId' \
+          --output text)
+
+        echo ${SUBNET_IDS}
+
+        aws eks create-nodegroup \
+          --cluster-name $(params.cluster-name) \
+          --nodegroup-name karpenter-system-large \
+          --node-role arn:aws:iam::$(params.aws-account-id):role/$(params.cluster-name)-node-role \
+          --instance-types r5.24xlarge \
+          --scaling-config minSize=2,maxSize=3,desiredSize=2 \
+          --subnets ${SUBNET_IDS} \
+          --labels dedicated=karpenter \
+          --region $(params.region) \
+          --endpoint-url $(params.endpoint) \
+          --taints key=dedicated,value=karpenter,effect=NoSchedule
+
+
+        # quick validation
+        aws eks list-nodegroups \
+          --endpoint $(params.endpoint) \
+          --cluster-name $(params.cluster-name) \
+          --region $(params.region)
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/setup/karpenter/awscli-node-role.yaml b/tests/tekton-resources/tasks/setup/karpenter/awscli-node-role.yaml
new file mode 100644
index 00000000..acff12d8
--- /dev/null
+++ b/tests/tekton-resources/tasks/setup/karpenter/awscli-node-role.yaml
@@ -0,0 +1,42 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: awscli-node-role
+  namespace: scalability
+spec:
+  description: |
+    Creates the karpenter Node Role
+  results:
+    - name: node-role-arn
+      description: Stores the node role arn created by the task
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+  steps:
+    - name: create-role
+      image: alpine/k8s:1.23.7
+      script: |
+        aws iam create-role --role-name "KarpenterNodeRole-$(params.cluster-name)" \
+          --assume-role-policy-document '{
+            "Version": "2012-10-17",
+            "Statement": [
+              {
+                "Effect": "Allow",
+                "Principal": {
+                  "Service": "ec2.amazonaws.com"
+                },
+                "Action": "sts:AssumeRole"
+              }
+            ]
+          }'
+
+        # Attach required policies
+        aws iam attach-role-policy --role-name "KarpenterNodeRole-$(params.cluster-name)" \
+          --policy-arn arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy
+        aws iam attach-role-policy --role-name "KarpenterNodeRole-$(params.cluster-name)" \
+          --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
+        aws iam attach-role-policy --role-name "KarpenterNodeRole-$(params.cluster-name)" \
+          --policy-arn arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
+        aws iam attach-role-policy --role-name "KarpenterNodeRole-$(params.cluster-name)" \
+          --policy-arn arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
+        aws iam get-role --role-name KarpenterNodeRole-$(params.cluster-name) --query 'Role.[Arn]' --output text > $(results.node-role-arn.path)
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/setup/karpenter/helm-karpenter-install.yaml b/tests/tekton-resources/tasks/setup/karpenter/helm-karpenter-install.yaml
new file mode 100644
index 00000000..7358f87e
--- /dev/null
+++ b/tests/tekton-resources/tasks/setup/karpenter/helm-karpenter-install.yaml
@@ -0,0 +1,88 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: helm-karpenter-install
+  namespace: scalability
+spec:
+  description: |
+    Install karpenter on the cluster
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: aws-account-id
+      description: aws account id
+    - name: karpenter-ecr-repo
+      description: ECR repo to install karpenter
+    - name: karpenter-version
+      description: version of karpenter to install
+    - name: endpoint
+      description: eks endpoint to use
+  workspaces:
+    - name: config
+  steps:
+    - name: install-karpenter
+      image: alpine/k8s:1.23.7
+      timeout: 10m
+      script: |
+        aws ecr get-login-password --region us-west-2 | helm registry login --username AWS --password-stdin $(params.karpenter-ecr-repo)
+
+        aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+
+        aws eks describe-nodegroup --cluster-name $(params.cluster-name) --endpoint $(params.endpoint) --nodegroup-name karpenter-system-large
+
+        kubectl get nodes -A -o yaml
+
+        kubectl get pods -A -o wide
+
+        kubectl get pods -n karpenter -o yaml
+
+        # kubectl delete nodes -l dedicated=karpenter
+
+        kubectl get deployments -A -o wide
+
+        # helm status karpenter --namespace karpenter
+
+        # kubectl logs karpenter-5df996fbbf-f8ghz -n karpenter -f
+
+        # helm delete -n karpenter karpenter --wait
+
+        # kubectl taint nodes -l dedicated=karpenter dedicated=karpenter:NoSchedule
+
+        helm upgrade --install karpenter oci://$(params.karpenter-ecr-repo)/karpenter/karpenter --version $(params.karpenter-version) \
+          --namespace "karpenter" \
+          --create-namespace \
+          --set "settings.clusterName=$(params.cluster-name)" \
+          --set "settings.interruptionQueue=" \
+          --set "settings.eksControlPlane=true" \
+          --set-string "settings.awsCreateQPS=60" \
+          --set "settings.featureGates.disableMetricsControllers=true" \
+          --set "settings.featureGates.nodeRepair=true" \
+          --set settings.featureGates.reservedCapacity="true" \
+          --set settings.featureGates.spotToSpotConsolidation="true" \
+          --set settings.featureGates.disableMetricsControllers="true" \
+          --set settings.preferencePolicy=Ignore \
+          --set "serviceAccount.annotations.eks\.amazonaws\.com/role-arn=arn:aws:iam::$(params.aws-account-id):role/KarpenterControllerRole-$(params.cluster-name)" \
+          --set controller.resources.requests.cpu=60 \
+          --set controller.resources.requests.memory=200Gi \
+          --set controller.resources.limits.cpu=60 \
+          --set controller.resources.limits.memory=200Gi \
+          --set "controller.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key=dedicated" \
+          --set "controller.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator=In" \
+          --set "controller.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]=karpenter" \
+          --set "tolerations[0].key=dedicated" \
+          --set "tolerations[0].value=karpenter" \
+          --set "tolerations[0].operator=Equal" \
+          --set "dnsPolicy=Default" \
+          --set-string "controller.env[0].name=AWS_ENDPOINT_URL_EKS" \
+          --set-string "controller.env[0].value=$(params.endpoint)" \
+          --set-string "controller.env[1].name=KUBE_CLIENT_QPS" \
+          --set-string "controller.env[1].value=50000" \
+          --set-string "controller.env[2].name=KUBE_CLIENT_BURST" \
+          --set-string "controller.env[2].value=50000" \
+          --set-string "controller.env[3].name=ENABLE_PROFILING" \
+          --set-string "controller.env[3].value=true" \
+          --timeout 100m \
+          --debug \
+          --wait
+
+        kubectl get pods -n karpenter
\ No newline at end of file
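Note (illustrative, not part of this change): after the helm upgrade in the task above completes, a quick sanity check that the controller picked up the IRSA annotation and landed on the dedicated nodegroup might look like:

    kubectl -n karpenter get deploy karpenter -o wide
    kubectl -n karpenter get sa karpenter \
      -o jsonpath='{.metadata.annotations.eks\.amazonaws\.com/role-arn}{"\n"}'
    kubectl -n karpenter get pods -o wide
    kubectl -n karpenter logs deploy/karpenter --tail=50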
diff --git a/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodeclass.yaml b/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodeclass.yaml
new file mode 100644
index 00000000..cd8b6fb8
--- /dev/null
+++ b/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodeclass.yaml
@@ -0,0 +1,56 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: create-ec2nodeclass
+  namespace: scalability
+spec:
+  description: |
+    Creates the Karpenter EC2NodeClass on the cluster
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: endpoint
+      description: eks endpoint to use
+    - name: karpenter-nodeclass-url
+      description: url of the nodeclass template to use
+  workspaces:
+    - name: source
+      mountPath: /src/karpenter/
+  steps:
+    - name: create-ec2nodeclass
+      image: alpine/k8s:1.23.7
+      script: |
+        export CLUSTER_CA=$(aws eks describe-cluster \
+          --name $(params.cluster-name) \
+          --endpoint-url $(params.endpoint) \
+          --query 'cluster.certificateAuthority.data' \
+          --output text)
+
+
+        export CLUSTER_ENDPOINT=$(aws eks describe-cluster \
+          --name $(params.cluster-name) \
+          --endpoint-url $(params.endpoint) \
+          --query 'cluster.endpoint' \
+          --output text)
+
+        export CLUSTER_NAME=$(params.cluster-name)
+
+        export ALIAS_VERSION=latest
+
+        echo "Cluster endpoint: ${CLUSTER_ENDPOINT}"
+
+        curl -fsSL $(params.karpenter-nodeclass-url) -o $(workspaces.source.path)ec2nodeclass.yaml
+
+        cat $(workspaces.source.path)ec2nodeclass.yaml
+
+        envsubst < $(workspaces.source.path)ec2nodeclass.yaml > $(workspaces.source.path)ec2nodeclass-modified.yaml
+
+        ls $(workspaces.source.path)
+
+        cat $(workspaces.source.path)ec2nodeclass-modified.yaml
+
+        aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+
+        kubectl apply -f $(workspaces.source.path)ec2nodeclass-modified.yaml
+
+        kubectl get ec2nodeclass -o yaml
diff --git a/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodepools.yaml b/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodepools.yaml
new file mode 100644
index 00000000..f438b3ff
--- /dev/null
+++ b/tests/tekton-resources/tasks/setup/karpenter/kubectl-nodepools.yaml
@@ -0,0 +1,43 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: create-nodepool
+  namespace: scalability
+spec:
+  description: |
+    Creates Karpenter NodePools on the cluster, one per availability zone
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: endpoint
+      description: eks endpoint to use
+    - name: aws-region
+      description: aws region to use
+      default: us-west-2
+    - name: karpenter-nodepool-url
+      description: url of the nodepool template to use
+  workspaces:
+    - name: source
+      mountPath: /src/karpenter/
+  steps:
+    - name: create-nodepools
+      image: alpine/k8s:1.23.7
+      script: |
+        aws eks update-kubeconfig --name $(params.cluster-name) --endpoint $(params.endpoint)
+
+        export CLUSTER_NAME=$(params.cluster-name)
+        curl -fsSL $(params.karpenter-nodepool-url) -o $(workspaces.source.path)nodepool.yaml
+
+        cat $(workspaces.source.path)nodepool.yaml
+
+
+        aws ec2 describe-availability-zones --region $(params.aws-region) --query 'AvailabilityZones[].ZoneName' --output json | jq -r '.[]' | while read -r az; do
+          export AZ=$az
+          echo ${AZ}
+          envsubst < $(workspaces.source.path)nodepool.yaml > $(workspaces.source.path)nodepool-${AZ}.yaml
+          cat $(workspaces.source.path)nodepool-${AZ}.yaml
+          kubectl apply -f $(workspaces.source.path)nodepool-${AZ}.yaml
+        done
+
+        kubectl get nodepool -o yaml
+
diff --git a/tests/tekton-resources/tasks/teardown/karpenter/awscli-controller-role.yaml b/tests/tekton-resources/tasks/teardown/karpenter/awscli-controller-role.yaml
new file mode 100644
index 00000000..c4d03173
--- /dev/null
+++ b/tests/tekton-resources/tasks/teardown/karpenter/awscli-controller-role.yaml
@@ -0,0 +1,17 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: awscli-controller-role-teardown
+  namespace: scalability
+spec:
+  description: |
+    Deletes the karpenter Controller Role
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+  steps:
+    - name: delete-role
+      image: alpine/k8s:1.23.7
+      script: |
+        aws iam delete-role-policy --role-name "KarpenterControllerRole-$(params.cluster-name)" --policy-name "KarpenterControllerPolicy-$(params.cluster-name)"
+        aws iam delete-role --role-name "KarpenterControllerRole-$(params.cluster-name)"
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/teardown/karpenter/awscli-instanceprofiles.yaml b/tests/tekton-resources/tasks/teardown/karpenter/awscli-instanceprofiles.yaml
new file mode 100644
index 00000000..fdcb9558
--- /dev/null
+++ b/tests/tekton-resources/tasks/teardown/karpenter/awscli-instanceprofiles.yaml
@@ -0,0 +1,17 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: awscli-instanceprofiles-teardown
+  namespace: scalability
+spec:
+  description: |
+    Deletes the karpenter instance profile
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+  steps:
+    - name: delete-instance-profile
+      image: alpine/k8s:1.23.7
+      script: |
+        aws iam remove-role-from-instance-profile --instance-profile-name "KarpenterNodeInstanceProfile-$(params.cluster-name)" --role-name "KarpenterNodeRole-$(params.cluster-name)"
+        aws iam delete-instance-profile --instance-profile-name "KarpenterNodeInstanceProfile-$(params.cluster-name)"
\ No newline at end of file
diff --git a/tests/tekton-resources/tasks/teardown/karpenter/awscli-karpenter-cfn-stack.yaml b/tests/tekton-resources/tasks/teardown/karpenter/awscli-karpenter-cfn-stack.yaml
new file mode 100644
index 00000000..9dbbdd45
--- /dev/null
+++ b/tests/tekton-resources/tasks/teardown/karpenter/awscli-karpenter-cfn-stack.yaml
@@ -0,0 +1,36 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: awscli-karpenter-cfn-stack-teardown
+  namespace: scalability
+spec:
+  description: |
+    Deletes the karpenter instance roles and sqs interruption queue stack
+  params:
+    - name: cluster-name
+      description: The name of the cluster
+    - name: karpenter-version
+      description: Version of Karpenter to deploy
+    - name: endpoint
+      description: Endpoint to use with EKS
+    - name: region
+      default: us-west-2
+      description: The region the cluster is in.
+    - name: account-id
+      description: The aws account the cluster is running in
+  steps:
+    - name: delete-stack
+      image: alpine/k8s:1.23.7
+      script: |
+        STACK_NAME=Karpenter-$(params.cluster-name)
+        STACK_STATUS=$(aws cloudformation describe-stacks --query 'Stacks[?StackName==`'${STACK_NAME}'`].StackStatus' --output text --region $(params.region))
+        echo "${STACK_STATUS}"
+
+        if [[ "$STACK_STATUS" != "" ]]; then
+          aws cloudformation delete-stack --stack-name ${STACK_NAME} --region $(params.region)
+
+          aws cloudformation wait stack-delete-complete --stack-name $STACK_NAME --region $(params.region)
+        else
+          echo "$STACK_NAME does not exist"
+        fi
+