Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cluster/kube/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ const (
envVarAkashOwner = "AKASH_OWNER"
envVarAkashProvider = "AKASH_PROVIDER"
envVarAkashClusterPublicHostname = "AKASH_CLUSTER_PUBLIC_HOSTNAME"
envVarAkashIngressHostname = "AKASH_INGRESS_HOST"
envVarAkashIngressCustomHostname = "AKASH_INGRESS_CUSTOM_HOST"
)

var (
Expand Down
125 changes: 125 additions & 0 deletions cluster/kube/builder/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package builder

const (
// Config system constants
AkashConfigVolume = "akash-cfg"
AkashConfigMount = "/akash-cfg"
AkashConfigInitName = "akash-init"
AkashConfigEnvFile = "config.env"

// RBAC constants
AkashRoleName = "akash-role"
AkashRoleBinding = "akash-binding"

// Init container script
akashInitScript = `
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds a lot of complexity to the code. I believe you could achieve this using the standard library and the Kubernetes API which would result in a more maintainable PR

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-j-luna

From what I found, the only way to access the data is AFTER the container is created, which in my first attempt meant calling creation twice, since the API decides on the NodePorts... so you end up needing an init script to query that data.

I also looked at the kube meta YAML stuff, and there's no way to inject the node port info in...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is accessing the data after the container is created through kube api not an option? I'm not a fan of the idea of injecting an init container into deployments that run an embedded shell script from the provider... worst case scenario would at least mount the script instead.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not know whether it is possible to access the meta API from within the main container. My original PR was a lot buggier and involved doing two creates, as I stated, because you had to get the info from kube, since kube decides the node ports.

The big question, I guess, is when and how a container can get information about the NodePorts the kube daemon assigns to it. This can definitely be tricky, since they are attached to a service entity and not to the container itself, IIRC.

I found that the "kube yaml meta" can't do it: the kube API decides it, and you have to add RBAC to enable API access. I don't know if you can do so in the main container, like how EC2 can access the AWS metadata service APIs.

# Init script: discovers the externally reachable ports/IPs of this workload's
# Kubernetes Service and writes them as "export NAME=value" lines to
# ${AKASH_CONFIG_PATH}/${AKASH_CONFIG_FILE}.
#
# Inputs (environment): SERVICE_NAME, AKASH_CONFIG_PATH, AKASH_CONFIG_FILE,
# AKASH_REQUIRES_NODEPORT. The script always exits 0 and always leaves a config
# file behind, so a lookup failure never blocks the main container.

# Install jq and curl; output is discarded (portable redirection, not "&>").
apk add --no-cache jq curl >/dev/null 2>&1

# Define default paths if not set
AKASH_CONFIG_PATH="${AKASH_CONFIG_PATH:-/akash/config}"
AKASH_CONFIG_FILE="${AKASH_CONFIG_FILE:-env.sh}"

# Validate paths: never write into "/", and strip a single trailing slash
[ "$AKASH_CONFIG_PATH" = "/" ] && AKASH_CONFIG_PATH="/tmp/akash"
AKASH_CONFIG_PATH="${AKASH_CONFIG_PATH%/}"

# Create config directory if it doesn't exist
mkdir -p "${AKASH_CONFIG_PATH}"

# Nothing to discover: leave a marker file and finish
if [ "$AKASH_REQUIRES_NODEPORT" != "true" ]; then
  touch "${AKASH_CONFIG_PATH}/${AKASH_CONFIG_FILE}"
  echo "# No NodePorts required" >> "${AKASH_CONFIG_PATH}/${AKASH_CONFIG_FILE}"
  exit 0
fi

# Get service information using the Kubernetes API
NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
CACERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
API_SERVER="https://kubernetes.default.svc:443"
BASE_API_URL="${API_SERVER}/api/v1/namespaces/${NAMESPACE}"

# Print the name of the first matching service in the namespace, trying in
# order: exact name, name with "-np" suffix, any name containing the argument.
# Prints nothing when no service matches.
get_valid_service() {
  local service_name="$1"
  local services_json

  services_json=$(curl -s --cacert "${CACERT}" -H "Authorization: Bearer ${TOKEN}" "${BASE_API_URL}/services/")

  # The name is passed with --arg so it is treated as data, not as jq code.
  echo "$services_json" | jq -r --arg name "${service_name}" '
    (.items[] | select(.metadata.name == $name) | .metadata.name) //
    (.items[] | select(.metadata.name == ($name + "-np")) | .metadata.name) //
    (.items[] | select(.metadata.name | contains($name)) | .metadata.name) //
    empty
  ' | head -n 1
}

# Get the valid service name, falling back to the configured one
ACTUAL_SERVICE_NAME=$(get_valid_service "${SERVICE_NAME}")

[ -z "$ACTUAL_SERVICE_NAME" ] && ACTUAL_SERVICE_NAME="${SERVICE_NAME}"

API_URL="${BASE_API_URL}/services/${ACTUAL_SERVICE_NAME}"
TEMP_FILE="${AKASH_CONFIG_PATH}/.tmp.${AKASH_CONFIG_FILE}"
CONFIG_FILE="${AKASH_CONFIG_PATH}/${AKASH_CONFIG_FILE}"

# Create initial config file header; the temp file is moved into place only
# once its content is complete.
echo "# Akash config generated on $(date)" > "$TEMP_FILE"
echo "# Service: ${ACTUAL_SERVICE_NAME}" >> "$TEMP_FILE"

# Add retries with exponential backoff
MAX_ATTEMPTS=30
for i in $(seq 1 $MAX_ATTEMPTS); do
  # Query the service to get NodePort mappings
  RESPONSE=$(curl -s --max-time 5 --retry 3 --retry-delay 1 --cacert "${CACERT}" \
    -H "Authorization: Bearer ${TOKEN}" \
    "${API_URL}")

  # Check service type first
  SERVICE_TYPE=$(echo "$RESPONSE" | jq -r '.spec.type // "unknown"')

  if [ "$SERVICE_TYPE" = "NodePort" ]; then
    # Service is NodePort, extract nodePort values. Plain "=" is used because
    # "NAME+=value" is a bash-only append form and is not a valid assignment
    # for POSIX sh or env-file consumers.
    NODE_PORTS=$(echo "$RESPONSE" | jq -r '.spec.ports[] | select(.nodePort != null) | "export AKASH_EXTERNAL_PORT_\(.targetPort)=\(.nodePort)"' 2>/dev/null || echo "")

    if [ -n "$NODE_PORTS" ]; then
      echo "$NODE_PORTS" >> "$TEMP_FILE"
      mv "$TEMP_FILE" "$CONFIG_FILE"
      exit 0
    fi
  elif [ "$SERVICE_TYPE" = "LoadBalancer" ]; then
    # Service is LoadBalancer, check for external IPs. Several ingress IPs are
    # joined with commas so the export stays on a single line.
    EXTERNAL_IPS=$(echo "$RESPONSE" | jq -r '[.status.loadBalancer.ingress[]?.ip // empty] | join(",")' 2>/dev/null || echo "")
    if [ -n "$EXTERNAL_IPS" ]; then
      echo "export AKASH_EXTERNAL_IP=${EXTERNAL_IPS}" >> "$TEMP_FILE"
      mv "$TEMP_FILE" "$CONFIG_FILE"
      exit 0
    fi
  elif [ "$SERVICE_TYPE" = "ClusterIP" ]; then
    # Service is ClusterIP with dedicated IP. Keep polling for the first five
    # attempts in case it's still being configured; after that, record the
    # port mappings once and finish.
    if [ "$i" -gt 5 ]; then
      echo "# Service type is ClusterIP with dedicated IP" >> "$TEMP_FILE"

      # Get service ports for reference
      PORTS=$(echo "$RESPONSE" | jq -r '.spec.ports[] | "# Port \(.port) -> \(.targetPort)"' 2>/dev/null || echo "# No port mappings found")
      echo "$PORTS" >> "$TEMP_FILE"

      mv "$TEMP_FILE" "$CONFIG_FILE"
      exit 0
    fi
  fi

  # Exponential backoff: 1, 2, 4, then capped at 8 seconds (2^3)
  SLEEP_TIME=$((2 ** ((i-1) > 3 ? 3 : (i-1))))
  sleep $SLEEP_TIME
done

# Create a comment-only config file to prevent container from failing
echo "# Warning: Service configuration timeout after $MAX_ATTEMPTS attempts" >> "$TEMP_FILE"
echo "# Service type: ${SERVICE_TYPE}" >> "$TEMP_FILE"
mv "$TEMP_FILE" "$CONFIG_FILE"
exit 0
`
)
65 changes: 61 additions & 4 deletions cluster/kube/builder/deployment.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package builder

import (
"strconv"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -31,12 +33,66 @@ func NewDeployment(workload *Workload) Deployment {

func (b *deployment) Create() (*appsv1.Deployment, error) { // nolint:golint,unparam
falseValue := false
trueValue := true

revisionHistoryLimit := int32(10)

maxSurge := intstr.FromInt32(0)
maxUnavailable := intstr.FromInt32(1)

// Add config volume
configVolume := corev1.Volume{
Name: AkashConfigVolume,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
}

// Calculate if NodePort is required
requiresNodePort := false
service := &b.deployment.ManifestGroup().Services[b.serviceIdx]
for _, expose := range service.Expose {
if expose.Global {
requiresNodePort = true
break
}
}

// Add init container
initContainer := corev1.Container{
Name: AkashConfigInitName,
Image: "alpine/curl:3.14",
Command: []string{
"/bin/sh",
"-c",
akashInitScript,
},
Env: []corev1.EnvVar{
{
Name: "SERVICE_NAME",
Value: b.Name(),
},
{
Name: "AKASH_CONFIG_PATH",
Value: AkashConfigMount,
},
{
Name: "AKASH_CONFIG_FILE",
Value: AkashConfigEnvFile,
},
{
Name: "AKASH_REQUIRES_NODEPORT",
Value: strconv.FormatBool(requiresNodePort),
},
},
VolumeMounts: []corev1.VolumeMount{
{
Name: AkashConfigVolume,
MountPath: AkashConfigMount,
},
},
}

kdeployment := &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: b.Name(),
Expand Down Expand Up @@ -65,10 +121,11 @@ func (b *deployment) Create() (*appsv1.Deployment, error) { // nolint:golint,unp
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &falseValue,
},
AutomountServiceAccountToken: &falseValue,
Containers: []corev1.Container{b.container()},
ImagePullSecrets: b.secretsRefs,
Volumes: b.volumesObjs,
AutomountServiceAccountToken: &trueValue,
InitContainers: []corev1.Container{initContainer},
Containers: []corev1.Container{b.container()},
ImagePullSecrets: b.secretsRefs,
Volumes: append(b.volumesObjs, configVolume),
},
},
},
Expand Down
45 changes: 45 additions & 0 deletions cluster/kube/builder/rbac.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package builder

import (
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// CreateRole returns a Role named AkashRoleName in the given namespace that
// allows the get, list and watch verbs on services and services/status in the
// core API group.
//
// NOTE(review): serviceName is accepted but not used; the rule carries no
// resource-name restriction, so it is not limited to a single service.
func CreateRole(namespace string, serviceName string) *rbacv1.Role {
	meta := metav1.ObjectMeta{
		Name:      AkashRoleName,
		Namespace: namespace,
	}

	// Read-only access to Service objects and their status subresource.
	readServices := rbacv1.PolicyRule{
		APIGroups: []string{""},
		Resources: []string{"services", "services/status"},
		Verbs:     []string{"get", "list", "watch"},
	}

	role := rbacv1.Role{
		ObjectMeta: meta,
		Rules:      []rbacv1.PolicyRule{readServices},
	}

	return &role
}

// CreateRoleBinding returns a RoleBinding named AkashRoleBinding in the given
// namespace that binds the namespace's "default" ServiceAccount to the Role
// named AkashRoleName.
func CreateRoleBinding(namespace string) *rbacv1.RoleBinding {
	meta := metav1.ObjectMeta{
		Name:      AkashRoleBinding,
		Namespace: namespace,
	}

	// The only subject is the "default" service account of the same namespace.
	defaultAccount := rbacv1.Subject{
		Kind:      "ServiceAccount",
		Name:      "default",
		Namespace: namespace,
	}

	target := rbacv1.RoleRef{
		APIGroup: "rbac.authorization.k8s.io",
		Kind:     "Role",
		Name:     AkashRoleName,
	}

	binding := rbacv1.RoleBinding{
		ObjectMeta: meta,
		Subjects:   []rbacv1.Subject{defaultAccount},
		RoleRef:    target,
	}

	return &binding
}
60 changes: 58 additions & 2 deletions cluster/kube/builder/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"strconv"
)

type StatefulSet interface {
Expand All @@ -31,12 +32,66 @@ func BuildStatefulSet(workload *Workload) StatefulSet {

func (b *statefulSet) Create() (*appsv1.StatefulSet, error) { // nolint:golint,unparam
falseValue := false
trueValue := true

revisionHistoryLimit := int32(1)

partition := int32(0)
maxUnavailable := intstr.FromInt32(1)

// Add config volume
configVolume := corev1.Volume{
Name: AkashConfigVolume,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
}

// Calculate if NodePort is required
requiresNodePort := false
service := &b.deployment.ManifestGroup().Services[b.serviceIdx]
for _, expose := range service.Expose {
if expose.Global {
requiresNodePort = true
break
}
}

// Add init container
initContainer := corev1.Container{
Name: AkashConfigInitName,
Image: "alpine/curl:3.14",
Command: []string{
"/bin/sh",
"-c",
akashInitScript,
},
Env: []corev1.EnvVar{
{
Name: "SERVICE_NAME",
Value: b.Name(),
},
{
Name: "AKASH_CONFIG_PATH",
Value: AkashConfigMount,
},
{
Name: "AKASH_CONFIG_FILE",
Value: AkashConfigEnvFile,
},
{
Name: "AKASH_REQUIRES_NODEPORT",
Value: strconv.FormatBool(requiresNodePort),
},
},
VolumeMounts: []corev1.VolumeMount{
{
Name: AkashConfigVolume,
MountPath: AkashConfigMount,
},
},
}

kdeployment := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: b.Name(),
Expand Down Expand Up @@ -65,10 +120,11 @@ func (b *statefulSet) Create() (*appsv1.StatefulSet, error) { // nolint:golint,u
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &falseValue,
},
AutomountServiceAccountToken: &falseValue,
AutomountServiceAccountToken: &trueValue,
InitContainers: []corev1.Container{initContainer},
Containers: []corev1.Container{b.container()},
ImagePullSecrets: b.secretsRefs,
Volumes: b.volumesObjs,
Volumes: append(b.volumesObjs, configVolume),
},
},
VolumeClaimTemplates: b.pvcsObjs,
Expand Down
Loading