diff --git a/docs/dictionary/en-custom.txt b/docs/dictionary/en-custom.txt
index c826798514..90e1c0ae9a 100644
--- a/docs/dictionary/en-custom.txt
+++ b/docs/dictionary/en-custom.txt
@@ -352,6 +352,7 @@ networkconfig
 networkmanager
 networktype
 nfs
 nftables
+nhc
 nic
 nigzpbgugpsavdmfyl
@@ -507,6 +508,7 @@ sig
 Sinha
 sizepercent
 skbg
 skiplist
+snr
 specificities
 spnego
diff --git a/roles/cifmw_snr_nhc/README.md b/roles/cifmw_snr_nhc/README.md
new file mode 100644
index 0000000000..64c5cd64f4
--- /dev/null
+++ b/roles/cifmw_snr_nhc/README.md
@@ -0,0 +1,21 @@
+# cifmw_snr_nhc
+Apply Self Node Remediation and Node Health Check Custom Resources on OpenShift.
+
+## Privilege escalation
+None - all actions use the provided kubeconfig and require no additional host privileges.
+
+## Parameters
+* `cifmw_snr_nhc_kubeconfig`: (String) Path to the kubeconfig file. Defaults to `/home/zuul/.kube/config`.
+* `cifmw_snr_nhc_kubeadmin_password_file`: (String) Path to the kubeadmin password file. Defaults to `/home/zuul/.kube/kubeadmin-password`.
+* `cifmw_snr_nhc_namespace`: (String) Namespace used for SNR and NHC resources. Defaults to `openshift-workload-availability`.
+
+## Examples
+```yaml
+- name: Configure SNR and NHC
+  hosts: masters
+  roles:
+    - role: cifmw_snr_nhc
+      cifmw_snr_nhc_kubeconfig: "/home/zuul/.kube/config"
+      cifmw_snr_nhc_kubeadmin_password_file: "/home/zuul/.kube/kubeadmin-password"
+      cifmw_snr_nhc_namespace: openshift-workload-availability
+```
diff --git a/roles/cifmw_snr_nhc/defaults/main.yml b/roles/cifmw_snr_nhc/defaults/main.yml
new file mode 100644
index 0000000000..17142d6d22
--- /dev/null
+++ b/roles/cifmw_snr_nhc/defaults/main.yml
@@ -0,0 +1,4 @@
+---
+cifmw_snr_nhc_kubeconfig: "/home/zuul/.kube/config"
+cifmw_snr_nhc_kubeadmin_password_file: "/home/zuul/.kube/kubeadmin-password"
+cifmw_snr_nhc_namespace: openshift-workload-availability
diff --git a/roles/cifmw_snr_nhc/meta/main.yml b/roles/cifmw_snr_nhc/meta/main.yml
new file mode 100644
index 0000000000..517a5c4875
--- /dev/null
+++ b/roles/cifmw_snr_nhc/meta/main.yml
@@ -0,0 +1,12 @@
+---
+galaxy_info:
+  author: CI Framework
+  description: CI Framework Role -- cifmw_snr_nhc
+  company: Red Hat
+  license: Apache-2.0
+  min_ansible_version: "2.14"
+  namespace: cifmw
+  galaxy_tags:
+    - cifmw
+
+dependencies: []
diff --git a/roles/cifmw_snr_nhc/tasks/main.yml b/roles/cifmw_snr_nhc/tasks/main.yml
new file mode 100644
index 0000000000..f09ac006ca
--- /dev/null
+++ b/roles/cifmw_snr_nhc/tasks/main.yml
@@ -0,0 +1,429 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+# The password is sensitive: tasks that read, store or pass it set no_log.
+- name: Read kubeadmin password
+  ansible.builtin.slurp:
+    src: "{{ cifmw_snr_nhc_kubeadmin_password_file }}"
+  register: kubeadmin_password
+  changed_when: false
+  no_log: true
+
+- name: Decode kubeadmin password
+  ansible.builtin.set_fact:
+    kubeadmin_password_decoded: "{{ kubeadmin_password.content | b64decode | trim }}"
+  no_log: true
+
+# A non-zero return code from "oc whoami" means there is no usable session.
+- name: Check if already logged in
+  ansible.builtin.command: oc whoami
+  register: oc_whoami_check
+  failed_when: false
+  changed_when: false
+
+# The quote filter keeps the password one argument whatever it contains.
+- name: Explicitly login using oc login and update kubeconfig
+  ansible.builtin.command: >
+    oc login --username=kubeadmin --password={{ kubeadmin_password_decoded | quote }}
+  when: oc_whoami_check.rc != 0
+  register: oc_login_output
+  changed_when: "'Login successful' in oc_login_output.stdout"
+  no_log: true
+
+- name: Debug login output
+  when: oc_whoami_check.rc != 0 and ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ oc_login_output.stdout }}"
+
+- name: Create the workload-availability namespace
+  kubernetes.core.k8s:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    state: present
+    resource_definition:
+      apiVersion: v1
+      kind: Namespace
+      metadata:
+        name: "{{ cifmw_snr_nhc_namespace }}"
+  register: namespace_result
+
+- name: Display namespace creation result
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ namespace_result }}"
+
+- name: Check if the namespace exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: v1
+    kind: Namespace
+    name: "{{ cifmw_snr_nhc_namespace }}"
+  register: current_project
+
+- name: Display current namespace status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "Namespace exists: {{ current_project.resources | length > 0 }}"
+
+# No "switch namespace" task is needed: the kubernetes.core modules keep no
+# session state and every task below names its namespace explicitly.
+- name: Create the workload-availability-operator-group resource
+  kubernetes.core.k8s:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    state: present
+    resource_definition:
+      apiVersion: operators.coreos.com/v1
+      kind: OperatorGroup
+      metadata:
+        name: workload-availability-operator-group
+        namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: operator_group_result
+
+- name: Display OperatorGroup creation result
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ operator_group_result }}"
+
+- name: Check if the OperatorGroup exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1
+    kind: OperatorGroup
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: operator_group_check
+
+- name: Display OperatorGroup information
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ operator_group_check.resources }}"
+
+- name: Create the self-node-remediation Subscription
+  kubernetes.core.k8s:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    state: present
+    resource_definition:
+      apiVersion: operators.coreos.com/v1alpha1
+      kind: Subscription
+      metadata:
+        name: self-node-remediation-operator
+        namespace: "{{ cifmw_snr_nhc_namespace }}"
+      spec:
+        channel: stable
+        installPlanApproval: Automatic
+        name: self-node-remediation
+        package: self-node-remediation
+        source: redhat-operators
+        sourceNamespace: openshift-marketplace
+  register: subscription_result
+
+- name: Display Subscription creation result
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ subscription_result }}"
+
+- name: Check if the Subscription exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: Subscription
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: self-node-remediation-operator
+  register: subscription_check
+
+- name: Display Subscription information
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ subscription_check.resources }}"
+
+- name: Check Subscription status
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: Subscription
+    name: self-node-remediation-operator
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: snr_subscription
+
+- name: Display Subscription status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_subscription.resources }}"
+
+# Wait for the operator first so that the checks below see what it deployed.
+- name: Wait for Self Node Remediation deployment to be ready
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: apps/v1
+    kind: Deployment
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: self-node-remediation-controller-manager
+  register: snr_deployment_check
+  until: >-
+    snr_deployment_check.resources[0].status.availableReplicas is defined and
+    snr_deployment_check.resources[0].status.availableReplicas > 0
+  retries: 20
+  delay: 15
+
+- name: Verify SelfNodeRemediationTemplate CR exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: self-node-remediation.medik8s.io/v1alpha1
+    kind: SelfNodeRemediationTemplate
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: snr_template
+
+- name: Display SelfNodeRemediationTemplate status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_template.resources }}"
+
+- name: Check ClusterServiceVersion (CSV) status for remediation
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: ClusterServiceVersion
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: csv_status
+
+- name: Display CSV status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ csv_status.resources }}"
+
+- name: Verify Self Node Remediation Operator deployment is running
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: apps/v1
+    kind: Deployment
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: self-node-remediation-controller-manager
+  register: snr_deployment
+
+- name: Display Deployment status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_deployment.resources }}"
+
+- name: Check SelfNodeRemediationConfig CR
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: self-node-remediation.medik8s.io/v1alpha1
+    kind: SelfNodeRemediationConfig
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: snr_config
+
+- name: Display SelfNodeRemediationConfig status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_config.resources }}"
+
+- name: Verify Self Node Remediation DaemonSet status
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: apps/v1
+    kind: DaemonSet
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: self-node-remediation-ds
+  register: snr_daemonset
+
+- name: Display DaemonSet status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_daemonset.resources }}"
+
+- name: Verify SelfNodeRemediationConfig CR exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: self-node-remediation.medik8s.io/v1alpha1
+    kind: SelfNodeRemediationConfig
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: self-node-remediation-config
+  register: snr_config_detail
+
+- name: Debug SelfNodeRemediationConfig CR details
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_config_detail.resources }}"
+
+- name: Verify SelfNodeRemediationTemplate exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: self-node-remediation.medik8s.io/v1alpha1
+    kind: SelfNodeRemediationTemplate
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: snr_template_detail
+
+- name: Debug SelfNodeRemediationTemplate details
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ snr_template_detail.resources }}"
+
+- name: Check remediationStrategy in SelfNodeRemediationTemplate
+  when: snr_template_detail.resources | length > 0 and ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "Remediation strategy: {{ snr_template_detail.resources[0].spec.template.spec.remediationStrategy | default('Not defined') }}"
+
+- name: Create the Node Health Check Subscription
+  kubernetes.core.k8s:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    state: present
+    resource_definition:
+      apiVersion: operators.coreos.com/v1alpha1
+      kind: Subscription
+      metadata:
+        name: node-health-check-operator
+        namespace: "{{ cifmw_snr_nhc_namespace }}"
+      spec:
+        channel: stable
+        installPlanApproval: Automatic
+        name: node-healthcheck-operator
+        source: redhat-operators
+        sourceNamespace: openshift-marketplace
+        package: node-healthcheck-operator
+  register: nhc_subscription_result
+
+- name: Display Node Health Check Subscription result
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_subscription_result }}"
+
+- name: Check if the Node Health Check Subscription exists
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: Subscription
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: node-health-check-operator
+  register: nhc_subscription_check
+
+- name: Display Node Health Check Subscription information
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_subscription_check.resources }}"
+
+- name: Verify Node Health Check Subscription
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: Subscription
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+    name: node-health-check-operator
+  register: nhc_subscription_status
+
+- name: Display Node Health Check Subscription status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_subscription_status.resources }}"
+
+# Only the Node Health Check CSV counts; phase may be unset on a fresh CSV.
+- name: Wait for CSV to reach Succeeded phase
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: ClusterServiceVersion
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: csv_check
+  until: >-
+    csv_check.resources
+    | selectattr('metadata.name', 'search', 'node-healthcheck')
+    | selectattr('status.phase', 'defined')
+    | selectattr('status.phase', 'equalto', 'Succeeded')
+    | list | length > 0
+  retries: 20
+  delay: 15
+
+- name: Check ClusterServiceVersion (CSV) for Node Health Check Operator
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: operators.coreos.com/v1alpha1
+    kind: ClusterServiceVersion
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: nhc_csv_status
+
+- name: Display Node Health Check CSV status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_csv_status.resources | selectattr('metadata.name', 'search', 'health') }}"
+
+- name: Verify Node Health Check Operator Deployment
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: apps/v1
+    kind: Deployment
+    namespace: "{{ cifmw_snr_nhc_namespace }}"
+  register: nhc_deployments
+
+- name: Display Node Health Check Operator Deployment status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_deployments.resources | selectattr('metadata.name', 'search', 'node-healthcheck') }}"
+
+- name: Create Node Health Check CR to use SNR
+  kubernetes.core.k8s:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    state: present
+    resource_definition:
+      apiVersion: remediation.medik8s.io/v1alpha1
+      kind: NodeHealthCheck
+      metadata:
+        name: nodehealthcheck-sample
+      spec:
+        minHealthy: "51%"
+        remediationTemplate:
+          apiVersion: self-node-remediation.medik8s.io/v1alpha1
+          name: self-node-remediation-automatic-strategy-template
+          namespace: "{{ cifmw_snr_nhc_namespace }}"
+          kind: SelfNodeRemediationTemplate
+        selector:
+          matchExpressions:
+            - key: node-role.kubernetes.io/worker
+              operator: Exists
+        unhealthyConditions:
+          - type: Ready
+            status: "False"
+            duration: 30s
+          - type: Ready
+            status: Unknown
+            duration: 30s
+  register: nhc_cr_creation
+
+- name: Wait for Node Health Check CR to be created
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: remediation.medik8s.io/v1alpha1
+    kind: NodeHealthCheck
+    name: nodehealthcheck-sample
+  register: nhc_cr_ready
+  until: nhc_cr_ready.resources | length > 0
+  retries: 10
+  delay: 10
+
+- name: Display Node Health Check CR creation result
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_cr_creation }}"
+
+- name: Verify Node Health Check CR existence
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ cifmw_snr_nhc_kubeconfig }}"
+    api_version: remediation.medik8s.io/v1alpha1
+    kind: NodeHealthCheck
+    name: nodehealthcheck-sample
+  register: nhc_cr_check
+
+- name: Display Node Health Check CR status
+  when: ansible_verbosity > 0
+  ansible.builtin.debug:
+    msg: "{{ nhc_cr_check.resources }}"