FE HA validation framework #22

Open
wants to merge 5 commits into base: master

4 changes: 4 additions & 0 deletions inventory/reference/group_vars/all.yml
@@ -4,6 +4,10 @@ ansible_python_interpreter: /usr/bin/python3
validation:
  # Check if OpenNebula services are running
  core_services: true
  run_ha_verifications: true
Contributor:

Correct structure to follow the guidelines would be validation.run_fe_ha
and validation.fe_ha.<>
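A minimal sketch of that layout (key names assumed from this suggestion, not taken from the PR as it stands):

```yaml
validation:
  run_fe_ha: true            # control flag for the FE HA checks
  fe_ha:                     # role-specific parameters grouped under their own key
    one_zone_name: OpenNebula
```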

  one_config_path:
    - /etc/one
    - /var/lib/one/remotes/etc
  # List of services to check enabled, stop and restart
  service_list:
    - name: opennebula.service
51 changes: 51 additions & 0 deletions inventory/reference/local-ha.yml
@@ -0,0 +1,51 @@
---
all:
  vars:
    ansible_user: root
    one_version: '7.0'
    one_pass: opennebula
    ee_token: 'ci:Pantufl4.'
    one_vip: 172.20.0.100
    one_vip_cidr: 24
    one_vip_if: eth0
    ds:
      mode: ssh

infra:
Contributor:

What's the purpose of this section, and of the vars under the frontend? Won't it interfere with testing in a particular microenv locally? For example, if I used this same inventory file to test VM HA, it would overwrite many things in that environment.

In that case maybe we could create a new folder under "inventory" for this test case (to contain an example of how to configure it and also how to test it locally with a microenv).
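For illustration, such a layout could look roughly like this (folder and file names here are hypothetical, not part of the PR):

```
inventory/
  fe-ha-example/
    group_vars/
      all.yml      # validation flags for the FE HA checks
    local-ha.yml   # sample microenv inventory for local testing
```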

Contributor Author:

This is a local inventory used for testing. It should be omitted from the PR.

  vars:
    os_image_url: https://d24fmfybwxpuhu.cloudfront.net/ubuntu2404-6.10.0-2-20240710.qcow2
    os_image_size: 30G
    vcpu_static: 2
    memory_MiB: 4096
    ansible_user: root
    infra_bridge: brpub
  hosts:
    node1:
      ansible_host: 172.20.0.1

frontend:
  hosts:
    fe01:
      ansible_host: 172.20.0.10
      infra_hostname: 'node1'
    fe02:
      ansible_host: 172.20.0.11
      infra_hostname: 'node1'
    fe03:
      ansible_host: 172.20.0.12
      infra_hostname: 'node1'
  vars:
    context:
      ETH0_IP: "{{ ansible_host }}"
      PASSWORD: 'OpenNebula'
      ETH0_GATEWAY: 172.20.0.1
      ETH0_NETWORK: 172.20.0.0
      ETH0_MASK: 255.255.255.0
      ETH0_DNS: 172.20.0.1
      SSH_PUBLIC_KEY: |
        ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCkKbQKRj3FC++IBl9U1ZuLXaMBPRWr7HDY6kyHyMgQKaRZ0QTFkA9ADMwcrNu4H2xILN626r6qFTrc4zYpti0U/ps7cyodt50kqjFiaueB1gVbpPvP9mUjVk8NNXNmZDwgtYXaQDMCx14JfHn8mWgxKlwBCMlSRfOyJQx5EGpfzX/FoozRYm+mrUPt8LP+QFPVQMJj45q4Jnv2qWMwuJw7ZNjwnkFEaBLtPjpJTbxRVFeiBxVEwWcsqhKyRdvSDcZAMKoVQETKOw9bBY91sdycl+R+OoljQEa0WyBNO4WcDTc7mosohpj6o5mwybyp91PP88ZxJ4LUA1SYCXn3qBa9

node:
  hosts:
    node1: { ansible_host: 172.20.0.1 }
7 changes: 7 additions & 0 deletions playbooks/fe-ha.yml
@@ -0,0 +1,7 @@
---

- hosts: "{{ frontend_group | d('frontend') }}"
  roles:
    - role: fe_ha
      when: validation.run_ha_verifications == true

8 changes: 8 additions & 0 deletions playbooks/validation.yml
@@ -67,6 +67,14 @@
  roles:
    - role: validation


# Run FE HA verification only when the variable run_ha_verifications is set to true
- hosts: "{{ frontend_group | d('frontend') }}"
  roles:
    - role: fe_ha
      when: validation.run_ha_verifications == true


# Run network tests on the hypervisor hosts
- hosts: "{{ node_group | d('node') }}"
  roles:
6 changes: 6 additions & 0 deletions roles/fe_ha/defaults/main.yml
@@ -0,0 +1,6 @@
---
#################### TEST CONFIGURATION VARIABLES ####################
Contributor:

Let's move this parameter and follow the decision we made after Bruno's feedback to create a hierarchy of config params.
See the docs here: https://github.com/OpenNebula/one-infra/wiki/Development-and-Testing-Post%E2%80%90deployment-Validation-tools#variables

Contributor Author:

Let's discuss to what extent this is applicable to the validation framework and where we should store role-specific variables.

Contributor:

Okay, let's discuss and conclude in chat.

I think the role-specific variables can also follow the same hierarchy, plus a control flag for skip/execute. As I understood, we agreed on this decision with PR #20.
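For illustration, consuming that hierarchy could then look roughly like this (a sketch assuming the validation.run_fe_ha / validation.fe_ha names proposed above, not the current PR code):

```yaml
- hosts: "{{ frontend_group | d('frontend') }}"
  roles:
    - role: fe_ha
      when: validation.run_fe_ha | d(false)
```

with the role reading its parameters as e.g. `{{ validation.fe_ha.one_zone_name }}` rather than from a top-level default.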

# Zone name

one_zone_name: OpenNebula

173 changes: 173 additions & 0 deletions roles/fe_ha/tasks/main.yml
@@ -0,0 +1,173 @@
---

- setup:
    gather_subset:
      - min

- name: Install required task dependencies
  ansible.builtin.package:
    name:
      - jq
    state: present

# Check VIP reachability from all the FE nodes
- name: Check connection to API through a VIP from all FEs
  ansible.builtin.wait_for:
    host: "{{ one_vip }}"
    port: 2633
  register: vip_reachability

- name: Verify VIP reachability from all FEs
  debug:
    msg: "{{ vip_reachability.state }}"

- name: Save VIP reachability from all FEs
  set_fact:
    verification_result: "{{ (verification_result | default({})) | combine({'VIP connectivity from all FEs': 'ok'}) }}"


# Get zone details from the first FE, assuming state 3 is the leader state
- name: Get Zone
  ansible.builtin.shell:
    cmd: onezone show "{{ one_zone_name }}" -j | jq -r '.ZONE.SERVER_POOL.SERVER | .[] | select(.STATE == "3") | .NAME'
  register: zone_details
  ignore_errors: true
  when: hostvars[groups[frontend_group | d('frontend')][0]]['ansible_host'] == ansible_host
  run_once: true

# Test to verify that the content of the /etc/one directory is the same on all FE nodes
- name: Check content of the config directories
  find:
    path: "{{ item }}"
    file_type: file
    recurse: true
  register: found_files
  loop: "{{ validation.one_config_path }}"

- name: Set combined list
  set_fact:
    combined_file_list: "{{ combined_file_list | default([]) + item.files }}"
  loop: "{{ found_files.results }}"

- name: Calculate sha256 sum for each file in the directory
  stat:
    path: "{{ item.path }}"
    checksum_algorithm: sha256
  loop: "{{ combined_file_list }}"
  register: file_hashes

- name: Save hashes per host
  set_fact:
    file_checksums: "{{ file_checksums | default({}) | combine({ item.item.path: item.stat.checksum }) }}"
  loop: "{{ file_hashes.results }}"
  when:
    - item.stat is defined
    - item.stat.exists is defined and item.stat.exists
    - item.stat.isreg is defined and item.stat.isreg
    - item.stat.checksum is defined

# Compare files using fe1 as a reference
- name: Set fact for diff files
  set_fact:
    diff_files: |
      {% for fname in hostvars[groups['frontend'][0]]['file_checksums'] %}
      {% if hostvars[groups['frontend'][0]]['file_checksums'][fname] != hostvars[item]['file_checksums'][fname] %}
      {{ (diff_files_dict | default({})) | combine({ item: fname }) }}
      {% endif %}
      {% endfor %}
  loop: "{{ groups.frontend }}"
  run_once: true

- name: Save /etc/one content checks
  set_fact:
    verification_result: "{{ verification_result | combine({'Check content of the /etc/one directory for file diffs on all FE nodes. Diff files': diff_files }) }}"
  run_once: true

### Leader failover tests
#
- name: Set initial leader node
  set_fact:
    initial_leader: "{{ zone_details.stdout }}"
  run_once: true

- name: Display current leader node
  debug:
    msg: "Current leader node is {{ initial_leader }}"

- name: Save initial leader node
  set_fact:
    verification_result: "{{ verification_result | combine({'Initial FE leader node': initial_leader }) }}"


- name: Stop OpenNebula oned service on the current leader to simulate failure
  systemd:
    name: opennebula
    state: stopped
  delegate_to: "{{ initial_leader }}"  # Stop the service on the identified leader
  #when: ansible_host == initial_leader
  run_once: true

# Give OpenNebula's internal HA mechanism time to elect a new leader
- name: Wait for leader failover
  pause:
    seconds: 20
  run_once: true
Contributor:

run_once should not be used, as recommended by Michal in one-deploy coding style: https://github.com/OpenNebula/one-deploy/wiki/code_style#4-be-careful-with-run_once

Due to this I am also using, in the connectivity matrix, logic to make sure it only runs on the first frontend. In this case maybe we have to find another option, because we might get different behaviour if we happen to run the leader status check on the "initial_leader" vs. any other node.
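For comparison, pinning the task to the first frontend instead of using run_once could look like this (a sketch, not the exact task from either role):

```yaml
- name: Get Zone
  ansible.builtin.shell:
    cmd: onezone show "{{ one_zone_name }}" -j | jq -r '.ZONE.SERVER_POOL.SERVER | .[] | select(.STATE == "3") | .NAME'
  register: zone_details
  when: inventory_hostname == groups[frontend_group | d('frontend')] | first
```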

Contributor Author:

I think this is a slightly different case, i.e. we don't have a parallel operation here, but have to get the output of the onezone command from one of the nodes. Let's discuss if you see any risk here.

Contributor:

Yeah, as I understand it, run_once will just run on a random node of the group. And the test would anyway fail earlier if we did not run these on the FE nodes... So I guess it is fine; I would prefer if we had the same approach for run_once, but we can live with it.


- name: Check that the one VIP was migrated and is reachable from all FE nodes
  ansible.builtin.wait_for:
    host: "{{ one_vip }}"
    port: 2633
  register: vip_reachability

- name: Save VIP reachability post migration
  set_fact:
    verification_result: "{{ verification_result | combine({'VIP reachability after leader migration': 'ok' }) }}"

- name: Verify VIP reachability from all FEs
  debug:
    msg: "{{ vip_reachability.state }}"


- name: Get a new leader id
  ansible.builtin.shell:
    cmd: onezone show "{{ one_zone_name }}" -j | jq -r '.ZONE.SERVER_POOL.SERVER | .[] | select(.STATE == "3") | .NAME'
  register: migrated_leader
  ignore_errors: true
  when: initial_leader != ansible_host
  run_once: true

- name: Set a new leader node
  set_fact:
    migrated_leader: "{{ migrated_leader.stdout }}"
  #delegate_to:
  #when: hostvars[groups[frontend_group | d('frontend')][1]]['ansible_host'] == ansible_host
  run_once: true

- name: Save a new Leader node
Member:

let's use leader with downcase for consistency

  set_fact:
    verification_result: "{{ verification_result | combine({'A new FE leader node after the failover': migrated_leader }) }}"

- name: Check that the leader is indeed new
  debug:
    msg: "Original leader: {{ initial_leader }}, the new leader is: {{ migrated_leader }}"
  when: initial_leader != migrated_leader
  run_once: true

# Start the OpenNebula service on the original leader
- name: Recover stopped leader
  systemd:
    name: opennebula
    state: started
  delegate_to: "{{ initial_leader }}"
  run_once: true

- name: Render results template
  delegate_to: localhost
  become: false
  vars:
    date: "{{ '%Y-%m-%d %H:%M:%S' | strftime(ansible_date_time.epoch) }}"
  template:
    src: report_template.j2
    dest: /tmp/fe_ha_report.html
  ignore_errors: true

58 changes: 58 additions & 0 deletions roles/fe_ha/templates/report_template.j2
@@ -0,0 +1,58 @@
<html>
<head>
  <title> Cloud verification report. Executed on {{ date }} </title>
</head>

<body>
<style>
  .table_component {
    overflow: auto;
    width: 100%;
  }

  .table_component table {
    border: 1px solid #dededf;
    width: 100%;
    table-layout: fixed;
    border-collapse: collapse;
    border-spacing: 1px;
    text-align: left;
  }

  .table_component caption {
    caption-side: top;
    text-align: left;
  }

  .table_component th {
    border: 1px solid #dededf;
    background-color: #eceff1;
    color: #000000;
    padding: 5px;
  }

  .table_component td {
    border: 1px solid #dededf;
    background-color: #ffffff;
    color: #000000;
    padding: 5px;
  }
</style>

<div class="table_component" role="region" tabindex="0">

  <!-- Table with the individual tests -->
  <table>
    <tr><th> HA validation scenario name</th><th>Test result status</th></tr>
    {% for k, v in verification_result.items() %}
    <tr><td> {{ k }}</td>
        <td>{{ v }}</td></tr>
    {% endfor %}
  </table>
</div>
<br><br><br>

</body>
</html>