diff --git a/galaxy.yml b/galaxy.yml
index faabdde..5abff1a 100644
--- a/galaxy.yml
+++ b/galaxy.yml
@@ -99,6 +99,7 @@
     - role: galaxyproject.postgresql_objects
       become: true
       become_user: postgres
+    - role: metacentrum.postgresql_restore
 
 - hosts: noletsencrypt
   become: true
diff --git a/galaxy_db_recovery.yaml b/galaxy_db_recovery.yaml
new file mode 100644
index 0000000..76ee2fc
--- /dev/null
+++ b/galaxy_db_recovery.yaml
@@ -0,0 +1,55 @@
+---
+# Stand-alone playbook for point-in-time recovery of the Galaxy PostgreSQL DB.
+# The restore itself is implemented by the metacentrum.postgresql_restore role;
+# this playbook only prepares the hosts and supplies the restore parameters.
+
+- name: apt update, python, pip
+  hosts: all
+  become: true
+  become_user: root
+  tasks:
+    - name: Install pip and python-is-python3
+      ansible.builtin.apt:
+        name:
+          - python3-pip
+          - python-is-python3
+        update_cache: true
+      when: ansible_os_family == 'Debian'
+
+- name: Restore Galaxy DB from backup
+  hosts: dbservers
+  become: true
+  become_user: root
+  vars:
+    # Base backup to restore: a backup directory name such as
+    # '20250529T010001Z', or 'latest'.
+    galaxy_db_restore_version: '20250529T010001Z'
+    # Point in time to recover to. It must be later than the creation of the
+    # selected base backup (its version timestamp).
+    galaxy_db_restore_timestamp: '2025-05-29 02:30:00'
+    # 'latest', 'current' (recover along the timeline that was current when
+    # the base backup was taken) or '0xID' for a specific numeric timeline ID
+    # (the hexadecimal number used in WAL file names).
+    galaxy_db_restore_timeline: 'current'
+    # 'promote' continues right after recovery; 'pause' stops so the DB state
+    # can be checked manually. To unpause PSQL DB use command
+    # 'psql -c "select * from pg_wal_replay_resume();"' as postgres user.
+    galaxy_db_restore_action: 'promote'
+  pre_tasks:
+    - name: Install Dependencies
+      ansible.builtin.package:
+        name:
+          - acl
+          - anacron
+          - rsync
+  roles:
+    - galaxyproject.postgresql
+    - role: galaxyproject.postgresql_objects
+      become: true
+      become_user: postgres
+  post_tasks:
+    # Kept in post_tasks so that the roles above, including their handlers,
+    # have finished before the restore starts.
+    - name: Restore Galaxy DB from backup
+      ansible.builtin.import_role:
+        name: metacentrum.postgresql_restore
diff --git a/roles/metacentrum.postgresql_restore/defaults/main.yml b/roles/metacentrum.postgresql_restore/defaults/main.yml
new file mode 100644
index 0000000..0eea75b
--- /dev/null
+++ b/roles/metacentrum.postgresql_restore/defaults/main.yml
@@ -0,0 +1,18 @@
+---
+# Defaults for metacentrum.postgresql_restore.
+
+# Base backup to restore: a backup directory name such as '20250529T010001Z',
+# or 'latest'. If galaxy_db_restore_version is an empty string, no database
+# restore will take place.
+galaxy_db_restore_version: ''
+# Point in time to recover to, e.g. '2025-05-29 02:30:00'. The point of return
+# must have happened after DB base backup creation (version timestamp).
+galaxy_db_restore_timestamp: ''
+# 'latest' (default), 'current' (recover along the same timeline that was
+# current when the base backup was taken) or '0xID' for a specific numeric
+# timeline ID (hexadecimal number used in WAL file name).
+galaxy_db_restore_timeline: 'latest'
+# 'promote' (continue right after recovery) or 'pause' (default) to manually
+# check the DB state. To unpause PSQL DB use command
+# 'psql -c "select * from pg_wal_replay_resume();"' as postgres user.
+galaxy_db_restore_action: 'pause'
diff --git a/roles/metacentrum.postgresql_restore/tasks/main.yml b/roles/metacentrum.postgresql_restore/tasks/main.yml
new file mode 100644
index 0000000..e4c20d0
--- /dev/null
+++ b/roles/metacentrum.postgresql_restore/tasks/main.yml
@@ -0,0 +1,192 @@
+---
+# Point-in-time recovery of the Galaxy PostgreSQL database from a base backup
+# and the archived WAL files, based on
+# https://training.galaxyproject.org/training-material/topics/admin/tutorials/backup-cleanup/tutorial.html#restoration
+#
+# All tasks live in one block guarded by galaxy_db_restore_version, so results
+# registered here (psql_data_dir, galaxyctl_status, ...) are only evaluated
+# when the tasks that register them have actually run.
+#
+# TODO: interactive selection of the backup version is not implemented here
+# (vars_prompt is a play-level keyword and cannot be used inside a role).
+
+- name: Restore Galaxy DB backup
+  when: galaxy_db_restore_version != ''
+  block:
+    - name: Install Dependencies
+      ansible.builtin.package:
+        name:
+          - rsync
+      become: true
+
+    # Probe only: a non-zero rc means there is no Galaxy to stop and start.
+    - name: Register if galaxy exists
+      ansible.builtin.command:
+        cmd: galaxyctl status
+      register: galaxyctl_status
+      changed_when: false
+      failed_when: false
+      become: true
+
+    - name: Register psql data directory
+      ansible.builtin.command:
+        cmd: psql -Atc 'show data_directory;'
+      register: psql_data_dir
+      changed_when: false
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - pick proper backup version
+      ansible.builtin.shell:
+        cmd: |
+          if [ {{ galaxy_db_restore_version | quote }} = 'latest' ]; then
+            ls -dt {{ postgresql_backup_dir | quote }}/20*Z | head -1
+          else
+            ls -d {{ postgresql_backup_dir | quote }}/{{ galaxy_db_restore_version | quote }}
+          fi
+      register: galaxy_db_restore_dir
+      changed_when: false
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - print data directory and selected version
+      ansible.builtin.debug:
+        msg:
+          - "PostgreSQL data directory path: {{ psql_data_dir.stdout }}"
+          - "Backup version to be restored: {{ galaxy_db_restore_dir.stdout }}"
+
+    # Abort before anything is stopped or moved. With 'latest' the ls | head
+    # pipeline exits 0 even when no backup matches, leaving stdout empty.
+    - name: Restore Galaxy DB backup - verify that restore is possible
+      ansible.builtin.assert:
+        that:
+          - psql_data_dir.stdout != ''
+          - galaxy_db_restore_dir.stdout != ''
+        fail_msg: >-
+          No PostgreSQL data directory path or no backup version found!
+          PSQL DB restore is not possible!
+
+    - name: Restore Galaxy DB backup - stop Galaxy
+      ansible.builtin.command:
+        cmd: galaxyctl stop
+      become: true
+      when: (galaxyctl_status.rc | default(1)) == 0
+
+    - name: Restore Galaxy DB backup - stop postgresql
+      ansible.builtin.systemd:
+        name: postgresql
+        state: stopped
+      become: true
+
+    # Errors are not ignored here: if the move fails, the rsync below would
+    # merge the backup into the live data directory.
+    - name: Restore Galaxy DB backup - backup current postgresql
+      ansible.builtin.command:
+        argv:
+          - mv
+          - "{{ psql_data_dir.stdout }}"
+          - "{{ psql_data_dir.stdout }}.backup_{{ ansible_date_time.iso8601 }}"
+        removes: "{{ psql_data_dir.stdout }}"
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - copy proper backup version
+      ansible.builtin.command:
+        argv:
+          - rsync
+          - "-a"
+          - "{{ galaxy_db_restore_dir.stdout }}/"
+          - "{{ psql_data_dir.stdout }}/"
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - restrict data directory permissions
+      ansible.builtin.command:
+        argv:
+          - chmod
+          - "-R"
+          - "0700"
+          - "{{ psql_data_dir.stdout }}"
+      become: true
+      become_user: postgres
+
+    # regexp makes each setting replace an existing line for the same key
+    # instead of appending a second one.
+    - name: Restore Galaxy DB backup - fill postgresql.auto.conf
+      ansible.builtin.lineinfile:
+        path: "{{ psql_data_dir.stdout }}/postgresql.auto.conf"
+        regexp: '^\s*{{ item.key }}\s*='
+        insertafter: EOF
+        line: "{{ item.key }} = '{{ item.value }}'"
+        state: present
+        create: true
+        mode: "0600"
+      loop:
+        - key: restore_command
+          value: 'cp "{{ postgresql_backup_dir }}/wal_archive/%f" "%p"'
+        - key: recovery_target_time
+          value: "{{ galaxy_db_restore_timestamp }}"
+        - key: recovery_target_timeline
+          value: "{{ galaxy_db_restore_timeline }}"
+        - key: recovery_target_action
+          value: "{{ galaxy_db_restore_action }}"
+      loop_control:
+        label: "{{ item.key }}"
+      when: item.value != ''
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - touch the signal file
+      ansible.builtin.file:
+        path: "{{ psql_data_dir.stdout }}/recovery.signal"
+        state: touch
+        mode: "0600"
+      become: true
+      become_user: postgres
+
+    - name: Restore Galaxy DB backup - start postgresql
+      ansible.builtin.systemd:
+        name: postgresql
+        state: started
+      become: true
+
+    - name: Restore Galaxy DB backup - register if postgresql log exists
+      ansible.builtin.stat:
+        path: "/var/log/postgresql/postgresql-{{ postgresql_default_version }}-main.log"
+      register: psql_log
+      become: true
+
+    - name: Restore Galaxy DB backup - read the PSQL log tail
+      ansible.builtin.command:
+        argv:
+          - tail
+          - "-20"
+          - "{{ psql_log.stat.path }}"
+      register: psql_log_tail
+      changed_when: false
+      become: true
+      when: psql_log.stat.exists
+
+    - name: Restore Galaxy DB backup - show the PSQL log tail
+      ansible.builtin.debug:
+        msg: "{{ psql_log_tail.stdout_lines }}"
+      when: psql_log.stat.exists
+
+    - name: Restore Galaxy DB backup - wait to read the PSQL log
+      ansible.builtin.pause:
+        seconds: 10
+      when: galaxy_db_restore_action == 'promote'
+
+    - name: Restore Galaxy DB backup - wait for manual check of PSQL state
+      ansible.builtin.pause:
+        prompt: >-
+          Please, manually check the DB state. To unpause PSQL DB use command
+          'psql -c "select * from pg_wal_replay_resume();"' as postgres user
+          on the server.
+      when: galaxy_db_restore_action != 'promote'
+
+    - name: Restore Galaxy DB backup - start Galaxy
+      ansible.builtin.command:
+        cmd: galaxyctl start
+      become: true
+      when: (galaxyctl_status.rc | default(1)) == 0
diff --git a/test_prompt.yaml b/test_prompt.yaml
new file mode 100644
index 0000000..116363f
--- /dev/null
+++ b/test_prompt.yaml
@@ -0,0 +1,51 @@
+---
+# Experiment: let the operator pick the Galaxy DB backup version interactively.
+# The restore itself lives in roles/metacentrum.postgresql_restore; this
+# playbook only lists the available backups and asks for a choice.
+
+- name: Prompt for the Galaxy DB backup version to restore
+  hosts: galaxy-qa1.galaxy.cloud.e-infra.cz
+  become: true
+  become_user: root
+  vars:
+    # Not read by the tasks below; same parameters as the restore role.
+    galaxy_db_restore_version: '20250529T010001Z'
+    galaxy_db_restore_timestamp: '2025-05-29 02:30:00'
+    galaxy_db_restore_timeline: 'current'
+    galaxy_db_restore_action: 'promote'
+  tasks:
+    - name: Register psql data directory
+      ansible.builtin.command:
+        cmd: psql -Atc 'show data_directory;'
+      register: psql_data_dir
+      changed_when: false
+      become: true
+      become_user: postgres
+
+    - name: Gather Galaxy DB backup versions
+      ansible.builtin.find:
+        paths: "{{ postgresql_backup_dir }}"
+        file_type: directory
+        patterns: "*Z"
+      register: db_versions
+      become: true
+      become_user: postgres
+
+    - name: Set backup versions as a list
+      ansible.builtin.set_fact:
+        version_list: "{{ db_versions.files | map(attribute='path') | list }}"
+
+    - name: Select Galaxy DB backup version from a list
+      ansible.builtin.debug:
+        msg:
+          - "Select database version to restore:"
+          - "{{ version_list | map('basename') | list }}"
+
+    - name: Wait for selection
+      ansible.builtin.pause:
+        prompt: "Select version to restore from the list: {{ version_list | map('basename') | list }}"
+      register: selected_version
+
+    - name: Print selected Galaxy DB backup versions
+      ansible.builtin.debug:
+        msg: "You selected {{ selected_version.user_input }} to restore"