Fix issues 3,5,6,7,8,9,11,15,16: security hardening and reliability improvements

- ansible.cfg: enable host_key_checking (closes #1)
- update_upgrade.yml: fix reboot crash on non-Debian hosts, exclude AnsibleHost from targets (closes #2, #7)
- deploy.yml: replace silent ignore_errors with real container health assertion (closes #3)
- redeploy.yml: same assertion fix + restic --overwrite always + RESTIC_RESTORE_PATH variable (closes #3, #4, #5)
- disaster.yml: same fixes as redeploy.yml (closes #3, #4, #5)
- docker_update_containers.yml: create missing playbook (closes #6)
- fresh_install.yml: add safety guard to abort if containers already running (closes #8)
- docker_status.yml: add become: true (closes #9)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
jeet
2026-03-22 14:14:14 -04:00
parent fa67a195ab
commit d2e8cc6e70
8 changed files with 106 additions and 28 deletions
+1 -1
View File
@@ -1,4 +1,4 @@
# Ansible defaults: inventory location and SSH host-key verification policy.
[defaults]
inventory = ./inventory.yml
# NOTE(review): two values are listed because this view shows the old and the
# new line of the change side by side; per the commit message the intended
# setting is True -- confirm only one line exists in the real file.
host_key_checking = False
host_key_checking = True
#vault_password_file = ./.password_file
+11 -5
View File
@@ -38,15 +38,21 @@
ansible.builtin.command:
cmd: docker compose up -d
chdir: ~/docker
ignore_errors: yes
# Fixed 30-second wait before the container check below.
- name: Pause for 30 seconds to allow containers to stabilize
ansible.builtin.pause:
seconds: 30
# NOTE(review): this listing shows pre- and post-change lines together; this
# task name and its one-line inspect command appear to be the version that the
# "Verify all containers are running" task replaces -- confirm in the repository.
- name: Check container status
ansible.builtin.shell: docker compose ps -q | xargs -n1 docker container inspect --format '{{ "{{" }} .State.Running {{ "}}" }}'
# Compares the number of services in the compose config with the number of
# running containers; on a mismatch it prints FAIL, lists `docker compose ps`
# and exits 1. Runs in ~/docker and never reports a change.
# NOTE(review): `ignore_errors` is still listed under this task; if it remains
# in the real file the `exit 1` will not stop the play -- confirm it was removed.
- name: Verify all containers are running
ansible.builtin.shell: |
expected=$(docker compose config --services | wc -l | tr -d ' ')
running=$(docker compose ps --status running -q | wc -l | tr -d ' ')
if [ "$expected" != "$running" ]; then
echo "FAIL: $running/$expected containers running"
docker compose ps
exit 1
fi
echo "OK: all $running containers running"
args:
chdir: ~/docker
register: container_status
ignore_errors: yes
changed_when: false
+16 -7
View File
@@ -2,6 +2,7 @@
hosts: DR
tasks:
- include_vars: ./vault/restic/restic.yml
- name: Create folder
ansible.builtin.file:
path: ~/docker
@@ -9,28 +10,36 @@
mode: '0755'
# Restores the latest restic snapshot into ~/docker.
#
# RESTIC_RESTORE_PATH is the path inside the snapshot to restore (previously
# hard-coded as /source/gcloud). Verify it with `restic snapshots` before
# running and set it in vault/restic/restic.yml.
#
# Credentials are handed to restic through the task environment instead of
# being templated into the shell script, so values containing spaces or shell
# metacharacters cannot break or inject into the command line.
- name: Pull backups
  ansible.builtin.shell: |
    unset HISTFILE
    restic restore "latest:${RESTIC_RESTORE_PATH}" --target ./ --overwrite always
  args:
    # chdir makes the task fail if ~/docker is missing; an unchecked `cd`
    # would let the restore run in whatever directory the shell started in.
    chdir: ~/docker
  environment:
    RESTIC_REPOSITORY: "{{ RESTIC_REPOSITORY }}"
    AWS_ACCESS_KEY_ID: "{{ AWS_ACCESS_KEY_ID }}"
    AWS_SECRET_ACCESS_KEY: "{{ AWS_SECRET_ACCESS_KEY }}"
    RESTIC_PASSWORD: "{{ RESTIC_PASSWORD }}"
    RESTIC_RESTORE_PATH: "{{ RESTIC_RESTORE_PATH }}"
  # Keeps the secrets out of task output and logs. This also hides restic's
  # error text; set to false temporarily when debugging a failed restore.
  no_log: true
# Starts the compose project in ~/docker in detached mode.
# NOTE(review): this listing shows pre- and post-change lines together; the
# `ignore_errors` line under this task appears to be the removed version --
# confirm in the repository.
- name: Start container using Docker Compose
ansible.builtin.command:
cmd: docker compose up -d
chdir: ~/docker
ignore_errors: true
# Fixed 30-second wait before the container check below.
- name: Pause for 30 seconds to allow containers to stabilize
ansible.builtin.pause:
seconds: 30
# NOTE(review): this task name and its one-line inspect command appear to be
# the version that the "Verify all containers are running" task replaces --
# confirm in the repository.
- name: Check container status
ansible.builtin.shell: docker compose ps -q | xargs -n1 docker container inspect --format '{{ "{{" }} .State.Running {{ "}}" }}'
# Compares the number of services in the compose config with the number of
# running containers; on a mismatch it prints FAIL, lists `docker compose ps`
# and exits 1. Runs in ~/docker and never reports a change.
# NOTE(review): `ignore_errors` is still listed under this task; if it remains
# in the real file the `exit 1` will not stop the play -- confirm it was removed.
- name: Verify all containers are running
ansible.builtin.shell: |
expected=$(docker compose config --services | wc -l | tr -d ' ')
running=$(docker compose ps --status running -q | wc -l | tr -d ' ')
if [ "$expected" != "$running" ]; then
echo "FAIL: $running/$expected containers running"
docker compose ps
exit 1
fi
echo "OK: all $running containers running"
args:
chdir: ~/docker
register: container_status
ignore_errors: true
changed_when: false
+1
View File
@@ -2,6 +2,7 @@
- name: Check and Report Status of Docker Containers
hosts: Docker
gather_facts: true
become: true
vars:
exited_containers: []
+37
View File
@@ -0,0 +1,37 @@
---
# Updates the compose project in ~/docker on every host in the Docker group:
# pull images, recreate containers, wait, verify that every service has a
# running container, then prune dangling images.
- name: Pull latest images and recreate updated containers
  hosts: Docker
  tasks:
    - name: Pull latest images
      ansible.builtin.command:
        cmd: docker compose pull
        chdir: ~/docker
      register: pull_result
      # Docker Compose v2 writes pull progress to stderr, so both streams are
      # searched; looking at stdout alone never matched.
      # NOTE(review): Compose may print "Pulled" for images that were already
      # up to date -- confirm the marker against the installed Compose version.
      changed_when: >-
        'Downloaded newer image' in (pull_result.stdout ~ pull_result.stderr)
        or 'Pulled' in (pull_result.stdout ~ pull_result.stderr)

    - name: Recreate containers with updated images
      ansible.builtin.command:
        cmd: docker compose up -d --remove-orphans
        chdir: ~/docker

    - name: Pause for 30 seconds to allow containers to stabilize
      ansible.builtin.pause:
        seconds: 30

    # Fails the play (exit 1) when the number of running containers differs
    # from the number of services declared in the compose configuration.
    - name: Verify all containers are running
      ansible.builtin.shell: |
        expected=$(docker compose config --services | wc -l | tr -d ' ')
        running=$(docker compose ps --status running -q | wc -l | tr -d ' ')
        if [ "$expected" != "$running" ]; then
          echo "FAIL: $running/$expected containers running"
          docker compose ps
          exit 1
        fi
        echo "OK: all $running containers running"
      args:
        chdir: ~/docker
      changed_when: false

    - name: Remove dangling images
      ansible.builtin.command:
        cmd: docker image prune -f
      register: prune_result
      # `docker image prune` reports "Total reclaimed space: 0B" when nothing
      # was removed; only report a change when space was actually reclaimed.
      changed_when: "'Total reclaimed space: 0B' not in prune_result.stdout"
+16
View File
@@ -5,6 +5,22 @@
gather_facts: true
tasks:
# Safety guard: list the container IDs of the compose project in ~/docker.
# The probe is best-effort (`|| true`, failed_when: false) so that a host
# without Docker or without ~/docker simply yields no output.
- name: Check if Docker containers are already running (safety guard)
  ansible.builtin.shell: docker compose ps -q 2>/dev/null || true
  args:
    chdir: ~/docker
  register: running_containers
  changed_when: false
  failed_when: false
  # Read-only probe: run it in --check mode too, otherwise the guard below
  # would have nothing to evaluate.
  check_mode: false

# Stops the play on this host when the probe printed at least one container ID.
- name: Abort if containers are already running on this host
  ansible.builtin.fail:
    msg: >
      Docker containers are already running on {{ inventory_hostname }}.
      Use redeploy.yml to restore from backup or deploy.yml to redeploy config.
      Only run fresh_install.yml on hosts with no active containers.
  # default('') covers a probe result without stdout; trim ignores
  # whitespace-only output.
  when: running_containers.stdout | default('') | trim | length > 0
- name: Update apt cache (Debian/Ubuntu)
apt:
update_cache: yes
+16 -7
View File
@@ -2,6 +2,7 @@
hosts: Prod
tasks:
- include_vars: ./vault/restic/restic.yml
- name: Create folder
ansible.builtin.file:
path: ~/docker
@@ -9,28 +10,36 @@
mode: '0755'
# Restores the latest restic snapshot into ~/docker.
#
# RESTIC_RESTORE_PATH is the path inside the snapshot to restore (previously
# hard-coded as /source/gcloud). Verify it with `restic snapshots` before
# running and set it in vault/restic/restic.yml.
#
# Credentials are handed to restic through the task environment instead of
# being templated into the shell script, so values containing spaces or shell
# metacharacters cannot break or inject into the command line.
- name: Pull backups
  ansible.builtin.shell: |
    unset HISTFILE
    restic restore "latest:${RESTIC_RESTORE_PATH}" --target ./ --overwrite always
  args:
    # chdir makes the task fail if ~/docker is missing; an unchecked `cd`
    # would let the restore run in whatever directory the shell started in.
    chdir: ~/docker
  environment:
    RESTIC_REPOSITORY: "{{ RESTIC_REPOSITORY }}"
    AWS_ACCESS_KEY_ID: "{{ AWS_ACCESS_KEY_ID }}"
    AWS_SECRET_ACCESS_KEY: "{{ AWS_SECRET_ACCESS_KEY }}"
    RESTIC_PASSWORD: "{{ RESTIC_PASSWORD }}"
    RESTIC_RESTORE_PATH: "{{ RESTIC_RESTORE_PATH }}"
  # Keeps the secrets out of task output and logs. This also hides restic's
  # error text; set to false temporarily when debugging a failed restore.
  no_log: true
# Starts the compose project in ~/docker in detached mode.
# NOTE(review): this listing shows pre- and post-change lines together; the
# `ignore_errors` line under this task appears to be the removed version --
# confirm in the repository.
- name: Start container using Docker Compose
ansible.builtin.command:
cmd: docker compose up -d
chdir: ~/docker
ignore_errors: true
# Fixed 30-second wait before the container check below.
- name: Pause for 30 seconds to allow containers to stabilize
ansible.builtin.pause:
seconds: 30
# NOTE(review): this task name and its one-line inspect command appear to be
# the version that the "Verify all containers are running" task replaces --
# confirm in the repository.
- name: Check container status
ansible.builtin.shell: docker compose ps -q | xargs -n1 docker container inspect --format '{{ "{{" }} .State.Running {{ "}}" }}'
# Compares the number of services in the compose config with the number of
# running containers; on a mismatch it prints FAIL, lists `docker compose ps`
# and exits 1. Runs in ~/docker and never reports a change.
# NOTE(review): `ignore_errors` is still listed under this task; if it remains
# in the real file the `exit 1` will not stop the play -- confirm it was removed.
- name: Verify all containers are running
ansible.builtin.shell: |
expected=$(docker compose config --services | wc -l | tr -d ' ')
running=$(docker compose ps --status running -q | wc -l | tr -d ' ')
if [ "$expected" != "$running" ]; then
echo "FAIL: $running/$expected containers running"
docker compose ps
exit 1
fi
echo "OK: all $running containers running"
args:
chdir: ~/docker
register: container_status
ignore_errors: true
changed_when: false
+2 -2
View File
@@ -1,5 +1,5 @@
---
- hosts: all
- hosts: "all:!AnsibleHost"
gather_facts: true
tasks:
@@ -19,7 +19,7 @@
# Reboots the host only when the registered stat result says the
# reboot-required marker exists.
# A registered variable is defined even when the task that registers it was
# skipped (it then holds `skipped: true` and no `stat` key), so testing
# `reboot_required_file is defined` alone does not protect `.stat.exists`.
# The conditions are evaluated in order and all must hold.
# NOTE(review): assumes `reboot_required_file` is registered by an earlier
# stat task that is not visible in this hunk -- confirm.
- name: Reboot the server (if required).
  ansible.builtin.reboot:
  when:
    - reboot_required_file is defined
    - reboot_required_file.stat is defined
    - reboot_required_file.stat.exists
- name: "Updating and Upgrading Yum Packages"
yum: