From 69e88104da50cd9d5f40cad8cb505f4513c4eadc Mon Sep 17 00:00:00 2001
From: Jens Steidl <Jens.Steidl@slub-dresden.de>
Date: Thu, 15 May 2025 18:09:26 +0200
Subject: [PATCH] feat: nfs timeout notification

Install and enable a systemd service that follows the journal and mails
staff when an NFS timeout is logged, at most once per configured interval.
---
 tasks/rosetta/configure_nfs_monitoring.yml    | 35 +++++++++++++++++++
 tasks/rosetta/main_rosetta.yml                |  3 ++
 .../nfs_timeout_notification.service.j2       | 14 ++++++++
 .../nfs_timeout_notification.service.sh.j2    | 24 +++++++++++++
 4 files changed, 76 insertions(+)
 create mode 100644 tasks/rosetta/configure_nfs_monitoring.yml
 create mode 100644 templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.j2
 create mode 100644 templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.sh.j2

diff --git a/tasks/rosetta/configure_nfs_monitoring.yml b/tasks/rosetta/configure_nfs_monitoring.yml
new file mode 100644
index 0000000..2a20ef8
--- /dev/null
+++ b/tasks/rosetta/configure_nfs_monitoring.yml
@@ -0,0 +1,35 @@
+---
+# Installs and enables the systemd service that watches the journal for NFS
+# timeouts and mails staff: the unit file plus the bash script it executes.
+- name: create directory for systemd units
+  ansible.builtin.file:
+    path: "/usr/local/lib/systemd/system/"
+    mode: "0755"
+    state: directory
+    owner: "root"
+    group: "root"
+
+# NOTE(review): both files are installed root-owned with mode 0600, while the
+# unit runs the script as the vault-configured User=/Group=. Confirm that this
+# account is able to read the script.
+- name: install NFS monitoring services
+  ansible.builtin.template:
+    src: "usr/local/lib/systemd/system/{{ item }}.j2"
+    dest: "/usr/local/lib/systemd/system/{{ item }}"
+    mode: "0600"
+    owner: "root"
+    group: "root"
+  loop:
+    - "nfs_timeout_notification.service"
+    - "nfs_timeout_notification.service.sh"
+  notify: daemon-reload
+
+- name: enable NFS monitoring services
+  ansible.builtin.systemd_service:
+    name: "{{ item.name }}"
+    enabled: "{{ item.enabled | default(true) }}"
+    state: "{{ item.state | default('started') }}"
+    # reload systemd first so that the freshly installed unit file is known
+    daemon_reload: true
+  loop:
+    - name: "nfs_timeout_notification.service"
diff --git a/tasks/rosetta/main_rosetta.yml b/tasks/rosetta/main_rosetta.yml
index 3cca01e..865be2e 100644
--- a/tasks/rosetta/main_rosetta.yml
+++ b/tasks/rosetta/main_rosetta.yml
@@ -52,3 +52,6 @@
 - name: configure shell environment
   ansible.builtin.import_tasks: "rosetta/configure_shell.yml"
   tags: [shell, csh, alias, aliases]
+- name: configure NFS monitoring
+  ansible.builtin.import_tasks: "rosetta/configure_nfs_monitoring.yml"
+  tags: [rosetta, systemd, nfs_monitoring]
diff --git a/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.j2 b/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.j2
new file mode 100644
index 0000000..c08722b
--- /dev/null
+++ b/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.j2
@@ -0,0 +1,14 @@
+[Unit]
+Description=monitor journal for NFS timeouts and notify staff
+After=remote-fs.target
+
+[Service]
+Type=simple
+RemainAfterExit=no
+Restart=no
+ExecStart=/bin/bash /usr/local/lib/systemd/system/nfs_timeout_notification.service.sh
+User={{ vault_nfs_timeout_notification_service.owner }}
+Group={{ vault_nfs_timeout_notification_service.group }}
+
+[Install]
+WantedBy=multi-user.target
diff --git a/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.sh.j2 b/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.sh.j2
new file mode 100644
index 0000000..e0d3629
--- /dev/null
+++ b/templates/usr/local/lib/systemd/system/nfs_timeout_notification.service.sh.j2
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Follow the systemd journal and mail staff whenever an NFS timeout is logged.
+# After a mail has been sent, further timeouts are ignored until
+# WAIT_BETWEEN_MAILS_IN_SECONDS have passed.
+
+WAIT_BETWEEN_MAILS_IN_SECONDS={{ vault_nfs_timeout_notification_service.wait_between_mails_in_seconds }};
+# Epoch second before which no mail is sent; 0 lets the first match through.
+NO_MAIL_UNTIL_EPOCH=0;
+# Journal lines matching this regular expression count as NFS timeouts.
+NFS_TIMEOUT_PATTERN='nfs.*timed out';
+
+# The loop runs in the pipeline's subshell for as long as journalctl runs, so
+# NO_MAIL_UNTIL_EPOCH keeps its value from one journal line to the next.
+journalctl -f | while read -r LINE; do
+    [[ "${LINE}" =~ ${NFS_TIMEOUT_PATTERN} ]] || continue;
+    NOW=$(date +%s);
+    # Rate limit: only mail once the window opened by the previous mail is over.
+    if [[ "${NOW}" -ge "${NO_MAIL_UNTIL_EPOCH}" ]]; then
+        NO_MAIL_UNTIL_EPOCH=$((NOW + WAIT_BETWEEN_MAILS_IN_SECONDS));
+        echo "NFS timeout detected, sending mail to staff";
+        printf '%s\n' "${LINE}" | /usr/bin/mail -s "NFS timeout detected on $(hostname -f)" {{ vault_nfs_timeout_notification_service.staff_mail }};
+    fi;
+done;
-- 
GitLab