From e27dad0906302a6f42e575d2dae732f70f7aeed5 Mon Sep 17 00:00:00 2001 From: Jordi Sanfeliu Date: Tue, 4 Jun 2013 16:10:35 +0200 Subject: [PATCH] Reimplemented the main loop with the sighandler alarm inside in order to be able to control timeouts in the 'disk' graph. This should avoid a complete freeze if the network goes down when monitoring NFS filesystems. [#10] --- Changes | 3 +++ lib/fs.pm | 26 +++++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Changes b/Changes index 5712d6d..5ce2b1c 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,8 @@ 3.N.N - NN-XXX-2013 ==================== +- Reimplemented the main loop with the sighandler alarm inside in order to be + able to control timeouts in the 'disk' graph. This should avoid a complete + freeze if the network goes down when monitoring NFS filesystems. - Fixed a bug that prevented from seeing stats in the 'nfss' graph. diff --git a/lib/fs.pm b/lib/fs.pm index 1ff368a..a8bcf47 100644 --- a/lib/fs.pm +++ b/lib/fs.pm @@ -129,9 +129,14 @@ sub fs_init { next unless !$d; if($f ne "swap") { + my $pid; eval { - alarm $config->{timeout}; - open(IN, "df -P $f |"); + local $SIG{'ALRM'} = sub { + kill 9, $pid; + logger("$myself: Timeout! Process with PID $pid was hung after $config->{timeout} secs. Killed."); + }; + alarm($config->{timeout}); + $pid = open(IN, "df -P $f |"); while() { if(/ $f$/) { ($d) = split(' ', $_); @@ -139,7 +144,7 @@ sub fs_init { } } close(IN); - alarm 0; + alarm(0); chomp($d); }; } @@ -357,9 +362,15 @@ sub fs_update { # prevents a division by 0 if swap device is not used $use = ($used * 100) / ($used + $free) unless $used + $free == 0; } elsif($f) { + my $pid; eval { - alarm $config->{timeout}; - open(IN, "df -P $f |"); + local $SIG{'ALRM'} = sub { + kill 9, $pid; + logger("$myself: Timeout! Process with PID $pid was hung after $config->{timeout} secs. Killed."); + @tmp = (0, 0, 0, 0); + }; + alarm($config->{timeout}); + $pid = open(IN, "df -P $f |"); while() { if(/ $f$/) { @tmp = split(' ', $_); @@ -367,11 +378,12 @@ sub fs_update { } } close(IN); - alarm 0; + alarm(0); }; (undef, undef, $used, $free) = @tmp; chomp($used, $free); - $use = ($used * 100) / ($used + $free); + # prevents a division by 0 if device is not responding + $use = ($used * 100) / ($used + $free) unless $used + $free == 0; # FS alert if($f eq "/" && lc($fs->{alerts}->{rootfs_enabled}) eq "y") {