Reimplemented the main loop with the alarm signal handler inside it, in order to be able to control timeouts in the 'disk' graph. This should avoid a complete freeze if the network goes down while monitoring NFS filesystems. [#10]

This commit is contained in:
Jordi Sanfeliu 2013-06-04 16:10:35 +02:00
parent 30cda07b50
commit e27dad0906
2 changed files with 22 additions and 7 deletions

View File

@ -1,5 +1,8 @@
3.N.N - NN-XXX-2013 3.N.N - NN-XXX-2013
==================== ====================
- Reimplemented the main loop with the alarm signal handler inside it, in order
to be able to control timeouts in the 'disk' graph. This should avoid a
complete freeze if the network goes down while monitoring NFS filesystems.
- Fixed a bug that prevented stats from being shown in the 'nfss' graph. - Fixed a bug that prevented stats from being shown in the 'nfss' graph.

View File

@ -129,9 +129,14 @@ sub fs_init {
next unless !$d; next unless !$d;
if($f ne "swap") { if($f ne "swap") {
my $pid;
eval { eval {
alarm $config->{timeout}; local $SIG{'ALRM'} = sub {
open(IN, "df -P $f |"); kill 9, $pid;
logger("$myself: Timeout! Process with PID $pid was hung after $config->{timeout} secs. Killed.");
};
alarm($config->{timeout});
$pid = open(IN, "df -P $f |");
while(<IN>) { while(<IN>) {
if(/ $f$/) { if(/ $f$/) {
($d) = split(' ', $_); ($d) = split(' ', $_);
@ -139,7 +144,7 @@ sub fs_init {
} }
} }
close(IN); close(IN);
alarm 0; alarm(0);
chomp($d); chomp($d);
}; };
} }
@ -357,9 +362,15 @@ sub fs_update {
# prevents a division by 0 if swap device is not used # prevents a division by 0 if swap device is not used
$use = ($used * 100) / ($used + $free) unless $used + $free == 0; $use = ($used * 100) / ($used + $free) unless $used + $free == 0;
} elsif($f) { } elsif($f) {
my $pid;
eval { eval {
alarm $config->{timeout}; local $SIG{'ALRM'} = sub {
open(IN, "df -P $f |"); kill 9, $pid;
logger("$myself: Timeout! Process with PID $pid was hung after $config->{timeout} secs. Killed.");
@tmp = (0, 0, 0, 0);
};
alarm($config->{timeout});
$pid = open(IN, "df -P $f |");
while(<IN>) { while(<IN>) {
if(/ $f$/) { if(/ $f$/) {
@tmp = split(' ', $_); @tmp = split(' ', $_);
@ -367,11 +378,12 @@ sub fs_update {
} }
} }
close(IN); close(IN);
alarm 0; alarm(0);
}; };
(undef, undef, $used, $free) = @tmp; (undef, undef, $used, $free) = @tmp;
chomp($used, $free); chomp($used, $free);
$use = ($used * 100) / ($used + $free); # prevents a division by 0 if device is not responding
$use = ($used * 100) / ($used + $free) unless $used + $free == 0;
# FS alert # FS alert
if($f eq "/" && lc($fs->{alerts}->{rootfs_enabled}) eq "y") { if($f eq "/" && lc($fs->{alerts}->{rootfs_enabled}) eq "y") {