diff --git a/Makefile b/Makefile index b82ac93..792db1c 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,7 @@ install-bin: $(INSTALL_DATA) lib/HTTPServer.pm "$(DESTDIR)$(LIBDIR)/HTTPServer.pm" $(INSTALL_DATA) lib/icecast.pm "$(DESTDIR)$(LIBDIR)/icecast.pm" $(INSTALL_DATA) lib/int.pm "$(DESTDIR)$(LIBDIR)/int.pm" + $(INSTALL_DATA) lib/intelrapl.pm "$(DESTDIR)$(LIBDIR)/intelrapl.pm" $(INSTALL_DATA) lib/ipmi.pm "$(DESTDIR)$(LIBDIR)/ipmi.pm" $(INSTALL_DATA) lib/kern.pm "$(DESTDIR)$(LIBDIR)/kern.pm" $(INSTALL_DATA) lib/libvirt.pm "$(DESTDIR)$(LIBDIR)/libvirt.pm" diff --git a/lib/intelrapl.pm b/lib/intelrapl.pm new file mode 100644 index 0000000..7755f01 --- /dev/null +++ b/lib/intelrapl.pm @@ -0,0 +1,803 @@ +# +# Monitorix - A lightweight system monitoring tool. +# +# Copyright (C) 2005-2022 by Jordi Sanfeliu +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# + +package intelrapl; + +use strict; +use warnings; +use Monitorix; +use RRDs; +use Time::HiRes; +use Cwd 'abs_path'; +use File::Basename; +use Exporter 'import'; +our @EXPORT = qw(intelrapl_init intelrapl_update intelrapl_cgi); + +my $epoc_identifier = "last_epoc"; +my $val_identifier = "last_val"; +my $list_delimiter = ","; + +sub get_max_number_of_values_per_group { + + my ($intelrapl) = @_; + my $default_max_number_of_values_per_group = 10; # Can be overwritten via config file but changes will break history. + if(defined($intelrapl->{max_number_of_values_per_group})) { + return $intelrapl->{max_number_of_values_per_group}; + } + return $default_max_number_of_values_per_group; +} + +sub hue_to_rgb { + my ($p, $q, $t) = @_; + if($t < 0) { + $t += 1; + } + if($t > 1) { + $t -= 1; + } + if($t < 1/6) { + return $p + ($q - $p) * 6 * $t; + } + if($t < 1/2) { + return $q; + } + if($t < 2/3) { + return $p + ($q - $p) * (2/3 - $t) * 6; + } + return $p; +} + +sub hsl_to_rgb { + my ($H, $S, $L) = @_; + my $h = $H/360; + my $s = $S/100; + my $l = $L/100; + my ($r, $g, $b); + if($s == 0) { + $r = $g = $b = $l; + } else { + my $q = $l < 0.5 ? $l * (1 + $s) : $l + $s - $l * $s; + my $p = 2 * $l - $q; + $r = hue_to_rgb($p, $q, $h + 1/3); + $g = hue_to_rgb($p, $q, $h); + $b = hue_to_rgb($p, $q, $h - 1/3); + } + return (round($r * 255), round($g * 255), round($b * 255)); +} + +sub line_color { + my ($n) = @_; + my @LC = ( + "#44EEEE", + "#EE44EE", + "#44EE44", + "#4444EE", + "#ff9100", + "#a600ff", + "#EEEE00", + "#448844", + "#EE4444", + "#EE44EE", + ); + if ($n < scalar(@LC)) { + return $LC[$n]; + } + my $h_step = 31; + my $h_min = ($n-1) * $h_step; + my $h_max = $h_min + $h_step; + my ($r,$g,$b) = hsl_to_rgb($n*($h_max-$h_min)+$h_min, 100, 50); + return sprintf("#%02x%02x%02x",$r,$g,$b); +} + +sub measure { + + my ($myself, $config, $intelrapl) = @_; + my $use_nan_for_missing_data = lc($intelrapl->{use_nan_for_missing_data} || "") eq "y" ? 
1 : 0; + + my @sensors_all; + my $rrdata = "N"; + + my $max_number_of_values_per_group = get_max_number_of_values_per_group($intelrapl); + + foreach my $k (sort keys %{$intelrapl->{list}}) { + my $package_sensor; + if(defined($intelrapl->{package_sensors}) && defined($intelrapl->{package_sensors}->{$k})) { + $package_sensor = trim($intelrapl->{package_sensors}->{$k}); + } + my $package_index; + my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k}); + my @sensors = ($use_nan_for_missing_data ? (0+"nan") : 0) x $max_number_of_values_per_group; + for(my $n = 0; $n < min(scalar(@sensor_group), $max_number_of_values_per_group); $n++) { + my $str = trim($sensor_group[$n] || ""); + my $sensor_path = trim($intelrapl->{sensors}->{$str} || ""); + chomp($sensor_path); + + my $last_epoc = ($config->{intelrapl_hist}->{$k}->{$n}->{$epoc_identifier} || 0); + my $epoc = Time::HiRes::time(); + $config->{intelrapl_hist}->{$k}->{$n}->{$epoc_identifier} = $epoc; + + if ($sensor_path ne "") { + my $sensor_file = $sensor_path; + if(open(IN, $sensor_file)) { + my $val = ; + close(IN); + $val = trim($val); + chomp($val); + + my $last_sensor_val = ($config->{intelrapl_hist}->{$k}->{$n}->{$val_identifier} || 0); + my $sensor_val = $val; + $config->{intelrapl_hist}->{$k}->{$n}->{$val_identifier} = $sensor_val; + if ($last_epoc ne 0 && $sensor_val >= $last_sensor_val) { + $sensors[$n] = ($sensor_val - $last_sensor_val) / ($epoc - $last_epoc); # Conversion from muJoule to muWatt during the time interval. 
+ if (defined($package_sensor) && $str eq $package_sensor) { + $package_index = $n; + } + } + } else { + logger("$myself: ERROR: unable to open '$sensor_file'."); + } + } + } + push(@sensors_all, @sensors); + # intelrapl alert + if(defined($intelrapl->{alerts}) && lc($intelrapl->{alerts}->{packagepower_enabled}) eq "y") { + my $sensor_index = $package_index; + if (defined($sensor_index)) { + $config->{intelrapl_hist_alert1}->{$k} = 0 if(!$config->{intelrapl_hist_alert1}->{$k}); + if($sensors[$sensor_index] >= $intelrapl->{alerts}->{packagepower_threshold} && $config->{intelrapl_hist_alert1}->{$k} < $sensors[$sensor_index]) { + if(-x $intelrapl->{alerts}->{packagepower_script}) { + logger("$myself: ALERT: executing script '$intelrapl->{alerts}->{packagepower_script}'."); + system($intelrapl->{alerts}->{packagepower_script} . " " .$intelrapl->{alerts}->{packagepower_timeintvl} . " " . $intelrapl->{alerts}->{packagepower_threshold} . " " . $sensors[$sensor_index]); + } else { + logger("$myself: ERROR: script '$intelrapl->{alerts}->{packagepower_script}' doesn't exist or don't has execution permissions."); + } + $config->{intelrapl_hist_alert1}->{$k} = $sensors[$sensor_index]; + } + } else { + logger("$myself: ERROR: could not find $package_sensor in sensors. Alarms will not work!"); + } + } + } + + foreach(@sensors_all) { + $rrdata .= ":$_"; + } + + return $rrdata; +} + +sub intelrapl_init { + my $myself = (caller(0))[3]; + my ($package, $config, $debug) = @_; + my $rrd = $config->{base_lib} . $package . 
".rrd"; + my $intelrapl = $config->{intelrapl}; + + my $info; + my @ds; + my @rra; + my @tmp; + my $n; + + my @average; + my @min; + my @max; + my @last; + + my $max_number_of_values_per_group = get_max_number_of_values_per_group($intelrapl); + + foreach my $k (sort keys %{$intelrapl->{list}}) { + my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k}); + for(my $n = 0; $n < min(scalar(@sensor_group), $max_number_of_values_per_group); $n++) { + my $str = trim($sensor_group[$n] || ""); + my $sensor_path = trim($intelrapl->{sensors}->{$str} || ""); + chomp($sensor_path); + if ($sensor_path ne "") { + my $sensor_file = $sensor_path; + unless(-e $sensor_file) { + logger("$myself: ERROR: invalid or inexistent device name '$sensor_file'."); + if(lc($intelrapl->{accept_invalid} || "") ne "y") { + logger("$myself: 'accept_invalid' option is not set."); + logger("$myself: WARNING: initialization aborted."); + return; + } + } + } + } + } + + if(-e $rrd) { + my $rrd_n_groups = 0; + my $rrd_n_groups_times_n_values = 0; + $info = RRDs::info($rrd); + for my $key (keys %$info) { + if(index($key, 'ds[') == 0) { + if(index($key, '.type') != -1) { + push(@ds, substr($key, 3, index($key, ']') - 3)); + } + if(index($key, '_val0].index') != -1) { + $rrd_n_groups += 1; + } + if(index($key, '.index') != -1) { + $rrd_n_groups_times_n_values += 1; + } + } + if(index($key, 'rra[') == 0) { + if(index($key, '.rows') != -1) { + push(@rra, substr($key, 4, index($key, ']') - 4)); + } + } + } + + my $total_number_of_groups = 0; + foreach my $k (sort keys %{$intelrapl->{list}}) { + my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k}); + $total_number_of_groups += scalar(@sensor_group); + } + + if(scalar(@ds) / ($rrd_n_groups_times_n_values / $rrd_n_groups) != keys(%{$intelrapl->{list}})) { + logger("$myself: Detected size mismatch between ... (" . keys(%{$intelrapl->{list}}) . ") and $rrd (" . scalar(@ds) / ($rrd_n_groups_times_n_values / $rrd_n_groups) . "). 
Resizing it accordingly. All historical data will be lost. Backup file created."); + rename($rrd, "$rrd.bak"); + } + if($rrd_n_groups_times_n_values / $rrd_n_groups < $max_number_of_values_per_group) { + logger("$myself: Detected size mismatch between max_number_of_values_per_group (" . $max_number_of_values_per_group . ") and $rrd (" . ($rrd_n_groups_times_n_values / $rrd_n_groups) . "). Resizing it accordingly. All historical data will be lost. Backup file created."); + rename($rrd, "$rrd.bak"); + } + if(scalar(@rra) < 12 + (4 * $config->{max_historic_years})) { + logger("$myself: Detected size mismatch between 'max_historic_years' (" . $config->{max_historic_years} . ") and $rrd (" . ((scalar(@rra) -12) / 4) . "). Resizing it accordingly. All historical data will be lost. Backup file created."); + rename($rrd, "$rrd.bak"); + } + } + + if(!(-e $rrd)) { + logger("Creating '$rrd' file."); + for($n = 1; $n <= $config->{max_historic_years}; $n++) { + push(@average, "RRA:AVERAGE:0.5:1440:" . (365 * $n)); + push(@min, "RRA:MIN:0.5:1440:" . (365 * $n)); + push(@max, "RRA:MAX:0.5:1440:" . (365 * $n)); + push(@last, "RRA:LAST:0.5:1440:" . (365 * $n)); + } + for(my $k = 0; $k < keys(%{$intelrapl->{list}}); $k++) { + for($n = 0; $n < $max_number_of_values_per_group; $n++) { + push(@tmp, "DS:rapl" . $k . "_val" . $n . ":GAUGE:120:0:U"); + } + } + eval { + RRDs::create($rrd, + "--step=60", + @tmp, + "RRA:AVERAGE:0.5:1:1440", + "RRA:AVERAGE:0.5:30:336", + "RRA:AVERAGE:0.5:60:744", + @average, + "RRA:MIN:0.5:1:1440", + "RRA:MIN:0.5:30:336", + "RRA:MIN:0.5:60:744", + @min, + "RRA:MAX:0.5:1:1440", + "RRA:MAX:0.5:30:336", + "RRA:MAX:0.5:60:744", + @max, + "RRA:LAST:0.5:1:1440", + "RRA:LAST:0.5:30:336", + "RRA:LAST:0.5:60:744", + @last, + ); + }; + my $err = RRDs::error; + if($@ || $err) { + logger("$@") unless !$@; + if($err) { + logger("ERROR: while creating $rrd: $err"); + if($err eq "RRDs::error") { + logger("... 
is the RRDtool Perl package installed?"); + } + } + return; + } + } + + # check dependencies + if(defined($intelrapl->{alerts}) && lc($intelrapl->{alerts}->{packagepower_enabled} || "") eq "y") { + if(! -x $intelrapl->{alerts}->{packagepower_script}) { + logger("$myself: ERROR: script '$intelrapl->{alerts}->{packagepower_script}' doesn't exist or don't has execution permissions."); + } + } + + $config->{intelrapl_hist_alert1} = (); + $config->{intelrapl_hist} = (); + push(@{$config->{func_update}}, $package); + + measure($myself, $config, $intelrapl); + + logger("$myself: Ok") if $debug; +} + +sub intelrapl_update { + my $myself = (caller(0))[3]; + my ($package, $config, $debug) = @_; + my $rrd = $config->{base_lib} . $package . ".rrd"; + my $intelrapl = $config->{intelrapl}; + + my $ rrdata = measure($myself, $config, $intelrapl); + + RRDs::update($rrd, $rrdata); + logger("$myself: $rrdata") if $debug; + my $err = RRDs::error; + logger("ERROR: while updating $rrd: $err") if $err; +} + +sub round { + my ($float) = @_; + return int($float + $float/abs($float*2 || 1)); +} + +sub intelrapl_cgi { + + my ($package, $config, $cgi) = @_; + my @output; + + my $intelrapl = $config->{intelrapl}; + my @rigid = split(',', ($intelrapl->{rigid} || "")); + my @limit = split(',', ($intelrapl->{limit} || "")); + my $tf = $cgi->{tf}; + my $colors = $cgi->{colors}; + my $graph = $cgi->{graph}; + my $silent = $cgi->{silent}; + my $zoom = "--zoom=" . $config->{global_zoom}; + my %rrd = ( + 'new' => \&RRDs::graphv, + 'old' => \&RRDs::graph, + ); + my $version = "new"; + my @full_size_mode; + my $pic; + my $picz; + my $picz_width; + my $picz_height; + + my $u = ""; + my $width; + my $height; + my @extra; + my @riglim; + my @IMG; + my @IMGz; + my @tmp; + my @tmpz; + my @CDEF; + my $n; + my $n2; + my $e; + my $e2; + my $str; + my $err; + + $version = "old" if $RRDs::VERSION < 1.3; + push(@full_size_mode, "--full-size-mode") if $RRDs::VERSION > 1.3; + my $rrd = $config->{base_lib} . 
$package . ".rrd"; + my $title = $config->{graph_title}->{$package}; + my $IMG_DIR = $config->{base_dir} . "/" . $config->{imgs_dir}; + my $imgfmt_uc = uc($config->{image_format}); + my $imgfmt_lc = lc($config->{image_format}); + foreach my $i (split(',', $config->{rrdtool_extra_options} || "")) { + push(@extra, trim($i)) if trim($i); + } + + $title = !$silent ? $title : ""; + my $gap_on_all_nan = lc($intelrapl->{gap_on_all_nan} || "") eq "y" ? 1 : 0; + + my $max_number_of_values_per_group = get_max_number_of_values_per_group($intelrapl); + + # text mode + # + if(lc($config->{iface_mode}) eq "text") { + if($title) { + push(@output, main::graph_header($title, 2)); + push(@output, " \n"); + push(@output, " \n"); + } + my (undef, undef, undef, $data) = RRDs::fetch("$rrd", + "--resolution=$tf->{res}", + "--start=-$tf->{nwhen}$tf->{twhen}", + "AVERAGE"); + $err = RRDs::error; + push(@output, "ERROR: while fetching $rrd: $err\n") if $err; + my $line1; + my $line2; + my $line3; + push(@output, "
\n");
+		foreach my $k (sort keys %{$intelrapl->{list}}) {
+			my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k});
+			for($n = 0; $n < min(scalar(@sensor_group), $max_number_of_values_per_group); $n++) {
+				$str = sprintf(" RAPL power %d               ", $n + 1);
+				$line1 .= $str;
+				$str = sprintf(" Sensor values ");
+				$line2 .= $str;
+				$line3 .=      "----------------------";
+			}
+		}
+		push(@output, "     $line1\n");
+		push(@output, "Time $line2\n");
+		push(@output, "-----$line3\n");
+		my $line;
+		my @row;
+		my $time;
+		my $from;
+		my $to;
+		# Emit one text row per fetched sample: the time label, then one power column per configured sensor.
+		for($n = 0, $time = $tf->{tb}; $n < ($tf->{tb} * $tf->{ts}); $n++) {
+			$line = @$data[$n];
+			$time = $time - (1 / $tf->{ts});
+			push(@output, sprintf(" %2d$tf->{tc} ", $time));
+			# measure() writes a fixed-width slot of $max_number_of_values_per_group
+			# values per group, so group $e starts at $e * that width in each row.
+			$e = 0;
+			foreach my $k (sort keys %{$intelrapl->{list}}) {
+				my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k});
+				for($n2 = 0; $n2 < min(scalar(@sensor_group), $max_number_of_values_per_group); $n2++) {
+					$from = ($e * $max_number_of_values_per_group) + $n2;
+					my $value = defined($line) ? $line->[$from] : undef;
+					# Samples are stored in microwatts; show Watts like the graph does.
+					my $cell = defined($value) ? sprintf("%.2f", $value * 1e-6) : "nan";
+					push(@output, sprintf(" %20s ", $cell));
+				}
+				$e++;
+			}
+			push(@output, "\n");
+		}
+		push(@output, "    
\n"); + if($title) { + push(@output, " \n"); + push(@output, " \n"); + push(@output, main::graph_footer()); + } + push(@output, "
\n"); + return @output; + } + + + # graph mode + # + if($silent eq "yes" || $silent eq "imagetag") { + $colors->{fg_color} = "#000000"; # visible color for text mode + $u = "_"; + } + if($silent eq "imagetagbig") { + $colors->{fg_color} = "#000000"; # visible color for text mode + $u = ""; + } + + my $plots_per_list_item = 1; + for($n = 0; $n < keys(%{$intelrapl->{list}}); $n++) { + my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$n}); + for($n2 = 0; $n2 < $plots_per_list_item; $n2++) { + $str = $u . $package . $n . $n2 . "." . $tf->{when} . ".$imgfmt_lc"; + push(@IMG, $str); + unlink("$IMG_DIR" . $str); + if(lc($config->{enable_zoom}) eq "y") { + $str = $u . $package . $n . $n2 . "z." . $tf->{when} . ".$imgfmt_lc"; + push(@IMGz, $str); + unlink("$IMG_DIR" . $str); + } + } + } + + + $e = 0; + foreach my $k (sort keys %{$intelrapl->{list}}) { + my @sensor_group = split($list_delimiter, $intelrapl->{list}->{$k}); + if($title && $e == 0) { + push(@output, main::graph_header($title, 1)); + push(@output, " \n"); + push(@output, " \n"); + } + + my $n_plot = $k; + @riglim = @{setup_riglim($rigid[$n_plot], $limit[$n_plot])}; + undef(@tmp); + undef(@tmpz); + undef(@CDEF); + + my $dstr = $k; + if(defined($intelrapl->{list_item_names}->{$k})) { + $dstr = $intelrapl->{list_item_names}->{$k}; + } + my $core_string = $dstr; + $str = $dstr; + + my $legend_label_format = "%7.2lf"; + my $value_transformation = ",1e-6,*"; # muWatt to Watts + + my $legend_size = 16; + my $cpu_label_size = 50; + my $cpu_label = sprintf("%-" . $cpu_label_size . "s", $str); + my $cpu_label_empty = sprintf("%-" . $cpu_label_size . "s", ""); + + my $sum_name = "sum"; + my $cdef_sum = "CDEF:" . $sum_name . 
"="; + + my @sum_group = (); + if(defined($intelrapl->{sum}) && defined($intelrapl->{sum}->{$k})) { + @sum_group = split($list_delimiter, $intelrapl->{sum}->{$k}); + } + + my $package_sensor = "package"; + my $package_index; + if(defined($intelrapl->{package_sensors}) && defined($intelrapl->{package_sensors}->{$k})) { + $package_sensor = trim($intelrapl->{package_sensors}->{$k}); + } + my $noncore_name = "noncore"; + my $cdef_noncore = "CDEF:" . $noncore_name . "="; + my @package_content_group = (); + if(defined($intelrapl->{package_content}) && defined($intelrapl->{package_content}->{$k})) { + @package_content_group = split($list_delimiter, $intelrapl->{package_content}->{$k}); + } + + my $sum_counter = 0; + my $package_content_counter = 0; + for($n = 0; $n < min(scalar(@sensor_group), $max_number_of_values_per_group); $n += 1) { + my $value_name = "val" . $n; + my $sensor_name = trim($sensor_group[$n]); + my $value_label = $sensor_name; + if(defined($intelrapl->{sensor_names}->{$value_label})) { + $value_label = $intelrapl->{sensor_names}->{$value_label}; + } + $value_label = sprintf("%-".$legend_size."s", $value_label); + + push(@CDEF, "CDEF:trans_" . $value_name . "=" . $value_name . 
$value_transformation); + + if(scalar(@sum_group) != 0) { + if(grep {$_ eq $sensor_name} @sum_group) { + if ($sum_counter != 0) { + $cdef_sum .= ","; + } + $cdef_sum .= $value_name; + $sum_counter += 1; + } + } + + if(scalar(@package_content_group) != 0) { + if(grep {$_ eq $sensor_name} @package_content_group) { + if ($package_content_counter != 0) { + $cdef_noncore .= ","; + } + $cdef_noncore .= $value_name; + $package_content_counter += 1; + } + } + + if ($package_sensor eq $sensor_name) { + $package_index = $n; + } + + my $legend_label = $value_label; + + if (defined($package_index) && scalar(@package_content_group) != 0 && $sensor_name eq $package_sensor) { + my $noncore_label_full = "Non-Core"; + if(defined($intelrapl->{noncore_names}) && defined($intelrapl->{noncore_names}->{$k})) { + $noncore_label_full = trim($intelrapl->{noncore_names}->{$k}); + } + + my $noncore_label = sprintf("%-".$legend_size."s", $noncore_label_full); + my $noncore_color = line_color(scalar(@sensor_group)); + + my $package_sensor_name = $package_sensor; + if(defined($intelrapl->{sensor_names}->{$package_sensor})) { + $package_sensor_name = $intelrapl->{sensor_names}->{$package_sensor}; + } + + my $package_info = "(".$noncore_label_full . " = " . $package_sensor_name; + for(my $i = 0; $i < scalar(@package_content_group); $i += 1) { + my $package_item = trim($package_content_group[$i]); + my $package_item_name = $package_item; + if(defined($intelrapl->{sensor_names}->{$package_item})) { + $package_item_name = $intelrapl->{sensor_names}->{$package_item}; + } + $package_info .= " - "; + $package_info .= $package_item_name; + } + $package_info .= ")"; + $package_info = sprintf("%-" . ($cpu_label_size) . "s" , substr($package_info, 0, ($cpu_label_size))); + if(defined($intelrapl->{show_noncore_info}) && lc(trim($intelrapl->{show_noncore_info})) eq "y") { + push(@tmp, "COMMENT:" . $package_info); + } else { + push(@tmp, "COMMENT:" . $cpu_label_empty); + } + + + push(@tmp, "LINE1:trans_" . 
$noncore_name . $noncore_color . ":" . $noncore_label); + push(@tmpz, "LINE1:trans_" . $noncore_name . $noncore_color . ":" . $noncore_label); + + push(@tmp, "GPRINT:trans_" . $noncore_name . ":LAST:Current\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $noncore_name . ":AVERAGE:Average\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $noncore_name . ":MIN:Min\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $noncore_name . ":MAX:Max\\:" . $legend_label_format . "\\n"); + } + + if ($n == 0) { + push(@tmp, "COMMENT:" . $cpu_label); + } else { + push(@tmp, "COMMENT:" . $cpu_label_empty); + } + + my $hex_color_n = line_color($n); + if ($sensor_name eq "core") { + my $hex_transparency = "E6"; + push(@tmp, "AREA:trans_" . $value_name . $hex_color_n .$hex_transparency. ":" . $legend_label); + push(@tmpz, "AREA:trans_" . $value_name . $hex_color_n .$hex_transparency. ":" . $legend_label); + } else { + push(@tmp, "LINE1:trans_" . $value_name . $hex_color_n . ":" . $legend_label); + push(@tmpz, "LINE1:trans_" . $value_name . $hex_color_n . ":" . $legend_label); + } + + push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:Current\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $value_name . ":AVERAGE:Average\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $value_name . ":MIN:Min\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $value_name . ":MAX:Max\\:" . $legend_label_format . "\\n"); + } + + if ($sum_counter != 0) { + $cdef_sum .= ",+" x ($sum_counter-1); + push(@CDEF, $cdef_sum); + push(@CDEF, "CDEF:trans_" . $sum_name . "=" . $sum_name . $value_transformation); + my $sum_label = "Sum"; + if(defined($intelrapl->{sum_names}->{$k})) { + $sum_label = $intelrapl->{sum_names}->{$k}; + } + + $sum_label = sprintf("%-".$legend_size."s", $sum_label); + my $socket_color = line_color(scalar(@sensor_group)+1); + push(@tmp, "COMMENT:". $cpu_label_empty); + + push(@tmp, "LINE1:trans_" . $sum_name . $socket_color . 
":" . $sum_label); + push(@tmpz, "LINE1:trans_" . $sum_name . $socket_color . ":" . $sum_label); + + push(@tmp, "GPRINT:trans_" . $sum_name . ":LAST:Current\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $sum_name . ":AVERAGE:Average\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $sum_name . ":MIN:Min\\:" . $legend_label_format); + push(@tmp, "GPRINT:trans_" . $sum_name . ":MAX:Max\\:" . $legend_label_format . "\\n"); + } + + if (defined($package_index) && $package_content_counter != 0) { + $cdef_noncore .= ",+" x ($package_content_counter-1); + $cdef_noncore .= ",val" . $package_index . ",-,-1,*"; + push(@CDEF, $cdef_noncore); + push(@CDEF, "CDEF:trans_" . $noncore_name . "=" . $noncore_name . $value_transformation); + } + + if(lc($config->{show_gaps}) eq "y") { + push(@tmp, "AREA:wrongdata#$colors->{gap}:"); + push(@tmpz, "AREA:wrongdata#$colors->{gap}:"); + push(@CDEF, "CDEF:wrongdata=allvalues,UN,INF,UNKN,IF"); + } + ($width, $height) = split('x', $config->{graph_size}->{'main'}); + if($silent =~ /imagetag/) { + ($width, $height) = split('x', $config->{graph_size}->{remote}) if $silent eq "imagetag"; + ($width, $height) = split('x', $config->{graph_size}->{main}) if $silent eq "imagetagbig"; + @tmp = @tmpz; + push(@tmp, "COMMENT: \\n"); + push(@tmp, "COMMENT: \\n"); + push(@tmp, "COMMENT: \\n"); + } + + $height *= 1.2; + $width += 300; + + my @def_sensor_average; + my $cdef_sensor_allvalues = "CDEF:allvalues="; + my $sum_of_cores = 0; + for(my $n_group = 0; $n_group < min(scalar(@sensor_group), $max_number_of_values_per_group); $n_group++) { + my $dstr = trim($sensor_group[$n_group]); + $sum_of_cores += 1; + my $value_name = "val" . $n_group; + push(@def_sensor_average, "DEF:" . $value_name . "=$rrd:rapl" . $e . "_" . $value_name . ":AVERAGE"); + if($n_group != 0) { + $cdef_sensor_allvalues .= ","; + } + if ($gap_on_all_nan) { + $cdef_sensor_allvalues .= $value_name . 
",UN,0,1,IF"; + } else { + $cdef_sensor_allvalues .= $value_name; + } + } + $cdef_sensor_allvalues .= ",+" x ($sum_of_cores-1); + if ($gap_on_all_nan) { + $cdef_sensor_allvalues .= ",0,GT,1,UNKN,IF"; + } + my $y_axis_title = "Watt"; + my $large_plot = 1; + my $plot_title = $config->{graphs}->{'_intelrapl1'}; + if(defined($intelrapl->{desc}) && defined($intelrapl->{desc}->{$k})) { + $plot_title = $intelrapl->{desc}->{$k}; + } + + $pic = $rrd{$version}->("$IMG_DIR" . $IMG[$e * $plots_per_list_item + $n_plot], + "--title=$plot_title ($tf->{nwhen}$tf->{twhen})", + "--start=-$tf->{nwhen}$tf->{twhen}", + "--imgformat=$imgfmt_uc", + "--vertical-label=" . $y_axis_title, + "--width=$width", + "--height=$height", + @extra, + @riglim, + $zoom, + @{$cgi->{version12}}, + $large_plot ? () : @{$cgi->{version12_small}}, + @{$colors->{graph_colors}}, + @def_sensor_average, + $cdef_sensor_allvalues, + @CDEF, + @tmp); + $err = RRDs::error; + push(@output, "ERROR: while graphing $IMG_DIR" . $IMG[$e * $plots_per_list_item + $n_plot] . ": $err\n") if $err; + if(lc($config->{enable_zoom}) eq "y") { + ($width, $height) = split('x', $config->{graph_size}->{zoom}); + $picz = $rrd{$version}->("$IMG_DIR" . $IMGz[$e * $plots_per_list_item + $n_plot], + "--title=$plot_title ($tf->{nwhen}$tf->{twhen})", + "--start=-$tf->{nwhen}$tf->{twhen}", + "--imgformat=$imgfmt_uc", + "--vertical-label=" . $y_axis_title, + "--width=$width", + "--height=$height", + @full_size_mode, + @extra, + @riglim, + $zoom, + @{$cgi->{version12}}, + $large_plot ? () : @{$cgi->{version12_small}}, + @{$colors->{graph_colors}}, + @def_sensor_average, + $cdef_sensor_allvalues, + @CDEF, + @tmpz); + $err = RRDs::error; + push(@output, "ERROR: while graphing $IMG_DIR" . $IMGz[$e * $plots_per_list_item + $n_plot] . 
": $err\n") if $err; + } + $e2 = $e + $n_plot + 1; + if($title || ($silent =~ /imagetag/ && $graph =~ /intelrapl$e2/)) { + if(lc($config->{enable_zoom}) eq "y") { + if(lc($config->{disable_javascript_void}) eq "y") { + push(@output, " {url} . "/" . $config->{imgs_dir} . $IMGz[$e * $plots_per_list_item + $n_plot] . "\">\n"); + } else { + if($version eq "new") { + $picz_width = $picz->{image_width} * $config->{global_zoom}; + $picz_height = $picz->{image_height} * $config->{global_zoom}; + } else { + $picz_width = $width + 115; + $picz_height = $height + 100; + } + push(@output, " " . picz_js_a_element(width => $picz_width, height => $picz_height, config => $config, IMGz => $IMGz[$e * $plots_per_list_item + $n_plot], IMG => $IMG[$e * $plots_per_list_item + $n_plot]) . "\n"); + } + } else { + push(@output, " " . img_element(config => $config, IMG => $IMG[$e * $plots_per_list_item + $n_plot]) . "\n"); + } + } + $e++; + } + if($title) { + push(@output, " \n"); + push(@output, " \n"); + push(@output, main::graph_footer()); + } + push(@output, "
\n"); + return @output; +} + +1; diff --git a/man/man5/monitorix.conf.5 b/man/man5/monitorix.conf.5 index d828ec7..07a7b45 100644 --- a/man/man5/monitorix.conf.5 +++ b/man/man5/monitorix.conf.5 @@ -666,6 +666,236 @@ This option will completely enable or disable the legend in the processor graphs .P Default value: \fIy\fP .RE +.SS Intel RAPL power consumption (intelrapl.pm) +This graph is able to monitor the power draw of an unlimited number of Intel RAPL supporting devices like Intel CPU packages. For example it can (if the device RAPL data is available) show power draw of the cores, integrated GPU, package, DRAM and the non-core part of the power draw. The non-core power draw composition can be specified by setting which parts will be subtracted from the package power draw. +.P +.BI list +.RS +This is a list of groups of CPU sensors you want to monitor with an arbitrary name. Each group will become a plot and there may be an unlimited number of groups. You can define sensor names like \fcore\fP and \fpackage\fP. +.P +WARNING: Every time the number of groups or the size of the groups in this option changes, Monitorix will resize the \fIintelrapl.rrd\fP file accordingly, removing all historical data. +.P +To collect the CPU power usage the energy counters from the \fIintel-rapl\fP framework are used and a power consumption in the monitored time interval calculated. +.P +It is recommended that you first check if the \fIintel-rapl\fP class tree and collect your desired energy counters from the devices that you plan to monitor. For example check \fItree /sys/class/powercap/intel-rapl\fP for available \fIenergy_uj\fP energy counters. The corresponding \fIname\fP files helps identifying the sensors. + +You can add it to the group 0 like this: +.P +.RS + +.br + 0 = core, uncore, package, dram +.br + 1 = dram +.br + +.RE +.P +.RE +.P +.BI list_item_names +.RS +This list complements the \fBlist\fP option. 
It allows you to change the group name that will appear in the graph, hiding the sensor name. If no association is defined, then Monitorix will display the name of the device as it is specified in the config file. +.P +.RS + +.br + 0 = CPU 1 +.br + 1 = DRAM modules +.br + +.RE +.RE +.P +.BI sensors +.RS +This list sets the energy counter files for the devices specified in the \fBlist\fP option. +.P +.RS + +.br + core = /sys/class/powercap/intel-rapl:0:0/energy_uj +.br + uncore = /sys/class/powercap/intel-rapl:0:1/energy_uj +.br + package = /sys/class/powercap/intel-rapl:0/energy_uj +.br + dram = /sys/class/powercap/intel-rapl:0:2/energy_uj +.br + +.RE +.RE +.P +.BI sensor_names +.RS +This list complements the \fBsensors\fP option. It allows you to change the device name that will appear in the graph, hiding the device name. If no association is defined, then Monitorix will display the name of the device as it is specified in the config file. +.P +.RS + +.br + package = Package +.br + core = Cores +.br + uncore = IGP +.br + dram = DRAM +.br + +.RE +.RE +.P +.BI package_sensors +.RS +This list allows you to set a device name for the package device. This specified device is used as total power draw to calculate the non-core power draw by subtracting the power draw of the devices specified for the same group in \fBpackage_content\fP. Non-core power draw will not be shown if no package sensor is specified for a certain group. +.P +.RS + +.br + 0 = package +.br + +.RE +.RE +.P +.BI package_content +.RS +This list complements the \fBpackage_sensors\fP option. It allows you to specify the devices that are contained in the total package power draw. This information is needed if you want to visualize non-core power draw. Non-core power draw will be calculated by subtracting the summed power draw of a package content group from the package power draw. 
+.P +.RS + +.br + 0 = core, uncore +.br + +.RE +.RE +.P +.BI noncore_names +.RS +This list allows you to change the non-core name for a certain group that will appear in the graph legend. +.P +.RS + +.br + 0 = Non-Core +.br + +.RE +.RE +.P +.BI desc +.RS +This list complements the \fBlist\fP option. It allows you to include a title for every group of devices. The title will appear above the plot. +.P +.RS + +.br + 0 = CPU power distribution +.br + 1 = Memory power distribution +.br + +.RE +.RE +.P +.BI sum +.RS +This list allows you to add a specific sum of devices to the plot of a certain group. No sum will be shown if not specified for the group. +.P +.RS + +.br + 0 = package, dram +.br + +.RE +.RE +.P +.BI sum_names +.RS +This list complements the \fBsum\fP option. It allows you to specify the name of the sum if desired. +.P +.RS + +.br + 0 = Package + DRAM +.br + +.RE +.RE +.P +.BI packagepower_enabled +.RS +This section enables or disables the alert capabilities for this graph; the alert for the package power draw. It works as follows: +.P +If the package power draw specified by \fBpackage_sensors\fP of any of the specified devices reaches or exceeds the \fBpackagepower_threshold\fP (the interval of time is not used here), Monitorix will execute the external alert script defined in \fBpackagepower_script\fP. +.P +The default Monitorix installation includes an example of a shell-script alert called \fBmonitorix-alert.sh\fP which you can use as a base for your own script. +.P +Default value: \fIn\fP +.RE +.P +.BI packagepower_timeintvl +.RS +Not used in this alert. +.P +Default value: \fINone\fP +.RE +.P +.BI packagepower_threshold +.RS +This is the value that needs to be reached or exceeded to trigger the mechanism for a particular action, which in this case is the execution of an external alert script. +.P +Default value: \fINone\fP +.RE +.P +.BI packagepower_script +.RS +This is the full path name of the script that will be executed by this alert. 
+.P +It will receive the following three parameters: +.P +1st - the value currently defined in \fBpackagepower_timeintvl\fP. +.br +2nd - the value currently defined in \fBpackagepower_threshold\fP. +.br +3rd - the current package power draw. +.P +Default value: \fI/path/to/script.sh\fP +.RE +.P +.BI use_nan_for_missing_data +.RS +This option, when enabled via \fIy\fP, shows \fInan\fP values for missing data instead of \fI0\fP. This is useful when \fI0\fP could be mistaken for valid data. +.P +Default value: \fIn\fP +.RE +.P +.BI gap_on_all_nan +.RS +This option, when enabled via \fIy\fP, combined with the \fIshow_gaps\fP option shows gaps only if all data points are \fInan\fP instead of requiring only one to be \fInan\fP for a gap. This can be useful if not all sensor data are required for normal operation. +.P +Default value: \fIn\fP +.RE +.P +.BI show_noncore_info +.RS +This option, when enabled via \fIy\fP, shows a description string in the legend explaining how the non-core part is calculated. +.P +Default value: \fIn\fP +.RE +.P +.BI max_number_of_values_per_group +.RS +This option sets the maximum number of devices per group; increase it if more values are required. +.P +WARNING: Every time the \fImax_number_of_values_per_group\fP value changes, Monitorix will resize the \fIintelrapl.rrd\fP file accordingly, removing all historical data.
+.P +Default value: \fI10\fP +.RE + .SS HP ProLiant System Health (hptemp.pm) .BI list .RS diff --git a/monitorix.conf b/monitorix.conf index 1fc9fa7..4e5e950 100644 --- a/monitorix.conf +++ b/monitorix.conf @@ -79,6 +79,7 @@ secure_log_date_format = %b %e system = y kern = y proc = y + intelrapl = n hptemp = n lmsens = n gensens = n @@ -178,6 +179,55 @@ secure_log_date_format = %b %e +# intelrapl graph +# ----------------------------------------------------------------------------- + + + 0 = core, uncore, package, dram + 1 = dram + + + 0 = CPU 1 + 1 = DRAM modules + + + core = /sys/class/powercap/intel-rapl:0:0/energy_uj + uncore = /sys/class/powercap/intel-rapl:0:1/energy_uj + package = /sys/class/powercap/intel-rapl:0/energy_uj + dram = /sys/class/powercap/intel-rapl:0:2/energy_uj + + + package = Package + core = Cores + uncore = IGP + dram = DRAM + + + 0 = package + + + 0 = core, uncore + + + 0 = Non-Core + + + 0 = CPU power distribution + 1 = Memory power distribution + + + + + + rigid = 0 + limit = 100 + use_nan_for_missing_data = n + gap_on_all_nan = n + show_noncore_info = n + max_number_of_values_per_group = 10 + + + # HPTEMP graph # ----------------------------------------------------------------------------- @@ -1073,12 +1123,13 @@ logo_bottom = logo_bot.png remote = 300x100 -graph_name = system, kern, proc, hptemp, lmsens, gensens, ipmi, ambsens, amdgpu, nvidiagpu, nvidia, disk, nvme, fs, zfs, du, net, netstat, tinyproxy, tc, libvirt, process, serv, mail, port, user, ftp, apache, nginx, lighttpd, mysql, pgsql, mongodb, varnish, pagespeed, squid, nfss, nfsc, bind, unbound, ntp, chrony, fail2ban, icecast, raspberrypi, phpapc, memcached, redis, phpfpm, apcupsd, nut, wowza, int, verlihub +graph_name = system, kern, proc, intelrapl, hptemp, lmsens, gensens, ipmi, ambsens, amdgpu, nvidiagpu, nvidia, disk, nvme, fs, zfs, du, net, netstat, tinyproxy, tc, libvirt, process, serv, mail, port, user, ftp, apache, nginx, lighttpd, mysql, pgsql, mongodb, varnish, 
pagespeed, squid, nfss, nfsc, bind, unbound, ntp, chrony, fail2ban, icecast, raspberrypi, phpapc, memcached, redis, phpfpm, apcupsd, nut, wowza, int, verlihub system = System load average and usage kern = Global kernel usage proc = Kernel usage per processor + intelrapl = RAPL power usage hptemp = HP ProLiant System Health lmsens = LM-Sensors and GPU temperatures gensens = Generic sensor statistics @@ -1142,6 +1193,7 @@ graph_name = system, kern, proc, hptemp, lmsens, gensens, ipmi, ambsens, amdgpu, _kern2 = Context switches and forks _kern3 = VFS usage _proc = Processor + _intelrapl1 = Power distribution _hptemp1 = Temperatures 1 _hptemp2 = Temperatures 2 _hptemp3 = Temperatures 3