diff --git a/lib/amdgpu.pm b/lib/amdgpu.pm index c8b14ff..995737c 100644 --- a/lib/amdgpu.pm +++ b/lib/amdgpu.pm @@ -58,12 +58,25 @@ sub amdgpu_init { $d =~ s/^\"//; $d =~ s/\"$//; $d =~ s/^(.+?) .*$/$1/; - next if -e $d; - logger("$myself: ERROR: invalid or inexistent device name '$d'."); - if(lc($amdgpu->{accept_invalid_amdgpu} || "") ne "y") { - logger("$myself: 'accept_invalid_amdgpu' option is not set."); - logger("$myself: WARNING: initialization aborted."); - return; + my $str = trim($gpu_group[$n] || ""); + my @sensor_names = split(',', $amdgpu->{sensors}->{$str}); + for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) { + if ($i_sensor < scalar(@sensor_names)) { + my $sensor_name = $sensor_names[$i_sensor]; + chomp($sensor_name); + $sensor_name = trim($sensor_name); + if ($sensor_name ne "") { + my $sensor_file = $sensor_name; + unless(-e $sensor_file) { + logger("$myself: ERROR: invalid or inexistent device name '$sensor_file'."); + if(lc($amdgpu->{accept_invalid_amdgpu} || "") ne "y") { + logger("$myself: 'accept_invalid_amdgpu' option is not set."); + logger("$myself: WARNING: initialization aborted."); + return; + } + } + } + } } } } @@ -201,13 +214,15 @@ sub amdgpu_update { $d =~ s/\"$//; my $str = trim($gpu_group[$n] || ""); - my @sensor_names = split(', ', $amdgpu->{sensors}->{$str}); + my @sensor_names = split(',', $amdgpu->{sensors}->{$str}); for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) { if ($i_sensor < scalar(@sensor_names)) { - my $sensor_name = $sensor_names[$i_sensor]; - if ($sensor_name ne "N/A") { - my $sensor_file = $d . "/" . 
$sensor_name; + my $sensor_name = $sensor_names[$i_sensor]; + chomp($sensor_name); + $sensor_name = trim($sensor_name); + if ($sensor_name ne "") { + my $sensor_file = $sensor_name; if(open(IN, $sensor_file)) { my $val = ; close(IN); @@ -515,10 +530,10 @@ sub amdgpu_cgi { my $n_sensor; my $n_sensor2; if (ref($graphs_per_plot[$n_graph]) eq 'ARRAY') { - $n_sensor = $graphs_per_plot[$n_plot]->[0]; + $n_sensor = $graphs_per_plot[$n_plot]->[0]; $n_sensor2 = $graphs_per_plot[$n_plot]->[1]; $n_graph += 1 - } else { + } else { $n_sensor = $graphs_per_plot[$n_plot]; } @@ -569,11 +584,16 @@ sub amdgpu_cgi { push(@tmpz, "LINE2:trans_" . $value_name2 . $LC[$n] . "BB" . ":dashes=1,3:"); } + my @gpu_group = split(', ', $amdgpu->{list}->{$k}); + my $device_str = trim($gpu_group[$n] || ""); + my @sensor_names = split(',', $amdgpu->{sensors}->{$device_str}); + if($n_sensor >= scalar(@sensor_names)) { + $sensor_names[$n_sensor] = ""; + } + chomp($sensor_names[$n_sensor]); + $sensor_names[$n_sensor] = trim($sensor_names[$n_sensor]); if($n_plot < $main_sensor_plots) { - my @gpu_group = split(', ', $amdgpu->{list}->{$k}); - my $device_str = trim($gpu_group[$n] || ""); - my @sensor_names = split(', ', $amdgpu->{sensors}->{$device_str}); - if($sensor_names[$n_sensor] eq "N/A") { + if($sensor_names[$n_sensor] eq "") { push(@tmp, "COMMENT: N/A\\n"); } else { if($main_plots_with_average[$n_plot]) { @@ -587,11 +607,15 @@ sub amdgpu_cgi { } } else { if($show_current_values) { - if($n_sensor2 && $value_name2) { - push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . "\\g"); - push(@tmp, "GPRINT:trans_" . $value_name2 . ":LAST: /" . $legend_labels_per_sensor[$n_sensor2] . " (actual/limit)\\n"); + if($sensor_names[$n_sensor] eq "") { + push(@tmp, "COMMENT:N/A\\n"); } else { - push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . (($n%2 || !$d[$n+1]) ? 
"\\n" : "")); + if($n_sensor2 && $value_name2) { + push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . "\\g"); + push(@tmp, "GPRINT:trans_" . $value_name2 . ":LAST: /" . $legend_labels_per_sensor[$n_sensor2] . " (actual/limit)\\n"); + } else { + push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . (($n%2 || !$d[$n+1]) ? "\\n" : "")); + } } } } diff --git a/man/man5/monitorix.conf.5 b/man/man5/monitorix.conf.5 index ae475d5..9d9e3c9 100644 --- a/man/man5/monitorix.conf.5 +++ b/man/man5/monitorix.conf.5 @@ -1362,7 +1362,7 @@ This graph is able to monitor an unlimited number of AMD GPUs as long as the dri .P .BI list .RS -This is a list of groups of GPUs that you want to monitor. Each group will become a graph and there may be an unlimited number of groups. You can define device names or paths to devices like \fI/dev/amd-w6800\fP. +This is a list of groups of GPUs that you want to monitor. Each group will become a graph and there may be an unlimited number of groups. You can define device names like \fIamd-w6800\fP. .P WARNING: Every time the number of groups in this option changes, Monitorix will resize the \fIamdgpu.rrd\fP file accordingly, removing all historical data. .P @@ -1375,9 +1375,9 @@ You can add it to the group 0 like this: .RS .br - 0 = /dev/amd-w6800, /dev/amd-rx6900 + 0 = amd-w6800, amd-rx6900 .br - 1 = /dev/amd-wx5100 + 1 = amd-wx5100 .br .RE @@ -1387,14 +1387,14 @@ The maximum number of GPUs allowed per group is 8. .P .BI sensors .RS -This list specifies the sensor locations. If a certain card does not have this specific sensor you can disable it by writing /fiN/A/fP instead. The order has to be: GPU busy percent, memory busy percent, GPU clock, memory clock, memory used, power consumption, power limit, fan pwm value, GPU temperature, junction temperature, memory temperature. Power consumption and limit will be shown in one plot. 
+This list specifies the sensor locations as absolute paths. If a certain card does not have a specific sensor, leave its entry empty (\fI/path/sensor_file1, , /path/sensor_file2\fP). The order has to be: GPU busy percent, memory busy percent, GPU clock, memory clock, memory used, power consumption, power limit, fan pwm value, GPU temperature, junction temperature, memory temperature. Power consumption and limit will be shown in one plot. .P .RS .br - /dev/amd-rx6900 = device/gpu_busy_percent, device/mem_busy_percent, freq1_input, freq2_input, device/mem_info_vram_used, power1_average, power1_cap, pwm1, temp1_input, temp2_input, temp3_input + amd-rx6900 = /dev/amd-rx6900/device/gpu_busy_percent, /dev/amd-rx6900/device/mem_busy_percent, /dev/amd-rx6900/freq1_input, /dev/amd-rx6900/freq2_input, /dev/amd-rx6900/device/mem_info_vram_used, /dev/amd-rx6900/power1_average, /dev/amd-rx6900/power1_cap, /dev/amd-rx6900/pwm1, /dev/amd-rx6900/temp1_input, /dev/amd-rx6900/temp2_input, /dev/amd-rx6900/temp3_input .br - /dev/amd-wx5100 = device/gpu_busy_percent, device/mem_busy_percent, freq1_input, freq2_input, device/mem_info_vram_used, power1_average, power1_cap, pwm1, temp1_input, N/A, N/A + amd-wx5100 = /dev/amd-wx5100/device/gpu_busy_percent, /dev/amd-wx5100/device/mem_busy_percent, /dev/amd-wx5100/freq1_input, /dev/amd-wx5100/freq2_input, /dev/amd-wx5100/device/mem_info_vram_used, /dev/amd-wx5100/power1_average, /dev/amd-wx5100/power1_cap, /dev/amd-wx5100/pwm1, /dev/amd-wx5100/temp1_input, , .br .RE @@ -1406,9 +1406,9 @@ This list complements the \fBlist\fP option. It basically allows you to change t .RS .br - /dev/amd-w6800 = W 6800 + amd-w6800 = W 6800 .br - /dev/amd-rx6900 = RX 6900 + amd-rx6900 = RX 6900 .br .RE