Change amdgpu config logic.

- Devices are now independent identifiers in a list.
- Sensors specify the absolute path to the file containing the information.
- Unavailable sensors have to be left blank.
- Some indentation fixes.
This commit is contained in:
Andreas Bachlechner 2021-12-09 17:58:38 +01:00
parent 7eeaf34089
commit d4adf2d43f
2 changed files with 52 additions and 28 deletions

View File

@ -58,8 +58,17 @@ sub amdgpu_init {
$d =~ s/^\"//; $d =~ s/^\"//;
$d =~ s/\"$//; $d =~ s/\"$//;
$d =~ s/^(.+?) .*$/$1/; $d =~ s/^(.+?) .*$/$1/;
next if -e $d; my $str = trim($gpu_group[$n] || "");
logger("$myself: ERROR: invalid or inexistent device name '$d'."); my @sensor_names = split(',', $amdgpu->{sensors}->{$str});
for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) {
if ($i_sensor < scalar(@sensor_names)) {
my $sensor_name = $sensor_names[$i_sensor];
chomp($sensor_name);
$sensor_name = trim($sensor_name);
if ($sensor_name ne "") {
my $sensor_file = $sensor_name;
unless(-e $sensor_file) {
logger("$myself: ERROR: invalid or inexistent device name '$sensor_file'.");
if(lc($amdgpu->{accept_invalid_amdgpu} || "") ne "y") { if(lc($amdgpu->{accept_invalid_amdgpu} || "") ne "y") {
logger("$myself: 'accept_invalid_amdgpu' option is not set."); logger("$myself: 'accept_invalid_amdgpu' option is not set.");
logger("$myself: WARNING: initialization aborted."); logger("$myself: WARNING: initialization aborted.");
@ -68,6 +77,10 @@ sub amdgpu_init {
} }
} }
} }
}
}
}
}
if(-e $rrd) { if(-e $rrd) {
my $rrd_n_gpu = 0; my $rrd_n_gpu = 0;
@ -201,13 +214,15 @@ sub amdgpu_update {
$d =~ s/\"$//; $d =~ s/\"$//;
my $str = trim($gpu_group[$n] || ""); my $str = trim($gpu_group[$n] || "");
my @sensor_names = split(', ', $amdgpu->{sensors}->{$str}); my @sensor_names = split(',', $amdgpu->{sensors}->{$str});
for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) { for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) {
if ($i_sensor < scalar(@sensor_names)) { if ($i_sensor < scalar(@sensor_names)) {
my $sensor_name = $sensor_names[$i_sensor]; my $sensor_name = $sensor_names[$i_sensor];
if ($sensor_name ne "N/A") { chomp($sensor_name);
my $sensor_file = $d . "/" . $sensor_name; $sensor_name = trim($sensor_name);
if ($sensor_name ne "") {
my $sensor_file = $sensor_name;
if(open(IN, $sensor_file)) { if(open(IN, $sensor_file)) {
my $val = <IN>; my $val = <IN>;
close(IN); close(IN);
@ -569,11 +584,16 @@ sub amdgpu_cgi {
push(@tmpz, "LINE2:trans_" . $value_name2 . $LC[$n] . "BB" . ":dashes=1,3:"); push(@tmpz, "LINE2:trans_" . $value_name2 . $LC[$n] . "BB" . ":dashes=1,3:");
} }
if($n_plot < $main_sensor_plots) {
my @gpu_group = split(', ', $amdgpu->{list}->{$k}); my @gpu_group = split(', ', $amdgpu->{list}->{$k});
my $device_str = trim($gpu_group[$n] || ""); my $device_str = trim($gpu_group[$n] || "");
my @sensor_names = split(', ', $amdgpu->{sensors}->{$device_str}); my @sensor_names = split(',', $amdgpu->{sensors}->{$device_str});
if($sensor_names[$n_sensor] eq "N/A") { if($n_sensor >= scalar(@sensor_names)) {
$sensor_names[$n_sensor] = "";
}
chomp($sensor_names[$n_sensor]);
$sensor_names[$n_sensor] = trim($sensor_names[$n_sensor]);
if($n_plot < $main_sensor_plots) {
if($sensor_names[$n_sensor] eq "") {
push(@tmp, "COMMENT: N/A\\n"); push(@tmp, "COMMENT: N/A\\n");
} else { } else {
if($main_plots_with_average[$n_plot]) { if($main_plots_with_average[$n_plot]) {
@ -587,6 +607,9 @@ sub amdgpu_cgi {
} }
} else { } else {
if($show_current_values) { if($show_current_values) {
if($sensor_names[$n_sensor] eq "") {
push(@tmp, "COMMENT:N/A\\n");
} else {
if($n_sensor2 && $value_name2) { if($n_sensor2 && $value_name2) {
push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . "\\g"); push(@tmp, "GPRINT:trans_" . $value_name . ":LAST:" . $legend_labels_per_sensor[$n_sensor] . "\\g");
push(@tmp, "GPRINT:trans_" . $value_name2 . ":LAST: /" . $legend_labels_per_sensor[$n_sensor2] . " (actual/limit)\\n"); push(@tmp, "GPRINT:trans_" . $value_name2 . ":LAST: /" . $legend_labels_per_sensor[$n_sensor2] . " (actual/limit)\\n");
@ -597,6 +620,7 @@ sub amdgpu_cgi {
} }
} }
} }
}
if($n_plot < $main_sensor_plots) { if($n_plot < $main_sensor_plots) {
push(@tmp, "COMMENT: \\n"); push(@tmp, "COMMENT: \\n");

View File

@ -1362,7 +1362,7 @@ This graph is able to monitor an unlimited number of AMD GPUs as long as the dri
.P .P
.BI list .BI list
.RS .RS
This is a list of groups of GPUs that you want to monitor. Each group will become a graph and there may be an unlimited number of groups. You can define device names or paths to devices like \fI/dev/amd-w6800\fP. This is a list of groups of GPUs that you want to monitor. Each group will become a graph and there may be an unlimited number of groups. You can define device names like \fIamd-w6800\fP.
.P .P
WARNING: Every time the number of groups in this option changes, Monitorix will resize the \fIamdgpu.rrd\fP file accordingly, removing all historical data. WARNING: Every time the number of groups in this option changes, Monitorix will resize the \fIamdgpu.rrd\fP file accordingly, removing all historical data.
.P .P
@ -1375,9 +1375,9 @@ You can add it to the group 0 like this:
.RS .RS
<list> <list>
.br .br
0 = /dev/amd-w6800, /dev/amd-rx6900 0 = amd-w6800, amd-rx6900
.br .br
1 = /dev/amd-wx5100 1 = amd-wx5100
.br .br
</list> </list>
.RE .RE
@ -1387,14 +1387,14 @@ The maximum number of GPUs allowed per group is 8.
.P .P
.BI sensors .BI sensors
.RS .RS
This list specifies the sensor locations. If a certain card does not have this specific sensor you can disable it by writing \fIN/A\fP instead. The order has to be: GPU busy percent, memory busy percent, GPU clock, memory clock, memory used, power consumption, power limit, fan pwm value, GPU temperature, junction temperature, memory temperature. Power consumption and limit will be shown in one plot. This list specifies the sensor locations as absolute paths. If a certain card does not have a specific sensor, leave its entry empty instead (\fI/path/sensor_file1, , /path/sensor_file2\fP). The order has to be: GPU busy percent, memory busy percent, GPU clock, memory clock, memory used, power consumption, power limit, fan pwm value, GPU temperature, junction temperature, memory temperature. Power consumption and limit will be shown in one plot.
.P .P
.RS .RS
<sensors> <sensors>
.br .br
/dev/amd-rx6900 = device/gpu_busy_percent, device/mem_busy_percent, freq1_input, freq2_input, device/mem_info_vram_used, power1_average, power1_cap, pwm1, temp1_input, temp2_input, temp3_input amd-rx6900 = /dev/amd-rx6900/device/gpu_busy_percent, /dev/amd-rx6900/device/mem_busy_percent, /dev/amd-rx6900/freq1_input, /dev/amd-rx6900/freq2_input, /dev/amd-rx6900/device/mem_info_vram_used, /dev/amd-rx6900/power1_average, /dev/amd-rx6900/power1_cap, /dev/amd-rx6900/pwm1, /dev/amd-rx6900/temp1_input, /dev/amd-rx6900/temp2_input, /dev/amd-rx6900/temp3_input
.br .br
/dev/amd-wx5100 = device/gpu_busy_percent, device/mem_busy_percent, freq1_input, freq2_input, device/mem_info_vram_used, power1_average, power1_cap, pwm1, temp1_input, N/A, N/A amd-wx5100 = /dev/amd-wx5100/device/gpu_busy_percent, /dev/amd-wx5100/device/mem_busy_percent, /dev/amd-wx5100/freq1_input, /dev/amd-wx5100/freq2_input, /dev/amd-wx5100/device/mem_info_vram_used, /dev/amd-wx5100/power1_average, /dev/amd-wx5100/power1_cap, /dev/amd-wx5100/pwm1, /dev/amd-wx5100/temp1_input, ,
.br .br
</sensors> </sensors>
.RE .RE
@ -1406,9 +1406,9 @@ This list complements the \fBlist\fP option. It basically allows you to change t
.RS .RS
<map> <map>
.br .br
/dev/amd-w6800 = W 6800 amd-w6800 = W 6800
.br .br
/dev/amd-rx6900 = RX 6900 amd-rx6900 = RX 6900
.br .br
</map> </map>
.RE .RE