Add respect_power_state option to amdgpu.pm to avoid waking up GPUs in D3 power state.

This commit is contained in:
Andreas Bachlechner 2022-08-05 09:15:10 +02:00
parent 6adf462e8e
commit 6fb4995979
3 changed files with 66 additions and 14 deletions

View File

@ -196,6 +196,7 @@ sub amdgpu_update {
my $rrd = $config->{base_lib} . $package . ".rrd";
my $amdgpu = $config->{amdgpu};
my $use_nan_for_missing_data = lc($amdgpu->{use_nan_for_missing_data} || "") eq "y" ? 1 : 0;
my $respect_power_state = lc($amdgpu->{respect_power_state} || "") eq "y" ? 1 : 0;
my @sensors;
@ -216,21 +217,44 @@ sub amdgpu_update {
my $str = trim($gpu_group[$n] || "");
my @sensor_names = split(',', $amdgpu->{sensors}->{$str});
for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) {
if ($i_sensor < scalar(@sensor_names)) {
my $sensor_name = $sensor_names[$i_sensor];
chomp($sensor_name);
$sensor_name = trim($sensor_name);
if ($sensor_name ne "") {
my $sensor_file = $sensor_name;
if(open(IN, $sensor_file)) {
my $gpu_in_d3_state = 0;
if ($respect_power_state) {
my $power_state_sensor_name = $amdgpu->{power_states}->{$str};
if (defined($power_state_sensor_name)) {
chomp($power_state_sensor_name);
$power_state_sensor_name = trim($power_state_sensor_name);
if ($power_state_sensor_name ne "") {
my $power_state_sensor_file = $power_state_sensor_name;
if(open(IN, $power_state_sensor_file)) {
my $val = <IN>;
close(IN);
$val = trim($val);
chomp($val);
$sensors[$i_sensor] = $val;
if (index(lc($val), lc("D3")) != -1) {
$gpu_in_d3_state = 1;
}
} else {
logger("$myself: ERROR: unable to open '$sensor_file'.");
logger("$myself: ERROR: unable to open power state sensor file '$power_state_sensor_file'.");
}
}
}
}
if (!$gpu_in_d3_state) {
for(my $i_sensor = 0; $i_sensor < $number_of_values_per_gpu_in_rrd; $i_sensor++) {
if ($i_sensor < scalar(@sensor_names)) {
my $sensor_name = $sensor_names[$i_sensor];
chomp($sensor_name);
$sensor_name = trim($sensor_name);
if ($sensor_name ne "") {
my $sensor_file = $sensor_name;
if(open(IN, $sensor_file)) {
my $val = <IN>;
close(IN);
$val = trim($val);
chomp($val);
$sensors[$i_sensor] = $val;
} else {
logger("$myself: ERROR: unable to open '$sensor_file'.");
}
}
}
}

View File

@ -2012,6 +2012,28 @@ This option, when enabled via \fIy\fP, combined with the \fIshow_gaps\fP option
.P
Default value: \fIn\fP
.RE
.P
.BI respect_power_state
.RS
This option, when enabled via \fIy\fP, will respect the AMD GPU D3 power state. Monitorix won't wake up a GPU that is in the D3 power state to read its sensor values; it will skip that GPU instead. The power state sensor has to be specified via the \fIpower_states\fP option for each GPU whose power state should be respected.
.P
Default value: \fIn\fP
.RE
.P
.BI power_states
.RS
This list complements the \fBrespect_power_state\fP option. For each GPU that should not be woken up while it is in the D3 power state, specify the path of its power_state sensor file.
.P
.RS
<power_states>
.br
amd-w6800 = /dev/hwmon-w6800/device/power_state
.br
amd-rx6900 = /dev/hwmon-rx6900/device/power_state
.br
</power_states>
.RE
.RE
.SS NVIDIA GPU temperatures and usage (nvidiagpu.pm)
This graph is able to monitor an unlimited number of Nvidia GPUs via \fInvidia-smi\fP.
.P

View File

@ -389,10 +389,16 @@ secure_log_date_format = %b %e
amd-wx5100 = WX 5100
</map>
<sensors>
amd-w6800 = /dev/device/gpu_busy_percent, /dev/device/mem_busy_percent, /dev/freq1_input, /dev/freq2_input, /dev/device/mem_info_vram_used, /dev/power1_average, /dev/power1_cap, pwm1, /dev/temp1_input, /dev/temp2_input, /dev/temp3_input
amd-wx5100 = /dev/device/gpu_busy_percent, /dev/device/mem_busy_percent, /dev/freq1_input, /dev/freq2_input, /dev/device/mem_info_vram_used, power1_average, /dev/power1_cap, /dev/pwm1, /dev/temp1_input, ,
amd-w6800 = /dev/device1/gpu_busy_percent, /dev/device1/mem_busy_percent, /dev/device1/freq1_input, /dev/device1/freq2_input, /dev/device1/mem_info_vram_used, /dev/device1/power1_average, /dev/device1/power1_cap, /dev/device1/pwm1, /dev/device1/temp1_input, /dev/device1/temp2_input, /dev/device1/temp3_input
amd-wx5100 = /dev/device2/gpu_busy_percent, /dev/device2/mem_busy_percent, /dev/device2/freq1_input, /dev/device2/freq2_input, /dev/device2/mem_info_vram_used, /dev/device2/power1_average, /dev/device2/power1_cap, /dev/device2/pwm1, /dev/device2/temp1_input, ,
</sensors>
respect_power_state = n
<power_states>
amd-w6800 = /dev/device1/power_state
amd-wx5100 = /dev/device2/power_state
</power_states>
<alerts>
coretemp_enabled = n
coretemp_timeintvl = 0