thermal: core: Synchronize suspend-prepare and post-suspend actions

BugLink: https://bugs.launchpad.net/bugs/2101915

After commit 5a5efdaffd ("thermal: core: Resume thermal zones
asynchronously") it is theoretically possible that, if a system suspend
starts immediately after a system resume, thermal_zone_device_resume()
spawned by the thermal PM notifier for one of the thermal zones at the
end of the system resume will run after the thermal PM notifier for the
suspend-prepare action.  If that happens, tz->suspended set by the latter
will be reset by the former which may lead to unexpected consequences.

To avoid that race, synchronize thermal_zone_device_resume() with the
suspend-prepare thermal PM notifier with the help of an additional bool
field and a completion in struct thermal_zone_device.

Note that this also ensures running __thermal_zone_device_update() at
least once for each thermal zone between system resume and the following
system suspend in case it is needed to start thermal mitigation.

Fixes: 5a5efdaffd ("thermal: core: Resume thermal zones asynchronously")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit d2278f3533a8c4933c52f85784ffa73e8250c524)
[koichiroden: Pulled from v6.10 as a dependency for the subsequent
commit, applied changes for drivers/thermal/thermal_core.h to
include/linux/thermal.h due to the missing backport of:
b1ae92dcfa8e ("thermal: core: Make struct thermal_zone_device definition internal")
with adjustments due to the absent commit:
9b0a62758665 ("thermal: core: Store zone trips table in struct thermal_zone_device").
N.B. this also fixes a K6.8+ issue]
Signed-off-by: Koichiro Den <koichiro.den@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
This commit is contained in:
Rafael J. Wysocki
2025-03-11 08:51:19 +09:00
committed by Stefan Bader
parent 675d45b371
commit 4ca9db04e0
2 changed files with 25 additions and 0 deletions
+21
View File
@@ -1332,6 +1332,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
ida_init(&tz->ida);
mutex_init(&tz->lock);
init_completion(&tz->removal);
init_completion(&tz->resume);
id = ida_alloc(&thermal_tz_ida, GFP_KERNEL);
if (id < 0) {
result = id;
@@ -1577,6 +1578,9 @@ static void thermal_zone_device_resume(struct work_struct *work)
thermal_zone_device_init(tz);
__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
complete(&tz->resume);
tz->resuming = false;
mutex_unlock(&tz->lock);
}
@@ -1594,6 +1598,20 @@ static int thermal_pm_notify(struct notifier_block *nb,
list_for_each_entry(tz, &thermal_tz_list, node) {
mutex_lock(&tz->lock);
if (tz->resuming) {
/*
* thermal_zone_device_resume() queued up for
* this zone has not acquired the lock yet, so
* release it to let the function run and wait
* until it has done the work.
*/
mutex_unlock(&tz->lock);
wait_for_completion(&tz->resume);
mutex_lock(&tz->lock);
}
tz->suspended = true;
mutex_unlock(&tz->lock);
@@ -1611,6 +1629,9 @@ static int thermal_pm_notify(struct notifier_block *nb,
cancel_delayed_work(&tz->poll_queue);
reinit_completion(&tz->resume);
tz->resuming = true;
/*
* Replace the work function with the resume one, which
* will restore the original work function and schedule
+4
View File
@@ -125,6 +125,7 @@ struct thermal_cooling_device {
* @type: the thermal zone device type
* @device: &struct device for this thermal zone
* @removal: removal completion
* @resume: resume completion
* @trip_temp_attrs: attributes for trip points for sysfs: trip temperature
* @trip_type_attrs: attributes for trip points for sysfs: trip type
* @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis
@@ -160,12 +161,14 @@ struct thermal_cooling_device {
* @poll_queue: delayed work for polling
* @notify_event: Last notification event
* @suspended: thermal zone suspend indicator
* @resuming: indicates whether or not thermal zone resume is in progress
*/
struct thermal_zone_device {
int id;
char type[THERMAL_NAME_LENGTH];
struct device device;
struct completion removal;
struct completion resume;
struct attribute_group trips_attribute_group;
struct thermal_attr *trip_temp_attrs;
struct thermal_attr *trip_type_attrs;
@@ -197,6 +200,7 @@ struct thermal_zone_device {
struct thermal_debugfs *debugfs;
#endif
bool suspended;
bool resuming;
};
/**