Merge 0507d2526f ("Merge tag 'erofs-for-6.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs") into android-mainline

Steps on the way to 6.8-rc1

Change-Id: Ic5d03b4d7e56e77f9210b7ff3483a1743a9562b9
Signed-off-by: Lee Jones <joneslee@google.com>
This commit is contained in:
Lee Jones
2024-03-14 16:37:23 +00:00
1319 changed files with 56130 additions and 28402 deletions
+4
View File
@@ -478,6 +478,8 @@ Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.vnet.ibm.com>
Paul E. McKenney <paulmck@kernel.org> <paulmck@us.ibm.com>
Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org>
Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com>
Paul Moore <paul@paul-moore.com> <paul.moore@hp.com>
Paul Moore <paul@paul-moore.com> <pmoore@redhat.com>
Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
Peter A Jonsson <pj@ludd.ltu.se>
Peter Oruba <peter.oruba@amd.com>
@@ -542,6 +544,8 @@ Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
Sebastian Reichel <sre@kernel.org> <sre@debian.org>
Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
+11 -4
View File
@@ -9,10 +9,6 @@
Linus
----------
N: Matt Mackal
E: mpm@selenic.com
D: SLOB slab allocator
N: Matti Aarnio
E: mea@nic.funet.fi
D: Alpha systems hacking, IPv6 and other network related stuff
@@ -323,6 +319,9 @@ N: Ohad Ben Cohen
E: ohad@wizery.com
D: Remote Processor (remoteproc) subsystem
D: Remote Processor Messaging (rpmsg) subsystem
D: Hardware spinlock (hwspinlock) subsystem
D: OMAP hwspinlock driver
D: OMAP remoteproc driver
N: Krzysztof Benedyczak
E: golbi@mat.uni.torun.pl
@@ -1572,6 +1571,10 @@ S: Ampferstr. 50 / 4
S: 6020 Innsbruck
S: Austria
N: Mark Hemment
E: markhe@nextd.demon.co.uk
D: SLAB allocator implementation
N: Richard Henderson
E: rth@twiddle.net
E: rth@cygnus.com
@@ -2445,6 +2448,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
N: Olivia Mackall
E: olivia@selenic.com
D: SLOB slab allocator
N: Paul Mackerras
E: paulus@samba.org
D: PPP driver
@@ -16,3 +16,9 @@ Description:
Example output in powerpc:
grep . /sys/bus/event_source/devices/cpu/caps/*
/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
The "branch_counter_nr" in the supported platform exposes the
maximum number of counters which can be shown in the u64 counters
of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
exposes the width of each counter. Both of them can be used by
the perf tool to parse the logged counters in each branch.
@@ -25,6 +25,9 @@ KernelVersion: 5.14
Contact: linux-mtd@lists.infradead.org
Description: (RO) Part name of the SPI NOR flash.
The attribute is optional. User space should not rely on
it to be present or even correct. Instead, user space
should read the jedec_id attribute.
What: /sys/bus/spi/devices/.../spi-nor/sfdp
Date: April 2021
@@ -52,6 +52,9 @@ Description:
echo 0 > /sys/class/devfreq/.../trans_stat
If the transition table is bigger than PAGE_SIZE, reading
this will return an -EFBIG error.
What: /sys/class/devfreq/.../available_frequencies
Date: October 2012
Contact: Nishanth Menon <nm@ti.com>
@@ -25,12 +25,14 @@ Description: Writing 'on' or 'off' to this file makes the kdamond starts or
stops, respectively. Reading the file returns the keywords
based on the current status. Writing 'commit' to this file
makes the kdamond reads the user inputs in the sysfs files
except 'state' again. Writing 'update_schemes_stats' to the
file updates contents of schemes stats files of the kdamond.
Writing 'update_schemes_tried_regions' to the file updates
contents of 'tried_regions' directory of every scheme directory
of this kdamond. Writing 'update_schemes_tried_bytes' to the
file updates only '.../tried_regions/total_bytes' files of this
except 'state' again. Writing 'commit_schemes_quota_goals' to
this file makes the kdamond reads the quota goal files again.
Writing 'update_schemes_stats' to the file updates contents of
schemes stats files of the kdamond. Writing
'update_schemes_tried_regions' to the file updates contents of
'tried_regions' directory of every scheme directory of this
kdamond. Writing 'update_schemes_tried_bytes' to the file
updates only '.../tried_regions/total_bytes' files of this
kdamond. Writing 'clear_schemes_tried_regions' to the file
removes contents of the 'tried_regions' directory.
@@ -212,6 +214,25 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the quotas
charge reset interval of the scheme in milliseconds.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/nr_goals
Date: Nov 2023
Contact: SeongJae Park <sj@kernel.org>
Description: Writing a number 'N' to this file creates the number of
directories for setting automatic tuning of the scheme's
aggressiveness named '0' to 'N-1' under the goals/ directory.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_value
Date: Nov 2023
Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the target
value of the goal metric.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/current_value
Date: Nov 2023
Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the current
value of the goal metric.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -0,0 +1,29 @@
What: /sys/devices/platform/silicom-platform/uc_version
Date: November 2023
KernelVersion: 6.7
Contact: Henry Shi <henrys@silicom-usa.com>
Description:
This file allows to read microcontroller firmware
version of current platform.
What: /sys/devices/platform/silicom-platform/power_cycle
Date: November 2023
KernelVersion: 6.7
Contact: Henry Shi <henrys@silicom-usa.com>
This file allow user to power cycle the platform.
Default value is 0; when set to 1, it powers down
the platform, waits 5 seconds, then powers on the
device. It returns to default value after power cycle.
0 - default value.
What: /sys/devices/platform/silicom-platform/efuse_status
Date: November 2023
KernelVersion: 6.7
Contact: Henry Shi <henrys@silicom-usa.com>
Description:
This file is read only. It returns the current
OTP status:
0 - not programmed.
1 - programmed.
+1 -1
View File
@@ -328,7 +328,7 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled pages can be
marked as idle based on how long (in seconds) it's been since they were
last accessed::
+42 -6
View File
@@ -1093,7 +1093,11 @@ All time durations are in microseconds.
A read-write single value file which exists on non-root
cgroups. The default is "100".
The weight in the range [1, 10000].
For non idle groups (cpu.idle = 0), the weight is in the
range [1, 10000].
If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
then the weight will show as a 0.
cpu.weight.nice
A read-write single value file which exists on non-root
@@ -1157,6 +1161,16 @@ All time durations are in microseconds.
values similar to the sched_setattr(2). This maximum utilization
value is used to clamp the task specific maximum utilization clamp.
cpu.idle
A read-write single value file which exists on non-root cgroups.
The default is 0.
This is the cgroup analog of the per-task SCHED_IDLE sched policy.
Setting this value to a 1 will make the scheduling policy of the
cgroup SCHED_IDLE. The threads inside the cgroup will retain their
own relative priorities, but the cgroup itself will be treated as
very low priority relative to its peers.
Memory
@@ -1679,6 +1693,21 @@ PAGE_SIZE multiple when read back.
limit, it will refuse to take any more stores before existing
entries fault back in or are written out to disk.
memory.zswap.writeback
A read-write single value file. The default value is "1". The
initial value of the root cgroup is 1, and when a new cgroup is
created, it inherits the current value of its parent.
When this is set to 0, all swapping attempts to swapping devices
are disabled. This included both zswap writebacks, and swapping due
to zswap store failures. If the zswap store failures are recurring
(for e.g if the pages are incompressible), users can observe
reclaim inefficiency after disabling writeback (because the same
pages might be rejected again and again).
Note that this is subtly different from setting memory.swap.max to
0, as it still allows for pages to be written to the zswap pool.
memory.pressure
A read-only nested-keyed file.
@@ -2316,6 +2345,13 @@ Cpuset Interface Files
treated to have an implicit value of "cpuset.cpus" in the
formation of local partition.
cpuset.cpus.isolated
A read-only and root cgroup only multiple values file.
This file shows the set of all isolated CPUs used in existing
isolated partitions. It will be empty if no isolated partition
is created.
cpuset.cpus.partition
A read-write single value file which exists on non-root
cpuset-enabled cgroups. This flag is owned by the parent cgroup
@@ -2358,11 +2394,11 @@ Cpuset Interface Files
partition or scheduling domain. The set of exclusive CPUs is
determined by the value of its "cpuset.cpus.exclusive.effective".
When set to "isolated", the CPUs in that partition will
be in an isolated state without any load balancing from the
scheduler. Tasks placed in such a partition with multiple
CPUs should be carefully distributed and bound to each of the
individual CPUs for optimal performance.
When set to "isolated", the CPUs in that partition will be in
an isolated state without any load balancing from the scheduler
and excluded from the unbound workqueues. Tasks placed in such
a partition with multiple CPUs should be carefully distributed
and bound to each of the individual CPUs for optimal performance.
A partition root ("root" or "isolated") can be in one of the
two possible states - valid or invalid. An invalid partition
+1
View File
@@ -119,6 +119,7 @@ configure specific aspects of kernel behavior to your liking.
parport
perf-security
pm/index
pmf
pnp
rapidio
ras
@@ -172,7 +172,7 @@ variables.
Offset of the free_list's member. This value is used to compute the number
of free pages.
Each zone has a free_area structure array called free_area[MAX_ORDER + 1].
Each zone has a free_area structure array called free_area[NR_PAGE_ORDERS].
The free_list represents a linked list of free page blocks.
(list_head, next|prev)
@@ -189,11 +189,11 @@ Offsets of the vmap_area's members. They carry vmalloc-specific
information. Makedumpfile gets the start address of the vmalloc region
from this.
(zone.free_area, MAX_ORDER + 1)
-------------------------------
(zone.free_area, NR_PAGE_ORDERS)
--------------------------------
Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator.
free_area ranges. NR_PAGE_ORDERS is used by the zone buddy allocator.
prb
---
+19 -12
View File
@@ -970,17 +970,17 @@
buddy allocator. Bigger value increase the probability
of catching random memory corruption, but reduce the
amount of memory for normal system use. The maximum
possible value is MAX_ORDER/2. Setting this parameter
to 1 or 2 should be enough to identify most random
memory corruption problems caused by bugs in kernel or
driver code when a CPU writes to (or reads from) a
random memory location. Note that there exists a class
of memory corruptions problems caused by buggy H/W or
F/W or by drivers badly programming DMA (basically when
memory is written at bus level and the CPU MMU is
bypassed) which are not detectable by
CONFIG_DEBUG_PAGEALLOC, hence this option will not help
tracking down these problems.
possible value is MAX_PAGE_ORDER/2. Setting this
parameter to 1 or 2 should be enough to identify most
random memory corruption problems caused by bugs in
kernel or driver code when a CPU writes to (or reads
from) a random memory location. Note that there exists
a class of memory corruptions problems caused by buggy
H/W or F/W or by drivers badly programming DMA
(basically when memory is written at bus level and the
CPU MMU is bypassed) which are not detectable by
CONFIG_DEBUG_PAGEALLOC, hence this option will not
help tracking down these problems.
debug_pagealloc=
[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
@@ -4140,7 +4140,7 @@
[KNL] Minimal page reporting order
Format: <integer>
Adjust the minimal page reporting order. The page
reporting is disabled when it exceeds MAX_ORDER.
reporting is disabled when it exceeds MAX_PAGE_ORDER.
panic= [KNL] Kernel behaviour on panic: delay <timeout>
timeout > 0: seconds before rebooting
@@ -5548,6 +5548,13 @@
print every Nth verbose statement, where N is the value
specified.
regulator_ignore_unused
[REGULATOR]
Prevents regulator framework from disabling regulators
that are unused, due no driver claiming them. This may
be useful for debug and development, but should not be
needed on a platform with proper driver support.
relax_domain_level=
[KNL, SMP] Set scheduler's default relax_domain_level.
See Documentation/admin-guide/cgroup-v1/cpusets.rst.
+102 -41
View File
@@ -59,41 +59,47 @@ Files Hierarchy
The files hierarchy of DAMON sysfs interface is shown below. In the below
figure, parents-children relations are represented with indentations, each
directory is having ``/`` suffix, and files in each directory are separated by
comma (","). ::
comma (",").
/sys/kernel/mm/damon/admin
│ kdamonds/nr_kdamonds
│ │ 0/state,pid
│ │ contexts/nr_contexts
│ │ │ │ 0/avail_operations,operations
│ │ │ │ │ monitoring_attrs/
.. parsed-literal::
:ref:`/sys/kernel/mm/damon <sysfs_root>`/admin
:ref:`kdamonds <sysfs_kdamonds>`/nr_kdamonds
│ │ :ref:`0 <sysfs_kdamond>`/state,pid
│ │ │ :ref:`contexts <sysfs_contexts>`/nr_contexts
│ │ │ │ :ref:`0 <sysfs_context>`/avail_operations,operations
│ │ │ │ │ :ref:`monitoring_attrs <sysfs_monitoring_attrs>`/
│ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
│ │ │ │ │ │ nr_regions/min,max
│ │ │ │ │ targets/nr_targets
│ │ │ │ │ │ 0/pid_target
│ │ │ │ │ │ │ regions/nr_regions
│ │ │ │ │ │ │ │ 0/start,end
│ │ │ │ │ :ref:`targets <sysfs_targets>`/nr_targets
│ │ │ │ │ │ :ref:`0 <sysfs_target>`/pid_target
│ │ │ │ │ │ │ :ref:`regions <sysfs_regions>`/nr_regions
│ │ │ │ │ │ │ │ :ref:`0 <sysfs_region>`/start,end
│ │ │ │ │ │ │ │ ...
│ │ │ │ │ │ ...
│ │ │ │ │ schemes/nr_schemes
│ │ │ │ │ │ 0/action,apply_interval_us
│ │ │ │ │ │ │ access_pattern/
│ │ │ │ │ :ref:`schemes <sysfs_schemes>`/nr_schemes
│ │ │ │ │ │ :ref:`0 <sysfs_scheme>`/action,apply_interval_us
│ │ │ │ │ │ │ :ref:`access_pattern <sysfs_access_pattern>`/
│ │ │ │ │ │ │ │ sz/min,max
│ │ │ │ │ │ │ │ nr_accesses/min,max
│ │ │ │ │ │ │ │ age/min,max
│ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
│ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
│ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
│ │ │ │ │ │ │ filters/nr_filters
│ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
│ │ │ │ │ │ │ │ │ 0/target_value,current_value
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
│ │ │ │ │ │ │ :ref:`filters <sysfs_filters>`/nr_filters
│ │ │ │ │ │ │ │ 0/type,matching,memcg_id
│ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
│ │ │ │ │ │ │ tried_regions/total_bytes
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
│ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
│ │ │ │ │ │ │ │ ...
│ │ │ │ │ │ ...
│ │ │ │ ...
│ │ ...
.. _sysfs_root:
Root
----
@@ -102,6 +108,8 @@ has one directory named ``admin``. The directory contains the files for
privileged user space programs' control of DAMON. User space tools or daemons
having the root permission could use this directory.
.. _sysfs_kdamonds:
kdamonds/
---------
@@ -113,6 +121,8 @@ details) exists. In the beginning, this directory has only one file,
child directories named ``0`` to ``N-1``. Each directory represents each
kdamond.
.. _sysfs_kdamond:
kdamonds/<N>/
-------------
@@ -120,29 +130,37 @@ In each kdamond directory, two files (``state`` and ``pid``) and one directory
(``contexts``) exist.
Reading ``state`` returns ``on`` if the kdamond is currently running, or
``off`` if it is not running. Writing ``on`` or ``off`` makes the kdamond be
in the state. Writing ``commit`` to the ``state`` file makes kdamond reads the
user inputs in the sysfs files except ``state`` file again. Writing
``update_schemes_stats`` to ``state`` file updates the contents of stats files
for each DAMON-based operation scheme of the kdamond. For details of the
stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.
``off`` if it is not running.
Writing ``update_schemes_tried_regions`` to ``state`` file updates the
DAMON-based operation scheme action tried regions directory for each
DAMON-based operation scheme of the kdamond. Writing
``update_schemes_tried_bytes`` to ``state`` file updates only
``.../tried_regions/total_bytes`` files. Writing
``clear_schemes_tried_regions`` to ``state`` file clears the DAMON-based
operating scheme action tried regions directory for each DAMON-based operation
scheme of the kdamond. For details of the DAMON-based operation scheme action
tried regions directory, please refer to :ref:`tried_regions section
<sysfs_schemes_tried_regions>`.
Users can write below commands for the kdamond to the ``state`` file.
- ``on``: Start running.
- ``off``: Stop running.
- ``commit``: Read the user inputs in the sysfs files except ``state`` file
again.
- ``commit_schemes_quota_goals``: Read the DAMON-based operation schemes'
:ref:`quota goals <sysfs_schemes_quota_goals>`.
- ``update_schemes_stats``: Update the contents of stats files for each
DAMON-based operation scheme of the kdamond. For details of the stats,
please refer to :ref:`stats section <sysfs_schemes_stats>`.
- ``update_schemes_tried_regions``: Update the DAMON-based operation scheme
action tried regions directory for each DAMON-based operation scheme of the
kdamond. For details of the DAMON-based operation scheme action tried
regions directory, please refer to
:ref:`tried_regions section <sysfs_schemes_tried_regions>`.
- ``update_schemes_tried_bytes``: Update only ``.../tried_regions/total_bytes``
files.
- ``clear_schemes_tried_regions``: Clear the DAMON-based operating scheme
action tried regions directory for each DAMON-based operation scheme of the
kdamond.
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
``contexts`` directory contains files for controlling the monitoring contexts
that this kdamond will execute.
.. _sysfs_contexts:
kdamonds/<N>/contexts/
----------------------
@@ -153,7 +171,7 @@ number (``N``) to the file creates the number of child directories named as
details). At the moment, only one context per kdamond is supported, so only
``0`` or ``1`` can be written to the file.
.. _sysfs_contexts:
.. _sysfs_context:
contexts/<N>/
-------------
@@ -203,6 +221,8 @@ writing to and rading from the files.
For more details about the intervals and monitoring regions range, please refer
to the Design document (:doc:`/mm/damon/design`).
.. _sysfs_targets:
contexts/<N>/targets/
---------------------
@@ -210,6 +230,8 @@ In the beginning, this directory has only one file, ``nr_targets``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each monitoring target.
.. _sysfs_target:
targets/<N>/
------------
@@ -244,6 +266,8 @@ In the beginning, this directory has only one file, ``nr_regions``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each initial monitoring target region.
.. _sysfs_region:
regions/<N>/
------------
@@ -254,6 +278,8 @@ region by writing to and reading from the files, respectively.
Each region should not overlap with others. ``end`` of directory ``N`` should
be equal or smaller than ``start`` of directory ``N+1``.
.. _sysfs_schemes:
contexts/<N>/schemes/
---------------------
@@ -265,6 +291,8 @@ In the beginning, this directory has only one file, ``nr_schemes``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each DAMON-based operation scheme.
.. _sysfs_scheme:
schemes/<N>/
------------
@@ -277,7 +305,7 @@ The ``action`` file is for setting and getting the scheme's :ref:`action
from the file and their meaning are as below.
Note that support of each action depends on the running DAMON operations set
:ref:`implementation <sysfs_contexts>`.
:ref:`implementation <sysfs_context>`.
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
Supported by ``vaddr`` and ``fvaddr`` operations set.
@@ -299,6 +327,8 @@ Note that support of each action depends on the running DAMON operations set
The ``apply_interval_us`` file is for setting and getting the scheme's
:ref:`apply_interval <damon_design_damos>` in microseconds.
.. _sysfs_access_pattern:
schemes/<N>/access_pattern/
---------------------------
@@ -312,6 +342,8 @@ to and reading from the ``min`` and ``max`` files under ``sz``,
``nr_accesses``, and ``age`` directories, respectively. Note that the ``min``
and the ``max`` form a closed interval.
.. _sysfs_quotas:
schemes/<N>/quotas/
-------------------
@@ -319,8 +351,7 @@ The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
DAMON-based operation scheme.
Under ``quotas`` directory, three files (``ms``, ``bytes``,
``reset_interval_ms``) and one directory (``weights``) having three files
(``sz_permil``, ``nr_accesses_permil``, and ``age_permil``) in it exist.
``reset_interval_ms``) and two directores (``weights`` and ``goals``) exist.
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
``reset interval`` in milliseconds by writing the values to the three files,
@@ -330,11 +361,37 @@ apply the action to only up to ``bytes`` bytes of memory regions within the
``reset_interval_ms``. Setting both ``ms`` and ``bytes`` zero disables the
quota limits.
You can also set the :ref:`prioritization weights
Under ``weights`` directory, three files (``sz_permil``,
``nr_accesses_permil``, and ``age_permil``) exist.
You can set the :ref:`prioritization weights
<damon_design_damos_quotas_prioritization>` for size, access frequency, and age
in per-thousand unit by writing the values to the three files under the
``weights`` directory.
.. _sysfs_schemes_quota_goals:
schemes/<N>/quotas/goals/
-------------------------
The directory for the :ref:`automatic quota tuning goals
<damon_design_damos_quotas_auto_tuning>` of the given DAMON-based operation
scheme.
In the beginning, this directory has only one file, ``nr_goals``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each goal and current achievement.
Among the multiple feedback, the best one is used.
Each goal directory contains two files, namely ``target_value`` and
``current_value``. Users can set and get any number to those files to set the
feedback. User space main workload's latency or throughput, system metrics
like free memory ratio or memory pressure stall time (PSI) could be example
metrics for the values. Note that users should write
``commit_schemes_quota_goals`` to the ``state`` file of the :ref:`kdamond
directory <sysfs_kdamond>` to pass the feedback to DAMON.
.. _sysfs_watermarks:
schemes/<N>/watermarks/
-----------------------
@@ -354,6 +411,8 @@ as below.
The ``interval`` should written in microseconds unit.
.. _sysfs_filters:
schemes/<N>/filters/
--------------------
@@ -394,7 +453,7 @@ pages of all memory cgroups except ``/having_care_already``.::
echo N > 1/matching
Note that ``anon`` and ``memcg`` filters are currently supported only when
``paddr`` :ref:`implementation <sysfs_contexts>` is being used.
``paddr`` :ref:`implementation <sysfs_context>` is being used.
Also, memory regions that are filtered out by ``addr`` or ``target`` filters
are not counted as the scheme has tried to those, while regions that filtered
@@ -449,6 +508,8 @@ and query-like efficient data access monitoring results retrievals. For the
latter use case, in particular, users can set the ``action`` as ``stat`` and
set the ``access pattern`` as their interested pattern that they want to query.
.. _sysfs_schemes_tried_region:
tried_regions/<N>/
------------------
+55
View File
@@ -80,6 +80,9 @@ pages_to_scan
how many pages to scan before ksmd goes to sleep
e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``.
The pages_to_scan value cannot be changed if ``advisor_mode`` has
been set to scan-time.
Default: 100 (chosen for demonstration purposes)
sleep_millisecs
@@ -164,6 +167,29 @@ smart_scan
optimization is enabled. The ``pages_skipped`` metric shows how
effective the setting is.
advisor_mode
The ``advisor_mode`` selects the current advisor. Two modes are
supported: none and scan-time. The default is none. By setting
``advisor_mode`` to scan-time, the scan time advisor is enabled.
The section about ``advisor`` explains in detail how the scan time
advisor works.
adivsor_max_cpu
specifies the upper limit of the cpu percent usage of the ksmd
background thread. The default is 70.
advisor_target_scan_time
specifies the target scan time in seconds to scan all the candidate
pages. The default value is 200 seconds.
advisor_min_pages_to_scan
specifies the lower limit of the ``pages_to_scan`` parameter of the
scan time advisor. The default is 500.
adivsor_max_pages_to_scan
specifies the upper limit of the ``pages_to_scan`` parameter of the
scan time advisor. The default is 30000.
The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
general_profit
@@ -263,6 +289,35 @@ ksm_swpin_copy
note that KSM page might be copied when swapping in because do_swap_page()
cannot do all the locking needed to reconstitute a cross-anon_vma KSM page.
Advisor
=======
The number of candidate pages for KSM is dynamic. It can be often observed
that during the startup of an application more candidate pages need to be
processed. Without an advisor the ``pages_to_scan`` parameter needs to be
sized for the maximum number of candidate pages. The scan time advisor can
changes the ``pages_to_scan`` parameter based on demand.
The advisor can be enabled, so KSM can automatically adapt to changes in the
number of candidate pages to scan. Two advisors are implemented: none and
scan-time. With none, no advisor is enabled. The default is none.
The scan time advisor changes the ``pages_to_scan`` parameter based on the
observed scan times. The possible values for the ``pages_to_scan`` parameter is
limited by the ``advisor_max_cpu`` parameter. In addition there is also the
``advisor_target_scan_time`` parameter. This parameter sets the target time to
scan all the KSM candidate pages. The parameter ``advisor_target_scan_time``
decides how aggressive the scan time advisor scans candidate pages. Lower
values make the scan time advisor to scan more aggresively. This is the most
important parameter for the configuration of the scan time advisor.
The initial value and the maximum value can be changed with
``advisor_min_pages_to_scan`` and ``advisor_max_pages_to_scan``. The default
values are sufficient for most workloads and use cases.
The ``pages_to_scan`` parameter is re-calculated after a scan has been completed.
--
Izik Eidus,
Hugh Dickins, 17 Nov 2009
+1
View File
@@ -253,6 +253,7 @@ Following flags about pages are currently supported:
- ``PAGE_IS_SWAPPED`` - Page is in swapped
- ``PAGE_IS_PFNZERO`` - Page has zero PFN
- ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
+78 -19
View File
@@ -45,10 +45,25 @@ components:
the two is using hugepages just because of the fact the TLB miss is
going to run faster.
Modern kernels support "multi-size THP" (mTHP), which introduces the
ability to allocate memory in blocks that are bigger than a base page
but smaller than traditional PMD-size (as described above), in
increments of a power-of-2 number of pages. mTHP can back anonymous
memory (for example 16K, 32K, 64K, etc). These THPs continue to be
PTE-mapped, but in many cases can still provide similar benefits to
those outlined above: Page faults are significantly reduced (by a
factor of e.g. 4, 8, 16, etc), but latency spikes are much less
prominent because the size of each page isn't as huge as the PMD-sized
variant and there is less memory to clear in each page fault. Some
architectures also employ TLB compression mechanisms to squeeze more
entries in when a set of PTEs are virtually and physically contiguous
and approporiately aligned. In this case, TLB misses will occur less
often.
THP can be enabled system wide or restricted to certain tasks or even
memory ranges inside task's address space. Unless THP is completely
disabled, there is ``khugepaged`` daemon that scans memory and
collapses sequences of basic pages into huge pages.
collapses sequences of basic pages into PMD-sized huge pages.
The THP behaviour is controlled via :ref:`sysfs <thp_sysfs>`
interface and using madvise(2) and prctl(2) system calls.
@@ -95,12 +110,40 @@ Global THP controls
Transparent Hugepage Support for anonymous memory can be entirely disabled
(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
regions (to avoid the risk of consuming more memory resources) or enabled
system wide. This can be achieved with one of::
system wide. This can be achieved per-supported-THP-size with one of::
echo always >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
echo madvise >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
echo never >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
where <size> is the hugepage size being addressed, the available sizes
for which vary by system.
For example::
echo always >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
Alternatively it is possible to specify that a given hugepage size
will inherit the top-level "enabled" value::
echo inherit >/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/enabled
For example::
echo inherit >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
The top-level setting (for use with "inherit") can be set by issuing
one of the following commands::
echo always >/sys/kernel/mm/transparent_hugepage/enabled
echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
echo never >/sys/kernel/mm/transparent_hugepage/enabled
By default, PMD-sized hugepages have enabled="inherit" and all other
hugepage sizes have enabled="never". If enabling multiple hugepage
sizes, the kernel will select the most appropriate enabled size for a
given allocation.
It's also possible to limit defrag efforts in the VM to generate
anonymous hugepages in case they're not immediately free to madvise
regions or to never try to defrag memory and simply fallback to regular
@@ -146,25 +189,34 @@ madvise
never
should be self-explanatory.
By default kernel tries to use huge zero page on read page fault to
anonymous mapping. It's possible to disable huge zero page by writing 0
or enable it back by writing 1::
By default kernel tries to use huge, PMD-mappable zero page on read
page fault to anonymous mapping. It's possible to disable huge zero
page by writing 0 or enable it back by writing 1::
echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
Some userspace (such as a test program, or an optimized memory allocation
library) may want to know the size (in bytes) of a transparent hugepage::
Some userspace (such as a test program, or an optimized memory
allocation library) may want to know the size (in bytes) of a
PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
khugepaged will be automatically started when
transparent_hugepage/enabled is set to "always" or "madvise, and it'll
be automatically shutdown if it's set to "never".
khugepaged will be automatically started when one or more hugepage
sizes are enabled (either by directly setting "always" or "madvise",
or by setting "inherit" while the top-level enabled is set to "always"
or "madvise"), and it'll be automatically shutdown when the last
hugepage size is disabled (either by directly setting "never", or by
setting "inherit" while the top-level enabled is set to "never").
Khugepaged controls
-------------------
.. note::
khugepaged currently only searches for opportunities to collapse to
PMD-sized THP and no attempt is made to collapse to other THP
sizes.
khugepaged runs usually at low frequency so while one may not want to
invoke defrag algorithms synchronously during the page faults, it
should be worth invoking defrag at least in khugepaged. However it's
@@ -282,19 +334,26 @@ force
Need of application restart
===========================
The transparent_hugepage/enabled values and tmpfs mount option only affect
future behavior. So to make them effective you need to restart any
application that could have been using hugepages. This also applies to the
regions registered in khugepaged.
The transparent_hugepage/enabled and
transparent_hugepage/hugepages-<size>kB/enabled values and tmpfs mount
option only affect future behavior. So to make them effective you need
to restart any application that could have been using hugepages. This
also applies to the regions registered in khugepaged.
Monitoring usage
================
The number of anonymous transparent huge pages currently used by the
.. note::
Currently the below counters only record events relating to
PMD-sized THP. Events relating to other THP sizes are not included.
The number of PMD-sized anonymous transparent huge pages currently used by the
system is available by reading the AnonHugePages field in ``/proc/meminfo``.
To identify what applications are using anonymous transparent huge pages,
it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages fields
for each mapping.
To identify what applications are using PMD-sized anonymous transparent huge
pages, it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages
fields for each mapping. (Note that AnonHugePages only applies to traditional
PMD-sized THP for historical reasons and should have been called
AnonHugePmdMapped).
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
@@ -413,7 +472,7 @@ for huge pages.
Optimizing the applications
===========================
To be guaranteed that the kernel will map a 2M page immediately in any
To be guaranteed that the kernel will map a THP immediately in any
memory region, the mmap region has to be hugepage naturally
aligned. posix_memalign() can provide that guarantee.
@@ -113,6 +113,9 @@ events, except page fault notifications, may be generated:
areas. ``UFFD_FEATURE_MINOR_SHMEM`` is the analogous feature indicating
support for shmem virtual memory areas.
- ``UFFD_FEATURE_MOVE`` indicates that the kernel supports moving an
existing page contents from userspace.
The userland application should set the feature flags it intends to use
when invoking the ``UFFDIO_API`` ioctl, to request that those features be
enabled if supported.
+20
View File
@@ -153,6 +153,26 @@ attribute, e. g.::
Setting this parameter to 100 will disable the hysteresis.
Some users cannot tolerate the swapping that comes with zswap store failures
and zswap writebacks. Swapping can be disabled entirely (without disabling
zswap itself) on a cgroup-basis as follows:
echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
Note that if the store failures are recurring (for e.g if the pages are
incompressible), users can observe reclaim inefficiency after disabling
writeback (because the same pages might be rejected again and again).
When there is a sizable amount of cold memory residing in the zswap pool, it
can be advantageous to proactively write these cold pages to swap and reclaim
the memory for other use cases. By default, the zswap shrinker is disabled.
User can enable it as follows:
echo Y > /sys/module/zswap/parameters/shrinker_enabled
This can be enabled at the boot time if ``CONFIG_ZSWAP_SHRINKER_DEFAULT_ON`` is
selected.
A debugfs interface is provided for various statistic about pool size, number
of pages stored, same-value filled pages and various counters for the reasons
pages are rejected.
+24
View File
@@ -0,0 +1,24 @@
.. SPDX-License-Identifier: GPL-2.0
Set udev rules for PMF Smart PC Builder
---------------------------------------
AMD PMF(Platform Management Framework) Smart PC Solution builder has to set the system states
like S0i3, Screen lock, hibernate etc, based on the output actions provided by the PMF
TA (Trusted Application).
In order for this to work the PMF driver generates a uevent for userspace to react to. Below are
sample udev rules that can facilitate this experience when a machine has PMF Smart PC solution builder
enabled.
Please add the following line(s) to
``/etc/udev/rules.d/99-local.rules``::
DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="0", RUN+="/usr/bin/systemctl suspend"
DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="1", RUN+="/usr/bin/systemctl hibernate"
DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="2", RUN+="/bin/loginctl lock-sessions"
EVENT_ID values:
0= Put the system to S0i3/S2Idle
1= Put the system to hibernate
2= Lock the screen
+4
View File
@@ -81,6 +81,9 @@ section.
Sometimes it is necessary to ensure the next call to store to a maple tree does
not allocate memory, please see :ref:`maple-tree-advanced-api` for this use case.
You can use mtree_dup() to duplicate an entire maple tree. It is a more
efficient way than inserting all elements one by one into a new tree.
Finally, you can remove all entries from a maple tree by calling
mtree_destroy(). If the maple tree entries are pointers, you may wish to free
the entries first.
@@ -112,6 +115,7 @@ Takes ma_lock internally:
* mtree_insert()
* mtree_insert_range()
* mtree_erase()
* mtree_dup()
* mtree_destroy()
* mt_set_in_rcu()
* mt_clear_in_rcu()
+1 -1
View File
@@ -37,7 +37,7 @@ The Slab Cache
.. kernel-doc:: include/linux/slab.h
:internal:
.. kernel-doc:: mm/slab.c
.. kernel-doc:: mm/slub.c
:export:
.. kernel-doc:: mm/slab_common.c
@@ -11,3 +11,12 @@ state on a per-test basis, register custom cleanup actions, and more.
.. kernel-doc:: include/kunit/resource.h
:internal:
Managed Devices
---------------
Functions for using KUnit-managed struct device and struct device_driver.
Include ``kunit/device.h`` to use these.
.. kernel-doc:: include/kunit/device.h
:internal:
+47 -4
View File
@@ -49,9 +49,52 @@ loaded.
The results will appear in TAP format in ``dmesg``.
debugfs
=======
KUnit can be accessed from userspace via the debugfs filesystem (See more
information about debugfs at Documentation/filesystems/debugfs.rst).
If ``CONFIG_KUNIT_DEBUGFS`` is enabled, the KUnit debugfs filesystem is
mounted at /sys/kernel/debug/kunit. You can use this filesystem to perform
the following actions.
Retrieve Test Results
=====================
You can use debugfs to retrieve KUnit test results. The test results are
accessible from the debugfs filesystem in the following read-only file:
.. code-block :: bash
/sys/kernel/debug/kunit/<test_suite>/results
The test results are printed in a KTAP document. Note this document is separate
to the kernel log and thus, may have different test suite numbering.
Run Tests After Kernel Has Booted
=================================
You can use the debugfs filesystem to trigger built-in tests to run after
boot. To run the test suite, you can use the following command to write to
the ``/sys/kernel/debug/kunit/<test_suite>/run`` file:
.. code-block :: bash
echo "any string" > /sys/kernel/debugfs/kunit/<test_suite>/run
As a result, the test suite runs and the results are printed to the kernel
log.
However, this feature is not available with KUnit suites that use init data,
because init data may have been discarded after the kernel boots. KUnit
suites that use init data should be defined using the
kunit_test_init_section_suites() macro.
Also, you cannot use this feature to run tests concurrently. Instead a test
will wait to run until other tests have completed or failed.
.. note ::
If ``CONFIG_KUNIT_DEBUGFS`` is enabled, KUnit test results will
be accessible from the ``debugfs`` filesystem (if mounted).
They will be in ``/sys/kernel/debug/kunit/<test_suite>/results``, in
TAP format.
For test authors, to use this feature, tests will need to correctly initialise
and/or clean up any data, so the test runs correctly a second time.
@@ -428,3 +428,10 @@ This attribute indicates the name of the module associated with the test.
This attribute is automatically saved as a string and is printed for each suite.
Tests can also be filtered using this attribute.
``is_init``
This attribute indicates whether the test uses init data or functions.
This attribute is automatically saved as a boolean and tests can also be
filtered using this attribute.
+57 -3
View File
@@ -651,12 +651,16 @@ For example:
}
Note that, for functions like device_unregister which only accept a single
pointer-sized argument, it's possible to directly cast that function to
a ``kunit_action_t`` rather than writing a wrapper function, for example:
pointer-sized argument, it's possible to automatically generate a wrapper
with the ``KUNIT_DEFINE_ACTION_WRAPPER()`` macro, for example:
.. code-block:: C
kunit_add_action(test, (kunit_action_t *)&device_unregister, &dev);
KUNIT_DEFINE_ACTION_WRAPPER(device_unregister, device_unregister_wrapper, struct device *);
kunit_add_action(test, &device_unregister_wrapper, &dev);
You should do this in preference to manually casting to the ``kunit_action_t`` type,
as casting function pointers will break Control Flow Integrity (CFI).
``kunit_add_action`` can fail if, for example, the system is out of memory.
You can use ``kunit_add_action_or_reset`` instead which runs the action
@@ -793,3 +797,53 @@ structures as shown below:
KUnit is not enabled, or if no test is running in the current task, it will do
nothing. This compiles down to either a no-op or a static key check, so will
have a negligible performance impact when no test is running.
Managing Fake Devices and Drivers
---------------------------------
When testing drivers or code which interacts with drivers, many functions will
require a ``struct device`` or ``struct device_driver``. In many cases, setting
up a real device is not required to test any given function, so a fake device
can be used instead.
KUnit provides helper functions to create and manage these fake devices, which
are internally of type ``struct kunit_device``, and are attached to a special
``kunit_bus``. These devices support managed device resources (devres), as
described in Documentation/driver-api/driver-model/devres.rst
To create a KUnit-managed ``struct device_driver``, use ``kunit_driver_create()``,
which will create a driver with the given name, on the ``kunit_bus``. This driver
will automatically be destroyed when the corresponding test finishes, but can also
be manually destroyed with ``driver_unregister()``.
To create a fake device, use the ``kunit_device_register()``, which will create
and register a device, using a new KUnit-managed driver created with ``kunit_driver_create()``.
To provide a specific, non-KUnit-managed driver, use ``kunit_device_register_with_driver()``
instead. Like with managed drivers, KUnit-managed fake devices are automatically
cleaned up when the test finishes, but can be manually cleaned up early with
``kunit_device_unregister()``.
The KUnit devices should be used in preference to ``root_device_register()``, and
instead of ``platform_device_register()`` in cases where the device is not otherwise
a platform device.
For example:
.. code-block:: c
#include <kunit/device.h>
static void test_my_device(struct kunit *test)
{
struct device *fake_device;
const char *dev_managed_string;
// Create a fake device.
fake_device = kunit_device_register(test, "my_device");
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, fake_device)
// Pass it to functions which need a device.
dev_managed_string = devm_kstrdup(fake_device, "Hello, World!");
// Everything is cleaned up automatically when the test ends.
}
@@ -29,6 +29,12 @@ properties:
maxItems: 1
description:
Specifies the base address and size of vMPM registers in RPM MSG RAM.
deprecated: true
qcom,rpm-msg-ram:
$ref: /schemas/types.yaml#/definitions/phandle
description:
Phandle to the APSS MPM slice of the RPM Message RAM
interrupts:
maxItems: 1
@@ -67,34 +73,46 @@ properties:
required:
- compatible
- reg
- interrupts
- mboxes
- interrupt-controller
- '#interrupt-cells'
- qcom,mpm-pin-count
- qcom,mpm-pin-map
- qcom,rpm-msg-ram
additionalProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
mpm: interrupt-controller@45f01b8 {
compatible = "qcom,mpm";
interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
reg = <0x45f01b8 0x1000>;
mboxes = <&apcs_glb 1>;
interrupt-controller;
#interrupt-cells = <2>;
interrupt-parent = <&intc>;
qcom,mpm-pin-count = <96>;
qcom,mpm-pin-map = <2 275>,
<5 296>,
<12 422>,
<24 79>,
<86 183>,
<90 260>,
<91 260>;
#power-domain-cells = <0>;
remoteproc-rpm {
compatible = "qcom,msm8998-rpm-proc", "qcom,rpm-proc";
glink-edge {
compatible = "qcom,glink-rpm";
interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>;
qcom,rpm-msg-ram = <&rpm_msg_ram>;
mboxes = <&apcs_glb 0>;
};
mpm: interrupt-controller {
compatible = "qcom,mpm";
qcom,rpm-msg-ram = <&apss_mpm>;
interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
mboxes = <&apcs_glb 1>;
interrupt-controller;
#interrupt-cells = <2>;
interrupt-parent = <&intc>;
qcom,mpm-pin-count = <96>;
qcom,mpm-pin-map = <2 275>,
<5 296>,
<12 422>,
<24 79>,
<86 183>,
<91 260>;
#power-domain-cells = <0>;
};
};
@@ -26,6 +26,7 @@ properties:
- renesas,r9a07g043u-irqc # RZ/G2UL
- renesas,r9a07g044-irqc # RZ/G2{L,LC}
- renesas,r9a07g054-irqc # RZ/V2L
- renesas,r9a08g045-irqc # RZ/G3S
- const: renesas,rzg2l-irqc
'#interrupt-cells':
@@ -167,7 +168,9 @@ allOf:
properties:
compatible:
contains:
const: renesas,r9a07g043u-irqc
enum:
- renesas,r9a07g043u-irqc
- renesas,r9a08g045-irqc
then:
properties:
interrupts:
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: U-Boot bootloader partition
description: |
U-Boot is a bootlodaer commonly used in embedded devices. It's almost always
U-Boot is a bootloader commonly used in embedded devices. It's almost always
located on some kind of flash device.
Device configuration is stored as a set of environment variables that are
@@ -105,6 +105,8 @@ properties:
description:
Interrupt signaling a critical under-voltage event.
system-critical-regulator: true
required:
- compatible
- regulator-name
@@ -42,6 +42,7 @@ description: |
For PM7325, smps1 - smps8, ldo1 - ldo19
For PM8005, smps1 - smps4
For PM8009, smps1 - smps2, ldo1 - ldo7
For PM8010, ldo1 - ldo7
For PM8150, smps1 - smps10, ldo1 - ldo18
For PM8150L, smps1 - smps8, ldo1 - ldo11, bob, flash, rgb
For PM8350, smps1 - smps12, ldo1 - ldo10
@@ -68,6 +69,7 @@ properties:
- qcom,pm8005-rpmh-regulators
- qcom,pm8009-rpmh-regulators
- qcom,pm8009-1-rpmh-regulators
- qcom,pm8010-rpmh-regulators
- qcom,pm8150-rpmh-regulators
- qcom,pm8150l-rpmh-regulators
- qcom,pm8350-rpmh-regulators
@@ -238,6 +240,18 @@ allOf:
"^vdd-l[1-47]-supply$": true
"^vdd-s[1-2]-supply$": true
- if:
properties:
compatible:
enum:
- qcom,pm8010-rpmh-regulators
then:
properties:
vdd-l1-l2-supply: true
vdd-l3-l4-supply: true
patternProperties:
"^vdd-l[5-7]-supply$": true
- if:
properties:
compatible:
@@ -47,6 +47,9 @@ description:
For pm8916, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11,
l12, l13, l14, l15, l16, l17, l18
For pm8937, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,
l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23
For pm8941, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11,
l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2,
lvs3, 5vs1, 5vs2
@@ -92,6 +95,7 @@ properties:
- qcom,rpm-pm8841-regulators
- qcom,rpm-pm8909-regulators
- qcom,rpm-pm8916-regulators
- qcom,rpm-pm8937-regulators
- qcom,rpm-pm8941-regulators
- qcom,rpm-pm8950-regulators
- qcom,rpm-pm8953-regulators
@@ -22,6 +22,7 @@ properties:
- qcom,pm8841-regulators
- qcom,pm8909-regulators
- qcom,pm8916-regulators
- qcom,pm8937-regulators
- qcom,pm8941-regulators
- qcom,pm8950-regulators
- qcom,pm8994-regulators
@@ -291,6 +292,24 @@ allOf:
patternProperties:
"^vdd_s[1-3]-supply$": true
- if:
properties:
compatible:
contains:
enum:
- qcom,pm8937-regulators
then:
properties:
vdd_l1_l19-supply: true
vdd_l20_l21-supply: true
vdd_l2_l23-supply: true
vdd_l3-supply: true
vdd_l4_l5_l6_l7_l16-supply: true
vdd_l8_l11_l12_l17_l22-supply: true
vdd_l9_l10_l13_l14_l15_l18-supply: true
patternProperties:
"^vdd_s[1-6]-supply$": true
- if:
properties:
compatible:
@@ -36,10 +36,11 @@ unevaluatedProperties: false
examples:
- |
pm8150b {
pmic {
#address-cells = <1>;
#size-cells = <0>;
pm8150b_vbus: usb-vbus-regulator@1100 {
usb-vbus-regulator@1100 {
compatible = "qcom,pm8150b-vbus-reg";
reg = <0x1100>;
regulator-min-microamp = <500000>;
@@ -114,6 +114,11 @@ properties:
description: Enable pull down resistor when the regulator is disabled.
type: boolean
system-critical-regulator:
description: Set if the regulator is critical to system stability or
functionality.
type: boolean
regulator-over-current-protection:
description: Enable over current protection.
type: boolean
@@ -181,6 +186,14 @@ properties:
be enabled but limit setting can be omitted. Limit is given as microvolt
offset from voltage set to regulator.
regulator-uv-less-critical-window-ms:
description: Specifies the time window (in milliseconds) following a
critical under-voltage event during which the system can continue to
operate safely while performing less critical operations. This property
provides a defined duration before a more severe reaction to the
under-voltage event is needed, allowing for certain non-urgent actions to
be carried out in preparation for potential power loss.
regulator-temp-protection-kelvin:
description: Set over temperature protection limit. This is a limit where
hardware performs emergency shutdown. Zero can be passed to disable
@@ -1,31 +0,0 @@
Analog Devices AXI SPI Engine controller Device Tree Bindings
Required properties:
- compatible : Must be "adi,axi-spi-engine-1.00.a""
- reg : Physical base address and size of the register map.
- interrupts : Property with a value describing the interrupt
number.
- clock-names : List of input clock names - "s_axi_aclk", "spi_clk"
- clocks : Clock phandles and specifiers (See clock bindings for
details on clock-names and clocks).
- #address-cells : Must be <1>
- #size-cells : Must be <0>
Optional subnodes:
Subnodes are use to represent the SPI slave devices connected to the SPI
master. They follow the generic SPI bindings as outlined in spi-bus.txt.
Example:
spi@@44a00000 {
compatible = "adi,axi-spi-engine-1.00.a";
reg = <0x44a00000 0x1000>;
interrupts = <0 56 4>;
clocks = <&clkc 15 &clkc 15>;
clock-names = "s_axi_aclk", "spi_clk";
#address-cells = <1>;
#size-cells = <0>;
/* SPI devices */
};
@@ -0,0 +1,66 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/spi/adi,axi-spi-engine.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Analog Devices AXI SPI Engine Controller
description: |
The AXI SPI Engine controller is part of the SPI Engine framework[1] and
allows memory mapped access to the SPI Engine control bus. This allows it
to be used as a general purpose software driven SPI controller as well as
some optional advanced acceleration and offloading capabilities.
[1] https://wiki.analog.com/resources/fpga/peripherals/spi_engine
maintainers:
- Michael Hennerich <Michael.Hennerich@analog.com>
- Nuno Sá <nuno.sa@analog.com>
allOf:
- $ref: /schemas/spi/spi-controller.yaml#
properties:
compatible:
const: adi,axi-spi-engine-1.00.a
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: The AXI interconnect clock.
- description: The SPI controller clock.
clock-names:
items:
- const: s_axi_aclk
- const: spi_clk
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
unevaluatedProperties: false
examples:
- |
spi@44a00000 {
compatible = "adi,axi-spi-engine-1.00.a";
reg = <0x44a00000 0x1000>;
interrupts = <0 56 4>;
clocks = <&clkc 15>, <&clkc 15>;
clock-names = "s_axi_aclk", "spi_clk";
#address-cells = <1>;
#size-cells = <0>;
/* SPI devices */
};
@@ -21,7 +21,7 @@ properties:
- enum:
- renesas,rspi-r7s72100 # RZ/A1H
- renesas,rspi-r7s9210 # RZ/A2
- renesas,r9a07g043-rspi # RZ/G2UL
- renesas,r9a07g043-rspi # RZ/G2UL and RZ/Five
- renesas,r9a07g044-rspi # RZ/G2{L,LC}
- renesas,r9a07g054-rspi # RZ/V2L
- const: renesas,rspi-rz
@@ -72,8 +72,6 @@ properties:
- const: snps,dw-apb-ssi
- description: Intel Keem Bay SPI Controller
const: intel,keembay-ssi
- description: Intel Thunder Bay SPI Controller
const: intel,thunderbay-ssi
- description: Intel Mount Evans Integrated Management Complex SPI Controller
const: intel,mountevans-imc-ssi
- description: AMD Pensando Elba SoC SPI Controller
@@ -23,7 +23,9 @@ properties:
compatible:
enum:
- st,stm32f4-spi
- st,stm32f7-spi
- st,stm32h7-spi
- st,stm32mp25-spi
reg:
maxItems: 1
@@ -16,6 +16,7 @@ properties:
- allwinner,sun8i-a83t-ths
- allwinner,sun8i-h3-ths
- allwinner,sun8i-r40-ths
- allwinner,sun20i-d1-ths
- allwinner,sun50i-a64-ths
- allwinner,sun50i-a100-ths
- allwinner,sun50i-h5-ths
@@ -61,6 +62,7 @@ allOf:
compatible:
contains:
enum:
- allwinner,sun20i-d1-ths
- allwinner,sun50i-a100-ths
- allwinner,sun50i-h6-ths
@@ -84,7 +86,9 @@ allOf:
properties:
compatible:
contains:
const: allwinner,sun8i-h3-ths
enum:
- allwinner,sun8i-h3-ths
- allwinner,sun20i-d1-ths
then:
properties:
@@ -103,6 +107,7 @@ allOf:
enum:
- allwinner,sun8i-h3-ths
- allwinner,sun8i-r40-ths
- allwinner,sun20i-d1-ths
- allwinner,sun50i-a64-ths
- allwinner,sun50i-a100-ths
- allwinner,sun50i-h5-ths
@@ -10,6 +10,9 @@ maintainers:
- zhanghongchen <zhanghongchen@loongson.cn>
- Yinbo Zhu <zhuyinbo@loongson.cn>
allOf:
- $ref: /schemas/thermal/thermal-sensor.yaml#
properties:
compatible:
oneOf:
@@ -26,12 +29,16 @@ properties:
interrupts:
maxItems: 1
'#thermal-sensor-cells':
const: 1
required:
- compatible
- reg
- interrupts
- '#thermal-sensor-cells'
additionalProperties: false
unevaluatedProperties: false
examples:
- |
@@ -41,4 +48,5 @@ examples:
reg = <0x1fe01500 0x30>;
interrupt-parent = <&liointc0>;
interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
#thermal-sensor-cells = <1>;
};
@@ -0,0 +1,99 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Mediatek thermal controller for on-SoC temperatures
maintainers:
- Sascha Hauer <s.hauer@pengutronix.de>
description:
This device does not have its own ADC, instead it directly controls the AUXADC
via AHB bus accesses. For this reason it needs phandles to the AUXADC. Also it
controls a mux in the apmixedsys register space via AHB bus accesses, so a
phandle to the APMIXEDSYS is also needed.
allOf:
- $ref: thermal-sensor.yaml#
properties:
compatible:
enum:
- mediatek,mt2701-thermal
- mediatek,mt2712-thermal
- mediatek,mt7622-thermal
- mediatek,mt7981-thermal
- mediatek,mt7986-thermal
- mediatek,mt8173-thermal
- mediatek,mt8183-thermal
- mediatek,mt8365-thermal
- mediatek,mt8516-thermal
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: Main clock needed for register access
- description: The AUXADC clock
clock-names:
items:
- const: therm
- const: auxadc
mediatek,auxadc:
$ref: /schemas/types.yaml#/definitions/phandle
description: A phandle to the AUXADC which the thermal controller uses
mediatek,apmixedsys:
$ref: /schemas/types.yaml#/definitions/phandle
description: A phandle to the APMIXEDSYS controller
resets:
description: Reset controller controlling the thermal controller
nvmem-cells:
items:
- description:
NVMEM cell with EEPROMA phandle to the calibration data provided by an
NVMEM device. If unspecified default values shall be used.
nvmem-cell-names:
items:
- const: calibration-data
required:
- reg
- interrupts
- clocks
- clock-names
- mediatek,auxadc
- mediatek,apmixedsys
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/clock/mt8173-clk.h>
#include <dt-bindings/reset/mt8173-resets.h>
thermal@1100b000 {
compatible = "mediatek,mt8173-thermal";
reg = <0x1100b000 0x1000>;
interrupts = <0 70 IRQ_TYPE_LEVEL_LOW>;
clocks = <&pericfg CLK_PERI_THERM>, <&pericfg CLK_PERI_AUXADC>;
clock-names = "therm", "auxadc";
resets = <&pericfg MT8173_PERI_THERM_SW_RST>;
mediatek,auxadc = <&auxadc>;
mediatek,apmixedsys = <&apmixedsys>;
nvmem-cells = <&thermal_calibration_data>;
nvmem-cell-names = "calibration-data";
#thermal-sensor-cells = <1>;
};
@@ -1,52 +0,0 @@
* Mediatek Thermal
This describes the device tree binding for the Mediatek thermal controller
which measures the on-SoC temperatures. This device does not have its own ADC,
instead it directly controls the AUXADC via AHB bus accesses. For this reason
this device needs phandles to the AUXADC. Also it controls a mux in the
apmixedsys register space via AHB bus accesses, so a phandle to the APMIXEDSYS
is also needed.
Required properties:
- compatible:
- "mediatek,mt8173-thermal" : For MT8173 family of SoCs
- "mediatek,mt2701-thermal" : For MT2701 family of SoCs
- "mediatek,mt2712-thermal" : For MT2712 family of SoCs
- "mediatek,mt7622-thermal" : For MT7622 SoC
- "mediatek,mt7981-thermal", "mediatek,mt7986-thermal" : For MT7981 SoC
- "mediatek,mt7986-thermal" : For MT7986 SoC
- "mediatek,mt8183-thermal" : For MT8183 family of SoCs
- "mediatek,mt8365-thermal" : For MT8365 family of SoCs
- "mediatek,mt8516-thermal", "mediatek,mt2701-thermal : For MT8516 family of SoCs
- reg: Address range of the thermal controller
- interrupts: IRQ for the thermal controller
- clocks, clock-names: Clocks needed for the thermal controller. required
clocks are:
"therm": Main clock needed for register access
"auxadc": The AUXADC clock
- mediatek,auxadc: A phandle to the AUXADC which the thermal controller uses
- mediatek,apmixedsys: A phandle to the APMIXEDSYS controller.
- #thermal-sensor-cells : Should be 0. See Documentation/devicetree/bindings/thermal/thermal-sensor.yaml for a description.
Optional properties:
- resets: Reference to the reset controller controlling the thermal controller.
- nvmem-cells: A phandle to the calibration data provided by a nvmem device. If
unspecified default values shall be used.
- nvmem-cell-names: Should be "calibration-data"
Example:
thermal: thermal@1100b000 {
#thermal-sensor-cells = <1>;
compatible = "mediatek,mt8173-thermal";
reg = <0 0x1100b000 0 0x1000>;
interrupts = <0 70 IRQ_TYPE_LEVEL_LOW>;
clocks = <&pericfg CLK_PERI_THERM>, <&pericfg CLK_PERI_AUXADC>;
clock-names = "therm", "auxadc";
resets = <&pericfg MT8173_PERI_THERM_SW_RST>;
reset-names = "therm";
mediatek,auxadc = <&auxadc>;
mediatek,apmixedsys = <&apmixedsys>;
nvmem-cells = <&thermal_calibration_data>;
nvmem-cell-names = "calibration-data";
};
@@ -114,12 +114,14 @@ examples:
- |
#include <dt-bindings/iio/qcom,spmi-vadc.h>
#include <dt-bindings/interrupt-controller/irq.h>
spmi_bus {
pmic {
#address-cells = <1>;
#size-cells = <0>;
pm8998_adc: adc@3100 {
reg = <0x3100>;
compatible = "qcom,spmi-adc-rev2";
reg = <0x3100>;
#address-cells = <1>;
#size-cells = <0>;
#io-channel-cells = <1>;
@@ -130,7 +132,7 @@ examples:
};
};
pm8998_adc_tm: adc-tm@3400 {
adc-tm@3400 {
compatible = "qcom,spmi-adc-tm-hc";
reg = <0x3400>;
interrupts = <0x2 0x34 0x0 IRQ_TYPE_EDGE_RISING>;
@@ -167,12 +167,14 @@ examples:
- |
#include <dt-bindings/iio/qcom,spmi-vadc.h>
#include <dt-bindings/interrupt-controller/irq.h>
spmi_bus {
pmic {
#address-cells = <1>;
#size-cells = <0>;
pm8150b_adc: adc@3100 {
reg = <0x3100>;
compatible = "qcom,spmi-adc5";
reg = <0x3100>;
#address-cells = <1>;
#size-cells = <0>;
#io-channel-cells = <1>;
@@ -186,7 +188,7 @@ examples:
};
};
pm8150b_adc_tm: adc-tm@3500 {
adc-tm@3500 {
compatible = "qcom,spmi-adc-tm5";
reg = <0x3500>;
interrupts = <0x2 0x35 0x0 IRQ_TYPE_EDGE_RISING>;
@@ -207,12 +209,14 @@ examples:
#include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
#include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
#include <dt-bindings/interrupt-controller/irq.h>
spmi_bus {
pmic {
#address-cells = <1>;
#size-cells = <0>;
pmk8350_vadc: adc@3100 {
reg = <0x3100>;
compatible = "qcom,spmi-adc7";
reg = <0x3100>;
#address-cells = <1>;
#size-cells = <0>;
#io-channel-cells = <1>;
@@ -233,7 +237,7 @@ examples:
};
};
pmk8350_adc_tm: adc-tm@3400 {
adc-tm@3400 {
compatible = "qcom,spmi-adc-tm5-gen2";
reg = <0x3400>;
interrupts = <0x0 0x34 0x0 IRQ_TYPE_EDGE_RISING>;
@@ -66,6 +66,7 @@ properties:
- qcom,sm8350-tsens
- qcom,sm8450-tsens
- qcom,sm8550-tsens
- qcom,sm8650-tsens
- const: qcom,tsens-v2
- description: v2 of TSENS with combined interrupt
@@ -75,6 +75,22 @@ patternProperties:
framework and assumes that the thermal sensors in this zone
support interrupts.
critical-action:
$ref: /schemas/types.yaml#/definitions/string
description: |
The action the OS should perform after the critical temperature is reached.
By default the system will shutdown as a safe action to prevent damage
to the hardware, if the property is not set.
The shutdown action should be always the default and preferred one.
Choose 'reboot' with care, as the hardware may be in thermal stress,
thus leading to infinite reboots that may cause damage to the hardware.
Make sure the firmware/bootloader will act as the last resort and take
over the thermal control.
enum:
- shutdown
- reboot
thermal-sensors:
$ref: /schemas/types.yaml#/definitions/phandle-array
maxItems: 1
+1
View File
@@ -115,6 +115,7 @@ available subsections can be seen below.
hte/index
wmi
dpll
wbrf
.. only:: subproject and html
+189 -49
View File
@@ -2,64 +2,204 @@
SPI NOR framework
=================
Part I - Why do we need this framework?
---------------------------------------
How to propose a new flash addition
-----------------------------------
SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
controller operates agnostic of the specific device attached. However, some
controllers (such as Freescale's QuadSPI controller) cannot easily handle
arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
Most SPI NOR flashes comply with the JEDEC JESD216
Serial Flash Discoverable Parameter (SFDP) standard. SFDP describes
the functional and feature capabilities of serial flash devices in a
standard set of internal read-only parameter tables.
In particular, Freescale's QuadSPI controller must know the NOR commands to
find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
opcodes, addresses, or data payloads; a SPI controller simply knows to send or
receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
which the controller driver is aware of the opcodes, addressing, and other
details of the SPI NOR protocol.
The SPI NOR driver queries the SFDP tables in order to determine the
flash's parameters and settings. If the flash defines the SFDP tables
it's likely that you won't need a flash entry at all, and instead
rely on the generic flash driver which probes the flash solely based
on its SFDP data. All one has to do is to specify the "jedec,spi-nor"
compatible in the device tree.
Part II - How does the framework work?
--------------------------------------
There are cases however where you need to define an explicit flash
entry. This typically happens when the flash has settings or support
that is not covered by the SFDP tables (e.g. Block Protection), or
when the flash contains mangled SFDP data. If the later, one needs
to implement the ``spi_nor_fixups`` hooks in order to amend the SFDP
parameters with the correct values.
This framework just adds a new layer between the MTD and the SPI bus driver.
With this new layer, the SPI NOR controller driver does not depend on the
m25p80 code anymore.
Minimum testing requirements
-----------------------------
Before this framework, the layer is like::
Do all the tests from below and paste them in the commit's comments
section, after the ``---`` marker.
MTD
------------------------
m25p80
------------------------
SPI bus driver
------------------------
SPI NOR chip
1) Specify the controller that you used to test the flash and specify
the frequency at which the flash was operated, e.g.::
After this framework, the layer is like::
This flash is populated on the X board and was tested at Y
frequency using the Z (put compatible) SPI controller.
MTD
------------------------
SPI NOR framework
------------------------
m25p80
------------------------
SPI bus driver
------------------------
SPI NOR chip
2) Dump the sysfs entries and print the md5/sha1/sha256 SFDP checksum::
With the SPI NOR controller driver (Freescale QuadSPI), it looks like::
root@1:~# cat /sys/bus/spi/devices/spi0.0/spi-nor/partname
sst26vf064b
root@1:~# cat /sys/bus/spi/devices/spi0.0/spi-nor/jedec_id
bf2643
root@1:~# cat /sys/bus/spi/devices/spi0.0/spi-nor/manufacturer
sst
root@1:~# xxd -p /sys/bus/spi/devices/spi0.0/spi-nor/sfdp
53464450060102ff00060110300000ff81000106000100ffbf0001180002
0001fffffffffffffffffffffffffffffffffd20f1ffffffff0344eb086b
083b80bbfeffffffffff00ffffff440b0c200dd80fd810d820914824806f
1d81ed0f773830b030b0f7ffffff29c25cfff030c080ffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffff0004fff37f0000f57f0000f9ff
7d00f57f0000f37f0000ffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffbf2643ffb95ffdff30f260f332ff0a122346ff0f19320f1919ffffff
ffffffff00669938ff05013506040232b03072428de89888a585c09faf5a
ffff06ec060c0003080bffffffffff07ffff0202ff060300fdfd040700fc
0300fefe0202070e
root@1:~# sha256sum /sys/bus/spi/devices/spi0.0/spi-nor/sfdp
428f34d0461876f189ac97f93e68a05fa6428c6650b3b7baf736a921e5898ed1 /sys/bus/spi/devices/spi0.0/spi-nor/sfdp
MTD
------------------------
SPI NOR framework
------------------------
fsl-quadSPI
------------------------
SPI NOR chip
Please dump the SFDP tables using ``xxd -p``. It enables us to do
the reverse operation and convert the hexdump to binary with
``xxd -rp``. Dumping the SFDP data with ``hexdump -Cv`` is accepted,
but less desirable.
Part III - How can drivers use the framework?
---------------------------------------------
3) Dump debugfs data::
The main API is spi_nor_scan(). Before you call the hook, a driver should
initialize the necessary fields for spi_nor{}. Please see
drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to spi-fsl-qspi.c
when you want to write a new driver for a SPI NOR controller.
root@1:~# cat /sys/kernel/debug/spi-nor/spi0.0/capabilities
Supported read modes by the flash
1S-1S-1S
opcode 0x03
mode cycles 0
dummy cycles 0
1S-1S-1S (fast read)
opcode 0x0b
mode cycles 0
dummy cycles 8
1S-1S-2S
opcode 0x3b
mode cycles 0
dummy cycles 8
1S-2S-2S
opcode 0xbb
mode cycles 4
dummy cycles 0
1S-1S-4S
opcode 0x6b
mode cycles 0
dummy cycles 8
1S-4S-4S
opcode 0xeb
mode cycles 2
dummy cycles 4
4S-4S-4S
opcode 0x0b
mode cycles 2
dummy cycles 4
Supported page program modes by the flash
1S-1S-1S
opcode 0x02
root@1:~# cat /sys/kernel/debug/spi-nor/spi0.0/params
name sst26vf064b
id bf 26 43 bf 26 43
size 8.00 MiB
write size 1
page size 256
address nbytes 3
flags HAS_LOCK | HAS_16BIT_SR | SOFT_RESET | SWP_IS_VOLATILE
opcodes
read 0xeb
dummy cycles 6
erase 0x20
program 0x02
8D extension none
protocols
read 1S-4S-4S
write 1S-1S-1S
register 1S-1S-1S
erase commands
20 (4.00 KiB) [0]
d8 (8.00 KiB) [1]
d8 (32.0 KiB) [2]
d8 (64.0 KiB) [3]
c7 (8.00 MiB)
sector map
region (in hex) | erase mask | flags
------------------+------------+----------
00000000-00007fff | [01 ] |
00008000-0000ffff | [0 2 ] |
00010000-007effff | [0 3] |
007f0000-007f7fff | [0 2 ] |
007f8000-007fffff | [01 ] |
4) Use `mtd-utils <https://git.infradead.org/mtd-utils.git>`__
and verify that erase, read and page program operations work fine::
root@1:~# dd if=/dev/urandom of=./spi_test bs=1M count=2
2+0 records in
2+0 records out
2097152 bytes (2.1 MB, 2.0 MiB) copied, 0.848566 s, 2.5 MB/s
root@1:~# mtd_debug erase /dev/mtd0 0 2097152
Erased 2097152 bytes from address 0x00000000 in flash
root@1:~# mtd_debug read /dev/mtd0 0 2097152 spi_read
Copied 2097152 bytes from address 0x00000000 in flash to spi_read
root@1:~# hexdump spi_read
0000000 ffff ffff ffff ffff ffff ffff ffff ffff
*
0200000
root@1:~# sha256sum spi_read
4bda3a28f4ffe603c0ec1258c0034d65a1a0d35ab7bd523a834608adabf03cc5 spi_read
root@1:~# mtd_debug write /dev/mtd0 0 2097152 spi_test
Copied 2097152 bytes from spi_test to address 0x00000000 in flash
root@1:~# mtd_debug read /dev/mtd0 0 2097152 spi_read
Copied 2097152 bytes from address 0x00000000 in flash to spi_read
root@1:~# sha256sum spi*
c444216a6ba2a4a66cccd60a0dd062bce4b865dd52b200ef5e21838c4b899ac8 spi_read
c444216a6ba2a4a66cccd60a0dd062bce4b865dd52b200ef5e21838c4b899ac8 spi_test
If the flash comes erased by default and the previous erase was ignored,
we won't catch it, thus test the erase again::
root@1:~# mtd_debug erase /dev/mtd0 0 2097152
Erased 2097152 bytes from address 0x00000000 in flash
root@1:~# mtd_debug read /dev/mtd0 0 2097152 spi_read
Copied 2097152 bytes from address 0x00000000 in flash to spi_read
root@1:~# sha256sum spi*
4bda3a28f4ffe603c0ec1258c0034d65a1a0d35ab7bd523a834608adabf03cc5 spi_read
c444216a6ba2a4a66cccd60a0dd062bce4b865dd52b200ef5e21838c4b899ac8 spi_test
Dump some other relevant data::
root@1:~# mtd_debug info /dev/mtd0
mtd.type = MTD_NORFLASH
mtd.flags = MTD_CAP_NORFLASH
mtd.size = 8388608 (8M)
mtd.erasesize = 4096 (4K)
mtd.writesize = 1
mtd.oobsize = 0
regions = 0
@@ -39,7 +39,7 @@ Note that the standard disclaimer for this subsystem also applies to this
document: All of this has been reverse-engineered and may thus be erroneous
and/or incomplete.
All CRCs used in the following are two-byte ``crc_ccitt_false(0xffff, ...)``.
All CRCs used in the following are two-byte ``crc_itu_t(0xffff, ...)``.
All multi-byte values are little-endian, there is no implicit padding between
values.
+78
View File
@@ -0,0 +1,78 @@
.. SPDX-License-Identifier: GPL-2.0-or-later
=================================
WBRF - Wifi Band RFI Mitigations
=================================
Due to electrical and mechanical constraints in certain platform designs
there may be likely interference of relatively high-powered harmonics of
the GPU memory clocks with local radio module frequency bands used by
certain Wifi bands.
To mitigate possible RFI interference producers can advertise the
frequencies in use and consumers can use this information to avoid using
these frequencies for sensitive features.
When a platform is known to have this issue with any contained devices,
the platform designer will advertise the availability of this feature via
ACPI devices with a device specific method (_DSM).
* Producers with this _DSM will be able to advertise the frequencies in use.
* Consumers with this _DSM will be able to register for notifications of
frequencies in use.
Some general terms
==================
Producer: such component who can produce high-powered radio frequency
Consumer: such component who can adjust its in-use frequency in
response to the radio frequencies of other components to mitigate the
possible RFI.
To make the mechanism function, those producers should notify active use
of their particular frequencies so that other consumers can make relative
internal adjustments as necessary to avoid this resonance.
ACPI interface
==============
Although initially used by for wifi + dGPU use cases, the ACPI interface
can be scaled to any type of device that a platform designer discovers
can cause interference.
The GUID used for the _DSM is 7B7656CF-DC3D-4C1C-83E9-66E721DE3070.
3 functions are available in this _DSM:
* 0: discover # of functions available
* 1: record RF bands in use
* 2: retrieve RF bands in use
Driver programming interface
============================
.. kernel-doc:: drivers/platform/x86/amd/wbrf.c
Sample Usage
=============
The expected flow for the producers:
1. During probe, call `acpi_amd_wbrf_supported_producer` to check if WBRF
can be enabled for the device.
2. On using some frequency band, call `acpi_amd_wbrf_add_remove` with 'add'
param to get other consumers properly notified.
3. Or on stopping using some frequency band, call
`acpi_amd_wbrf_add_remove` with 'remove' param to get other consumers notified.
The expected flow for the consumers:
1. During probe, call `acpi_amd_wbrf_supported_consumer` to check if WBRF
can be enabled for the device.
2. Call `amd_wbrf_register_notifier` to register for notification
of frequency band change(add or remove) from other producers.
3. Call the `amd_wbrf_retrieve_freq_band` initally to retrieve
current active frequency bands considering some producers may broadcast
such information before the consumer is up.
4. On receiving a notification for frequency band change, run
`amd_wbrf_retrieve_freq_band` again to retrieve the latest
active frequency bands.
5. During driver cleanup, call `amd_wbrf_unregister_notifier` to
unregister the notifier.
+11 -10
View File
@@ -31,15 +31,15 @@ However, except for filenames, fscrypt does not encrypt filesystem
metadata.
Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated
directly into supported filesystems --- currently ext4, F2FS, and
UBIFS. This allows encrypted files to be read and written without
caching both the decrypted and encrypted pages in the pagecache,
thereby nearly halving the memory used and bringing it in line with
unencrypted files. Similarly, half as many dentries and inodes are
needed. eCryptfs also limits encrypted filenames to 143 bytes,
causing application compatibility issues; fscrypt allows the full 255
bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be
used by unprivileged users, with no need to mount anything.
directly into supported filesystems --- currently ext4, F2FS, UBIFS,
and CephFS. This allows encrypted files to be read and written
without caching both the decrypted and encrypted pages in the
pagecache, thereby nearly halving the memory used and bringing it in
line with unencrypted files. Similarly, half as many dentries and
inodes are needed. eCryptfs also limits encrypted filenames to 143
bytes, causing application compatibility issues; fscrypt allows the
full 255 bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API
can be used by unprivileged users, with no need to mount anything.
fscrypt does not support encrypting files in-place. Instead, it
supports marking an empty directory as encrypted. Then, after
@@ -1382,7 +1382,8 @@ directory.) These structs are defined as follows::
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 __reserved[4];
u8 log2_data_unit_size;
u8 __reserved[3];
u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
u8 nonce[FSCRYPT_FILE_NONCE_SIZE];
};
+1 -4
View File
@@ -121,8 +121,5 @@ Documentation for filesystem implementations.
udf
virtiofs
vfat
xfs-delayed-logging-design
xfs-maintainer-entry-profile
xfs-self-describing-metadata
xfs-online-fsck-design
xfs/index
zonefs
+2 -2
View File
@@ -261,7 +261,7 @@ prototypes::
struct folio *src, enum migrate_mode);
int (*launder_folio)(struct folio *);
bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
int (*error_remove_page)(struct address_space *, struct page *);
int (*error_remove_folio)(struct address_space *, struct folio *);
int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
int (*swap_deactivate)(struct file *);
int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
@@ -287,7 +287,7 @@ direct_IO:
migrate_folio: yes (both)
launder_folio: yes
is_partially_uptodate: yes
error_remove_page: yes
error_remove_folio: yes
swap_activate: no
swap_deactivate: no
swap_rw: yes, unlocks
+3 -3
View File
@@ -528,9 +528,9 @@ replaced by copy-on-write) part of the underlying shmem object out on swap.
does not take into account swapped out page of underlying shmem objects.
"Locked" indicates whether the mapping is locked in memory or not.
"THPeligible" indicates whether the mapping is eligible for allocating THP
pages as well as the THP is PMD mappable or not - 1 if true, 0 otherwise.
It just shows the current status.
"THPeligible" indicates whether the mapping is eligible for allocating
naturally aligned THP pages of any currently enabled size. 1 if true, 0
otherwise.
"VmFlags" field deserves a separate description. This member represents the
kernel flags associated with the particular virtual memory area in two letter
+60
View File
@@ -64,6 +64,66 @@ obtained from this site also.
The squashfs-tools development tree is now located on kernel.org
git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git
2.1 Mount options
-----------------
=================== =========================================================
errors=%s Specify whether squashfs errors trigger a kernel panic
or not
========== =============================================
continue errors don't trigger a panic (default)
panic trigger a panic when errors are encountered,
similar to several other filesystems (e.g.
btrfs, ext4, f2fs, GFS2, jfs, ntfs, ubifs)
This allows a kernel dump to be saved,
useful for analyzing and debugging the
corruption.
========== =============================================
threads=%s Select the decompression mode or the number of threads
If SQUASHFS_CHOICE_DECOMP_BY_MOUNT is set:
========== =============================================
single use single-threaded decompression (default)
Only one block (data or metadata) can be
decompressed at any one time. This limits
CPU and memory usage to a minimum, but it
also gives poor performance on parallel I/O
workloads when using multiple CPU machines
due to waiting on decompressor availability.
multi use up to two parallel decompressors per core
If you have a parallel I/O workload and your
system has enough memory, using this option
may improve overall I/O performance. It
dynamically allocates decompressors on a
demand basis.
percpu use a maximum of one decompressor per core
It uses percpu variables to ensure
decompression is load-balanced across the
cores.
1|2|3|... configure the number of threads used for
decompression
The upper limit is num_online_cpus() * 2.
========== =============================================
If SQUASHFS_CHOICE_DECOMP_BY_MOUNT is **not** set and
SQUASHFS_DECOMP_MULTI, SQUASHFS_MOUNT_DECOMP_THREADS are
both set:
========== =============================================
2|3|... configure the number of threads used for
decompression
The upper limit is num_online_cpus() * 2.
========== =============================================
=================== =========================================================
3. Squashfs Filesystem Design
-----------------------------
+3 -3
View File
@@ -823,7 +823,7 @@ cache in your filesystem. The following members are defined:
bool (*is_partially_uptodate) (struct folio *, size_t from,
size_t count);
void (*is_dirty_writeback)(struct folio *, bool *, bool *);
int (*error_remove_page) (struct mapping *mapping, struct page *page);
int (*error_remove_folio)(struct mapping *mapping, struct folio *);
int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
int (*swap_deactivate)(struct file *);
int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
@@ -1034,8 +1034,8 @@ cache in your filesystem. The following members are defined:
VM if a folio should be treated as dirty or writeback for the
purposes of stalling.
``error_remove_page``
normally set to generic_error_remove_page if truncation is ok
``error_remove_folio``
normally set to generic_error_remove_folio if truncation is ok
for this address space. Used for memory failure handling.
Setting this implies you deal with pages going away under you,
unless you have them locked or reference counts increased.
+14
View File
@@ -0,0 +1,14 @@
.. SPDX-License-Identifier: GPL-2.0
============================
XFS Filesystem Documentation
============================
.. toctree::
:maxdepth: 2
:numbered:
xfs-delayed-logging-design
xfs-maintainer-entry-profile
xfs-self-describing-metadata
xfs-online-fsck-design
@@ -962,7 +962,7 @@ disk, but these buffer verifiers cannot provide any consistency checking
between metadata structures.
For more information, please see the documentation for
Documentation/filesystems/xfs-self-describing-metadata.rst
Documentation/filesystems/xfs/xfs-self-describing-metadata.rst
Reverse Mapping
---------------
@@ -105,4 +105,4 @@ to do something different in the near future.
../driver-api/media/maintainer-entry-profile
../driver-api/vfio-pci-device-specific-driver-acceptance
../nvme/feature-and-quirk-policy
../filesystems/xfs-maintainer-entry-profile
../filesystems/xfs/xfs-maintainer-entry-profile
@@ -18,8 +18,6 @@ PTE Page Table Helpers
+---------------------------+--------------------------------------------------+
| pte_same | Tests whether both PTE entries are the same |
+---------------------------+--------------------------------------------------+
| pte_bad | Tests a non-table mapped PTE |
+---------------------------+--------------------------------------------------+
| pte_present | Tests a valid mapped PTE |
+---------------------------+--------------------------------------------------+
| pte_young | Tests a young PTE |
+25 -12
View File
@@ -5,6 +5,18 @@ Design
======
.. _damon_design_execution_model_and_data_structures:
Execution Model and Data Structures
===================================
The monitoring-related information including the monitoring request
specification and DAMON-based operation schemes are stored in a data structure
called DAMON ``context``. DAMON executes each context with a kernel thread
called ``kdamond``. Multiple kdamonds could run in parallel, for different
types of monitoring.
Overall Architecture
====================
@@ -346,6 +358,19 @@ the weight will be respected are up to the underlying prioritization mechanism
implementation.
.. _damon_design_damos_quotas_auto_tuning:
Aim-oriented Feedback-driven Auto-tuning
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Automatic feedback-driven quota tuning. Instead of setting the absolute quota
value, users can repeatedly provide numbers representing how much of their goal
for the scheme is achieved as feedback. DAMOS then automatically tunes the
aggressiveness (the quota) of the corresponding scheme. For example, if DAMOS
is under achieving the goal, DAMOS automatically increases the quota. If DAMOS
is over achieving the goal, it decreases the quota.
.. _damon_design_damos_watermarks:
Watermarks
@@ -477,15 +502,3 @@ modules for proactive reclamation and LRU lists manipulation are provided. For
more detail, please read the usage documents for those
(:doc:`/admin-guide/mm/damon/reclaim` and
:doc:`/admin-guide/mm/damon/lru_sort`).
.. _damon_design_execution_model_and_data_structures:
Execution Model and Data Structures
===================================
The monitoring-related information including the monitoring request
specification and DAMON-based operation schemes are stored in a data structure
called DAMON ``context``. DAMON executes each context with a kernel thread
called ``kdamond``. Multiple kdamonds could run in parallel, for different
types of monitoring.
+2 -2
View File
@@ -117,7 +117,7 @@ pages:
- map/unmap of a PMD entry for the whole THP increment/decrement
folio->_entire_mapcount and also increment/decrement
folio->_nr_pages_mapped by COMPOUND_MAPPED when _entire_mapcount
folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount
goes from -1 to 0 or 0 to -1.
- map/unmap of individual pages with PTE entry increment/decrement
@@ -156,7 +156,7 @@ Partial unmap and deferred_split_folio()
Unmapping part of THP (with munmap() or other way) is not going to free
memory immediately. Instead, we detect that a subpage of THP is not in use
in page_remove_rmap() and queue the THP for splitting if memory pressure
in folio_remove_rmap_*() and queue the THP for splitting if memory pressure
comes. Splitting will free up unused subpages.
Splitting the page right away is not an option due to locking context in
+2 -2
View File
@@ -486,7 +486,7 @@ munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
Before the unevictable/mlock changes, mlocking did not mark the pages in any
way, so unmapping them required no processing.
For each PTE (or PMD) being unmapped from a VMA, page_remove_rmap() calls
For each PTE (or PMD) being unmapped from a VMA, folio_remove_rmap_*() calls
munlock_vma_folio(), which calls munlock_folio() when the VMA is VM_LOCKED
(unless it was a PTE mapping of a part of a transparent huge page).
@@ -511,7 +511,7 @@ userspace; truncation even unmaps and deletes any private anonymous pages
which had been Copied-On-Write from the file pages now being truncated.
Mlocked pages can be munlocked and deleted in this way: like with munmap(),
for each PTE (or PMD) being unmapped from a VMA, page_remove_rmap() calls
for each PTE (or PMD) being unmapped from a VMA, folio_remove_rmap_*() calls
munlock_vma_folio(), which calls munlock_folio() when the VMA is VM_LOCKED
(unless it was a PTE mapping of a part of a transparent huge page).
+7 -7
View File
@@ -263,20 +263,20 @@ the name indicates, this function allocates pages of memory, and the second
argument is "order" or a power of two number of pages, that is
(for PAGE_SIZE == 4096) order=0 ==> 4096 bytes, order=1 ==> 8192 bytes,
order=2 ==> 16384 bytes, etc. The maximum size of a
region allocated by __get_free_pages is determined by the MAX_ORDER macro. More
precisely the limit can be calculated as::
region allocated by __get_free_pages is determined by the MAX_PAGE_ORDER macro.
More precisely the limit can be calculated as::
PAGE_SIZE << MAX_ORDER
PAGE_SIZE << MAX_PAGE_ORDER
In a i386 architecture PAGE_SIZE is 4096 bytes
In a 2.4/i386 kernel MAX_ORDER is 10
In a 2.6/i386 kernel MAX_ORDER is 11
In a 2.4/i386 kernel MAX_PAGE_ORDER is 10
In a 2.6/i386 kernel MAX_PAGE_ORDER is 11
So get_free_pages can allocate as much as 4MB or 8MB in a 2.4/2.6 kernel
respectively, with an i386 architecture.
User space programs can include /usr/include/sys/user.h and
/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_ORDER declarations.
/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_PAGE_ORDER declarations.
The pagesize can also be determined dynamically with the getpagesize (2)
system call.
@@ -324,7 +324,7 @@ Definitions:
(see /proc/slabinfo)
<pointer size> depends on the architecture -- ``sizeof(void *)``
<page size> depends on the architecture -- PAGE_SIZE or getpagesize (2)
<max-order> is the value defined with MAX_ORDER
<max-order> is the value defined with MAX_PAGE_ORDER
<frame size> it's an upper bound of frame's capture size (more on this later)
============== ================================================================
+46 -35
View File
@@ -14,27 +14,28 @@ architectures).
II. How does it work?
=====================
There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN
and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have
PF_NOFREEZE unset (all user space processes and some kernel threads) are
regarded as 'freezable' and treated in a special way before the system enters a
suspend state as well as before a hibernation image is created (in what follows
we only consider hibernation, but the description also applies to suspend).
There is one per-task flag (PF_NOFREEZE) and three per-task states
(TASK_FROZEN, TASK_FREEZABLE and __TASK_FREEZABLE_UNSAFE) used for that.
The tasks that have PF_NOFREEZE unset (all user space tasks and some kernel
threads) are regarded as 'freezable' and treated in a special way before the
system enters a sleep state as well as before a hibernation image is created
(hibernation is directly covered by what follows, but the description applies
to system-wide suspend too).
Namely, as the first step of the hibernation procedure the function
freeze_processes() (defined in kernel/power/process.c) is called. A system-wide
variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate
whether the system is to undergo a freezing operation. And freeze_processes()
sets this variable. After this, it executes try_to_freeze_tasks() that sends a
fake signal to all user space processes, and wakes up all the kernel threads.
All freezable tasks must react to that by calling try_to_freeze(), which
results in a call to __refrigerator() (defined in kernel/freezer.c), which sets
the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes
it loop until PF_FROZEN is cleared for it. Then, we say that the task is
'frozen' and therefore the set of functions handling this mechanism is referred
to as 'the freezer' (these functions are defined in kernel/power/process.c,
kernel/freezer.c & include/linux/freezer.h). User space processes are generally
frozen before kernel threads.
static key freezer_active (as opposed to a per-task flag or state) is used to
indicate whether the system is to undergo a freezing operation. And
freeze_processes() sets this static key. After this, it executes
try_to_freeze_tasks() that sends a fake signal to all user space processes, and
wakes up all the kernel threads. All freezable tasks must react to that by
calling try_to_freeze(), which results in a call to __refrigerator() (defined
in kernel/freezer.c), which changes the task's state to TASK_FROZEN, and makes
it loop until it is woken by an explicit TASK_FROZEN wakeup. Then, that task
is regarded as 'frozen' and so the set of functions handling this mechanism is
referred to as 'the freezer' (these functions are defined in
kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). User space
tasks are generally frozen before kernel threads.
__refrigerator() must not be called directly. Instead, use the
try_to_freeze() function (defined in include/linux/freezer.h), that checks
@@ -43,31 +44,40 @@ if the task is to be frozen and makes the task enter __refrigerator().
For user space processes try_to_freeze() is called automatically from the
signal-handling code, but the freezable kernel threads need to call it
explicitly in suitable places or use the wait_event_freezable() or
wait_event_freezable_timeout() macros (defined in include/linux/freezer.h)
that combine interruptible sleep with checking if the task is to be frozen and
calling try_to_freeze(). The main loop of a freezable kernel thread may look
wait_event_freezable_timeout() macros (defined in include/linux/wait.h)
that put the task to sleep (TASK_INTERRUPTIBLE) or freeze it (TASK_FROZEN) if
freezer_active is set. The main loop of a freezable kernel thread may look
like the following one::
set_freezable();
do {
hub_events();
wait_event_freezable(khubd_wait,
!list_empty(&hub_event_list) ||
kthread_should_stop());
} while (!kthread_should_stop() || !list_empty(&hub_event_list));
(from drivers/usb/core/hub.c::hub_thread()).
while (true) {
struct task_struct *tsk = NULL;
If a freezable kernel thread fails to call try_to_freeze() after the freezer has
initiated a freezing operation, the freezing of tasks will fail and the entire
hibernation operation will be cancelled. For this reason, freezable kernel
threads must call try_to_freeze() somewhere or use one of the
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
spin_lock_irq(&oom_reaper_lock);
if (oom_reaper_list != NULL) {
tsk = oom_reaper_list;
oom_reaper_list = tsk->oom_reaper_list;
}
spin_unlock_irq(&oom_reaper_lock);
if (tsk)
oom_reap_task(tsk);
}
(from mm/oom_kill.c::oom_reaper()).
If a freezable kernel thread is not put to the frozen state after the freezer
has initiated a freezing operation, the freezing of tasks will fail and the
entire system-wide transition will be cancelled. For this reason, freezable
kernel threads must call try_to_freeze() somewhere or use one of the
wait_event_freezable() and wait_event_freezable_timeout() macros.
After the system memory state has been restored from a hibernation image and
devices have been reinitialized, the function thaw_processes() is called in
order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that
have been frozen leave __refrigerator() and continue running.
order to wake up each frozen task. Then, the tasks that have been frozen leave
__refrigerator() and continue running.
Rationale behind the functions dealing with freezing and thawing of tasks
@@ -96,7 +106,8 @@ III. Which kernel threads are freezable?
Kernel threads are not freezable by default. However, a kernel thread may clear
PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
directly is not allowed). From this point it is regarded as freezable
and must call try_to_freeze() in a suitable place.
and must call try_to_freeze() or variants of wait_event_freezable() in a
suitable place.
IV. Why do we do that?
======================
+4 -4
View File
@@ -180,7 +180,7 @@ This is the (partial) list of the hooks:
compat_yield sysctl is turned on; in that case, it places the scheduling
entity at the right-most end of the red-black tree.
- check_preempt_curr(...)
- wakeup_preempt(...)
This function checks if a task that entered the runnable state should
preempt the currently running task.
@@ -189,10 +189,10 @@ This is the (partial) list of the hooks:
This function chooses the most appropriate task eligible to run next.
- set_curr_task(...)
- set_next_task(...)
This function is called when a task changes its scheduling class or changes
its task group.
This function is called when a task changes its scheduling class, changes
its task group or is scheduled.
- task_tick(...)
+3 -4
View File
@@ -90,8 +90,8 @@ For more detail see:
- Documentation/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
UTIL_EST / UTIL_EST_FASTUP
==========================
UTIL_EST
========
Because periodic tasks have their averages decayed while they sleep, even
though when running their expected utilization will be the same, they suffer a
@@ -99,8 +99,7 @@ though when running their expected utilization will be the same, they suffer a
To alleviate this (a default enabled option) UTIL_EST drives an Infinite
Impulse Response (IIR) EWMA with the 'running' value on dequeue -- when it is
highest. A further default enabled option UTIL_EST_FASTUP modifies the IIR
filter to instantly increase and only decay on decrease.
highest. UTIL_EST filters to instantly increase and only decay on decrease.
A further runqueue wide sum (of runnable tasks) is maintained of:
+28 -31
View File
@@ -3,13 +3,13 @@ PXA2xx SPI on SSP driver HOWTO
==============================
This a mini HOWTO on the pxa2xx_spi driver. The driver turns a PXA2xx
synchronous serial port into an SPI master controller
synchronous serial port into an SPI host controller
(see Documentation/spi/spi-summary.rst). The driver has the following features
- Support for any PXA2xx and compatible SSP.
- SSP PIO and SSP DMA data transfers.
- External and Internal (SSPFRM) chip selects.
- Per slave device (chip) configuration.
- Per peripheral device (chip) configuration.
- Full suspend, freeze, resume support.
The driver is built around a &struct spi_message FIFO serviced by kernel
@@ -17,10 +17,10 @@ thread. The kernel thread, spi_pump_messages(), drives message FIFO and
is responsible for queuing SPI transactions and setting up and launching
the DMA or interrupt driven transfers.
Declaring PXA2xx Master Controllers
-----------------------------------
Typically, for a legacy platform, an SPI master is defined in the
arch/.../mach-*/board-*.c as a "platform device". The master configuration
Declaring PXA2xx host controllers
---------------------------------
Typically, for a legacy platform, an SPI host controller is defined in the
arch/.../mach-*/board-*.c as a "platform device". The host controller configuration
is passed to the driver via a table found in include/linux/spi/pxa2xx_spi.h::
struct pxa2xx_spi_controller {
@@ -30,7 +30,7 @@ is passed to the driver via a table found in include/linux/spi/pxa2xx_spi.h::
};
The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of
slave device (chips) attached to this SPI master.
peripheral devices (chips) attached to this SPI host controller.
The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should
be used. This caused the driver to acquire two DMA channels: Rx channel and
@@ -40,8 +40,8 @@ See the "PXA2xx Developer Manual" section "DMA Controller".
For the new platforms the description of the controller and peripheral devices
comes from Device Tree or ACPI.
NSSP MASTER SAMPLE
------------------
NSSP HOST SAMPLE
----------------
Below is a sample configuration using the PXA255 NSSP for a legacy platform::
static struct resource pxa_spi_nssp_resources[] = {
@@ -57,7 +57,7 @@ Below is a sample configuration using the PXA255 NSSP for a legacy platform::
},
};
static struct pxa2xx_spi_controller pxa_nssp_master_info = {
static struct pxa2xx_spi_controller pxa_nssp_controller_info = {
.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
.enable_dma = 1, /* Enables NSSP DMA */
};
@@ -68,7 +68,7 @@ Below is a sample configuration using the PXA255 NSSP for a legacy platform::
.resource = pxa_spi_nssp_resources,
.num_resources = ARRAY_SIZE(pxa_spi_nssp_resources),
.dev = {
.platform_data = &pxa_nssp_master_info, /* Passed to driver */
.platform_data = &pxa_nssp_controller_info, /* Passed to driver */
},
};
@@ -81,17 +81,17 @@ Below is a sample configuration using the PXA255 NSSP for a legacy platform::
(void)platform_add_device(devices, ARRAY_SIZE(devices));
}
Declaring Slave Devices
-----------------------
Typically, for a legacy platform, each SPI slave (chip) is defined in the
Declaring peripheral devices
----------------------------
Typically, for a legacy platform, each SPI peripheral device (chip) is defined in the
arch/.../mach-*/board-*.c using the "spi_board_info" structure found in
"linux/spi/spi.h". See "Documentation/spi/spi-summary.rst" for additional
information.
Each slave device attached to the PXA must provide slave specific configuration
Each peripheral device (chip) attached to the PXA2xx must provide specific chip configuration
information via the structure "pxa2xx_spi_chip" found in
"include/linux/spi/pxa2xx_spi.h". The pxa2xx_spi master controller driver
will uses the configuration whenever the driver communicates with the slave
"include/linux/spi/pxa2xx_spi.h". The PXA2xx host controller driver will use
the configuration whenever the driver communicates with the peripheral
device. All fields are optional.
::
@@ -123,7 +123,7 @@ dma_burst_size == 0.
The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle
trailing bytes in the SSP receiver FIFO. The correct value for this field is
dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
slave device. Please note that the PXA2xx SSP 1 does not support trailing byte
peripheral device. Please note that the PXA2xx SSP 1 does not support trailing byte
timeouts and must busy-wait any trailing bytes.
NOTE: the SPI driver cannot control the chip select if SSPFRM is used, so the
@@ -132,8 +132,8 @@ asserted around the complete message. Use SSPFRM as a GPIO (through a descriptor
to accommodate these chips.
NSSP SLAVE SAMPLE
-----------------
NSSP PERIPHERAL SAMPLE
----------------------
For a legacy platform or in some other cases, the pxa2xx_spi_chip structure
is passed to the pxa2xx_spi driver in the "spi_board_info.controller_data"
field. Below is a sample configuration using the PXA255 NSSP.
@@ -161,16 +161,16 @@ field. Below is a sample configuration using the PXA255 NSSP.
.bus_num = 2, /* Framework bus number */
.chip_select = 0, /* Framework chip select */
.platform_data = NULL; /* No spi_driver specific config */
.controller_data = &cs8415a_chip_info, /* Master chip config */
.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
.controller_data = &cs8415a_chip_info, /* Host controller config */
.irq = STREETRACER_APCI_IRQ, /* Peripheral device interrupt */
},
{
.modalias = "cs8405a", /* Name of spi_driver for this device */
.max_speed_hz = 3686400, /* Run SSP as fast a possible */
.bus_num = 2, /* Framework bus number */
.chip_select = 1, /* Framework chip select */
.controller_data = &cs8405a_chip_info, /* Master chip config */
.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
.controller_data = &cs8405a_chip_info, /* Host controller config */
.irq = STREETRACER_APCI_IRQ, /* Peripheral device interrupt */
},
};
@@ -193,17 +193,14 @@ mode supports both coherent and stream based DMA mappings.
The following logic is used to determine the type of I/O to be used on
a per "spi_transfer" basis::
if !enable_dma then
always use PIO transfers
if spi_message.len > 65536 then
if spi_message.is_dma_mapped or rx_dma_buf != 0 or tx_dma_buf != 0 then
reject premapped transfers
if spi_message.len > 8191 then
print "rate limited" warning
use PIO transfers
if spi_message.is_dma_mapped and rx_dma_buf != 0 and tx_dma_buf != 0 then
use coherent DMA mode
if rx_buf and tx_buf are aligned on 8 byte boundary then
if enable_dma and the size is in the range [DMA burst size..65536] then
use streaming DMA mode
otherwise
@@ -56,8 +56,8 @@ Linux カーネルパッチ投稿者向けチェックリスト
9: sparseを利用してちゃんとしたコードチェックをしてください。
10: 'make checkstack' と 'make namespacecheck' を利用し、問題が発見されたら
修正してください。'make checkstack' は明示的に問題を示しませんが、どれか
10: 'make checkstack' を利用し、問題が発見されたら修正してください。
'make checkstack' は明示的に問題を示しませんが、どれか
1つの関数が512バイトより大きいスタックを使っていれば、修正すべき候補と
なります。
@@ -53,8 +53,7 @@ Linux内核补丁提交检查单
9) 通过 sparse 清查。
(参见 Documentation/translations/zh_CN/dev-tools/sparse.rst
10) 使用 ``make checkstack````make namespacecheck`` 并修复他们发现的任何
问题。
10) 使用 ``make checkstack`` 并修复他们发现的任何问题。
.. note::
@@ -80,7 +80,7 @@ p->se.vruntime。一旦p->se.vruntime变得足够大,其它的任务将成为
CFS使用纳秒粒度的计时,不依赖于任何jiffies或HZ的细节。因此CFS并不像之前的调度器那样
有“时间片”的概念,也没有任何启发式的设计。唯一可调的参数(你需要打开CONFIG_SCHED_DEBUG)是:
/sys/kernel/debug/sched/min_granularity_ns
/sys/kernel/debug/sched/base_slice_ns
它可以用来将调度器从“桌面”模式(也就是低时延)调节为“服务器”(也就是高批处理)模式。
它的默认设置是适合桌面的工作负载。SCHED_BATCH也被CFS调度器模块处理。
@@ -147,7 +147,7 @@ array)。
这个函数的行为基本上是出队,紧接着入队,除非compat_yield sysctl被开启。在那种情况下,
它将调度实体放在红黑树的最右端。
- check_preempt_curr(...)
- wakeup_preempt(...)
这个函数检查进入可运行状态的任务能否抢占当前正在运行的任务。
@@ -155,9 +155,9 @@ array)。
这个函数选择接下来最适合运行的任务。
- set_curr_task(...)
- set_next_task(...)
这个函数在任务改变调度类改变任务组时被调用。
这个函数在任务改变调度类改变任务组时,或者任务被调度时被调用。
- task_tick(...)
@@ -89,16 +89,15 @@ r_cpu被定义为当前CPU的最高性能水平与系统中任何其它CPU的最
- Documentation/translations/zh_CN/scheduler/sched-capacity.rst:"1. CPU Capacity + 2. Task utilization"
UTIL_EST / UTIL_EST_FASTUP
==========================
UTIL_EST
========
由于周期性任务的平均数在睡眠时会衰减,而在运行时其预期利用率会和睡眠前相同,
因此它们在再次运行后会面临(DVFS)的上涨。
为了缓解这个问题,(一个默认使能的编译选项)UTIL_EST驱动一个无限脉冲响应
Infinite Impulse ResponseIIR)的EWMA,“运行”值在出队时是最高的。
另一个默认使能的编译选项UTIL_EST_FASTUP修改了IIR滤波器,使其允许立即增加,
仅在利用率下降时衰减。
UTIL_EST滤波使其在遇到更高值时立刻增加,而遇到低值时会缓慢衰减。
进一步,运行队列的(可运行任务的)利用率之和由下式计算:
@@ -56,8 +56,7 @@ Linux內核補丁提交檢查單
9) 通過 sparse 清查。
(參見 Documentation/translations/zh_CN/dev-tools/sparse.rst
10) 使用 ``make checkstack````make namespacecheck`` 並修復他們發現的任何
問題。
10) 使用 ``make checkstack`` 並修復他們發現的任何問題。
.. note::
+1
View File
@@ -33,6 +33,7 @@ place where this information is gathered.
sysfs-platform_profile
vduse
futex2
lsm
.. only:: subproject and html
+73
View File
@@ -0,0 +1,73 @@
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2022 Casey Schaufler <casey@schaufler-ca.com>
.. Copyright (C) 2022 Intel Corporation
=====================================
Linux Security Modules
=====================================
:Author: Casey Schaufler
:Date: July 2023
Linux security modules (LSM) provide a mechanism to implement
additional access controls to the Linux security policies.
The various security modules may support any of these attributes:
``LSM_ATTR_CURRENT`` is the current, active security context of the
process.
The proc filesystem provides this value in ``/proc/self/attr/current``.
This is supported by the SELinux, Smack and AppArmor security modules.
Smack also provides this value in ``/proc/self/attr/smack/current``.
AppArmor also provides this value in ``/proc/self/attr/apparmor/current``.
``LSM_ATTR_EXEC`` is the security context of the process at the time the
current image was executed.
The proc filesystem provides this value in ``/proc/self/attr/exec``.
This is supported by the SELinux and AppArmor security modules.
AppArmor also provides this value in ``/proc/self/attr/apparmor/exec``.
``LSM_ATTR_FSCREATE`` is the security context of the process used when
creating file system objects.
The proc filesystem provides this value in ``/proc/self/attr/fscreate``.
This is supported by the SELinux security module.
``LSM_ATTR_KEYCREATE`` is the security context of the process used when
creating key objects.
The proc filesystem provides this value in ``/proc/self/attr/keycreate``.
This is supported by the SELinux security module.
``LSM_ATTR_PREV`` is the security context of the process at the time the
current security context was set.
The proc filesystem provides this value in ``/proc/self/attr/prev``.
This is supported by the SELinux and AppArmor security modules.
AppArmor also provides this value in ``/proc/self/attr/apparmor/prev``.
``LSM_ATTR_SOCKCREATE`` is the security context of the process used when
creating socket objects.
The proc filesystem provides this value in ``/proc/self/attr/sockcreate``.
This is supported by the SELinux security module.
Kernel interface
================
Set a security attribute of the current process
-----------------------------------------------
.. kernel-doc:: security/lsm_syscalls.c
:identifiers: sys_lsm_set_self_attr
Get the specified security attributes of the current process
------------------------------------------------------------
.. kernel-doc:: security/lsm_syscalls.c
:identifiers: sys_lsm_get_self_attr
.. kernel-doc:: security/lsm_syscalls.c
:identifiers: sys_lsm_list_modules
Additional documentation
========================
* Documentation/security/lsm.rst
* Documentation/security/lsm-development.rst
+45 -25
View File
@@ -3339,13 +3339,17 @@ M: Eric Paris <eparis@redhat.com>
L: audit@vger.kernel.org
S: Supported
W: https://github.com/linux-audit
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
Q: https://patchwork.kernel.org/project/audit/list
B: mailto:audit@vger.kernel.org
P: https://github.com/linux-audit/audit-kernel/blob/main/README.md
T: git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
F: include/asm-generic/audit_*.h
F: include/linux/audit.h
F: include/linux/audit_arch.h
F: include/uapi/linux/audit.h
F: kernel/audit*
F: lib/*audit.c
K: \baudit_[a-z_0-9]\+\b
AUXILIARY BUS DRIVER
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -3404,6 +3408,16 @@ W: https://ez.analog.com/linux-software-drivers
F: Documentation/devicetree/bindings/hwmon/adi,axi-fan-control.yaml
F: drivers/hwmon/axi-fan-control.c
AXI SPI ENGINE
M: Michael Hennerich <michael.hennerich@analog.com>
M: Nuno Sá <nuno.sa@analog.com>
R: David Lechner <dlechner@baylibre.com>
L: linux-spi@vger.kernel.org
S: Supported
W: https://ez.analog.com/linux-software-drivers
F: Documentation/devicetree/bindings/spi/adi,axi-spi-engine.yaml
F: drivers/spi/spi-axi-spi-engine.c
AXXIA I2C CONTROLLER
M: Krzysztof Adamski <krzysztof.adamski@nokia.com>
L: linux-i2c@vger.kernel.org
@@ -5339,6 +5353,7 @@ L: linux-mm@kvack.org
S: Maintained
F: mm/memcontrol.c
F: mm/swap_cgroup.c
F: samples/cgroup/*
F: tools/testing/selftests/cgroup/memcg_protection.m
F: tools/testing/selftests/cgroup/test_hugetlb_memcg.c
F: tools/testing/selftests/cgroup/test_kmem.c
@@ -7925,6 +7940,7 @@ F: include/uapi/linux/ext4.h
Extended Verification Module (EVM)
M: Mimi Zohar <zohar@linux.ibm.com>
M: Roberto Sassu <roberto.sassu@huawei.com>
L: linux-integrity@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
@@ -9010,7 +9026,7 @@ F: drivers/gpio/gpio-mockup.c
F: tools/testing/selftests/gpio/
GPIO REGMAP
M: Michael Walle <michael@walle.cc>
M: Michael Walle <mwalle@kernel.org>
S: Maintained
F: drivers/gpio/gpio-regmap.c
F: include/linux/gpio/regmap.h
@@ -9282,7 +9298,6 @@ F: drivers/char/hw_random/
F: include/linux/hw_random.h
HARDWARE SPINLOCK CORE
M: Ohad Ben-Cohen <ohad@wizery.com>
M: Bjorn Andersson <andersson@kernel.org>
R: Baolin Wang <baolin.wang7@gmail.com>
L: linux-remoteproc@vger.kernel.org
@@ -10521,7 +10536,9 @@ F: drivers/crypto/inside-secure/
INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
M: Mimi Zohar <zohar@linux.ibm.com>
M: Roberto Sassu <roberto.sassu@huawei.com>
M: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
R: Eric Snowberg <eric.snowberg@oracle.com>
L: linux-integrity@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
@@ -12420,6 +12437,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
F: Documentation/admin-guide/LSM/LoadPin.rst
F: security/loadpin/
LOCKDOWN SECURITY MODULE
L: linux-security-module@vger.kernel.org
S: Odd Fixes
T: git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git
F: security/lockdown/
LOCKING PRIMITIVES
M: Peter Zijlstra <peterz@infradead.org>
M: Ingo Molnar <mingo@redhat.com>
@@ -15721,9 +15744,8 @@ F: Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml
F: drivers/gpio/gpio-omap.c
OMAP HARDWARE SPINLOCK SUPPORT
M: Ohad Ben-Cohen <ohad@wizery.com>
L: linux-omap@vger.kernel.org
S: Maintained
S: Orphan
F: drivers/hwspinlock/omap_hwspinlock.c
OMAP HS MMC SUPPORT
@@ -19459,22 +19481,29 @@ SECURITY SUBSYSTEM
M: Paul Moore <paul@paul-moore.com>
M: James Morris <jmorris@namei.org>
M: "Serge E. Hallyn" <serge@hallyn.com>
L: linux-security-module@vger.kernel.org (suggested Cc:)
L: linux-security-module@vger.kernel.org
S: Supported
W: http://kernsec.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git
Q: https://patchwork.kernel.org/project/linux-security-module/list
B: mailto:linux-security-module@vger.kernel.org
P: https://github.com/LinuxSecurityModule/kernel/blob/main/README.md
T: git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git
F: include/uapi/linux/lsm.h
F: security/
F: tools/testing/selftests/lsm/
X: security/selinux/
K: \bsecurity_[a-z_0-9]\+\b
SELINUX SECURITY MODULE
M: Paul Moore <paul@paul-moore.com>
M: Stephen Smalley <stephen.smalley.work@gmail.com>
M: Eric Paris <eparis@parisplace.org>
R: Ondrej Mosnacek <omosnace@redhat.com>
L: selinux@vger.kernel.org
S: Supported
W: https://selinuxproject.org
W: https://github.com/SELinuxProject
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
Q: https://patchwork.kernel.org/project/selinux/list
B: mailto:selinux@vger.kernel.org
P: https://github.com/SELinuxProject/selinux-kernel/blob/main/README.md
T: git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
F: Documentation/ABI/removed/sysfs-selinux-checkreqprot
F: Documentation/ABI/removed/sysfs-selinux-disable
F: Documentation/admin-guide/LSM/SELinux.rst
@@ -19846,7 +19875,7 @@ W: http://www.winischhofer.at/linuxsisusbvga.shtml
F: drivers/usb/misc/sisusbvga/
SL28 CPLD MFD DRIVER
M: Michael Walle <michael@walle.cc>
M: Michael Walle <mwalle@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/gpio/kontron,sl28cpld-gpio.yaml
F: Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml
@@ -19861,7 +19890,7 @@ F: drivers/pwm/pwm-sl28cpld.c
F: drivers/watchdog/sl28cpld_wdt.c
SL28 VPD NVMEM LAYOUT DRIVER
M: Michael Walle <michael@walle.cc>
M: Michael Walle <mwalle@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
F: drivers/nvmem/layouts/sl28vpd.c
@@ -20371,7 +20400,7 @@ F: drivers/pinctrl/spear/
SPI NOR SUBSYSTEM
M: Tudor Ambarus <tudor.ambarus@linaro.org>
M: Pratyush Yadav <pratyush@kernel.org>
R: Michael Walle <michael@walle.cc>
M: Michael Walle <mwalle@kernel.org>
L: linux-mtd@lists.infradead.org
S: Maintained
W: http://www.linux-mtd.infradead.org/
@@ -23613,15 +23642,6 @@ F: drivers/platform/olpc/
F: drivers/platform/x86/
F: include/linux/platform_data/x86/
X86 PLATFORM DRIVERS - ARCH
R: Darren Hart <dvhart@infradead.org>
R: Andy Shevchenko <andy@infradead.org>
L: platform-driver-x86@vger.kernel.org
L: x86@kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
F: arch/x86/platform
X86 PLATFORM UV HPE SUPERDOME FLEX
M: Steve Wahl <steve.wahl@hpe.com>
R: Justin Ernst <justin.ernst@hpe.com>
@@ -23833,10 +23853,10 @@ S: Supported
W: http://xfs.org/
C: irc://irc.oftc.net/xfs
T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
P: Documentation/filesystems/xfs-maintainer-entry-profile.rst
P: Documentation/filesystems/xfs/xfs-maintainer-entry-profile.rst
F: Documentation/ABI/testing/sysfs-fs-xfs
F: Documentation/admin-guide/xfs.rst
F: Documentation/filesystems/xfs-*
F: Documentation/filesystems/xfs/*
F: fs/xfs/
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
+4 -2
View File
@@ -1615,7 +1615,8 @@ help:
echo ' (default: $(INSTALL_HDR_PATH))'; \
echo ''
@echo 'Static analysers:'
@echo ' checkstack - Generate a list of stack hogs'
@echo ' checkstack - Generate a list of stack hogs and consider all functions'
@echo ' with a stack size larger than MINSTACKSIZE (default: 100)'
@echo ' versioncheck - Sanity check on version.h usage'
@echo ' includecheck - Check for duplicate included header files'
@echo ' export_report - List the usages of all exported symbols'
@@ -2059,9 +2060,10 @@ CHECKSTACK_ARCH := $(SUBARCH)
else
CHECKSTACK_ARCH := $(ARCH)
endif
MINSTACKSIZE ?= 100
checkstack:
$(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
$(PERL) $(srctree)/scripts/checkstack.pl $(CHECKSTACK_ARCH)
$(PERL) $(srctree)/scripts/checkstack.pl $(CHECKSTACK_ARCH) $(MINSTACKSIZE)
kernelrelease:
@$(filechk_kernel.release)
+8 -13
View File
@@ -301,17 +301,8 @@ config ARCH_HAS_DMA_CLEAR_UNCACHED
config ARCH_HAS_CPU_FINALIZE_INIT
bool
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
# Select if arch has its private alloc_task_struct() function
config ARCH_TASK_STRUCT_ALLOCATOR
bool
config HAVE_ARCH_THREAD_STRUCT_WHITELIST
bool
depends on !ARCH_TASK_STRUCT_ALLOCATOR
help
An architecture should select this to provide hardened usercopy
knowledge about what region of the thread_struct should be
@@ -320,10 +311,6 @@ config HAVE_ARCH_THREAD_STRUCT_WHITELIST
should be implemented. Without this, the entire thread_struct
field in task_struct will be left whitelisted.
# Select if arch has its private alloc_thread_stack() function
config ARCH_THREAD_STACK_ALLOCATOR
bool
# Select if arch wants to size task_struct dynamically via arch_task_struct_size:
config ARCH_WANTS_DYNAMIC_TASK_STRUCT
bool
@@ -1470,6 +1457,14 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
bool
config ARCH_HAS_HW_PTE_YOUNG
bool
help
Architectures that select this option are capable of setting the
accessed bit in PTE entries when using them as part of linear address
translations. Architectures that require runtime check should select
this option and override arch_has_hw_pte_young().
config ARCH_HAS_NONLEAF_PMD_YOUNG
bool
help
+3
View File
@@ -498,3 +498,6 @@
566 common futex_requeue sys_futex_requeue
567 common statmount sys_statmount
568 common listmount sys_listmount
569 common lsm_get_self_attr sys_lsm_get_self_attr
570 common lsm_set_self_attr sys_lsm_set_self_attr
571 common lsm_list_modules sys_lsm_list_modules
-1
View File
@@ -4,7 +4,6 @@
#
asflags-y := $(KBUILD_CFLAGS)
ccflags-y := -Werror
# Many of these routines have implementations tuned for ev6.
# Choose them iff we're targeting ev6 specifically.
-2
View File
@@ -3,6 +3,4 @@
# Makefile for the linux alpha-specific parts of the memory manager.
#
ccflags-y := -Werror
obj-y := init.o fault.o
+1 -1
View File
@@ -1362,7 +1362,7 @@ config ARCH_FORCE_MAX_ORDER
default "10"
help
The kernel page allocator limits the size of maximal physically
contiguous allocations. The limit is called MAX_ORDER and it
contiguous allocations. The limit is called MAX_PAGE_ORDER and it
defines the maximal power of two of number of pages that can be
allocated as a single contiguous block. This option allows
overriding the default setting when ability to allocate very
+1
View File
@@ -13,6 +13,7 @@
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
#define arch_scale_freq_ref topology_get_freq_ref
#endif
/* Replace task scheduler's default cpu-invariant accounting */
+3
View File
@@ -472,3 +472,6 @@
456 common futex_requeue sys_futex_requeue
457 common statmount sys_statmount
458 common listmount sys_listmount
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+12 -11
View File
@@ -36,6 +36,7 @@ config ARM64
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_HW_PTE_YOUNG
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
@@ -154,7 +155,7 @@ config ARM64
select HAVE_MOVE_PUD
select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI)
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_COMPILER_H
@@ -1519,15 +1520,15 @@ config XEN
# include/linux/mmzone.h requires the following to be true:
#
# MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
# MAX_PAGE_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
#
# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT:
# so the maximum value of MAX_PAGE_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT:
#
# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER |
# ----+-------------------+--------------+-----------------+--------------------+
# 4K | 27 | 12 | 15 | 10 |
# 16K | 27 | 14 | 13 | 11 |
# 64K | 29 | 16 | 13 | 13 |
# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_PAGE_ORDER | default MAX_PAGE_ORDER |
# ----+-------------------+--------------+----------------------+-------------------------+
# 4K | 27 | 12 | 15 | 10 |
# 16K | 27 | 14 | 13 | 11 |
# 64K | 29 | 16 | 13 | 13 |
config ARCH_FORCE_MAX_ORDER
int
default "13" if ARM64_64K_PAGES
@@ -1535,16 +1536,16 @@ config ARCH_FORCE_MAX_ORDER
default "10"
help
The kernel page allocator limits the size of maximal physically
contiguous allocations. The limit is called MAX_ORDER and it
contiguous allocations. The limit is called MAX_PAGE_ORDER and it
defines the maximal power of two of number of pages that can be
allocated as a single contiguous block. This option allows
overriding the default setting when ability to allocate very
large blocks of physically contiguous memory is required.
The maximal size of allocation cannot exceed the size of the
section, so the value of MAX_ORDER should satisfy
section, so the value of MAX_PAGE_ORDER should satisfy
MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
MAX_PAGE_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS
Don't change if unsure.
+2 -22
View File
@@ -15,29 +15,9 @@
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
void kasan_init(void);
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
* where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
*
* KASAN_SHADOW_OFFSET:
* This value is used to map an address to the corresponding shadow
* address by the following formula:
* shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
*
* (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
* [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
* addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
* (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
*/
#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
void kasan_copy_shadow(pgd_t *pgdir);
asmlinkage void kasan_early_init(void);
void kasan_init(void);
void kasan_copy_shadow(pgd_t *pgdir);
#else
static inline void kasan_init(void) { }
+32 -6
View File
@@ -65,15 +65,41 @@
#define KERNEL_END _end
/*
* Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
* address space for the shadow region respectively. They can bloat the stack
* significantly, so double the (minimum) stack size when they are in use.
* Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the
* kernel virtual address space for storing the shadow memory respectively.
*
* The mapping between a virtual memory address and its corresponding shadow
* memory address is defined based on the formula:
*
* shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
*
* where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map
* to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets
* the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of
* the shadow memory region.
*
* Based on this mapping, we define two constants:
*
* KASAN_SHADOW_START: the start of the shadow memory region;
* KASAN_SHADOW_END: the end of the shadow memory region.
*
* KASAN_SHADOW_END is defined first as the shadow address that corresponds to
* the upper bound of possible virtual kernel memory addresses UL(1) << 64
* according to the mapping formula.
*
* KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow
* memory start must map to the lowest possible kernel virtual memory address
* and thus it depends on the actual bitness of the address space.
*
* As KASAN inserts redzones between stack variables, this increases the stack
* memory usage significantly. Thus, we double the (minimum) stack size.
*/
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
+ KASAN_SHADOW_OFFSET)
#define PAGE_END (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET)
#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
#define PAGE_END KASAN_SHADOW_START
#define KASAN_THREAD_SHIFT 1
#else
#define KASAN_THREAD_SHIFT 0
+1 -1
View File
@@ -10,7 +10,7 @@
/*
* Section size must be at least 512MB for 64K base
* page size config. Otherwise it will be less than
* MAX_ORDER and the build process will fail.
* MAX_PAGE_ORDER and the build process will fail.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SECTION_SIZE_BITS 29
+1
View File
@@ -23,6 +23,7 @@ void update_freq_counters_refs(void);
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
#define arch_scale_freq_ref topology_get_freq_ref
#ifdef CONFIG_ACPI_CPPC_LIB
#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
+1 -1
View File
@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 459
#define __NR_compat_syscalls 462
#endif
#define __ARCH_WANT_SYS_CLONE
+6
View File
@@ -923,6 +923,12 @@ __SYSCALL(__NR_futex_requeue, sys_futex_requeue)
__SYSCALL(__NR_statmount, sys_statmount)
#define __NR_listmount 458
__SYSCALL(__NR_listmount, sys_listmount)
#define __NR_lsm_get_self_attr 459
__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
#define __NR_lsm_set_self_attr 460
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
/*
* Please add new compat syscalls above this comment and update
+3 -3
View File
@@ -122,9 +122,9 @@ static void *image_load(struct kimage *image,
kernel_segment->memsz -= text_offset;
image->start = kernel_segment->mem;
pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
kernel_segment->mem, kbuf.bufsz,
kernel_segment->memsz);
kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
kernel_segment->mem, kbuf.bufsz,
kernel_segment->memsz);
return NULL;
}
+6 -20
View File
@@ -32,26 +32,12 @@
static void _kexec_image_info(const char *func, int line,
const struct kimage *kimage)
{
unsigned long i;
pr_debug("%s:%d:\n", func, line);
pr_debug(" kexec kimage info:\n");
pr_debug(" type: %d\n", kimage->type);
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem);
pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
i,
kimage->segment[i].mem,
kimage->segment[i].mem + kimage->segment[i].memsz,
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
}
kexec_dprintk("%s:%d:\n", func, line);
kexec_dprintk(" kexec kimage info:\n");
kexec_dprintk(" type: %d\n", kimage->type);
kexec_dprintk(" head: %lx\n", kimage->head);
kexec_dprintk(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
kexec_dprintk(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
}
void machine_kexec_cleanup(struct kimage *kimage)
+6 -6
View File
@@ -127,8 +127,8 @@ int load_other_segments(struct kimage *image,
image->elf_load_addr = kbuf.mem;
image->elf_headers_sz = headers_sz;
pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load initrd */
@@ -148,8 +148,8 @@ int load_other_segments(struct kimage *image,
goto out_err;
initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
initrd_load_addr, kbuf.bufsz, kbuf.memsz);
kexec_dprintk("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
initrd_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load dtb */
@@ -179,8 +179,8 @@ int load_other_segments(struct kimage *image,
image->arch.dtb = dtb;
image->arch.dtb_mem = kbuf.mem;
pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
kbuf.mem, kbuf.bufsz, kbuf.memsz);
kexec_dprintk("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
kbuf.mem, kbuf.bufsz, kbuf.memsz);
return 0;
+13 -13
View File
@@ -82,7 +82,12 @@ int __init parse_acpi_topology(void)
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
/*
* Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
* the CPU capacity and its associated frequency have been correctly
* initialized.
*/
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
@@ -112,14 +117,14 @@ static inline bool freq_counters_valid(int cpu)
return true;
}
static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
void freq_inv_set_max_ratio(int cpu, u64 max_rate)
{
u64 ratio;
u64 ratio, ref_rate = arch_timer_get_rate();
if (unlikely(!max_rate || !ref_rate)) {
pr_debug("CPU%d: invalid maximum or reference frequency.\n",
WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n",
cpu);
return -EINVAL;
return;
}
/*
@@ -139,12 +144,10 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
ratio = div64_u64(ratio, max_rate);
if (!ratio) {
WARN_ONCE(1, "Reference frequency too low.\n");
return -EINVAL;
return;
}
per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
return 0;
WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio);
}
static void amu_scale_freq_tick(void)
@@ -195,10 +198,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
return;
for_each_cpu(cpu, cpus) {
if (!freq_counters_valid(cpu) ||
freq_inv_set_max_ratio(cpu,
cpufreq_get_hw_max_freq(cpu) * 1000ULL,
arch_timer_get_rate()))
if (!freq_counters_valid(cpu))
return;
}
+1 -1
View File
@@ -16,7 +16,7 @@ struct hyp_pool {
* API at EL2.
*/
hyp_spinlock_t lock;
struct list_head free_area[MAX_ORDER + 1];
struct list_head free_area[NR_PAGE_ORDERS];
phys_addr_t range_start;
phys_addr_t range_end;
unsigned short max_order;

Some files were not shown because too many files have changed in this diff Show More