diff options
821 files changed, 26387 insertions, 8681 deletions
diff --git a/.gitignore b/.gitignore index 265959544978..5da004814678 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,8 @@ *.o *.o.* *.patch +*.rmeta +*.rsi *.s *.so *.so.dbg @@ -97,6 +99,7 @@ modules.order !.gitattributes !.gitignore !.mailmap +!.rustfmt.toml # # Generated include files @@ -162,3 +165,6 @@ x509.genkey # Documentation toolchain sphinx_*/ + +# Rust analyzer configuration +/rust-project.json diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 000000000000..3de5cc497465 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,12 @@ +edition = "2021" +newline_style = "Unix" + +# Unstable options that help catching some mistakes in formatting and that we may want to enable +# when they become stable. +# +# They are kept here since they are useful to run from time to time. +#format_code_in_doc_comments = true +#reorder_impl_items = true +#comment_width = 100 +#wrap_comments = true +#normalize_comments = true diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma index 721b4aea3020..e93d3ddca844 100644 --- a/Documentation/ABI/testing/sysfs-bus-bcma +++ b/Documentation/ABI/testing/sysfs-bus-bcma @@ -3,7 +3,7 @@ Date: May 2011 KernelVersion: 3.0 Contact: RafaÅ‚ MiÅ‚ecki <zajec5@gmail.com> Description: - Each BCMA core has it's manufacturer id. See + Each BCMA core has its manufacturer id. See include/linux/bcma/bcma.h for possible values. What: /sys/bus/bcma/devices/.../id diff --git a/Documentation/ABI/testing/sysfs-bus-fcoe b/Documentation/ABI/testing/sysfs-bus-fcoe index 8fe787cc4ab7..5a4f2091ac37 100644 --- a/Documentation/ABI/testing/sysfs-bus-fcoe +++ b/Documentation/ABI/testing/sysfs-bus-fcoe @@ -31,7 +31,7 @@ Description: 'FCoE Controller' instances on the fcoe bus. 1) Write interface name to ctlr_create 2) Configure the FCoE Controller (ctlr_X) 3) Enable the FCoE Controller to begin discovery and login. The FCoE Controller is destroyed by - writing it's name, i.e. ctlr_X to the ctlr_delete file. + writing its name, i.e. ctlr_X to the ctlr_delete file. Attributes: diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity b/Documentation/ABI/testing/sysfs-bus-iio-proximity index 3aac6dab8775..9b9d1cc9b703 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-proximity +++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity @@ -18,7 +18,7 @@ Description: on the signal from which time of flight measurements are taken. The appropriate values to take is dependent on both the - sensor and it's operating environment: + sensor and its operating environment: * as3935 (0-31 range) 18 = indoors (default) 14 = outdoors diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 760c889b6cd1..f54867cadb0f 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -296,7 +296,7 @@ Description: Processor frequency boosting control This switch controls the boost setting for the whole system. Boosting allows the CPU and the firmware to run at a frequency - beyond it's nominal limit. + beyond its nominal limit. More details can be found in Documentation/admin-guide/pm/cpufreq.rst diff --git a/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 index b0f4684a83fe..b9f7d924f28a 100644 --- a/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 +++ b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 @@ -2,8 +2,8 @@ What: /sys/bus/platform/devices/ci_hdrc.0/role Date: Mar 2017 Contact: Peter Chen <peter.chen@nxp.com> Description: - It returns string "gadget" or "host" when read it, it indicates - current controller role. + When read, it returns string "gadget" or "host", indicating + the current controller role. - It will do role switch when write "gadget" or "host" to it. + It will do role switch when "gadget" or "host" is written to it. Only controller at dual-role configuration supports writing. diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 90ec4987074b..f99d433ff311 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -152,7 +152,7 @@ Description: case further investigation is required to determine which device is causing the problem. Note that genuine RTC clock values (such as when pm_trace has not been used), can still - match a device and output it's name here. + match a device and output its name here. What: /sys/power/pm_async Date: January 2009 diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index 42cc5d891bd2..048c5bc1f813 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -66,8 +66,13 @@ over a rather long period of time, but improvements are always welcome! As a rough rule of thumb, any dereference of an RCU-protected pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(), rcu_read_lock_sched(), or by the appropriate update-side lock. - Disabling of preemption can serve as rcu_read_lock_sched(), but - is less readable and prevents lockdep from detecting locking issues. + Explicit disabling of preemption (preempt_disable(), for example) + can serve as rcu_read_lock_sched(), but is less readable and + prevents lockdep from detecting locking issues. + + Please not that you *cannot* rely on code known to be built + only in non-preemptible kernels. Such code can and will break, + especially in kernels built with CONFIG_PREEMPT_COUNT=y. Letting RCU-protected pointers "leak" out of an RCU read-side critical section is every bit as bad as letting them leak out @@ -185,6 +190,9 @@ over a rather long period of time, but improvements are always welcome! 5. If call_rcu() or call_srcu() is used, the callback function will be called from softirq context. In particular, it cannot block. + If you need the callback to block, run that code in a workqueue + handler scheduled from the callback. The queue_rcu_work() + function does this for you in the case of call_rcu(). 6. Since synchronize_rcu() can block, it cannot be called from any sort of irq context. The same rule applies @@ -297,7 +305,8 @@ over a rather long period of time, but improvements are always welcome! the machine. d. Periodically invoke synchronize_rcu(), permitting a limited - number of updates per grace period. + number of updates per grace period. Better yet, periodically + invoke rcu_barrier() to wait for all outstanding callbacks. The same cautions apply to call_srcu() and kfree_rcu(). @@ -477,6 +486,6 @@ over a rather long period of time, but improvements are always welcome! So if you need to wait for both an RCU grace period and for all pre-existing call_rcu() callbacks, you will need to execute both rcu_barrier() and synchronize_rcu(), if necessary, using - something like workqueues to to execute them concurrently. + something like workqueues to execute them concurrently. See rcubarrier.rst for more information. diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst index cc860a0c296b..a94f55991a71 100644 --- a/Documentation/RCU/lockdep.rst +++ b/Documentation/RCU/lockdep.rst @@ -61,7 +61,7 @@ checking of rcu_dereference() primitives: rcu_access_pointer(p): Return the value of the pointer and omit all barriers, but retain the compiler constraints that prevent duplicating - or coalescsing. This is useful when when testing the + or coalescsing. This is useful when testing the value of the pointer itself, for example, against NULL. The rcu_dereference_check() check expression can be any boolean diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst index 0b418a5b243c..81e828c8313b 100644 --- a/Documentation/RCU/rcu_dereference.rst +++ b/Documentation/RCU/rcu_dereference.rst @@ -128,10 +128,16 @@ Follow these rules to keep your RCU code working properly: This sort of comparison occurs frequently when scanning RCU-protected circular linked lists. - Note that if checks for being within an RCU read-side - critical section are not required and the pointer is never - dereferenced, rcu_access_pointer() should be used in place - of rcu_dereference(). + Note that if the pointer comparison is done outside + of an RCU read-side critical section, and the pointer + is never dereferenced, rcu_access_pointer() should be + used in place of rcu_dereference(). In most cases, + it is best to avoid accidental dereferences by testing + the rcu_access_pointer() return value directly, without + assigning it to a variable. + + Within an RCU read-side critical section, there is little + reason to use rcu_access_pointer(). - The comparison is against a pointer that references memory that was initialized "a long time ago." The reason diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 77ea260efd12..1c747ac3f2c8 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -6,13 +6,15 @@ What is RCU? -- "Read, Copy, Update" Please note that the "What is RCU?" LWN series is an excellent place to start learning about RCU: -| 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ -| 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ -| 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ -| 4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/ -| 2010 Big API Table http://lwn.net/Articles/419086/ -| 5. The RCU API, 2014 Edition http://lwn.net/Articles/609904/ -| 2014 Big API Table http://lwn.net/Articles/609973/ +| 1. What is RCU, Fundamentally? https://lwn.net/Articles/262464/ +| 2. What is RCU? Part 2: Usage https://lwn.net/Articles/263130/ +| 3. RCU part 3: the RCU API https://lwn.net/Articles/264090/ +| 4. The RCU API, 2010 Edition https://lwn.net/Articles/418853/ +| 2010 Big API Table https://lwn.net/Articles/419086/ +| 5. The RCU API, 2014 Edition https://lwn.net/Articles/609904/ +| 2014 Big API Table https://lwn.net/Articles/609973/ +| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/ +| 2019 Big API Table https://lwn.net/Articles/777165/ What is RCU? @@ -915,13 +917,18 @@ which an RCU reference is held include: The understanding that RCU provides a reference that only prevents a change of type is particularly visible with objects allocated from a slab cache marked ``SLAB_TYPESAFE_BY_RCU``. RCU operations may yield a -reference to an object from such a cache that has been concurrently -freed and the memory reallocated to a completely different object, -though of the same type. In this case RCU doesn't even protect the -identity of the object from changing, only its type. So the object -found may not be the one expected, but it will be one where it is safe -to take a reference or spinlock and then confirm that the identity -matches the expectations. +reference to an object from such a cache that has been concurrently freed +and the memory reallocated to a completely different object, though of +the same type. In this case RCU doesn't even protect the identity of the +object from changing, only its type. So the object found may not be the +one expected, but it will be one where it is safe to take a reference +(and then potentially acquiring a spinlock), allowing subsequent code +to check whether the identity matches expectations. It is tempting +to simply acquire the spinlock without first taking the reference, but +unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be +initialized after each and every call to kmem_cache_alloc(), which renders +reference-free spinlock acquisition completely unsafe. Therefore, when +using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter. With traditional reference counting -- such as that implemented by the kref library in Linux -- there is typically code that runs when the last @@ -1057,14 +1064,20 @@ SRCU: Initialization/cleanup:: init_srcu_struct cleanup_srcu_struct -All: lockdep-checked RCU-protected pointer access:: +All: lockdep-checked RCU utility APIs:: - rcu_access_pointer - rcu_dereference_raw RCU_LOCKDEP_WARN rcu_sleep_check RCU_NONIDLE +All: Unchecked RCU-protected pointer access:: + + rcu_dereference_raw + +All: Unchecked RCU-protected pointer access with dereferencing prohibited:: + + rcu_access_pointer + See the comment headers in the source code (or the docbook generated from them) for more information. diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 9eb6b9042f75..9a969c0157f1 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -262,8 +262,6 @@ Compiling the kernel - Make sure you have at least gcc 5.1 available. For more information, refer to :ref:`Documentation/process/changes.rst <changes>`. - Please note that you can still run a.out user programs with this kernel. - - Do a ``make`` to create a compressed kernel image. It is also possible to do ``make install`` if you have lilo installed to suit the kernel makefiles, but you may want to check your particular lilo setup first. @@ -332,85 +330,10 @@ Compiling the kernel If something goes wrong ----------------------- - - If you have problems that seem to be due to kernel bugs, please check - the file MAINTAINERS to see if there is a particular person associated - with the part of the kernel that you are having trouble with. If there - isn't anyone listed there, then the second best thing is to mail - them to me (torvalds@linux-foundation.org), and possibly to any other - relevant mailing-list or to the newsgroup. - - - In all bug-reports, *please* tell what kernel you are talking about, - how to duplicate the problem, and what your setup is (use your common - sense). If the problem is new, tell me so, and if the problem is - old, please try to tell me when you first noticed it. - - - If the bug results in a message like:: - - unable to handle kernel paging request at address C0000010 - Oops: 0002 - EIP: 0010:XXXXXXXX - eax: xxxxxxxx ebx: xxxxxxxx ecx: xxxxxxxx edx: xxxxxxxx - esi: xxxxxxxx edi: xxxxxxxx ebp: xxxxxxxx - ds: xxxx es: xxxx fs: xxxx gs: xxxx - Pid: xx, process nr: xx - xx xx xx xx xx xx xx xx xx xx - - or similar kernel debugging information on your screen or in your - system log, please duplicate it *exactly*. The dump may look - incomprehensible to you, but it does contain information that may - help debugging the problem. The text above the dump is also - important: it tells something about why the kernel dumped code (in - the above example, it's due to a bad kernel pointer). More information - on making sense of the dump is in Documentation/admin-guide/bug-hunting.rst - - - If you compiled the kernel with CONFIG_KALLSYMS you can send the dump - as is, otherwise you will have to use the ``ksymoops`` program to make - sense of the dump (but compiling with CONFIG_KALLSYMS is usually preferred). - This utility can be downloaded from - https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ . - Alternatively, you can do the dump lookup by hand: - - - In debugging dumps like the above, it helps enormously if you can - look up what the EIP value means. The hex value as such doesn't help - me or anybody else very much: it will depend on your particular - kernel setup. What you should do is take the hex value from the EIP - line (ignore the ``0010:``), and look it up in the kernel namelist to - see which kernel function contains the offending address. - - To find out the kernel function name, you'll need to find the system - binary associated with the kernel that exhibited the symptom. This is - the file 'linux/vmlinux'. To extract the namelist and match it against - the EIP from the kernel crash, do:: - - nm vmlinux | sort | less - - This will give you a list of kernel addresses sorted in ascending - order, from which it is simple to find the function that contains the - offending address. Note that the address given by the kernel - debugging messages will not necessarily match exactly with the - function addresses (in fact, that is very unlikely), so you can't - just 'grep' the list: the list will, however, give you the starting - point of each kernel function, so by looking for the function that - has a starting address lower than the one you are searching for but - is followed by a function with a higher address you will find the one - you want. In fact, it may be a good idea to include a bit of - "context" in your problem report, giving a few lines around the - interesting one. - - If you for some reason cannot do the above (you have a pre-compiled - kernel image or similar), telling me as much about your setup as - possible will help. Please read - 'Documentation/admin-guide/reporting-issues.rst' for details. - - - Alternatively, you can use gdb on a running kernel. (read-only; i.e. you - cannot change values or set break points.) To do this, first compile the - kernel with -g; edit arch/x86/Makefile appropriately, then do a ``make - clean``. You'll also need to enable CONFIG_PROC_FS (via ``make config``). - - After you've rebooted with the new kernel, do ``gdb vmlinux /proc/kcore``. - You can now use all the usual gdb commands. The command to look up the - point where your system crashed is ``l *0xXXXXXXXX``. (Replace the XXXes - with the EIP value.) - - gdb'ing a non-running kernel currently fails because ``gdb`` (wrongly) - disregards the starting offset for which the kernel is compiled. +If you have problems that seem to be due to kernel bugs, please follow the +instructions at 'Documentation/admin-guide/reporting-issues.rst'. + +Hints on understanding kernel bug reports are in +'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel +with gdb is in 'Documentation/dev-tools/gdb-kernel-debugging.rst' and +'Documentation/dev-tools/kgdb.rst'. diff --git a/Documentation/admin-guide/acpi/dsdt-override.rst b/Documentation/admin-guide/acpi/dsdt-override.rst deleted file mode 100644 index 50bd7f194bf4..000000000000 --- a/Documentation/admin-guide/acpi/dsdt-override.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=============== -Overriding DSDT -=============== - -Linux supports a method of overriding the BIOS DSDT: - -CONFIG_ACPI_CUSTOM_DSDT - builds the image into the kernel. - -When to use this method is described in detail on the -Linux/ACPI home page: -https://01.org/linux-acpi/documentation/overriding-dsdt diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 2ce2a38cdd55..c4dcdb3d0d45 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -613,6 +613,7 @@ kernel command line. eibrs enhanced IBRS eibrs,retpoline enhanced IBRS + Retpolines eibrs,lfence enhanced IBRS + LFENCE + ibrs use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 8419019b6a88..6726f439958c 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -200,7 +200,7 @@ prb A pointer to the printk ringbuffer (struct printk_ringbuffer). This may be pointing to the static boot ringbuffer or the dynamically -allocated ringbuffer, depending on when the the core dump occurred. +allocated ringbuffer, depending on when the core dump occurred. Used by user-space tools to read the active kernel log buffer. printk_rb_static diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a6d3c7beaa4a..e92d63d3e878 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3801,6 +3801,10 @@ nox2apic [X86-64,APIC] Do not enable x2APIC mode. + NOTE: this parameter will be ignored on systems with the + LEGACY_XAPIC_DISABLED bit set in the + IA32_XAPIC_DISABLE_STATUS MSR. + nps_mtm_hs_ctr= [KNL,ARC] This parameter sets the maximum duration, in cycles, each HW thread of the CTOP can run diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index 8e2727dc18d4..19f27c0d92e0 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -65,7 +65,7 @@ HugePages_Surp may be temporarily larger than the maximum number of surplus huge pages when the system is under memory pressure. Hugepagesize - is the default hugepage size (in Kb). + is the default hugepage size (in kB). Hugetlb is the total amount of memory (in kB), consumed by huge pages of all sizes. diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst index 7d80e8c307d1..92a8a07f5c43 100644 --- a/Documentation/admin-guide/tainted-kernels.rst +++ b/Documentation/admin-guide/tainted-kernels.rst @@ -134,6 +134,12 @@ More detailed explanation for tainting scsi/snic on something else than x86_64, scsi/ips on non x86/x86_64/itanium, have broken firmware settings for the irqchip/irq-gic on arm64 ...). + - x86/x86_64: Microcode late loading is dangerous and will result in + tainting the kernel. It requires that all CPUs rendezvous to make sure + the update happens when the system is as quiescent as possible. However, + a higher priority MCE/SMI/NMI can move control flow away from that + rendezvous and interrupt the update, which can be detrimental to the + machine. 3) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all modules were unloaded normally. diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 4997d2088fef..5d798437dad4 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -138,7 +138,7 @@ The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. -The 1-bit source operand field in the opcode is used to to select what byte +The 1-bit source operand field in the opcode is used to select what byte order the operation convert from or to: ========= ===== ================================================= diff --git a/Documentation/bpf/map_cgroup_storage.rst b/Documentation/bpf/map_cgroup_storage.rst index cab9543017bf..8e5fe532c07e 100644 --- a/Documentation/bpf/map_cgroup_storage.rst +++ b/Documentation/bpf/map_cgroup_storage.rst @@ -31,7 +31,7 @@ The map uses key of type of either ``__u64 cgroup_inode_id`` or }; ``cgroup_inode_id`` is the inode id of the cgroup directory. -``attach_type`` is the the program's attach type. +``attach_type`` is the program's attach type. Linux 5.9 added support for type ``__u64 cgroup_inode_id`` as the key type. When this key type is used, then all attach types of the particular cgroup and @@ -155,7 +155,7 @@ However, the BPF program can still only associate with one map of each type ``BPF_MAP_TYPE_CGROUP_STORAGE`` or more than one ``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE``. -In all versions, userspace may use the the attach parameters of cgroup and +In all versions, userspace may use the attach parameters of cgroup and attach type pair in ``struct bpf_cgroup_storage_key`` as the key to the BPF map APIs to read or update the storage for a given attachment. For Linux 5.9 attach type shared storages, only the first value in the struct, cgroup inode diff --git a/Documentation/conf.py b/Documentation/conf.py index 255384d094bf..b50c85083149 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -15,6 +15,18 @@ import sys import os import sphinx +import shutil + +# helper +# ------ + +def have_command(cmd): + """Search ``cmd`` in the ``PATH`` environment. + + If found, return True. + If not found, return False. + """ + return shutil.which(cmd) is not None # Get Sphinx version major, minor, patch = sphinx.version_info[:3] @@ -107,7 +119,32 @@ else: autosectionlabel_prefix_document = True autosectionlabel_maxdepth = 2 -extensions.append("sphinx.ext.imgmath") +# Load math renderer: +# For html builder, load imgmath only when its dependencies are met. +# mathjax is the default math renderer since Sphinx 1.8. +have_latex = have_command('latex') +have_dvipng = have_command('dvipng') +load_imgmath = have_latex and have_dvipng + +# Respect SPHINX_IMGMATH (for html docs only) +if 'SPHINX_IMGMATH' in os.environ: + env_sphinx_imgmath = os.environ['SPHINX_IMGMATH'] + if 'yes' in env_sphinx_imgmath: + load_imgmath = True + elif 'no' in env_sphinx_imgmath: + load_imgmath = False + else: + sys.stderr.write("Unknown env SPHINX_IMGMATH=%s ignored.\n" % env_sphinx_imgmath) + +# Always load imgmath for Sphinx <1.8 or for epub docs +load_imgmath = (load_imgmath or (major == 1 and minor < 8) + or 'epub' in sys.argv) + +if load_imgmath: + extensions.append("sphinx.ext.imgmath") + math_renderer = 'imgmath' +else: + math_renderer = 'mathjax' # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -333,7 +370,8 @@ html_static_path = ['sphinx-static'] html_use_smartypants = False # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# Note that the RTD theme ignores this. +html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']} # Additional templates that should be rendered to pages, maps page names to # template names. diff --git a/Documentation/asm-annotations.rst b/Documentation/core-api/asm-annotations.rst index a64f2ca469d4..bc514ed59887 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/core-api/asm-annotations.rst @@ -43,10 +43,11 @@ annotated objects like this, tools can be run on them to generate more useful information. In particular, on properly annotated objects, ``objtool`` can be run to check and fix the object if needed. Currently, ``objtool`` can report missing frame pointer setup/destruction in functions. It can also -automatically generate annotations for :doc:`ORC unwinder <x86/orc-unwinder>` +automatically generate annotations for the ORC unwinder +(Documentation/x86/orc-unwinder.rst) for most code. Both of these are especially important to support reliable -stack traces which are in turn necessary for :doc:`Kernel live patching -<livepatch/livepatch>`. +stack traces which are in turn necessary for kernel live patching +(Documentation/livepatch/livepatch.rst). Caveat and Discussion --------------------- diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index c6f4ba2fb32d..f75778d37488 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -560,7 +560,7 @@ available: * cpuhp_state_remove_instance(state, node) * cpuhp_state_remove_instance_nocalls(state, node) -The arguments are the same as for the the cpuhp_state_add_instance*() +The arguments are the same as for the cpuhp_state_add_instance*() variants above. The functions differ in the way how the installed callbacks are treated: diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index dc95df462eea..b0e7b4771fff 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -23,6 +23,7 @@ it. printk-formats printk-index symbol-namespaces + asm-annotations Data structures and low-level utilities ======================================= @@ -44,6 +45,8 @@ Library functionality that is used throughout the kernel. this_cpu_ops timekeeping errseq + wrappers/atomic_t + wrappers/atomic_bitops Low level entry and exit ======================== @@ -67,6 +70,7 @@ Documentation/locking/index.rst for more related documentation. local_ops padata ../RCU/index + wrappers/memory-barriers.rst Low-level hardware management ============================= diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst index d30b4d0a9769..f88a6ee67a35 100644 --- a/Documentation/core-api/irq/irq-domain.rst +++ b/Documentation/core-api/irq/irq-domain.rst @@ -71,7 +71,7 @@ variety of methods: Note that irq domain lookups must happen in contexts that are compatible with a RCU read-side critical section. -The irq_create_mapping() function must be called *atleast once* +The irq_create_mapping() function must be called *at least once* before any call to irq_find_mapping(), lest the descriptor will not be allocated. diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 5e89497ba314..dbe1aacc79d0 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -625,6 +625,16 @@ Examples:: %p4cc Y10 little-endian (0x20303159) %p4cc NV12 big-endian (0xb231564e) +Rust +---- + +:: + + %pA + +Only intended to be used from Rust code to format ``core::fmt::Arguments``. +Do *not* use it from C. + Thanks ====== diff --git a/Documentation/core-api/wrappers/atomic_bitops.rst b/Documentation/core-api/wrappers/atomic_bitops.rst new file mode 100644 index 000000000000..bf24e4081a8f --- /dev/null +++ b/Documentation/core-api/wrappers/atomic_bitops.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + This is a simple wrapper to bring atomic_bitops.txt into the RST world + until such a time as that file can be converted directly. + +============= +Atomic bitops +============= + +.. raw:: latex + + \footnotesize + +.. include:: ../../atomic_bitops.txt + :literal: + +.. raw:: latex + + \normalsize diff --git a/Documentation/core-api/wrappers/atomic_t.rst b/Documentation/core-api/wrappers/atomic_t.rst new file mode 100644 index 000000000000..ed109a964c77 --- /dev/null +++ b/Documentation/core-api/wrappers/atomic_t.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + This is a simple wrapper to bring atomic_t.txt into the RST world + until such a time as that file can be converted directly. + +============ +Atomic types +============ + +.. raw:: latex + + \footnotesize + +.. include:: ../../atomic_t.txt + :literal: + +.. raw:: latex + + \normalsize + diff --git a/Documentation/core-api/wrappers/memory-barriers.rst b/Documentation/core-api/wrappers/memory-barriers.rst new file mode 100644 index 000000000000..532460b5e3eb --- /dev/null +++ b/Documentation/core-api/wrappers/memory-barriers.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + This is a simple wrapper to bring memory-barriers.txt into the RST world + until such a time as that file can be converted directly. + +============================ +Linux kernel memory barriers +============================ + +.. raw:: latex + + \footnotesize + +.. include:: ../../memory-barriers.txt + :literal: + +.. raw:: latex + + \normalsize diff --git a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml index a41588763786..bf396e9466aa 100644 --- a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml @@ -57,6 +57,11 @@ properties: - description: interrupt ID for I2C event - description: interrupt ID for I2C error + interrupt-names: + items: + - const: event + - const: error + resets: maxItems: 1 @@ -92,6 +97,8 @@ properties: - description: register offset within syscfg - description: register bitmask for FMP bit + wakeup-source: true + required: - compatible - reg diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst index 9c779bd7a751..1dcbd7332476 100644 --- a/Documentation/doc-guide/kernel-doc.rst +++ b/Documentation/doc-guide/kernel-doc.rst @@ -14,6 +14,9 @@ when it is embedded in source files. reasons. The kernel source contains tens of thousands of kernel-doc comments. Please stick to the style described here. +.. note:: kernel-doc does not cover Rust code: please see + Documentation/rust/general-information.rst instead. + The kernel-doc structure is extracted from the comments, and proper `Sphinx C Domain`_ function and type descriptions with anchors are generated from them. The descriptions are filtered for special kernel-doc diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 1228b85f6f77..c708cec889af 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -48,10 +48,6 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. on the Sphinx version, it should be installed separately, with ``pip install sphinx_rtd_theme``. - #) Some ReST pages contain math expressions. Due to the way Sphinx works, - those expressions are written using LaTeX notation. It needs texlive - installed with amsfonts and amsmath in order to evaluate them. - In summary, if you want to install Sphinx version 2.4.4, you should do:: $ virtualenv sphinx_2.4.4 @@ -86,6 +82,27 @@ Depending on the distribution, you may also need to install a series of ``texlive`` packages that provide the minimal set of functionalities required for ``XeLaTeX`` to work. +Math Expressions in HTML +------------------------ + +Some ReST pages contain math expressions. Due to the way Sphinx works, +those expressions are written using LaTeX notation. +There are two options for Sphinx to render math expressions in html output. +One is an extension called `imgmath`_ which converts math expressions into +images and embeds them in html pages. +The other is an extension called `mathjax`_ which delegates math rendering +to JavaScript capable web browsers. +The former was the only option for pre-6.1 kernel documentation and it +requires quite a few texlive packages including amsfonts and amsmath among +others. + +Since kernel release 6.1, html pages with math expressions can be built +without installing any texlive packages. See `Choice of Math Renderer`_ for +further info. + +.. _imgmath: https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.imgmath +.. _mathjax: https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax + .. _sphinx-pre-install: Checking for Sphinx dependencies @@ -164,6 +181,38 @@ To remove the generated documentation, run ``make cleandocs``. as well would improve the quality of images embedded in PDF documents, especially for kernel releases 5.18 and later. +Choice of Math Renderer +----------------------- + +Since kernel release 6.1, mathjax works as a fallback math renderer for +html output.\ [#sph1_8]_ + +Math renderer is chosen depending on available commands as shown below: + +.. table:: Math Renderer Choices for HTML + + ============= ================= ============ + Math renderer Required commands Image format + ============= ================= ============ + imgmath latex, dvipng PNG (raster) + mathjax + ============= ================= ============ + +The choice can be overridden by setting an environment variable +``SPHINX_IMGMATH`` as shown below: + +.. table:: Effect of Setting ``SPHINX_IMGMATH`` + + ====================== ======== + Setting Renderer + ====================== ======== + ``SPHINX_IMGMATH=yes`` imgmath + ``SPHINX_IMGMATH=no`` mathjax + ====================== ======== + +.. [#sph1_8] Fallback of math renderer requires Sphinx >=1.8. + + Writing Documentation ===================== diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 55272942e721..dc1b2353cea3 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -301,6 +301,7 @@ IO region devm_release_region() devm_release_resource() devm_request_mem_region() + devm_request_free_mem_region() devm_request_region() devm_request_resource() @@ -334,7 +335,7 @@ IRQ devm_irq_alloc_descs_from() devm_irq_alloc_generic_chip() devm_irq_setup_generic_chip() - devm_irq_sim_init() + devm_irq_domain_create_sim() LED devm_led_classdev_register() @@ -392,7 +393,9 @@ PHY PINCTRL devm_pinctrl_get() devm_pinctrl_put() + devm_pinctrl_get_select() devm_pinctrl_register() + devm_pinctrl_register_and_init() devm_pinctrl_unregister() POWER @@ -427,6 +430,8 @@ SLAVE DMA ENGINE devm_acpi_dma_controller_register() SPI + devm_spi_alloc_master() + devm_spi_alloc_slave() devm_spi_register_master() WATCHDOG diff --git a/Documentation/driver-api/isa.rst b/Documentation/driver-api/isa.rst index def4a7b690b5..3df1b1696524 100644 --- a/Documentation/driver-api/isa.rst +++ b/Documentation/driver-api/isa.rst @@ -100,7 +100,7 @@ I believe platform_data is available for this, but if rather not, moving the isa_driver pointer to the private struct isa_dev is ofcourse fine as well. -Then, if the the driver did not provide a .match, it matches. If it did, +Then, if the driver did not provide a .match, it matches. If it did, the driver match() method is called to determine a match. If it did **not** match, dev->platform_data is reset to indicate this to diff --git a/Documentation/fb/udlfb.rst b/Documentation/fb/udlfb.rst index 732b37db3504..99cfbb7a1922 100644 --- a/Documentation/fb/udlfb.rst +++ b/Documentation/fb/udlfb.rst @@ -86,17 +86,24 @@ Module Options Special configuration for udlfb is usually unnecessary. There are a few options, however. -From the command line, pass options to modprobe -modprobe udlfb fb_defio=0 console=1 shadow=1 +From the command line, pass options to modprobe:: -Or modify options on the fly at /sys/module/udlfb/parameters directory via -sudo nano fb_defio -change the parameter in place, and save the file. + modprobe udlfb fb_defio=0 console=1 shadow=1 -Unplug/replug USB device to apply with new settings +Or change options on the fly by editing +/sys/module/udlfb/parameters/PARAMETER_NAME :: -Or for permanent option, create file like /etc/modprobe.d/udlfb.conf with text -options udlfb fb_defio=0 console=1 shadow=1 + cd /sys/module/udlfb/parameters + ls # to see a list of parameter names + sudo nano PARAMETER_NAME + # change the parameter in place, and save the file. + +Unplug/replug USB device to apply with new settings. + +Or to apply options permanently, create a modprobe configuration file +like /etc/modprobe.d/udlfb.conf with text:: + + options udlfb fb_defio=0 console=1 shadow=1 Accepted boolean options: diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index d7507becf674..3a199fc50828 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -122,7 +122,7 @@ volumes, calling:: to tell fscache that a volume has been withdrawn. This waits for all outstanding accesses on the volume to complete before returning. -When the the cache is completely withdrawn, fscache should be notified by +When the cache is completely withdrawn, fscache should be notified by calling:: void fscache_relinquish_cache(struct fscache_cache *cache); diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index 268888522e35..0152888cac29 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -456,15 +456,15 @@ The ext4 superblock is laid out as follows in * - 0x277 - __u8 - s_lastcheck_hi - - Upper 8 bits of the s_lastcheck_hi field. + - Upper 8 bits of the s_lastcheck field. * - 0x278 - __u8 - s_first_error_time_hi - - Upper 8 bits of the s_first_error_time_hi field. + - Upper 8 bits of the s_first_error_time field. * - 0x279 - __u8 - s_last_error_time_hi - - Upper 8 bits of the s_last_error_time_hi field. + - Upper 8 bits of the s_last_error_time field. * - 0x27A - __u8 - s_pad[2] diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index d0c09663dae8..17df9a02ccff 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -286,9 +286,8 @@ compress_algorithm=%s:%d Control compress algorithm and its compress level, now, algorithm level range lz4 3 - 16 zstd 1 - 22 -compress_log_size=%u Support configuring compress cluster size, the size will - be 4KB * (1 << %u), 16KB is minimum size, also it's - default size. +compress_log_size=%u Support configuring compress cluster size. The size will + be 4KB * (1 << %u). The default and minimum sizes are 16KB. compress_extension=%s Support adding specified extension, so that f2fs can enable compression on those corresponding files, e.g. if all files with '.ext' has high compression rate, we can set the '.ext' diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst index c1db8748389c..b9b31066aef2 100644 --- a/Documentation/filesystems/idmappings.rst +++ b/Documentation/filesystems/idmappings.rst @@ -661,7 +661,7 @@ idmappings:: mount idmapping: u0:k10000:r10000 Assume a file owned by ``u1000`` is read from disk. The filesystem maps this id -to ``k21000`` according to it's idmapping. This is what is stored in the +to ``k21000`` according to its idmapping. This is what is stored in the inode's ``i_uid`` and ``i_gid`` fields. When the caller queries the ownership of this file via ``stat()`` the kernel diff --git a/Documentation/filesystems/qnx6.rst b/Documentation/filesystems/qnx6.rst index fd13433d362c..523b798f04e7 100644 --- a/Documentation/filesystems/qnx6.rst +++ b/Documentation/filesystems/qnx6.rst @@ -176,7 +176,7 @@ Then userspace. The requirement for a static, fixed preallocated system area comes from how qnx6fs deals with writes. -Each superblock got it's own half of the system area. So superblock #1 +Each superblock got its own half of the system area. So superblock #1 always uses blocks from the lower half while superblock #2 just writes to blocks represented by the upper half bitmap system area bits. diff --git a/Documentation/filesystems/spufs/spufs.rst b/Documentation/filesystems/spufs/spufs.rst index 8a42859bb100..ca0441cbe37e 100644 --- a/Documentation/filesystems/spufs/spufs.rst +++ b/Documentation/filesystems/spufs/spufs.rst @@ -227,7 +227,7 @@ Files from the data buffer, updating the value of the specified signal notification register. The signal notification register will either be replaced with the input data or will be updated to the - bitwise OR or the old value and the input data, depending on the + bitwise OR of the old value and the input data, depending on the contents of the signal1_type, or signal2_type respectively, file. diff --git a/Documentation/filesystems/xfs-delayed-logging-design.rst b/Documentation/filesystems/xfs-delayed-logging-design.rst index 4ef419f54663..6402ab8e370c 100644 --- a/Documentation/filesystems/xfs-delayed-logging-design.rst +++ b/Documentation/filesystems/xfs-delayed-logging-design.rst @@ -100,7 +100,7 @@ transactions together:: ntp = xfs_trans_dup(tp); xfs_trans_commit(tp); - xfs_log_reserve(ntp); + xfs_trans_reserve(ntp); This results in a series of "rolling transactions" where the inode is locked across the entire chain of transactions. Hence while this series of rolling @@ -191,7 +191,7 @@ transaction rolling mechanism to re-reserve space on every transaction roll. We know from the implementation of the permanent transactions how many transaction rolls are likely for the common modifications that need to be made. -For example, and inode allocation is typically two transactions - one to +For example, an inode allocation is typically two transactions - one to physically allocate a free inode chunk on disk, and another to allocate an inode from an inode chunk that has free inodes in it. Hence for an inode allocation transaction, we might set the reservation log count to a value of 2 to indicate @@ -200,7 +200,7 @@ chain. Each time a permanent transaction rolls, it consumes an entire unit reservation. Hence when the permanent transaction is first allocated, the log space -reservation is increases from a single unit reservation to multiple unit +reservation is increased from a single unit reservation to multiple unit reservations. That multiple is defined by the reservation log count, and this means we can roll the transaction multiple times before we have to re-reserve log space when we roll the transaction. This ensures that the common @@ -259,7 +259,7 @@ the next transaction in the sequeunce, but we have none remaining. We cannot sleep during the transaction commit process waiting for new log space to become available, as we may end up on the end of the FIFO queue and the items we have locked while we sleep could end up pinning the tail of the log before there is -enough free space in the log to fulfil all of the pending reservations and +enough free space in the log to fulfill all of the pending reservations and then wake up transaction commit in progress. To take a new reservation without sleeping requires us to be able to take a @@ -551,14 +551,14 @@ Essentially, this shows that an item that is in the AIL can still be modified and relogged, so any tracking must be separate to the AIL infrastructure. As such, we cannot reuse the AIL list pointers for tracking committed items, nor can we store state in any field that is protected by the AIL lock. Hence the -committed item tracking needs it's own locks, lists and state fields in the log +committed item tracking needs its own locks, lists and state fields in the log item. Similar to the AIL, tracking of committed items is done through a new list called the Committed Item List (CIL). The list tracks log items that have been committed and have formatted memory buffers attached to them. It tracks objects in transaction commit order, so when an object is relogged it is removed from -it's place in the list and re-inserted at the tail. This is entirely arbitrary +its place in the list and re-inserted at the tail. This is entirely arbitrary and done to make it easy for debugging - the last items in the list are the ones that are most recently modified. Ordering of the CIL is not necessary for transactional integrity (as discussed in the next section) so the ordering is @@ -615,7 +615,7 @@ those changes into the current checkpoint context. We then initialise a new context and attach that to the CIL for aggregation of new transactions. This allows us to unlock the CIL immediately after transfer of all the -committed items and effectively allow new transactions to be issued while we +committed items and effectively allows new transactions to be issued while we are formatting the checkpoint into the log. It also allows concurrent checkpoints to be written into the log buffers in the case of log force heavy workloads, just like the existing transaction commit code does. This, however, @@ -884,9 +884,9 @@ pin the object the first time it is inserted into the CIL - if it is already in the CIL during a transaction commit, then we do not pin it again. Because there can be multiple outstanding checkpoint contexts, we can still see elevated pin counts, but as each checkpoint completes the pin count will retain the correct -value according to it's context. +value according to its context. -Just to make matters more slightly more complex, this checkpoint level context +Just to make matters slightly more complex, this checkpoint level context for the pin count means that the pinning of an item must take place under the CIL commit/flush lock. If we pin the object outside this lock, we cannot guarantee which context the pin count is associated with. This is because of diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst index dbb03022b127..b9dc0c603f36 100644 --- a/Documentation/firmware-guide/acpi/enumeration.rst +++ b/Documentation/firmware-guide/acpi/enumeration.rst @@ -21,7 +21,7 @@ possible we decided to do following: - Devices behind real busses where there is a connector resource are represented as struct spi_device or struct i2c_device. Note that standard UARTs are not busses so there is no struct uart_device, - although some of them may be represented by sturct serdev_device. + although some of them may be represented by struct serdev_device. As both ACPI and Device Tree represent a tree of devices (and their resources) this implementation follows the Device Tree way as much as @@ -205,7 +205,7 @@ Here is what the ACPI namespace for a SPI slave might look like:: } ... -The SPI device drivers only need to add ACPI IDs in a similar way than with +The SPI device drivers only need to add ACPI IDs in a similar way to the platform device drivers. Below is an example where we add ACPI support to at25 SPI eeprom driver (this is meant for the above ACPI snippet):: @@ -362,7 +362,7 @@ These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0" specifies the path to the controller. In order to use these GPIOs in Linux we need to translate them to the corresponding Linux GPIO descriptors. -There is a standard GPIO API for that and is documented in +There is a standard GPIO API for that and it is documented in Documentation/admin-guide/gpio/. In the above example we can get the corresponding two GPIO descriptors with @@ -538,8 +538,8 @@ information. PCI hierarchy representation ============================ -Sometimes could be useful to enumerate a PCI device, knowing its position on the -PCI bus. +Sometimes it could be useful to enumerate a PCI device, knowing its position on +the PCI bus. For example, some systems use PCI devices soldered directly on the mother board, in a fixed position (ethernet, Wi-Fi, serial ports, etc.). In this conditions it @@ -550,7 +550,7 @@ To identify a PCI device, a complete hierarchical description is required, from the chipset root port to the final device, through all the intermediate bridges/switches of the board. -For example, let us assume to have a system with a PCIe serial port, an +For example, let's assume we have a system with a PCIe serial port, an Exar XR17V3521, soldered on the main board. This UART chip also includes 16 GPIOs and we want to add the property ``gpio-line-names`` [1] to these pins. In this case, the ``lspci`` output for this component is:: @@ -593,8 +593,8 @@ of the chipset bridge (also called "root port") with address:: Bus: 0 - Device: 14 - Function: 1 -To find this information is necessary disassemble the BIOS ACPI tables, in -particular the DSDT (see also [2]):: +To find this information, it is necessary to disassemble the BIOS ACPI tables, +in particular the DSDT (see also [2]):: mkdir ~/tables/ cd ~/tables/ diff --git a/Documentation/firmware-guide/acpi/osi.rst b/Documentation/firmware-guide/acpi/osi.rst index 05869c0045d7..784850adfcb6 100644 --- a/Documentation/firmware-guide/acpi/osi.rst +++ b/Documentation/firmware-guide/acpi/osi.rst @@ -41,26 +41,23 @@ But it is likely that they will all eventually be added. What should an OEM do if they want to support Linux and Windows using the same BIOS image? Often they need to do something different for Linux to deal with how Linux is different from Windows. -Here the BIOS should ask exactly what it wants to know: +In this case, the OEM should create custom ASL to be executed by the +Linux kernel and changes to Linux kernel drivers to execute this custom +ASL. The easiest way to accomplish this is to introduce a device specific +method (_DSM) that is called from the Linux kernel. + +In the past the kernel used to support something like: _OSI("Linux-OEM-my_interface_name") where 'OEM' is needed if this is an OEM-specific hook, and 'my_interface_name' describes the hook, which could be a quirk, a bug, or a bug-fix. -In addition, the OEM should send a patch to upstream Linux -via the linux-acpi@vger.kernel.org mailing list. When that patch -is checked into Linux, the OS will answer "YES" when the BIOS -on the OEM's system uses _OSI to ask if the interface is supported -by the OS. Linux distributors can back-port that patch for Linux -pre-installs, and it will be included by all distributions that -re-base to upstream. If the distribution can not update the kernel binary, -they can also add an acpi_osi=Linux-OEM-my_interface_name -cmdline parameter to the boot loader, as needed. - -If the string refers to a feature where the upstream kernel -eventually grows support, a patch should be sent to remove -the string when that support is added to the kernel. +However this was discovered to be abused by other BIOS vendors to change +completely unrelated code on completely unrelated systems. This prompted +an evaluation of all of it's uses. This uncovered that they aren't needed +for any of the original reasons. As such, the kernel will not respond to +any custom Linux-* strings by default. That was easy. Read on, to find out how to do it wrong. diff --git a/Documentation/index.rst b/Documentation/index.rst index 4737c18c97ff..bf6aa681c960 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -1,11 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 - -.. The Linux Kernel documentation master file, created by - sphinx-quickstart on Fri Feb 12 13:51:46 2016. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - .. _linux_doc: The Linux Kernel documentation @@ -18,133 +12,85 @@ documents into a coherent whole. Please note that improvements to the documentation are welcome; join the linux-doc list at vger.kernel.org if you want to help out. -Licensing documentation ------------------------ +Working with the development community +-------------------------------------- -The following describes the license of the Linux kernel source code -(GPLv2), how to properly mark the license of individual files in the source -tree, as well as links to the full license text. - -* :ref:`kernel_licensing` - -User-oriented documentation ---------------------------- - -The following manuals are written for *users* of the kernel — those who are -trying to get it to work optimally on a given system. +The essential guides for interacting with the kernel's development +community and getting your work upstream. .. toctree:: - :maxdepth: 2 - - admin-guide/index - kbuild/index - -Firmware-related documentation ------------------------------- -The following holds information on the kernel's expectations regarding the -platform firmwares. + :maxdepth: 1 -.. toctree:: - :maxdepth: 2 + process/development-process + process/submitting-patches + Code of conduct <process/code-of-conduct> + maintainer/index + All development-process docs <process/index> - firmware-guide/index - devicetree/index -Application-developer documentation ------------------------------------ +Internal API manuals +-------------------- -The user-space API manual gathers together documents describing aspects of -the kernel interface as seen by application developers. +Manuals for use by developers working to interface with the rest of the +kernel. .. toctree:: - :maxdepth: 2 - - userspace-api/index + :maxdepth: 1 + core-api/index + driver-api/index + subsystem-apis + Locking in the kernel <locking/index> -Introduction to kernel development ----------------------------------- +Development tools and processes +------------------------------- -These manuals contain overall information about how to develop the kernel. -The kernel community is quite large, with thousands of developers -contributing over the course of a year. As with any large community, -knowing how things are done will make the process of getting your changes -merged much easier. +Various other manuals with useful information for all kernel developers. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 - process/index - dev-tools/index + process/license-rules doc-guide/index + dev-tools/index + dev-tools/testing-overview kernel-hacking/index trace/index - maintainer/index fault-injection/index livepatch/index + rust/index -Kernel API documentation ------------------------- +User-oriented documentation +--------------------------- -These books get into the details of how specific kernel subsystems work -from the point of view of a kernel developer. Much of the information here -is taken directly from the kernel source, with supplemental material added -as needed (or at least as we managed to add it — probably *not* all that is -needed). +The following manuals are written for *users* of the kernel — those who are +trying to get it to work optimally on a given system and application +developers seeking information on the kernel's user-space APIs. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 - driver-api/index - core-api/index - locking/index - accounting/index - block/index - cdrom/index - cpu-freq/index - fb/index - fpga/index - hid/index - i2c/index - iio/index - isdn/index - infiniband/index - leds/index - netlabel/index - networking/index - pcmcia/index - power/index - target/index - timers/index - spi/index - w1/index - watchdog/index - virt/index - input/index - hwmon/index - gpu/index - security/index - sound/index - crypto/index - filesystems/index - mm/index - bpf/index - usb/index - PCI/index - scsi/index - misc-devices/index - scheduler/index - mhi/index - peci/index - -Architecture-agnostic documentation ------------------------------------ + admin-guide/index + The kernel build system <kbuild/index> + admin-guide/reporting-issues.rst + User-space tools <tools/index> + userspace-api/index + +See also: the `Linux man pages <https://www.kernel.org/doc/man-pages/>`_, +which are kept separately from the kernel's own documentation. + +Firmware-related documentation +------------------------------ +The following holds information on the kernel's expectations regarding the +platform firmwares. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 + + firmware-guide/index + devicetree/index - asm-annotations Architecture-specific documentation ----------------------------------- @@ -163,9 +109,8 @@ of the documentation body, or may require some adjustments and/or conversion to ReStructured Text format, or are simply too old. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 - tools/index staging/index diff --git a/Documentation/kbuild/gcc-plugins.rst b/Documentation/kbuild/gcc-plugins.rst index 0ba76719f1b9..c578c6ba3eb6 100644 --- a/Documentation/kbuild/gcc-plugins.rst +++ b/Documentation/kbuild/gcc-plugins.rst @@ -90,7 +90,11 @@ e.g., on Ubuntu for gcc-10:: Or on Fedora:: - dnf install gcc-plugin-devel + dnf install gcc-plugin-devel libmpc-devel + +Or on Fedora when using cross-compilers that include plugins:: + + dnf install libmpc-devel Enable the GCC plugin infrastructure and some plugin(s) you want to use in the kernel config:: @@ -99,6 +103,19 @@ in the kernel config:: CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y ... +Run gcc (native or cross-compiler) to ensure plugin headers are detected:: + + gcc -print-file-name=plugin + CROSS_COMPILE=arm-linux-gnu- ${CROSS_COMPILE}gcc -print-file-name=plugin + +The word "plugin" means they are not detected:: + + plugin + +A full path means they are detected:: + + /usr/lib/gcc/x86_64-redhat-linux/12/plugin + To compile the minimum tool set including the plugin(s):: make scripts diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index ef19b9c13523..08f575e6236c 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -48,6 +48,10 @@ KCFLAGS ------- Additional options to the C compiler (for built-in and modules). +KRUSTFLAGS +---------- +Additional options to the Rust compiler (for built-in and modules). + CFLAGS_KERNEL ------------- Additional options for $(CC) when used to compile @@ -57,6 +61,15 @@ CFLAGS_MODULE ------------- Additional module specific options to use for $(CC). +RUSTFLAGS_KERNEL +---------------- +Additional options for $(RUSTC) when used to compile +code that is compiled as built-in. + +RUSTFLAGS_MODULE +---------------- +Additional module specific options to use for $(RUSTC). + LDFLAGS_MODULE -------------- Additional options used for $(LD) when linking modules. @@ -69,6 +82,10 @@ HOSTCXXFLAGS ------------ Additional flags to be passed to $(HOSTCXX) when building host programs. +HOSTRUSTFLAGS +------------- +Additional flags to be passed to $(HOSTRUSTC) when building host programs. + HOSTLDFLAGS ----------- Additional flags to be passed when linking host programs. diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 11a296e52d68..5ea1e72d89c8 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -29,8 +29,9 @@ This document describes the Linux kernel Makefiles. --- 4.1 Simple Host Program --- 4.2 Composite Host Programs --- 4.3 Using C++ for host programs - --- 4.4 Controlling compiler options for host programs - --- 4.5 When host programs are actually built + --- 4.4 Using Rust for host programs + --- 4.5 Controlling compiler options for host programs + --- 4.6 When host programs are actually built === 5 Userspace Program support --- 5.1 Simple Userspace Program @@ -835,7 +836,24 @@ Both possibilities are described in the following. qconf-cxxobjs := qconf.o qconf-objs := check.o -4.4 Controlling compiler options for host programs +4.4 Using Rust for host programs +-------------------------------- + + Kbuild offers support for host programs written in Rust. However, + since a Rust toolchain is not mandatory for kernel compilation, + it may only be used in scenarios where Rust is required to be + available (e.g. when ``CONFIG_RUST`` is enabled). + + Example:: + + hostprogs := target + target-rust := y + + Kbuild will compile ``target`` using ``target.rs`` as the crate root, + located in the same directory as the ``Makefile``. The crate may + consist of several source files (see ``samples/rust/hostprogs``). + +4.5 Controlling compiler options for host programs -------------------------------------------------- When compiling host programs, it is possible to set specific flags. @@ -867,7 +885,7 @@ Both possibilities are described in the following. When linking qconf, it will be passed the extra option "-L$(QTDIR)/lib". -4.5 When host programs are actually built +4.6 When host programs are actually built ----------------------------------------- Kbuild will only build host-programs when they are referenced @@ -1181,6 +1199,17 @@ When kbuild executes, the following steps are followed (roughly): The first example utilises the trick that a config option expands to 'y' when selected. + KBUILD_RUSTFLAGS + $(RUSTC) compiler flags + + Default value - see top level Makefile + Append or modify as required per architecture. + + Often, the KBUILD_RUSTFLAGS variable depends on the configuration. + + Note that target specification file generation (for ``--target``) + is handled in ``scripts/generate_rust_target.rs``. + KBUILD_AFLAGS_KERNEL Assembler options specific for built-in @@ -1208,6 +1237,19 @@ When kbuild executes, the following steps are followed (roughly): are used for $(CC). From commandline CFLAGS_MODULE shall be used (see kbuild.rst). + KBUILD_RUSTFLAGS_KERNEL + $(RUSTC) options specific for built-in + + $(KBUILD_RUSTFLAGS_KERNEL) contains extra Rust compiler flags used to + compile resident kernel code. + + KBUILD_RUSTFLAGS_MODULE + Options for $(RUSTC) when building modules + + $(KBUILD_RUSTFLAGS_MODULE) is used to add arch-specific options that + are used for $(RUSTC). + From commandline RUSTFLAGS_MODULE shall be used (see kbuild.rst). + KBUILD_LDFLAGS_MODULE Options for $(LD) when linking modules diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index 64405e5da63e..bfda1a5fecad 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -39,7 +39,7 @@ as the writer can invalidate a pointer that the reader is following. Sequence counters (``seqcount_t``) ================================== -This is the the raw counting mechanism, which does not protect against +This is the raw counting mechanism, which does not protect against multiple writers. Write side critical sections must thus be serialized by an external lock. diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 832b5d36e279..06f80e3785c5 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -52,7 +52,7 @@ CONTENTS - Varieties of memory barrier. - What may not be assumed about memory barriers? - - Data dependency barriers (historical). + - Address-dependency barriers (historical). - Control dependencies. - SMP barrier pairing. - Examples of memory barrier sequences. @@ -187,9 +187,9 @@ As a further example, consider this sequence of events: B = 4; Q = P; P = &B; D = *Q; -There is an obvious data dependency here, as the value loaded into D depends on -the address retrieved from P by CPU 2. At the end of the sequence, any of the -following results are possible: +There is an obvious address dependency here, as the value loaded into D depends +on the address retrieved from P by CPU 2. At the end of the sequence, any of +the following results are possible: (Q == &A) and (D == 1) (Q == &B) and (D == 2) @@ -391,58 +391,62 @@ Memory barriers come in four basic varieties: memory system as time progresses. All stores _before_ a write barrier will occur _before_ all the stores after the write barrier. - [!] Note that write barriers should normally be paired with read or data - dependency barriers; see the "SMP barrier pairing" subsection. + [!] Note that write barriers should normally be paired with read or + address-dependency barriers; see the "SMP barrier pairing" subsection. - (2) Data dependency barriers. + (2) Address-dependency barriers (historical). - A data dependency barrier is a weaker form of read barrier. In the case - where two loads are performed such that the second depends on the result - of the first (eg: the first load retrieves the address to which the second - load will be directed), a data dependency barrier would be required to - make sure that the target of the second load is updated after the address - obtained by the first load is accessed. + An address-dependency barrier is a weaker form of read barrier. In the + case where two loads are performed such that the second depends on the + result of the first (eg: the first load retrieves the address to which + the second load will be directed), an address-dependency barrier would + be required to make sure that the target of the second load is updated + after the address obtained by the first load is accessed. - A data dependency barrier is a partial ordering on interdependent loads - only; it is not required to have any effect on stores, independent loads - or overlapping loads. + An address-dependency barrier is a partial ordering on interdependent + loads only; it is not required to have any effect on stores, independent + loads or overlapping loads. As mentioned in (1), the other CPUs in the system can be viewed as committing sequences of stores to the memory system that the CPU being - considered can then perceive. A data dependency barrier issued by the CPU - under consideration guarantees that for any load preceding it, if that - load touches one of a sequence of stores from another CPU, then by the - time the barrier completes, the effects of all the stores prior to that - touched by the load will be perceptible to any loads issued after the data - dependency barrier. + considered can then perceive. An address-dependency barrier issued by + the CPU under consideration guarantees that for any load preceding it, + if that load touches one of a sequence of stores from another CPU, then + by the time the barrier completes, the effects of all the stores prior to + that touched by the load will be perceptible to any loads issued after + the address-dependency barrier. See the "Examples of memory barrier sequences" subsection for diagrams showing the ordering constraints. - [!] Note that the first load really has to have a _data_ dependency and + [!] Note that the first load really has to have an _address_ dependency and not a control dependency. If the address for the second load is dependent on the first load, but the dependency is through a conditional rather than actually loading the address itself, then it's a _control_ dependency and a full read barrier or better is required. See the "Control dependencies" subsection for more information. - [!] Note that data dependency barriers should normally be paired with + [!] Note that address-dependency barriers should normally be paired with write barriers; see the "SMP barrier pairing" subsection. + [!] Kernel release v5.9 removed kernel APIs for explicit address- + dependency barriers. Nowadays, APIs for marking loads from shared + variables such as READ_ONCE() and rcu_dereference() provide implicit + address-dependency barriers. (3) Read (or load) memory barriers. - A read barrier is a data dependency barrier plus a guarantee that all the - LOAD operations specified before the barrier will appear to happen before - all the LOAD operations specified after the barrier with respect to the - other components of the system. + A read barrier is an address-dependency barrier plus a guarantee that all + the LOAD operations specified before the barrier will appear to happen + before all the LOAD operations specified after the barrier with respect to + the other components of the system. A read barrier is a partial ordering on loads only; it is not required to have any effect on stores. - Read memory barriers imply data dependency barriers, and so can substitute - for them. + Read memory barriers imply address-dependency barriers, and so can + substitute for them. [!] Note that read barriers should normally be paired with write barriers; see the "SMP barrier pairing" subsection. @@ -550,17 +554,21 @@ There are certain things that the Linux kernel memory barriers do not guarantee: Documentation/core-api/dma-api.rst -DATA DEPENDENCY BARRIERS (HISTORICAL) -------------------------------------- +ADDRESS-DEPENDENCY BARRIERS (HISTORICAL) +---------------------------------------- As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for DEC Alpha, which means that about the only people who need to pay attention to this section are those working on DEC Alpha architecture-specific code and those working on READ_ONCE() itself. For those who need it, and for those who are interested in the history, here is the story of -data-dependency barriers. +address-dependency barriers. + +[!] While address dependencies are observed in both load-to-load and +load-to-store relations, address-dependency barriers are not necessary +for load-to-store situations. -The usage requirements of data dependency barriers are a little subtle, and +The requirement of address-dependency barriers is a little subtle, and it's not always obvious that they're needed. To illustrate, consider the following sequence of events: @@ -570,11 +578,14 @@ following sequence of events: B = 4; <write barrier> WRITE_ONCE(P, &B); - Q = READ_ONCE(P); + Q = READ_ONCE_OLD(P); D = *Q; -There's a clear data dependency here, and it would seem that by the end of the -sequence, Q must be either &A or &B, and that: +[!] READ_ONCE_OLD() corresponds to READ_ONCE() of pre-4.15 kernel, which +doesn't imply an address-dependency barrier. + +There's a clear address dependency here, and it would seem that by the end of +the sequence, Q must be either &A or &B, and that: (Q == &A) implies (D == 1) (Q == &B) implies (D == 4) @@ -588,8 +599,8 @@ While this may seem like a failure of coherency or causality maintenance, it isn't, and this behaviour can be observed on certain real CPUs (such as the DEC Alpha). -To deal with this, a data dependency barrier or better must be inserted -between the address load and the data load: +To deal with this, READ_ONCE() provides an implicit address-dependency barrier +since kernel release v4.15: CPU 1 CPU 2 =============== =============== @@ -598,7 +609,7 @@ between the address load and the data load: <write barrier> WRITE_ONCE(P, &B); Q = READ_ONCE(P); - <data dependency barrier> + <implicit address-dependency barrier> D = *Q; This enforces the occurrence of one of the two implications, and prevents the @@ -615,13 +626,13 @@ odd-numbered bank is idle, one can see the new value of the pointer P (&B), but the old value of the variable B (2). -A data-dependency barrier is not required to order dependent writes -because the CPUs that the Linux kernel supports don't do writes -until they are certain (1) that the write will actually happen, (2) -of the location of the write, and (3) of the value to be written. +An address-dependency barrier is not required to order dependent writes +because the CPUs that the Linux kernel supports don't do writes until they +are certain (1) that the write will actually happen, (2) of the location of +the write, and (3) of the value to be written. But please carefully read the "CONTROL DEPENDENCIES" section and the -Documentation/RCU/rcu_dereference.rst file: The compiler can and does -break dependencies in a great many highly creative ways. +Documentation/RCU/rcu_dereference.rst file: The compiler can and does break +dependencies in a great many highly creative ways. CPU 1 CPU 2 =============== =============== @@ -629,12 +640,12 @@ break dependencies in a great many highly creative ways. B = 4; <write barrier> WRITE_ONCE(P, &B); - Q = READ_ONCE(P); + Q = READ_ONCE_OLD(P); WRITE_ONCE(*Q, 5); -Therefore, no data-dependency barrier is required to order the read into +Therefore, no address-dependency barrier is required to order the read into Q with the store into *Q. In other words, this outcome is prohibited, -even without a data-dependency barrier: +even without an implicit address-dependency barrier of modern READ_ONCE(): (Q == &B) && (B == 4) @@ -645,12 +656,12 @@ can be used to record rare error conditions and the like, and the CPUs' naturally occurring ordering prevents such records from being lost. -Note well that the ordering provided by a data dependency is local to +Note well that the ordering provided by an address dependency is local to the CPU containing it. See the section on "Multicopy atomicity" for more information. -The data dependency barrier is very important to the RCU system, +The address-dependency barrier is very important to the RCU system, for example. See rcu_assign_pointer() and rcu_dereference() in include/linux/rcupdate.h. This permits the current target of an RCU'd pointer to be replaced with a new modified target, without the replacement @@ -667,20 +678,21 @@ not understand them. The purpose of this section is to help you prevent the compiler's ignorance from breaking your code. A load-load control dependency requires a full read memory barrier, not -simply a data dependency barrier to make it work correctly. Consider the -following bit of code: +simply an (implicit) address-dependency barrier to make it work correctly. +Consider the following bit of code: q = READ_ONCE(a); + <implicit address-dependency barrier> if (q) { - <data dependency barrier> /* BUG: No data dependency!!! */ + /* BUG: No address dependency!!! */ p = READ_ONCE(b); } -This will not have the desired effect because there is no actual data +This will not have the desired effect because there is no actual address dependency, but rather a control dependency that the CPU may short-circuit by attempting to predict the outcome in advance, so that other CPUs see -the load from b as having happened before the load from a. In such a -case what's actually required is: +the load from b as having happened before the load from a. In such a case +what's actually required is: q = READ_ONCE(a); if (q) { @@ -927,9 +939,9 @@ General barriers pair with each other, though they also pair with most other types of barriers, albeit without multicopy atomicity. An acquire barrier pairs with a release barrier, but both may also pair with other barriers, including of course general barriers. A write barrier pairs -with a data dependency barrier, a control dependency, an acquire barrier, +with an address-dependency barrier, a control dependency, an acquire barrier, a release barrier, a read barrier, or a general barrier. Similarly a -read barrier, control dependency, or a data dependency barrier pairs +read barrier, control dependency, or an address-dependency barrier pairs with a write barrier, an acquire barrier, a release barrier, or a general barrier: @@ -948,7 +960,7 @@ Or: a = 1; <write barrier> WRITE_ONCE(b, &a); x = READ_ONCE(b); - <data dependency barrier> + <implicit address-dependency barrier> y = *x; Or even: @@ -968,8 +980,8 @@ Basically, the read barrier always has to be there, even though it can be of the "weaker" type. [!] Note that the stores before the write barrier would normally be expected to -match the loads after the read barrier or the data dependency barrier, and vice -versa: +match the loads after the read barrier or the address-dependency barrier, and +vice versa: CPU 1 CPU 2 =================== =================== @@ -1021,8 +1033,8 @@ STORE B, STORE C } all occurring before the unordered set of { STORE D, STORE E V -Secondly, data dependency barriers act as partial orderings on data-dependent -loads. Consider the following sequence of events: +Secondly, address-dependency barriers act as partial orderings on address- +dependent loads. Consider the following sequence of events: CPU 1 CPU 2 ======================= ======================= @@ -1067,8 +1079,8 @@ effectively random order, despite the write barrier issued by CPU 1: In the above example, CPU 2 perceives that B is 7, despite the load of *C (which would be B) coming after the LOAD of C. -If, however, a data dependency barrier were to be placed between the load of C -and the load of *C (ie: B) on CPU 2: +If, however, an address-dependency barrier were to be placed between the load +of C and the load of *C (ie: B) on CPU 2: CPU 1 CPU 2 ======================= ======================= @@ -1078,7 +1090,7 @@ and the load of *C (ie: B) on CPU 2: <write barrier> STORE C = &B LOAD X STORE D = 4 LOAD C (gets &B) - <data dependency barrier> + <address-dependency barrier> LOAD *C (reads B) then the following will occur: @@ -1101,7 +1113,7 @@ then the following will occur: | +-------+ | | | | X->9 |------>| | | +-------+ | | - Makes sure all effects ---> \ ddddddddddddddddd | | + Makes sure all effects ---> \ aaaaaaaaaaaaaaaaa | | prior to the store of C \ +-------+ | | are perceptible to ----->| B->2 |------>| | subsequent loads +-------+ | | @@ -1292,7 +1304,7 @@ Which might appear as this: LOAD with immediate effect : : +-------+ -Placing a read barrier or a data dependency barrier just before the second +Placing a read barrier or an address-dependency barrier just before the second load: CPU 1 CPU 2 @@ -1816,20 +1828,20 @@ which may then reorder things however it wishes. CPU MEMORY BARRIERS ------------------- -The Linux kernel has eight basic CPU memory barriers: +The Linux kernel has seven basic CPU memory barriers: - TYPE MANDATORY SMP CONDITIONAL - =============== ======================= =========================== - GENERAL mb() smp_mb() - WRITE wmb() smp_wmb() - READ rmb() smp_rmb() - DATA DEPENDENCY READ_ONCE() + TYPE MANDATORY SMP CONDITIONAL + ======================= =============== =============== + GENERAL mb() smp_mb() + WRITE wmb() smp_wmb() + READ rmb() smp_rmb() + ADDRESS DEPENDENCY READ_ONCE() -All memory barriers except the data dependency barriers imply a compiler -barrier. Data dependencies do not impose any additional compiler ordering. +All memory barriers except the address-dependency barriers imply a compiler +barrier. Address dependencies do not impose any additional compiler ordering. -Aside: In the case of data dependencies, the compiler would be expected +Aside: In the case of address dependencies, the compiler would be expected to issue the loads in the correct order (eg. `a[b]` would have to load the value of b before loading a[b]), however there is no guarantee in the C specification that the compiler may not speculate the value of b @@ -2749,7 +2761,8 @@ is discarded from the CPU's cache and reloaded. To deal with this, the appropriate part of the kernel must invalidate the overlapping bits of the cache on each CPU. -See Documentation/core-api/cachetlb.rst for more information on cache management. +See Documentation/core-api/cachetlb.rst for more information on cache +management. CACHE COHERENCY VS MMIO @@ -2889,8 +2902,8 @@ AND THEN THERE'S THE ALPHA The DEC Alpha CPU is one of the most relaxed CPUs there is. Not only that, some versions of the Alpha CPU have a split data cache, permitting them to have two semantically-related cache lines updated at separate times. This is where -the data dependency barrier really becomes necessary as this synchronises both -caches with the memory coherence system, thus making it seem like pointer +the address-dependency barrier really becomes necessary as this synchronises +both caches with the memory coherence system, thus making it seem like pointer changes vs new data occur in the right order. The Alpha defines the Linux kernel's memory model, although as of v4.15 diff --git a/Documentation/mm/unevictable-lru.rst b/Documentation/mm/unevictable-lru.rst index b280367d6a44..4a0e158aa9ce 100644 --- a/Documentation/mm/unevictable-lru.rst +++ b/Documentation/mm/unevictable-lru.rst @@ -197,7 +197,7 @@ unevictable list for the memory cgroup and node being scanned. There may be situations where a page is mapped into a VM_LOCKED VMA, but the page is not marked as PG_mlocked. Such pages will make it all the way to shrink_active_list() or shrink_page_list() where they will be detected when -vmscan walks the reverse map in page_referenced() or try_to_unmap(). The page +vmscan walks the reverse map in folio_referenced() or try_to_unmap(). The page is culled to the unevictable list when it is released by the shrinker. To "cull" an unevictable page, vmscan simply puts the page back on the LRU list @@ -267,7 +267,7 @@ the LRU. Such pages can be "noticed" by memory management in several places: (4) in the fault path and when a VM_LOCKED stack segment is expanded; or (5) as mentioned above, in vmscan:shrink_page_list() when attempting to - reclaim a page in a VM_LOCKED VMA by page_referenced() or try_to_unmap(). + reclaim a page in a VM_LOCKED VMA by folio_referenced() or try_to_unmap(). mlocked pages become unlocked and rescued from the unevictable list when: @@ -547,7 +547,7 @@ vmscan's shrink_inactive_list() and shrink_page_list() also divert obviously unevictable pages found on the inactive lists to the appropriate memory cgroup and node unevictable list. -rmap's page_referenced_one(), called via vmscan's shrink_active_list() or +rmap's folio_referenced_one(), called via vmscan's shrink_active_list() or shrink_page_list(), and rmap's try_to_unmap_one() called via shrink_page_list(), check for (3) pages still mapped into VM_LOCKED VMAs, and call mlock_vma_page() to correct them. Such pages are culled to the unevictable list when released diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst index 906235c11c24..d87f1fee4cbc 100644 --- a/Documentation/process/5.Posting.rst +++ b/Documentation/process/5.Posting.rst @@ -256,8 +256,10 @@ The tags in common use are: - Cc: the named person received a copy of the patch and had the opportunity to comment on it. -Be careful in the addition of tags to your patches: only Cc: is appropriate -for addition without the explicit permission of the person named. +Be careful in the addition of tags to your patches, as only Cc: is appropriate +for addition without the explicit permission of the person named; using +Reported-by: is fine most of the time as well, but ask for permission if +the bug was reported in private. Sending the patch diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 19c286c23786..9a90197989dd 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -31,6 +31,8 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== GNU C 5.1 gcc --version Clang/LLVM (optional) 11.0.0 clang --version +Rust (optional) 1.62.0 rustc --version +bindgen (optional) 0.56.0 bindgen --version GNU make 3.81 make --version bash 4.2 bash --version binutils 2.23 ld -v @@ -80,6 +82,29 @@ kernels. Older releases aren't guaranteed to work, and we may drop workarounds from the kernel that were used to support older versions. Please see additional docs on :ref:`Building Linux with Clang/LLVM <kbuild_llvm>`. +Rust (optional) +--------------- + +A particular version of the Rust toolchain is required. Newer versions may or +may not work because the kernel depends on some unstable Rust features, for +the moment. + +Each Rust toolchain comes with several "components", some of which are required +(like ``rustc``) and some that are optional. The ``rust-src`` component (which +is optional) needs to be installed to build the kernel. Other components are +useful for developing. + +Please see Documentation/rust/quick-start.rst for instructions on how to +satisfy the build requirements of Rust support. In particular, the ``Makefile`` +target ``rustavailable`` is useful to check why the Rust toolchain may not +be detected. + +bindgen (optional) +------------------ + +``bindgen`` is used to generate the Rust bindings to the C side of the kernel. +It depends on ``libclang``. + Make ---- @@ -348,6 +373,12 @@ Sphinx Please see :ref:`sphinx_install` in :ref:`Documentation/doc-guide/sphinx.rst <sphinxdoc>` for details about Sphinx requirements. +rustdoc +------- + +``rustdoc`` is used to generate the documentation for Rust code. Please see +Documentation/rust/general-information.rst for more information. + Getting updated software ======================== @@ -364,6 +395,16 @@ Clang/LLVM - :ref:`Getting LLVM <getting_llvm>`. +Rust +---- + +- Documentation/rust/quick-start.rst. + +bindgen +------- + +- Documentation/rust/quick-start.rst. + Make ---- diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst index e899f14a4ba2..922e0b547bc3 100644 --- a/Documentation/process/code-of-conduct-interpretation.rst +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're uncertain how to handle situations that come up. It will not be considered a violation report unless you want it to be. If you are uncertain about approaching the TAB or any other maintainers, please -reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>. +reach out to our conflict mediator, Joanna Lee <joanna.lee@gesmer.com>. In the end, "be kind to each other" is really what the end goal is for everybody. We know everyone is human and we all fail at times, but the @@ -127,10 +127,12 @@ are listed at https://kernel.org/code-of-conduct.html. Members can not access reports made before they joined or after they have left the committee. -The initial Code of Conduct Committee consists of volunteer members of -the TAB, as well as a professional mediator acting as a neutral third -party. The first task of the committee is to establish documented -processes, which will be made public. +The Code of Conduct Committee consists of volunteer community members +appointed by the TAB, as well as a professional mediator acting as a +neutral third party. The processes the Code of Conduct committee will +use to address reports is varied and will depend on the individual +circumstance, however, this file serves as documentation for the +general process used. Any member of the committee, including the mediator, can be contacted directly if a reporter does not wish to include the full committee in a @@ -141,16 +143,16 @@ processes (see above) and consults with the TAB as needed and appropriate, for instance to request and receive information about the kernel community. -Any decisions by the committee will be brought to the TAB, for -implementation of enforcement with the relevant maintainers if needed. -A decision by the Code of Conduct Committee can be overturned by the TAB -by a two-thirds vote. +Any decisions regarding enforcement recommendations will be brought to +the TAB for implementation of enforcement with the relevant maintainers +if needed. A decision by the Code of Conduct Committee can be overturned +by the TAB by a two-thirds vote. At quarterly intervals, the Code of Conduct Committee and TAB will provide a report summarizing the anonymised reports that the Code of Conduct committee has received and their status, as well details of any overridden decisions including complete and identifiable voting details. -We expect to establish a different process for Code of Conduct Committee -staffing beyond the bootstrap period. This document will be updated -with that information when this occurs. +Because how we interpret and enforce the Code of Conduct will evolve over +time, this document will be updated when necessary to reflect any +changes. diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 03eb53fd029a..007e49ef6cec 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -1186,6 +1186,68 @@ expression used. For instance: #endif /* CONFIG_SOMETHING */ +22) Do not crash the kernel +--------------------------- + +In general, the decision to crash the kernel belongs to the user, rather +than to the kernel developer. + +Avoid panic() +************* + +panic() should be used with care and primarily only during system boot. +panic() is, for example, acceptable when running out of memory during boot and +not being able to continue. + +Use WARN() rather than BUG() +**************************** + +Do not add new code that uses any of the BUG() variants, such as BUG(), +BUG_ON(), or VM_BUG_ON(). Instead, use a WARN*() variant, preferably +WARN_ON_ONCE(), and possibly with recovery code. Recovery code is not +required if there is no reasonable way to at least partially recover. + +"I'm too lazy to do error handling" is not an excuse for using BUG(). Major +internal corruptions with no way of continuing may still use BUG(), but need +good justification. + +Use WARN_ON_ONCE() rather than WARN() or WARN_ON() +************************************************** + +WARN_ON_ONCE() is generally preferred over WARN() or WARN_ON(), because it +is common for a given warning condition, if it occurs at all, to occur +multiple times. This can fill up and wrap the kernel log, and can even slow +the system enough that the excessive logging turns into its own, additional +problem. + +Do not WARN lightly +******************* + +WARN*() is intended for unexpected, this-should-never-happen situations. +WARN*() macros are not to be used for anything that is expected to happen +during normal operation. These are not pre- or post-condition asserts, for +example. Again: WARN*() must not be used for a condition that is expected +to trigger easily, for example, by user space actions. pr_warn_once() is a +possible alternative, if you need to notify the user of a problem. + +Do not worry about panic_on_warn users +************************************** + +A few more words about panic_on_warn: Remember that ``panic_on_warn`` is an +available kernel option, and that many users set this option. This is why +there is a "Do not WARN lightly" writeup, above. However, the existence of +panic_on_warn users is not a valid reason to avoid the judicious use +WARN*(). That is because, whoever enables panic_on_warn has explicitly +asked the kernel to crash if a WARN*() fires, and such users must be +prepared to deal with the consequences of a system that is somewhat more +likely to crash. + +Use BUILD_BUG_ON() for compile-time assertions +********************************************** + +The use of BUILD_BUG_ON() is acceptable and encouraged, because it is a +compile-time assertion that has no effect at runtime. + Appendix I) References ---------------------- diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst index a6e36d9c3d14..c8fd53a11a20 100644 --- a/Documentation/process/deprecated.rst +++ b/Documentation/process/deprecated.rst @@ -138,17 +138,20 @@ be NUL terminated. This can lead to various linear read overflows and other misbehavior due to the missing termination. It also NUL-pads the destination buffer if the source contents are shorter than the destination buffer size, which may be a needless performance penalty -for callers using only NUL-terminated strings. The safe replacement is +for callers using only NUL-terminated strings. + +When the destination is required to be NUL-terminated, the replacement is strscpy(), though care must be given to any cases where the return value of strncpy() was used, since strscpy() does not return a pointer to the destination, but rather a count of non-NUL bytes copied (or negative errno when it truncates). Any cases still needing NUL-padding should instead use strscpy_pad(). -If a caller is using non-NUL-terminated strings, strncpy() can -still be used, but destinations should be marked with the `__nonstring +If a caller is using non-NUL-terminated strings, strtomem() should be +used, and the destinations should be marked with the `__nonstring <https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html>`_ -attribute to avoid future compiler warnings. +attribute to avoid future compiler warnings. For cases still needing +NUL-padding, strtomem_pad() can be used. strlcpy() --------- diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 2ba2a1582bbe..d4b6217472b0 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -5,6 +5,7 @@ .. _process_index: +============================================= Working with the kernel development community ============================================= diff --git a/Documentation/process/maintainer-pgp-guide.rst b/Documentation/process/maintainer-pgp-guide.rst index 29e7d7b1cd44..40bfbd3b7648 100644 --- a/Documentation/process/maintainer-pgp-guide.rst +++ b/Documentation/process/maintainer-pgp-guide.rst @@ -121,57 +121,56 @@ edit your ``~/.gnupg/gpg-agent.conf`` file to set your own values:: to remove anything you had in place for older versions of GnuPG, as it may not be doing the right thing any more. -Set up a refresh cronjob -~~~~~~~~~~~~~~~~~~~~~~~~ - -You will need to regularly refresh your keyring in order to get the -latest changes on other people's public keys, which is best done with a -daily cronjob:: - - @daily /usr/bin/gpg2 --refresh >/dev/null 2>&1 - -Check the full path to your ``gpg`` or ``gpg2`` command and use the -``gpg2`` command if regular ``gpg`` for you is the legacy GnuPG v.1. - -.. _master_key: +.. _protect_your_key: -Protect your master PGP key -=========================== +Protect your PGP key +==================== This guide assumes that you already have a PGP key that you use for Linux kernel development purposes. If you do not yet have one, please see the "`Protecting Code Integrity`_" document mentioned earlier for guidance on how to create a new one. -You should also make a new key if your current one is weaker than 2048 bits -(RSA). - -Master key vs. Subkeys ----------------------- - -Subkeys are fully independent PGP keypairs that are tied to the "master" -key using certifying key signatures (certificates). It is important to -understand the following: - -1. There are no technical differences between the "master key" and "subkeys." -2. At creation time, we assign functional limitations to each key by - giving it specific capabilities. -3. A PGP key can have 4 capabilities: - - - **[S]** key can be used for signing - - **[E]** key can be used for encryption - - **[A]** key can be used for authentication - - **[C]** key can be used for certifying other keys - -4. A single key may have multiple capabilities. -5. A subkey is fully independent from the master key. A message - encrypted to a subkey cannot be decrypted with the master key. If you - lose your private subkey, it cannot be recreated from the master key - in any way. - -The key carrying the **[C]** (certify) capability is considered the -"master" key because it is the only key that can be used to indicate -relationship with other keys. Only the **[C]** key can be used to: +You should also make a new key if your current one is weaker than 2048 +bits (RSA). + +Understanding PGP Subkeys +------------------------- + +A PGP key rarely consists of a single keypair -- usually it is a +collection of independent subkeys that can be used for different +purposes based on their capabilities, assigned at their creation time. +PGP defines four capabilities that a key can have: + +- **[S]** keys can be used for signing +- **[E]** keys can be used for encryption +- **[A]** keys can be used for authentication +- **[C]** keys can be used for certifying other keys + +The key with the **[C]** capability is often called the "master" key, +but this terminology is misleading because it implies that the Certify +key can be used in place of any of other subkey on the same chain (like +a physical "master key" can be used to open the locks made for other +keys). Since this is not the case, this guide will refer to it as "the +Certify key" to avoid any ambiguity. + +It is critical to fully understand the following: + +1. All subkeys are fully independent from each other. If you lose a + private subkey, it cannot be restored or recreated from any other + private key on your chain. +2. With the exception of the Certify key, there can be multiple subkeys + with identical capabilities (e.g. you can have 2 valid encryption + subkeys, 3 valid signing subkeys, but only one valid certification + subkey). All subkeys are fully independent -- a message encrypted to + one **[E]** subkey cannot be decrypted with any other **[E]** subkey + you may also have. +3. A single subkey may have multiple capabilities (e.g. your **[C]** key + can also be your **[S]** key). + +The key carrying the **[C]** (certify) capability is the only key that +can be used to indicate relationship with other keys. Only the **[C]** +key can be used to: - add or revoke other keys (subkeys) with S/E/A capabilities - add, change or revoke identities (uids) associated with the key @@ -180,7 +179,7 @@ relationship with other keys. Only the **[C]** key can be used to: By default, GnuPG creates the following when generating new keys: -- A master key carrying both Certify and Sign capabilities (**[SC]**) +- One subkey carrying both Certify and Sign capabilities (**[SC]**) - A separate subkey with the Encryption capability (**[E]**) If you used the default parameters when generating your key, then that @@ -192,9 +191,6 @@ for example:: uid [ultimate] Alice Dev <adev@kernel.org> ssb rsa2048 2018-01-23 [E] [expires: 2020-01-23] -Any key carrying the **[C]** capability is your master key, regardless -of any other capabilities it may have assigned to it. - The long line under the ``sec`` entry is your key fingerprint -- whenever you see ``[fpr]`` in the examples below, that 40-character string is what it refers to. @@ -215,37 +211,30 @@ strong passphrase. To set it or change it, use:: Create a separate Signing subkey -------------------------------- -Our goal is to protect your master key by moving it to offline media, so -if you only have a combined **[SC]** key, then you should create a separate -signing subkey:: +Our goal is to protect your Certify key by moving it to offline media, +so if you only have a combined **[SC]** key, then you should create a +separate signing subkey:: $ gpg --quick-addkey [fpr] ed25519 sign -Remember to tell the keyservers about this change, so others can pull down -your new subkey:: - - $ gpg --send-key [fpr] - .. note:: ECC support in GnuPG GnuPG 2.1 and later has full support for Elliptic Curve Cryptography, with ability to combine ECC subkeys with traditional - RSA master keys. The main upside of ECC cryptography is that it is - much faster computationally and creates much smaller signatures when + RSA keys. The main upside of ECC cryptography is that it is much + faster computationally and creates much smaller signatures when compared byte for byte with 2048+ bit RSA keys. Unless you plan on using a smartcard device that does not support ECC operations, we recommend that you create an ECC signing subkey for your kernel work. - If for some reason you prefer to stay with RSA subkeys, just replace - "ed25519" with "rsa2048" in the above command. Additionally, if you - plan to use a hardware device that does not support ED25519 ECC - keys, like Nitrokey Pro or a Yubikey, then you should use - "nistp256" instead or "ed25519." + Note, that if you plan to use a hardware device that does not + support ED25519 ECC keys, you should choose "nistp256" instead or + "ed25519." -Back up your master key for disaster recovery ---------------------------------------------- +Back up your Certify key for disaster recovery +---------------------------------------------- The more signatures you have on your PGP key from other developers, the more reasons you have to create a backup version that lives on something @@ -277,9 +266,7 @@ home, such as your bank vault. Your printer is probably no longer a simple dumb device connected to your parallel port, but since the output is still encrypted with your passphrase, printing out even to "cloud-integrated" modern - printers should remain a relatively safe operation. One option is to - change the passphrase on your master key immediately after you are - done with paperkey. + printers should remain a relatively safe operation. Back up your whole GnuPG directory ---------------------------------- @@ -300,7 +287,7 @@ will use for backup purposes. You will need to encrypt them using LUKS -- refer to your distro's documentation on how to accomplish this. For the encryption passphrase, you can use the same one as on your -master key. +PGP key. Once the encryption process is over, re-insert the USB drive and make sure it gets properly mounted. Copy your entire ``.gnupg`` directory @@ -319,7 +306,7 @@ far away, because you'll need to use it every now and again for things like editing identities, adding or revoking subkeys, or signing other people's keys. -Remove the master key from your homedir +Remove the Certify key from your homedir ---------------------------------------- The files in our home directory are not as well protected as we like to @@ -334,7 +321,7 @@ think. They can be leaked or stolen via many different means: Protecting your key with a good passphrase greatly helps reduce the risk of any of the above, but passphrases can be discovered via keyloggers, shoulder-surfing, or any number of other means. For this reason, the -recommended setup is to remove your master key from your home directory +recommended setup is to remove your Certify key from your home directory and store it on offline storage. .. warning:: @@ -343,7 +330,7 @@ and store it on offline storage. your GnuPG directory in its entirety. What we are about to do will render your key useless if you do not have a usable backup! -First, identify the keygrip of your master key:: +First, identify the keygrip of your Certify key:: $ gpg --with-keygrip --list-key [fpr] @@ -359,7 +346,7 @@ The output will be something like this:: Keygrip = 3333000000000000000000000000000000000000 Find the keygrip entry that is beneath the ``pub`` line (right under the -master key fingerprint). This will correspond directly to a file in your +Certify key fingerprint). This will correspond directly to a file in your ``~/.gnupg`` directory:: $ cd ~/.gnupg/private-keys-v1.d @@ -369,13 +356,13 @@ master key fingerprint). This will correspond directly to a file in your 3333000000000000000000000000000000000000.key All you have to do is simply remove the .key file that corresponds to -the master keygrip:: +the Certify key keygrip:: $ cd ~/.gnupg/private-keys-v1.d $ rm 1111000000000000000000000000000000000000.key Now, if you issue the ``--list-secret-keys`` command, it will show that -the master key is missing (the ``#`` indicates it is not available):: +the Certify key is missing (the ``#`` indicates it is not available):: $ gpg --list-secret-keys sec# rsa2048 2018-01-24 [SC] [expires: 2020-01-24] @@ -404,7 +391,7 @@ file, which still contains your private keys. Move the subkeys to a dedicated crypto device ============================================= -Even though the master key is now safe from being leaked or stolen, the +Even though the Certify key is now safe from being leaked or stolen, the subkeys are still in your home directory. Anyone who manages to get their hands on those will be able to decrypt your communication or fake your signatures (if they know the passphrase). Furthermore, each time a @@ -447,7 +434,8 @@ functionality. There are several options available: - `Yubikey 5`_: proprietary hardware and software, but cheaper than Nitrokey Pro and comes available in the USB-C form that is more useful with newer laptops. Offers additional security features such as FIDO - U2F, among others, and now finally supports ECC keys (NISTP). + U2F, among others, and now finally supports NISTP and ED25519 ECC + keys. `LWN has a good review`_ of some of the above models, as well as several others. Your choice will depend on cost, shipping availability in your @@ -460,7 +448,7 @@ geographical region, and open/proprietary hardware considerations. Foundation. .. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6 -.. _`Nitrokey Pro 2`: https://shop.nitrokey.com/shop/product/nitrokey-pro-2-3 +.. _`Nitrokey Pro 2`: https://shop.nitrokey.com/shop/product/nkpr2-nitrokey-pro-2-3 .. _`Yubikey 5`: https://www.yubico.com/products/yubikey-5-overview/ .. _Gnuk: https://www.fsij.org/doc-gnuk/ .. _`LWN has a good review`: https://lwn.net/Articles/736231/ @@ -627,10 +615,10 @@ Other common GnuPG operations Here is a quick reference for some common operations you'll need to do with your PGP key. -Mounting your master key offline storage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mounting your safe offline storage +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You will need your master key for any of the operations below, so you +You will need your Certify key for any of the operations below, so you will first need to mount your backup offline storage and tell GnuPG to use it:: @@ -644,7 +632,7 @@ your regular home directory location). Extending key expiration date ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The master key has the default expiration date of 2 years from the date +The Certify key has the default expiration date of 2 years from the date of creation. This is done both for security reasons and to make obsolete keys eventually disappear from keyservers. @@ -685,6 +673,7 @@ remote end. .. _`Agent Forwarding over SSH`: https://wiki.gnupg.org/AgentForwarding +.. _pgp_with_git: Using PGP with Git ================== @@ -828,6 +817,63 @@ You can tell git to always sign commits:: .. _verify_identities: + +How to work with signed patches +------------------------------- + +It is possible to use your PGP key to sign patches sent to kernel +developer mailing lists. Since existing email signature mechanisms +(PGP-Mime or PGP-inline) tend to cause problems with regular code +review tasks, you should use the tool kernel.org created for this +purpose that puts cryptographic attestation signatures into message +headers (a-la DKIM): + +- `Patatt Patch Attestation`_ + +.. _`Patatt Patch Attestation`: https://pypi.org/project/patatt/ + +Installing and configuring patatt +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Patatt is packaged for many distributions already, so please check there +first. You can also install it from pypi using "``pip install patatt``". + +If you already have your PGP key configured with git (via the +``user.signingKey`` configuration parameter), then patatt requires no +further configuration. You can start signing your patches by installing +the git-send-email hook in the repository you want:: + + patatt install-hook + +Now any patches you send with ``git send-email`` will be automatically +signed with your cryptographic signature. + +Checking patatt signatures +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you are using ``b4`` to retrieve and apply patches, then it will +automatically attempt to verify all DKIM and patatt signatures it +encounters, for example:: + + $ b4 am 20220720205013.890942-1-broonie@kernel.org + [...] + Checking attestation on all messages, may take a moment... + --- + ✓ [PATCH v1 1/3] kselftest/arm64: Correct buffer allocation for SVE Z registers + ✓ [PATCH v1 2/3] arm64/sve: Document our actual ABI for clearing registers on syscall + ✓ [PATCH v1 3/3] kselftest/arm64: Enforce actual ABI for SVE syscalls + --- + ✓ Signed: openpgp/broonie@kernel.org + ✓ Signed: DKIM/kernel.org + +.. note:: + + Patatt and b4 are still in active development and you should check + the latest documentation for these projects for any new or updated + features. + +.. _kernel_identities: + How to verify kernel developer identities ========================================= @@ -899,65 +945,17 @@ the new default in GnuPG v2). To set it, add (or modify) the trust-model tofu+pgp -How to use keyservers (more) safely ------------------------------------ - -If you get a "No public key" error when trying to validate someone's -tag, then you should attempt to lookup that key using a keyserver. It is -important to keep in mind that there is absolutely no guarantee that the -key you retrieve from PGP keyservers belongs to the actual person -- -that much is by design. You are supposed to use the Web of Trust to -establish key validity. - -How to properly maintain the Web of Trust is beyond the scope of this -document, simply because doing it properly requires both effort and -dedication that tends to be beyond the caring threshold of most human -beings. Here are some shortcuts that will help you reduce the risk of -importing a malicious key. - -First, let's say you've tried to run ``git verify-tag`` but it returned -an error saying the key is not found:: - - $ git verify-tag sunxi-fixes-for-4.15-2 - gpg: Signature made Sun 07 Jan 2018 10:51:55 PM EST - gpg: using RSA key DA73759BF8619E484E5A3B47389A54219C0F2430 - gpg: issuer "wens@...org" - gpg: Can't check signature: No public key - -Let's query the keyserver for more info about that key fingerprint (the -fingerprint probably belongs to a subkey, so we can't use it directly -without finding out the ID of the master key it is associated with):: - - $ gpg --search DA73759BF8619E484E5A3B47389A54219C0F2430 - gpg: data source: hkp://keys.gnupg.net - (1) Chen-Yu Tsai <wens@...org> - 4096 bit RSA key C94035C21B4F2AEB, created: 2017-03-14, expires: 2019-03-15 - Keys 1-1 of 1 for "DA73759BF8619E484E5A3B47389A54219C0F2430". Enter number(s), N)ext, or Q)uit > q - -Locate the ID of the master key in the output, in our example -``C94035C21B4F2AEB``. Now display the key of Linus Torvalds that you -have on your keyring:: - - $ gpg --list-key torvalds@kernel.org - pub rsa2048 2011-09-20 [SC] - ABAF11C65A2970B130ABE3C479BE3E4300411886 - uid [ unknown] Linus Torvalds <torvalds@kernel.org> - sub rsa2048 2011-09-20 [E] - -Next, find a trust path from Linus Torvalds to the key-id you found via ``gpg ---search`` of the unknown key. For this, you can use several tools including -https://github.com/mricon/wotmate, -https://git.kernel.org/pub/scm/docs/kernel/pgpkeys.git/tree/graphs, and -https://the.earth.li/~noodles/pathfind.html. - -If you get a few decent trust paths, then it's a pretty good indication -that it is a valid key. You can add it to your keyring from the -keyserver now:: - - $ gpg --recv-key C94035C21B4F2AEB - -This process is not perfect, and you are obviously trusting the -administrators of the PGP Pathfinder service to not be malicious (in -fact, this goes against :ref:`devs_not_infra`). However, if you -do not carefully maintain your own web of trust, then it is a marked -improvement over blindly trusting keyservers. +Using the kernel.org web of trust repository +-------------------------------------------- + +Kernel.org maintains a git repository with developers' public keys as a +replacement for replicating keyserver networks that have gone mostly +dark in the past few years. The full documentation for how to set up +that repository as your source of public keys can be found here: + +- `Kernel developer PGP Keyring`_ + +If you are a kernel developer, please consider submitting your key for +inclusion into that keyring. + +.. _`Kernel developer PGP Keyring`: https://korg.docs.kernel.org/pgpkeys.html diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index c61865e91f52..2fd8aa593a28 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -97,6 +97,12 @@ text, like this: commit <sha1> upstream. +or alternatively: + +.. code-block:: none + + [ Upstream commit <sha1> ] + Additionally, some patches submitted via :ref:`option_1` may have additional patch prerequisites which can be cherry-picked. This can be specified in the following format in the sign-off area: diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index be49d8f2601b..7dc94555417d 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -715,8 +715,8 @@ references. .. _backtraces: -Backtraces in commit mesages -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Backtraces in commit messages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Backtraces help document the call chain leading to a problem. However, not all backtraces are helpful. For example, early boot call chains are diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst new file mode 100644 index 000000000000..6982b63775da --- /dev/null +++ b/Documentation/rust/arch-support.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Arch Support +============ + +Currently, the Rust compiler (``rustc``) uses LLVM for code generation, +which limits the supported architectures that can be targeted. In addition, +support for building the kernel with LLVM/Clang varies (please see +Documentation/kbuild/llvm.rst). This support is needed for ``bindgen`` +which uses ``libclang``. + +Below is a general summary of architectures that currently work. Level of +support corresponds to ``S`` values in the ``MAINTAINERS`` file. + +============ ================ ============================================== +Architecture Level of support Constraints +============ ================ ============================================== +``x86`` Maintained ``x86_64`` only. +============ ================ ============================================== diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst new file mode 100644 index 000000000000..aa8ed082613e --- /dev/null +++ b/Documentation/rust/coding-guidelines.rst @@ -0,0 +1,216 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Coding Guidelines +================= + +This document describes how to write Rust code in the kernel. + + +Style & formatting +------------------ + +The code should be formatted using ``rustfmt``. In this way, a person +contributing from time to time to the kernel does not need to learn and +remember one more style guide. More importantly, reviewers and maintainers +do not need to spend time pointing out style issues anymore, and thus +less patch roundtrips may be needed to land a change. + +.. note:: Conventions on comments and documentation are not checked by + ``rustfmt``. Thus those are still needed to be taken care of. + +The default settings of ``rustfmt`` are used. This means the idiomatic Rust +style is followed. For instance, 4 spaces are used for indentation rather +than tabs. + +It is convenient to instruct editors/IDEs to format while typing, +when saving or at commit time. However, if for some reason reformatting +the entire kernel Rust sources is needed at some point, the following can be +run:: + + make LLVM=1 rustfmt + +It is also possible to check if everything is formatted (printing a diff +otherwise), for instance for a CI, with:: + + make LLVM=1 rustfmtcheck + +Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on +individual files, and does not require a kernel configuration. Sometimes it may +even work with broken code. + + +Comments +-------- + +"Normal" comments (i.e. ``//``, rather than code documentation which starts +with ``///`` or ``//!``) are written in Markdown the same way as documentation +comments are, even though they will not be rendered. This improves consistency, +simplifies the rules and allows to move content between the two kinds of +comments more easily. For instance: + +.. code-block:: rust + + // `object` is ready to be handled now. + f(object); + +Furthermore, just like documentation, comments are capitalized at the beginning +of a sentence and ended with a period (even if it is a single sentence). This +includes ``// SAFETY:``, ``// TODO:`` and other "tagged" comments, e.g.: + +.. code-block:: rust + + // FIXME: The error should be handled properly. + +Comments should not be used for documentation purposes: comments are intended +for implementation details, not users. This distinction is useful even if the +reader of the source file is both an implementor and a user of an API. In fact, +sometimes it is useful to use both comments and documentation at the same time. +For instance, for a ``TODO`` list or to comment on the documentation itself. +For the latter case, comments can be inserted in the middle; that is, closer to +the line of documentation to be commented. For any other case, comments are +written after the documentation, e.g.: + +.. code-block:: rust + + /// Returns a new [`Foo`]. + /// + /// # Examples + /// + // TODO: Find a better example. + /// ``` + /// let foo = f(42); + /// ``` + // FIXME: Use fallible approach. + pub fn f(x: i32) -> Foo { + // ... + } + +One special kind of comments are the ``// SAFETY:`` comments. These must appear +before every ``unsafe`` block, and they explain why the code inside the block is +correct/sound, i.e. why it cannot trigger undefined behavior in any case, e.g.: + +.. code-block:: rust + + // SAFETY: `p` is valid by the safety requirements. + unsafe { *p = 0; } + +``// SAFETY:`` comments are not to be confused with the ``# Safety`` sections +in code documentation. ``# Safety`` sections specify the contract that callers +(for functions) or implementors (for traits) need to abide by. ``// SAFETY:`` +comments show why a call (for functions) or implementation (for traits) actually +respects the preconditions stated in a ``# Safety`` section or the language +reference. + + +Code documentation +------------------ + +Rust kernel code is not documented like C kernel code (i.e. via kernel-doc). +Instead, the usual system for documenting Rust code is used: the ``rustdoc`` +tool, which uses Markdown (a lightweight markup language). + +To learn Markdown, there are many guides available out there. For instance, +the one at: + + https://commonmark.org/help/ + +This is how a well-documented Rust function may look like: + +.. code-block:: rust + + /// Returns the contained [`Some`] value, consuming the `self` value, + /// without checking that the value is not [`None`]. + /// + /// # Safety + /// + /// Calling this method on [`None`] is *[undefined behavior]*. + /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// let x = Some("air"); + /// assert_eq!(unsafe { x.unwrap_unchecked() }, "air"); + /// ``` + pub unsafe fn unwrap_unchecked(self) -> T { + match self { + Some(val) => val, + + // SAFETY: The safety contract must be upheld by the caller. + None => unsafe { hint::unreachable_unchecked() }, + } + } + +This example showcases a few ``rustdoc`` features and some conventions followed +in the kernel: + + - The first paragraph must be a single sentence briefly describing what + the documented item does. Further explanations must go in extra paragraphs. + + - Unsafe functions must document their safety preconditions under + a ``# Safety`` section. + + - While not shown here, if a function may panic, the conditions under which + that happens must be described under a ``# Panics`` section. + + Please note that panicking should be very rare and used only with a good + reason. In almost all cases, a fallible approach should be used, typically + returning a ``Result``. + + - If providing examples of usage would help readers, they must be written in + a section called ``# Examples``. + + - Rust items (functions, types, constants...) must be linked appropriately + (``rustdoc`` will create a link automatically). + + - Any ``unsafe`` block must be preceded by a ``// SAFETY:`` comment + describing why the code inside is sound. + + While sometimes the reason might look trivial and therefore unneeded, + writing these comments is not just a good way of documenting what has been + taken into account, but most importantly, it provides a way to know that + there are no *extra* implicit constraints. + +To learn more about how to write documentation for Rust and extra features, +please take a look at the ``rustdoc`` book at: + + https://doc.rust-lang.org/rustdoc/how-to-write-documentation.html + + +Naming +------ + +Rust kernel code follows the usual Rust naming conventions: + + https://rust-lang.github.io/api-guidelines/naming.html + +When existing C concepts (e.g. macros, functions, objects...) are wrapped into +a Rust abstraction, a name as close as reasonably possible to the C side should +be used in order to avoid confusion and to improve readability when switching +back and forth between the C and Rust sides. For instance, macros such as +``pr_info`` from C are named the same in the Rust side. + +Having said that, casing should be adjusted to follow the Rust naming +conventions, and namespacing introduced by modules and types should not be +repeated in the item names. For instance, when wrapping constants like: + +.. code-block:: c + + #define GPIO_LINE_DIRECTION_IN 0 + #define GPIO_LINE_DIRECTION_OUT 1 + +The equivalent in Rust may look like (ignoring documentation): + +.. code-block:: rust + + pub mod gpio { + pub enum LineDirection { + In = bindings::GPIO_LINE_DIRECTION_IN as _, + Out = bindings::GPIO_LINE_DIRECTION_OUT as _, + } + } + +That is, the equivalent of ``GPIO_LINE_DIRECTION_IN`` would be referred to as +``gpio::LineDirection::In``. In particular, it should not be named +``gpio::gpio_line_direction::GPIO_LINE_DIRECTION_IN``. diff --git a/Documentation/rust/general-information.rst b/Documentation/rust/general-information.rst new file mode 100644 index 000000000000..49029ee82e55 --- /dev/null +++ b/Documentation/rust/general-information.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 + +General Information +=================== + +This document contains useful information to know when working with +the Rust support in the kernel. + + +Code documentation +------------------ + +Rust kernel code is documented using ``rustdoc``, its built-in documentation +generator. + +The generated HTML docs include integrated search, linked items (e.g. types, +functions, constants), source code, etc. They may be read at (TODO: link when +in mainline and generated alongside the rest of the documentation): + + http://kernel.org/ + +The docs can also be easily generated and read locally. This is quite fast +(same order as compiling the code itself) and no special tools or environment +are needed. This has the added advantage that they will be tailored to +the particular kernel configuration used. To generate them, use the ``rustdoc`` +target with the same invocation used for compilation, e.g.:: + + make LLVM=1 rustdoc + +To read the docs locally in your web browser, run e.g.:: + + xdg-open rust/doc/kernel/index.html + +To learn about how to write the documentation, please see coding-guidelines.rst. + + +Extra lints +----------- + +While ``rustc`` is a very helpful compiler, some extra lints and analyses are +available via ``clippy``, a Rust linter. To enable it, pass ``CLIPPY=1`` to +the same invocation used for compilation, e.g.:: + + make LLVM=1 CLIPPY=1 + +Please note that Clippy may change code generation, thus it should not be +enabled while building a production kernel. + + +Abstractions vs. bindings +------------------------- + +Abstractions are Rust code wrapping kernel functionality from the C side. + +In order to use functions and types from the C side, bindings are created. +Bindings are the declarations for Rust of those functions and types from +the C side. + +For instance, one may write a ``Mutex`` abstraction in Rust which wraps +a ``struct mutex`` from the C side and calls its functions through the bindings. + +Abstractions are not available for all the kernel internal APIs and concepts, +but it is intended that coverage is expanded as time goes on. "Leaf" modules +(e.g. drivers) should not use the C bindings directly. Instead, subsystems +should provide as-safe-as-possible abstractions as needed. + + +Conditional compilation +----------------------- + +Rust code has access to conditional compilation based on the kernel +configuration: + +.. code-block:: rust + + #[cfg(CONFIG_X)] // Enabled (`y` or `m`) + #[cfg(CONFIG_X="y")] // Enabled as a built-in (`y`) + #[cfg(CONFIG_X="m")] // Enabled as a module (`m`) + #[cfg(not(CONFIG_X))] // Disabled diff --git a/Documentation/rust/index.rst b/Documentation/rust/index.rst new file mode 100644 index 000000000000..4ae8c66b94fa --- /dev/null +++ b/Documentation/rust/index.rst @@ -0,0 +1,22 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Rust +==== + +Documentation related to Rust within the kernel. To start using Rust +in the kernel, please read the quick-start.rst guide. + +.. toctree:: + :maxdepth: 1 + + quick-start + general-information + coding-guidelines + arch-support + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst new file mode 100644 index 000000000000..13b7744b1e27 --- /dev/null +++ b/Documentation/rust/quick-start.rst @@ -0,0 +1,232 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Quick Start +=========== + +This document describes how to get started with kernel development in Rust. + + +Requirements: Building +---------------------- + +This section explains how to fetch the tools needed for building. + +Some of these requirements might be available from Linux distributions +under names like ``rustc``, ``rust-src``, ``rust-bindgen``, etc. However, +at the time of writing, they are likely not to be recent enough unless +the distribution tracks the latest releases. + +To easily check whether the requirements are met, the following target +can be used:: + + make LLVM=1 rustavailable + +This triggers the same logic used by Kconfig to determine whether +``RUST_IS_AVAILABLE`` should be enabled; but it also explains why not +if that is the case. + + +rustc +***** + +A particular version of the Rust compiler is required. Newer versions may or +may not work because, for the moment, the kernel depends on some unstable +Rust features. + +If ``rustup`` is being used, enter the checked out source code directory +and run:: + + rustup override set $(scripts/min-tool-version.sh rustc) + +Otherwise, fetch a standalone installer or install ``rustup`` from: + + https://www.rust-lang.org + + +Rust standard library source +**************************** + +The Rust standard library source is required because the build system will +cross-compile ``core`` and ``alloc``. + +If ``rustup`` is being used, run:: + + rustup component add rust-src + +The components are installed per toolchain, thus upgrading the Rust compiler +version later on requires re-adding the component. + +Otherwise, if a standalone installer is used, the Rust repository may be cloned +into the installation folder of the toolchain:: + + git clone --recurse-submodules \ + --branch $(scripts/min-tool-version.sh rustc) \ + https://github.com/rust-lang/rust \ + $(rustc --print sysroot)/lib/rustlib/src/rust + +In this case, upgrading the Rust compiler version later on requires manually +updating this clone. + + +libclang +******** + +``libclang`` (part of LLVM) is used by ``bindgen`` to understand the C code +in the kernel, which means LLVM needs to be installed; like when the kernel +is compiled with ``CC=clang`` or ``LLVM=1``. + +Linux distributions are likely to have a suitable one available, so it is +best to check that first. + +There are also some binaries for several systems and architectures uploaded at: + + https://releases.llvm.org/download.html + +Otherwise, building LLVM takes quite a while, but it is not a complex process: + + https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm + +Please see Documentation/kbuild/llvm.rst for more information and further ways +to fetch pre-built releases and distribution packages. + + +bindgen +******* + +The bindings to the C side of the kernel are generated at build time using +the ``bindgen`` tool. A particular version is required. + +Install it via (note that this will download and build the tool from source):: + + cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen + + +Requirements: Developing +------------------------ + +This section explains how to fetch the tools needed for developing. That is, +they are not needed when just building the kernel. + + +rustfmt +******* + +The ``rustfmt`` tool is used to automatically format all the Rust kernel code, +including the generated C bindings (for details, please see +coding-guidelines.rst). + +If ``rustup`` is being used, its ``default`` profile already installs the tool, +thus nothing needs to be done. If another profile is being used, the component +can be installed manually:: + + rustup component add rustfmt + +The standalone installers also come with ``rustfmt``. + + +clippy +****** + +``clippy`` is a Rust linter. Running it provides extra warnings for Rust code. +It can be run by passing ``CLIPPY=1`` to ``make`` (for details, please see +general-information.rst). + +If ``rustup`` is being used, its ``default`` profile already installs the tool, +thus nothing needs to be done. If another profile is being used, the component +can be installed manually:: + + rustup component add clippy + +The standalone installers also come with ``clippy``. + + +cargo +***** + +``cargo`` is the Rust native build system. It is currently required to run +the tests since it is used to build a custom standard library that contains +the facilities provided by the custom ``alloc`` in the kernel. The tests can +be run using the ``rusttest`` Make target. + +If ``rustup`` is being used, all the profiles already install the tool, +thus nothing needs to be done. + +The standalone installers also come with ``cargo``. + + +rustdoc +******* + +``rustdoc`` is the documentation tool for Rust. It generates pretty HTML +documentation for Rust code (for details, please see +general-information.rst). + +``rustdoc`` is also used to test the examples provided in documented Rust code +(called doctests or documentation tests). The ``rusttest`` Make target uses +this feature. + +If ``rustup`` is being used, all the profiles already install the tool, +thus nothing needs to be done. + +The standalone installers also come with ``rustdoc``. + + +rust-analyzer +************* + +The `rust-analyzer <https://rust-analyzer.github.io/>`_ language server can +be used with many editors to enable syntax highlighting, completion, go to +definition, and other features. + +``rust-analyzer`` needs a configuration file, ``rust-project.json``, which +can be generated by the ``rust-analyzer`` Make target. + + +Configuration +------------- + +``Rust support`` (``CONFIG_RUST``) needs to be enabled in the ``General setup`` +menu. The option is only shown if a suitable Rust toolchain is found (see +above), as long as the other requirements are met. In turn, this will make +visible the rest of options that depend on Rust. + +Afterwards, go to:: + + Kernel hacking + -> Sample kernel code + -> Rust samples + +And enable some sample modules either as built-in or as loadable. + + +Building +-------- + +Building a kernel with a complete LLVM toolchain is the best supported setup +at the moment. That is:: + + make LLVM=1 + +For architectures that do not support a full LLVM toolchain, use:: + + make CC=clang + +Using GCC also works for some configurations, but it is very experimental at +the moment. + + +Hacking +------- + +To dive deeper, take a look at the source code of the samples +at ``samples/rust/``, the Rust support code under ``rust/`` and +the ``Rust hacking`` menu under ``Kernel hacking``. + +If GDB/Binutils is used and Rust symbols are not getting demangled, the reason +is the toolchain does not support Rust's new v0 mangling scheme yet. +There are a few ways out: + + - Install a newer release (GDB >= 10.2, Binutils >= 2.36). + + - Some versions of GDB (e.g. vanilla GDB 10.1) are able to use + the pre-demangled names embedded in the debug info (``CONFIG_DEBUG_INFO``). diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst index 59b2d1fb4dc4..03db55504515 100644 --- a/Documentation/scheduler/sched-design-CFS.rst +++ b/Documentation/scheduler/sched-design-CFS.rst @@ -94,7 +94,7 @@ other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the way the previous scheduler had, and has no heuristics whatsoever. There is only one central tunable (you have to switch on CONFIG_SCHED_DEBUG): - /proc/sys/kernel/sched_min_granularity_ns + /sys/kernel/debug/sched/min_granularity_ns which can be used to tune the scheduler from "desktop" (i.e., low latencies) to "server" (i.e., good batching) workloads. It defaults to a setting suitable diff --git a/Documentation/security/landlock.rst b/Documentation/security/landlock.rst index 5c77730b4479..c0029d5d02eb 100644 --- a/Documentation/security/landlock.rst +++ b/Documentation/security/landlock.rst @@ -7,7 +7,7 @@ Landlock LSM: kernel documentation ================================== :Author: Mickaël Salaün -:Date: May 2022 +:Date: September 2022 Landlock's goal is to create scoped access-control (i.e. sandboxing). To harden a whole system, this feature should be available to any process, @@ -49,13 +49,13 @@ Filesystem access rights ------------------------ All access rights are tied to an inode and what can be accessed through it. -Reading the content of a directory doesn't imply to be allowed to read the +Reading the content of a directory does not imply to be allowed to read the content of a listed inode. Indeed, a file name is local to its parent directory, and an inode can be referenced by multiple file names thanks to (hard) links. Being able to unlink a file only has a direct impact on the directory, not the unlinked inode. This is the reason why -`LANDLOCK_ACCESS_FS_REMOVE_FILE` or `LANDLOCK_ACCESS_FS_REFER` are not allowed -to be tied to files but only to directories. +``LANDLOCK_ACCESS_FS_REMOVE_FILE`` or ``LANDLOCK_ACCESS_FS_REFER`` are not +allowed to be tied to files but only to directories. Tests ===== diff --git a/Documentation/staging/index.rst b/Documentation/staging/index.rst index abd0d18254d2..ded8254bc0d7 100644 --- a/Documentation/staging/index.rst +++ b/Documentation/staging/index.rst @@ -14,45 +14,3 @@ Unsorted Documentation static-keys tee xz - -Atomic Types -============ - -.. raw:: latex - - \footnotesize - -.. include:: ../atomic_t.txt - :literal: - -.. raw:: latex - - \normalsize - -Atomic bitops -============= - -.. raw:: latex - - \footnotesize - -.. include:: ../atomic_bitops.txt - :literal: - -.. raw:: latex - - \normalsize - -Memory Barriers -=============== - -.. raw:: latex - - \footnotesize - -.. include:: ../memory-barriers.txt - :literal: - -.. raw:: latex - - \normalsize diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst new file mode 100644 index 000000000000..af65004a80aa --- /dev/null +++ b/Documentation/subsystem-apis.rst @@ -0,0 +1,58 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +Kernel subsystem documentation +============================== + +These books get into the details of how specific kernel subsystems work +from the point of view of a kernel developer. Much of the information here +is taken directly from the kernel source, with supplemental material added +as needed (or at least as we managed to add it — probably *not* all that is +needed). + +**Fixme**: much more organizational work is needed here. + +.. toctree:: + :maxdepth: 1 + + driver-api/index + core-api/index + locking/index + accounting/index + block/index + cdrom/index + cpu-freq/index + fb/index + fpga/index + hid/index + i2c/index + iio/index + isdn/index + infiniband/index + leds/index + netlabel/index + networking/index + pcmcia/index + power/index + target/index + timers/index + spi/index + w1/index + watchdog/index + virt/index + input/index + hwmon/index + gpu/index + security/index + sound/index + crypto/index + filesystems/index + mm/index + bpf/index + usb/index + PCI/index + scsi/index + misc-devices/index + scheduler/index + mhi/index + peci/index diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 859fd1b76c63..c1b685a38f6b 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -412,7 +412,7 @@ Extended error information Because the default sort key above is 'hitcount', the above shows a the list of call_sites by increasing hitcount, so that at the bottom we see the functions that made the most kmalloc calls during the - run. If instead we we wanted to see the top kmalloc callers in + run. If instead we wanted to see the top kmalloc callers in terms of the number of bytes requested rather than the number of calls, and we wanted the top caller to appear at the top, we can use the 'sort' parameter, along with the 'descending' modifier:: diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index f318bceda1e6..48cf778a2468 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -328,8 +328,8 @@ Configuring Kprobes =================== When configuring the kernel using make menuconfig/xconfig/oldconfig, -ensure that CONFIG_KPROBES is set to "y". Under "General setup", look -for "Kprobes". +ensure that CONFIG_KPROBES is set to "y", look for "Kprobes" under +"General architecture-dependent options". So that you can load and unload Kprobes-based instrumentation modules, make sure "Loadable module support" (CONFIG_MODULES) and "Module diff --git a/Documentation/trace/timerlat-tracer.rst b/Documentation/trace/timerlat-tracer.rst index d643c95c01eb..db17df312bc8 100644 --- a/Documentation/trace/timerlat-tracer.rst +++ b/Documentation/trace/timerlat-tracer.rst @@ -20,7 +20,7 @@ For example:: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo timerlat > current_tracer -It is possible to follow the trace by reading the trace trace file:: +It is possible to follow the trace by reading the trace file:: [root@f32 tracing]# cat trace # tracer: timerlat diff --git a/Documentation/translations/zh_CN/IRQ.txt b/Documentation/translations/zh_CN/IRQ.txt deleted file mode 100644 index 9aec8dca4fcf..000000000000 --- a/Documentation/translations/zh_CN/IRQ.txt +++ /dev/null @@ -1,39 +0,0 @@ -Chinese translated version of Documentation/core-api/irq/index.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Maintainer: Eric W. Biederman <ebiederman@xmission.com> -Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> ---------------------------------------------------------------------- -Documentation/core-api/irq/index.rst çš„ä¸æ–‡ç¿»è¯‘ - -如果想评论或更新本文的内容,请直接è”ç³»åŽŸæ–‡æ¡£çš„ç»´æŠ¤è€…ã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡ -交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘ä¸æ–‡ç‰ˆç»´æŠ¤è€…求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻 -译å˜åœ¨é—®é¢˜ï¼Œè¯·è”ç³»ä¸æ–‡ç‰ˆç»´æŠ¤è€…。 -英文版维护者: Eric W. Biederman <ebiederman@xmission.com> -ä¸æ–‡ç‰ˆç»´æŠ¤è€…: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> -ä¸æ–‡ç‰ˆç¿»è¯‘者: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> -ä¸æ–‡ç‰ˆæ ¡è¯‘者: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> - - -以下为æ£æ–‡ ---------------------------------------------------------------------- -何为 IRQ? - -一个 IRQ 是æ¥è‡ªæŸä¸ªè®¾å¤‡çš„一个ä¸æ–请求。目å‰ï¼Œå®ƒä»¬å¯ä»¥æ¥è‡ªä¸€ä¸ªç¡¬ä»¶å¼•è„šï¼Œ -或æ¥è‡ªä¸€ä¸ªæ•°æ®åŒ…。多个设备å¯èƒ½è¿žæŽ¥åˆ°åŒä¸ªç¡¬ä»¶å¼•è„šï¼Œä»Žè€Œå…±äº«ä¸€ä¸ª IRQ。 - -一个 IRQ ç¼–å·æ˜¯ç”¨äºŽå‘ŠçŸ¥ç¡¬ä»¶ä¸æ–æºçš„å†…æ ¸æ ‡è¯†ã€‚é€šå¸¸æƒ…å†µä¸‹ï¼Œè¿™æ˜¯ä¸€ä¸ª -全局 irq_desc 数组的索引,但是除了在 linux/interrupt.h ä¸çš„实现, -具体的细节是体系结构特定的。 - -一个 IRQ ç¼–å·æ˜¯è®¾å¤‡ä¸ŠæŸä¸ªå¯èƒ½çš„ä¸æ–æºçš„枚举。通常情况下,枚举的编å·æ˜¯ -该引脚在系统内ä¸æ–控制器的所有输入引脚ä¸çš„ç¼–å·ã€‚对于 ISA 总线ä¸çš„情况, -枚举的是在两个 i8259 ä¸æ–æŽ§åˆ¶å™¨ä¸ 16 个输入引脚。 - -架构å¯ä»¥å¯¹ IRQ ç¼–å·æŒ‡å®šé¢å¤–çš„å«ä¹‰ï¼Œåœ¨ç¡¬ä»¶æ¶‰åŠä»»ä½•æ‰‹å·¥é…置的情况下, -是被æ倡的。ISA çš„ IRQ 是一个分é…这类é¢å¤–å«ä¹‰çš„典型例å。 diff --git a/Documentation/translations/zh_CN/PCI/acpi-info.rst b/Documentation/translations/zh_CN/PCI/acpi-info.rst new file mode 100644 index 000000000000..a35f39dcd858 --- /dev/null +++ b/Documentation/translations/zh_CN/PCI/acpi-info.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/PCI/acpi-info.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +===================== +PCI主桥的ACPI注æ„事项 +===================== + +一般的规则是,ACPI命å空间应该æè¿°æ“作系统å¯èƒ½ä½¿ç”¨çš„所有东西,除éžæœ‰å…¶ä»–方法让æ“作系 +统找到它[1, 2]。 + +ä¾‹å¦‚ï¼Œæ²¡æœ‰æ ‡å‡†çš„ç¡¬ä»¶æœºåˆ¶æ¥æžšä¸¾PCI主桥,所以ACPI命å空间必须æè¿°æ¯ä¸ªä¸»æ¡¥ã€è®¿é—®å®ƒ +下é¢çš„PCIé…置空间的方法ã€ä¸»æ¡¥è½¬å‘到PCI的地å€ç©ºé—´çª—å£ï¼ˆä½¿ç”¨_CRS)以åŠä¼ 统的INTx +ä¸æ–的路由(使用_PRT)。 + +在主桥下é¢çš„PCI设备,通常ä¸éœ€è¦é€šè¿‡ACPIæ述。æ“作系统å¯ä»¥é€šè¿‡æ ‡å‡†çš„PCIæžšä¸¾æœºåˆ¶æ¥ +å‘现它们,使用é…置访问æ¥å‘现和识别设备,并读å–和测é‡å®ƒä»¬çš„BAR。然而,如果ACPI为它们 +æ供电æºç®¡ç†æˆ–çƒæ’拔功能,或者如果设备有由平å°ä¸æ–控制器连接的INTxä¸æ–,需è¦ä¸€ä¸ª_PRT +æ¥æ述这些连接,这ç§æƒ…况下ACPIå¯ä»¥æè¿°PCI设备。 + +ACPI资æºæ述是通过ACPI命å空间ä¸è®¾å¤‡çš„_CRS对象完æˆçš„[2]。_CRSå°±åƒä¸€ä¸ªé€šç”¨çš„PCI BAR: +æ“作系统å¯ä»¥è¯»å–_CRS并找出æ£åœ¨æ¶ˆè€—的资æºï¼Œå³ä½¿å®ƒæ²¡æœ‰è¯¥è®¾å¤‡çš„驱动程åº[3]。这一点很é‡è¦ï¼Œ +å› ä¸ºå®ƒæ„味ç€ä¸€ä¸ªæ—§çš„æ“作系统å¯ä»¥æ£ç¡®åœ°å·¥ä½œï¼Œå³ä½¿æ˜¯åœ¨æ“作系统ä¸çŸ¥é“的新设备的系统上。新设 +备å¯èƒ½ä»€ä¹ˆéƒ½ä¸åšï¼Œä½†æ“作系统至少å¯ä»¥ç¡®ä¿æ²¡æœ‰èµ„æºä¸Žå®ƒä»¬å†²çªã€‚ + +åƒMCFGã€HPETã€ECDTç‰é™æ€è¡¨ï¼Œä¸æ˜¯ä¿ç•™åœ°å€ç©ºé—´çš„机制。é™æ€è¡¨æ˜¯åœ¨æ“作系统在å¯åŠ¨åˆæœŸä¸”在它 +能够解æžACPI命å空间之å‰éœ€è¦çŸ¥é“的东西。如果定义了一个新的表,å³ä½¿æ—§çš„æ“作系统忽略了这 +个表,它也需è¦æ£å¸¸è¿è¡Œã€‚_CRSå…è®¸è¿™æ ·åšï¼Œå› 为它是通用的,å¯ä»¥è¢«æ—§çš„æ“作系统解æžï¼›è€Œé™æ€è¡¨ +则ä¸å…许。 + +如果æ“作系统è¦ç®¡ç†ä¸€ä¸ªé€šè¿‡ACPIæè¿°çš„ä¸å¯å‘现的设备,该设备将有一个特定的_HID/_CID,以 +告诉æ“作系统与之绑定的驱动程åºï¼Œå¹¶ä¸”_CRS告诉æ“作系统和驱动程åºè¯¥è®¾å¤‡çš„寄å˜å™¨åœ¨å“ªé‡Œã€‚ + +PCI主桥是PNP0A03或PNP0A08设备。它们的_CRS应该æ述它们所消耗的所有地å€ç©ºé—´ã€‚这包括它 +们转å‘到PCI总线上的所有窗å£ï¼Œä»¥åŠä¸è½¬å‘到PCI的主桥本身的寄å˜å™¨ã€‚主桥的寄å˜å™¨åŒ…括次è¦/下 +级总线寄å˜å™¨ï¼Œå†³å®šäº†æ¡¥ä¸‹é¢çš„总线范围,窗å£å¯„å˜å™¨æ述了桥洞,ç‰ç‰ã€‚è¿™äº›éƒ½æ˜¯è®¾å¤‡ç›¸å…³çš„ï¼Œéž +架构相关的东西,所以PNP0A03/PNP0A08驱动å¯ä»¥ç®¡ç†å®ƒä»¬çš„唯一方法是通过_PRS/_CRS/_SRS, +它包å«äº†ç‰¹å®šäºŽè®¾å¤‡çš„细节。主桥寄å˜å™¨ä¹ŸåŒ…括ECAMç©ºé—´ï¼Œå› ä¸ºå®ƒæ˜¯ç”±ä¸»æ¡¥æ¶ˆè€—çš„ã€‚ + +ACPI定义了一个Consumer/Producerä½æ¥åŒºåˆ†æ¡¥å¯„å˜å™¨ï¼ˆâ€œConsumerâ€ä¸‹æ–‡è¯‘作消费者)和 +桥洞(“Producerâ€ä¸‹æ–‡è¯‘作生产者)[4, 5],但是早期的BIOS没有æ£ç¡®ä½¿ç”¨è¿™ä¸ªä½ã€‚其结果 +是,目å‰çš„ACPI规范åªä¸ºæ‰©å±•åœ°å€ç©ºé—´æ述符定义了消费者/生产者;在旧的QWord/Word/Word地 +å€ç©ºé—´æ述符ä¸ï¼Œè¯¥ä½åº”è¯¥è¢«å¿½ç•¥ã€‚å› æ¤ï¼Œæ“作系统必须å‡å®šæ‰€æœ‰çš„QWord/Word/Wordæ述符都是 +窗å£ã€‚ + +åœ¨å¢žåŠ æ‰©å±•åœ°å€ç©ºé—´æ述符之å‰ï¼Œæ¶ˆè´¹è€…/生产者的失败æ„味ç€æ²¡æœ‰åŠžæ³•æè¿°PNP0A03/PNP0A08设 +备本身的桥寄å˜å™¨ã€‚解决办法是在PNP0C02æ•æ‰å™¨ä¸æ述桥寄å˜å™¨ï¼ˆåŒ…括ECAM空间)[6]。 +除了ECAM之外,桥寄å˜å™¨ç©ºé—´åæ£æ˜¯ç‰¹å®šäºŽè®¾å¤‡çš„,所以通用的PNP0A03/PNP0A08驱动程 +åº(pci_root.c)没有必è¦äº†è§£å®ƒã€‚ + +新的架构应该能够在PNP0A03设备ä¸ä½¿ç”¨â€œæ¶ˆè´¹è€…â€æ‰©å±•åœ°å€ç©ºé—´æ述符,用于桥寄å˜å™¨ï¼ŒåŒ…括 +ECAM,尽管对[6]çš„ä¸¥æ ¼è§£é‡Šå¯èƒ½ç¦æ¢è¿™æ ·åšã€‚旧的x86å’Œia64å†…æ ¸å‡å®šæ‰€æœ‰çš„地å€ç©ºé—´æè¿° +符,包括“消费者â€æ‰©å±•åœ°å€ç©ºé—´çš„æ述符,都是窗å£ï¼Œæ‰€ä»¥åœ¨è¿™äº›æž¶æž„上以这ç§æ–¹å¼æ述桥寄 +å˜å™¨æ˜¯ä¸å®‰å…¨çš„。 + +PNP0C02“主æ¿â€è®¾å¤‡åŸºæœ¬ä¸Šæ˜¯ä¸‡èƒ½çš„。除了“ä¸è¦å°†è¿™äº›èµ„æºç”¨äºŽå…¶ä»–用途â€ä¹‹å¤–,没有其他的编 +ç¨‹æ¨¡åž‹ã€‚å› æ¤ï¼ŒPNP0C02 _CRS应该声明ACPI命å空间ä¸(1)没有被_CRS声明的任何其他设备对 +象的地å€ç©ºé—´ï¼Œ(2)ä¸åº”该被OS分é…给其他东西。 + +除éžæœ‰ä¸€ä¸ªæ ‡å‡†çš„固件接å£ç”¨äºŽé…置访问,例如ia64 SAL接å£[7],å¦åˆ™PCIe规范è¦æ±‚使用增强 +åž‹é…置访问方法(ECAM)。主桥消耗ECAM内å˜åœ°å€ç©ºé—´å¹¶å°†å†…å˜è®¿é—®è½¬æ¢ä¸ºPCIé…置访问。该规范 +定义了ECAM地å€ç©ºé—´çš„布局和功能;åªæœ‰åœ°å€ç©ºé—´çš„基础是特定于设备的。ACPIæ“作系统从é™æ€ +MCFG表或PNP0A03设备ä¸çš„_CBA方法ä¸äº†è§£åŸºç¡€åœ°å€ã€‚ + +MCFG表必须æè¿°éžçƒæ’拔主桥的ECAM空间[8]。由于MCFG是一个é™æ€è¡¨ï¼Œä¸èƒ½é€šè¿‡çƒæ’拔更新, +PNP0A03设备ä¸çš„_CBA方法æ述了å¯çƒæ’拔主桥的ECAM空间[9]。请注æ„,对于MCFGå’Œ_CBA, +基å€æ€»æ˜¯å¯¹åº”于总线0,å³ä½¿æ¡¥å™¨ä¸‹é¢çš„总线范围(通过_CRS报告)ä¸ä»Ž0开始。 + + +[1] ACPI 6.2, sec 6.1: + 对于任何在éžæžšä¸¾ç±»åž‹çš„总线上的设备(例如,ISA总线),OSPMä¼šæžšä¸¾è®¾å¤‡çš„æ ‡è¯†ç¬¦ï¼ŒACPI + 系统固件必须为æ¯ä¸ªè®¾å¤‡æ供一个_HID对象...以使OSPM能够åšåˆ°è¿™ä¸€ç‚¹ã€‚ + +[2] ACPI 6.2, sec 3.7: + æ“作系统枚举主æ¿è®¾å¤‡æ—¶ï¼Œåªéœ€é€šè¿‡è¯»å–ACPI命å空间æ¥å¯»æ‰¾å…·æœ‰ç¡¬ä»¶ID的设备。 + + ACPI枚举的æ¯ä¸ªè®¾å¤‡éƒ½åŒ…括ACPI命å空间ä¸ACPI定义的对象,该对象报告设备å¯èƒ½å 用的硬 + 件资æº[_PRS],报告设备当å‰ä½¿ç”¨çš„资æº[_CRS]的对象,以åŠé…置这些资æºçš„对象[_SRS]。 + 这些信æ¯è¢«å³æ’å³ç”¨æ“作系统(OSPM)用æ¥é…置设备。 + +[3] ACPI 6.2, sec 6.2: + OSPM使用设备é…置对象æ¥é…置通过ACPI列举的设备的硬件资æºã€‚设备é…置对象æä¾›äº†å…³äºŽå½“å‰ + å’Œå¯èƒ½çš„资æºéœ€æ±‚çš„ä¿¡æ¯ï¼Œå…±äº«èµ„æºä¹‹é—´çš„关系,以åŠé…置硬件资æºçš„方法。 + + 当OSPM枚举一个设备时,它调用_PRSæ¥ç¡®å®šè¯¥è®¾å¤‡çš„资æºéœ€æ±‚。它也å¯ä»¥è°ƒç”¨_CRSæ¥æ‰¾åˆ°è¯¥è®¾ + 备的当å‰èµ„æºè®¾ç½®ã€‚利用这些信æ¯ï¼Œå³æ’å³ç”¨ç³»ç»Ÿå†³å®šè®¾å¤‡åº”该消耗什么资æºï¼Œå¹¶é€šè¿‡è°ƒç”¨è®¾å¤‡ + çš„_SRS控制方法æ¥è®¾ç½®è¿™äº›èµ„æºã€‚ + + 在ACPIä¸ï¼Œè®¾å¤‡å¯ä»¥æ¶ˆè€—资æºï¼ˆä¾‹å¦‚ï¼Œä¼ ç»Ÿçš„é”®ç›˜ï¼‰ï¼Œæ供资æºï¼ˆä¾‹å¦‚,一个专有的PCI桥), + 或者两者都åšã€‚除éžå¦æœ‰è§„定,设备的资æºè¢«å‡å®šä¸ºæ¥è‡ªè®¾å¤‡å±‚次结构ä¸è®¾å¤‡ä¸Šæ–¹æœ€è¿‘的匹é…资 + æºã€‚ + +[4] ACPI 6.2, sec 6.4.3.5.1, 2, 3, 4: + QWord/DWord/Word 地å€ç©ºé—´æ述符 (.1, .2, .3) + å¸¸è§„æ ‡å¿—: Bit [0] 被忽略。 + + 扩展地å€ç©ºé—´æ述符 (.4) + å¸¸è§„æ ‡å¿—: Bit [0] 消费者/生产者: + + * 1 – è¿™ä¸ªè®¾å¤‡æ¶ˆè´¹è¿™ä¸ªèµ„æº + * 0 – è¯¥è®¾å¤‡ç”Ÿäº§å’Œæ¶ˆè´¹è¯¥èµ„æº + +[5] ACPI 6.2, sec 19.6.43: + ResourceUsage指定内å˜èŒƒå›´æ˜¯ç”±è¿™ä¸ªè®¾å¤‡ï¼ˆResourceConsumerï¼‰æ¶ˆè´¹è¿˜æ˜¯ä¼ é€’ç»™å设备 + (ResourceProducer)。如果没有指定,那么就å‡å®šæ˜¯ResourceConsumer。 + +[6] PCI Firmware 3.2, sec 4.1.2: + 如果æ“作系统ä¸èƒ½åŽŸç”Ÿçš„懂得ä¿ç•™MMCFG区域,MMCFG区域必须由固件ä¿ç•™ã€‚在MCFG表ä¸æˆ–通 + 过_CBA方法(è§ç¬¬4.1.3节)报告的地å€èŒƒå›´å¿…须通过声明主æ¿èµ„æºæ¥ä¿ç•™ã€‚对于大多数系统, + 主æ¿èµ„æºå°†å‡ºçŽ°åœ¨ACPI命åç©ºé—´çš„æ ¹éƒ¨ï¼ˆåœ¨_SB下),在一个节点的_HID为EISAID(PNP0C0 + 2),在这ç§æƒ…况下的资æºä¸åº”该è¦æ±‚åœ¨æ ¹PCI总线的_CRS。这些资æºå¯ä»¥é€‰æ‹©åœ¨Int15 E820 + 或EFIGetMemoryMapä¸ä½œä¸ºä¿ç•™å†…å˜è¿”回,但必须始终通过ACPI作为主æ¿èµ„æºæŠ¥å‘Šã€‚ + +[7] PCI Express 4.0, sec 7.2.2: + 对于PC兼容的系统,或者没有实现å…许访问é…置空间的处ç†å™¨æž¶æž„特定固件接å£æ ‡å‡†çš„系统,需 + è¦ä½¿ç”¨æœ¬èŠ‚ä¸å®šä¹‰çš„ECAM。 + +[8] PCI Firmware 3.2, sec 4.1.2: + MCFG表是一个ACPI表,用于沟通的基础地å€å¯¹åº”çš„éžçƒçš„å¯ç§»åŠ¨çš„PCI段组范围内的PCI段组在 + å¯åŠ¨æ—¶æ供给æ“作系统。这对PC兼容系统æ¥è¯´æ˜¯å¿…需的。 + + MCFG表仅用于沟通在å¯åŠ¨æ—¶ç³»ç»Ÿå¯ç”¨çš„PCI段组对应的基å€ã€‚ + +[9] PCI Firmware 3.2, sec 4.1.3: + _CBA (Memory mapped Configuration Base Address) 控制方法是一个å¯é€‰çš„ACPI对 + 象,用于返回çƒæ’拔主桥的64ä½å†…å˜æ˜ å°„çš„é…置基å€ã€‚_CBA 返回的基å€æ˜¯ä¸Žå¤„ç†å™¨ç›¸å…³çš„地å€ã€‚ + _CBA 控制方法被评估为一个整数。 + + 这个控制方法出现在主桥对象下。当_CBA方法出现在一个活动的主桥对象下时,æ“作系统会评 + 估这个结构,以确定内å˜æ˜ å°„çš„é…置基å€ï¼Œå¯¹åº”于_CRS方法ä¸æŒ‡å®šçš„总线编å·èŒƒå›´çš„PCI段组。 + 一个包å«_CBA方法的ACPI命å空间对象也必须包å«ä¸€ä¸ªç›¸åº”çš„_SEG方法。 diff --git a/Documentation/translations/zh_CN/PCI/index.rst b/Documentation/translations/zh_CN/PCI/index.rst index 16acb2bd9b58..cbeb33c34a98 100644 --- a/Documentation/translations/zh_CN/PCI/index.rst +++ b/Documentation/translations/zh_CN/PCI/index.rst @@ -10,9 +10,6 @@ :æ ¡è¯‘: - -.. _cn_PCI_index.rst: - =================== Linux PCI总线å系统 =================== @@ -26,12 +23,12 @@ Linux PCI总线å系统 pci-iov-howto msi-howto sysfs-pci + acpi-info Todolist: - acpi-info - pci-error-recovery - pcieaer-howto - endpoint/index - boot-interrupts +* pci-error-recovery +* pcieaer-howto +* endpoint/index +* boot-interrupts diff --git a/Documentation/translations/zh_CN/admin-guide/README.rst b/Documentation/translations/zh_CN/admin-guide/README.rst index d20949e8bf6f..e679cbc3c89d 100644 --- a/Documentation/translations/zh_CN/admin-guide/README.rst +++ b/Documentation/translations/zh_CN/admin-guide/README.rst @@ -6,10 +6,10 @@ å´æƒ³æˆ Wu XiangCheng <bobwxc@email.cn> -Linuxå†…æ ¸5.x版本 <http://kernel.org/> +Linuxå†…æ ¸6.x版本 <http://kernel.org/> ========================================= -以下是Linux版本5çš„å‘行注记。仔细阅读它们, +以下是Linux版本6çš„å‘行注记。仔细阅读它们, å®ƒä»¬ä¼šå‘Šè¯‰ä½ è¿™äº›éƒ½æ˜¯ä»€ä¹ˆï¼Œè§£é‡Šå¦‚ä½•å®‰è£…å†…æ ¸ï¼Œä»¥åŠé‡åˆ°é—®é¢˜æ—¶è¯¥å¦‚何åšã€‚ 什么是Linux? @@ -61,27 +61,27 @@ Linuxå†…æ ¸5.x版本 <http://kernel.org/> - 如果您è¦å®‰è£…完整的æºä»£ç ï¼Œè¯·æŠŠå†…æ ¸tar档案包放在您有æƒé™çš„目录ä¸ï¼ˆä¾‹å¦‚您 的主目录)并将其解包:: - xz -cd linux-5.x.tar.xz | tar xvf - + xz -cd linux-6.x.tar.xz | tar xvf - 将“Xâ€æ›¿æ¢æˆæœ€æ–°å†…æ ¸çš„ç‰ˆæœ¬å·ã€‚ ã€ä¸è¦ã€‘使用 /usr/src/linux 目录ï¼è¿™é‡Œæœ‰ä¸€ç»„åº“å¤´æ–‡ä»¶ä½¿ç”¨çš„å†…æ ¸å¤´æ–‡ä»¶ (通常是ä¸å®Œæ•´çš„)。它们应该与库匹é…,而ä¸æ˜¯è¢«å†…æ ¸çš„å˜åŒ–æžå¾—一团糟。 - - 您还å¯ä»¥é€šè¿‡æ‰“è¡¥ä¸åœ¨5.x版本之间å‡çº§ã€‚è¡¥ä¸ä»¥xzæ ¼å¼åˆ†å‘。è¦é€šè¿‡æ‰“è¡¥ä¸è¿›è¡Œ - 安装,请获å–所有较新的补ä¸æ–‡ä»¶ï¼Œè¿›å…¥å†…æ ¸æºä»£ç (linux-5.x)的目录并 + - 您还å¯ä»¥é€šè¿‡æ‰“è¡¥ä¸åœ¨6.x版本之间å‡çº§ã€‚è¡¥ä¸ä»¥xzæ ¼å¼åˆ†å‘。è¦é€šè¿‡æ‰“è¡¥ä¸è¿›è¡Œ + 安装,请获å–所有较新的补ä¸æ–‡ä»¶ï¼Œè¿›å…¥å†…æ ¸æºä»£ç (linux-6.x)的目录并 执行:: - xz -cd ../patch-5.x.xz | patch -p1 + xz -cd ../patch-6.x.xz | patch -p1 请ã€æŒ‰é¡ºåºã€‘替æ¢æ‰€æœ‰å¤§äºŽå½“å‰æºä»£ç æ ‘ç‰ˆæœ¬çš„â€œxâ€ï¼Œè¿™æ ·å°±å¯ä»¥äº†ã€‚您å¯èƒ½æƒ³è¦ åˆ é™¤å¤‡ä»½æ–‡ä»¶ï¼ˆæ–‡ä»¶å类似xxx~ 或 xxx.orig),并确ä¿æ²¡æœ‰å¤±è´¥çš„è¡¥ä¸ï¼ˆæ–‡ä»¶å 类似xxx# 或 xxx.rej)。如果有,ä¸æ˜¯ä½ 就是我犯了错误。 - 与5.xå†…æ ¸çš„è¡¥ä¸ä¸åŒï¼Œ5.x.yå†…æ ¸ï¼ˆä¹Ÿç§°ä¸ºç¨³å®šç‰ˆå†…æ ¸ï¼‰çš„è¡¥ä¸ä¸æ˜¯å¢žé‡çš„,而是 - 直接应用于基本的5.xå†…æ ¸ã€‚ä¾‹å¦‚ï¼Œå¦‚æžœæ‚¨çš„åŸºæœ¬å†…æ ¸æ˜¯5.0,并且希望应用5.0.3 - è¡¥ä¸ï¼Œåˆ™ä¸åº”先应用5.0.1å’Œ5.0.2çš„è¡¥ä¸ã€‚类似地,如果您è¿è¡Œçš„是5.0.2å†…æ ¸ï¼Œ - 并且希望跳转到5.0.3,那么在应用5.0.3è¡¥ä¸ä¹‹å‰ï¼Œå¿…须首先撤销5.0.2è¡¥ä¸ + 与6.xå†…æ ¸çš„è¡¥ä¸ä¸åŒï¼Œ6.x.yå†…æ ¸ï¼ˆä¹Ÿç§°ä¸ºç¨³å®šç‰ˆå†…æ ¸ï¼‰çš„è¡¥ä¸ä¸æ˜¯å¢žé‡çš„,而是 + 直接应用于基本的6.xå†…æ ¸ã€‚ä¾‹å¦‚ï¼Œå¦‚æžœæ‚¨çš„åŸºæœ¬å†…æ ¸æ˜¯6.0,并且希望应用6.0.3 + è¡¥ä¸ï¼Œåˆ™ä¸åº”先应用6.0.1å’Œ6.0.2çš„è¡¥ä¸ã€‚类似地,如果您è¿è¡Œçš„是6.0.2å†…æ ¸ï¼Œ + 并且希望跳转到6.0.3,那么在应用6.0.3è¡¥ä¸ä¹‹å‰ï¼Œå¿…须首先撤销6.0.2è¡¥ä¸ ï¼ˆå³patch -R)。更多关于这方é¢çš„内容,请阅读 :ref:`Documentation/process/applying-patches.rst <applying_patches>` 。 @@ -103,7 +103,7 @@ Linuxå†…æ ¸5.x版本 <http://kernel.org/> 软件è¦æ±‚ --------- - 编译和è¿è¡Œ5.xå†…æ ¸éœ€è¦å„ç§è½¯ä»¶åŒ…的最新版本。请å‚考 + 编译和è¿è¡Œ6.xå†…æ ¸éœ€è¦å„ç§è½¯ä»¶åŒ…的最新版本。请å‚考 :ref:`Documentation/process/changes.rst <changes>` æ¥äº†è§£æœ€ä½Žç‰ˆæœ¬è¦æ±‚以åŠå¦‚何å‡çº§è½¯ä»¶åŒ…。请注æ„,使用过旧版本的这些包å¯èƒ½ä¼š å¯¼è‡´å¾ˆéš¾è¿½è¸ªçš„é—´æŽ¥é”™è¯¯ï¼Œå› æ¤ä¸è¦ä»¥ä¸ºåœ¨ç”Ÿæˆæˆ–æ“作过程ä¸å‡ºçŽ°æ˜Žæ˜¾é—®é¢˜æ—¶å¯ä»¥ @@ -116,12 +116,12 @@ Linuxå†…æ ¸5.x版本 <http://kernel.org/> ``make O=output/dir`` 选项å¯ä»¥ä¸ºè¾“出文件(包括 .config)指定备用ä½ç½®ã€‚ 例如:: - kernel source code: /usr/src/linux-5.x + kernel source code: /usr/src/linux-6.x build directory: /home/name/build/kernel è¦é…ç½®å’Œæž„å»ºå†…æ ¸ï¼Œè¯·ä½¿ç”¨:: - cd /usr/src/linux-5.x + cd /usr/src/linux-6.x make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install @@ -227,8 +227,6 @@ Linuxå†…æ ¸5.x版本 <http://kernel.org/> - ç¡®ä¿æ‚¨è‡³å°‘有gcc 5.1å¯ç”¨ã€‚ 有关更多信æ¯ï¼Œè¯·å‚阅 :ref:`Documentation/process/changes.rst <changes>` 。 - 请注æ„,您ä»ç„¶å¯ä»¥ä½¿ç”¨æ¤å†…æ ¸è¿è¡Œa.out用户程åºã€‚ - - 执行 ``make`` æ¥åˆ›å»ºåŽ‹ç¼©å†…æ ¸æ˜ åƒã€‚如果您安装了lilo以适é…å†…æ ¸makefile, 那么也å¯ä»¥è¿›è¡Œ ``make install`` ,但是您å¯èƒ½éœ€è¦å…ˆæ£€æŸ¥ç‰¹å®šçš„lilo设置。 @@ -282,67 +280,12 @@ Linuxå†…æ ¸5.x版本 <http://kernel.org/> è‹¥é‡åˆ°é—®é¢˜ ----------- - - 如果您å‘现了一些å¯èƒ½ç”±äºŽå†…æ ¸ç¼ºé™·æ‰€å¯¼è‡´çš„é—®é¢˜ï¼Œè¯·æ£€æŸ¥MAINTAINERS(维护者) - 文件看看是å¦æœ‰äººä¸Žä»¤æ‚¨é‡åˆ°éº»çƒ¦çš„å†…æ ¸éƒ¨åˆ†ç›¸å…³ã€‚å¦‚æžœæ— äººåœ¨æ¤åˆ—出,那么第二 - 个最好的方案就是把它们å‘给我(torvalds@linux-foundation.org),也å¯èƒ½å‘é€ - 到任何其他相关的邮件列表或新闻组。 - - - 在所有的缺陷报告ä¸ï¼Œã€è¯·ã€‘å‘Šè¯‰æˆ‘ä»¬æ‚¨åœ¨è¯´ä»€ä¹ˆå†…æ ¸ï¼Œå¦‚ä½•å¤çŽ°é—®é¢˜ï¼Œä»¥åŠæ‚¨çš„ - 设置是什么的(使用您的常识)。如果问题是新的,请告诉我;如果问题是旧的, - 请å°è¯•å‘Šè¯‰æˆ‘您什么时候首次注æ„到它。 - - - 如果缺陷导致如下消æ¯:: - - unable to handle kernel paging request at address C0000010 - Oops: 0002 - EIP: 0010:XXXXXXXX - eax: xxxxxxxx ebx: xxxxxxxx ecx: xxxxxxxx edx: xxxxxxxx - esi: xxxxxxxx edi: xxxxxxxx ebp: xxxxxxxx - ds: xxxx es: xxxx fs: xxxx gs: xxxx - Pid: xx, process nr: xx - xx xx xx xx xx xx xx xx xx xx - - æˆ–è€…ç±»ä¼¼çš„å†…æ ¸è°ƒè¯•ä¿¡æ¯æ˜¾ç¤ºåœ¨å±å¹•ä¸Šæˆ–在系统日志里,请ã€å¦‚实】å¤åˆ¶å®ƒã€‚ - å¯èƒ½å¯¹ä½ æ¥è¯´è½¬å‚¨ï¼ˆdump)看起æ¥ä¸å¯ç†è§£ï¼Œä½†å®ƒç¡®å®žåŒ…å«å¯èƒ½æœ‰åŠ©äºŽè°ƒè¯•é—®é¢˜çš„ - ä¿¡æ¯ã€‚转储上方的文本也很é‡è¦ï¼šå®ƒè¯´æ˜Žäº†å†…æ ¸è½¬å‚¨ä»£ç çš„åŽŸå› ï¼ˆåœ¨ä¸Šé¢çš„示例ä¸ï¼Œ - æ˜¯ç”±äºŽå†…æ ¸æŒ‡é’ˆé”™è¯¯ï¼‰ã€‚æ›´å¤šå…³äºŽå¦‚ä½•ç†è§£è½¬å‚¨çš„ä¿¡æ¯ï¼Œè¯·å‚è§ - Documentation/admin-guide/bug-hunting.rst。 - - - 如果使用 CONFIG_KALLSYMS ç¼–è¯‘å†…æ ¸ï¼Œåˆ™å¯ä»¥æŒ‰åŽŸæ ·å‘é€è½¬å‚¨ï¼Œå¦åˆ™å¿…须使用 - ``ksymoops`` 程åºæ¥ç†è§£è½¬å‚¨ï¼ˆä½†é€šå¸¸é¦–选使用CONFIG_KALLSYMS编译)。 - æ¤å®žç”¨ç¨‹åºå¯ä»Ž - https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ 下载。 - 或者,您å¯ä»¥æ‰‹åŠ¨æ‰§è¡Œè½¬å‚¨æŸ¥æ‰¾ï¼š - - - 在调试åƒä¸Šé¢è¿™æ ·çš„转储时,如果您å¯ä»¥æŸ¥æ‰¾EIP值的å«ä¹‰ï¼Œè¿™å°†éžå¸¸æœ‰å¸®åŠ©ã€‚ - åå…进制值本身对我或其他任何人都没有太大帮助:它会å–å†³äºŽç‰¹å®šçš„å†…æ ¸è®¾ç½®ã€‚ - 您应该åšçš„是从EIP行获å–åå…进制值(忽略 ``0010:`` ),然åŽåœ¨å†…æ ¸åå—列表 - ä¸æŸ¥æ‰¾å®ƒï¼Œä»¥æŸ¥çœ‹å“ªä¸ªå†…æ ¸å‡½æ•°åŒ…å«æœ‰é—®é¢˜çš„地å€ã€‚ - - è¦æ‰¾åˆ°å†…æ ¸å‡½æ•°å,您需è¦æ‰¾åˆ°ä¸Žæ˜¾ç¤ºç—‡çŠ¶çš„å†…æ ¸ç›¸å…³è”的系统二进制文件。就是 - 文件“linux/vmlinuxâ€ã€‚è¦æå–åå—åˆ—è¡¨å¹¶å°†å…¶ä¸Žå†…æ ¸å´©æºƒä¸çš„EIP进行匹é…, - 请执行:: - - nm vmlinux | sort | less - - 这将为您æ供一个按å‡åºæŽ’åºçš„å†…æ ¸åœ°å€åˆ—表,从ä¸å¾ˆå®¹æ˜“找到包å«æœ‰é—®é¢˜çš„åœ°å€ - 的函数。请注æ„ï¼Œå†…æ ¸è°ƒè¯•æ¶ˆæ¯æ供的地å€ä¸ä¸€å®šä¸Žå‡½æ•°åœ°å€å®Œå…¨åŒ¹é…(事实上, - 这是ä¸å¯èƒ½çš„ï¼‰ï¼Œå› æ¤æ‚¨ä¸èƒ½åªâ€œgrepâ€åˆ—表:ä¸è¿‡åˆ—表将为您æä¾›æ¯ä¸ªå†…æ ¸å‡½æ•° - çš„èµ·ç‚¹ï¼Œå› æ¤é€šè¿‡æŸ¥æ‰¾èµ·å§‹åœ°å€ä½ŽäºŽä½ æ£åœ¨æœç´¢çš„地å€ï¼Œä½†åŽä¸€ä¸ªå‡½æ•°çš„高于的 - å‡½æ•°ï¼Œä½ ä¼šæ‰¾åˆ°æ‚¨æƒ³è¦çš„。实际上,在您的问题报告ä¸åŠ 入一些“上下文â€å¯èƒ½æ˜¯ - 一个好主æ„ï¼Œç»™å‡ºç›¸å…³çš„ä¸Šä¸‹å‡ è¡Œã€‚ - - 如果您由于æŸäº›åŽŸå› æ— æ³•å®Œæˆä¸Šè¿°æ“ä½œï¼ˆå¦‚æ‚¨ä½¿ç”¨é¢„ç¼–è¯‘çš„å†…æ ¸æ˜ åƒæˆ–ç±»ä¼¼çš„æ˜ åƒï¼‰ï¼Œ - 请尽å¯èƒ½å¤šåœ°å‘Šè¯‰æˆ‘您的相关设置信æ¯ï¼Œè¿™ä¼šæœ‰æ‰€å¸®åŠ©ã€‚有关详细信æ¯è¯·é˜…读 - ‘Documentation/admin-guide/reporting-issues.rst’。 - - - 或者,您å¯ä»¥åœ¨æ£åœ¨è¿è¡Œçš„å†…æ ¸ä¸Šä½¿ç”¨gdb(åªè¯»çš„ï¼›å³ä¸èƒ½æ›´æ”¹å€¼æˆ–设置æ–点)。 - 为æ¤ï¼Œè¯·é¦–先使用-gç¼–è¯‘å†…æ ¸ï¼›é€‚å½“åœ°ç¼–è¾‘arch/x86/Makefile,然åŽæ‰§è¡Œ ``make - clean`` 。您还需è¦å¯ç”¨CONFIG_PROC_FS(通过 ``make config`` )。 - - ä½¿ç”¨æ–°å†…æ ¸é‡æ–°å¯åŠ¨åŽï¼Œæ‰§è¡Œ ``gdb vmlinux /proc/kcore`` 。现在å¯ä»¥ä½¿ç”¨æ‰€æœ‰ - 普通的gdb命令。查找系统崩溃点的命令是 ``l *0xXXXXXXXX`` (将xxx替æ¢ä¸ºEIP - 值)。 - - 用gdbæ— æ³•è°ƒè¯•ä¸€ä¸ªå½“å‰æœªè¿è¡Œçš„å†…æ ¸æ˜¯ç”±äºŽgdbï¼ˆé”™è¯¯åœ°ï¼‰å¿½ç•¥äº†ç¼–è¯‘å†…æ ¸çš„èµ·å§‹ - å移é‡ã€‚ +如果您å‘现了一些å¯èƒ½ç”±äºŽå†…æ ¸ç¼ºé™·æ‰€å¯¼è‡´çš„é—®é¢˜ï¼Œè¯·å‚阅: +Documentation/translations/zh_CN/admin-guide/reporting-issues.rst 。 + +想è¦ç†è§£å†…æ ¸é”™è¯¯æŠ¥å‘Šï¼Œè¯·å‚阅: +Documentation/translations/zh_CN/admin-guide/bug-hunting.rst 。 + +更多用GDBè°ƒè¯•å†…æ ¸çš„ä¿¡æ¯ï¼Œè¯·å‚阅: +Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst +å’Œ Documentation/dev-tools/kgdb.rst 。 diff --git a/Documentation/translations/zh_CN/admin-guide/bootconfig.rst b/Documentation/translations/zh_CN/admin-guide/bootconfig.rst new file mode 100644 index 000000000000..072d17f5f199 --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/bootconfig.rst @@ -0,0 +1,293 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/bootconfig.rst + +:译者: å´æƒ³æˆ Wu XiangCheng <bobwxc@email.cn> + +======== +引导é…ç½® +======== + +:作者: Masami Hiramatsu <mhiramat@kernel.org> + +概述 +==== + +引导é…ç½®æ‰©å±•äº†çŽ°æœ‰çš„å†…æ ¸å‘½ä»¤è¡Œï¼Œä»¥ä¸€ç§æ›´æœ‰æ•ˆçŽ‡çš„æ–¹å¼åœ¨å¼•å¯¼å†…æ ¸æ—¶è¿›ä¸€æ¥æ”¯æŒ +键值数æ®ã€‚è¿™å…许管ç†å‘˜ä¼ 递一份结构化关键å—çš„é…置文件。 + +é…置文件è¯æ³• +============ + +引导é…置文件的è¯æ³•é‡‡ç”¨éžå¸¸ç®€å•çš„键值结构。æ¯ä¸ªå…³é”®å—由点连接的å•è¯ç»„æˆï¼Œé”® +和值由 ``=`` 连接。值以分å·ï¼ˆ ``;`` )或æ¢è¡Œç¬¦ï¼ˆ ``\n`` )结尾。数组值ä¸æ¯ +ä¸ªå…ƒç´ ç”±é€—å·ï¼ˆ ``,`` )分隔。:: + + KEY[.WORD[...]] = VALUE[, VALUE2[...]][;] + +ä¸Žå†…æ ¸å‘½ä»¤è¡Œè¯æ³•ä¸åŒï¼Œé€—å·å’Œ ``=`` 周围å…è®¸æœ‰ç©ºæ ¼ã€‚ + +关键å—åªå…许包å«å—æ¯ã€æ•°å—ã€è¿žå—符( ``-`` )和下划线( ``_`` )。值å¯åŒ…å« +å¯æ‰“å°å—ç¬¦å’Œç©ºæ ¼ï¼Œä½†åˆ†å·ï¼ˆ ``;`` )ã€æ¢è¡Œç¬¦ï¼ˆ ``\n`` )ã€é€—å·ï¼ˆ ``,`` )〠+井å·ï¼ˆ ``#`` )和å³å¤§æ‹¬å·ï¼ˆ ``}`` )ç‰åˆ†éš”符除外。 + +å¦‚æžœä½ éœ€è¦åœ¨å€¼ä¸ä½¿ç”¨è¿™äº›åˆ†éš”符,å¯ä»¥ç”¨åŒå¼•å·ï¼ˆ ``"VALUE"`` )或å•å¼•å· +( ``'VALUE'`` )括起æ¥ã€‚注æ„,引å·æ— 法转义。 + +键的值å¯ä»¥ä¸ºç©ºæˆ–ä¸å˜åœ¨ã€‚这些键用于检查该键是å¦å˜åœ¨ï¼ˆç±»ä¼¼å¸ƒå°”值)。 + +键值è¯æ³• +-------- + +引导é…置文件è¯æ³•å…许用户通过大括å·åˆå¹¶é”®å部分相åŒçš„关键å—。例如:: + + foo.bar.baz = value1 + foo.bar.qux.quux = value2 + +也å¯ä»¥å†™æˆ:: + + foo.bar { + baz = value1 + qux.quux = value2 + } + +或者更紧凑一些,写æˆ:: + + foo.bar { baz = value1; qux.quux = value2 } + +在这两ç§æ ·å¼ä¸ï¼Œå¼•å¯¼è§£æžæ—¶ç›¸åŒçš„关键å—都会自动åˆå¹¶ã€‚å› æ¤å¯ä»¥è¿½åŠ ç±»ä¼¼çš„æ ‘æˆ– +键值。 + +相åŒå…³é”®å—的值 +-------------- + +ç¦æ¢ä¸¤ä¸ªæˆ–多个值或数组共享åŒä¸€ä¸ªå…³é”®å—。例如:: + + foo = bar, baz + foo = qux # !错误! 我们ä¸å¯ä»¥é‡å®šä¹‰ç›¸åŒçš„å…³é”®å— + +å¦‚æžœä½ æƒ³è¦æ›´æ–°å€¼ï¼Œå¿…须显å¼ä½¿ç”¨è¦†ç›–æ“作符 ``:=`` 。例如:: + + foo = bar, baz + foo := qux + +è¿™æ · ``foo`` 关键å—的值就å˜æˆäº† ``qux`` ã€‚è¿™å¯¹äºŽé€šè¿‡æ·»åŠ ï¼ˆéƒ¨åˆ†ï¼‰è‡ªå®šä¹‰å¼•å¯¼ +é…ç½®æ¥è¦†ç›–默认值éžå¸¸æœ‰ç”¨ï¼Œå…于解æžé»˜è®¤å¼•å¯¼é…置。 + +å¦‚æžœä½ æƒ³å¯¹çŽ°æœ‰å…³é”®å—è¿½åŠ å€¼ä½œä¸ºæ•°ç»„æˆå‘˜ï¼Œå¯ä»¥ä½¿ç”¨ ``+=`` æ“作符。例如:: + + foo = bar, baz + foo += qux + +è¿™æ ·ï¼Œ ``foo`` 关键å—å°±åŒæ—¶æ‹¥æœ‰äº† ``bar`` , ``baz`` å’Œ ``qux`` 。 + +æ¤å¤–,父关键å—下å¯åŒæ—¶å˜åœ¨å€¼å’Œå关键å—。 +例如,下列é…置是å¯è¡Œçš„。:: + + foo = value1 + foo.bar = value2 + foo := value3 # 这会更新foo的值。 + +注æ„,裸值ä¸èƒ½ç›´æŽ¥æ”¾è¿›ç»“构化关键å—ä¸ï¼Œå¿…须在大括å·å¤–定义它。例如:: + + foo { + bar = value1 + bar { + baz = value2 + qux = value3 + } + } + +åŒæ—¶ï¼Œå…³é”®å—下值节点的顺åºæ˜¯å›ºå®šçš„。如果值和å关键å—åŒæ—¶å˜åœ¨ï¼Œå€¼æ°¸è¿œæ˜¯è¯¥å…³ +é”®å—的第一个åèŠ‚ç‚¹ã€‚å› æ¤å¦‚果用户先指定å关键å—,如:: + + foo.bar = value1 + foo = value2 + +则在程åºï¼ˆå’Œ/proc/bootconfig)ä¸ï¼Œå®ƒä¼šæŒ‰å¦‚下显示:: + + foo = value2 + foo.bar = value1 + +注释 +---- + +é…ç½®è¯æ³•æŽ¥å—shellè„šæœ¬é£Žæ ¼çš„æ³¨é‡Šã€‚æ³¨é‡Šä»¥äº•å·ï¼ˆ ``#`` )开始,到æ¢è¡Œç¬¦ +( ``\n`` )结æŸã€‚ + +:: + + # comment line + foo = value # value is set to foo. + bar = 1, # 1st element + 2, # 2nd element + 3 # 3rd element + +会被解æžä¸º:: + + foo = value + bar = 1, 2, 3 + +注æ„ä½ ä¸èƒ½æŠŠæ³¨é‡Šæ”¾åœ¨å€¼å’Œåˆ†éš”符( ``,`` 或 ``;`` )之间。如下é…ç½®è¯æ³•æ˜¯é”™è¯¯çš„:: + + key = 1 # comment + ,2 + + +/proc/bootconfig +================ + +/proc/bootconfig是引导é…置的用户空间接å£ã€‚与/proc/cmdlineä¸åŒï¼Œæ¤æ–‡ä»¶å†…容以 +é”®å€¼åˆ—è¡¨æ ·å¼æ˜¾ç¤ºã€‚ +æ¯ä¸ªé”®å€¼å¯¹ä¸€è¡Œï¼Œæ ·å¼å¦‚下:: + + KEY[.WORDS...] = "[VALUE]"[,"VALUE2"...] + + +用引导é…ç½®å¼•å¯¼å†…æ ¸ +================== + +用引导é…ç½®å¼•å¯¼å†…æ ¸æœ‰ä¸¤ç§æ–¹æ³•ï¼šå°†å¼•å¯¼é…ç½®é™„åŠ åˆ°initrdé•œåƒæˆ–ç›´æŽ¥åµŒå…¥å†…æ ¸ä¸ã€‚ + +*initrd: initial RAM disk,åˆå§‹å†…å˜ç£ç›˜* + +将引导é…ç½®é™„åŠ åˆ°initrd +---------------------- + +由于默认情况下引导é…置文件是用initrdåŠ è½½çš„ï¼Œå› æ¤å®ƒå°†è¢«æ·»åŠ 到initrd(initramfs) +é•œåƒæ–‡ä»¶çš„末尾,其ä¸åŒ…å«å¡«å……ã€å¤§å°ã€æ ¡éªŒå€¼å’Œ12å—节幻数,如下所示:: + + [initrd][bootconfig][padding][size(le32)][checksum(le32)][#BOOTCONFIG\n] + +大å°å’Œæ ¡éªŒå€¼ä¸ºå°ç«¯åºå˜æ”¾çš„32ä½æ— 符å·å€¼ã€‚ + +当引导é…ç½®è¢«åŠ åˆ°initrdé•œåƒæ—¶ï¼Œæ•´ä¸ªæ–‡ä»¶å¤§å°ä¼šå¯¹é½åˆ°4å—节。空å—符( ``\0`` ) +会填补对é½ç©ºéš™ã€‚å› æ¤ ``size`` 就是引导é…置文件的长度+å¡«å……çš„å—节。 + +Linuxå†…æ ¸åœ¨å†…å˜ä¸è§£ç initrdé•œåƒçš„最åŽéƒ¨åˆ†ä»¥èŽ·å–引导é…置数æ®ã€‚由于这ç§â€œèƒŒè´Ÿå¼â€ +的方法,åªè¦å¼•å¯¼åŠ è½½å™¨ä¼ é€’äº†æ£ç¡®çš„initrd文件大å°ï¼Œå°±æ— éœ€æ›´æ”¹æˆ–æ›´æ–°å¼•å¯¼åŠ è½½å™¨ +å’Œå†…æ ¸é•œåƒæœ¬èº«ã€‚å¦‚æžœå¼•å¯¼åŠ è½½å™¨æ„å¤–ä¼ é€’äº†æ›´é•¿çš„å¤§å°ï¼Œå†…æ ¸å°†æ— æ³•æ‰¾åˆ°å¼•å¯¼é…置数 +æ®ã€‚ + +Linuxå†…æ ¸åœ¨tools/bootconfig下æ供了 ``bootconfig`` 命令æ¥å®Œæˆæ¤æ“作,管ç†å‘˜ +å¯ä»¥ç”¨å®ƒä»Žinitrdé•œåƒä¸åˆ é™¤æˆ–è¿½åŠ é…ç½®æ–‡ä»¶ã€‚ä½ å¯ä»¥ç”¨ä»¥ä¸‹å‘½ä»¤æ¥æž„建它:: + + # make -C tools/bootconfig + +è¦å‘initrdé•œåƒæ·»åŠ ä½ çš„å¼•å¯¼é…置文件,请按如下命令æ“作(旧数æ®ä¼šè‡ªåŠ¨ç§»é™¤ï¼‰:: + + # tools/bootconfig/bootconfig -a your-config /boot/initrd.img-X.Y.Z + +è¦ä»Žé•œåƒä¸ç§»é™¤é…置,å¯ä»¥ä½¿ç”¨-d选项:: + + # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z + +然åŽåœ¨å†…æ ¸å‘½ä»¤è¡Œä¸Šæ·»åŠ ``bootconfig`` å‘Šè¯‰å†…æ ¸åŽ»initrdæ–‡ä»¶æœ«å°¾å¯»æ‰¾å†…æ ¸é…置。 + +将引导é…ç½®åµŒå…¥å†…æ ¸ +------------------ + +å¦‚æžœä½ ä¸èƒ½ä½¿ç”¨initrd,也å¯ä»¥é€šè¿‡Kconfig选项将引导é…ç½®æ–‡ä»¶åµŒå…¥å†…æ ¸ä¸ã€‚在æ¤æƒ… +å†µä¸‹ï¼Œä½ éœ€è¦ç”¨ä»¥ä¸‹é€‰é¡¹é‡æ–°ç¼–è¯‘å†…æ ¸:: + + CONFIG_BOOT_CONFIG_EMBED=y + CONFIG_BOOT_CONFIG_EMBED_FILE="/引导é…ç½®/文件/çš„/路径" + +``CONFIG_BOOT_CONFIG_EMBED_FILE`` 需è¦ä»Žæºç æ ‘æˆ–å¯¹è±¡æ ‘å¼€å§‹çš„å¼•å¯¼é…置文件的 +ç»å¯¹/ç›¸å¯¹è·¯å¾„ã€‚å†…æ ¸ä¼šå°†å…¶åµŒå…¥ä½œä¸ºé»˜è®¤å¼•å¯¼é…置。 + +与将引导é…ç½®é™„åŠ åˆ°initrdä¸€æ ·ï¼Œä½ ä¹Ÿéœ€è¦åœ¨å†…æ ¸å‘½ä»¤è¡Œä¸Šæ·»åŠ ``bootconfig`` 告诉 +å†…æ ¸åŽ»å¯ç”¨å†…嵌的引导é…置。 + +注æ„,å³ä½¿ä½ å·²ç»è®¾ç½®äº†æ¤é€‰é¡¹ï¼Œä»å¯ç”¨é™„åŠ åˆ°initrd的其他引导é…置覆盖内嵌的引导 +é…置。 + +通过引导é…ç½®ä¼ é€’å†…æ ¸å‚æ•° +======================== + +é™¤äº†å†…æ ¸å‘½ä»¤è¡Œï¼Œå¼•å¯¼é…置也å¯ä»¥ç”¨äºŽä¼ é€’å†…æ ¸å‚数。所有 ``kernel`` 关键å—下的键 +å€¼å¯¹éƒ½å°†ç›´æŽ¥ä¼ é€’ç»™å†…æ ¸å‘½ä»¤è¡Œã€‚æ¤å¤–, ``init`` ä¸‹çš„é”®å€¼å¯¹å°†é€šè¿‡å‘½ä»¤è¡Œä¼ é€’ç»™ +init进程。å‚数按以下顺åºä¸Žç”¨æˆ·ç»™å®šçš„å†…æ ¸å‘½ä»¤è¡Œå—ç¬¦ä¸²ç›¸è¿žï¼Œå› æ¤å‘½ä»¤è¡Œå‚æ•°å¯ä»¥ +覆盖引导é…ç½®å‚数(这å–决于å系统如何处ç†å‚数,但通常å‰é¢çš„å‚数将被åŽé¢çš„å‚æ•° +覆盖):: + + [bootconfig params][cmdline params] -- [bootconfig init params][cmdline init params] + +如果引导é…置文件给出的kernel/initå‚数是:: + + kernel { + root = 01234567-89ab-cdef-0123-456789abcd + } + init { + splash + } + +这将被å¤åˆ¶åˆ°å†…æ ¸å‘½ä»¤è¡Œå—符串ä¸ï¼Œå¦‚下所示:: + + root="01234567-89ab-cdef-0123-456789abcd" -- splash + +如果用户给出的其他命令行是:: + + ro bootconfig -- quiet + +则最åŽçš„å†…æ ¸å‘½ä»¤è¡Œå¦‚ä¸‹:: + + root="01234567-89ab-cdef-0123-456789abcd" ro bootconfig -- splash quiet + + +é…置文件的é™åˆ¶ +============== + +当å‰æœ€å¤§çš„é…置大å°æ˜¯32KB,关键å—总数(ä¸æ˜¯é”®å€¼æ¡ç›®ï¼‰å¿…须少于1024个节点。 +注æ„:这ä¸æ˜¯æ¡ç›®æ•°è€Œæ˜¯èŠ‚点数,æ¡ç›®å¿…须消耗超过2个节点(一个关键å—和一个值)。 +所以从ç†è®ºä¸Šè®²æœ€å¤š512个键值对。如果关键å—å¹³å‡åŒ…å«3个å•è¯ï¼Œåˆ™å¯æœ‰256个键值对。 +在大多数情况下,é…置项的数é‡å°†å°‘于100个æ¡ç›®ï¼Œå°äºŽ8KBï¼Œå› æ¤è¿™åº”该足够了。如果 +节点数超过1024,解æžå™¨å°†è¿”回错误,å³ä½¿æ–‡ä»¶å¤§å°å°äºŽ32KB。(请注æ„,æ¤æœ€å¤§å°ºå¯¸ +ä¸åŒ…括填充的空å—符。) +æ— è®ºå¦‚ä½•ï¼Œå› ä¸º ``bootconfig`` å‘½ä»¤åœ¨é™„åŠ å¯åŠ¨é…置到initrdæ˜ åƒæ—¶ä¼šéªŒè¯å®ƒï¼Œç”¨æˆ· +å¯ä»¥åœ¨å¼•å¯¼ä¹‹å‰æ³¨æ„到它。 + + +引导é…ç½®API +=========== + +用户å¯ä»¥æŸ¥è¯¢æˆ–é历键值对,也å¯ä»¥æŸ¥æ‰¾ï¼ˆå‰ç¼€ï¼‰æ ¹å…³é”®å—节点,并在查找该节点下的 +键值。 + +如果您有一个关键å—å—符串,则å¯ä»¥ç›´æŽ¥ä½¿ç”¨ xbc_find_value() 查询该键的值。如果 +ä½ æƒ³çŸ¥é“引导é…置里有哪些关键å—,å¯ä»¥ä½¿ç”¨ xbc_for_each_key_value() è¿ä»£é”®å€¼å¯¹ã€‚ +请注æ„,您需è¦ä½¿ç”¨ xbc_array_for_each_value() 访问数组的值,例如:: + + vnode = NULL; + xbc_find_value("key.word", &vnode); + if (vnode && xbc_node_is_array(vnode)) + xbc_array_for_each_value(vnode, value) { + printk("%s ", value); + } + +如果您想查找具有å‰ç¼€å—符串的键,å¯ä»¥ä½¿ç”¨ xbc_find_node() 通过å‰ç¼€å—符串查找 +节点,然åŽç”¨ xbc_node_for_each_key_value() è¿ä»£å‰ç¼€èŠ‚点下的键。 + +但最典型的用法是获å–å‰ç¼€ä¸‹çš„命å值或å‰ç¼€ä¸‹çš„命å数组,例如:: + + root = xbc_find_node("key.prefix"); + value = xbc_node_find_value(root, "option", &vnode); + ... + xbc_node_for_each_array_value(root, "array-option", value, anode) { + ... + } + +这将访问值“key.prefix.optionâ€çš„值和“key.prefix.array-optionâ€çš„数组。 + +é”是ä¸éœ€è¦çš„ï¼Œå› ä¸ºåœ¨åˆå§‹åŒ–之åŽé…ç½®åªè¯»ã€‚如果需è¦ä¿®æ”¹ï¼Œå¿…é¡»å¤åˆ¶æ‰€æœ‰æ•°æ®å’Œå…³é”®å—。 + + +函数与结构体 +============ + +相关定义的kernel-docå‚è§ï¼š + + - include/linux/bootconfig.h + - lib/bootconfig.c diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index 2f6970d0a032..ac2960da33e6 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -63,6 +63,7 @@ Todolist: .. toctree:: :maxdepth: 1 + bootconfig clearing-warn-once cpu-load cputopology @@ -80,7 +81,6 @@ Todolist: * binderfs * binfmt-misc * blockdev/index -* bootconfig * braille-console * btmrvl * cgroup-v1/index diff --git a/Documentation/translations/zh_CN/core-api/circular-buffers.rst b/Documentation/translations/zh_CN/core-api/circular-buffers.rst new file mode 100644 index 000000000000..694ad8e61070 --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/circular-buffers.rst @@ -0,0 +1,210 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/circular-buffers.rst + +:翻译: + + 周彬彬 Binbin Zhou <zhoubinbin@loongson.cn> + +:æ ¡è¯‘: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + å´æƒ³æˆ Wu Xiangcheng <bobwxc@email.cn> + 时奎亮 Alex Shi <alexs@kernel.org> + +========== +环形缓冲区 +========== + +:作者: David Howells <dhowells@redhat.com> +:作者: Paul E. McKenney <paulmck@linux.ibm.com> + + +Linux æ供了许多å¯ç”¨äºŽå®žçŽ°å¾ªçŽ¯ç¼“å†²çš„ç‰¹æ€§ã€‚æœ‰ä¸¤ç»„è¿™æ ·çš„ç‰¹æ€§ï¼š + + (1) 用于确定2次方大å°çš„缓冲区信æ¯çš„便利函数。 + + (2) å¯ä»¥ä»£æ›¿ç¼“冲区ä¸å¯¹è±¡çš„生产者和消费者共享é”的内å˜å±éšœã€‚ + +如下所述,è¦ä½¿ç”¨è¿™äº›è®¾æ–½ï¼Œåªéœ€è¦ä¸€ä¸ªç”Ÿäº§è€…和一个消费者。å¯ä»¥é€šè¿‡åºåˆ—化æ¥å¤„ç†å¤šä¸ª +生产者,并通过åºåˆ—化æ¥å¤„ç†å¤šä¸ªæ¶ˆè´¹è€…。 + +.. Contents: + + (*) 什么是环形缓冲区? + + (*) 测é‡2次幂缓冲区 + + (*) 内å˜å±éšœä¸ŽçŽ¯å½¢ç¼“冲区的结åˆä½¿ç”¨ + - 生产者 + - 消费者 + + (*) 延伸阅读 + + + +什么是环形缓冲区? +================== + +首先,什么是环形缓冲区?环形缓冲区是具有固定的有é™å¤§å°çš„缓冲区,它有两个索引: + + (1) 'head'索引 - ç”Ÿäº§è€…å°†å…ƒç´ æ’入缓冲区的ä½ç½®ã€‚ + + (2) 'tail'索引 - 消费者在缓冲区ä¸æ‰¾åˆ°ä¸‹ä¸€ä¸ªå…ƒç´ çš„ä½ç½®ã€‚ + +通常,当tail指针ç‰äºŽhead指针时,表明缓冲区是空的;而当head指针比tail指针少一个时, +表明缓冲区是满的。 + +æ·»åŠ å…ƒç´ æ—¶ï¼Œé€’å¢žheadç´¢å¼•ï¼›åˆ é™¤å…ƒç´ æ—¶ï¼Œé€’å¢žtail索引。tail索引ä¸åº”该跳过head索引, +两个索引在到达缓冲区末端时都应该被赋值为0,从而å…许海é‡çš„æ•°æ®æµè¿‡ç¼“冲区。 + +é€šå¸¸æƒ…å†µä¸‹ï¼Œå…ƒç´ éƒ½æœ‰ç›¸åŒçš„å•å…ƒå¤§å°ï¼Œä½†è¿™å¹¶ä¸æ˜¯ä½¿ç”¨ä»¥ä¸‹æŠ€æœ¯çš„ä¸¥æ ¼è¦æ±‚。如果è¦åœ¨ç¼“ +冲区ä¸åŒ…å«å¤šä¸ªå…ƒç´ 或å¯å˜å¤§å°çš„å…ƒç´ ï¼Œåˆ™ç´¢å¼•å¯ä»¥å¢žåŠ 超过1,å‰æ是两个索引都没有超过 +å¦ä¸€ä¸ªã€‚然而,实现者必须å°å¿ƒï¼Œå› 为超过一个å•ä½å¤§å°çš„区域å¯èƒ½ä¼šè¦†ç›–缓冲区的末端并 +且缓冲区会被分æˆä¸¤æ®µã€‚ + +测é‡2次幂缓冲区 +=============== + +计算任æ„大å°çš„环形缓冲区的å 用或剩余容é‡é€šå¸¸æ˜¯ä¸€ä¸ªè´¹æ—¶çš„æ“作,需è¦ä½¿ç”¨æ¨¡ï¼ˆé™¤æ³•ï¼‰ +指令。但是如果缓冲区的大å°ä¸º2次幂,则å¯ä»¥ä½¿ç”¨æ›´å¿«çš„按ä½ä¸ŽæŒ‡ä»¤ä»£æ›¿ã€‚ + +Linuxæ供了一组用于处ç†2次幂环形缓冲区的å®ã€‚å¯ä»¥é€šè¿‡ä»¥ä¸‹æ–¹å¼ä½¿ç”¨:: + + #include <linux/circ_buf.h> + +这些å®åŒ…括: + + (#) 测é‡ç¼“冲区的剩余容é‡:: + + CIRC_SPACE(head_index, tail_index, buffer_size); + + 返回缓冲区[1]ä¸å¯æ’å…¥å…ƒç´ çš„å‰©ä½™ç©ºé—´å¤§å°ã€‚ + + + (#) 测é‡ç¼“冲区ä¸çš„最大连ç»ç«‹å³å¯ç”¨ç©ºé—´:: + + CIRC_SPACE_TO_END(head_index, tail_index, buffer_size); + + 返回缓冲区[1]ä¸å‰©ä½™çš„è¿žç»ç©ºé—´çš„大å°ï¼Œå…ƒç´ å¯ä»¥ç«‹å³æ’入其ä¸ï¼Œè€Œä¸å¿…绕回到缓冲 + 区的开头。 + + + (#) 测é‡ç¼“冲区的使用数:: + + CIRC_CNT(head_index, tail_index, buffer_size); + + 返回当å‰å 用缓冲区[2]çš„å…ƒç´ æ•°é‡ã€‚ + + + (#) 测é‡ç¼“冲区的连ç»ä½¿ç”¨æ•°:: + + CIRC_CNT_TO_END(head_index, tail_index, buffer_size); + + 返回å¯ä»¥ä»Žç¼“冲区ä¸æå–çš„è¿žç»å…ƒç´ [2]çš„æ•°é‡ï¼Œè€Œä¸å¿…绕回到缓冲区的开头。 + +这里的æ¯ä¸€ä¸ªå®å义上都会返回一个介于0å’Œbuffer_size-1之间的值,但是: + + (1) CIRC_SPACE*()是为了在生产者ä¸ä½¿ç”¨ã€‚对生产者æ¥è¯´ï¼Œå®ƒä»¬å°†è¿”回一个下é™ï¼Œå› 为生 + 产者控制ç€head索引,但消费者å¯èƒ½ä»ç„¶åœ¨å¦ä¸€ä¸ªCPU上耗尽缓冲区并移动tail索引。 + + 对消费者æ¥è¯´ï¼Œå®ƒå°†æ˜¾ç¤ºä¸€ä¸ªä¸Šé™ï¼Œå› 为生产者å¯èƒ½æ£å¿™äºŽè€—尽空间。 + + (2) CIRC_CNT*()是为了在消费者ä¸ä½¿ç”¨ã€‚对消费者æ¥è¯´ï¼Œå®ƒä»¬å°†è¿”回一个下é™ï¼Œå› 为消费 + 者控制ç€tail索引,但生产者å¯èƒ½ä»ç„¶åœ¨å¦ä¸€ä¸ªCPU上填充缓冲区并移动head索引。 + + 对于生产者,它将显示一个上é™ï¼Œå› 为消费者å¯èƒ½æ£å¿™äºŽæ¸…空缓冲区。 + + (3) 对于第三方æ¥è¯´ï¼Œç”Ÿäº§è€…和消费者对索引的写入顺åºæ˜¯æ— 法ä¿è¯çš„ï¼Œå› ä¸ºå®ƒä»¬æ˜¯ç‹¬ç«‹çš„ï¼Œ + 而且å¯èƒ½æ˜¯åœ¨ä¸åŒçš„CPU上进行的,所以在这ç§æƒ…况下的结果åªæ˜¯ä¸€ç§çŒœæµ‹ï¼Œç”šè‡³å¯èƒ½ + 是错误的。 + +内å˜å±éšœä¸ŽçŽ¯å½¢ç¼“冲区的结åˆä½¿ç”¨ +============================== + +通过将内å˜å±éšœä¸ŽçŽ¯å½¢ç¼“冲区结åˆä½¿ç”¨ï¼Œå¯ä»¥é¿å…以下需求: + + (1) 使用å•ä¸ªé”æ¥æŽ§åˆ¶å¯¹ç¼“冲区两端的访问,从而å…许åŒæ—¶å¡«å……å’Œæ¸…ç©ºç¼“å†²åŒºï¼›ä»¥åŠ + + (2) 使用原å计数器æ“作。 + +这有两个方é¢ï¼šå¡«å……缓冲区的生产者和清空缓冲区的消费者。在任何时候,åªåº”有一个生产 +者在填充缓冲区,åŒæ ·çš„也åªåº”有一个消费者在清空缓冲区,但åŒæ–¹å¯ä»¥åŒæ—¶æ“作。 + + +生产者 +------ + +生产者看起æ¥åƒè¿™æ ·:: + + spin_lock(&producer_lock); + + unsigned long head = buffer->head; + /* spin_unlock()和下一个spin_lock()æ供必è¦çš„排åºã€‚ */ + unsigned long tail = READ_ONCE(buffer->tail); + + if (CIRC_SPACE(head, tail, buffer->size) >= 1) { + /* æ·»åŠ ä¸€ä¸ªå…ƒç´ åˆ°ç¼“å†²åŒº */ + struct item *item = buffer[head]; + + produce_item(item); + + smp_store_release(buffer->head, + (head + 1) & (buffer->size - 1)); + + /* wake_up()将确ä¿åœ¨å”¤é†’任何人之å‰æ交head */ + wake_up(consumer); + } + + spin_unlock(&producer_lock); + +这将表明CPU必须在head索引使其对消费者å¯ç”¨ä¹‹å‰å†™å…¥æ–°é¡¹ç›®çš„内容,åŒæ—¶CPU必须在唤醒 +消费者之å‰å†™å…¥ä¿®æ”¹åŽçš„head索引。 + +请注æ„,wake_up()并ä¸ä¿è¯ä»»ä½•å½¢å¼çš„å±éšœï¼Œé™¤éžç¡®å®žå”¤é†’了æŸäº›ä¸œè¥¿ã€‚å› æ¤æˆ‘们ä¸èƒ½ä¾é +它æ¥è¿›è¡ŒæŽ’åºã€‚但是数组ä¸å§‹ç»ˆæœ‰ä¸€ä¸ªå…ƒç´ ç•™ç©ºï¼Œå› æ¤ç”Ÿäº§è€…å¿…é¡»äº§ç”Ÿä¸¤ä¸ªå…ƒç´ ï¼Œç„¶åŽæ‰å¯ +èƒ½ç ´å消费者当å‰æ£åœ¨è¯»å–çš„å…ƒç´ ã€‚åŒæ—¶ï¼Œæ¶ˆè´¹è€…è¿žç»è°ƒç”¨ä¹‹é—´æˆå¯¹çš„解é”-åŠ é”æ供了索引 +读å–ï¼ˆæŒ‡ç¤ºæ¶ˆè´¹è€…å·²æ¸…ç©ºç»™å®šå…ƒç´ ï¼‰å’Œç”Ÿäº§è€…å¯¹è¯¥ç›¸åŒå…ƒç´ 的写入之间的必è¦é¡ºåºã€‚ + + +消费者 +------ + +消费者看起æ¥åƒè¿™æ ·:: + + spin_lock(&consumer_lock); + + /* 读å–该索引处的内容之å‰ï¼Œå…ˆè¯»å–索引 */ + unsigned long head = smp_load_acquire(buffer->head); + unsigned long tail = buffer->tail; + + if (CIRC_CNT(head, tail, buffer->size) >= 1) { + + /* 从缓冲区ä¸æå–ä¸€ä¸ªå…ƒç´ */ + struct item *item = buffer[tail]; + + consume_item(item); + + /* 在递增tail之å‰å®Œæˆå¯¹æ述符的读å–。 */ + smp_store_release(buffer->tail, + (tail + 1) & (buffer->size - 1)); + } + + spin_unlock(&consumer_lock); + +这表明CPU在读å–æ–°å…ƒç´ ä¹‹å‰ç¡®ä¿ç´¢å¼•æ˜¯æœ€æ–°çš„,然åŽåœ¨å†™å…¥æ–°çš„尾指针之å‰åº”ç¡®ä¿CPU已完 +æˆè¯»å–è¯¥å…ƒç´ ï¼Œè¿™å°†æ“¦é™¤è¯¥å…ƒç´ ã€‚ + +请注æ„,使用READ_ONCE()å’Œsmp_load_acquire()æ¥è¯»å–åå‘(head)索引。这å¯ä»¥é˜²æ¢ç¼–译 +器丢弃并é‡æ–°åŠ 载其缓å˜å€¼ã€‚如果您能确定åå‘(head)索引将仅使用一次,则这ä¸æ˜¯å¿…é¡» +的。smp_load_acquire()还å¯ä»¥å¼ºåˆ¶CPU对åŽç»çš„内å˜å¼•ç”¨è¿›è¡ŒæŽ’åºã€‚类似地,两ç§ç®—法都使 +用smp_store_release()æ¥å†™å…¥çº¿ç¨‹çš„索引。这记录了我们æ£åœ¨å†™å…¥å¯ä»¥å¹¶å‘读å–的内容的事 +实,以防æ¢ç¼–è¯‘å™¨ç ´åå˜å‚¨ï¼Œå¹¶å¼ºåˆ¶å¯¹ä»¥å‰çš„访问进行排åºã€‚ + + +延伸阅读 +======== + +关于Linux的内å˜å±éšœè®¾æ–½çš„æ述,请查看Documentation/memory-barriers.txt。 diff --git a/Documentation/translations/zh_CN/core-api/generic-radix-tree.rst b/Documentation/translations/zh_CN/core-api/generic-radix-tree.rst new file mode 100644 index 000000000000..eacd1d2ebddc --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/generic-radix-tree.rst @@ -0,0 +1,23 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/generic-radix-tree.rst + +:翻译: + + 周彬彬 Binbin Zhou <zhoubinbin@loongson.cn> + +=================== +é€šç”¨åŸºæ•°æ ‘/稀ç–数组 +=================== + +é€šç”¨åŸºæ•°æ ‘/稀ç–数组的相关内容请è§include/linux/generic-radix-tree.h文件ä¸çš„ +“DOC: Generic radix trees/sparse arraysâ€ã€‚ + +é€šç”¨åŸºæ•°æ ‘å‡½æ•° +-------------- + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/generic-radix-tree.h diff --git a/Documentation/translations/zh_CN/core-api/idr.rst b/Documentation/translations/zh_CN/core-api/idr.rst new file mode 100644 index 000000000000..97a16e76b81b --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/idr.rst @@ -0,0 +1,80 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/idr.rst + +:翻译: + + 周彬彬 Binbin Zhou <zhoubinbin@loongson.cn> + +:æ ¡è¯‘: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + å´æƒ³æˆ Wu Xiangcheng <bobwxc@email.cn> + 时奎亮 Alex Shi <alexs@kernel.org> + +====== +IDåˆ†é… +====== + +:作者: Matthew Wilcox + +概述 +==== + +è¦è§£å†³çš„一个常è§é—®é¢˜æ˜¯åˆ†é…æ ‡è¯†ç¬¦ï¼ˆIDsï¼‰ï¼›å®ƒé€šå¸¸æ˜¯æ ‡è¯†äº‹ç‰©çš„æ•°å—。比如包括文件æè¿° +符ã€è¿›ç¨‹IDã€ç½‘络åè®®ä¸çš„æ•°æ®åŒ…æ ‡è¯†ç¬¦ã€SCSIæ ‡è®°å’Œè®¾å¤‡å®žä¾‹ç¼–å·ã€‚IDRå’ŒIDA为这个问题 +æ供了一个åˆç†çš„解决方案,以é¿å…æ¯ä¸ªäººéƒ½è‡ªåˆ›ã€‚IDRæ供将IDæ˜ å°„åˆ°æŒ‡é’ˆçš„èƒ½åŠ›ï¼Œè€ŒIDA +ä»…æä¾›ID分é…ï¼Œå› æ¤å†…å˜æ•ˆçŽ‡æ›´é«˜ã€‚ + +IDR接å£å·²ç»è¢«åºŸå¼ƒï¼Œè¯·ä½¿ç”¨ ``XArray`` 。 + +IDR的用法 +========= + +首先åˆå§‹åŒ–一个IDR,对于é™æ€åˆ†é…çš„IDR使用DEFINE_IDR(),或者对于动æ€åˆ†é…çš„IDR使用 +idr_init()。 + +您å¯ä»¥è°ƒç”¨idr_alloc()æ¥åˆ†é…一个未使用的ID。通过调用idr_find()查询与该ID相关的指针, +并通过调用idr_remove()释放该ID。 + +如果需è¦æ›´æ”¹ä¸Žä¸€ä¸ªID相关è”的指针,å¯ä»¥è°ƒç”¨idr_replace()ã€‚è¿™æ ·åšçš„一个常è§åŽŸå› 是通 +过将 ``NULL`` æŒ‡é’ˆä¼ é€’ç»™åˆ†é…函数æ¥ä¿ç•™ID;用ä¿ç•™çš„IDåˆå§‹åŒ–对象,最åŽå°†åˆå§‹åŒ–的对 +象æ’å…¥IDR。 + +一些用户需è¦åˆ†é…大于 ``INT_MAX`` çš„ID。到目å‰ä¸ºæ¢ï¼Œæ‰€æœ‰è¿™äº›ç”¨æˆ·éƒ½æ»¡è¶³ ``UINT_MAX`` +çš„é™åˆ¶ï¼Œä»–们使用idr_alloc_u32()。如果您需è¦è¶…出u32çš„ID,我们将与您åˆä½œä»¥æ»¡è¶³æ‚¨çš„ +需求。 + +如果需è¦æŒ‰é¡ºåºåˆ†é…ID,å¯ä»¥ä½¿ç”¨idr_alloc_cyclic()。处ç†è¾ƒå¤§æ•°é‡çš„ID时,IDR的效率会 +é™ä½Žï¼Œæ‰€ä»¥ä½¿ç”¨è¿™ä¸ªå‡½æ•°ä¼šæœ‰ä¸€ç‚¹ä»£ä»·ã€‚ + +è¦å¯¹IDR使用的所有指针进行æ“作,您å¯ä»¥ä½¿ç”¨åŸºäºŽå›žè°ƒçš„idr_for_each()或è¿ä»£å™¨æ ·å¼çš„ +idr_for_each_entry()。您å¯èƒ½éœ€è¦ä½¿ç”¨idr_for_each_entry_continue()æ¥ç»§ç»è¿ä»£ã€‚如果 +è¿ä»£å™¨ä¸ç¬¦åˆæ‚¨çš„需求,您也å¯ä»¥ä½¿ç”¨idr_get_next()。 + +当使用完IDRåŽï¼Œæ‚¨å¯ä»¥è°ƒç”¨idr_destroy()æ¥é‡Šæ”¾IDRå 用的内å˜ã€‚这并ä¸ä¼šé‡Šæ”¾IDR指å‘çš„ +å¯¹è±¡ï¼›å¦‚æžœæ‚¨æƒ³è¿™æ ·åšï¼Œè¯·ä½¿ç”¨å…¶ä¸ä¸€ä¸ªè¿ä»£å™¨æ¥æ‰§è¡Œæ¤æ“作。 + +您å¯ä»¥ä½¿ç”¨idr_is_empty()æ¥æŸ¥çœ‹å½“å‰æ˜¯å¦åˆ†é…了任何ID。 + +如果在从IDR分é…一个新ID时需è¦å¸¦é”,您å¯èƒ½éœ€è¦ä¼ 递一组é™åˆ¶æ€§çš„GFPæ ‡å¿—ï¼Œä½†è¿™å¯èƒ½å¯¼ +致IDRæ— æ³•åˆ†é…内å˜ã€‚为了解决该问题,您å¯ä»¥åœ¨èŽ·å–é”之å‰è°ƒç”¨idr_preload(),然åŽåœ¨åˆ† +é…之åŽè°ƒç”¨idr_preload_end()。 + +IDRåŒæ¥çš„相关内容请è§include/linux/idr.h文件ä¸çš„“DOC: idr syncâ€ã€‚ + +IDA的用法 +========= + +IDA的用法的相关内容请è§lib/idr.c文件ä¸çš„“DOC: IDA descriptionâ€ã€‚ + +函数和数æ®ç»“æž„ +============== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/idr.h + +lib/idr.c diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst index 8a94ad87465d..37756d240b5e 100644 --- a/Documentation/translations/zh_CN/core-api/index.rst +++ b/Documentation/translations/zh_CN/core-api/index.rst @@ -44,15 +44,15 @@ assoc_array xarray rbtree + idr + circular-buffers + generic-radix-tree + packing Todolist: - idr - circular-buffers - generic-radix-tree - packing this_cpu_ops timekeeping errseq diff --git a/Documentation/translations/zh_CN/core-api/packing.rst b/Documentation/translations/zh_CN/core-api/packing.rst new file mode 100644 index 000000000000..c0aab3a349d0 --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/packing.rst @@ -0,0 +1,160 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/packing.rst + +:翻译: + + 周彬彬 Binbin Zhou <zhoubinbin@loongson.cn> + +:æ ¡è¯‘: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + å´æƒ³æˆ Wu Xiangcheng <bobwxc@email.cn> + 时奎亮 Alex Shi <alexs@kernel.org> + +======================== +通用的ä½åŸŸæ‰“包和解包函数 +======================== + +问题陈述 +-------- + +ä½¿ç”¨ç¡¬ä»¶æ—¶ï¼Œå¿…é¡»åœ¨å‡ ç§ä¸Žå…¶äº¤äº’的方法之间进行选择。 + +å¯ä»¥å°†æŒ‡é’ˆæ˜ 射到在硬件设备的内å˜åŒºä¸Šç²¾å¿ƒè®¾è®¡çš„结构体,并将其å—段作为结构æˆå‘˜ï¼ˆå¯ +能声明为ä½åŸŸï¼‰è®¿é—®ã€‚但是由于CPU和硬件设备之间潜在的å—节顺åºä¸åŒ¹é…,以这ç§æ–¹å¼ç¼–写 +代ç 会é™ä½Žå…¶å¯ç§»æ¤æ€§ã€‚ + +æ¤å¤–,必须密切注æ„将硬件文档ä¸çš„寄å˜å™¨å®šä¹‰è½¬æ¢ä¸ºç»“æž„çš„ä½åŸŸç´¢å¼•ã€‚æ¤å¤–,一些硬件 +(通常是网络设备)倾å‘于以è¿å任何åˆç†å—边界(有时甚至是64ä½ï¼‰çš„æ–¹å¼å¯¹å…¶å¯„å˜å™¨å— +æ®µè¿›è¡Œåˆ†ç»„ã€‚è¿™å°±é€ æˆäº†ä¸å¾—ä¸åœ¨ç»“æž„ä¸å®šä¹‰å¯„å˜å™¨å—段的“高â€å’Œâ€œä½Žâ€éƒ¨åˆ†çš„ä¸ä¾¿ã€‚ + +结构域定义的更å¯é 的替代方法是通过移动适当数é‡çš„ä½æ¥æå–所需的å—段。但这ä»ç„¶ä¸èƒ½ +防æ¢å—节顺åºä¸åŒ¹é…,除éžæ‰€æœ‰å†…å˜è®¿é—®éƒ½æ˜¯é€å—节执行的。æ¤å¤–,代ç 很容易å˜å¾—æ‚ä¹±æ— +ç« ï¼ŒåŒæ—¶å¯èƒ½ä¼šåœ¨æ‰€éœ€çš„许多ä½ç§»æ“作ä¸ä¸¢å¤±ä¸€äº›é«˜å±‚次的想法。 + +许多驱动程åºé‡‡ç”¨äº†ä½ç§»çš„方法,然åŽè¯•å›¾ç”¨å®šåˆ¶çš„å®æ¥å‡å°‘æ‚ä¹±æ— ç« çš„ä¸œè¥¿ï¼Œä½†æ›´å¤šçš„æ—¶ +候,这些å®æ‰€é‡‡ç”¨çš„æ·å¾„ä¾æ—§å¦¨ç¢äº†ä»£ç 真æ£çš„å¯ç§»æ¤æ€§ã€‚ + +解决方案 +-------- + +该API涉åŠ2个基本æ“作: + + - 将一个CPUå¯ä½¿ç”¨çš„æ•°å—打包到内å˜ç¼“冲区ä¸ï¼ˆå…·æœ‰ç¡¬ä»¶çº¦æŸ/特殊性)。 + - 将内å˜ç¼“冲区(具有硬件约æŸ/特殊性)解压缩为一个CPUå¯ä½¿ç”¨çš„æ•°å—。 + +该APIæ供了对所述硬件约æŸå’Œç‰¹æ®Šæ€§ä»¥åŠCPUå—节åºçš„æŠ½è±¡ï¼Œå› æ¤è¿™ä¸¤è€…之间å¯èƒ½ä¸åŒ¹é…。 + +这些API函数的基本å•å…ƒæ˜¯u64。从CPU的角度æ¥çœ‹ï¼Œä½63总是æ„味ç€å—节7çš„ä½å移é‡7,尽管 +åªæ˜¯é€»è¾‘上的。问题是:我们将这个比特放在内å˜çš„什么ä½ç½®ï¼Ÿ + +以下示例介ç»äº†æ‰“包u64å—段的内å˜å¸ƒå±€ã€‚打包缓冲区ä¸çš„å—节å移é‡å§‹ç»ˆé»˜è®¤ä¸º0,1...7。 +示例显示的是逻辑å—节和ä½æ‰€åœ¨çš„ä½ç½®ã€‚ + +1. é€šå¸¸æƒ…å†µä¸‹ï¼ˆæ— ç‰¹æ®Šæ€§ï¼‰ï¼Œæˆ‘ä»¬ä¼šè¿™æ ·åšï¼š + +:: + + 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 + 7 6 5 4 + 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + 3 2 1 0 + +也就是说,CPUå¯ä½¿ç”¨çš„u64çš„MSByte(7)ä½äºŽå†…å˜å移é‡0处,而u64çš„LSByte(0)ä½äºŽå†…å˜å移é‡7处。 + +这对应于大多数人认为的“大端â€ï¼Œå…¶ä¸ä½i对应于数å—2^i。这在代ç 注释ä¸ä¹Ÿç§°ä¸ºâ€œé€»è¾‘â€ç¬¦å·ã€‚ + + +2. 如果设置了QUIRK_MSB_ON_THE_RIGHT,我们按如下方å¼æ“作: + +:: + + 56 57 58 59 60 61 62 63 48 49 50 51 52 53 54 55 40 41 42 43 44 45 46 47 32 33 34 35 36 37 38 39 + 7 6 5 4 + 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7 + 3 2 1 0 + +也就是说,QUIRK_MSB_ON_THE_RIGHTä¸ä¼šå½±å“å—节定ä½ï¼Œä½†ä¼šå转å—节内的ä½å移é‡ã€‚ + + +3. 如果设置了QUIRK_LITTLE_ENDIAN,我们按如下方å¼æ“作: + +:: + + 39 38 37 36 35 34 33 32 47 46 45 44 43 42 41 40 55 54 53 52 51 50 49 48 63 62 61 60 59 58 57 56 + 4 5 6 7 + 7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 + 0 1 2 3 + +å› æ¤ï¼ŒQUIRK_LITTLE_ENDIANæ„味ç€åœ¨å†…å˜åŒºåŸŸå†…,æ¯ä¸ª4å—节的å—çš„æ¯ä¸ªå—节都被放置在与 +该å—的边界相比的镜åƒä½ç½®ã€‚ + + +4. 如果设置了QUIRK_MSB_ON_THE_RIGHTå’ŒQUIRK_LITTLE_ENDIANï¼Œæˆ‘ä»¬è¿™æ ·åšï¼š + +:: + + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 + 4 5 6 7 + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0 1 2 3 + + +5. 如果åªè®¾ç½®äº†QUIRK_LSW32_IS_FIRSTï¼Œæˆ‘ä»¬è¿™æ ·åšï¼š + +:: + + 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + 3 2 1 0 + 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 + 7 6 5 4 + +在这ç§æƒ…况下,8å—节内å˜åŒºåŸŸè§£é‡Šå¦‚下:å‰4å—节对应最ä¸é‡è¦çš„4å—节的å—,åŽ4å—节对应 +æ›´é‡è¦çš„4å—节的å—。 + +6. 如果设置了QUIRK_LSW32_IS_FIRSTå’ŒQUIRK_MSB_ON_THE_RIGHTï¼Œæˆ‘ä»¬è¿™æ ·åšï¼š + +:: + + 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7 + 3 2 1 0 + 56 57 58 59 60 61 62 63 48 49 50 51 52 53 54 55 40 41 42 43 44 45 46 47 32 33 34 35 36 37 38 39 + 7 6 5 4 + + +7. 如果设置了QUIRK_LSW32_IS_FIRSTå’ŒQUIRK_LITTLE_ENDIAN,则如下所示: + +:: + + 7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 + 0 1 2 3 + 39 38 37 36 35 34 33 32 47 46 45 44 43 42 41 40 55 54 53 52 51 50 49 48 63 62 61 60 59 58 57 56 + 4 5 6 7 + + +8. 如果设置了QUIRK_LSW32_IS_FIRST,QUIRK_LITTLE_ENDIANå’ŒQUIRK_MSB_ON_THE_RIGHT, + 则如下所示: + +:: + + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0 1 2 3 + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 + 4 5 6 7 + + +我们总是认为我们的å移é‡å¥½åƒæ²¡æœ‰ç‰¹æ®Šæ€§ï¼Œç„¶åŽåœ¨è®¿é—®å†…å˜åŒºåŸŸä¹‹å‰ç¿»è¯‘它们。 + +预期用途 +-------- + +选择使用该API的驱动程åºé¦–先需è¦ç¡®å®šä¸Šè¿°3ç§quirk组åˆï¼ˆå…±8ç§ï¼‰ä¸çš„哪一ç§ä¸Žç¡¬ä»¶æ–‡æ¡£ +ä¸æ述的相匹é…。然åŽï¼Œä»–们应该å°è£…packing()函数,创建一个新的xxx_packing(),使用 +适当的QUIRK_* one-hot ä½é›†åˆæ¥è°ƒç”¨å®ƒã€‚ + +packing()函数返回一个int类型的错误ç ,以防æ¢ç¨‹åºå‘˜ä½¿ç”¨ä¸æ£ç¡®çš„APIã€‚è¿™äº›é”™è¯¯é¢„è®¡ä¸ +会在è¿è¡Œæ—¶å‘ç”Ÿï¼Œå› æ¤xxx_packing()返回void并简å•åœ°æŽ¥å—这些错误是åˆç†çš„。它å¯ä»¥é€‰æ‹© +è½¬å‚¨æ ˆæˆ–æ‰“å°é”™è¯¯æ述。 diff --git a/Documentation/translations/zh_CN/devicetree/changesets.rst b/Documentation/translations/zh_CN/devicetree/changesets.rst new file mode 100644 index 000000000000..2ace05f3c377 --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/changesets.rst @@ -0,0 +1,37 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/changesets.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +============ +è®¾å¤‡æ ‘å˜æ›´é›† +============ + +è®¾å¤‡æ ‘å˜æ›´é›†æ˜¯ä¸€ç§æ–¹æ³•ï¼Œå®ƒå…è®¸äººä»¬ä»¥è¿™æ ·ä¸€ç§æ–¹å¼åœ¨å®žæ—¶æ ‘ä¸ä½¿ç”¨å˜åŒ–,å³è¦ä¹ˆä½¿ç”¨å…¨éƒ¨çš„ +å˜åŒ–,è¦ä¹ˆä¸ä½¿ç”¨ã€‚如果在使用å˜æ›´é›†çš„过程ä¸å‘ç”Ÿé”™è¯¯ï¼Œé‚£ä¹ˆæ ‘å°†è¢«å›žæ»šåˆ°ä¹‹å‰çš„状æ€ã€‚一个 +å˜æ›´é›†ä¹Ÿå¯ä»¥åœ¨ä½¿ç”¨åŽè¢«åˆ 除。 + +当一个å˜æ›´é›†è¢«ä½¿ç”¨æ—¶ï¼Œæ‰€æœ‰çš„改å˜åœ¨å‘出OF_RECONFIG通知器之å‰è¢«ä¸€æ¬¡æ€§ä½¿ç”¨åˆ°æ ‘上。这是 +为了让接收者在收到通知时看到一个完整的ã€ä¸€è‡´çš„æ ‘çš„çŠ¶æ€ã€‚ + +一个å˜åŒ–集的顺åºå¦‚下。 + +1. of_changeset_init() - åˆå§‹åŒ–一个å˜æ›´é›†ã€‚ + +2. 一些DTæ ‘å˜åŒ–的调用,of_changeset_attach_node(), of_changeset_detach_node(), + of_changeset_add_property(), of_changeset_remove_property, + of_changeset_update_property()æ¥å‡†å¤‡ä¸€ç»„å˜æ›´ã€‚æ¤æ—¶ä¸ä¼šå¯¹æ´»åŠ¨æ ‘åšä»»ä½•å˜æ›´ã€‚所有 + çš„å˜æ›´æ“作都记录在of_changesetçš„ `entries` 列表ä¸ã€‚ + +3. of_changeset_apply() - å°†å˜æ›´ä½¿ç”¨åˆ°æ ‘上。è¦ä¹ˆæ•´ä¸ªå˜æ›´é›†è¢«ä½¿ç”¨ï¼Œè¦ä¹ˆå¦‚果有错误, + æ ‘ä¼šè¢«æ¢å¤åˆ°ä¹‹å‰çš„状æ€ã€‚æ ¸å¿ƒé€šè¿‡é”ç¡®ä¿æ£ç¡®çš„顺åºã€‚如果需è¦çš„è¯ï¼Œå¯ä»¥ä½¿ç”¨ä¸€ä¸ªè§£é”çš„ + __of_changeset_apply版本。 + +如果一个æˆåŠŸä½¿ç”¨çš„å˜æ›´é›†éœ€è¦è¢«åˆ 除,å¯ä»¥ç”¨of_changeset_revert()æ¥å®Œæˆã€‚ diff --git a/Documentation/translations/zh_CN/devicetree/dynamic-resolution-notes.rst b/Documentation/translations/zh_CN/devicetree/dynamic-resolution-notes.rst new file mode 100644 index 000000000000..115190341305 --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/dynamic-resolution-notes.rst @@ -0,0 +1,31 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/dynamic-resolution-notes.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +======================== +Devicetree动æ€è§£æžå™¨è¯´æ˜Ž +======================== + +本文æè¿°äº†å†…æ ¸å†…DeviceTree解æžå™¨çš„实现,它ä½äºŽdrivers/of/resolver.cä¸ã€‚ + +解æžå™¨å¦‚何工作? +---------------- + +解æžå™¨è¢«èµ‹äºˆä¸€ä¸ªä»»æ„çš„æ ‘ä½œä¸ºè¾“å…¥ï¼Œè¯¥æ ‘ç”¨é€‚å½“çš„dtc选项编译,并有一个/plugin/æ ‡ç¾ã€‚这就产 +生了适当的__fixups__å’Œ__local_fixups__节点。 + +解æžå™¨ä¾æ¬¡é€šè¿‡ä»¥ä¸‹æ¥éª¤å·¥ä½œ: + +1. ä»Žå®žæ—¶æ ‘ä¸èŽ·å–æœ€å¤§çš„è®¾å¤‡æ ‘phandle值 + 1. +2. è°ƒæ•´æ ‘çš„æ‰€æœ‰æœ¬åœ° phandles,以解决这个é‡ã€‚ +3. 使用 __local__fixups__ 节点信æ¯ä»¥ç›¸åŒçš„é‡è°ƒæ•´æ‰€æœ‰æœ¬åœ°å¼•ç”¨ã€‚ +4. 对于__fixups__节点ä¸çš„æ¯ä¸ªå±žæ€§ï¼Œæ‰¾åˆ°å®ƒåœ¨å®žæ—¶æ ‘ä¸å¼•ç”¨çš„节点。这是用æ¥æ ‡è®°è¯¥èŠ‚ç‚¹çš„æ ‡ç¾ã€‚ +5. 检索fixupçš„ç›®æ ‡çš„phandle。 +6. 对于属性ä¸çš„æ¯ä¸ªfixup,找到节点:属性:å移的ä½ç½®ï¼Œå¹¶ç”¨phandle值替æ¢å®ƒã€‚ diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst index 3fc355fe0037..7451dbfdd3e5 100644 --- a/Documentation/translations/zh_CN/devicetree/index.rst +++ b/Documentation/translations/zh_CN/devicetree/index.rst @@ -24,21 +24,16 @@ Open Firmware å’Œ Devicetree usage-model of_unittest - -Todolist: - -* kernel-api + kernel-api Devicetree Overlays =================== .. toctree:: :maxdepth: 1 -Todolist: - -* changesets -* dynamic-resolution-notes -* overlay-notes + changesets + dynamic-resolution-notes + overlay-notes Devicetree Bindings =================== diff --git a/Documentation/translations/zh_CN/devicetree/kernel-api.rst b/Documentation/translations/zh_CN/devicetree/kernel-api.rst new file mode 100644 index 000000000000..6aa3b685494e --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/kernel-api.rst @@ -0,0 +1,58 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/kernel-api.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +================= +å†…æ ¸ä¸çš„è®¾å¤‡æ ‘API +================= + +æ ¸å¿ƒå‡½æ•° +-------- + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/of/base.c + +include/linux/of.h + +drivers/of/property.c + +include/linux/of_graph.h + +drivers/of/address.c + +drivers/of/irq.c + +drivers/of/fdt.c + +驱动模型函数 +------------ + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/of_device.h + +drivers/of/device.c + +include/linux/of_platform.h + +drivers/of/platform.c + +覆盖和动æ€DT函数 +---------------- + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/of/resolver.c + +drivers/of/dynamic.c + +drivers/of/overlay.c diff --git a/Documentation/translations/zh_CN/devicetree/overlay-notes.rst b/Documentation/translations/zh_CN/devicetree/overlay-notes.rst new file mode 100644 index 000000000000..1bd482cb0a1b --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/overlay-notes.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/overlay-notes.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +============== +è®¾å¤‡æ ‘è¦†ç›–è¯´æ˜Ž +============== + +本文档æ述了drivers/of/overlay.cä¸çš„å†…æ ¸å†…è®¾å¤‡æ ‘è¦†ç›–åŠŸèƒ½çš„å®žçŽ°ï¼Œæ˜¯ +Documentation/devicetree/dynamic-resolution-notes.rst[1]çš„é…套文档。 + +覆盖如何工作 +------------ + +è®¾å¤‡æ ‘è¦†ç›–çš„ç›®çš„æ˜¯ä¿®æ”¹å†…æ ¸çš„å®žæ—¶æ ‘ï¼Œå¹¶ä½¿ä¿®æ”¹ä»¥åæ˜ å˜åŒ–çš„æ–¹å¼å½±å“å†…æ ¸çš„çŠ¶æ€ã€‚ +ç”±äºŽå†…æ ¸ä¸»è¦å¤„ç†çš„是设备,任何新的设备节点如果导致一个活动的设备,就应该创建它, +而如果设备节点被ç¦ç”¨æˆ–è¢«å…¨éƒ¨åˆ é™¤ï¼Œå—å½±å“的设备应该被å–消注册。 + +让我们举个例å,我们有一个fooæ¿ï¼Œå®ƒçš„åŸºæœ¬æ ‘å½¢å›¾å¦‚ä¸‹:: + + ---- foo.dts --------------------------------------------------------------- + /* FOOå¹³å° */ + /dts-v1/; + / { + compatible = "corp,foo"; + + /* å…±äº«çš„èµ„æº */ + res: res { + }; + + /* 芯片上的外围设备 */ + ocp: ocp { + /* 总是被实例化的外围设备 */ + peripheral1 { ... }; + }; + }; + ---- foo.dts --------------------------------------------------------------- + +覆盖bar.dts, +:: + + ---- bar.dts - æŒ‰æ ‡ç¾è¦†ç›–ç›®æ ‡ä½ç½® ---------------------------- + /dts-v1/; + /æ’件/; + &ocp { + /* bar外围 */ + bar { + compatible = "corp,bar"; + ... /* å„ç§å±žæ€§å’Œå节点 */ + }; + }; + ---- bar.dts --------------------------------------------------------------- + +å½“åŠ è½½ï¼ˆå¹¶æŒ‰ç…§[1]ä¸æè¿°çš„æ–¹å¼è§£å†³ï¼‰æ—¶ï¼Œåº”该产生foo+bar.dts:: + + ---- foo+bar.dts ----------------------------------------------------------- + /* FOOå¹³å° + bar外围 */ + / { + compatible = "corp,foo"; + + /* å…±äº«èµ„æº */ + res: res { + }; + + /* 芯片上的外围设备 */ + ocp: ocp { + /* 总是被实例化的外围设备 */ + peripheral1 { ... }; + + /* bar外围 */ + bar { + compatible = "corp,bar"; + ... /* å„ç§å±žæ€§å’Œå节点 */ + }; + }; + }; + ---- foo+bar.dts ----------------------------------------------------------- + +作为覆盖的结果,已ç»åˆ›å»ºäº†ä¸€ä¸ªæ–°çš„设备节点(barï¼‰ï¼Œå› æ¤å°†æ³¨å†Œä¸€ä¸ªbarå¹³å°è®¾å¤‡ï¼Œ +å¦‚æžœåŠ è½½äº†åŒ¹é…的设备驱动程åºï¼Œå°†æŒ‰é¢„期创建设备。 + +如果基础DTä¸æ˜¯ç”¨-@选项编译的,那么“&ocpâ€æ ‡ç¾å°†ä¸èƒ½ç”¨äºŽå°†è¦†ç›–节点解æžåˆ°åŸºç¡€ +DTä¸çš„适当ä½ç½®ã€‚在这ç§æƒ…况下,å¯ä»¥æä¾›ç›®æ ‡è·¯å¾„ã€‚é€šè¿‡æ ‡ç¾çš„ç›®æ ‡ä½ç½®çš„è¯æ³•æ˜¯æ¯” +è¾ƒå¥½çš„ï¼Œå› ä¸ºä¸ç®¡æ ‡ç¾åœ¨DTä¸å‡ºçŽ°åœ¨å“ªé‡Œï¼Œè¦†ç›–都å¯ä»¥è¢«åº”用到任何包å«æ ‡ç¾çš„基础DT上。 + +上é¢çš„bar.dts例åè¢«ä¿®æ”¹ä¸ºä½¿ç”¨ç›®æ ‡è·¯å¾„è¯æ³•ï¼Œå³ä¸º:: + + ---- bar.dts - é€šè¿‡æ˜Žç¡®çš„è·¯å¾„è¦†ç›–ç›®æ ‡ä½ç½® -------------------- + /dts-v1/; + /æ’件/; + &{/ocp} { + /* bar外围 */ + bar { + compatible = "corp,bar"; + ... /* å„ç§å¤–围设备和å节点 */ + } + }; + ---- bar.dts --------------------------------------------------------------- + + +å†…æ ¸ä¸å…³äºŽè¦†ç›–çš„API +------------------- + +该API相当容易使用。 + +1) 调用of_overlay_fdt_apply()æ¥åˆ›å»ºå’Œåº”用一个覆盖的å˜æ›´é›†ã€‚返回值是一个 + 错误或一个识别这个覆盖的cookie。 + +2) 调用of_overlay_remove()æ¥åˆ 除和清ç†å…ˆå‰é€šè¿‡è°ƒç”¨of_overlay_fdt_apply() + 而创建的覆盖å˜æ›´é›†ã€‚ä¸å…è®¸åˆ é™¤ä¸€ä¸ªè¢«å¦ä¸€ä¸ªè¦†ç›–的覆盖å˜åŒ–集。 + +最åŽï¼Œå¦‚æžœä½ éœ€è¦ä¸€æ¬¡æ€§åˆ 除所有的覆盖,åªéœ€è°ƒç”¨of_overlay_remove_all(), +它将以æ£ç¡®çš„顺åºåˆ 除æ¯ä¸€ä¸ªè¦†ç›–。 + +ä½ å¯ä»¥é€‰æ‹©æ³¨å†Œåœ¨è¦†ç›–æ“作ä¸è¢«è°ƒç”¨çš„é€šçŸ¥å™¨ã€‚è¯¦è§ +of_overlay_notifier_register/unregisterå’Œenum of_overlay_notify_action。 + +OF_OVERLAY_PRE_APPLYã€OF_OVERLAY_POST_APPLY或OF_OVERLAY_PRE_REMOVE +的通知器回调å¯ä»¥å˜å‚¨æŒ‡å‘覆盖层ä¸çš„è®¾å¤‡æ ‘èŠ‚ç‚¹æˆ–å…¶å†…å®¹çš„æŒ‡é’ˆï¼Œä½†è¿™äº›æŒ‡é’ˆä¸èƒ½æŒ +ç»åˆ°OF_OVERLAY_POST_REMOVE的通知器回调。在OF_OVERLAY_POST_REMOVE通 +知器被调用åŽï¼ŒåŒ…å«è¦†ç›–层的内å˜å°†è¢«kfree()ed。请注æ„,å³ä½¿OF_OVERLAY_POST_REMOVE +的通知器返回错误,内å˜ä¹Ÿä¼šè¢«kfree()ed。 + +drivers/of/dynamic.cä¸çš„å˜æ›´é›†é€šçŸ¥å™¨æ˜¯ç¬¬äºŒç§ç±»åž‹çš„通知器,å¯ä»¥é€šè¿‡åº”用或移除 +覆盖层æ¥è§¦å‘。这些通知器ä¸å…许在覆盖层或其内容ä¸å˜å‚¨æŒ‡å‘è®¾å¤‡æ ‘èŠ‚ç‚¹çš„æŒ‡é’ˆã€‚å½“åŒ…å« +覆盖层的内å˜å› 移除覆盖层而被释放时,覆盖层代ç 并ä¸èƒ½é˜²æ¢è¿™ç±»æŒ‡é’ˆä»ç„¶æœ‰æ•ˆã€‚ + +任何其他ä¿ç•™æŒ‡å‘覆盖层节点或数æ®çš„指针的代ç éƒ½è¢«è®¤ä¸ºæ˜¯ä¸€ä¸ªé”™è¯¯ï¼Œå› ä¸ºåœ¨ç§»é™¤è¦†ç›–å±‚ +åŽï¼Œè¯¥æŒ‡é’ˆå°†æŒ‡å‘已释放的内å˜ã€‚ + +覆盖层的用户必须特别注æ„系统上å‘生的整体æ“作,以确ä¿å…¶ä»–å†…æ ¸ä»£ç ä¸ä¿ç•™ä»»ä½•æŒ‡å‘覆 +盖层节点或数æ®çš„æŒ‡é’ˆã€‚ä»»ä½•æ— æ„ä¸ä½¿ç”¨è¿™ç§æŒ‡é’ˆçš„例å是,如果一个驱动或å系统模å—在 +应用了覆盖åŽè¢«åŠ 载,并且该驱动或å系统扫æäº†æ•´ä¸ªè®¾å¤‡æ ‘æˆ–å…¶å¤§éƒ¨åˆ†ï¼ŒåŒ…æ‹¬è¦†ç›–èŠ‚ç‚¹ã€‚ diff --git a/Documentation/translations/zh_CN/driver-api/gpio/index.rst b/Documentation/translations/zh_CN/driver-api/gpio/index.rst new file mode 100644 index 000000000000..9ab64e94aced --- /dev/null +++ b/Documentation/translations/zh_CN/driver-api/gpio/index.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/driver-api/gpio/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +======================= +通用型输入/输出(GPIO) +======================= + +目录: + +.. toctree:: + :maxdepth: 2 + + legacy + +Todolist: + +* intro +* using-gpio +* driver +* consumer +* board +* drivers-on-gpio +* bt8xxgpio + +æ ¸å¿ƒ +==== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/gpio/driver.h + +drivers/gpio/gpiolib.c + +ACPIæ”¯æŒ +======== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/gpio/gpiolib-acpi.c + +è®¾å¤‡æ ‘æ”¯æŒ +========== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/gpio/gpiolib-of.c + +设备管ç†æ”¯æŒ +============ + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/gpio/gpiolib-devres.c + +sysfs帮助(函数) +================= + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/gpio/gpiolib-sysfs.c diff --git a/Documentation/translations/zh_CN/gpio.txt b/Documentation/translations/zh_CN/driver-api/gpio/legacy.rst index a23ee14fc927..6399521d0548 100644 --- a/Documentation/translations/zh_CN/gpio.txt +++ b/Documentation/translations/zh_CN/driver-api/gpio/legacy.rst @@ -1,39 +1,28 @@ -Chinese translated version of Documentation/admin-guide/gpio +.. SPDX-License-Identifier: GPL-2.0 -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. +.. include:: ../../disclaimer-zh_CN.rst -Maintainer: Grant Likely <grant.likely@secretlab.ca> - Linus Walleij <linus.walleij@linaro.org> -Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> ---------------------------------------------------------------------- -Documentation/admin-guide/gpio çš„ä¸æ–‡ç¿»è¯‘ +:Original: Documentation/driver-api/gpio/legacy.rst -如果想评论或更新本文的内容,请直接è”ç³»åŽŸæ–‡æ¡£çš„ç»´æŠ¤è€…ã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡ -交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘ä¸æ–‡ç‰ˆç»´æŠ¤è€…求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻 -译å˜åœ¨é—®é¢˜ï¼Œè¯·è”ç³»ä¸æ–‡ç‰ˆç»´æŠ¤è€…。 -英文版维护者: Grant Likely <grant.likely@secretlab.ca> - Linus Walleij <linus.walleij@linaro.org> -ä¸æ–‡ç‰ˆç»´æŠ¤è€…: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> -ä¸æ–‡ç‰ˆç¿»è¯‘者: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> -ä¸æ–‡ç‰ˆæ ¡è¯‘者: å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> +:翻译: + å‚…ç‚œ Fu Wei <tekkamanninja@gmail.com> + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> -以下为æ£æ–‡ ---------------------------------------------------------------------- -GPIO æŽ¥å£ +:æ ¡è¯‘: -本文档æ供了一个在Linux下访问GPIO的公约概述。 + +ä¼ ç»ŸGPIOæŽ¥å£ +============ + +本文档概述了Linux下的GPIO访问公约。 这些函数以 gpio_* 作为å‰ç¼€ã€‚其他的函数ä¸å…è®¸ä½¿ç”¨è¿™æ ·çš„å‰ç¼€æˆ–相关的 __gpio_* å‰ç¼€ã€‚ -什么是GPIO? -========== +什么是GPIO? +============ "通用输入/输出å£"(GPIO)是一个çµæ´»çš„由软件控制的数å—ä¿¡å·ã€‚ä»–ä»¬å¯ ç”±å¤šç§èŠ¯ç‰‡æä¾›,且对于从事嵌入å¼å’Œå®šåˆ¶ç¡¬ä»¶çš„ Linux å¼€å‘者æ¥è¯´æ˜¯ 比较熟悉。æ¯ä¸ªGPIO 都代表一个连接到特定引脚或çƒæ …阵列(BGA)å°è£…ä¸ @@ -99,6 +88,7 @@ GPIO 公约 æ ‡è¯† GPIO --------- + GPIO æ˜¯é€šè¿‡æ— ç¬¦å·æ•´åž‹æ¥æ ‡è¯†çš„,范围是 0 到 MAX_INT。ä¿ç•™â€œè´Ÿâ€æ•° 用于其他目的,ä¾‹å¦‚æ ‡è¯†ä¿¡å·â€œåœ¨è¿™ä¸ªæ¿å上ä¸å¯ç”¨â€æˆ–指示错误。未接触底层 硬件的代ç 会忽略这些整数。 @@ -115,7 +105,7 @@ FPGA 的特定æ¿å上使用 80-95。编å·ä¸ä¸€å®šè¦è¿žç»,那些平å°ä¸ï¼ å¦‚æžœä½ è¦åˆå§‹åŒ–ä¸€ä¸ªå¸¦æœ‰æ— æ•ˆ GPIO ç¼–å·çš„结构体,å¯ä»¥ä½¿ç”¨ä¸€äº›è´Ÿç¼–ç (如"-EINVAL"),那将使其永远ä¸ä¼šæ˜¯æœ‰æ•ˆã€‚æ¥æµ‹è¯•è¿™æ ·ä¸€ä¸ªç»“构体ä¸çš„ç¼–å· -是å¦å…³è”一个 GPIOï¼Œä½ å¯ä½¿ç”¨ä»¥ä¸‹æ–言: +是å¦å…³è”一个 GPIOï¼Œä½ å¯ä½¿ç”¨ä»¥ä¸‹æ–言:: int gpio_is_valid(int number); @@ -128,11 +118,12 @@ FPGA 的特定æ¿å上使用 80-95。编å·ä¸ä¸€å®šè¦è¿žç»,那些平å°ä¸ï¼ 使用 GPIO --------- + 对于一个 GPIO,系统应该åšçš„第一件事情就是通过 gpio_request() 函数分é…它,è§ä¸‹æ–‡ã€‚ 接下æ¥æ˜¯è®¾ç½®I/Oæ–¹å‘,这通常是在æ¿çº§å¯åŠ¨ä»£ç ä¸ä¸ºæ‰€ä½¿ç”¨çš„ GPIO 设置 -platform_device 时完æˆã€‚ +platform_device 时完æˆ:: /* 设置为输入或输出, 返回 0 或负的错误代ç */ int gpio_direction_input(unsigned gpio); @@ -157,12 +148,13 @@ get/set(获å–/设置)函数调用没法返回错误,且有å¯èƒ½æ˜¯é…置错误 访问自旋é”安全的 GPIO -------------------- +--------------------- + 大多数 GPIO 控制器å¯ä»¥é€šè¿‡å†…å˜è¯»/写指令æ¥è®¿é—®ã€‚这些指令ä¸ä¼šä¼‘çœ ,å¯ä»¥ 安全地在硬(éžçº¿ç¨‹)ä¸æ–例程和类似的上下文ä¸å®Œæˆã€‚ 对于那些用 gpio_cansleep()测试总是返回失败的 GPIO(è§ä¸‹æ–‡),使用 -以下的函数访问: +以下的函数访问:: /* GPIO 输入:返回零或éžé›¶ */ int gpio_get_value(unsigned gpio); @@ -188,17 +180,18 @@ GPIO值是布尔值,零表示低电平,éžé›¶è¡¨ç¤ºé«˜ç”µå¹³ã€‚当读å–一ä 访问å¯èƒ½ä¼‘çœ çš„ GPIO ------------------ +------------------- + æŸäº› GPIO 控制器必须通过基于总线(如 I2C 或 SPI)的消æ¯è®¿é—®ã€‚读或写这些 GPIO 值的命令需è¦ç‰å¾…其信æ¯æŽ’到队首æ‰å‘é€å‘½ä»¤ï¼Œå†èŽ·å¾—å…¶åé¦ˆã€‚æœŸé—´éœ€è¦ ä¼‘çœ ï¼Œè¿™ä¸èƒ½åœ¨ IRQ 例程(ä¸æ–上下文)ä¸æ‰§è¡Œã€‚ 支æŒæ¤ç±» GPIO çš„å¹³å°é€šè¿‡ä»¥ä¸‹å‡½æ•°è¿”回éžé›¶å€¼æ¥åŒºåˆ†å‡ºè¿™ç§ GPIO。(æ¤å‡½æ•°éœ€è¦ -一个之å‰é€šè¿‡ gpio_request 分é…到的有效 GPIO ç¼–å·): +一个之å‰é€šè¿‡ gpio_request 分é…到的有效 GPIO ç¼–å·):: int gpio_cansleep(unsigned gpio); -ä¸ºäº†è®¿é—®è¿™ç§ GPIO,å†…æ ¸å®šä¹‰äº†ä¸€å¥—ä¸åŒçš„函数: +ä¸ºäº†è®¿é—®è¿™ç§ GPIO,å†…æ ¸å®šä¹‰äº†ä¸€å¥—ä¸åŒçš„函数:: /* GPIO 输入:返回零或éžé›¶ ,å¯èƒ½ä¼šä¼‘çœ */ int gpio_get_value_cansleep(unsigned gpio); @@ -214,25 +207,26 @@ GPIO 值的命令需è¦ç‰å¾…其信æ¯æŽ’到队首æ‰å‘é€å‘½ä»¤ï¼Œå†èŽ·å¾—å…¶ 事实,这些处ç†ä¾‹ç¨‹å®žé™…上和自旋é”å®‰å…¨çš„å‡½æ•°æ˜¯ä¸€æ ·çš„ã€‚ ** 除æ¤ä¹‹å¤– ** 调用设置和é…ç½®æ¤ç±» GPIO 的函数也必须在å…è®¸ä¼‘çœ çš„ä¸Šä¸‹æ–‡ä¸ï¼Œ -å› ä¸ºå®ƒä»¬å¯èƒ½ä¹Ÿéœ€è¦è®¿é—® GPIO 控制器芯片: (这些设置函数通常在æ¿çº§å¯åŠ¨ä»£ç 或者 -驱动探测/æ–开代ç ä¸ï¼Œæ‰€ä»¥è¿™æ˜¯ä¸€ä¸ªå®¹æ˜“满足的约æŸæ¡ä»¶ã€‚) +å› ä¸ºå®ƒä»¬å¯èƒ½ä¹Ÿéœ€è¦è®¿é—® GPIO 控制器芯片 (这些设置函数通常在æ¿çº§å¯åŠ¨ä»£ç 或者 +驱动探测/æ–开代ç ä¸ï¼Œæ‰€ä»¥è¿™æ˜¯ä¸€ä¸ªå®¹æ˜“满足的约æŸæ¡ä»¶ã€‚) :: - gpio_direction_input() - gpio_direction_output() - gpio_request() + gpio_direction_input() + gpio_direction_output() + gpio_request() -## gpio_request_one() -## gpio_request_array() -## gpio_free_array() + ## gpio_request_one() + ## gpio_request_array() + ## gpio_free_array() - gpio_free() - gpio_set_debounce() + gpio_free() + gpio_set_debounce() 声明和释放 GPIO ----------------------------- -为了有助于æ•èŽ·ç³»ç»Ÿé…置错误,定义了两个函数。 +---------------- + +为了有助于æ•èŽ·ç³»ç»Ÿé…置错误,定义了两个函数:: /* 申请 GPIO, 返回 0 或负的错误代ç . * éžç©ºæ ‡ç¾å¯èƒ½æœ‰åŠ©äºŽè¯Šæ–. @@ -256,9 +250,9 @@ GPIO 值的命令需è¦ç‰å¾…其信æ¯æŽ’到队首æ‰å‘é€å‘½ä»¤ï¼Œå†èŽ·å¾—å…¶ æŸäº›å¹³å°å¯èƒ½ä¹Ÿä½¿ç”¨ GPIO 作为电æºç®¡ç†æ¿€æ´»ä¿¡å·(例如通过关é—未使用芯片区和 简å•åœ°å…³é—未使用时钟)。 -对于 GPIO 使用 pinctrl å系统已知的引脚,å系统应该被告知其使用情况; +对于 GPIO 使用引脚控制å系统已知的引脚,å系统应该被告知其使用情况; 一个 gpiolib 驱动的 .request()æ“作应调用 pinctrl_gpio_request(), -而 gpiolib 驱动的 .free()æ“作应调用 pinctrl_gpio_free()。pinctrl +而 gpiolib 驱动的 .free()æ“作应调用 pinctrl_gpio_free()。引脚控制 å系统å…许 pinctrl_gpio_request()在æŸä¸ªå¼•è„šæˆ–引脚组以å¤ç”¨å½¢å¼â€œå±žäºŽâ€ 一个设备时都æˆåŠŸè¿”回。 @@ -270,7 +264,7 @@ GPIO 值的命令需è¦ç‰å¾…其信æ¯æŽ’到队首æ‰å‘é€å‘½ä»¤ï¼Œå†èŽ·å¾—å…¶ æŸäº›å¹³å°å…许部分或所有 GPIO ä¿¡å·ä½¿ç”¨ä¸åŒçš„引脚。类似的,GPIO 或引脚的 其他方é¢ä¹Ÿéœ€è¦é…置,如上拉/下拉。平å°è½¯ä»¶åº”该在对这些 GPIO 调用 -gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用 pinctrl åç³»ç»Ÿçš„æ˜ å°„è¡¨ï¼Œ +gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用引脚控制åç³»ç»Ÿçš„æ˜ å°„è¡¨ï¼Œ 使得 GPIO çš„ç”¨æˆ·æ— é¡»å…³æ³¨è¿™äº›ç»†èŠ‚ã€‚ 还有一个值得注æ„的是在释放 GPIO å‰ï¼Œä½ å¿…é¡»åœæ¢ä½¿ç”¨å®ƒã€‚ @@ -278,7 +272,7 @@ gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用 pinctrl åç³»ç»Ÿçš„æ˜ æ³¨æ„:申请一个 GPIO 并没有以任何方å¼é…置它,åªä¸è¿‡æ ‡è¯†é‚£ä¸ª GPIO 处于使用 状æ€ã€‚必须有å¦å¤–的代ç æ¥å¤„ç†å¼•è„šé…ç½®(如控制 GPIO 使用的引脚ã€ä¸Šæ‹‰/下拉)。 -考虑到大多数情况下声明 GPIO 之åŽå°±ä¼šç«‹å³é…置它们,所以定义了以下三个辅助函数: +考虑到大多数情况下声明 GPIO 之åŽå°±ä¼šç«‹å³é…置它们,所以定义了以下三个辅助函数:: /* 申请一个 GPIO ä¿¡å·, åŒæ—¶é€šè¿‡ç‰¹å®šçš„'flags'åˆå§‹åŒ–é…ç½®, * 其他和 gpio_request()çš„å‚æ•°å’Œè¿”å›žå€¼ç›¸åŒ @@ -326,7 +320,7 @@ gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用 pinctrl åç³»ç»Ÿçš„æ˜ å°†æ¥è¿™äº›æ ‡å¿—å¯èƒ½æ‰©å±•åˆ°æ”¯æŒæ›´å¤šçš„属性。 更进一æ¥,为了更简å•åœ°å£°æ˜Ž/释放多个 GPIO,'struct gpio'被引进æ¥å°è£…所有 -这三个领域: +这三个领域:: struct gpio { unsigned gpio; @@ -334,7 +328,7 @@ gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用 pinctrl åç³»ç»Ÿçš„æ˜ const char *label; }; -一个典型的用例: +一个典型的用例:: static struct gpio leds_gpios[] = { { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* é»˜è®¤å¼€å¯ */ @@ -356,9 +350,10 @@ gpio_request()å‰å°†è¿™ç±»ç»†èŠ‚é…置好,例如使用 pinctrl åç³»ç»Ÿçš„æ˜ GPIO æ˜ å°„åˆ° IRQ --------------------- +---------------- + GPIO ç¼–å·æ˜¯æ— 符å·æ•´æ•°;IRQ ç¼–å·ä¹Ÿæ˜¯ã€‚这些构æˆäº†ä¸¤ä¸ªé€»è¾‘上ä¸åŒçš„命å空间 -(GPIO 0 ä¸ä¸€å®šä½¿ç”¨ IRQ 0)ã€‚ä½ å¯ä»¥é€šè¿‡ä»¥ä¸‹å‡½æ•°åœ¨å®ƒä»¬ä¹‹é—´å®žçŽ°æ˜ å°„: +(GPIO 0 ä¸ä¸€å®šä½¿ç”¨ IRQ 0)ã€‚ä½ å¯ä»¥é€šè¿‡ä»¥ä¸‹å‡½æ•°åœ¨å®ƒä»¬ä¹‹é—´å®žçŽ°æ˜ å°„:: /* æ˜ å°„ GPIO ç¼–å·åˆ° IRQ ç¼–å· */ int gpio_to_irq(unsigned gpio); @@ -384,7 +379,8 @@ irq_to_gpio()返回的éžé”™è¯¯å€¼å¤§å¤šæ•°é€šå¸¸å¯ä»¥è¢« gpio_get_value()æ‰€ä½ æ¨¡æ‹Ÿå¼€æ¼ä¿¡å· ----------------------------- +------------ + 有时在åªæœ‰ä½Žç”µå¹³ä¿¡å·ä½œä¸ºå®žé™…驱动结果(译者注:多个输出连接于一点,逻辑电平 结果为所有输出的逻辑与)的时候,共享的信å·çº¿éœ€è¦ä½¿ç”¨â€œå¼€æ¼â€ä¿¡å·ã€‚(è¯¥æœ¯è¯ é€‚ç”¨äºŽ CMOS 管;而 TTL 用“集电æžå¼€è·¯â€ã€‚)一个上拉电阻使信å·ä¸ºé«˜ç”µå¹³ã€‚è¿™ @@ -408,9 +404,44 @@ irq_to_gpio()返回的éžé”™è¯¯å€¼å¤§å¤šæ•°é€šå¸¸å¯ä»¥è¢« gpio_get_value()æ‰€ä½ è¿™ä¸ä¸€å®šæ˜¯é”™è¯¯çš„。一个常è§çš„例å就是 I2C 时钟的延长:一个需è¦è¾ƒæ…¢æ—¶é’Ÿçš„ 从设备延迟 SCK 的上å‡æ²¿ï¼Œè€Œ I2C 主设备相应地调整其信å·ä¼ 输速率。 +GPIO控制器和引脚控制å系统 +-------------------------- + +SOC上的GPIO控制器å¯èƒ½ä¸Žå¼•è„šæŽ§åˆ¶å系统紧密结åˆï¼Œå³å¼•è„šå¯ä»¥ä¸Žå¯é€‰çš„gpio功 +能一起被其他功能使用。我们已ç»æ¶µç›–äº†è¿™æ ·çš„æƒ…å†µï¼Œä¾‹å¦‚ä¸€ä¸ªGPIO控制器需è¦ä¿ +留一个引脚或通过调用以下任何一个引脚æ¥è®¾ç½®å…¶æ–¹å‘:: + + pinctrl_gpio_request() + pinctrl_gpio_free() + pinctrl_gpio_direction_input() + pinctrl_gpio_direction_output() + +但是,引脚控制å系统是如何将GPIOå·ç (这是一个全局事项)与æŸä¸ªå¼•è„šæŽ§åˆ¶å™¨ +上的æŸä¸ªå¼•è„šäº¤å‰å…³è”的? + +这是通过注册引脚的“范围â€æ¥å®žçŽ°çš„,这基本上是交å‰å‚考表。这些æ述是在 +Documentation/driver-api/pin-control.rst + +虽然引脚分é…完全由引脚控制å系统管ç†ï¼Œä½†gpio(在gpiolib下)ä»ç”±gpio驱动 +维护。å¯èƒ½å‘生的情况是,SoCä¸çš„ä¸åŒå¼•è„šèŒƒå›´ç”±ä¸åŒçš„gpio驱动器管ç†ã€‚ + +这使得在调用 "pinctrl_gpio_request" 之å‰ï¼Œè®©gpio驱动å‘pin ctrlåç³» +统宣布它们的引脚范围是åˆç†çš„,以便在使用任何gpio之å‰è¦æ±‚引脚控制å系统准 +备相应的引脚。 + +为æ¤ï¼Œgpio控制器å¯ä»¥ç”¨å¼•è„šæŽ§åˆ¶å系统注册其引脚范围。目å‰æœ‰ä¸¤ç§æ–¹æ³•ï¼šæœ‰æˆ– +æ— DT。 + +关于对DT的支æŒï¼Œè¯·å‚考 Documentation/devicetree/bindings/gpio/gpio.txt. + +对于éžDT支æŒï¼Œç”¨æˆ·å¯ä»¥ç”¨é€‚当的å‚数调用gpiochip_add_pin_range(),将一 +系列的gpio引脚注册到引脚控制驱动上。为æ¤ï¼Œå¿…须将引脚控制设备的å称å—符串 +作为å‚æ•°ä¹‹ä¸€ä¼ ç»™è¿™ä¸ªç¨‹åºã€‚ + + +这些公约忽略了什么? +==================== -这些公约忽略了什么? -================ 这些公约忽略的最大一件事就是引脚å¤ç”¨ï¼Œå› 为这属于高度芯片特定的属性且 没有å¯ç§»æ¤æ€§ã€‚æŸä¸ªå¹³å°å¯èƒ½ä¸éœ€è¦æ˜Žç¡®çš„å¤ç”¨ä¿¡æ¯ï¼›æœ‰çš„对于任æ„给定的引脚 å¯èƒ½åªæœ‰ä¸¤ä¸ªåŠŸèƒ½é€‰é¡¹ï¼›æœ‰çš„å¯èƒ½æ¯ä¸ªå¼•è„šæœ‰å…«ä¸ªåŠŸèƒ½é€‰é¡¹ï¼›æœ‰çš„å¯èƒ½å¯ä»¥å°† @@ -433,8 +464,9 @@ Linux 的系统。) 当å‰ï¼ŒåŠ¨æ€å®šä¹‰ GPIO 并ä¸æ˜¯æ ‡å‡†çš„,例如作为é…置一个带有æŸäº› GPIO 扩展器的 é™„åŠ ç”µè·¯æ¿çš„副作用。 -GPIO 实现者的框架 (å¯é€‰) -===================== +GPIO 实现者的框架(å¯é€‰ï¼‰ +========================= + å‰é¢æ到了,有一个å¯é€‰çš„实现框架,让平å°ä½¿ç”¨ç›¸åŒçš„编程接å£ï¼Œæ›´åŠ 简å•åœ°æ”¯æŒ ä¸åŒç§ç±»çš„ GPIO 控制器。这个框架称为"gpiolib"。 @@ -444,15 +476,16 @@ GPIO 实现者的框架 (å¯é€‰) 控制器驱动: gpio_chip -------------------- +--------------------- + 在框架ä¸æ¯ä¸ª GPIO 控制器都包装为一个 "struct gpio_chip",他包å«äº† 该类型的æ¯ä¸ªæŽ§åˆ¶å™¨çš„常用信æ¯: - - 设置 GPIO æ–¹å‘的方法 - - 用于访问 GPIO 值的方法 - - 告知调用其方法是å¦å¯èƒ½ä¼‘çœ çš„æ ‡å¿— - - å¯é€‰çš„ debugfs ä¿¡æ¯å¯¼å‡ºæ–¹æ³• (显示类似上拉é…ç½®ä¸€æ ·çš„é¢å¤–状æ€) - - 诊æ–æ ‡ç¾ + - 设置 GPIO æ–¹å‘的方法 + - 用于访问 GPIO 值的方法 + - 告知调用其方法是å¦å¯èƒ½ä¼‘çœ çš„æ ‡å¿— + - å¯é€‰çš„ debugfs ä¿¡æ¯å¯¼å‡ºæ–¹æ³• (显示类似上拉é…ç½®ä¸€æ ·çš„é¢å¤–状æ€) + - 诊æ–æ ‡ç¾ ä¹ŸåŒ…å«äº†æ¥è‡ª device.platform_data çš„æ¯ä¸ªå®žä¾‹çš„æ•°æ®ï¼šå®ƒç¬¬ä¸€ä¸ª GPIO çš„ ç¼–å·å’Œå®ƒå¯ç”¨çš„ GPIO çš„æ•°é‡ã€‚ @@ -471,7 +504,8 @@ GPIO 实现者的框架 (å¯é€‰) å¹³å°æ”¯æŒ -------- +-------- + 为了支æŒè¿™ä¸ªæ¡†æž¶ï¼Œä¸€ä¸ªå¹³å°çš„ Kconfig 文件将会 "select"(选择) ARCH_REQUIRE_GPIOLIB 或 ARCH_WANT_OPTIONAL_GPIOLIB,并让它的 <asm/gpio.h> åŒ…å« <asm-generic/gpio.h>,åŒæ—¶å®šä¹‰ä¸‰ä¸ªæ–¹æ³•: @@ -489,7 +523,7 @@ ARCH_WANT_OPTIONAL_GPIOLIB æ„å‘³ç€ gpiolib æ ¸å¿ƒé»˜è®¤å…³é—,且用户å¯ä»¥ 如果这些选项都没被选择,该平å°å°±ä¸é€šè¿‡ GPIO-lib æ”¯æŒ GPIO,且代ç ä¸å¯ä»¥ 被用户使能。 -以下这些方法的实现å¯ä»¥ç›´æŽ¥ä½¿ç”¨æ¡†æž¶ä»£ç ,并总是通过 gpio_chip 调度: +以下这些方法的实现å¯ä»¥ç›´æŽ¥ä½¿ç”¨æ¡†æž¶ä»£ç ,并总是通过 gpio_chip 调度:: #define gpio_get_value __gpio_get_value #define gpio_set_value __gpio_set_value @@ -508,7 +542,8 @@ arch_initcall()或者更早的地方集æˆè¿›å¹³å°åˆå§‹åŒ–代ç ,使这些 G 且他们通常å¯ä»¥ä½œä¸º IRQ 使用。 æ¿çº§æ”¯æŒ -------- +-------- + 对于外部 GPIO 控制器(例如 I2C 或 SPI 扩展器ã€ä¸“用芯片ã€å¤šåŠŸèƒ½å™¨ä»¶ã€FPGA 或 CPLD),大多数常用æ¿çº§ç‰¹å®šä»£ç 都å¯ä»¥æ³¨å†ŒæŽ§åˆ¶å™¨è®¾å¤‡ï¼Œå¹¶ä¿è¯ä»–ä»¬çš„é©±åŠ¨çŸ¥é“ gpiochip_add()所使用的 GPIO ç¼–å·ã€‚他们的起始编å·é€šå¸¸è·Ÿåœ¨å¹³å°ç‰¹å®šçš„ GPIO @@ -526,8 +561,9 @@ GPIO å¯ä»¥å·¥ä½œä¹‹åŽæ‰å¯è¢«æ³¨å†Œã€‚解决这类ä¾èµ–的的一ç§æ–¹æ³•æ˜¯ 设备å˜æˆæ— 效时移除它们。 -用户空间的 Sysfs 接å£(å¯é€‰) -======================== +用户空间的 Sysfs 接å£ï¼ˆå¯é€‰ï¼‰ +============================= + 使用“gpiolibâ€å®žçŽ°æ¡†æž¶çš„å¹³å°å¯ä»¥é€‰æ‹©é…置一个 GPIO çš„ sysfs 用户接å£ã€‚ è¿™ä¸åŒäºŽ debugfs 接å£ï¼Œå› 为它æ供的是对 GPIOæ–¹å‘和值的控制,而ä¸åªæ˜¾ç¤º 一个GPIO 的状æ€æ‘˜è¦ã€‚æ¤å¤–,它å¯ä»¥å‡ºçŽ°åœ¨æ²¡æœ‰è°ƒè¯•æ”¯æŒçš„产å“级系统ä¸ã€‚ @@ -548,6 +584,7 @@ GPIO å¯ä»¥å·¥ä½œä¹‹åŽæ‰å¯è¢«æ³¨å†Œã€‚解决这类ä¾èµ–的的一ç§æ–¹æ³•æ˜¯ Sysfs ä¸çš„路径 -------------- + 在/sys/class/gpio ä¸æœ‰ 3 类入å£: - 用于在用户空间控制 GPIO 的控制接å£; @@ -625,8 +662,9 @@ GPIO 控制器的路径类似 /sys/class/gpio/gpiochip42/ (对于从#42 GPIO ä»Žå†…æ ¸ä»£ç ä¸å¯¼å‡º -------------- -å†…æ ¸ä»£ç å¯ä»¥æ˜Žç¡®åœ°ç®¡ç†é‚£äº›å·²é€šè¿‡ gpio_request()申请的 GPIO 的导出: +---------------- + +å†…æ ¸ä»£ç å¯ä»¥æ˜Žç¡®åœ°ç®¡ç†é‚£äº›å·²é€šè¿‡ gpio_request()申请的 GPIO 的导出:: /* 导出 GPIO 到用户空间 */ int gpio_export(unsigned gpio, bool direction_may_change); @@ -648,3 +686,9 @@ GPIO 控制器的路径类似 /sys/class/gpio/gpiochip42/ (对于从#42 GPIO 在 GPIO 被导出之åŽï¼Œgpio_export_link()å…许在 sysfs 文件系统的任何地方 创建一个到这个 GPIO sysfs 节点的符å·é“¾æŽ¥ã€‚è¿™æ ·é©±åŠ¨å°±å¯ä»¥é€šè¿‡ä¸€ä¸ªæ述性的 åå—,在 sysfs ä¸ä»–们所拥有的设备下æ供一个(到这个 GPIO sysfs 节点的)接å£ã€‚ + + +APIå‚考 +======= + +本节ä¸åˆ—出的函数已被废弃。在新的代ç ä¸åº”该使用基于GPIOæ述符的API。 diff --git a/Documentation/translations/zh_CN/driver-api/index.rst b/Documentation/translations/zh_CN/driver-api/index.rst new file mode 100644 index 000000000000..ba354e1f4e6d --- /dev/null +++ b/Documentation/translations/zh_CN/driver-api/index.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/driver-api/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +======================== +Linux驱动实现者的APIæŒ‡å— +======================== + +å†…æ ¸æ供了å„ç§å„æ ·çš„æŽ¥å£æ¥æ”¯æŒè®¾å¤‡é©±åŠ¨çš„å¼€å‘。这份文档åªæ˜¯å¯¹å…¶ä¸ä¸€äº›æŽ¥å£è¿›è¡Œäº† +一定程度的整ç†â€”—希望éšç€æ—¶é—´çš„推移,它能å˜å¾—更好ï¼å¯ç”¨çš„å°èŠ‚å¯ä»¥åœ¨ä¸‹é¢çœ‹åˆ°ã€‚ + +.. class:: toc-title + + 目录列表: + +.. toctree:: + :maxdepth: 2 + + gpio/index + io_ordering + +Todolist: + +* driver-model/index +* basics +* infrastructure +* ioctl +* early-userspace/index +* pm/index +* clk +* device-io +* dma-buf +* device_link +* component +* message-based +* infiniband +* aperture +* frame-buffer +* regulator +* reset +* iio/index +* input +* usb/index +* firewire +* pci/index +* cxl/index +* spi +* i2c +* ipmb +* ipmi +* i3c/index +* interconnect +* devfreq +* hsi +* edac +* scsi +* libata +* target +* mailbox +* mtdnand +* miscellaneous +* mei/index +* mtd/index +* mmc/index +* nvdimm/index +* w1 +* rapidio/index +* s390-drivers +* vme +* 80211/index +* uio-howto +* firmware/index +* pin-control +* md/index +* media/index +* misc_devices +* nfc/index +* dmaengine/index +* slimbus +* soundwire/index +* thermal/index +* fpga/index +* acpi/index +* auxiliary_bus +* backlight/lp855x-driver.rst +* connector +* console +* dcdbas +* eisa +* isa +* isapnp +* io-mapping +* generic-counter +* memory-devices/index +* men-chameleon-bus +* ntb +* nvmem +* parport-lowlevel +* pps +* ptp +* phy/index +* pwm +* pldmfw/index +* rfkill +* serial/index +* sm501 +* surface_aggregator/index +* switchtec +* sync_file +* tty/index +* vfio-mediated-device +* vfio +* vfio-pci-device-specific-driver-acceptance +* xilinx/index +* xillybus +* zorro +* hte/index + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/driver-api/io_ordering.rst b/Documentation/translations/zh_CN/driver-api/io_ordering.rst new file mode 100644 index 000000000000..4dbfa4ce92a0 --- /dev/null +++ b/Documentation/translations/zh_CN/driver-api/io_ordering.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/driver-api/io_ordering.rst + +:翻译: + + æž—æ°¸å¬ Lin Yongting <linyongting@gmail.com> + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +=========================== +对内å˜æ˜ 射地å€çš„I/Oå†™å…¥æŽ’åº +=========================== + +在æŸäº›å¹³å°ä¸Šï¼Œæ‰€è°“的内å˜æ˜ å°„I/O是弱顺åºã€‚在这些平å°ä¸Šï¼Œé©±åŠ¨å¼€å‘者有责任 +ä¿è¯I/O内å˜æ˜ 射地å€çš„写æ“作按程åºå›¾æ„的顺åºè¾¾åˆ°è®¾å¤‡ã€‚通常读å–一个“安全†+设备寄å˜å™¨æˆ–桥寄å˜å™¨ï¼Œè§¦å‘IO芯片清刷未处ç†çš„写æ“作到达设备åŽæ‰å¤„ç†è¯»æ“作, +而达到ä¿è¯ç›®çš„。驱动程åºé€šå¸¸åœ¨spinlockä¿æŠ¤çš„临界区退出之å‰ä½¿ç”¨è¿™ç§æŠ€æœ¯ã€‚ +这也å¯ä»¥ä¿è¯åŽé¢çš„写æ“作åªåœ¨å‰é¢çš„写æ“作之åŽåˆ°è¾¾è®¾å¤‡ï¼ˆè¿™éžå¸¸ç±»ä¼¼äºŽå†…å˜ +å±éšœæ“作,mb(),ä¸è¿‡ä»…适用于I/O)。 + +å‡è®¾ä¸€ä¸ªè®¾å¤‡é©±åŠ¨ç¨‹çš„具体例å:: + + ... + CPU A: spin_lock_irqsave(&dev_lock, flags) + CPU A: val = readl(my_status); + CPU A: ... + CPU A: writel(newval, ring_ptr); + CPU A: spin_unlock_irqrestore(&dev_lock, flags) + ... + CPU B: spin_lock_irqsave(&dev_lock, flags) + CPU B: val = readl(my_status); + CPU B: ... + CPU B: writel(newval2, ring_ptr); + CPU B: spin_unlock_irqrestore(&dev_lock, flags) + ... + +上述例åä¸ï¼Œè®¾å¤‡å¯èƒ½ä¼šå…ˆæŽ¥æ”¶åˆ°newval2的值,然åŽæŽ¥æ”¶åˆ°newval的值,问题就 +å‘生了。ä¸è¿‡å¾ˆå®¹æ˜“通过下é¢æ–¹æ³•æ¥ä¿®å¤:: + + ... + CPU A: spin_lock_irqsave(&dev_lock, flags) + CPU A: val = readl(my_status); + CPU A: ... + CPU A: writel(newval, ring_ptr); + CPU A: (void)readl(safe_register); /* é…置寄å˜å™¨ï¼Ÿ*/ + CPU A: spin_unlock_irqrestore(&dev_lock, flags) + ... + CPU B: spin_lock_irqsave(&dev_lock, flags) + CPU B: val = readl(my_status); + CPU B: ... + CPU B: writel(newval2, ring_ptr); + CPU B: (void)readl(safe_register); /* é…置寄å˜å™¨ï¼Ÿ*/ + CPU B: spin_unlock_irqrestore(&dev_lock, flags) + +在解决方案ä¸ï¼Œè¯»å–safe_register寄å˜å™¨ï¼Œè§¦å‘IO芯片清刷未处ç†çš„写æ“作, +å†å¤„ç†åŽé¢çš„读æ“作,防æ¢å¼•å‘æ•°æ®ä¸ä¸€è‡´é—®é¢˜ã€‚ diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index bf85baca8b3e..2fc60e60feb4 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -108,6 +108,7 @@ TODOList: :maxdepth: 2 core-api/index + driver-api/index locking/index accounting/index cpu-freq/index @@ -120,10 +121,10 @@ TODOList: scheduler/index mm/index peci/index + PCI/index TODOList: -* driver-api/index * block/index * cdrom/index * ide/index @@ -148,7 +149,6 @@ TODOList: * crypto/index * bpf/index * usb/index -* PCI/index * scsi/index * misc-devices/index * mhi/index diff --git a/Documentation/translations/zh_CN/io_ordering.txt b/Documentation/translations/zh_CN/io_ordering.txt deleted file mode 100644 index 7bb3086227ae..000000000000 --- a/Documentation/translations/zh_CN/io_ordering.txt +++ /dev/null @@ -1,67 +0,0 @@ -Chinese translated version of Documentation/driver-api/io_ordering.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Chinese maintainer: Lin Yongting <linyongting@gmail.com> ---------------------------------------------------------------------- -Documentation/driver-api/io_ordering.rst çš„ä¸æ–‡ç¿»è¯‘ - -如果想评论或更新本文的内容,请直接è”ç³»åŽŸæ–‡æ¡£çš„ç»´æŠ¤è€…ã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡ -交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘ä¸æ–‡ç‰ˆç»´æŠ¤è€…求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻 -译å˜åœ¨é—®é¢˜ï¼Œè¯·è”ç³»ä¸æ–‡ç‰ˆç»´æŠ¤è€…。 - -ä¸æ–‡ç‰ˆç»´æŠ¤è€…: æž—æ°¸å¬ Lin Yongting <linyongting@gmail.com> -ä¸æ–‡ç‰ˆç¿»è¯‘者: æž—æ°¸å¬ Lin Yongting <linyongting@gmail.com> -ä¸æ–‡ç‰ˆæ ¡è¯‘者: æž—æ°¸å¬ Lin Yongting <linyongting@gmail.com> - - -以下为æ£æ–‡ ---------------------------------------------------------------------- - -在æŸäº›å¹³å°ä¸Šï¼Œæ‰€è°“的内å˜æ˜ å°„I/O是弱顺åºã€‚在这些平å°ä¸Šï¼Œé©±åŠ¨å¼€å‘者有责任 -ä¿è¯I/O内å˜æ˜ 射地å€çš„写æ“作按程åºå›¾æ„的顺åºè¾¾åˆ°è®¾å¤‡ã€‚通常读å–一个“安全†-设备寄å˜å™¨æˆ–桥寄å˜å™¨ï¼Œè§¦å‘IO芯片清刷未处ç†çš„写æ“作到达设备åŽæ‰å¤„ç†è¯»æ“作, -而达到ä¿è¯ç›®çš„。驱动程åºé€šå¸¸åœ¨spinlockä¿æŠ¤çš„临界区退出之å‰ä½¿ç”¨è¿™ç§æŠ€æœ¯ã€‚ -这也å¯ä»¥ä¿è¯åŽé¢çš„写æ“作åªåœ¨å‰é¢çš„写æ“作之åŽåˆ°è¾¾è®¾å¤‡ï¼ˆè¿™éžå¸¸ç±»ä¼¼äºŽå†…å˜ -å±éšœæ“作,mb(),ä¸è¿‡ä»…适用于I/O)。 - -å‡è®¾ä¸€ä¸ªè®¾å¤‡é©±åŠ¨ç¨‹çš„具体例å: - - ... -CPU A: spin_lock_irqsave(&dev_lock, flags) -CPU A: val = readl(my_status); -CPU A: ... -CPU A: writel(newval, ring_ptr); -CPU A: spin_unlock_irqrestore(&dev_lock, flags) - ... -CPU B: spin_lock_irqsave(&dev_lock, flags) -CPU B: val = readl(my_status); -CPU B: ... -CPU B: writel(newval2, ring_ptr); -CPU B: spin_unlock_irqrestore(&dev_lock, flags) - ... - -上述例åä¸ï¼Œè®¾å¤‡å¯èƒ½ä¼šå…ˆæŽ¥æ”¶åˆ°newval2的值,然åŽæŽ¥æ”¶åˆ°newval的值,问题就 -å‘生了。ä¸è¿‡å¾ˆå®¹æ˜“通过下é¢æ–¹æ³•æ¥ä¿®å¤ï¼š - - ... -CPU A: spin_lock_irqsave(&dev_lock, flags) -CPU A: val = readl(my_status); -CPU A: ... -CPU A: writel(newval, ring_ptr); -CPU A: (void)readl(safe_register); /* é…置寄å˜å™¨ï¼Ÿ*/ -CPU A: spin_unlock_irqrestore(&dev_lock, flags) - ... -CPU B: spin_lock_irqsave(&dev_lock, flags) -CPU B: val = readl(my_status); -CPU B: ... -CPU B: writel(newval2, ring_ptr); -CPU B: (void)readl(safe_register); /* é…置寄å˜å™¨ï¼Ÿ*/ -CPU B: spin_unlock_irqrestore(&dev_lock, flags) - -在解决方案ä¸ï¼Œè¯»å–safe_register寄å˜å™¨ï¼Œè§¦å‘IO芯片清刷未处ç†çš„写æ“作, -å†å¤„ç†åŽé¢çš„读æ“作,防æ¢å¼•å‘æ•°æ®ä¸ä¸€è‡´é—®é¢˜ã€‚ diff --git a/Documentation/translations/zh_CN/oops-tracing.txt b/Documentation/translations/zh_CN/oops-tracing.txt deleted file mode 100644 index c5f3bda7abcb..000000000000 --- a/Documentation/translations/zh_CN/oops-tracing.txt +++ /dev/null @@ -1,212 +0,0 @@ -Chinese translated version of Documentation/admin-guide/bug-hunting.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Chinese maintainer: Dave Young <hidave.darkstar@gmail.com> ---------------------------------------------------------------------- -Documentation/admin-guide/bug-hunting.rst çš„ä¸æ–‡ç¿»è¯‘ - -如果想评论或更新本文的内容,请直接è”ç³»åŽŸæ–‡æ¡£çš„ç»´æŠ¤è€…ã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡ -交æµæœ‰å›°éš¾çš„è¯ï¼Œä¹Ÿå¯ä»¥å‘ä¸æ–‡ç‰ˆç»´æŠ¤è€…求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻 -译å˜åœ¨é—®é¢˜ï¼Œè¯·è”ç³»ä¸æ–‡ç‰ˆç»´æŠ¤è€…。 - -ä¸æ–‡ç‰ˆç»´æŠ¤è€…: æ¨ç‘ž Dave Young <hidave.darkstar@gmail.com> -ä¸æ–‡ç‰ˆç¿»è¯‘者: æ¨ç‘ž Dave Young <hidave.darkstar@gmail.com> -ä¸æ–‡ç‰ˆæ ¡è¯‘者: æŽé˜³ Li Yang <leoyang.li@nxp.com> - çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com> - -以下为æ£æ–‡ ---------------------------------------------------------------------- - -注æ„: ksymoops 在2.6ä¸æ˜¯æ²¡æœ‰ç”¨çš„。 è¯·ä»¥åŽŸæœ‰æ ¼å¼ä½¿ç”¨Oops(æ¥è‡ªdmesg,ç‰ç‰)。 -å¿½ç•¥ä»»ä½•è¿™æ ·é‚£æ ·å…³äºŽâ€œè§£ç Oopsâ€æˆ–者“通过ksymoopsè¿è¡Œâ€çš„文档。 å¦‚æžœä½ è´´å‡ºè¿è¡Œè¿‡ -ksymoopsçš„æ¥è‡ª2.6çš„Oops,人们åªä¼šè®©ä½ é‡è´´ä¸€æ¬¡ã€‚ - -快速总结 -------------- - -å‘现Oops并å‘é€ç»™çœ‹ä¼¼ç›¸å…³çš„å†…æ ¸é¢†åŸŸçš„ç»´æŠ¤è€…ã€‚åˆ«å¤ªæ‹…å¿ƒå¯¹ä¸ä¸Šå·ã€‚å¦‚æžœä½ ä¸ç¡®å®šå°±å‘ç»™ -å’Œä½ æ‰€åšçš„事情相关的代ç 的负责人。 如果å¯é‡çŽ°è¯•ç€æè¿°æ€Žæ ·é‡æž„。 那甚至比oops更有 -价值。 - -å¦‚æžœä½ å¯¹äºŽå‘é€ç»™è°ä¸€æ— 所知, å‘ç»™linux-kernel@vger.kernel.orgã€‚æ„Ÿè°¢ä½ å¸®åŠ©Linux -å°½å¯èƒ½åœ°ç¨³å®šã€‚ - -Oops在哪里? ----------------------- - -通常Oops文本由klogdä»Žå†…æ ¸ç¼“å†²åŒºé‡Œè¯»å–å¹¶ä¼ ç»™syslogd,由syslogd写到syslog文件ä¸ï¼Œ -典型地是/var/log/messages(ä¾èµ–于/etc/syslog.conf)。有时klogd崩溃了,è¿™ç§æƒ…å†µä¸‹ä½ -能够è¿è¡Œdmesg > fileæ¥ä»Žå†…æ ¸ç¼“å†²åŒºä¸è¯»å–æ•°æ®å¹¶ä¿å˜ä¸‹æ¥ã€‚ å¦åˆ™ä½ å¯ä»¥ -cat /proc/kmsg > file, ç„¶è€Œä½ å¿…é¡»ä»‹å…¥ä¸æ¢ä¼ 输, kmsg是一个“永ä¸ç»“æŸçš„文件â€ã€‚如 -果机器崩溃ååˆ°ä½ ä¸èƒ½è¾“入命令或者ç£ç›˜ä¸å¯ç”¨é‚£ä¹ˆä½ 有三ç§é€‰æ‹©:- - -(1) 手抄å±å¹•ä¸Šçš„文本待机器é‡å¯åŽå†è¾“入计算机。 麻烦但如果没有针对崩溃的准备, -这是仅有的选择。 å¦å¤–ï¼Œä½ å¯ä»¥ç”¨æ•°ç 相机把å±å¹•æ‹ä¸‹æ¥-ä¸å¤ªå¥½ï¼Œä½†æ¯”没有强。 如果信 -æ¯æ»šåŠ¨åˆ°äº†ç»ˆç«¯çš„上é¢ï¼Œä½ 会å‘现以高分辩率å¯åŠ¨ï¼ˆæ¯”如,vga=791ï¼‰ä¼šè®©ä½ è¯»åˆ°æ›´å¤šçš„æ–‡ -本。(注æ„:这需è¦vesafb,所以对‘早期’的oops没有帮助) - -(2)用串å£ç»ˆç«¯å¯åŠ¨ï¼ˆè¯·å‚看Documentation/admin-guide/serial-console.rst),è¿è¡Œä¸€ä¸ªnull -modem到å¦ä¸€å°æœºå™¨å¹¶ç”¨ä½ 喜欢的通讯工具获å–输出。Minicom工作地很好。 - -(3)使用Kdump(请å‚看Documentation/admin-guide/kdump/kdump.rst), -使用在Documentation/admin-guide/kdump/gdbmacros.txtä¸å®šä¹‰çš„dmesg gdbå®ï¼Œä»Žæ—§çš„内å˜ä¸æå–å†…æ ¸ -环形缓冲区。 - -å®Œæ•´ä¿¡æ¯ ----------------- - -注æ„:以下æ¥è‡ªäºŽLinus的邮件适用于2.4å†…æ ¸ã€‚ æˆ‘å› ä¸ºåŽ†å²åŽŸå› ä¿ç•™äº†å®ƒï¼Œå¹¶ä¸”å› ä¸ºå…¶ä¸ -一些信æ¯ä»ç„¶é€‚用。 特别注æ„的是,请忽略任何ksymoops的引用。 - -From: Linus Torvalds <torvalds@osdl.org> - -æ€Žæ ·è·Ÿè¸ªOops.. [原å‘到linux-kernel的一å°é‚®ä»¶] - -主è¦çš„çªé—¨æ˜¯æœ‰äº”年和这些烦人的oops消æ¯æ‰“交é“çš„ç»éªŒ;-) - -å®žé™…ä¸Šï¼Œä½ æœ‰åŠžæ³•ä½¿å®ƒæ›´ç®€å•ã€‚我有两个ä¸åŒçš„方法: - - gdb /usr/src/linux/vmlinux - gdb> disassemble <offending_function> - -那是å‘现问题的简å•åŠžæ³•ï¼Œè‡³å°‘如果bug报告åšçš„å¥½çš„æƒ…å†µä¸‹ï¼ˆè±¡è¿™ä¸ªä¸€æ ·-è¿è¡Œksymoops -得到oopså‘生的函数åŠå‡½æ•°å†…çš„å移)。 - -哦,如果报告å‘ç”Ÿçš„å†…æ ¸ä»¥ç›¸åŒçš„编译器和相似的é…置编译它会有帮助的。 - -å¦ä¸€ä»¶è¦åšçš„事是å汇编bug报告的“Codeâ€éƒ¨åˆ†ï¼šksymoops也会用æ£ç¡®çš„工具æ¥åšè¿™ä»¶äº‹ï¼Œ -ä½†å¦‚æžœæ²¡æœ‰é‚£äº›å·¥å…·ä½ å¯ä»¥å†™ä¸€ä¸ªå‚»ç¨‹åºï¼š - - char str[] = "\xXX\xXX\xXX..."; - main(){} - -并用gcc -g编译它然åŽæ‰§è¡Œâ€œdisassemble strâ€ï¼ˆXX部分是由Oops报告的值-ä½ å¯ä»¥ä»…剪切 -粘贴并用“\xâ€æ›¿æ¢ç©ºæ ¼-我就是这么åšçš„ï¼Œå› ä¸ºæˆ‘æ‡’å¾—å†™ç¨‹åºè‡ªåŠ¨åšè¿™ä¸€åˆ‡ï¼‰ã€‚ - -å¦å¤–ï¼Œä½ å¯ä»¥ç”¨scripts/decodecode这个shell脚本。它的使用方法是: -decodecode < oops.txt - -“Codeâ€ä¹‹åŽçš„åå…进制å—节å¯èƒ½ï¼ˆåœ¨æŸäº›æž¶æž„上)有一些当å‰æŒ‡ä»¤ä¹‹å‰çš„指令å—èŠ‚ä»¥åŠ -当å‰å’Œä¹‹åŽçš„指令å—节 - -Code: f9 0f 8d f9 00 00 00 8d 42 0c e8 dd 26 11 c7 a1 60 ea 2b f9 8b 50 08 a1 -64 ea 2b f9 8d 34 82 8b 1e 85 db 74 6d 8b 15 60 ea 2b f9 <8b> 43 04 39 42 54 -7e 04 40 89 42 54 8b 43 04 3b 05 00 f6 52 c0 - -最åŽï¼Œå¦‚æžœä½ æƒ³çŸ¥é“代ç æ¥è‡ªå“ªé‡Œï¼Œä½ å¯ä»¥ï¼š - - cd /usr/src/linux - make fs/buffer.s # 或任何产生BUG的文件 - -然åŽä½ 会比gdbå汇编更清楚的知é“å‘生了什么。 - -çŽ°åœ¨ï¼Œé—®é¢˜æ˜¯æŠŠä½ æ‰€æ‹¥æœ‰çš„æ‰€æœ‰æ•°æ®ç»“åˆèµ·æ¥ï¼šCæºç ï¼ˆå…³äºŽå®ƒåº”è¯¥æ€Žæ ·çš„ä¸€èˆ¬çŸ¥è¯†ï¼‰ï¼Œ -汇编代ç åŠå…¶å汇编得到的代ç (å¦å¤–还有从“oopsâ€æ¶ˆæ¯å¾—到的寄å˜å™¨çŠ¶æ€-对了解æ¯åçš„ -æŒ‡é’ˆæœ‰ç”¨ï¼Œè€Œä¸”å½“ä½ æœ‰äº†æ±‡ç¼–ä»£ç ä½ ä¹Ÿèƒ½æ‹¿å…¶å®ƒçš„å¯„å˜å™¨å’Œä»»ä½•å®ƒä»¬å¯¹åº”çš„C表达å¼åšåŒ¹é… -)。 - -å®žé™…ä¸Šï¼Œä½ ä»…éœ€çœ‹çœ‹å“ªé‡Œä¸åŒ¹é…(这个例å是“Codeâ€å汇编和编译器生æˆçš„代ç ä¸åŒ¹é…)。 -然åŽä½ é¡»è¦æ‰¾å‡ºä¸ºä»€ä¹ˆä¸åŒ¹é…。通常很简å•-ä½ çœ‹åˆ°ä»£ç 使用了空指针然åŽä½ 看代ç æƒ³çŸ¥é“ -空指针是怎么出现的,还有检查它是å¦åˆæ³•.. - -现在,如果明白这是一项耗时的工作而且需è¦ä¸€ä¸ç‚¹å„¿çš„专心,没错。这就是我为什么大多 -åªæ˜¯å¿½ç•¥é‚£äº›æ²¡æœ‰ç¬¦å·è¡¨ä¿¡æ¯çš„å´©æºƒæŠ¥å‘Šçš„åŽŸå› ï¼šç®€å•çš„说太难查找了(我有一些 -程åºç”¨äºŽåœ¨å†…æ ¸ä»£ç 段ä¸æœç´¢ç‰¹å®šçš„模å¼ï¼Œè€Œä¸”有时我也已ç»èƒ½æ‰¾å‡ºé‚£äº›å´©æºƒçš„地方,但是 -仅仅是找出æ£ç¡®çš„åºåˆ—也确实需è¦ç›¸å½“æ‰Žå®žçš„å†…æ ¸çŸ¥è¯†ï¼‰ - -_有时_会å‘生这ç§æƒ…况,我仅看到崩溃ä¸çš„å汇编代ç åºåˆ—, 然åŽæˆ‘马上就明白问题出在 -哪里。这时我æ‰æ„识到自己干这个工作已ç»å¤ªé•¿æ—¶é—´äº†;-) - - Linus - - ---------------------------------------------------------------------------- -关于Oops跟踪的注解: - -为了帮助Linuså’Œå…¶å®ƒå†…æ ¸å¼€å‘者,klogd纳入了大é‡çš„支æŒæ¥å¤„ç†ä¿æŠ¤é”™è¯¯ã€‚为了拥有对 -地å€è§£æžçš„完整支æŒè‡³å°‘应该使用1.3-pl3çš„sysklogd包。 - -当ä¿æŠ¤é”™è¯¯å‘生时,klogdå®ˆæŠ¤è¿›ç¨‹è‡ªåŠ¨æŠŠå†…æ ¸æ—¥å¿—ä¿¡æ¯ä¸çš„é‡è¦åœ°å€ç¿»è¯‘æˆå®ƒä»¬ç›¸åº”的符 -å·ã€‚ - -klogd执行两ç§ç±»åž‹çš„地å€è§£æžã€‚首先是é™æ€ç¿»è¯‘其次是动æ€ç¿»è¯‘。é™æ€ç¿»è¯‘å’Œksymoops -ä¸€æ ·ä½¿ç”¨System.map文件。为了åšé™æ€ç¿»è¯‘klogd守护进程必须在åˆå§‹åŒ–时能找到system -map文件。关于klogdæ€Žæ ·æœç´¢map文件请å‚看klogd手册页。 - -动æ€åœ°å€ç¿»è¯‘åœ¨ä½¿ç”¨å†…æ ¸å¯è£…载模å—时很é‡è¦ã€‚ å› ä¸ºå†…æ ¸æ¨¡å—的内å˜æ˜¯ä»Žå†…æ ¸åŠ¨æ€å†…å˜æ± -里分é…的,所以ä¸ç®¡æ˜¯æ¨¡å—开始ä½ç½®è¿˜æ˜¯æ¨¡å—ä¸å‡½æ•°å’Œç¬¦å·çš„ä½ç½®éƒ½ä¸æ˜¯å›ºå®šçš„。 - -å†…æ ¸æ”¯æŒå…许程åºå†³å®šè£…载哪些模å—和它们在内å˜ä¸ä½ç½®çš„系统调用。使用这些系统调用 -klogd守护进程生æˆä¸€å¼ 符å·è¡¨ç”¨äºŽè°ƒè¯•å‘生在å¯è£…载模å—ä¸çš„ä¿æŠ¤é”™è¯¯ã€‚ - -至少klogd会æ供产生ä¿æŠ¤é”™è¯¯çš„模å—å。还å¯æœ‰é¢å¤–的符å·ä¿¡æ¯ä¾›å¯è£…载模å—å¼€å‘者选择 -以从模å—ä¸è¾“出符å·ä¿¡æ¯ã€‚ - -å› ä¸ºå†…æ ¸æ¨¡å—环境å¯èƒ½æ˜¯åŠ¨æ€çš„,所以必须有一ç§æœºåˆ¶å½“模å—环境å‘生改å˜æ—¶æ¥é€šçŸ¥klogd -守护进程。 有一些å¯ç”¨çš„命令行选项å…许klogdå‘当å‰æ‰§è¡Œä¸çš„守护进程å‘é€ä¿¡å·ï¼Œå‘ŠçŸ¥ç¬¦ -å·ä¿¡æ¯åº”该被刷新了。 更多信æ¯è¯·å‚看klogd手册页。 - -sysklogdå‘布时包å«ä¸€ä¸ªè¡¥ä¸ä¿®æ”¹äº†modules-2.0.0åŒ…ï¼Œæ— è®ºä½•æ—¶ä¸€ä¸ªæ¨¡å—装载或者å¸è½½éƒ½ -会自动å‘klogdå‘é€ä¿¡å·ã€‚打上这个补ä¸æ供了必è¦çš„对调试å‘ç”ŸäºŽå†…æ ¸å¯è£…载模å—çš„ä¿æŠ¤ -é”™è¯¯çš„æ— ç¼æ”¯æŒã€‚ - -以下是被klogd处ç†è¿‡çš„å‘生在å¯è£…载模å—ä¸çš„一个ä¿æŠ¤é”™è¯¯ä¾‹å: ---------------------------------------------------------------------------- -Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc -Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000 -Aug 29 09:51:01 blizard kernel: *pde = 00000000 -Aug 29 09:51:01 blizard kernel: Oops: 0002 -Aug 29 09:51:01 blizard kernel: CPU: 0 -Aug 29 09:51:01 blizard kernel: EIP: 0010:[oops:_oops+16/3868] -Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212 -Aug 29 09:51:01 blizard kernel: eax: 315e97cc ebx: 003a6f80 ecx: 001be77b edx: 00237c0c -Aug 29 09:51:01 blizard kernel: esi: 00000000 edi: bffffdb3 ebp: 00589f90 esp: 00589f8c -Aug 29 09:51:01 blizard kernel: ds: 0018 es: 0018 fs: 002b gs: 002b ss: 0018 -Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000) -Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001 -Aug 29 09:51:01 blizard kernel: 00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00 -Aug 29 09:51:01 blizard kernel: bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036 -Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128] -Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3 ---------------------------------------------------------------------------- - -Dr. G.W. Wettstein Oncology Research Div. Computing Facility -Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com -820 4th St. N. -Fargo, ND 58122 -Phone: 701-234-7556 - - ---------------------------------------------------------------------------- -å—æ±¡æŸ“çš„å†…æ ¸ - -一些oops报告在程åºè®°æ•°å™¨ä¹‹åŽåŒ…å«å—符串'Tainted: 'ã€‚è¿™è¡¨æ˜Žå†…æ ¸å·²ç»è¢«ä¸€äº›ä¸œè¥¿ç»™æ±¡ -染了。 该å—符串之åŽç´§è·Ÿç€ä¸€ç³»åˆ—çš„ä½ç½®æ•æ„Ÿçš„å—符,æ¯ä¸ªä»£è¡¨ä¸€ä¸ªç‰¹å®šçš„污染值。 - - 1:'G'如果所有装载的模å—都有GPL或相容的许å¯è¯ï¼Œ'P'如果装载了任何的专有模å—。 -没有模å—MODULE_LICENSE或者带有insmod认为是与GPLä¸ç›¸å®¹çš„çš„MODULE_LICENSE的模å—被 -认定是专有的。 - - 2:'F'如果有任何通过“insmod -fâ€è¢«å¼ºåˆ¶è£…载的模å—,' '如果所有模å—都被æ£å¸¸è£…载。 - - 3:'S'如果oopså‘生在SMPå†…æ ¸ä¸ï¼Œè¿è¡ŒäºŽæ²¡æœ‰è¯æ˜Žå®‰å…¨è¿è¡Œå¤šå¤„ç†å™¨çš„硬件。 当å‰è¿™ç§ -情况仅é™äºŽå‡ ç§ä¸æ”¯æŒSMP的速龙处ç†å™¨ã€‚ - - 4:'R'如果模å—通过“insmod -fâ€è¢«å¼ºåˆ¶è£…载,' '如果所有模å—都被æ£å¸¸è£…载。 - - 5:'M'如果任何处ç†å™¨æŠ¥å‘Šäº†æœºå™¨æ£€æŸ¥å¼‚常,' '如果没有å‘生机器检查异常。 - - 6:'B'如果页释放函数å‘现了一个错误的页引用或者一些éžé¢„æœŸçš„é¡µæ ‡å¿—ã€‚ - - 7:'U'如果用户或者用户应用程åºç‰¹åˆ«è¯·æ±‚è®¾ç½®æ±¡æŸ“æ ‡å¿—ï¼Œå¦åˆ™' '。 - - 8:'D'å¦‚æžœå†…æ ¸åˆšåˆšæ»æŽ‰ï¼Œæ¯”如有OOPS或者BUG。 - -使用'Tainted: 'å—符串的主è¦åŽŸå› 是è¦å‘Šè¯‰å†…æ ¸è°ƒè¯•è€…ï¼Œè¿™æ˜¯å¦æ˜¯ä¸€ä¸ªå¹²å‡€çš„å†…æ ¸äº¦æˆ–å‘ -生了任何的ä¸æ£å¸¸çš„事。污染是永久的:å³ä½¿å‡ºé”™çš„模å—å·²ç»è¢«å¸è½½äº†ï¼Œæ±¡æŸ“值ä»ç„¶å˜åœ¨ï¼Œ -ä»¥è¡¨æ˜Žå†…æ ¸ä¸å†å€¼å¾—信任。 diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst index 638d714bec83..fa28ef0a7fee 100644 --- a/Documentation/translations/zh_CN/process/coding-style.rst +++ b/Documentation/translations/zh_CN/process/coding-style.rst @@ -1,21 +1,23 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/process/coding-style.rst <codingstyle>` +:Original: Documentation/process/coding-style.rst .. _cn_codingstyle: -译者:: +:译者: + - å¼ ä¹ Zhang Le <r0bertz@gentoo.org> + - Andy Deng <theandy.deng@gmail.com> + - å´æƒ³æˆ <bobwxc@email.cn> - ä¸æ–‡ç‰ˆç»´æŠ¤è€…: å¼ ä¹ Zhang Le <r0bertz@gentoo.org> - ä¸æ–‡ç‰ˆç¿»è¯‘者: å¼ ä¹ Zhang Le <r0bertz@gentoo.org> - ä¸æ–‡ç‰ˆæ ¡è¯‘者: çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com> - wheelz <kernel.zeng@gmail.com> - 管æ—东 Xudong Guan <xudong.guan@gmail.com> - Li Zefan <lizf@cn.fujitsu.com> - Wang Chen <wangchen@cn.fujitsu.com> +:æ ¡è¯‘: + - çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com> + - wheelz <kernel.zeng@gmail.com> + - 管æ—东 Xudong Guan <xudong.guan@gmail.com> + - Li Zefan <lizf@cn.fujitsu.com> + - Wang Chen <wangchen@cn.fujitsu.com> Linux å†…æ ¸ä»£ç é£Žæ ¼ -========================= +================== 这是一个简çŸçš„文档,æ述了 linux å†…æ ¸çš„é¦–é€‰ä»£ç é£Žæ ¼ã€‚ä»£ç é£Žæ ¼æ˜¯å› äººè€Œå¼‚çš„ï¼Œ 而且我ä¸æ„¿æ„æŠŠè‡ªå·±çš„è§‚ç‚¹å¼ºåŠ ç»™ä»»ä½•äººï¼Œä½†è¿™å°±åƒæˆ‘去åšä»»ä½•äº‹æƒ…都必须éµå¾ªçš„原则 @@ -29,7 +31,7 @@ Linux å†…æ ¸ä»£ç é£Žæ ¼ 1) 缩进 --------------- +------- 制表符是 8 个å—符,所以缩进也是 8 个å—符。有些异端è¿åŠ¨è¯•å›¾å°†ç¼©è¿›å˜ä¸º 4 (甚至 2ï¼) å—ç¬¦æ·±ï¼Œè¿™å‡ ä¹Žç›¸å½“äºŽå°è¯•å°†åœ†å‘¨çŽ‡çš„值定义为 3。 @@ -73,6 +75,22 @@ Linux å†…æ ¸ä»£ç é£Žæ ¼ if (condition) do_this; do_something_everytime; +ä¸è¦ä½¿ç”¨é€—å·æ¥é¿å…使用大括å·ï¼š + +.. code-block:: c + + if (condition) + do_this(), do_that(); + +使用大括å·åŒ…裹多è¯å¥ï¼š + +.. code-block:: c + + if (condition) { + do_this(); + do_that(); + } + 也ä¸è¦åœ¨ä¸€è¡Œé‡Œæ”¾å¤šä¸ªèµ‹å€¼è¯å¥ã€‚å†…æ ¸ä»£ç é£Žæ ¼è¶…çº§ç®€å•ã€‚就是é¿å…å¯èƒ½å¯¼è‡´åˆ«äººè¯¯è¯» 的表达å¼ã€‚ @@ -83,20 +101,25 @@ Linux å†…æ ¸ä»£ç é£Žæ ¼ 2) 把长的行和å—符串打散 ------------------------------- +----------------------- 代ç é£Žæ ¼çš„æ„义就在于使用平常使用的工具æ¥ç»´æŒä»£ç çš„å¯è¯»æ€§å’Œå¯ç»´æŠ¤æ€§ã€‚ æ¯ä¸€è¡Œçš„长度的é™åˆ¶æ˜¯ 80 列,我们强烈建议您éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ã€‚ 长于 80 列的è¯å¥è¦æ‰“æ•£æˆæœ‰æ„义的片段。除éžè¶…过 80 åˆ—èƒ½æ˜¾è‘—å¢žåŠ å¯è¯»æ€§ï¼Œå¹¶ä¸”ä¸ -会éšè—ä¿¡æ¯ã€‚å片段è¦æ˜Žæ˜¾çŸäºŽæ¯ç‰‡æ®µï¼Œå¹¶æ˜Žæ˜¾é å³ã€‚è¿™åŒæ ·é€‚用于有ç€å¾ˆé•¿å‚数列表 -的函数头。然而,ç»å¯¹ä¸è¦æ‰“散对用户å¯è§çš„å—符串,例如 printk ä¿¡æ¯ï¼Œå› ä¸ºè¿™æ ·å°± +会éšè—ä¿¡æ¯ã€‚ + +å片段è¦æ˜Žæ˜¾çŸäºŽæ¯ç‰‡æ®µï¼Œå¹¶æ˜Žæ˜¾é å³ã€‚一ç§éžå¸¸å¸¸ç”¨çš„æ ·å¼æ˜¯å°†å体与函数左括å·å¯¹é½ã€‚ + +è¿™åŒæ ·é€‚用于有ç€å¾ˆé•¿å‚数列表的函数头。 + +然而,ç»å¯¹ä¸è¦æ‰“散对用户å¯è§çš„å—符串,例如 printk ä¿¡æ¯ï¼Œå› ä¸ºè¿™æ ·å°± 很难对它们 grep。 3) 大括å·å’Œç©ºæ ¼çš„放置 ------------------------------- +--------------------- C è¯è¨€é£Žæ ¼ä¸å¦å¤–一个常è§é—®é¢˜æ˜¯å¤§æ‹¬å·çš„放置。和缩进大å°ä¸åŒï¼Œé€‰æ‹©æˆ–弃用æŸç§æ”¾ ç½®ç–ç•¥å¹¶æ²¡æœ‰å¤šå°‘æŠ€æœ¯ä¸Šçš„åŽŸå› ï¼Œä¸è¿‡é¦–选的方å¼ï¼Œå°±åƒ Kernighan å’Œ Ritchie 展示 @@ -132,12 +155,12 @@ C è¯è¨€é£Žæ ¼ä¸å¦å¤–一个常è§é—®é¢˜æ˜¯å¤§æ‹¬å·çš„æ”¾ç½®ã€‚å’Œç¼©è¿›å¤§å° body of function } -全世界的异端å¯èƒ½ä¼šæŠ±æ€¨è¿™ä¸ªä¸ä¸€è‡´æ€§æ˜¯... 呃... ä¸ä¸€è‡´çš„,ä¸è¿‡æ‰€æœ‰æ€ç»´å¥å…¨çš„人 +全世界的异端å¯èƒ½ä¼šæŠ±æ€¨è¿™ä¸ªä¸ä¸€è‡´æ€§æ˜¯â€¦â€¦å‘ƒâ€¦â€¦ä¸ä¸€è‡´ï¼Œä¸è¿‡æ‰€æœ‰æ€ç»´å¥å…¨çš„人 éƒ½çŸ¥é“ (a) K&R 是 **æ£ç¡®çš„** 并且 (b) K&R 是æ£ç¡®çš„。æ¤å¤–,ä¸ç®¡æ€Žæ ·å‡½æ•°éƒ½æ˜¯ç‰¹ 殊的 (C 函数是ä¸èƒ½åµŒå¥—çš„)。 注æ„结æŸå¤§æ‹¬å·ç‹¬è‡ªå æ®ä¸€è¡Œï¼Œé™¤éžå®ƒåŽé¢è·Ÿç€åŒä¸€ä¸ªè¯å¥çš„剩余部分,也就是 do è¯ -å¥ä¸çš„ "while" 或者 if è¯å¥ä¸çš„ "else",åƒè¿™æ ·ï¼š +å¥ä¸çš„ ``while`` 或者 if è¯å¥ä¸çš„ ``else`` ,åƒè¿™æ ·ï¼š .. code-block:: c @@ -191,7 +214,7 @@ C è¯è¨€é£Žæ ¼ä¸å¦å¤–一个常è§é—®é¢˜æ˜¯å¤§æ‹¬å·çš„æ”¾ç½®ã€‚å’Œç¼©è¿›å¤§å° } 3.1) ç©ºæ ¼ -******************** +********* Linux å†…æ ¸çš„ç©ºæ ¼ä½¿ç”¨æ–¹å¼ (主è¦) å–决于它是用于函数还是关键å—。(大多数) å…³é”®å— åŽè¦åŠ ä¸€ä¸ªç©ºæ ¼ã€‚å€¼å¾—æ³¨æ„的例外是 sizeof, typeof, alignof å’Œ __attribute__,这 @@ -254,7 +277,7 @@ Linux å†…æ ¸çš„ç©ºæ ¼ä½¿ç”¨æ–¹å¼ (主è¦) å–决于它是用于函数还是关é 4) 命å ------------------------------- +------- C 是一个简朴的è¯è¨€ï¼Œä½ 的命åä¹Ÿåº”è¯¥è¿™æ ·ã€‚å’Œ Modula-2 å’Œ Pascal 程åºå‘˜ä¸åŒï¼Œ C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter è¿™æ ·åŽä¸½çš„åå—。C 程åºå‘˜ä¼š @@ -275,11 +298,31 @@ C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter è¿™æ ·åŽä¸½çš„åå— å¯èƒ½çš„è¯ã€‚类似的, ``tmp`` å¯ä»¥ç”¨æ¥ç§°å‘¼ä»»æ„类型的临时å˜é‡ã€‚ å¦‚æžœä½ æ€•æ··æ·†äº†ä½ çš„æœ¬åœ°å˜é‡åï¼Œä½ å°±é‡åˆ°å¦ä¸€ä¸ªé—®é¢˜äº†ï¼Œå«åšå‡½æ•°å¢žé•¿è·å°”蒙失衡综 -åˆç—‡ã€‚请看第å…ç« (函数)。 +åˆå¾ã€‚请看第å…ç« (函数)。 +对于符å·å称和文档,é¿å…引入新的“master/slaveâ€ï¼ˆæˆ–独立于“masterâ€çš„“slaveâ€ï¼‰ +和“blacklist/whitelistâ€ã€‚ + +“master/slaveâ€æŽ¨è替æ¢ä¸ºï¼š + '{primary,main} / {secondary,replica,subordinate}' + '{initiator,requester} / {target,responder}' + '{controller,host} / {device,worker,proxy}' + 'leader/follower' + 'director/performer' + +“blacklist/whitelistâ€æŽ¨è替æ¢ä¸ºï¼š + 'denylist/allowlist' + 'blocklist/passlist' + +引入新用法的例外情况是:维护用户空间ABI/API,或更新现有(截至2020年)硬件或 +å议规范的代ç æ—¶è¦æ±‚这些术è¯ã€‚对于新规范,尽å¯èƒ½å°†æœ¯è¯çš„规范用法转æ¢ä¸ºå†…æ ¸ +ç¼–ç æ ‡å‡†ã€‚ + +.. warning:: + 以上主从ã€é»‘白åå•è§„则ä¸é€‚用于ä¸æ–‡æ–‡æ¡£ï¼Œè¯·å‹¿æ›´æ”¹ä¸æ–‡æœ¯è¯ï¼ 5) Typedef ------------ +---------- ä¸è¦ä½¿ç”¨ç±»ä¼¼ ``vps_t`` 之类的东西。 @@ -308,7 +351,7 @@ C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter è¿™æ ·åŽä¸½çš„åå— .. note:: - ä¸é€æ˜Žæ€§å’Œ "访问函数" 本身是ä¸å¥½çš„。我们使用 pte_t ç‰ç±»åž‹çš„åŽŸå› åœ¨äºŽçœŸ + ä¸é€æ˜Žæ€§å’Œâ€œè®¿é—®å‡½æ•°â€æœ¬èº«æ˜¯ä¸å¥½çš„。我们使用 pte_t ç‰ç±»åž‹çš„åŽŸå› åœ¨äºŽçœŸ 的是完全没有任何共用的å¯è®¿é—®ä¿¡æ¯ã€‚ (b) 清楚的整数类型,如æ¤ï¼Œè¿™å±‚抽象就å¯ä»¥ **帮助** 消除到底是 ``int`` 还是 @@ -353,7 +396,7 @@ C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter è¿™æ ·åŽä¸½çš„åå— 6) 函数 ------------------------------- +------- 函数应该简çŸè€Œæ¼‚亮,并且åªå®Œæˆä¸€ä»¶äº‹æƒ…。函数应该å¯ä»¥ä¸€å±æˆ–者两å±æ˜¾ç¤ºå®Œ (我们 éƒ½çŸ¥é“ ISO/ANSI å±å¹•å¤§å°æ˜¯ 80x24),åªåšä¸€ä»¶äº‹æƒ…,而且把它åšå¥½ã€‚ @@ -383,12 +426,46 @@ C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter è¿™æ ·åŽä¸½çš„åå— } EXPORT_SYMBOL(system_is_up); -在函数原型ä¸ï¼ŒåŒ…å«å‡½æ•°å和它们的数æ®ç±»åž‹ã€‚虽然 C è¯è¨€é‡Œæ²¡æœ‰è¿™æ ·çš„è¦æ±‚,在 +6.1) 函数原型 +************* + +在函数原型ä¸åŒ…å«å‚æ•°å和它们的数æ®ç±»åž‹ã€‚虽然 C è¯è¨€é‡Œæ²¡æœ‰è¿™æ ·çš„è¦æ±‚,但在 Linux 里这是æ倡的åšæ³•ï¼Œå› ä¸ºè¿™æ ·å¯ä»¥å¾ˆç®€å•çš„给读者æ供更多的有价值的信æ¯ã€‚ +ä¸è¦åœ¨å‡½æ•°å£°æ˜Žé‡Œä½¿ç”¨ ``extern`` 关键å—ï¼Œå› ä¸ºè¿™ä¼šå¯¼è‡´ä»£ç è¡Œå˜é•¿ï¼Œå¹¶ä¸”ä¸æ˜¯ä¸¥æ ¼ +必需的。 + +写函数原型时,请ä¿æŒ `å…ƒç´ é¡ºåºè§„则 <https://lore.kernel.org/mm-commits/CAHk-=wiOCLRny5aifWNhr621kYrJwhfURsa0vFPeUEm8mF0ufg@mail.gmail.com/>`_ 。 +例如下列函数声明:: + + __init void * __must_check action(enum magic value, size_t size, u8 count, + char *fmt, ...) __printf(4, 5) __malloc; + +推èçš„å‡½æ•°åŽŸåž‹å…ƒç´ é¡ºåºæ˜¯ï¼š + +- 储å˜ç±»åž‹ï¼ˆä¸‹æ–¹çš„ ``static __always_inline`` ï¼Œæ³¨æ„ ``__always_inline`` + 技术上æ¥è®²æ˜¯ä¸ªå±žæ€§ä½†è¢«å½“åš ``inline`` ) +- 储å˜ç±»åž‹å±žæ€§ï¼ˆä¸Šæ–¹çš„ ``__init`` ——å³èŠ‚å£°æ˜Žï¼Œä½†ä¹Ÿåƒ ``__cold`` ) +- 返回类型(上方的 ``void *`` ) +- 返回类型属性(上方的 ``__must_check`` ) +- 函数å(上方的 ``action`` ) +- 函数å‚数(上方的 ``(enum magic value, size_t size, u8 count, char *fmt, ...)`` , + 注æ„必须写上å‚æ•°å) +- 函数å‚数属性(上方的 ``__printf(4, 5)`` ) +- 函数行为属性(上方的 ``__malloc`` ) + +请注æ„,对于函数 **定义** (å³å®žé™…函数体),编译器ä¸å…许在函数å‚数之åŽæ·»åŠ 函 +æ•°å‚数属性。在这ç§æƒ…况下,它们应该跟éšå˜å‚¨ç±»åž‹å±žæ€§ï¼ˆä¾‹å¦‚,与上é¢çš„ **声明** +示例相比,请注æ„下é¢çš„ ``__printf(4, 5)`` çš„ä½ç½®å‘生了å˜åŒ–):: + + static __always_inline __init __printf(4, 5) void * __must_check action(enum magic value, + size_t size, u8 count, char *fmt, ...) __malloc + { + ... + } 7) 集ä¸çš„函数退出途径 ------------------------------- +--------------------- 虽然被æŸäº›äººå£°ç§°å·²ç»è¿‡æ—¶ï¼Œä½†æ˜¯ goto è¯å¥çš„ç‰ä»·ç‰©è¿˜æ˜¯ç»å¸¸è¢«ç¼–译器所使用,具体 å½¢å¼æ˜¯æ— æ¡ä»¶è·³è½¬æŒ‡ä»¤ã€‚ @@ -432,7 +509,7 @@ Linux 里这是æ倡的åšæ³•ï¼Œå› ä¸ºè¿™æ ·å¯ä»¥å¾ˆç®€å•çš„给读者æä¾›æ› return result; } -一个需è¦æ³¨æ„的常è§é”™è¯¯æ˜¯ ``一个 err 错误`` ,就åƒè¿™æ ·ï¼š +一个需è¦æ³¨æ„的常è§é”™è¯¯æ˜¯ ``å• err 错误`` ,就åƒè¿™æ ·ï¼š .. code-block:: c @@ -456,19 +533,19 @@ Linux 里这是æ倡的åšæ³•ï¼Œå› ä¸ºè¿™æ ·å¯ä»¥å¾ˆç®€å•çš„给读者æä¾›æ› 8) 注释 ------------------------------- +------- 注释是好的,ä¸è¿‡æœ‰è¿‡åº¦æ³¨é‡Šçš„å±é™©ã€‚永远ä¸è¦åœ¨æ³¨é‡Šé‡Œè§£é‡Šä½ 的代ç 是如何è¿ä½œçš„: 更好的åšæ³•æ˜¯è®©åˆ«äººä¸€çœ‹ä½ 的代ç å°±å¯ä»¥æ˜Žç™½ï¼Œè§£é‡Šå†™çš„很差的代ç 是浪费时间。 -ä¸€èˆ¬çš„ï¼Œä½ æƒ³è¦ä½ çš„æ³¨é‡Šå‘Šè¯‰åˆ«äººä½ çš„ä»£ç åšäº†ä»€ä¹ˆï¼Œè€Œä¸æ˜¯æ€Žä¹ˆåšçš„ã€‚ä¹Ÿè¯·ä½ ä¸è¦æŠŠ +一般æ¥è¯´ä½ ç”¨æ³¨é‡Šå‘Šè¯‰åˆ«äººä½ çš„ä»£ç åšäº†ä»€ä¹ˆï¼Œè€Œä¸æ˜¯æ€Žä¹ˆåšçš„ã€‚ä¹Ÿè¯·ä½ ä¸è¦æŠŠ 注释放在一个函数体内部:如果函数å¤æ‚åˆ°ä½ éœ€è¦ç‹¬ç«‹çš„注释其ä¸çš„ä¸€éƒ¨åˆ†ï¼Œä½ å¾ˆå¯èƒ½ 需è¦å›žåˆ°ç¬¬å…ç« çœ‹ä¸€çœ‹ã€‚ä½ å¯ä»¥åšä¸€äº›å°æ³¨é‡Šæ¥æ³¨æ˜Žæˆ–è¦å‘ŠæŸäº›å¾ˆèªæ˜Ž (或者槽糕) çš„ åšæ³•ï¼Œä½†ä¸è¦åŠ å¤ªå¤šã€‚ä½ åº”è¯¥åšçš„,是把注释放在函数的头部,告诉人们它åšäº†ä»€ä¹ˆï¼Œ 也å¯ä»¥åŠ 上它åšè¿™äº›äº‹æƒ…çš„åŽŸå› ã€‚ -å½“æ³¨é‡Šå†…æ ¸ API 函数时,请使用 kernel-doc æ ¼å¼ã€‚请看 -Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚ +å½“æ³¨é‡Šå†…æ ¸ API 函数时,请使用 kernel-doc æ ¼å¼ã€‚è¯¦è§ +Documentation/translations/zh_CN/doc-guide/index.rst å’Œ scripts/kernel-doc 。 é•¿ (多行) æ³¨é‡Šçš„é¦–é€‰é£Žæ ¼æ˜¯ï¼š @@ -500,17 +577,18 @@ Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚ 9) ä½ å·²ç»æŠŠäº‹æƒ…弄糟了 ------------------------------- +--------------------- -è¿™æ²¡ä»€ä¹ˆï¼Œæˆ‘ä»¬éƒ½æ˜¯è¿™æ ·ã€‚å¯èƒ½ä½ 的使用了很长时间 Unix 的朋å‹å·²ç»å‘Šè¯‰ä½ +è¿™æ²¡ä»€ä¹ˆï¼Œæˆ‘ä»¬éƒ½æ˜¯è¿™æ ·ã€‚å¯èƒ½ä½ 长期使用 Unix 的朋å‹å·²ç»å‘Šè¯‰ä½ ``GNU emacs`` èƒ½è‡ªåŠ¨å¸®ä½ æ ¼å¼åŒ– C æºä»£ç ï¼Œè€Œä¸”ä½ ä¹Ÿæ³¨æ„åˆ°äº†ï¼Œç¡®å®žæ˜¯è¿™æ ·ï¼Œä¸è¿‡å®ƒ 所使用的默认值和我们想è¦çš„相去甚远 (实际上,甚至比éšæœºæ‰“的还è¦å·®â€”â€”æ— æ•°ä¸ªçŒ´å -在 GNU emacs 里打å—永远ä¸ä¼šåˆ›é€ 出一个好程åº) (译注:Infinite Monkey Theorem) +在 GNU emacs 里打å—永远ä¸ä¼šåˆ›é€ 出一个好程åº) +*(译注:Infinite Monkey Theorem)* æ‰€ä»¥ä½ è¦ä¹ˆæ”¾å¼ƒ GNU emacs,è¦ä¹ˆæ”¹å˜å®ƒè®©å®ƒä½¿ç”¨æ›´åˆç†çš„设定。è¦é‡‡ç”¨åŽä¸€ä¸ªæ–¹æ¡ˆï¼Œ ä½ å¯ä»¥æŠŠä¸‹é¢è¿™æ®µç²˜è´´åˆ°ä½ çš„ .emacs 文件里。 -.. code-block:: none +.. code-block:: elisp (defun c-lineup-arglist-tabs-only (ignored) "Line up argument lists by tabs, not spaces" @@ -529,7 +607,7 @@ Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚ (c-offsets-alist . ( (arglist-close . c-lineup-arglist-tabs-only) (arglist-cont-nonempty . - (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only)) + (c-lineup-gcc-asm-reg c-lineup-arglist-tabs-only)) (arglist-intro . +) (brace-list-intro . +) (c . c-lineup-C-comments) @@ -573,9 +651,14 @@ Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚ ``indent`` 有很多选项,特别是é‡æ–°æ ¼å¼åŒ–æ³¨é‡Šçš„æ—¶å€™ï¼Œä½ å¯èƒ½éœ€è¦çœ‹ä¸€ä¸‹å®ƒçš„手册。 ä¸è¿‡è®°ä½ï¼š ``indent`` ä¸èƒ½ä¿®æ£åçš„ç¼–ç¨‹ä¹ æƒ¯ã€‚ +请注æ„,您还å¯ä»¥ä½¿ç”¨ ``clang-format`` 工具帮助您处ç†è¿™äº›è§„则,快速自动é‡æ–°æ ¼ +å¼åŒ–部分代ç ,并审阅整个文件以å‘现代ç é£Žæ ¼é”™è¯¯ã€æ‰“å—错误和å¯èƒ½çš„æ”¹è¿›ã€‚å®ƒè¿˜å¯ +ä»¥æ–¹ä¾¿åœ°æŽ’åº ``#include`` ,对é½å˜é‡/å®ï¼Œé‡æŽ’文本和其他类似任务。 +è¯¦è§ Documentation/process/clang-format.rst 。 + 10) Kconfig é…置文件 ------------------------------- +-------------------- 对于é布æºç æ ‘çš„æ‰€æœ‰ Kconfig* é…置文件æ¥è¯´ï¼Œå®ƒä»¬ç¼©è¿›æ–¹å¼æœ‰æ‰€ä¸åŒã€‚ç´§æŒ¨ç€ ``config`` 定义的行,用一个制表符缩进,然而 help ä¿¡æ¯çš„缩进则é¢å¤–å¢žåŠ 2 个空 @@ -598,11 +681,11 @@ Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚ depends on ADFS_FS ... -è¦æŸ¥çœ‹é…置文件的完整文档,请看 Documentation/kbuild/kconfig-language.rst。 +è¦æŸ¥çœ‹é…置文件的完整文档,请看 Documentation/kbuild/kconfig-language.rst 。 11) æ•°æ®ç»“æž„ ------------------------------- +------------ 如果一个数æ®ç»“构,在创建和销æ¯å®ƒçš„å•çº¿æ‰§è¡ŒçŽ¯å¢ƒä¹‹å¤–å¯è§ï¼Œé‚£ä¹ˆå®ƒå¿…é¡»è¦æœ‰ä¸€ä¸ªå¼• ç”¨è®¡æ•°å™¨ã€‚å†…æ ¸é‡Œæ²¡æœ‰åžƒåœ¾æ”¶é›† (å¹¶ä¸”å†…æ ¸ä¹‹å¤–çš„åžƒåœ¾æ”¶é›†æ…¢ä¸”æ•ˆçŽ‡ä½Žä¸‹),这æ„味ç€ä½ @@ -626,7 +709,7 @@ mm_count),和文件系统 (``struct super_block``: s_count å’Œ s_active) ä¸æ‰ 12) å®ï¼Œæžšä¸¾å’ŒRTL ------------------------------- +----------------- 用于定义常é‡çš„å®çš„åå—åŠæžšä¸¾é‡Œçš„æ ‡ç¾éœ€è¦å¤§å†™ã€‚ @@ -638,7 +721,7 @@ mm_count),和文件系统 (``struct super_block``: s_count å’Œ s_active) ä¸æ‰ å®çš„åå—请用大写å—æ¯ï¼Œä¸è¿‡å½¢å¦‚函数的å®çš„åå—å¯ä»¥ç”¨å°å†™å—æ¯ã€‚ -一般的,如果能写æˆå†…è”函数就ä¸è¦å†™æˆåƒå‡½æ•°çš„å®ã€‚ +通常如果能写æˆå†…è”函数就ä¸è¦å†™æˆåƒå‡½æ•°çš„å®ã€‚ å«æœ‰å¤šä¸ªè¯å¥çš„å®åº”该被包å«åœ¨ä¸€ä¸ª do-while 代ç å—里: @@ -696,18 +779,18 @@ mm_count),和文件系统 (``struct super_block``: s_count å’Œ s_active) ä¸æ‰ (ret); \ }) -ret 是本地å˜é‡çš„通用åå— - __foo_ret æ›´ä¸å®¹æ˜“与一个已å˜åœ¨çš„å˜é‡å†²çªã€‚ +ret 是本地å˜é‡çš„通用åå——— __foo_ret æ›´ä¸å®¹æ˜“与一个已å˜åœ¨çš„å˜é‡å†²çªã€‚ cpp 手册对å®çš„讲解很详细。gcc internals 手册也详细讲解了 RTLï¼Œå†…æ ¸é‡Œçš„æ±‡ç¼–è¯ è¨€ç»å¸¸ç”¨åˆ°å®ƒã€‚ 13) 打å°å†…æ ¸æ¶ˆæ¯ ------------------------------- +---------------- -å†…æ ¸å¼€å‘者应该是å—过良好教育的。请一定注æ„å†…æ ¸ä¿¡æ¯çš„拼写,以给人以好的å°è±¡ã€‚ +å†…æ ¸å¼€å‘者应该看起æ¥æœ‰æ–‡åŒ–。请一定注æ„å†…æ ¸ä¿¡æ¯çš„拼写,以给人良好的å°è±¡ã€‚ ä¸è¦ç”¨ä¸è§„范的å•è¯æ¯”如 ``dont``,而è¦ç”¨ ``do not`` 或者 ``don't`` 。ä¿è¯è¿™äº›ä¿¡ -æ¯ç®€å•æ˜Žäº†,æ— æ§ä¹‰ã€‚ +æ¯ç®€å•æ˜Žäº†ã€æ— æ§ä¹‰ã€‚ å†…æ ¸ä¿¡æ¯ä¸å¿…以英文å¥å·ç»“æŸã€‚ @@ -724,17 +807,18 @@ dev_info() ç‰ç‰ã€‚对于那些ä¸å’ŒæŸä¸ªç‰¹å®šè®¾å¤‡ç›¸å…³è¿žçš„ä¿¡æ¯ï¼Œ<li 或设定了 CONFIG_DYNAMIC_DEBUG。实际这åŒæ ·æ˜¯ä¸ºäº† dev_dbg(),一个相关约定是在一 个已ç»å¼€å¯äº† DEBUG 时,使用 VERBOSE_DEBUG æ¥æ·»åŠ dev_vdbg()。 -许多å系统拥有 Kconfig 调试选项æ¥å¼€å¯ -DDEBUG 在对应的 Makefile 里é¢ï¼›åœ¨å…¶ä»– +许多å系统拥有 Kconfig 调试选项æ¥å¼€å¯å¯¹åº” Makefile 里é¢çš„ -DDEBUG;在其他 情况下,特殊文件使用 #define DEBUG。当一æ¡è°ƒè¯•ä¿¡æ¯éœ€è¦è¢«æ— æ¡ä»¶æ‰“å°æ—¶ï¼Œä¾‹å¦‚, 如果已ç»åŒ…å«ä¸€ä¸ªè°ƒè¯•ç›¸å…³çš„ #ifdef æ¡ä»¶ï¼Œprintk(KERN_DEBUG ...) å°±å¯è¢«ä½¿ç”¨ã€‚ 14) 分é…å†…å˜ ------------------------------- +------------ å†…æ ¸æ供了下é¢çš„一般用途的内å˜åˆ†é…函数: kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc() å’Œ vzalloc()。 -请å‚考 API 文档以获å–有关它们的详细信æ¯ã€‚ +请å‚考 API 文档以获å–有关它们的详细信æ¯ï¼š +Documentation/translations/zh_CN/core-api/memory-allocation.rst 。 ä¼ é€’ç»“æž„ä½“å¤§å°çš„首选形å¼æ˜¯è¿™æ ·çš„: @@ -761,11 +845,13 @@ kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc() å’Œ vzalloc()。 p = kcalloc(n, sizeof(...), ...); -两ç§å½¢å¼æ£€æŸ¥åˆ†é…å¤§å° n * sizeof(...) 的溢出,如果溢出返回 NULL。 +两ç§å½¢å¼éƒ½ä¼šæ£€æŸ¥åˆ†é… n * sizeof(...) 大å°æ—¶å†…å˜çš„溢出,如果溢出返回 NULL。 +在没有 __GFP_NOWARN 的情况下使用时,这些通用分é…函数都会在失败时å‘èµ·å †æ ˆè½¬å‚¨ï¼Œ +å› æ¤å½“返回NULL时,没有必è¦å‘出é¢å¤–的失败消æ¯ã€‚ 15) 内è”弊病 ------------------------------- +------------ 有一个常è§çš„误解是 ``内è”`` 是 gcc æ供的å¯ä»¥è®©ä»£ç è¿è¡Œæ›´å¿«çš„一个选项。虽然使 用内è”函数有时候是æ°å½“çš„ (比如作为一ç§æ›¿ä»£å®çš„æ–¹å¼ï¼Œè¯·çœ‹ç¬¬åäºŒç« ),ä¸è¿‡å¾ˆå¤šæƒ… @@ -786,7 +872,7 @@ inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 16) 函数返回值åŠå‘½å ------------------------------- +-------------------- 函数å¯ä»¥è¿”回多ç§ä¸åŒç±»åž‹çš„值,最常è§çš„一ç§æ˜¯è¡¨æ˜Žå‡½æ•°æ‰§è¡ŒæˆåŠŸæˆ–è€…å¤±è´¥çš„å€¼ã€‚è¿™æ · 的一个值å¯ä»¥è¡¨ç¤ºä¸ºä¸€ä¸ªé”™è¯¯ä»£ç æ•´æ•° (-Exxxï¼å¤±è´¥ï¼Œ0ï¼æˆåŠŸ) 或者一个 ``æˆåŠŸ`` @@ -797,7 +883,7 @@ inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 äº§ç”Ÿè¿™ç§ bug,请éµå¾ªä¸‹é¢çš„惯例:: 如果函数的åå—是一个动作或者强制性的命令,那么这个函数应该返回错误代 - ç 整数。如果是一个判æ–,那么函数应该返回一个 "æˆåŠŸ" 布尔值。 + ç 整数。如果是一个判æ–,那么函数应该返回一个“æˆåŠŸâ€å¸ƒå°”值。 比如, ``add work`` 是一个命令,所以 add_work() 在æˆåŠŸæ—¶è¿”回 0,在失败时返回 -EBUSYã€‚ç±»ä¼¼çš„ï¼Œå› ä¸º ``PCI device present`` 是一个判æ–,所以 pci_dev_present() @@ -806,13 +892,35 @@ inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 所有 EXPORTed 函数都必须éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ï¼Œæ‰€æœ‰çš„公共函数也都应该如æ¤ã€‚ç§æœ‰ (static) 函数ä¸éœ€è¦å¦‚æ¤ï¼Œä½†æ˜¯æˆ‘们也推èè¿™æ ·åšã€‚ -返回值是实际计算结果而ä¸æ˜¯è®¡ç®—是å¦æˆåŠŸçš„æ ‡å¿—çš„å‡½æ•°ä¸å—æ¤æƒ¯ä¾‹çš„é™åˆ¶ã€‚一般的, +返回值是实际计算结果而ä¸æ˜¯è®¡ç®—是å¦æˆåŠŸçš„æ ‡å¿—çš„å‡½æ•°ä¸å—æ¤æƒ¯ä¾‹çš„é™åˆ¶ã€‚通常 他们通过返回一些æ£å¸¸å€¼èŒƒå›´ä¹‹å¤–的结果æ¥è¡¨ç¤ºå‡ºé”™ã€‚典型的例å是返回指针的函数, 他们使用 NULL 或者 ERR_PTR 机制æ¥æŠ¥å‘Šé”™è¯¯ã€‚ +17) 使用布尔类型 +---------------- + +Linuxå†…æ ¸å¸ƒå°”ï¼ˆbool)类型是C99 _Bool类型的别å。布尔值åªèƒ½ä¸º0或1,而对布尔的 +éšå¼æˆ–显å¼è½¬æ¢å°†è‡ªåŠ¨å°†å€¼è½¬æ¢ä¸ºtrue或false。在使用布尔类型时 **ä¸éœ€è¦** æž„é€ ï¼Œ +它会消除一类错误。 + +使用布尔值时,应使用trueå’Œfalse定义,而ä¸æ˜¯1å’Œ0。 -17) ä¸è¦é‡æ–°å‘æ˜Žå†…æ ¸å® ------------------------------- +å¸ƒå°”å‡½æ•°è¿”å›žç±»åž‹å’Œå †æ ˆå˜é‡æ€»æ˜¯å¯ä»¥åœ¨é€‚当的时候使用。鼓励使用布尔æ¥æ高å¯è¯»æ€§ï¼Œ +并且布尔值在å˜å‚¨æ—¶é€šå¸¸æ¯”“intâ€æ›´å¥½ã€‚ + +如果缓å˜è¡Œå¸ƒå±€æˆ–值的大å°å¾ˆé‡è¦ï¼Œè¯·ä¸è¦ä½¿ç”¨å¸ƒå°”ï¼Œå› ä¸ºå…¶å¤§å°å’Œå¯¹é½æ–¹å¼æ ¹æ®ç¼–译 +的体系结构而ä¸åŒã€‚针对对é½å’Œå¤§å°è¿›è¡Œä¼˜åŒ–的结构体ä¸åº”使用布尔。 + +如果一个结构体有多个true/false值,请考虑将它们åˆå¹¶ä¸ºå…·æœ‰1比特æˆå‘˜çš„ä½åŸŸï¼Œæˆ–使 +用适当的固定宽度类型,如u8。 + +类似地,对于函数å‚数,多个true/false值å¯ä»¥åˆå¹¶ä¸ºå•ä¸ªæŒ‰ä½çš„â€œæ ‡å¿—â€å‚数,如果调 +用点具有裸true/false常é‡ï¼Œâ€œæ ‡å¿—â€å‚数通常是更具å¯è¯»æ€§çš„替代方法。 + +总之,在结构体和å‚æ•°ä¸æœ‰é™åœ°ä½¿ç”¨å¸ƒå°”å¯ä»¥æ高å¯è¯»æ€§ã€‚ + +18) ä¸è¦é‡æ–°å‘æ˜Žå†…æ ¸å® +---------------------- 头文件 include/linux/kernel.h 包å«äº†ä¸€äº›å®ï¼Œä½ 应该使用它们,而ä¸è¦è‡ªå·±å†™ä¸€äº› 它们的å˜ç§ã€‚æ¯”å¦‚ï¼Œå¦‚æžœä½ éœ€è¦è®¡ç®—ä¸€ä¸ªæ•°ç»„çš„é•¿åº¦ï¼Œä½¿ç”¨è¿™ä¸ªå® @@ -832,11 +940,11 @@ inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 åœ¨ä½ çš„ä»£ç 里自己é‡æ–°å®šä¹‰ã€‚ -18) 编辑器模å¼è¡Œå’Œå…¶ä»–需è¦ç½—嗦的事情 --------------------------------------------------- +19) 编辑器模å¼è¡Œå’Œå…¶ä»–需è¦ç½—嗦的事情 +------------------------------------ 有一些编辑器å¯ä»¥è§£é‡ŠåµŒå…¥åœ¨æºæ–‡ä»¶é‡Œçš„ç”±ä¸€äº›ç‰¹æ®Šæ ‡è®°æ ‡æ˜Žçš„é…置信æ¯ã€‚比如,emacs -èƒ½å¤Ÿè§£é‡Šè¢«æ ‡è®°æˆè¿™æ ·çš„行: +能够解æžè¢«æ ‡è®°æˆè¿™æ ·çš„行: .. code-block:: c @@ -852,7 +960,7 @@ inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 End: */ -Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š +Vim 能够解æžè¿™æ ·çš„æ ‡è®°ï¼š .. code-block:: c @@ -863,8 +971,8 @@ Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š 的模å¼ï¼Œæˆ–者使用其他å¯ä»¥äº§ç”Ÿæ£ç¡®çš„缩进的巧妙方法。 -19) 内è”汇编 ------------------------------- +20) 内è”汇编 +------------ 在特定架构的代ç ä¸ï¼Œä½ å¯èƒ½éœ€è¦å†…è”汇编与 CPU 和平å°ç›¸å…³åŠŸèƒ½è¿žæŽ¥ã€‚需è¦è¿™ä¹ˆåšæ—¶ å°±ä¸è¦çŠ¹è±«ã€‚然而,当 C å¯ä»¥å®Œæˆå·¥ä½œæ—¶ï¼Œä¸è¦å¹³ç™½æ— 故地使用内è”汇编。在å¯èƒ½çš„情 @@ -880,8 +988,8 @@ Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š ç§»é™¤äº†ã€‚ä½ ä¸å¿…æ€»æ˜¯è¿™æ ·åšï¼Œå°½ç®¡ï¼Œè¿™ä¸å¿…è¦çš„举动会é™åˆ¶ä¼˜åŒ–。 在写一个包å«å¤šæ¡æŒ‡ä»¤çš„å•ä¸ªå†…è”汇编è¯å¥æ—¶ï¼ŒæŠŠæ¯æ¡æŒ‡ä»¤ç”¨å¼•å·åˆ†å‰²è€Œä¸”å„å 一行, -除了最åŽä¸€æ¡æŒ‡ä»¤å¤–,在æ¯ä¸ªæŒ‡ä»¤ç»“å°¾åŠ ä¸Š \n\t,让汇编输出时å¯ä»¥æ£ç¡®åœ°ç¼©è¿›ä¸‹ä¸€æ¡ -指令: +除了最åŽä¸€æ¡æŒ‡ä»¤å¤–,在æ¯ä¸ªæŒ‡ä»¤ç»“å°¾åŠ ä¸Š ``\n\t`` ,让汇编输出时å¯ä»¥æ£ç¡®åœ°ç¼©è¿› +下一æ¡æŒ‡ä»¤ï¼š .. code-block:: c @@ -890,10 +998,10 @@ Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š : /* outputs */ : /* inputs */ : /* clobbers */); -20) æ¡ä»¶ç¼–译 ------------------------------- +21) æ¡ä»¶ç¼–译 +------------ -åªè¦å¯èƒ½ï¼Œå°±ä¸è¦åœ¨ .c 文件里é¢ä½¿ç”¨é¢„处ç†æ¡ä»¶ (#if, #ifdef)ï¼›è¿™æ ·åšè®©ä»£ç æ›´éš¾ +åªè¦å¯èƒ½ï¼Œå°±ä¸è¦åœ¨ .c 文件里é¢ä½¿ç”¨é¢„处ç†æ¡ä»¶ (#if, #ifdef)ï¼›è¿™æ ·åšä¼šè®©ä»£ç æ›´éš¾ 阅读并且更难去跟踪逻辑。替代方案是,在头文件ä¸ç”¨é¢„处ç†æ¡ä»¶æ供给那些 .c 文件 使用,å†ç»™ #else æ供一个空桩 (no-op stub) 版本,然åŽåœ¨ .c æ–‡ä»¶å†…æ— æ¡ä»¶åœ°è°ƒç”¨ 那些 (定义在头文件内的) å‡½æ•°ã€‚è¿™æ ·åšï¼Œç¼–译器会é¿å…为桩函数 (stub) çš„è°ƒç”¨ç”Ÿæˆ @@ -904,8 +1012,8 @@ Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š æ¡ä»¶åˆ°è¿™ä¸ªè¾…助函数内。 å¦‚æžœä½ æœ‰ä¸€ä¸ªåœ¨ç‰¹å®šé…ç½®ä¸ï¼Œå¯èƒ½å˜æˆæœªä½¿ç”¨çš„函数或å˜é‡ï¼Œç¼–译器会è¦å‘Šå®ƒå®šä¹‰äº†ä½† -æœªä½¿ç”¨ï¼ŒæŠŠå®ƒæ ‡è®°ä¸º __maybe_unused 而ä¸æ˜¯å°†å®ƒåŒ…å«åœ¨ä¸€ä¸ªé¢„处ç†æ¡ä»¶ä¸ã€‚(然而,如 -果一个函数或å˜é‡æ€»æ˜¯æœªä½¿ç”¨ï¼Œå°±ç›´æŽ¥åˆ 除它。) +æœªä½¿ç”¨ï¼Œè¯·æŠŠå®ƒæ ‡è®°ä¸º __maybe_unused 而ä¸æ˜¯å°†å®ƒåŒ…å«åœ¨ä¸€ä¸ªé¢„处ç†æ¡ä»¶ä¸ã€‚(然而, +如果一个函数或å˜é‡æ€»æ˜¯æœªä½¿ç”¨ï¼Œå°±ç›´æŽ¥åˆ 除它。) 在代ç ä¸ï¼Œå°½å¯èƒ½åœ°ä½¿ç”¨ IS_ENABLED å®æ¥è½¬åŒ–æŸä¸ª Kconfig æ ‡è®°ä¸º C 的布尔 表达å¼ï¼Œå¹¶åœ¨ä¸€èˆ¬çš„ C æ¡ä»¶ä¸ä½¿ç”¨å®ƒï¼š @@ -931,23 +1039,45 @@ Vim èƒ½å¤Ÿè§£é‡Šè¿™æ ·çš„æ ‡è®°ï¼š #endif /* CONFIG_SOMETHING */ -附录 I) å‚考 -------------------- +附录 I) å‚考资料 +---------------- -The C Programming Language, 第二版 +The C Programming Language, 2nd Edition 作者:Brian W. Kernighan å’Œ Denni M. Ritchie. Prentice Hall, Inc., 1988. -ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮). +ISBN 0-13-110362-8 (平装), 0-13-110370-9 (精装). + +.. note:: + + 《C程åºè®¾è®¡è¯è¨€ï¼ˆç¬¬2版)》 + 作者:[美] Brian W. Kernighan / [美] Dennis M. Ritchie + 译者:å¾å®æ–‡ / æŽå¿— / å°¤æ™‹å…ƒï¼ˆå®¡æ ¡ï¼‰ + 出版社:机械工业出版社,2019 + ISBN:9787111617945 The Practice of Programming 作者:Brian W. Kernighan å’Œ Rob Pike. Addison-Wesley, Inc., 1999. ISBN 0-201-61586-X. +.. note:: + + 《程åºè®¾è®¡å®žè·µã€‹ + 作者:[美] Brian W. Kernighan / [美] Rob Pike + 出版社:机械工业出版社,2005 + ISBN:9787111091578 + + 《程åºè®¾è®¡å®žè·µã€‹ + 作者:[美] Brian W. Kernighan / Rob Pike + 译者:裘宗燕 + 出版社:机械工业出版社,2000 + ISBN:9787111075738 + GNU 手册 - éµå¾ª K&R æ ‡å‡†å’Œæ¤æ–‡æœ¬ - cpp, gcc, gcc internals and indent, 都å¯ä»¥ä»Ž https://www.gnu.org/manual/ 找到 WG14 是 C è¯è¨€çš„å›½é™…æ ‡å‡†åŒ–å·¥ä½œç»„ï¼ŒURL: http://www.open-std.org/JTC1/SC22/WG14/ -Kernel process/coding-style.rst,作者 greg@kroah.com å‘表于 OLS 2002: +å†…æ ¸æ–‡æ¡£ Documentation/process/coding-style.rst, +作者 greg@kroah.com å‘表于 OLS 2002: http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ diff --git a/Documentation/translations/zh_CN/process/email-clients.rst b/Documentation/translations/zh_CN/process/email-clients.rst index 102023651118..34d51cdadc7b 100644 --- a/Documentation/translations/zh_CN/process/email-clients.rst +++ b/Documentation/translations/zh_CN/process/email-clients.rst @@ -1,17 +1,20 @@ -.. _cn_email_clients: +.. SPDX-License-Identifier: GPL-2.0-or-later .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/process/email-clients.rst <email_clients>` +.. _cn_email_clients: -译者:: +:Original: Documentation/process/email-clients.rst - ä¸æ–‡ç‰ˆç»´æŠ¤è€…: è´¾å¨å¨ Harry Wei <harryxiyou@gmail.com> - ä¸æ–‡ç‰ˆç¿»è¯‘者: è´¾å¨å¨ Harry Wei <harryxiyou@gmail.com> - 时奎亮 Alex Shi <alex.shi@linux.alibaba.com> - ä¸æ–‡ç‰ˆæ ¡è¯‘者: Yinglin Luan <synmyth@gmail.com> - Xiaochen Wang <wangxiaochen0@gmail.com> - yaxinsn <yaxinsn@163.com> +:译者: + - è´¾å¨å¨ Harry Wei <harryxiyou@gmail.com> + - 时奎亮 Alex Shi <alexs@kernel.org> + - å´æƒ³æˆ Wu XiangCheng <bobwxc@email.cn> + +:æ ¡è¯‘: + - Yinglin Luan <synmyth@gmail.com> + - Xiaochen Wang <wangxiaochen0@gmail.com> + - yaxinsn <yaxinsn@163.com> Linux邮件客户端é…ç½®ä¿¡æ¯ ======================= @@ -27,12 +30,17 @@ Git 改日志。如果工作æ£å¸¸ï¼Œå†å°†è¡¥ä¸å‘é€åˆ°ç›¸åº”的邮件列表。 -普通é…ç½® +通用é…ç½® -------- + Linuxå†…æ ¸è¡¥ä¸æ˜¯é€šè¿‡é‚®ä»¶è¢«æ交的,最好把补ä¸ä½œä¸ºé‚®ä»¶ä½“的内嵌文本。有些维护者 æŽ¥æ”¶é™„ä»¶ï¼Œä½†æ˜¯é™„ä»¶çš„å†…å®¹æ ¼å¼åº”该是"text/plain"。然而,附件一般是ä¸èµžæˆçš„, å› ä¸ºè¿™ä¼šä½¿è¡¥ä¸çš„引用部分在评论过程ä¸å˜çš„很困难。 +åŒæ—¶ä¹Ÿå¼ºçƒˆå»ºè®®åœ¨è¡¥ä¸æˆ–其他邮件的æ£æ–‡ä¸ä½¿ç”¨çº¯æ–‡æœ¬æ ¼å¼ã€‚https://useplaintext.email +有助于了解如何é…ç½®ä½ å–œæ¬¢çš„é‚®ä»¶å®¢æˆ·ç«¯ï¼Œå¹¶åœ¨æ‚¨è¿˜æ²¡æœ‰é¦–é€‰çš„æƒ…å†µä¸‹åˆ—å‡ºä¸€äº›æŽ¨èçš„ +客户端。 + 用æ¥å‘é€Linuxå†…æ ¸è¡¥ä¸çš„邮件客户端在å‘é€è¡¥ä¸æ—¶åº”该处于文本的原始状æ€ã€‚例如, 他们ä¸èƒ½æ”¹å˜æˆ–è€…åˆ é™¤åˆ¶è¡¨ç¬¦æˆ–è€…ç©ºæ ¼ï¼Œç”šè‡³æ˜¯åœ¨æ¯ä¸€è¡Œçš„开头或者结尾。 @@ -40,17 +48,17 @@ Linuxå†…æ ¸è¡¥ä¸æ˜¯é€šè¿‡é‚®ä»¶è¢«æ交的,最好把补ä¸ä½œä¸ºé‚®ä»¶ä½“çš„ ä¸è¦è®©ä½ 的邮件客户端进行自动æ¢è¡Œã€‚è¿™æ ·ä¹Ÿä¼šç ´åä½ çš„è¡¥ä¸ã€‚ -邮件客户端ä¸èƒ½æ”¹å˜æ–‡æœ¬çš„å—符集编ç æ–¹å¼ã€‚è¦å‘é€çš„è¡¥ä¸åªèƒ½æ˜¯ASCII或者UTF-8ç¼–ç æ–¹å¼ï¼Œ -å¦‚æžœä½ ä½¿ç”¨UTF-8ç¼–ç æ–¹å¼å‘é€é‚®ä»¶ï¼Œé‚£ä¹ˆä½ 将会é¿å…一些å¯èƒ½å‘生的å—符集问题。 +邮件客户端ä¸èƒ½æ”¹å˜æ–‡æœ¬çš„å—符集编ç æ–¹å¼ã€‚è¦å‘é€çš„è¡¥ä¸åªèƒ½æ˜¯ASCII或者UTF-8ç¼–ç +æ–¹å¼ï¼Œå¦‚æžœä½ ä½¿ç”¨UTF-8ç¼–ç æ–¹å¼å‘é€é‚®ä»¶ï¼Œé‚£ä¹ˆä½ 将会é¿å…一些å¯èƒ½å‘生的å—符集问题。 -邮件客户端应该形æˆå¹¶ä¸”ä¿æŒ References: 或者 In-Reply-To: æ ‡é¢˜ï¼Œé‚£ä¹ˆ -邮件è¯é¢˜å°±ä¸ä¼šä¸æ–。 +邮件客户端应该生æˆå¹¶ä¸”ä¿æŒâ€œReferences:â€æˆ–者“In-Reply-To:â€é‚®ä»¶å¤´ï¼Œè¿™æ ·é‚®ä»¶ä¼šè¯ +å°±ä¸ä¼šä¸æ–。 -å¤åˆ¶ç²˜å¸–(或者剪贴粘帖)通常ä¸èƒ½ç”¨äºŽè¡¥ä¸ï¼Œå› 为制表符会转æ¢ä¸ºç©ºæ ¼ã€‚使用xclipboard, xclip -或者xcutsel也许å¯ä»¥ï¼Œä½†æ˜¯æœ€å¥½æµ‹è¯•ä¸€ä¸‹æˆ–者é¿å…使用å¤åˆ¶ç²˜å¸–。 +å¤åˆ¶ç²˜å¸–(或者剪贴粘帖)通常ä¸èƒ½ç”¨äºŽè¡¥ä¸ï¼Œå› 为制表符会转æ¢ä¸ºç©ºæ ¼ã€‚使用xclipboard, +xclip或者xcutsel也许å¯ä»¥ï¼Œä½†æ˜¯æœ€å¥½æµ‹è¯•ä¸€ä¸‹æˆ–者é¿å…使用å¤åˆ¶ç²˜å¸–。 -ä¸è¦åœ¨ä½¿ç”¨PGP/GPGç½²å的邮件ä¸åŒ…å«è¡¥ä¸ã€‚è¿™æ ·ä¼šä½¿å¾—å¾ˆå¤šè„šæœ¬ä¸èƒ½è¯»å–å’Œé€‚ç”¨äºŽä½ çš„è¡¥ä¸ã€‚ -(这个问题应该是å¯ä»¥ä¿®å¤çš„) +ä¸è¦åœ¨ä½¿ç”¨PGP/GPGç¾å的邮件ä¸åŒ…å«è¡¥ä¸ã€‚è¿™æ ·ä¼šä½¿å¾—å¾ˆå¤šè„šæœ¬ä¸èƒ½è¯»å–å’Œé€‚ç”¨äºŽä½ çš„ +è¡¥ä¸ã€‚(这个问题应该是å¯ä»¥ä¿®å¤çš„) åœ¨ç»™å†…æ ¸é‚®ä»¶åˆ—è¡¨å‘é€è¡¥ä¸ä¹‹å‰ï¼Œç»™è‡ªå·±å‘é€ä¸€ä¸ªè¡¥ä¸æ˜¯ä¸ªä¸é”™çš„主æ„,ä¿å˜æŽ¥æ”¶åˆ°çš„ 邮件,将补ä¸ç”¨'patch'命令打上,如果æˆåŠŸäº†ï¼Œå†ç»™å†…æ ¸é‚®ä»¶åˆ—è¡¨å‘é€ã€‚ @@ -58,98 +66,133 @@ Linuxå†…æ ¸è¡¥ä¸æ˜¯é€šè¿‡é‚®ä»¶è¢«æ交的,最好把补ä¸ä½œä¸ºé‚®ä»¶ä½“çš„ 一些邮件客户端æ示 ------------------ + 这里给出一些详细的MUAé…ç½®æ示,å¯ä»¥ç”¨äºŽç»™Linuxå†…æ ¸å‘é€è¡¥ä¸ã€‚这些并ä¸æ„味是 所有的软件包é…置总结。 说明: -TUI = ä»¥æ–‡æœ¬ä¸ºåŸºç¡€çš„ç”¨æˆ·æŽ¥å£ -GUI = 图形界é¢ç”¨æˆ·æŽ¥å£ + +- TUI = ä»¥æ–‡æœ¬ä¸ºåŸºç¡€çš„ç”¨æˆ·æŽ¥å£ +- GUI = 图形界é¢ç”¨æˆ·æŽ¥å£ Alpine (TUI) -~~~~~~~~~~~~ +************ é…置选项: -在"Sending Preferences"部分: -- "Do Not Send Flowed Text"å¿…é¡»å¼€å¯ -- "Strip Whitespace Before Sending"å¿…é¡»å…³é— +在 :menuselection:`Sending Preferences` èœå•ï¼š + +- :menuselection:`Do Not Send Flowed Text` å¿…é¡»å¼€å¯ +- :menuselection:`Strip Whitespace Before Sending` å¿…é¡»å…³é— + +å½“å†™é‚®ä»¶æ—¶ï¼Œå…‰æ ‡åº”è¯¥æ”¾åœ¨è¡¥ä¸ä¼šå‡ºçŽ°çš„地方,然åŽæŒ‰ä¸‹ :kbd:`CTRL-R` 组åˆé”®ï¼Œä½¿æŒ‡ +定的补ä¸æ–‡ä»¶åµŒå…¥åˆ°é‚®ä»¶ä¸ã€‚ + +Claws Mail (GUI) +**************** + +å¯ä»¥ç”¨ï¼Œæœ‰äººç”¨å®ƒæˆåŠŸåœ°å‘过补ä¸ã€‚ -å½“å†™é‚®ä»¶æ—¶ï¼Œå…‰æ ‡åº”è¯¥æ”¾åœ¨è¡¥ä¸ä¼šå‡ºçŽ°çš„地方,然åŽæŒ‰ä¸‹CTRL-R组åˆé”®ï¼Œä½¿æŒ‡å®šçš„ -è¡¥ä¸æ–‡ä»¶åµŒå…¥åˆ°é‚®ä»¶ä¸ã€‚ +用 :menuselection:`Message-->Insert File` (:kbd:`CTRL-I`) 或外置编辑器æ’入补ä¸ã€‚ + +è‹¥è¦åœ¨Claws编辑窗å£é‡ä¿®æ”¹æ’入的补ä¸ï¼Œéœ€å…³é— +:menuselection:`Configuration-->Preferences-->Compose-->Wrapping` +çš„ `Auto wrapping` 。 Evolution (GUI) -~~~~~~~~~~~~~~~ +*************** -一些开å‘者æˆåŠŸçš„使用它å‘é€è¡¥ä¸ +一些开å‘者æˆåŠŸçš„使用它å‘é€è¡¥ä¸ã€‚ -当选择邮件选项:Preformat - 从Format->Heading->Preformatted (Ctrl-7)或者工具æ +撰写邮件时: +从 :menuselection:`æ ¼å¼-->段è½æ ·å¼-->é¢„æ ¼å¼åŒ–` (:kbd:`CTRL-7`) +或工具æ 选择 :menuselection:`é¢„æ ¼å¼åŒ–` ï¼› 然åŽä½¿ç”¨ï¼š - Insert->Text File... (Alt-n x)æ’入补ä¸æ–‡ä»¶ã€‚ +:menuselection:`æ’å…¥-->文本文件...` (:kbd:`ALT-N x`) æ’入补ä¸æ–‡ä»¶ã€‚ -ä½ è¿˜å¯ä»¥"diff -Nru old.c new.c | xclip",选择Preformat,然åŽä½¿ç”¨ä¸é—´é”®è¿›è¡Œç²˜å¸–。 +ä½ è¿˜å¯ä»¥ ``diff -Nru old.c new.c | xclip`` ,选择 :menuselection:`é¢„æ ¼å¼åŒ–` , +然åŽä½¿ç”¨é¼ æ ‡ä¸é”®è¿›è¡Œç²˜å¸–。 Kmail (GUI) -~~~~~~~~~~~ +*********** 一些开å‘者æˆåŠŸçš„使用它å‘é€è¡¥ä¸ã€‚ -默认设置ä¸ä¸ºHTMLæ ¼å¼æ˜¯åˆé€‚çš„ï¼›ä¸è¦å¯ç”¨å®ƒã€‚ +默认撰写设置ç¦ç”¨HTMLæ ¼å¼æ˜¯åˆé€‚çš„ï¼›ä¸è¦å¯ç”¨å®ƒã€‚ -当书写一å°é‚®ä»¶çš„时候,在选项下é¢ä¸è¦é€‰æ‹©è‡ªåŠ¨æ¢è¡Œã€‚å”¯ä¸€çš„ç¼ºç‚¹å°±æ˜¯ä½ åœ¨é‚®ä»¶ä¸è¾“入的任何文本 -都ä¸ä¼šè¢«è‡ªåŠ¨æ¢è¡Œï¼Œå› æ¤ä½ 必须在å‘é€è¡¥ä¸ä¹‹å‰æ‰‹åŠ¨æ¢è¡Œã€‚最简å•çš„方法就是å¯ç”¨è‡ªåŠ¨æ¢è¡Œæ¥ä¹¦å†™é‚®ä»¶ï¼Œ -然åŽæŠŠå®ƒä¿å˜ä¸ºè‰ç¨¿ã€‚ä¸€æ—¦ä½ åœ¨è‰ç¨¿ä¸å†æ¬¡æ‰“开它,它已ç»å…¨éƒ¨è‡ªåŠ¨æ¢è¡Œäº†ï¼Œé‚£ä¹ˆä½ 的邮件虽然没有 -选择自动æ¢è¡Œï¼Œä½†æ˜¯è¿˜ä¸ä¼šå¤±åŽ»å·²æœ‰çš„自动æ¢è¡Œã€‚ +当书写一å°é‚®ä»¶çš„时候,在选项下é¢ä¸è¦é€‰æ‹©è‡ªåŠ¨æ¢è¡Œã€‚å”¯ä¸€çš„ç¼ºç‚¹å°±æ˜¯ä½ åœ¨é‚®ä»¶ä¸è¾“ +入的任何文本都ä¸ä¼šè¢«è‡ªåŠ¨æ¢è¡Œï¼Œå› æ¤ä½ 必须在å‘é€è¡¥ä¸ä¹‹å‰æ‰‹åŠ¨æ¢è¡Œã€‚最简å•çš„方法 +就是å¯ç”¨è‡ªåŠ¨æ¢è¡Œæ¥ä¹¦å†™é‚®ä»¶ï¼Œç„¶åŽæŠŠå®ƒä¿å˜ä¸ºè‰ç¨¿ã€‚ä¸€æ—¦ä½ åœ¨è‰ç¨¿ä¸å†æ¬¡æ‰“开它,它 +å·²ç»å…¨éƒ¨è‡ªåŠ¨æ¢è¡Œäº†ï¼Œé‚£ä¹ˆä½ 的邮件虽然没有选择自动æ¢è¡Œï¼Œä½†æ˜¯è¿˜ä¸ä¼šå¤±åŽ»å·²æœ‰çš„自 +动æ¢è¡Œã€‚ -在邮件的底部,æ’入补ä¸ä¹‹å‰ï¼Œæ”¾ä¸Šå¸¸ç”¨çš„è¡¥ä¸å®šç•Œç¬¦ï¼šä¸‰ä¸ªè¿žå—å·(---)。 +在邮件的底部,æ’入补ä¸ä¹‹å‰ï¼Œæ”¾ä¸Šå¸¸ç”¨çš„è¡¥ä¸å®šç•Œç¬¦ï¼šä¸‰ä¸ªè¿žå—符(``---``)。 -然åŽåœ¨"Message"èœå•æ¡ç›®ï¼Œé€‰æ‹©æ’入文件,接ç€é€‰å–ä½ çš„è¡¥ä¸æ–‡ä»¶ã€‚还有一个é¢å¤–çš„é€‰é¡¹ï¼Œä½ å¯ä»¥ -通过它é…ç½®ä½ çš„é‚®ä»¶å»ºç«‹å·¥å…·æ èœå•ï¼Œè¿˜å¯ä»¥å¸¦ä¸Š"insert file"å›¾æ ‡ã€‚ +然åŽåœ¨ :menuselection:`信件` èœå•ï¼Œé€‰æ‹© :menuselection:`æ’入文本文件` ,接 +ç€é€‰å–ä½ çš„è¡¥ä¸æ–‡ä»¶ã€‚还有一个é¢å¤–çš„é€‰é¡¹ï¼Œä½ å¯ä»¥é€šè¿‡å®ƒé…ç½®ä½ çš„åˆ›å»ºæ–°é‚®ä»¶å·¥å…·æ , +åŠ ä¸Š :menuselection:`æ’入文本文件` å›¾æ ‡ã€‚ -ä½ å¯ä»¥å®‰å…¨åœ°é€šè¿‡GPGæ ‡è®°é™„ä»¶ï¼Œä½†æ˜¯å†…åµŒè¡¥ä¸æœ€å¥½ä¸è¦ä½¿ç”¨GPGæ ‡è®°å®ƒä»¬ã€‚ä½œä¸ºå†…åµŒæ–‡æœ¬çš„ç¾å‘è¡¥ä¸ï¼Œ -当从GPGä¸æå–7ä½ç¼–ç 时会使他们å˜çš„æ›´åŠ å¤æ‚。 +将编辑器窗å£æ‹‰åˆ°è¶³å¤Ÿå®½é¿å…折行。对于KMail 1.13.5 (KDE 4.5.4),它会在å‘é€é‚®ä»¶ +时对编辑器窗å£ä¸æ˜¾ç¤ºæŠ˜è¡Œçš„地方自动æ¢è¡Œã€‚在选项èœå•ä¸å–消自动æ¢è¡Œä»ä¸èƒ½è§£å†³ã€‚ +å› æ¤ï¼Œå¦‚æžœä½ çš„è¡¥ä¸ä¸æœ‰éžå¸¸é•¿çš„行,必须在å‘é€ä¹‹å‰æŠŠç¼–辑器窗å£æ‹‰å¾—éžå¸¸å®½ã€‚ +å‚è§ï¼šhttps://bugs.kde.org/show_bug.cgi?id=174034 -å¦‚æžœä½ éžè¦ä»¥é™„件的形å¼å‘é€è¡¥ä¸ï¼Œé‚£ä¹ˆå°±å³é”®ç‚¹å‡»é™„件,然åŽé€‰ä¸å±žæ€§ï¼Œçªå‡º"Suggest automatic -display"ï¼Œè¿™æ ·å†…åµŒé™„ä»¶æ›´å®¹æ˜“è®©è¯»è€…çœ‹åˆ°ã€‚ +ä½ å¯ä»¥å®‰å…¨åœ°ç”¨GPGç¾å附件,但是内嵌补ä¸æœ€å¥½ä¸è¦ä½¿ç”¨GPGç¾å它们。作为内嵌文本 +æ’入的ç¾åè¡¥ä¸å°†ä½¿å…¶éš¾ä»¥ä»Ž7-bitç¼–ç ä¸æå–。 -å½“ä½ è¦ä¿å˜å°†è¦å‘é€çš„内嵌文本补ä¸ï¼Œä½ å¯ä»¥ä»Žæ¶ˆæ¯åˆ—è¡¨çª—æ ¼é€‰æ‹©åŒ…å«è¡¥ä¸çš„邮件,然åŽå³å‡»é€‰æ‹© -"save as"ã€‚ä½ å¯ä»¥ä½¿ç”¨ä¸€ä¸ªæ²¡æœ‰æ›´æ”¹çš„包å«è¡¥ä¸çš„邮件,如果它是以æ£ç¡®çš„å½¢å¼ç»„æˆã€‚å½“ä½ æ£çœŸåœ¨å®ƒ -自己的窗å£ä¹‹ä¸‹å¯Ÿçœ‹ï¼Œé‚£æ—¶æ²¡æœ‰é€‰é¡¹å¯ä»¥ä¿å˜é‚®ä»¶--å·²ç»æœ‰ä¸€ä¸ªè¿™æ ·çš„bug被汇报到了kmailçš„bugzilla -并且希望这将会被处ç†ã€‚邮件是以åªé’ˆå¯¹æŸä¸ªç”¨æˆ·å¯è¯»å†™çš„æƒé™è¢«ä¿å˜çš„ï¼Œæ‰€ä»¥å¦‚æžœä½ æƒ³æŠŠé‚®ä»¶å¤åˆ¶åˆ°å…¶ä»–地方, -ä½ ä¸å¾—ä¸æŠŠä»–们的æƒé™æ”¹ä¸ºç»„或者整体å¯è¯»ã€‚ +å¦‚æžœä½ éžè¦ä»¥é™„件的形å¼å‘é€è¡¥ä¸ï¼Œé‚£ä¹ˆå°±å³é”®ç‚¹å‡»é™„件,然åŽé€‰æ‹© +:menuselection:`属性` ,打开 :menuselection:`建议自动显示` ,使附件内è”更容 +易让读者看到。 + +å½“ä½ è¦ä¿å˜å°†è¦å‘é€çš„内嵌文本补ä¸ï¼Œä½ å¯ä»¥ä»Žæ¶ˆæ¯åˆ—è¡¨çª—æ ¼é€‰æ‹©åŒ…å«è¡¥ä¸çš„邮件,然 +åŽå³é”®é€‰æ‹© :menuselection:`å¦å˜ä¸º` 。如果整个电å邮件的组æˆæ£ç¡®ï¼Œæ‚¨å¯ç›´æŽ¥å°† +其作为补ä¸ä½¿ç”¨ã€‚电å邮件以当å‰ç”¨æˆ·å¯è¯»å†™æƒé™ä¿å˜ï¼Œå› æ¤æ‚¨å¿…é¡» ``chmod`` ,以 +使其在å¤åˆ¶åˆ°åˆ«å¤„时用户组和其他人å¯è¯»ã€‚ Lotus Notes (GUI) -~~~~~~~~~~~~~~~~~ +***************** ä¸è¦ä½¿ç”¨å®ƒã€‚ +IBM Verse (Web GUI) +******************* + +åŒä¸Šæ¡ã€‚ + Mutt (TUI) -~~~~~~~~~~ +********** + +很多Linuxå¼€å‘人员使用mutt客户端,这è¯æ˜Žå®ƒè‚¯å®šå·¥ä½œå¾—éžå¸¸æ¼‚亮。 -很多Linuxå¼€å‘人员使用mutt客户端,所以è¯æ˜Žå®ƒè‚¯å®šå·¥ä½œçš„éžå¸¸æ¼‚亮。 +Muttä¸è‡ªå¸¦ç¼–辑器,所以ä¸ç®¡ä½ 使用什么编辑器,ä¸è‡ªåŠ¨æ–行就行。大多数编辑器都有 +:menuselection:`æ’入文件` 选项,它å¯ä»¥åœ¨ä¸æ”¹å˜æ–‡ä»¶å†…容的情况下æ’入文件。 -Muttä¸è‡ªå¸¦ç¼–辑器,所以ä¸ç®¡ä½ 使用什么编辑器都ä¸åº”该带有自动æ–行。大多数编辑器都带有 -一个"insert file"选项,它å¯ä»¥é€šè¿‡ä¸æ”¹å˜æ–‡ä»¶å†…容的方å¼æ’入文件。 +用 ``vim`` 作为mutt的编辑器:: -'vim'作为mutt的编辑器: set editor="vi" - 如果使用xclip,敲入以下命令 +如果使用xclip,敲入以下命令:: + :set paste - 按ä¸é”®ä¹‹å‰æˆ–者shift-insert或者使用 + +然åŽå†æŒ‰ä¸é”®æˆ–者shift-insert或者使用:: + :r filename -如果想è¦æŠŠè¡¥ä¸ä½œä¸ºå†…嵌文本。 -(a)ttach工作的很好,ä¸å¸¦æœ‰"set paste"。 +把补ä¸æ’入为内嵌文本。 +在未设置 ``set paste`` æ—¶(a)ttach工作的很好。 ä½ å¯ä»¥é€šè¿‡ ``git format-patch`` 生æˆè¡¥ä¸ï¼Œç„¶åŽç”¨ Muttå‘é€å®ƒä»¬:: - $ mutt -H 0001-some-bug-fix.patch + $ mutt -H 0001-some-bug-fix.patch é…置选项: + 它应该以默认设置的形å¼å·¥ä½œã€‚ -然而,把"send_charset"设置为"us-ascii::utf-8"也是一个ä¸é”™çš„主æ„。 +然而,把 ``send_charset`` 设置一下也是一个ä¸é”™çš„主æ„:: + + set send_charset="us-ascii:utf-8" Mutt 是高度å¯é…置的。 这里是个使用mutt通过 Gmail å‘é€çš„è¡¥ä¸çš„最å°é…ç½®:: @@ -178,71 +221,107 @@ Mutt 是高度å¯é…置的。 这里是个使用mutt通过 Gmail å‘é€çš„è¡¥ä¸ set from = "username@gmail.com" set use_from = yes -Mutt文档å«æœ‰æ›´å¤šä¿¡æ¯: +Mutt文档å«æœ‰æ›´å¤šä¿¡æ¯ï¼š - http://dev.mutt.org/trac/wiki/UseCases/Gmail + https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail - http://dev.mutt.org/doc/manual.html + http://www.mutt.org/doc/manual/ Pine (TUI) -~~~~~~~~~~ +********** Pineè¿‡åŽ»æœ‰ä¸€äº›ç©ºæ ¼åˆ å‡é—®é¢˜ï¼Œä½†æ˜¯è¿™äº›çŽ°åœ¨åº”该都被修å¤äº†ã€‚ -如果å¯ä»¥ï¼Œè¯·ä½¿ç”¨alpine(pine的继承者) +如果å¯ä»¥ï¼Œè¯·ä½¿ç”¨alpine(pine的继承者)。 é…置选项: -- 最近的版本需è¦æ¶ˆé™¤æµç¨‹æ–‡æœ¬ -- "no-strip-whitespace-before-send"选项也是需è¦çš„。 + +- æœ€è¿‘çš„ç‰ˆæœ¬éœ€è¦ ``quell-flowed-text`` +- ``no-strip-whitespace-before-send`` 选项也是需è¦çš„。 Sylpheed (GUI) -~~~~~~~~~~~~~~ +************** - 内嵌文本å¯ä»¥å¾ˆå¥½çš„工作(或者使用附件)。 - å…许使用外部的编辑器。 -- 对于目录较多时éžå¸¸æ…¢ã€‚ +- 收件箱较多时éžå¸¸æ…¢ã€‚ - 如果通过non-SSLè¿žæŽ¥ï¼Œæ— æ³•ä½¿ç”¨TLS SMTP授æƒã€‚ -- 在组æˆçª—å£ä¸æœ‰ä¸€ä¸ªå¾ˆæœ‰ç”¨çš„ruler bar。 -- 给地å€æœ¬ä¸æ·»åŠ 地å€å°±ä¸ä¼šæ£ç¡®çš„了解显示å。 +- 撰写窗å£çš„æ ‡å°ºå¾ˆæœ‰ç”¨ã€‚ +- 将地å€æ·»åŠ åˆ°é€šè®¯ç°¿æ—¶æ— æ³•æ£ç¡®ç†è§£æ˜¾ç¤ºçš„å称。 Thunderbird (GUI) -~~~~~~~~~~~~~~~~~ +***************** + +Thunderbird是Outlook的克隆版本,它很容易æŸå文本,但也有一些方法强制修æ£ã€‚ + +在完æˆä¿®æ”¹åŽï¼ˆåŒ…括安装扩展),您需è¦é‡æ–°å¯åŠ¨Thunderbird。 + +- å…许使用外部编辑器: + + 使用Thunderbirdå‘è¡¥ä¸æœ€ç®€å•çš„方法是使用扩展æ¥æ‰“开您最喜欢的外部编辑器。 + + 下é¢æ˜¯ä¸€äº›èƒ½å¤Ÿåšåˆ°è¿™ä¸€ç‚¹çš„æ‰©å±•æ ·ä¾‹ã€‚ + + - “External Editor Revived†+ + https://github.com/Frederick888/external-editor-revived + + https://addons.thunderbird.net/en-GB/thunderbird/addon/external-editor-revived/ + + 它需è¦å®‰è£…“本地消æ¯ä¸»æœºï¼ˆnative messaging host)â€ã€‚ + å‚è§ä»¥ä¸‹æ–‡æ¡£: + https://github.com/Frederick888/external-editor-revived/wiki + + - “External Editor†+ + https://github.com/exteditor/exteditor + + 下载并安装æ¤æ‰©å±•ï¼Œç„¶åŽæ‰“å¼€ :menuselection:`新建消æ¯` 窗å£, 用 + :menuselection:`查看-->工具æ -->自定义...` ç»™å®ƒå¢žåŠ ä¸€ä¸ªæŒ‰é’®ï¼Œç›´æŽ¥ç‚¹å‡»æ¤ + 按钮å³å¯ä½¿ç”¨å¤–置编辑器。 + + 请注æ„,“External Editorâ€è¦æ±‚ä½ çš„ç¼–è¾‘å™¨ä¸èƒ½fork,æ¢å¥è¯è¯´ï¼Œç¼–辑器必须在 + å…³é—å‰ä¸è¿”å›žã€‚ä½ å¯èƒ½éœ€è¦ä¼ 递é¢å¤–çš„å‚数或修改编辑器设置。最值得注æ„的是, + 如果您使用的是gvim,那么您必须将 :menuselection:`external editor` 设置的 + 编辑器å—段设置为 ``/usr/bin/gvim --nofork"`` (å‡è®¾å¯æ‰§è¡Œæ–‡ä»¶åœ¨ + ``/usr/bin`` ï¼‰ï¼Œä»¥ä¼ é€’ ``-f`` å‚数。如果您æ£åœ¨ä½¿ç”¨å…¶ä»–编辑器,请阅读其 + 手册了解如何处ç†ã€‚ -默认情况下,thunderbird很容易æŸå文本,但是还有一些方法å¯ä»¥å¼ºåˆ¶å®ƒå˜å¾—更好。 +è‹¥è¦ä¿®æ£å†…部编辑器,请执行以下æ“作: -- 在用户å¸å·è®¾ç½®é‡Œï¼Œç»„æˆå’Œå¯»å€ï¼Œä¸è¦é€‰æ‹©"Compose messages in HTML format"。 +- ä¿®æ”¹ä½ çš„Thunderbird设置,ä¸è¦ä½¿ç”¨ ``format=flowed`` ï¼ + 回到主窗å£ï¼ŒæŒ‰ç…§ + :menuselection:`主èœå•-->首选项-->常规-->é…置编辑器...` + 打开Thunderbirdçš„é…置编辑器。 -- ç¼–è¾‘ä½ çš„Thunderbirdé…置设置æ¥ä½¿å®ƒä¸è¦æ‹†è¡Œä½¿ç”¨ï¼šuser_pref("mailnews.wraplength", 0); + - å°† ``mailnews.send_plaintext_flowed`` 设为 ``false`` -- ç¼–è¾‘ä½ çš„Thunderbirdé…置设置,使它ä¸è¦ä½¿ç”¨"format=flowed"æ ¼å¼ï¼šuser_pref("mailnews. - send_plaintext_flowed", false); + - å°† ``mailnews.wraplength`` 从 ``72`` 改为 ``0`` -- ä½ éœ€è¦ä½¿Thunderbirdå˜ä¸ºé¢„å…ˆæ ¼å¼æ–¹å¼ï¼š - å¦‚æžœé»˜è®¤æƒ…å†µä¸‹ä½ ä¹¦å†™çš„æ˜¯HTMLæ ¼å¼ï¼Œé‚£ä¸æ˜¯å¾ˆéš¾ã€‚ä»…ä»…ä»Žæ ‡é¢˜æ 的下拉框ä¸é€‰æ‹©"Preformat"æ ¼å¼ã€‚ - å¦‚æžœé»˜è®¤æƒ…å†µä¸‹ä½ ä¹¦å†™çš„æ˜¯æ–‡æœ¬æ ¼å¼ï¼Œä½ ä¸å¾—把它改为HTMLæ ¼å¼ï¼ˆä»…仅作为一次性的)æ¥ä¹¦å†™æ–°çš„消æ¯ï¼Œ - 然åŽå¼ºåˆ¶ä½¿å®ƒå›žåˆ°æ–‡æœ¬æ ¼å¼ï¼Œå¦åˆ™å®ƒå°±ä¼šæ‹†è¡Œã€‚è¦å®žçŽ°å®ƒï¼Œåœ¨å†™ä¿¡çš„å›¾æ ‡ä¸Šä½¿ç”¨shifté”®æ¥ä½¿å®ƒå˜ä¸ºHTML - æ ¼å¼ï¼Œç„¶åŽæ ‡é¢˜æ 的下拉框ä¸é€‰æ‹©"Preformat"æ ¼å¼ã€‚ +- ä¸è¦å†™HTMLé‚®ä»¶ï¼ + 回到主窗å£ï¼Œæ‰“å¼€ + :menuselection:`主èœå•-->账户设置-->ä½ çš„@邮件.地å€-->通讯录/编写&地å€ç°¿` , + 关掉 ``以HTMLæ ¼å¼ç¼–写消æ¯`` 。 -- å…许使用外部的编辑器: - 针对Thunderbird打补ä¸æœ€ç®€å•çš„方法就是使用一个"external editor"扩展,然åŽä½¿ç”¨ä½ 最喜欢的 - $EDITORæ¥è¯»å–或者åˆå¹¶è¡¥ä¸åˆ°æ–‡æœ¬ä¸ã€‚è¦å®žçŽ°å®ƒï¼Œå¯ä»¥ä¸‹è½½å¹¶ä¸”安装这个扩展,然åŽæ·»åŠ 一个使用它的 - 按键View->Toolbars->Customize...最åŽå½“ä½ ä¹¦å†™ä¿¡æ¯çš„时候仅仅点击它就å¯ä»¥äº†ã€‚ +- åªç”¨çº¯æ–‡æœ¬æ ¼å¼æŸ¥çœ‹é‚®ä»¶ï¼ + 回到主窗å£ï¼Œ :menuselection:`主èœå•-->查看-->消æ¯ä½“为-->纯文本` ï¼ TkRat (GUI) -~~~~~~~~~~~ +*********** å¯ä»¥ä½¿ç”¨å®ƒã€‚使用"Insert file..."或者外部的编辑器。 Gmail (Web GUI) -~~~~~~~~~~~~~~~ +*************** ä¸è¦ä½¿ç”¨å®ƒå‘é€è¡¥ä¸ã€‚ Gmail网页客户端自动地把制表符转æ¢ä¸ºç©ºæ ¼ã€‚ -虽然制表符转æ¢ä¸ºç©ºæ ¼é—®é¢˜å¯ä»¥è¢«å¤–部编辑器解决,åŒæ—¶å®ƒè¿˜ä¼šä½¿ç”¨å›žè½¦æ¢è¡ŒæŠŠæ¯è¡Œæ‹†åˆ†ä¸º78个å—符。 +虽然制表符转æ¢ä¸ºç©ºæ ¼é—®é¢˜å¯ä»¥è¢«å¤–部编辑器解决,但它åŒæ—¶è¿˜ä¼šä½¿ç”¨å›žè½¦æ¢è¡ŒæŠŠæ¯è¡Œ +拆分为78个å—符。 -å¦ä¸€ä¸ªé—®é¢˜æ˜¯Gmail还会把任何ä¸æ˜¯ASCIIçš„å—符的信æ¯æ”¹ä¸ºbase64ç¼–ç 。它把东西å˜çš„åƒæ¬§æ´²äººçš„åå—。 +å¦ä¸€ä¸ªé—®é¢˜æ˜¯Gmail还会把任何å«æœ‰éžASCIIçš„å—符的消æ¯æ”¹ç”¨base64ç¼–ç ,如欧洲人的 +åå—。 - ### diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst index a64858d321fc..3d6ee21c74ae 100644 --- a/Documentation/translations/zh_CN/process/submit-checklist.rst +++ b/Documentation/translations/zh_CN/process/submit-checklist.rst @@ -1,105 +1,111 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/process/submit-checklist.rst <submitchecklist>` -:Translator: Alex Shi <alex.shi@linux.alibaba.com> +:Original: Documentation/process/submit-checklist.rst +:Translator: + - Alex Shi <alexs@kernel.org> + - Wu XiangCheng <bobwxc@email.cn> .. _cn_submitchecklist: -Linuxå†…æ ¸è¡¥ä¸æäº¤æ¸…å• -~~~~~~~~~~~~~~~~~~~~~ +Linuxå†…æ ¸è¡¥ä¸æäº¤æ£€æŸ¥å• +~~~~~~~~~~~~~~~~~~~~~~~ 如果开å‘äººå‘˜å¸Œæœ›çœ‹åˆ°ä»–ä»¬çš„å†…æ ¸è¡¥ä¸æ交更快地被接å—,那么他们应该åšä¸€äº›åŸºæœ¬ 的事情。 -这些都是在 -:ref:`Documentation/translations/zh_CN/process/submitting-patches.rst <cn_submittingpatches>` +这些都是在 Documentation/translations/zh_CN/process/submitting-patches.rst 和其他有关æ交Linuxå†…æ ¸è¡¥ä¸çš„文档ä¸æ供的。 -1) 如果使用工具,则包括定义/声明该工具的文件。ä¸è¦ä¾èµ–于其他头文件拉入您使用 +1) 如果使用工具,则包括定义/声明该工具的文件。ä¸è¦ä¾èµ–其他头文件æ¥å¼•å…¥æ‚¨ä½¿ç”¨ 的头文件。 2) 干净的编译: - a) 使用适用或修改的 ``CONFIG`` 选项 ``=y``ã€``=m`` å’Œ ``=n`` 。没有GCC + a) 使用åˆé€‚çš„ ``CONFIG`` 选项 ``=y``ã€``=m`` å’Œ ``=n`` 。没有 ``gcc`` è¦å‘Š/错误,没有链接器è¦å‘Š/错误。 - b) 通过allnoconfigã€allmodconfig + b) 通过 ``allnoconfig`` 〠``allmodconfig`` c) 使用 ``O=builddir`` æ—¶å¯ä»¥æˆåŠŸç¼–译 -3) 通过使用本地交å‰ç¼–译工具或其他一些构建场在多个CPU体系结构上构建。 + d) 任何 Doucmentation/ 下的å˜æ›´éƒ½èƒ½æˆåŠŸæž„建且ä¸å¼•å…¥æ–°è¦å‘Š/错误。 + 用 ``make htmldocs`` 或 ``make pdfdocs`` 检验构建情况并修å¤é—®é¢˜ã€‚ + +3) 通过使用本地交å‰ç¼–译工具或其他一些构建设施在多个CPU体系结构上构建。 4) PPC64是一ç§å¾ˆå¥½çš„交å‰ç¼–è¯‘æ£€æŸ¥ä½“ç³»ç»“æž„ï¼Œå› ä¸ºå®ƒå€¾å‘于对64ä½çš„æ•°ä½¿ç”¨æ— ç¬¦å· é•¿æ•´åž‹ã€‚ -5) 如下所述 :ref:`Documentation/translations/zh_CN/process/coding-style.rst <cn_codingstyle>`. - 检查您的补ä¸æ˜¯å¦ä¸ºå¸¸è§„æ ·å¼ã€‚在æ交( ``scripts/check patch.pl`` )之å‰ï¼Œ - 使用补ä¸æ ·å¼æ£€æŸ¥å™¨æ£€æŸ¥æ˜¯å¦æœ‰è½»å¾®çš„冲çªã€‚您应该能够处ç†æ‚¨çš„è¡¥ä¸ä¸å˜åœ¨çš„所有 +5) 按 Documentation/translations/zh_CN/process/coding-style.rst 所述检查您的 + è¡¥ä¸æ˜¯å¦ä¸ºå¸¸è§„æ ·å¼ã€‚在æ交之å‰ä½¿ç”¨è¡¥ä¸æ ·å¼æ£€æŸ¥å™¨ ``scripts/checkpatch.pl`` + 检查是å¦æœ‰è½»å¾®çš„冲çªã€‚您应该能够处ç†æ‚¨çš„è¡¥ä¸ä¸å˜åœ¨çš„所有 è¿è§„行为。 -6) 任何新的或修改过的 ``CONFIG`` 选项都ä¸ä¼šå¼„è„é…ç½®èœå•ï¼Œå¹¶é»˜è®¤ä¸ºå…³é—ï¼Œé™¤éž - å®ƒä»¬ç¬¦åˆ ``Documentation/kbuild/kconfig-language.rst`` ä¸è®°å½•çš„异常æ¡ä»¶, - èœå•å±žæ€§ï¼šé»˜è®¤å€¼. +6) 任何新的或修改过的 ``CONFIG`` 选项都ä¸åº”æžä¹±é…ç½®èœå•ï¼Œå¹¶é»˜è®¤ä¸ºå…³é—ï¼Œé™¤éž + å®ƒä»¬ç¬¦åˆ ``Documentation/kbuild/kconfig-language.rst`` èœå•å±žæ€§ï¼šé»˜è®¤å€¼ä¸ + 记录的例外æ¡ä»¶ã€‚ 7) 所有新的 ``kconfig`` 选项都有帮助文本。 8) 已仔细审查了相关的 ``Kconfig`` 组åˆã€‚这很难用测试æ¥çº æ£â€”—脑力在这里是有 回报的。 -9) 用 sparse 检查干净。 +9) 通过 sparse 清查。 + (å‚è§ Documentation/translations/zh_CN/dev-tools/sparse.rst ) 10) 使用 ``make checkstack`` å’Œ ``make namespacecheck`` 并修å¤ä»–们å‘现的任何 问题。 .. note:: - ``checkstack`` å¹¶æ²¡æœ‰æ˜Žç¡®æŒ‡å‡ºé—®é¢˜ï¼Œä½†æ˜¯ä»»ä½•ä¸€ä¸ªåœ¨å †æ ˆä¸Šä½¿ç”¨è¶…è¿‡512 + ``checkstack`` 并ä¸ä¼šæ˜Žç¡®æŒ‡å‡ºé—®é¢˜ï¼Œä½†æ˜¯ä»»ä½•ä¸€ä¸ªåœ¨å †æ ˆä¸Šä½¿ç”¨è¶…过512 å—节的函数都å¯ä»¥è¿›è¡Œæ›´æ”¹ã€‚ -11) 包括 :ref:`kernel-doc <kernel_doc>` å†…æ ¸æ–‡æ¡£ä»¥è®°å½•å…¨å±€å†…æ ¸API。(é™æ€å‡½æ•° - ä¸éœ€è¦ï¼Œä½†ä¹Ÿå¯ä»¥ã€‚)使用 ``make htmldocs`` 或 ``make pdfdocs`` 检查 - :ref:`kernel-doc <kernel_doc>` 并修å¤ä»»ä½•é—®é¢˜ã€‚ +11) 包括 :ref:`kernel-doc <kernel_doc_zh>` å†…æ ¸æ–‡æ¡£ä»¥è®°å½•å…¨å±€å†…æ ¸API。(é™æ€ + 函数ä¸éœ€è¦ï¼Œä½†ä¹Ÿå¯ä»¥ã€‚)使用 ``make htmldocs`` 或 ``make pdfdocs`` 检查 + :ref:`kernel-doc <kernel_doc_zh>` 并修å¤ä»»ä½•é—®é¢˜ã€‚ -12) 通过以下选项åŒæ—¶å¯ç”¨çš„测试 ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``, +12) 通过以下选项åŒæ—¶å¯ç”¨çš„测试: ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``, ``CONFIG_DEBUG_SLAB``, ``CONFIG_DEBUG_PAGEALLOC``, ``CONFIG_DEBUG_MUTEXES``, ``CONFIG_DEBUG_SPINLOCK``, ``CONFIG_DEBUG_ATOMIC_SLEEP``, - ``CONFIG_PROVE_RCU`` and ``CONFIG_DEBUG_OBJECTS_RCU_HEAD`` - -13) å·²ç»è¿‡æž„建和è¿è¡Œæ—¶æµ‹è¯•ï¼ŒåŒ…括有或没有 ``CONFIG_SMP``, ``CONFIG_PREEMPT``. + ``CONFIG_PROVE_RCU`` å’Œ ``CONFIG_DEBUG_OBJECTS_RCU_HEAD`` 。 -14) 如果补ä¸ç¨‹åºå½±å“IO/ç£ç›˜ç‰ï¼šä½¿ç”¨æˆ–ä¸ä½¿ç”¨ ``CONFIG_LBDAF`` 进行测试。 +13) 在 ``CONFIG_SMP``, ``CONFIG_PREEMPT`` å¼€å¯å’Œå…³é—的情况下都进行构建和è¿è¡Œ + 时测试。 -15) 所有代ç 路径都已在å¯ç”¨æ‰€æœ‰lockdep功能的情况下è¿è¡Œã€‚ +14) 所有代ç 路径都已在å¯ç”¨æ‰€æœ‰æ»é”检测(lockdep)功能的情况下è¿è¡Œã€‚ -16) 所有新的/procæ¡ç›®éƒ½è®°å½•åœ¨ ``Documentation/`` +15) 所有新的 ``/proc`` æ¡ç›®éƒ½è®°å½•åœ¨ ``Documentation/`` -17) æ‰€æœ‰æ–°çš„å†…æ ¸å¼•å¯¼å‚数都记录在 +16) æ‰€æœ‰æ–°çš„å†…æ ¸å¼•å¯¼å‚数都记录在 Documentation/admin-guide/kernel-parameters.rst ä¸ã€‚ -18) 所有新的模å—å‚数都记录在 ``MODULE_PARM_DESC()`` +17) 所有新的模å—å‚数都记录在 ``MODULE_PARM_DESC()`` -19) 所有新的用户空间接å£éƒ½è®°å½•åœ¨ ``Documentation/ABI/`` ä¸ã€‚有关详细信æ¯ï¼Œ +18) 所有新的用户空间接å£éƒ½è®°å½•åœ¨ ``Documentation/ABI/`` ä¸ã€‚有关详细信æ¯ï¼Œ 请å‚阅 ``Documentation/ABI/README`` 。更改用户空间接å£çš„è¡¥ä¸åº”è¯¥æŠ„é€ linux-api@vger.kernel.org。 -20) 已通过至少注入slabå’Œpage分é…失败进行检查。请å‚阅 ``Documentation/fault-injection/`` +19) 已通过至少注入slabå’Œpage分é…失败进行检查。请å‚阅 ``Documentation/fault-injection/`` 。 如果新代ç æ˜¯å®žè´¨æ€§çš„ï¼Œé‚£ä¹ˆæ·»åŠ å系统特定的故障注入å¯èƒ½æ˜¯åˆé€‚的。 -21) æ–°æ·»åŠ çš„ä»£ç å·²ç»ç”¨ ``gcc -W`` 编译(使用 ``make EXTRA-CFLAGS=-W`` )。这 +20) æ–°æ·»åŠ çš„ä»£ç å·²ç»ç”¨ ``gcc -W`` 编译(使用 ``make EXTRA-CFLAGS=-W`` )。这 将产生大é‡å™ªå£°ï¼Œä½†å¯¹äºŽæŸ¥æ‰¾è¯¸å¦‚“è¦å‘Šï¼šæœ‰ç¬¦å·å’Œæ— 符å·ä¹‹é—´çš„比较â€ä¹‹ç±»çš„错误 很有用。 -22) 在它被åˆå¹¶åˆ°-mmè¡¥ä¸é›†ä¸ä¹‹åŽè¿›è¡Œæµ‹è¯•ï¼Œä»¥ç¡®ä¿å®ƒä»ç„¶ä¸Žæ‰€æœ‰å…¶ä»–排队的补ä¸ä»¥ +21) 在它被åˆå¹¶åˆ°-mmè¡¥ä¸é›†ä¸ä¹‹åŽè¿›è¡Œæµ‹è¯•ï¼Œä»¥ç¡®ä¿å®ƒä»ç„¶ä¸Žæ‰€æœ‰å…¶ä»–排队的补ä¸ä»¥ åŠVMã€VFS和其他å系统ä¸çš„å„ç§æ›´æ”¹ä¸€èµ·å·¥ä½œã€‚ -23) 所有内å˜å±éšœä¾‹å¦‚ ``barrier()``, ``rmb()``, ``wmb()`` 都需è¦æºä»£ç ä¸çš„注 +22) 所有内å˜å±éšœï¼ˆä¾‹å¦‚ ``barrier()``, ``rmb()``, ``wmb()`` )都需è¦æºä»£ç 注 释æ¥è§£é‡Šå®ƒä»¬æ£åœ¨æ‰§è¡Œçš„æ“作åŠå…¶åŽŸå› 的逻辑。 -24) 如果补ä¸æ·»åŠ 了任何ioctl,那么也è¦æ›´æ–° ``Documentation/userspace-api/ioctl/ioctl-number.rst`` +23) 如果补ä¸æ·»åŠ 了任何ioctl,那么也è¦æ›´æ–° + ``Documentation/userspace-api/ioctl/ioctl-number.rst`` 。 -25) 如果修改åŽçš„æºä»£ç ä¾èµ–或使用与以下 ``Kconfig`` 符å·ç›¸å…³çš„ä»»ä½•å†…æ ¸API或 +24) 如果修改åŽçš„æºä»£ç ä¾èµ–或使用与以下 ``Kconfig`` 符å·ç›¸å…³çš„ä»»ä½•å†…æ ¸API或 功能,则在ç¦ç”¨ç›¸å…³ ``Kconfig`` 符å·å’Œ/或 ``=m`` (如果该选项å¯ç”¨ï¼‰çš„情况 下测试以下多个构建[并éžæ‰€æœ‰è¿™äº›éƒ½åŒæ—¶å˜åœ¨ï¼Œåªæ˜¯å®ƒä»¬çš„å„ç§/éšæœºç»„åˆ]: - ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``, ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``, - ``CONFIG_NET``, ``CONFIG_INET=n`` (但是åŽè€…ä¼´éš ``CONFIG_NET=y``). + ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``, + ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``, + ``CONFIG_NET``, ``CONFIG_INET=n`` (但是最åŽä¸€ä¸ªéœ€è¦ ``CONFIG_NET=y`` )。 diff --git a/Documentation/translations/zh_CN/process/submitting-patches.rst b/Documentation/translations/zh_CN/process/submitting-patches.rst index ebb7f37575c1..f8978f02057c 100644 --- a/Documentation/translations/zh_CN/process/submitting-patches.rst +++ b/Documentation/translations/zh_CN/process/submitting-patches.rst @@ -1,142 +1,92 @@ -.. _cn_submittingpatches: +.. SPDX-License-Identifier: GPL-2.0-or-later .. include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/process/submitting-patches.rst <submittingpatches>` +.. _cn_submittingpatches: + +:Original: Documentation/process/submitting-patches.rst -译者:: +:译者: + - 钟宇 TripleX Chung <xxx.phy@gmail.com> + - 时奎亮 Alex Shi <alexs@kernel.org> + - å´æƒ³æˆ Wu XiangCheng <bobwxc@email.cn> - ä¸æ–‡ç‰ˆç»´æŠ¤è€…: 钟宇 TripleX Chung <xxx.phy@gmail.com> - ä¸æ–‡ç‰ˆç¿»è¯‘者: 钟宇 TripleX Chung <xxx.phy@gmail.com> - 时奎亮 Alex Shi <alex.shi@linux.alibaba.com> - ä¸æ–‡ç‰ˆæ ¡è¯‘者: æŽé˜³ Li Yang <leoyang.li@nxp.com> - çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com> +:æ ¡è¯‘: + - æŽé˜³ Li Yang <leoyang.li@nxp.com> + - çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com> -å¦‚ä½•è®©ä½ çš„æ”¹åŠ¨è¿›å…¥å†…æ ¸ -====================== +æ交补ä¸ï¼šå¦‚ä½•è®©ä½ çš„æ”¹åŠ¨è¿›å…¥å†…æ ¸ +================================ 对于想è¦å°†æ”¹åŠ¨æ交到 Linux å†…æ ¸çš„ä¸ªäººæˆ–è€…å…¬å¸æ¥è¯´ï¼Œå¦‚æžœä¸ç†Ÿæ‚‰â€œè§„矩â€ï¼Œ -æ交的æµç¨‹ä¼šè®©äººç•æƒ§ã€‚本文档收集了一系列建议,这些建议å¯ä»¥å¤§å¤§çš„æé«˜ä½ +æ交的æµç¨‹ä¼šè®©äººç•æƒ§ã€‚本文档包å«äº†ä¸€ç³»åˆ—建议,å¯ä»¥å¤§å¤§æé«˜ä½ çš„æ”¹åŠ¨è¢«æŽ¥å—的机会. -以下文档å«æœ‰å¤§é‡ç®€æ´çš„建议, 具体请è§ï¼š -:ref:`Documentation/process <development_process_main>` -åŒæ ·ï¼Œ:ref:`Documentation/translations/zh_CN/process/submit-checklist.rst <cn_submitchecklist>` -给出在æ交代ç å‰éœ€è¦æ£€æŸ¥çš„项目的列表。 +本文档以较为简æ´çš„行文给出了大é‡å»ºè®®ã€‚å…³äºŽå†…æ ¸å¼€å‘æµç¨‹å¦‚何进行的详细信æ¯ï¼Œ +å‚è§ï¼š Documentation/translations/zh_CN/process/development-process.rst 。 +Documentation/translations/zh_CN/process/submit-checklist.rst 给出了一系列 +æ交补ä¸ä¹‹å‰è¦æ£€æŸ¥çš„äº‹é¡¹ã€‚è®¾å¤‡æ ‘ç›¸å…³çš„è¡¥ä¸ï¼Œè¯·å‚阅 +Documentation/devicetree/bindings/submitting-patches.rst 。 -å…¶ä¸è®¸å¤šæ¥éª¤æ述了Git版本控制系统的默认行为;如果您使用Gitæ¥å‡†å¤‡è¡¥ä¸ï¼Œ -您将å‘现它为您完æˆçš„大部分机械工作,尽管您ä»ç„¶éœ€è¦å‡†å¤‡å’Œè®°å½•ä¸€ç»„åˆç†çš„ -è¡¥ä¸ã€‚一般æ¥è¯´ï¼Œä½¿ç”¨gitå°†ä½¿æ‚¨ä½œä¸ºå†…æ ¸å¼€å‘人员的生活更轻æ¾ã€‚ +本文档å‡è®¾æ‚¨æ£åœ¨ä½¿ç”¨ ``git`` å‡†å¤‡ä½ çš„è¡¥ä¸ã€‚如果您ä¸ç†Ÿæ‚‰ ``git`` ,最好å¦ä¹ +å¦‚ä½•ä½¿ç”¨å®ƒï¼Œè¿™å°†ä½¿æ‚¨ä½œä¸ºå†…æ ¸å¼€å‘人员的生活å˜å¾—æ›´åŠ è½»æ¾ã€‚ +部分åç³»ç»Ÿå’Œç»´æŠ¤äººå‘˜çš„æ ‘æœ‰ä¸€äº›å…³äºŽå…¶å·¥ä½œæµç¨‹å’Œè¦æ±‚çš„é¢å¤–ä¿¡æ¯ï¼Œè¯·å‚阅 +Documentation/process/maintainer-handbooks.rst 。 -0) 获å–当å‰æºç æ ‘ ------------------ +获å–当å‰æºç æ ‘ +-------------- -如果您没有一个å¯ä»¥ä½¿ç”¨å½“å‰å†…æ ¸æºä»£ç çš„å˜å‚¨åº“,请使用git获å–ä¸€ä¸ªã€‚æ‚¨å°†è¦ -从主线å˜å‚¨åº“开始,它å¯ä»¥é€šè¿‡ä»¥ä¸‹æ–¹å¼èŽ·å–:: +如果您手头没有当å‰å†…æ ¸æºä»£ç çš„å˜å‚¨åº“,请使用 ``git`` 获å–一份。您需è¦å…ˆèŽ·å– +主线å˜å‚¨åº“,它å¯ä»¥é€šè¿‡ä»¥ä¸‹å‘½ä»¤æ‹‰å–:: - git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git -但是,请注æ„,您å¯èƒ½ä¸å¸Œæœ›ç›´æŽ¥é’ˆå¯¹ä¸»çº¿æ ‘进行开å‘。大多数åç³»ç»Ÿç»´æŠ¤äººå‘˜è¿ +但是,请注æ„,您å¯èƒ½ä¸æƒ³ç›´æŽ¥é’ˆå¯¹ä¸»çº¿æ ‘进行开å‘。大多数åç³»ç»Ÿç»´æŠ¤äººå‘˜è¿ è¡Œè‡ªå·±çš„æ ‘ï¼Œå¹¶å¸Œæœ›çœ‹åˆ°é’ˆå¯¹è¿™äº›æ ‘å‡†å¤‡çš„è¡¥ä¸ã€‚请å‚è§MAINTAINERS文件ä¸åç³» -统的 **T:** é¡¹ä»¥æŸ¥æ‰¾è¯¥æ ‘ï¼Œæˆ–è€…ç®€å•åœ°è¯¢é—®ç»´æŠ¤è€…è¯¥æ ‘æ˜¯å¦æœªåœ¨å…¶ä¸åˆ—出。 - -ä»ç„¶å¯ä»¥é€šè¿‡tarballsä¸‹è½½å†…æ ¸ç‰ˆæœ¬ï¼ˆå¦‚ä¸‹ä¸€èŠ‚æ‰€è¿°ï¼‰ï¼Œä½†è¿™æ˜¯è¿›è¡Œå†…æ ¸å¼€å‘çš„ -一ç§å›°éš¾çš„æ–¹å¼ã€‚ - -1) "diff -up" -------------- - -使用 "diff -up" 或者 "diff -uprN" æ¥åˆ›å»ºè¡¥ä¸ã€‚ - -æ‰€æœ‰å†…æ ¸çš„æ”¹åŠ¨ï¼Œéƒ½æ˜¯ä»¥è¡¥ä¸çš„å½¢å¼å‘ˆçŽ°çš„,补ä¸ç”± diff(1) 生æˆã€‚创建补ä¸çš„ -时候,è¦ç¡®è®¤å®ƒæ˜¯ä»¥ "unified diff" æ ¼å¼åˆ›å»ºçš„,这ç§æ ¼å¼ç”± diff(1) çš„ '-u' -å‚数生æˆã€‚而且,请使用 '-p' å‚æ•°ï¼Œé‚£æ ·ä¼šæ˜¾ç¤ºæ¯ä¸ªæ”¹åŠ¨æ‰€åœ¨çš„C函数,使得 -产生的补ä¸å®¹æ˜“读得多。补ä¸åº”è¯¥åŸºäºŽå†…æ ¸æºä»£ç æ ‘çš„æ ¹ç›®å½•ï¼Œè€Œä¸æ˜¯é‡Œè¾¹çš„ä»» -何å目录。 - -为一个å•ç‹¬çš„文件创建补ä¸ï¼Œä¸€èˆ¬æ¥è¯´è¿™æ ·åšå°±å¤Ÿäº†:: - - SRCTREE=linux - MYFILE=drivers/net/mydriver.c +统的 **T:** é¡¹ä»¥æŸ¥æ‰¾è¯¥æ ‘ï¼Œæˆ–è€…ç›´æŽ¥è¯¢é—®ç»´æŠ¤è€…è¯¥æ ‘æ˜¯å¦æœªåœ¨å…¶ä¸åˆ—出。 - cd $SRCTREE - cp $MYFILE $MYFILE.orig - vi $MYFILE # make your change - cd .. - diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch +.. _zh_describe_changes: -为多个文件创建补ä¸ï¼Œä½ å¯ä»¥è§£å¼€ä¸€ä¸ªæ²¡æœ‰ä¿®æ”¹è¿‡çš„å†…æ ¸æºä»£ç æ ‘ï¼Œç„¶åŽå’Œä½ 自 -己的代ç æ ‘ä¹‹é—´åš diff 。例如:: - - MYSRC=/devel/linux - - tar xvfz linux-3.19.tar.gz - mv linux-3.19 linux-3.19-vanilla - diff -uprN -X linux-3.19-vanilla/Documentation/dontdiff \ - linux-3.19-vanilla $MYSRC > /tmp/patch - -"dontdiff" æ˜¯å†…æ ¸åœ¨ç¼–è¯‘çš„æ—¶å€™äº§ç”Ÿçš„æ–‡ä»¶çš„åˆ—è¡¨ï¼Œåˆ—è¡¨ä¸çš„文件在 diff(1) -产生的补ä¸é‡Œä¼šè¢«è·³è¿‡ã€‚ - -ç¡®å®šä½ çš„è¡¥ä¸é‡Œæ²¡æœ‰åŒ…å«ä»»ä½•ä¸å±žäºŽè¿™æ¬¡è¡¥ä¸æ交的é¢å¤–文件。记得在用diff(1) -生æˆè¡¥ä¸ä¹‹åŽï¼Œå®¡é˜…一次补ä¸ï¼Œä»¥ç¡®ä¿å‡†ç¡®ã€‚ - -å¦‚æžœä½ çš„æ”¹åŠ¨å¾ˆæ•£ä¹±ï¼Œä½ åº”è¯¥ç ”ç©¶ä¸€ä¸‹å¦‚ä½•å°†è¡¥ä¸åˆ†å‰²æˆç‹¬ç«‹çš„部分,将改动分 -割æˆä¸€ç³»åˆ—åˆä¹Žé€»è¾‘çš„æ¥éª¤ã€‚è¿™æ ·æ›´å®¹æ˜“è®©å…¶ä»–å†…æ ¸å¼€å‘è€…å®¡æ ¸ï¼Œå¦‚æžœä½ æƒ³ä½ çš„ -è¡¥ä¸è¢«æŽ¥å—,这是很é‡è¦çš„。请å‚阅: -:ref:`cn_split_changes` - -å¦‚æžœä½ ç”¨ ``git`` , ``git rebase -i`` å¯ä»¥å¸®åŠ©ä½ è¿™ä¸€ç‚¹ã€‚å¦‚æžœä½ ä¸ç”¨ ``git``, -``quilt`` <https://savannah.nongnu.org/projects/quilt> å¦å¤–一个æµè¡Œçš„选择。 - -.. _cn_describe_changes: - -2) æè¿°ä½ çš„æ”¹åŠ¨ ---------------- +æè¿°ä½ çš„æ”¹åŠ¨ +------------ æè¿°ä½ çš„é—®é¢˜ã€‚æ— è®ºæ‚¨çš„è¡¥ä¸æ˜¯ä¸€è¡Œé”™è¯¯ä¿®å¤è¿˜æ˜¯5000行新功能,都必须有一个潜在 -的问题激励您完æˆè¿™é¡¹å·¥ä½œã€‚让审稿人相信有一个问题值得解决,让他们读完第一段 -是有æ„义的。 +的问题激励您完æˆè¿™é¡¹å·¥ä½œã€‚说æœå®¡é˜…者相信有一个问题值得解决,让他们读完第一段 +åŽå°±èƒ½æ˜Žç™½è¿™ä¸€ç‚¹ã€‚ æ述用户å¯è§çš„å½±å“。直接崩溃和é”定是相当有说æœåŠ›çš„,但并ä¸æ˜¯æ‰€æœ‰çš„错误都那么 -æ˜Žç›®å¼ èƒ†ã€‚å³ä½¿åœ¨ä»£ç 审查期间å‘现了这个问题,也è¦æ述一下您认为它å¯èƒ½å¯¹ç”¨æˆ·äº§ +æ˜Žç›®å¼ èƒ†ã€‚å³ä½¿åœ¨ä»£ç 审阅期间å‘现了这个问题,也è¦æ述一下您认为它å¯èƒ½å¯¹ç”¨æˆ·äº§ 生的影å“。请记ä½ï¼Œå¤§å¤šæ•°Linux安装è¿è¡Œçš„å†…æ ¸æ¥è‡ªäºŒçº§ç¨³å®šæ ‘或特定于供应商/äº§å“ çš„æ ‘ï¼Œåªä»Žä¸Šæ¸¸ç²¾é€‰ç‰¹å®šçš„è¡¥ä¸ï¼Œå› æ¤è¯·åŒ…å«ä»»ä½•å¯ä»¥å¸®åŠ©æ‚¨å°†æ›´æ”¹å®šä½åˆ°ä¸‹æ¸¸çš„内容: 触å‘的场景ã€DMESG的摘录ã€å´©æºƒæè¿°ã€æ€§èƒ½å›žå½’ã€å»¶è¿Ÿå°–å³°ã€é”定ç‰ã€‚ -é‡åŒ–优化和æƒè¡¡ã€‚如果您声称在性能ã€å†…å˜æ¶ˆè€—ã€å †æ ˆå 用空间或二进制大å°æ–¹é¢æœ‰æ‰€ -改进,请包括支æŒå®ƒä»¬çš„æ•°å—。但也è¦æè¿°ä¸æ˜Žæ˜¾çš„æˆæœ¬ã€‚优化通常ä¸æ˜¯å…费的,而是 -在CPUã€å†…å˜å’Œå¯è¯»æ€§ä¹‹é—´è¿›è¡Œæƒè¡¡ï¼›æˆ–者,探索性的工作,在ä¸åŒçš„工作负载之间进 +è´¨é‡ä¼˜åŒ–å’Œæƒè¡¡ã€‚如果您声称在性能ã€å†…å˜æ¶ˆè€—ã€å †æ ˆå 用空间或二进制大å°æ–¹é¢æœ‰æ‰€ +改进,请包括支æŒå®ƒä»¬çš„æ•°æ®ã€‚但也è¦æè¿°ä¸æ˜Žæ˜¾çš„æˆæœ¬ã€‚优化通常ä¸æ˜¯é›¶æˆæœ¬çš„,而是 +在CPUã€å†…å˜å’Œå¯è¯»æ€§ä¹‹é—´è¿›è¡Œæƒè¡¡ï¼›æˆ–者,åšæŽ¢ç´¢æ€§çš„工作,在ä¸åŒçš„工作负载之间进 è¡Œæƒè¡¡ã€‚请æ述优化的预期缺点,以便审阅者å¯ä»¥æƒè¡¡æˆæœ¬å’Œæ”¶ç›Šã€‚ -一旦问题建立起æ¥ï¼Œå°±è¦è¯¦ç»†åœ°æ述一下您实际在åšä»€ä¹ˆã€‚对于审阅者æ¥è¯´ï¼Œç”¨ç®€å•çš„ -英è¯æ述代ç çš„å˜åŒ–是很é‡è¦çš„,以验è¯ä»£ç 的行为是å¦ç¬¦åˆæ‚¨çš„æ„愿。 +æ出问题之åŽï¼Œå°±è¦è¯¦ç»†åœ°æ述一下您实际在åšçš„技术细节。对于审阅者æ¥è¯´ï¼Œç”¨ç®€ç»ƒçš„ +英è¯æ述代ç çš„å˜åŒ–是很é‡è¦çš„,以验è¯ä»£ç 的行为是å¦ç¬¦åˆæ‚¨çš„æ„图。 -如果您将补ä¸æ述写在一个表å•ä¸ï¼Œè¿™ä¸ªè¡¨å•å¯ä»¥å¾ˆå®¹æ˜“地作为“æ交日志â€æ”¾å…¥Linux -çš„æºä»£ç 管ç†ç³»ç»Ÿgitä¸ï¼Œé‚£ä¹ˆç»´æŠ¤äººå‘˜å°†éžå¸¸æ„Ÿè°¢æ‚¨ã€‚è§ :ref:`cn_explicit_in_reply_to`. +如果您将补ä¸æ述写æˆâ€œæ ‡å‡†æ ¼å¼â€ï¼Œå¯ä»¥å¾ˆå®¹æ˜“地作为“æ交日志â€æ”¾å…¥Linuxçš„æºä»£ +ç 管ç†ç³»ç»Ÿ ``git`` ä¸ï¼Œé‚£ä¹ˆç»´æŠ¤äººå‘˜å°†éžå¸¸æ„Ÿè°¢æ‚¨ã€‚ +å‚è§ :ref:`zh_the_canonical_patch_format` 。 æ¯ä¸ªè¡¥ä¸åªè§£å†³ä¸€ä¸ªé—®é¢˜ã€‚å¦‚æžœä½ çš„æ述开始å˜é•¿ï¼Œè¿™å°±è¡¨æ˜Žä½ å¯èƒ½éœ€è¦æ‹†åˆ†ä½ çš„è¡¥ä¸ã€‚ -è¯·è§ :ref:`cn_split_changes` +è¯·è§ :ref:`zh_split_changes` 。 -æ交或é‡æ–°æ交修补程åºæˆ–修补程åºç³»åˆ—时,请包括完整的修补程åºè¯´æ˜Žå’Œç†ç”±ã€‚ä¸è¦ +æ交或é‡æ–°æ交补ä¸æˆ–è¡¥ä¸ç³»åˆ—时,请包括完整的补ä¸è¯´æ˜Žå’Œç†ç”±ã€‚ä¸è¦ åªè¯´è¿™æ˜¯è¡¥ä¸ï¼ˆç³»åˆ—ï¼‰çš„ç¬¬å‡ ç‰ˆã€‚ä¸è¦æœŸæœ›å系统维护人员引用更早的补ä¸ç‰ˆæœ¬æˆ–引用 URLæ¥æŸ¥æ‰¾è¡¥ä¸æ述并将其放入补ä¸ä¸ã€‚也就是说,补ä¸ï¼ˆç³»åˆ—)åŠå…¶æ述应该是独立的。 -这对维护人员和审查人员都有好处。一些评审者å¯èƒ½ç”šè‡³æ²¡æœ‰æ”¶åˆ°è¡¥ä¸çš„早期版本。 +这对维护人员和审阅者都有好处。一些审阅者å¯èƒ½ç”šè‡³æ²¡æœ‰æ”¶åˆ°è¡¥ä¸çš„早期版本。 -æè¿°ä½ åœ¨å‘½ä»¤è¯æ°”ä¸çš„å˜åŒ–,例如“make xyzzy do frotzâ€è€Œä¸æ˜¯â€œ[This patch]make +用祈使å¥æè¿°ä½ çš„å˜æ›´ï¼Œä¾‹å¦‚“make xyzzy do frotzâ€è€Œä¸æ˜¯â€œ[This patch]make xyzzy do frotzâ€æˆ–“[I]changed xyzzy to do frotzâ€ï¼Œå°±å¥½åƒä½ 在命令代ç åº“æ”¹å˜ å®ƒçš„è¡Œä¸ºä¸€æ ·ã€‚ -如果修补程åºä¿®å¤äº†ä¸€ä¸ªè®°å½•çš„bugæ¡ç›®ï¼Œè¯·æŒ‰ç¼–å·å’ŒURL引用该bugæ¡ç›®ã€‚如果补ä¸æ¥ -自邮件列表讨论,请给出邮件列表å˜æ¡£çš„URL;使用带有 ``Message-ID`` çš„ -https://lore.kernel.org/ é‡å®šå‘,以确ä¿é“¾æŽ¥ä¸ä¼šè¿‡æ—¶ã€‚ - -但是,在没有外部资æºçš„情况下,尽é‡è®©ä½ 的解释å¯ç†è§£ã€‚除了æ供邮件列表å˜æ¡£æˆ– -bugçš„URL之外,还è¦æ€»ç»“需è¦æ交补ä¸çš„相关讨论è¦ç‚¹ã€‚ - -如果您想è¦å¼•ç”¨ä¸€ä¸ªç‰¹å®šçš„æ交,ä¸è¦åªå¼•ç”¨æ交的 SHA-1 ID。还请包括æ交的一行 +如果您想è¦å¼•ç”¨ä¸€ä¸ªç‰¹å®šçš„æ交,ä¸è¦åªå¼•ç”¨æ交的SHA-1 ID。还请包括æ交的一行 摘è¦ï¼Œä»¥ä¾¿äºŽå®¡é˜…者了解它是关于什么的。例如:: Commit e21d2170f36602ae2708 ("video: remove unnecessary @@ -144,82 +94,104 @@ bugçš„URL之外,还è¦æ€»ç»“需è¦æ交补ä¸çš„相关讨论è¦ç‚¹ã€‚ platform_set_drvdata(), but left the variable "dev" unused, delete it. -您还应该确ä¿è‡³å°‘使用å‰12ä½ SHA-1 ID. å†…æ ¸å˜å‚¨åº“包å«*许多*对象,使与较çŸçš„ID +您还应该确ä¿è‡³å°‘使用å‰12ä½SHA-1 IDã€‚å†…æ ¸å˜å‚¨åº“åŒ…å« *许多* 对象,使较çŸçš„ID å‘生冲çªçš„å¯èƒ½æ€§å¾ˆå¤§ã€‚è®°ä½ï¼Œå³ä½¿çŽ°åœ¨ä¸ä¼šä¸Žæ‚¨çš„å…个å—符IDå‘生冲çªï¼Œè¿™ç§æƒ…况 -å¯èƒ½äº”å¹´åŽæ”¹å˜ã€‚ +也å¯èƒ½åœ¨äº”å¹´åŽæ”¹å˜ã€‚ + +如果该å˜æ›´çš„相关讨论或背景信æ¯å¯ä»¥åœ¨ç½‘ä¸ŠæŸ¥é˜…ï¼Œè¯·åŠ ä¸Šâ€œLink:â€æ ‡ç¾æŒ‡å‘它。例如 +ä½ çš„è¡¥ä¸ä¿®å¤äº†ä¸€ä¸ªç¼ºé™·ï¼Œéœ€è¦æ·»åŠ 一个带有URLçš„æ ‡ç¾æŒ‡å‘邮件列表å˜æ¡£æˆ–缺陷跟踪器 +的相关报告;如果该补ä¸æ˜¯ç”±ä¸€äº›æ—©å…ˆé‚®ä»¶åˆ—表讨论或网络上的记录引起的,请指å‘它。 + +当链接到邮件列表å˜æ¡£æ—¶ï¼Œè¯·é¦–选lore.kernel.org邮件å˜æ¡£æœåŠ¡ã€‚用邮件ä¸çš„ +``Message-ID`` 头(去掉尖括å·ï¼‰å¯ä»¥åˆ›å»ºé“¾æŽ¥URL。例如:: + + Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + +请检查该链接以确ä¿å¯ç”¨ä¸”指å‘æ£ç¡®çš„邮件。 -如果修补程åºä¿®å¤äº†ç‰¹å®šæ交ä¸çš„错误,例如,使用 ``git bisct`` ï¼Œè¯·ä½¿ç”¨å¸¦æœ‰å‰ -12个å—符SHA-1 ID çš„"Fixes:"æ ‡è®°å’Œå•è¡Œæ‘˜è¦ã€‚为了简化ä¸è¦å°†æ ‡è®°æ‹†åˆ†ä¸ºå¤šä¸ªï¼Œ -è¡Œã€æ ‡è®°ä¸å—分æžè„šæœ¬â€œ75列æ¢è¡Œâ€è§„则的é™åˆ¶ã€‚例如:: +ä¸è¿‡ï¼Œåœ¨æ²¡æœ‰å¤–部资æºçš„情况下,也è¦å°½é‡è®©ä½ 的解释å¯ç†è§£ã€‚除了æ供邮件列表å˜æ¡£æˆ– +缺陷的URL之外,还è¦éœ€è¦æ€»ç»“该补ä¸çš„相关讨论è¦ç‚¹ã€‚ - Fixes: 54a4f0239f2e ("KVM: MMU: make kvm_mmu_zap_page() return the number of pages it actually freed") +如果补ä¸ä¿®å¤äº†ç‰¹å®šæ交ä¸çš„错误,例如使用 ``git bisct`` å‘现了一个问题,请使用 +带有å‰12个å—符SHA-1 ID的“Fixes:â€æ ‡ç¾å’Œå•è¡Œæ‘˜è¦ã€‚为了简化解æžè„šæœ¬ï¼Œä¸è¦å°†è¯¥ +æ ‡ç¾æ‹†åˆ†ä¸ºå¤šè¡Œï¼Œæ ‡ç¾ä¸å—“75列æ¢è¡Œâ€è§„则的é™åˆ¶ã€‚例如:: -下列 ``git config`` 设置å¯ä»¥æ·»åŠ 让 ``git log``, ``git show`` æ¼‚äº®çš„æ˜¾ç¤ºæ ¼å¼:: + Fixes: 54a4f0239f2e ("KVM: MMU: make kvm_mmu_zap_page() return the number of pages it actually freed") + +下列 ``git config`` 设置å¯ä»¥è®© ``git log``, ``git show`` å¢žåŠ ä¸Šè¿°é£Žæ ¼çš„æ˜¾ç¤ºæ ¼å¼:: [core] abbrev = 12 [pretty] fixes = Fixes: %h (\"%s\") -.. _cn_split_changes: +使用示例:: + + $ git log -1 --pretty=fixes 54a4f0239f2e + Fixes: 54a4f0239f2e ("KVM: MMU: make kvm_mmu_zap_page() return the number of pages it actually freed") -3) æ‹†åˆ†ä½ çš„æ”¹åŠ¨ ---------------- +.. _zh_split_changes: -å°†æ¯ä¸ªé€»è¾‘更改分隔æˆä¸€ä¸ªå•ç‹¬çš„è¡¥ä¸ã€‚ +æ‹†åˆ†ä½ çš„æ”¹åŠ¨ +------------ + +å°†æ¯ä¸ª **逻辑更改** 拆分æˆä¸€ä¸ªå•ç‹¬çš„è¡¥ä¸ã€‚ ä¾‹å¦‚ï¼Œå¦‚æžœä½ çš„æ”¹åŠ¨é‡ŒåŒæ—¶æœ‰bugä¿®æ£å’Œæ€§èƒ½ä¼˜åŒ–,那么把这些改动拆分到两个或 -者更多的补ä¸æ–‡ä»¶ä¸ã€‚å¦‚æžœä½ çš„æ”¹åŠ¨åŒ…å«å¯¹API的修改,并且修改了驱动程åºæ¥é€‚ -应这些新的API,那么把这些修改分æˆä¸¤ä¸ªè¡¥ä¸ã€‚ +者更多的补ä¸æ–‡ä»¶ä¸ã€‚å¦‚æžœä½ çš„æ”¹åŠ¨åŒ…å«å¯¹APIçš„ä¿®æ”¹ï¼Œå¹¶ä¸”å¢žåŠ äº†ä¸€ä¸ªä½¿ç”¨è¯¥æ–°API +的驱动,那么把这些修改分æˆä¸¤ä¸ªè¡¥ä¸ã€‚ å¦ä¸€æ–¹é¢ï¼Œå¦‚æžœä½ å°†ä¸€ä¸ªå•ç‹¬çš„改动åšæˆå¤šä¸ªè¡¥ä¸æ–‡ä»¶ï¼Œé‚£ä¹ˆå°†å®ƒä»¬åˆå¹¶æˆä¸€ä¸ª å•ç‹¬çš„è¡¥ä¸æ–‡ä»¶ã€‚è¿™æ ·ä¸€ä¸ªé€»è¾‘ä¸Šå•ç‹¬çš„改动åªè¢«åŒ…å«åœ¨ä¸€ä¸ªè¡¥ä¸æ–‡ä»¶é‡Œã€‚ -如果有一个补ä¸ä¾èµ–å¦å¤–一个补ä¸æ¥å®Œæˆå®ƒçš„改动,那没问题。简å•çš„åœ¨ä½ çš„è¡¥ -ä¸æ述里指出“这个补ä¸ä¾èµ–æŸè¡¥ä¸â€å°±å¥½äº†ã€‚ +需è¦è®°ä½çš„一点是,æ¯ä¸ªè¡¥ä¸çš„更改都应易于ç†è§£ï¼Œä»¥ä¾¿å®¡é˜…者验è¯ã€‚æ¯ä¸ªè¡¥ä¸éƒ½åº”该 +对其价值进行é˜è¿°ã€‚ + +如果有一个补ä¸ä¾èµ–å¦å¤–一个补ä¸æ¥å®Œæˆå®ƒçš„æ”¹åŠ¨ï¼Œé‚£æ²¡é—®é¢˜ã€‚ç›´æŽ¥åœ¨ä½ çš„è¡¥ +ä¸æ述里指出 **“这个补ä¸ä¾èµ–æŸè¡¥ä¸â€** 就好了。 -在将您的更改划分为一系列补ä¸æ—¶ï¼Œè¦ç‰¹åˆ«æ³¨æ„ç¡®ä¿å†…æ ¸åœ¨ç³»åˆ—ä¸çš„æ¯ä¸ªè¡¥ä¸ä¹‹åŽ -都能æ£å¸¸æž„建和è¿è¡Œã€‚使用 ``git bisect`` æ¥è¿½è¸ªé—®é¢˜çš„å¼€å‘者å¯èƒ½ä¼šåœ¨ä»»ä½•æ—¶ -å€™åˆ†å‰²ä½ çš„è¡¥ä¸ç³»åˆ—ï¼›å¦‚æžœä½ åœ¨ä¸é—´å¼•å…¥é”™è¯¯ï¼Œä»–们ä¸ä¼šæ„Ÿè°¢ä½ 。 +在将您的更改划分为一系列补ä¸æ—¶ï¼Œè¦ç‰¹åˆ«æ³¨æ„ç¡®ä¿å†…æ ¸åœ¨åº”ç”¨ç³»åˆ—ä¸çš„æ¯ä¸ªè¡¥ä¸ä¹‹åŽ +都能æ£å¸¸æž„建和è¿è¡Œã€‚使用 ``git bisect`` æ¥è¿½è¸ªé—®é¢˜çš„å¼€å‘者å¯èƒ½ä¼šåœ¨ä»»ä½•åœ°æ–¹åˆ† +å‰²ä½ çš„è¡¥ä¸ç³»åˆ—ï¼›å¦‚æžœä½ åœ¨ä¸é—´å¼•å…¥é”™è¯¯ï¼Œä»–们ä¸ä¼šæ„Ÿè°¢ä½ 。 -å¦‚æžœä½ ä¸èƒ½å°†è¡¥ä¸æµ“缩æˆæ›´å°‘的文件,那么æ¯æ¬¡å¤§çº¦å‘é€å‡º15个,然åŽç‰å¾…审查 +å¦‚æžœä½ ä¸èƒ½å°†è¡¥ä¸ç³»åˆ—浓缩得更å°ï¼Œé‚£ä¹ˆæ¯æ¬¡å¤§çº¦å‘é€å‡º15个补ä¸ï¼Œç„¶åŽç‰å¾…审阅 和集æˆã€‚ -4) æ£€æŸ¥ä½ çš„æ›´æ”¹é£Žæ ¼ -------------------- +æ£€æŸ¥ä½ çš„æ›´æ”¹é£Žæ ¼ +---------------- -检查您的补ä¸æ˜¯å¦å˜åœ¨åŸºæœ¬æ ·å¼å†²çªï¼Œè¯¦ç»†ä¿¡æ¯å¯åœ¨ -:ref:`Documentation/translations/zh_CN/process/coding-style.rst <cn_codingstyle>` -ä¸æ‰¾åˆ°ã€‚如果ä¸è¿™æ ·åšï¼Œåªä¼šæµªè´¹å®¡ç¨¿äººçš„æ—¶é—´ï¼Œå¹¶ä¸”ä¼šå¯¼è‡´ä½ çš„è¡¥ä¸è¢«æ‹’ç»ï¼Œç”šè‡³ +检查您的补ä¸æ˜¯å¦è¿åäº†åŸºæœ¬æ ·å¼è§„定,详细信æ¯å‚è§ +Documentation/translations/zh_CN/process/coding-style.rst +ä¸æ‰¾åˆ°ã€‚如果ä¸è¿™æ ·åšï¼Œåªä¼šæµªè´¹å®¡é˜…è€…çš„æ—¶é—´ï¼Œå¹¶ä¸”ä¼šå¯¼è‡´ä½ çš„è¡¥ä¸è¢«æ‹’ç»ï¼Œç”šè‡³ å¯èƒ½æ²¡æœ‰è¢«é˜…读。 一个é‡è¦çš„例外是在将代ç 从一个文件移动到å¦ä¸€ä¸ªæ–‡ä»¶æ—¶â€”—在这ç§æƒ…况下,您ä¸åº” 该在移动代ç çš„åŒä¸€ä¸ªè¡¥ä¸ä¸ä¿®æ”¹ç§»åŠ¨çš„代ç 。这清楚地æ述了移动代ç 和您的更改 -的行为。这大大有助于审查实际差异,并å…许工具更好地跟踪代ç 本身的历å²ã€‚ +的行为。这大大有助于审阅实际差异,并å…许工具更好地跟踪代ç 本身的历å²ã€‚ 在æ交之å‰ï¼Œä½¿ç”¨è¡¥ä¸æ ·å¼æ£€æŸ¥ç¨‹åºæ£€æŸ¥è¡¥ä¸ï¼ˆscripts/check patch.pl)。ä¸è¿‡ï¼Œ 请注æ„ï¼Œæ ·å¼æ£€æŸ¥ç¨‹åºåº”该被视为一个指å—,而ä¸æ˜¯ä½œä¸ºäººç±»åˆ¤æ–的替代å“。如果您 -的代ç 看起æ¥æ›´å¥½ï¼Œä½†æœ‰è¿è§„行为,那么最好ä¸è¦ä½¿ç”¨å®ƒã€‚ +的代ç 看起æ¥æ›´å¥½ï¼Œä½†æœ‰è¿è§„行为,那么最好别管它。 检查者报告三个级别: - ERROR:很å¯èƒ½å‡ºé”™çš„事情 - - WARNING:需è¦ä»”细审查的事项 + - WARNING:需è¦ä»”细审阅的事项 - CHECK:需è¦æ€è€ƒçš„事情 您应该能够判æ–您的补ä¸ä¸å˜åœ¨çš„所有è¿è§„行为。 -5) 选择补ä¸æ”¶ä»¶äºº ------------------ +选择补ä¸æ”¶ä»¶äºº +-------------- -您应该总是在任何补ä¸ä¸Šå¤åˆ¶ç›¸åº”çš„å系统维护人员,以获得他们维护的代ç ;查看 +您应该总是知会任何补ä¸ç›¸åº”代ç çš„å系统维护人员;查看 维护人员文件和æºä»£ç 修订历å²è®°å½•ï¼Œä»¥äº†è§£è¿™äº›ç»´æŠ¤äººå‘˜æ˜¯è°ã€‚脚本 -scripts/get_Maintainer.pl在这个æ¥éª¤ä¸éžå¸¸æœ‰ç”¨ã€‚如果您找ä¸åˆ°æ£åœ¨å·¥ä½œçš„å系统 +scripts/get_maintainer.pl在这个æ¥éª¤ä¸éžå¸¸æœ‰ç”¨ã€‚如果您找ä¸åˆ°æ£åœ¨å·¥ä½œçš„å系统 的维护人员,那么Andrew Morton(akpm@linux-foundation.org)将充当最åŽçš„维护 人员。 -您通常还应该选择至少一个邮件列表æ¥æŽ¥æ”¶è¡¥ä¸é›†çš„。linux-kernel@vger.kernel.org -作为最åŽä¸€ä¸ªè§£å†³åŠžæ³•çš„列表,但是这个列表上的体积已ç»å¼•èµ·äº†è®¸å¤šå¼€å‘人员的拒ç»ã€‚ +您通常还应该选择至少一个邮件列表æ¥æŽ¥æ”¶è¡¥ä¸é›†çš„副本。linux-kernel@vger.kernel.org +是所有补ä¸çš„默认列表,但是这个列表的æµé‡å·²ç»å¯¼è‡´äº†è®¸å¤šå¼€å‘人员ä¸å†çœ‹å®ƒã€‚ 在MAINTAINERS文件ä¸æŸ¥æ‰¾å系统特定的列表;您的补ä¸å¯èƒ½ä¼šåœ¨é‚£é‡Œå¾—到更多的关注。 ä¸è¿‡ï¼Œè¯·ä¸è¦å‘é€åžƒåœ¾é‚®ä»¶åˆ°æ— 关的列表。 @@ -229,189 +201,170 @@ http://vger.kernel.org/vger-lists.html 上找到它们的列表。ä¸è¿‡ï¼Œä¹Ÿæœ ä¸è¦ä¸€æ¬¡å‘é€è¶…过15个补ä¸åˆ°vger邮件列表ï¼ï¼ï¼ï¼ -Linus Torvalds 是决定改动能å¦è¿›å…¥ Linux å†…æ ¸çš„æœ€ç»ˆè£å†³è€…。他的 e-mail -地å€æ˜¯ <torvalds@linux-foundation.org> 。他收到的 e-mail 很多,所以一般 -çš„è¯´ï¼Œæœ€å¥½åˆ«ç»™ä»–å‘ e-mail。 - -如果您有修å¤å¯åˆ©ç”¨å®‰å…¨æ¼æ´žçš„è¡¥ä¸ï¼Œè¯·å°†è¯¥è¡¥ä¸å‘é€åˆ° security@kernel.org。对于 -严é‡çš„bug,å¯ä»¥è€ƒè™‘çŸæœŸæš‚åœä»¥å…许分销商å‘用户å‘布补ä¸ï¼›åœ¨è¿™ç§æƒ…况下,显然ä¸åº” -将补ä¸å‘é€åˆ°ä»»ä½•å…¬å…±åˆ—表。 +Linus Torvalds是决定改动能å¦è¿›å…¥ Linux å†…æ ¸çš„æœ€ç»ˆè£å†³è€…。他的邮件地å€æ˜¯ +torvalds@linux-foundation.org 。他收到的邮件很多,所以一般æ¥è¯´æœ€å¥½ **别** +给他å‘邮件。 -ä¿®å¤å·²å‘å¸ƒå†…æ ¸ä¸ä¸¥é‡é”™è¯¯çš„è¡¥ä¸ç¨‹åºåº”该指å‘ç¨³å®šç‰ˆç»´æŠ¤äººå‘˜ï¼Œæ–¹æ³•æ˜¯æ”¾è¿™æ ·çš„ä¸€è¡Œ:: +如果您有修å¤å¯åˆ©ç”¨å®‰å…¨æ¼æ´žçš„è¡¥ä¸ï¼Œè¯·å°†è¯¥è¡¥ä¸å‘é€åˆ° security@kernel.org 。对于 +严é‡çš„bug,å¯ä»¥è€ƒè™‘çŸæœŸç¦ä»¤ä»¥å…许分销商(有时间)å‘用户å‘布补ä¸ï¼›åœ¨è¿™ç§æƒ…况下, +显然ä¸åº”将补ä¸å‘é€åˆ°ä»»ä½•å…¬å…±åˆ—表。 +å‚è§ Documentation/translations/zh_CN/admin-guide/security-bugs.rst 。 - Cc: stable@vger.kernel.org +ä¿®å¤å·²å‘å¸ƒå†…æ ¸ä¸ä¸¥é‡é”™è¯¯çš„è¡¥ä¸ç¨‹åºåº”该抄é€ç»™ç¨³å®šç‰ˆç»´æŠ¤äººå‘˜ï¼Œæ–¹æ³•æ˜¯æŠŠä»¥ä¸‹åˆ—è¡Œ +放进补ä¸çš„ç¾å‡†åŒºï¼ˆæ³¨æ„,ä¸æ˜¯ç”µå邮件收件人):: -进入补ä¸çš„ç¾å‡†åŒºï¼ˆæ³¨æ„,ä¸æ˜¯ç”µå邮件收件人)。除了这个文件之外,您还应该阅读 -:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>` + Cc: stable@vger.kernel.org -但是,请注æ„,一些å系统维护人员希望得出他们自己的结论,å³å“ªäº›è¡¥ä¸åº”该被放到 -ç¨³å®šçš„æ ‘ä¸Šã€‚å°¤å…¶æ˜¯ç½‘ç»œç»´æŠ¤äººå‘˜ï¼Œä¸å¸Œæœ›çœ‹åˆ°å•ä¸ªå¼€å‘人员在补ä¸ä¸æ·»åŠ åƒä¸Šé¢è¿™æ · -的行。 +除了本文件之外,您还应该阅读 +Documentation/translations/zh_CN/process/stable-kernel-rules.rst 。 -如果更改影å“åˆ°ç”¨æˆ·å’Œå†…æ ¸æŽ¥å£ï¼Œè¯·å‘手册页维护人员(如维护人员文件ä¸æ‰€åˆ—)å‘é€ +如果更改影å“åˆ°ç”¨æˆ·ä¾§å†…æ ¸æŽ¥å£ï¼Œè¯·å‘手册页维护人员(如维护人员文件ä¸æ‰€åˆ—)å‘é€ æ‰‹å†Œé¡µè¡¥ä¸ï¼Œæˆ–至少å‘é€æ›´æ”¹é€šçŸ¥ï¼Œä»¥ä¾¿ä¸€äº›ä¿¡æ¯è¿›å…¥æ‰‹å†Œé¡µã€‚还应将用户空间API -更改å¤åˆ¶åˆ° linux-api@vger.kernel.org。 +更改抄é€åˆ° linux-api@vger.kernel.org 。 -6) 没有 MIME ç¼–ç ,没有链接,没有压缩,没有附件,åªæœ‰çº¯æ–‡æœ¬ ------------------------------------------------------------ +ä¸è¦MIMEç¼–ç ,ä¸è¦é“¾æŽ¥ï¼Œä¸è¦åŽ‹ç¼©ï¼Œä¸è¦é™„件,åªè¦çº¯æ–‡æœ¬ +------------------------------------------------------ Linus å’Œå…¶ä»–çš„å†…æ ¸å¼€å‘者需è¦é˜…è¯»å’Œè¯„è®ºä½ æäº¤çš„æ”¹åŠ¨ã€‚å¯¹äºŽå†…æ ¸å¼€å‘者æ¥è¯´ -,å¯ä»¥â€œå¼•ç”¨â€ä½ 的改动很é‡è¦ï¼Œä½¿ç”¨ä¸€èˆ¬çš„ e-mail 工具,他们就å¯ä»¥åœ¨ä½ çš„ +,å¯ä»¥â€œå¼•ç”¨â€ä½ 的改动很é‡è¦ï¼Œä½¿ç”¨ä¸€èˆ¬çš„邮件工具,他们就å¯ä»¥åœ¨ä½ çš„ 代ç 的任何ä½ç½®æ·»åŠ 评论。 -å› ä¸ºè¿™ä¸ªåŽŸå› ï¼Œæ‰€æœ‰çš„æ交的补ä¸éƒ½æ˜¯ e-mail ä¸â€œå†…嵌â€çš„。 +å› ä¸ºè¿™ä¸ªåŽŸå› ï¼Œæ‰€æœ‰çš„æ交的补ä¸éƒ½æ˜¯é‚®ä»¶ä¸â€œå†…嵌â€çš„。最简å•ï¼ˆå’ŒæŽ¨è)的方法就 +是使用 ``git send-email`` 。https://git-send-email.io 有 ``git send-email`` +的交互å¼æ•™ç¨‹ã€‚ + +å¦‚æžœä½ é€‰æ‹©ä¸ç”¨ ``git send-email`` : .. warning:: - å¦‚æžœä½ ä½¿ç”¨å‰ªåˆ‡-ç²˜è´´ä½ çš„è¡¥ä¸ï¼Œå°å¿ƒä½ 的编辑器的自动æ¢è¡ŒåŠŸèƒ½ç ´åä½ çš„è¡¥ä¸ -ä¸è¦å°†è¡¥ä¸ä½œä¸º MIME ç¼–ç 的附件,ä¸ç®¡æ˜¯å¦åŽ‹ç¼©ã€‚很多æµè¡Œçš„ e-mail è½¯ä»¶ä¸ -是任何时候都将 MIME ç¼–ç 的附件当作纯文本å‘é€çš„ï¼Œè¿™ä¼šä½¿å¾—åˆ«äººæ— æ³•åœ¨ä½ çš„ -代ç ä¸åŠ 评论。å¦å¤–,MIME ç¼–ç 的附件会让 Linus 多花一点时间æ¥å¤„ç†ï¼Œè¿™å°± -é™ä½Žäº†ä½ 的改动被接å—çš„å¯èƒ½æ€§ã€‚ + å¦‚æžœä½ ä½¿ç”¨å‰ªåˆ‡-ç²˜è´´ä½ çš„è¡¥ä¸ï¼Œå°å¿ƒä½ 的编辑器的自动æ¢è¡ŒåŠŸèƒ½ç ´åä½ çš„è¡¥ä¸ -ä¾‹å¤–ï¼šå¦‚æžœä½ çš„é‚®é€’å‘˜å¼„å了补ä¸ï¼Œé‚£ä¹ˆæœ‰äººå¯èƒ½ä¼šè¦æ±‚ä½ ä½¿ç”¨mimeé‡æ–°å‘é€è¡¥ä¸ +ä¸è¦å°†è¡¥ä¸ä½œä¸ºMIMEç¼–ç 的附件,ä¸ç®¡æ˜¯å¦åŽ‹ç¼©ã€‚很多æµè¡Œçš„é‚®ä»¶è½¯ä»¶ä¸ +是任何时候都将MIMEç¼–ç 的附件当作纯文本å‘é€çš„ï¼Œè¿™ä¼šä½¿å¾—åˆ«äººæ— æ³•åœ¨ä½ çš„ +代ç ä¸åŠ 评论。å¦å¤–,MIMEç¼–ç 的附件会让Linus多花一点时间æ¥å¤„ç†ï¼Œè¿™å°± +é™ä½Žäº†ä½ 的改动被接å—çš„å¯èƒ½æ€§ã€‚ -请å‚阅 :ref:`Documentation/translations/zh_CN/process/email-clients.rst <cn_email_clients>` -以获å–有关é…置电å邮件客户端以使其ä¸å—å½±å“地å‘é€ä¿®è¡¥ç¨‹åºçš„æ示。 +ä¾‹å¤–ï¼šå¦‚æžœä½ çš„é‚®è·¯æŸå了补ä¸ï¼Œé‚£ä¹ˆæœ‰äººå¯èƒ½ä¼šè¦æ±‚ä½ ä½¿ç”¨MIMEé‡æ–°å‘é€è¡¥ä¸ã€‚ -7) e-mail çš„å¤§å° ----------------- +请å‚阅 Documentation/translations/zh_CN/process/email-clients.rst +以获å–有关é…置电å邮件客户端以使其ä¸å—å½±å“地å‘é€è¡¥ä¸çš„æ示。 -大的改动对邮件列表ä¸åˆé€‚,对æŸäº›ç»´æŠ¤è€…也ä¸åˆé€‚ã€‚å¦‚æžœä½ çš„è¡¥ä¸ï¼Œåœ¨ä¸åŽ‹ç¼© -的情况下,超过了300kBï¼Œé‚£ä¹ˆä½ æœ€å¥½å°†è¡¥ä¸æ”¾åœ¨ä¸€ä¸ªèƒ½é€šè¿‡ internet è®¿é—®çš„æœ -务器上,然åŽç”¨æŒ‡å‘ä½ çš„è¡¥ä¸çš„ URL 替代。但是请注æ„,如果您的补ä¸è¶…过了 -300kbï¼Œé‚£ä¹ˆå®ƒå‡ ä¹Žè‚¯å®šéœ€è¦è¢«ç ´å。 +回å¤å®¡é˜…æ„è§ +------------ -8)回å¤è¯„审æ„è§ ---------------- +ä½ çš„è¡¥ä¸å‡ 乎肯定会得到审阅者对补ä¸æ”¹è¿›æ–¹æ³•çš„评论(以回å¤é‚®ä»¶çš„å½¢å¼ï¼‰ã€‚您必须 +对这些评论作出回应;让补ä¸è¢«å¿½ç•¥çš„一个好办法就是忽略审阅者的æ„è§ã€‚直接回å¤é‚® +件æ¥å›žåº”æ„è§å³å¯ã€‚ä¸ä¼šå¯¼è‡´ä»£ç 更改的æ„è§æˆ–é—®é¢˜å‡ ä¹Žè‚¯å®šä¼šå¸¦æ¥æ³¨é‡Šæˆ–å˜æ›´æ—¥å¿—çš„ +改å˜ï¼Œä»¥ä¾¿ä¸‹ä¸€ä¸ªå®¡é˜…者更好地了解æ£åœ¨å‘生的事情。 -ä½ çš„è¡¥ä¸å‡ 乎肯定会得到评审者对补ä¸æ”¹è¿›æ–¹æ³•çš„评论。您必须对这些评论作出 -回应;让补ä¸è¢«å¿½ç•¥çš„一个好办法就是忽略审阅者的æ„è§ã€‚ä¸ä¼šå¯¼è‡´ä»£ç 更改的 -æ„è§æˆ–é—®é¢˜å‡ ä¹Žè‚¯å®šä¼šå¸¦æ¥æ³¨é‡Šæˆ–å˜æ›´æ—¥å¿—的改å˜ï¼Œä»¥ä¾¿ä¸‹ä¸€ä¸ªè¯„审者更好地了解 -æ£åœ¨å‘生的事情。 +一定è¦å‘Šè¯‰å®¡é˜…è€…ä½ åœ¨åšä»€ä¹ˆæ”¹å˜ï¼Œå¹¶æ„Ÿè°¢ä»–们的时间。代ç 审阅是一个累人且耗时的 +过程,审阅者有时会å˜å¾—æš´èºã€‚å³ä½¿åœ¨è¿™ç§æƒ…况下,也è¦ç¤¼è²Œåœ°å›žåº”并解决他们指出的 +问题。当å‘é€ä¸‹ä¸€ç‰ˆæ—¶ï¼Œåœ¨å°é¢é‚®ä»¶æˆ–独立补ä¸é‡ŒåŠ 上 ``patch changelog`` 说明与 +å‰ä¸€ç‰ˆæœ¬çš„ä¸åŒä¹‹å¤„(å‚è§ :ref:`zh_the_canonical_patch_format` )。 -一定è¦å‘Šè¯‰å®¡ç¨¿äººä½ 在åšä»€ä¹ˆæ”¹å˜ï¼Œå¹¶æ„Ÿè°¢ä»–们的时间。代ç 审查是一个累人且 -耗时的过程,审查人员有时会å˜å¾—æš´èºã€‚å³ä½¿åœ¨è¿™ç§æƒ…况下,也è¦ç¤¼è²Œåœ°å›žåº”并 -解决他们指出的问题。 +.. _zh_resend_reminders: -9)ä¸è¦æ³„气或ä¸è€çƒ¦ -------------------- +ä¸è¦æ³„气或ä¸è€çƒ¦ +---------------- -æ交更改åŽï¼Œè¯·è€å¿ƒç‰å¾…。审阅者是忙碌的人,å¯èƒ½æ— 法立å³è®¿é—®æ‚¨çš„修补程åºã€‚ +æ交更改åŽï¼Œè¯·è€å¿ƒç‰å¾…。审阅者是大忙人,å¯èƒ½æ— 法立å³å®¡é˜…您的补ä¸ã€‚ -æ›¾å‡ ä½•æ—¶ï¼Œè¡¥ä¸æ›¾åœ¨æ²¡æœ‰è¯„论的情况下消失在空白ä¸ï¼Œä½†å¼€å‘è¿‡ç¨‹æ¯”çŽ°åœ¨æ›´åŠ é¡ºåˆ©ã€‚ +æ›¾å‡ ä½•æ—¶ï¼Œè¡¥ä¸æ›¾åœ¨æ²¡æ”¶åˆ°è¯„论的情况下消失在虚空ä¸ï¼Œä½†çŽ°åœ¨å¼€å‘è¿‡ç¨‹åº”è¯¥æ›´åŠ é¡ºåˆ©äº†ã€‚ 您应该在一周左å³çš„时间内收到评论;如果没有收到评论,请确ä¿æ‚¨å·²å°†è¡¥ä¸å‘é€ -到æ£ç¡®çš„ä½ç½®ã€‚在é‡æ–°æ交或è”系审阅者之å‰è‡³å°‘ç‰å¾…一周-在诸如åˆå¹¶çª—å£ä¹‹ç±»çš„ +到æ£ç¡®çš„ä½ç½®ã€‚在é‡æ–°æ交或è”系审阅者之å‰è‡³å°‘ç‰å¾…一周——在诸如åˆå¹¶çª—å£ä¹‹ç±»çš„ ç¹å¿™æ—¶é—´å¯èƒ½æ›´é•¿ã€‚ -10)主题ä¸åŒ…å« PATCH --------------------- +在ç‰äº†å‡ 个星期åŽï¼Œç”¨å¸¦RESEND的主题é‡å‘è¡¥ä¸ä¹Ÿæ˜¯å¯ä»¥çš„:: -由于到linuså’Œlinuxå†…æ ¸çš„ç”µå邮件æµé‡å¾ˆé«˜ï¼Œé€šå¸¸ä¼šåœ¨ä¸»é¢˜è¡Œå‰é¢åŠ 上[PATCH] -å‰ç¼€. 这使Linuså’Œå…¶ä»–å†…æ ¸å¼€å‘人员更容易将补ä¸ä¸Žå…¶ä»–电å邮件讨论区分开。 + [PATCH Vx RESEND] sub/sys: Condensed patch summary -11)ç¾ç½²ä½ 的作å“-å¼€å‘è€…åŽŸå§‹è®¤è¯ -------------------------------- +å½“ä½ å‘布补ä¸ï¼ˆç³»åˆ—)修改版的时候,ä¸è¦åŠ 上“RESENDâ€â€”—“RESENDâ€åªé€‚ç”¨äºŽé‡ +æ–°æ交之å‰æœªç»ä¿®æ”¹çš„è¡¥ä¸ï¼ˆç³»åˆ—)。 -ä¸ºäº†åŠ å¼ºå¯¹è°åšäº†ä½•äº‹çš„追踪,尤其是对那些é€è¿‡å¥½å‡ 层的维护者的补ä¸ï¼Œæˆ‘们 -建议在å‘é€å‡ºåŽ»çš„è¡¥ä¸ä¸ŠåŠ 一个 “sign-off†的过程。 +主题ä¸åŒ…å« PATCH +---------------- -"sign-off" 是在补ä¸çš„注释的最åŽçš„简å•çš„一行文å—,认è¯ä½ 编写了它或者其他 +由于到Linuså’Œlinux-kernel的电å邮件æµé‡å¾ˆé«˜ï¼Œé€šå¸¸ä¼šåœ¨ä¸»é¢˜è¡Œå‰é¢åŠ 上[PATCH] +å‰ç¼€ã€‚这使Linuså’Œå…¶ä»–å†…æ ¸å¼€å‘人员更容易将补ä¸ä¸Žå…¶ä»–电å邮件讨论区分开。 + +``git send-email`` ä¼šè‡ªåŠ¨ä¸ºä½ åŠ ä¸Šã€‚ + +ç¾ç½²ä½ 的作å“——开å‘者æ¥æºè®¤è¯ +------------------------------ + +ä¸ºäº†åŠ å¼ºå¯¹è°åšäº†ä½•äº‹çš„追踪,尤其是对那些é€è¿‡å¥½å‡ 层维护者æ‰æœ€ç»ˆåˆ°è¾¾çš„è¡¥ä¸ï¼Œæˆ‘ +们在通过邮件å‘é€çš„è¡¥ä¸ä¸Šå¼•å…¥äº†â€œç¾ç½²ï¼ˆsign-off)â€æµç¨‹ã€‚ + +“ç¾ç½²â€æ˜¯åœ¨è¡¥ä¸æ³¨é‡Šæœ€åŽçš„一行简å•æ–‡å—,认è¯ä½ 编写了它或者其他 人有æƒåŠ›å°†å®ƒä½œä¸ºå¼€æ”¾æºä»£ç çš„è¡¥ä¸ä¼ 递。规则很简å•ï¼šå¦‚æžœä½ èƒ½è®¤è¯å¦‚下信æ¯: -å¼€å‘者æ¥æºè¯ä¹¦ 1.1 +å¼€å‘者æ¥æºè®¤è¯ 1.1 ^^^^^^^^^^^^^^^^^^ 对于本项目的贡献,我认è¯å¦‚下信æ¯ï¼š - (a)这些贡献是完全或者部分的由我创建,我有æƒåˆ©ä»¥æ–‡ä»¶ä¸æŒ‡å‡º + (a) 这些贡献是完全或者部分的由我创建,我有æƒåˆ©ä»¥æ–‡ä»¶ä¸æŒ‡å‡º 的开放æºä»£ç 许å¯è¯æ交它;或者 - (b)这些贡献基于以å‰çš„工作,æ®æˆ‘所知,这些以å‰çš„工作å—æ°å½“的开放 - æºä»£ç 许å¯è¯ä¿æŠ¤ï¼Œè€Œä¸”ï¼Œæ ¹æ®è®¸å¯è¯ï¼Œæˆ‘有æƒæ交修改åŽçš„贡献, + + (b) 这些贡献基于以å‰çš„工作,æ®æˆ‘所知,这些以å‰çš„工作å—æ°å½“的开放 + æºä»£ç 许å¯è¯ä¿æŠ¤ï¼Œè€Œä¸”ï¼Œæ ¹æ®æ–‡ä»¶ä¸æŒ‡å‡ºçš„许å¯è¯ï¼Œæˆ‘有æƒæ交修改åŽçš„贡献, æ— è®ºæ˜¯å®Œå…¨è¿˜æ˜¯éƒ¨åˆ†ç”±æˆ‘åˆ›é€ ï¼Œè¿™äº›è´¡çŒ®éƒ½ä½¿ç”¨åŒä¸€ä¸ªå¼€æ”¾æºä»£ç 许å¯è¯ - (除éžæˆ‘被å…许用其它的许å¯è¯ï¼‰ï¼Œæ£å¦‚文件ä¸æŒ‡å‡ºçš„;或者 - (c)这些贡献由认è¯ï¼ˆa),(b)或者(c)的人直接æ供给我,而 + (除éžæˆ‘被å…许用其它的许å¯è¯ï¼‰ï¼›æˆ–者 + + (c) 这些贡献由认è¯ï¼ˆa),(b)或者(c)的人直接æ供给我,而 且我没有修改它。 - (d)我ç†è§£å¹¶åŒæ„这个项目和贡献是公开的,贡献的记录(包括我 - 一起æ交的个人记录,包括 sign-off )被永久维护并且å¯ä»¥å’Œè¿™ä¸ªé¡¹ç›® + + (d) 我ç†è§£å¹¶åŒæ„这个项目和贡献是公开的,贡献的记录(包括我 + 一起æ交的个人记录,包括sign-off)被永久维护并且å¯ä»¥å’Œè¿™ä¸ªé¡¹ç›® 或者开放æºä»£ç 的许å¯è¯åŒæ¥åœ°å†å‘行。 é‚£ä¹ˆåŠ å…¥è¿™æ ·ä¸€è¡Œ:: - Signed-off-by: Random J Developer <random@developer.example.org> - -ä½¿ç”¨ä½ çš„çœŸå(抱æ‰ï¼Œä¸èƒ½ä½¿ç”¨å‡å或者匿å。) - -有人在最åŽåŠ ä¸Šæ ‡ç¾ã€‚çŽ°åœ¨è¿™äº›ä¸œè¥¿ä¼šè¢«å¿½ç•¥ï¼Œä½†æ˜¯ä½ å¯ä»¥è¿™æ ·åšï¼Œæ¥æ ‡è®°å…¬å¸ -内部的过程,或者åªæ˜¯æŒ‡å‡ºå…³äºŽ sign-off 的一些特殊细节。 - -如果您是å系统或分支维护人员,有时需è¦ç¨å¾®ä¿®æ”¹æ”¶åˆ°çš„è¡¥ä¸ï¼Œä»¥ä¾¿åˆå¹¶å®ƒä»¬ï¼Œ -å› ä¸ºæ ‘å’Œæ交者ä¸çš„代ç ä¸å®Œå…¨ç›¸åŒã€‚å¦‚æžœä½ ä¸¥æ ¼éµå®ˆè§„则(cï¼‰ï¼Œä½ åº”è¯¥è¦æ±‚æ交者 -é‡æ–°å‘布,但这完全是在浪费时间和精力。规则(b)å…许您调整代ç ,但是更改一个 -æ交者的代ç 并让他认å¯æ‚¨çš„错误是éžå¸¸ä¸ç¤¼è²Œçš„。è¦è§£å†³æ¤é—®é¢˜ï¼Œå»ºè®®åœ¨æœ€åŽä¸€ä¸ª -ç”±ç¾åè¡Œå’Œæ‚¨çš„è¡Œä¹‹é—´æ·»åŠ ä¸€è¡Œï¼ŒæŒ‡ç¤ºæ›´æ”¹çš„æ€§è´¨ã€‚è™½ç„¶è¿™å¹¶ä¸æ˜¯å¼ºåˆ¶æ€§çš„,但似乎 -在æè¿°å‰åŠ 上您的邮件和/或姓å(全部用方括å·æ‹¬èµ·æ¥ï¼‰ï¼Œè¿™è¶³ä»¥è®©äººæ³¨æ„到您对最 -åŽä¸€åˆ†é’Ÿçš„更改负有责任。例如:: - - Signed-off-by: Random J Developer <random@developer.example.org> - [lucky@maintainer.example.org: struct foo moved from foo.c to foo.h] - Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org> - -如果您维护一个稳定的分支机构,åŒæ—¶å¸Œæœ›å¯¹ä½œè€…进行致谢ã€è·Ÿè¸ªæ›´æ”¹ã€åˆå¹¶ä¿®å¤å¹¶ -ä¿æŠ¤æ交者ä¸å—投诉,那么这ç§åšæ³•å°¤å…¶æœ‰ç”¨ã€‚请注æ„,在任何情况下都ä¸èƒ½æ›´æ”¹ä½œè€… -çš„ID(From å¤´ï¼‰ï¼Œå› ä¸ºå®ƒæ˜¯å‡ºçŽ°åœ¨æ›´æ”¹æ—¥å¿—ä¸çš„æ ‡è¯†ã€‚ - -对回åˆï¼ˆback-porters)的特别说明:在æ交消æ¯çš„顶部(主题行之åŽï¼‰æ’å…¥ä¸€ä¸ªè¡¥ä¸ -çš„èµ·æºæŒ‡ç¤ºä¼¼ä¹Žæ˜¯ä¸€ç§å¸¸è§ä¸”有用的实践,以便于跟踪。例如,下é¢æ˜¯æˆ‘们在3.x稳定 -版本ä¸çœ‹åˆ°çš„内容:: - - Date: Tue Oct 7 07:26:38 2014 -0400 - - libata: Un-break ATA blacklist - - commit 1c40279960bcd7d52dbdf1d466b20d24b99176c8 upstream. - -还有, è¿™é‡Œæ˜¯ä¸€ä¸ªæ—§ç‰ˆå†…æ ¸ä¸çš„一个回åˆè¡¥ä¸:: + Signed-off-by: Random J Developer <random@developer.example.org> - Date: Tue May 13 22:12:27 2008 +0200 +ä½¿ç”¨ä½ çš„çœŸå(抱æ‰ï¼Œä¸èƒ½ä½¿ç”¨å‡å或者匿å。)如果使用 ``git commit -s`` çš„è¯ +将会自动完æˆã€‚撤销也应当包å«â€œSigned-off-byâ€ï¼Œ ``git revert -s`` ä¼šå¸®ä½ æžå®šã€‚ - wireless, airo: waitbusy() won't delay +有些人会在最åŽåŠ 上é¢å¤–çš„æ ‡ç¾ã€‚çŽ°åœ¨è¿™äº›ä¸œè¥¿ä¼šè¢«å¿½ç•¥ï¼Œä½†æ˜¯ä½ å¯ä»¥è¿™æ ·åšï¼Œæ¥æ ‡è®° +å…¬å¸å†…部的过程,或者åªæ˜¯æŒ‡å‡ºå…³äºŽç¾ç½²çš„一些特殊细节。 - [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a] +作者ç¾ç½²ä¹‹åŽçš„任何其他ç¾ç½²ï¼ˆSigned-off-by:'s)å‡æ¥è‡ªå¤„ç†å’Œä¼ 递补ä¸çš„人员,但 +未å‚与其开å‘。ç¾ç½²é“¾åº”当åæ˜ è¡¥ä¸ä¼ æ’åˆ°ç»´æŠ¤è€…å¹¶æœ€ç»ˆä¼ æ’到Linus所ç»è¿‡çš„ **真实** +路径,首个ç¾ç½²æŒ‡æ˜Žå•ä¸ªä½œè€…的主è¦ä½œè€…身份。 -12)何时使用Acked-by:,CC:,和Co-Developed by: ----------------------------------------------- +何时使用Acked-by:,CC:,和Co-Developed by: +------------------------------------------ -Singed-off-by: æ ‡è®°è¡¨ç¤ºç¾å者å‚与了补ä¸çš„å¼€å‘,或者他/她在补ä¸çš„ä¼ é€’è·¯å¾„ä¸ã€‚ +Singed-off-by: æ ‡ç¾è¡¨ç¤ºç¾å者å‚与了补ä¸çš„å¼€å‘,或者他/她在补ä¸çš„ä¼ é€’è·¯å¾„ä¸ã€‚ -如果一个人没有直接å‚与补ä¸çš„准备或处ç†ï¼Œä½†å¸Œæœ›è¡¨ç¤ºå¹¶è®°å½•ä»–们对补ä¸çš„批准, -那么他们å¯ä»¥è¦æ±‚在补ä¸çš„å˜æ›´æ—¥å¿—ä¸æ·»åŠ 一个 Acked-by: +如果一个人没有直接å‚与补ä¸çš„准备或处ç†ï¼Œä½†å¸Œæœ›è¡¨ç¤ºå¹¶è®°å½•ä»–们对补ä¸çš„批准/赞æˆï¼Œ +那么他们å¯ä»¥è¦æ±‚在补ä¸çš„å˜æ›´æ—¥å¿—ä¸æ·»åŠ 一个Acked-by:。 -Acked-by:通常由å—å½±å“代ç 的维护者使用,当该维护者既没有贡献也没有转å‘è¡¥ä¸æ—¶ã€‚ +Acked-by: 通常由å—å½±å“代ç 的维护者使用,当该维护者既没有贡献也没有转å‘è¡¥ä¸æ—¶ã€‚ -Acked-by: ä¸åƒç¾å—äººé‚£æ ·æ£å¼ã€‚这是一个记录,确认人至少审查了补ä¸ï¼Œå¹¶è¡¨ç¤ºæŽ¥å—。 -å› æ¤ï¼Œè¡¥ä¸åˆå¹¶æœ‰æ—¶ä¼šæ‰‹åŠ¨å°†Acker的“Yep,looks good to meâ€è½¬æ¢ä¸º Acked-By:(但 +Acked-by: ä¸åƒç¾ç½²é‚£æ ·æ£å¼ã€‚这是一个记录,确认人至少审阅了补ä¸ï¼Œå¹¶è¡¨ç¤ºæŽ¥å—。 +å› æ¤ï¼Œè¡¥ä¸åˆå¹¶æœ‰æ—¶ä¼šæ‰‹åŠ¨å°†Acker的“Yep,looks good to meâ€è½¬æ¢ä¸º Acked-By:(但 请注æ„,通常最好è¦æ±‚一个明确的Ack)。 Acked-by:ä¸ä¸€å®šè¡¨ç¤ºå¯¹æ•´ä¸ªè¡¥ä¸çš„确认。例如,如果一个补ä¸å½±å“多个å系统,并且 -有一个:æ¥è‡ªä¸€ä¸ªå系统维护者,那么这通常表示åªç¡®è®¤å½±å“维护者代ç 的部分。这里 -应该仔细判æ–。如有疑问,应å‚考邮件列表档案ä¸çš„原始讨论。 +有一个æ¥è‡ªæŸä¸ªå系统维护者的Acked-By:,那么这通常表示åªç¡®è®¤å½±å“维护者代ç 的部 +分。这里应该仔细判æ–。如有疑问,应å‚考邮件列表å˜æ¡£ä¸çš„原始讨论。 -如果æŸäººæœ‰æœºä¼šå¯¹è¡¥ä¸è¿›è¡Œè¯„论,但没有æä¾›æ¤ç±»è¯„论,您å¯ä»¥é€‰æ‹©åœ¨è¡¥ä¸ä¸æ·»åŠ ``Cc:`` -è¿™æ˜¯å”¯ä¸€ä¸€ä¸ªæ ‡ç¾ï¼Œå®ƒå¯ä»¥åœ¨æ²¡æœ‰è¢«å®ƒå‘½å的人显å¼æ“ä½œçš„æƒ…å†µä¸‹æ·»åŠ ï¼Œä½†å®ƒåº”è¯¥è¡¨æ˜Ž -这个人是在补ä¸ä¸ŠæŠ„é€çš„。讨论ä¸åŒ…å«äº†æ½œåœ¨åˆ©ç›Šç›¸å…³æ–¹ã€‚ +如果æŸäººæœ¬åº”有机会对补ä¸è¿›è¡Œè¯„论,但没有æä¾›æ¤ç±»è¯„论,您å¯ä»¥é€‰æ‹©åœ¨è¡¥ä¸ä¸æ·»åŠ +``Cc:`` 这是唯一å¯ä»¥åœ¨æ²¡æœ‰è¢«è¯¥äººæ˜Žç¡®åŒæ„çš„æƒ…å†µä¸‹æ·»åŠ çš„æ ‡ç¾â€”—但它应该表明 +这个人是在补ä¸ä¸ŠæŠ„é€çš„。æ¤æ ‡ç¾è®°å½•äº†è®¨è®ºä¸åŒ…å«çš„潜在利益相关方。 Co-developed-by: 声明补ä¸æ˜¯ç”±å¤šä¸ªå¼€å‘人员共åŒåˆ›å»ºçš„ï¼›å½“å‡ ä¸ªäººåœ¨ä¸€ä¸ªè¡¥ä¸ä¸Šå·¥ -作时,它用于将属性赋予共åŒä½œè€…(除了 From: æ‰€èµ‹äºˆçš„ä½œè€…ä¹‹å¤–ï¼‰ã€‚å› ä¸º -Co-developed-by: 表示作者身份,所以æ¯ä¸ªå…±åŒå¼€å‘人:必须紧跟在相关åˆä½œä½œè€…çš„ -ç¾å之åŽã€‚æ ‡å‡†çš„ç¾æ ¸ç¨‹åºè¦æ±‚ï¼šæ ‡è®°çš„ç¾æ ¸é¡ºåºåº”å°½å¯èƒ½åæ˜ è¡¥ä¸çš„时间历å²ï¼Œè€Œä¸ -管作者是通过 From :还是由 Co-developed-by: å…±åŒå¼€å‘的。值得注æ„的是,最åŽä¸€ -个ç¾å—人:必须始终是æ交补ä¸çš„å¼€å‘人员。 +作时,它用于给出共åŒä½œè€…(除了From:æ‰€ç»™å‡ºçš„ä½œè€…ä¹‹å¤–ï¼‰ã€‚å› ä¸ºCo-developed-by: +表示作者身份,所以æ¯ä¸ªCo-developed-by:必须紧跟在相关åˆä½œä½œè€…çš„ç¾ç½²ä¹‹åŽã€‚æ ‡å‡† +ç¾ç½²ç¨‹åºè¦æ±‚Singed-off-by:æ ‡ç¾çš„顺åºåº”å°½å¯èƒ½åæ˜ è¡¥ä¸çš„时间历å²ï¼Œæ— 论作者是通 +过From:还是Co-developed-by:表明。值得注æ„的是,最åŽä¸€ä¸ªSinged-off-by:必须是 +æ交补ä¸çš„å¼€å‘人员。 -注æ„,当作者也是电åé‚®ä»¶æ ‡é¢˜â€œå‘件人:â€è¡Œä¸åˆ—出的人时,“From: â€ æ ‡è®°æ˜¯å¯é€‰çš„。 +注æ„,如果From:作者也是电åé‚®ä»¶æ ‡é¢˜çš„From:è¡Œä¸åˆ—出的人,则From:æ ‡ç¾æ˜¯å¯é€‰çš„。 -作者æ交的补ä¸ç¨‹åºç¤ºä¾‹:: +被From:作者æ交的补ä¸ç¤ºä¾‹:: <changelog> @@ -421,7 +374,7 @@ Co-developed-by: 表示作者身份,所以æ¯ä¸ªå…±åŒå¼€å‘人:必须紧跟 Signed-off-by: Second Co-Author <second@coauthor.example.org> Signed-off-by: From Author <from@author.example.org> -åˆä½œå¼€å‘者æ交的补ä¸ç¤ºä¾‹:: +被åˆä½œå¼€å‘者æ交的补ä¸ç¤ºä¾‹:: From: From Author <from@author.example.org> @@ -434,76 +387,85 @@ Co-developed-by: 表示作者身份,所以æ¯ä¸ªå…±åŒå¼€å‘人:必须紧跟 Signed-off-by: Submitting Co-Author <sub@coauthor.example.org> -13)使用报告人:ã€æµ‹è¯•äººï¼šã€å®¡æ ¸äººï¼šã€å»ºè®®äººï¼šã€ä¿®å¤äººï¼š --------------------------------------------------------- +使用Reported-by:ã€Tested-by:ã€Reviewed-by:ã€Suggested-by:å’ŒFixes: +----------------------------------------------------------------- Reported-by: 给那些å‘现错误并报告错误的人致谢,它希望激励他们在将æ¥å†æ¬¡å¸®åŠ© -我们。请注æ„,如果bug是以ç§æœ‰æ–¹å¼æŠ¥å‘Šçš„,那么在使用Reported-byæ ‡è®°ä¹‹å‰ï¼Œè¯· -先请求æƒé™ã€‚ +我们。请注æ„,如果bug是以ç§æœ‰æ–¹å¼æŠ¥å‘Šçš„,那么在使用Reported-byæ ‡ç¾ä¹‹å‰ï¼Œè¯· +先请求许å¯ã€‚æ¤æ ‡ç¾æ˜¯ä¸ºBug设计的;请ä¸è¦å°†å…¶ç”¨äºŽæ„Ÿè°¢åŠŸèƒ½è¯·æ±‚。 -Tested-by: æ ‡è®°è¡¨ç¤ºè¡¥ä¸å·²ç”±æŒ‡å®šçš„人(在æŸäº›çŽ¯å¢ƒä¸ï¼‰æˆåŠŸæµ‹è¯•ã€‚è¿™ä¸ªæ ‡ç¾é€šçŸ¥ -维护人员已ç»æ‰§è¡Œäº†ä¸€äº›æµ‹è¯•ï¼Œä¸ºå°†æ¥çš„è¡¥ä¸æ供了一ç§å®šä½æµ‹è¯•äººå‘˜çš„方法,并确 -ä¿æµ‹è¯•äººå‘˜çš„信誉。 +Tested-by: æ ‡ç¾è¡¨ç¤ºè¡¥ä¸å·²ç”±æŒ‡å®šçš„人(在æŸäº›çŽ¯å¢ƒä¸ï¼‰æˆåŠŸæµ‹è¯•ã€‚è¿™ä¸ªæ ‡ç¾é€šçŸ¥ +维护人员已ç»æ‰§è¡Œäº†ä¸€äº›æµ‹è¯•ï¼Œä¸ºå°†æ¥çš„è¡¥ä¸æ供了一ç§å®šä½æµ‹è¯•äººå‘˜çš„方法,并彰显测试人员的功劳。 -Reviewed-by:相åï¼Œæ ¹æ®å®¡æŸ¥äººçš„声明,表明该补ä¸å·²è¢«å®¡æŸ¥å¹¶è¢«è®¤ä¸ºæ˜¯å¯æŽ¥å—的: +Reviewed-byï¼šæ ¹æ®å®¡é˜…者的监ç£å£°æ˜Žï¼Œè¡¨æ˜Žè¯¥è¡¥ä¸å·²è¢«å®¡é˜…并被认为是å¯æŽ¥å—的: -审查人的监ç£å£°æ˜Ž +审阅者的监ç£å£°æ˜Ž ^^^^^^^^^^^^^^^^ -通过æ供我的 Reviewed-by,我声明: +通过æ供我的Reviewed-by:æ ‡ç¾ï¼Œæˆ‘声明: - (a) 我已ç»å¯¹è¿™ä¸ªè¡¥ä¸è¿›è¡Œäº†ä¸€æ¬¡æŠ€æœ¯å®¡æŸ¥ï¼Œä»¥è¯„估它是å¦é€‚åˆè¢«åŒ…å«åˆ° + (a) 我已ç»å¯¹è¿™ä¸ªè¡¥ä¸è¿›è¡Œäº†ä¸€æ¬¡æŠ€æœ¯å®¡é˜…,以评估它是å¦é€‚åˆè¢«åŒ…å«åˆ° ä¸»çº¿å†…æ ¸ä¸ã€‚ (b) 与补ä¸ç›¸å…³çš„任何问题ã€é¡¾è™‘或问题都已å馈给æ交者。我对æ交者对 我的评论的回应感到满æ„。 - (c) 虽然这一æ交å¯èƒ½ä¼šæ”¹è¿›ä¸€äº›ä¸œè¥¿ï¼Œä½†æˆ‘相信,æ¤æ—¶ï¼Œï¼ˆ1ï¼‰å¯¹å†…æ ¸ + (c) 虽然这一æ交å¯èƒ½ä»å¯è¢«æ”¹è¿›ï¼Œä½†æˆ‘相信,æ¤æ—¶ï¼Œï¼ˆ1ï¼‰å¯¹å†…æ ¸ 进行了有价值的修改,(2)没有包å«äº‰è®ºä¸æ¶‰åŠçš„已知问题。 - (d) 虽然我已ç»å®¡æŸ¥äº†è¡¥ä¸å¹¶è®¤ä¸ºå®ƒæ˜¯å¥å…¨çš„,但我ä¸ä¼šï¼ˆé™¤éžå¦æœ‰æ˜Žç¡® - 说明)作出任何ä¿è¯æˆ–ä¿è¯å®ƒå°†åœ¨ä»»ä½•ç»™å®šæƒ…况下实现其规定的目的 + (d) 虽然我已ç»å®¡é˜…了补ä¸å¹¶è®¤ä¸ºå®ƒæ˜¯å¥å…¨çš„,但我ä¸ä¼šï¼ˆé™¤éžå¦æœ‰æ˜Žç¡® + 说明)作出任何ä¿è¯æˆ–æ‹…ä¿å®ƒä¼šåœ¨ä»»ä½•ç»™å®šæƒ…况下实现其规定的目的 或æ£å¸¸è¿è¡Œã€‚ -Reviewed-by 是一ç§è§‚点声明,å³è¡¥ä¸æ˜¯å¯¹å†…æ ¸çš„é€‚å½“ä¿®æ”¹ï¼Œæ²¡æœ‰ä»»ä½•é—留的严é‡æŠ€æœ¯ -问题。任何感兴趣的审阅者(完æˆå·¥ä½œçš„人)都å¯ä»¥ä¸ºä¸€ä¸ªè¡¥ä¸æ供一个 Review-by -æ ‡ç¾ã€‚æ¤æ ‡ç¾ç”¨äºŽå‘审阅者æ供致谢,并通知维护者已在修补程åºä¸Šå®Œæˆçš„审阅程度。 -Reviewed-by: 当由已知了解主题区域并执行彻底检查的审阅者æä¾›æ—¶ï¼Œé€šå¸¸ä¼šå¢žåŠ +Reviewed-by是一ç§è§‚点声明,å³è¡¥ä¸æ˜¯å¯¹å†…æ ¸çš„é€‚å½“ä¿®æ”¹ï¼Œæ²¡æœ‰ä»»ä½•é—留的严é‡æŠ€æœ¯ +问题。任何感兴趣的审阅者(完æˆå·¥ä½œçš„人)都å¯ä»¥ä¸ºä¸€ä¸ªè¡¥ä¸æ供一个Reviewed-by +æ ‡ç¾ã€‚æ¤æ ‡ç¾ç”¨äºŽå‘审阅者æ供致谢,并通知维护者补ä¸çš„审阅进度。 +当Reviewed-by:æ ‡ç¾ç”±å·²çŸ¥äº†è§£ä¸»é¢˜åŒºåŸŸå¹¶æ‰§è¡Œå½»åº•æ£€æŸ¥çš„审阅者æä¾›æ—¶ï¼Œé€šå¸¸ä¼šå¢žåŠ è¡¥ä¸è¿›å…¥å†…æ ¸çš„å¯èƒ½æ€§ã€‚ +一旦从测试人员或审阅者的“Tested-byâ€å’Œâ€œReviewed-byâ€æ ‡ç¾å‡ºçŽ°åœ¨é‚®ä»¶åˆ—表ä¸ï¼Œ +作者应在å‘é€ä¸‹ä¸€ä¸ªç‰ˆæœ¬æ—¶å°†å…¶æ·»åŠ 到适用的补ä¸ä¸ã€‚但是,如果补ä¸åœ¨ä»¥ä¸‹ç‰ˆæœ¬ä¸å‘ +ç”Ÿäº†å®žè´¨æ€§æ›´æ”¹ï¼Œè¿™äº›æ ‡ç¾å¯èƒ½ä¸å†é€‚ç”¨ï¼Œå› æ¤åº”è¯¥åˆ é™¤ã€‚é€šå¸¸ï¼Œåœ¨è¡¥ä¸æ›´æ”¹æ—¥å¿—ä¸ +(在 ``---`` 分隔符之åŽï¼‰åº”该æåˆ°åˆ é™¤æŸäººçš„æµ‹è¯•è€…æˆ–å®¡é˜…è€…æ ‡ç¾ã€‚ + Suggested-by: 表示补ä¸çš„想法是由指定的人æ出的,并确ä¿å°†æ¤æƒ³æ³•å½’功于指定的 人。请注æ„,未ç»è®¸å¯ï¼Œä¸å¾—æ·»åŠ æ¤æ ‡ç¾ï¼Œç‰¹åˆ«æ˜¯å¦‚果该想法未在公共论å›ä¸Šå‘布。 -这就是说,如果我们勤快地致谢我们的创æ„者,他们很有希望在未æ¥å¾—到鼓舞,å†æ¬¡ +也就是说,如果我们勤快地致谢创æ„æ供者,他们将å—到鼓舞,很有希望在未æ¥å†æ¬¡ 帮助我们。 -Fixes: 指示补ä¸åœ¨ä»¥å‰çš„æ交ä¸ä¿®å¤äº†ä¸€ä¸ªé—®é¢˜ã€‚它å¯ä»¥å¾ˆå®¹æ˜“地确定错误的æ¥æºï¼Œ -这有助于检查错误修å¤ã€‚è¿™ä¸ªæ ‡è®°è¿˜å¸®åŠ©ç¨³å®šå†…æ ¸å›¢é˜Ÿç¡®å®šåº”è¯¥æŽ¥æ”¶ä¿®å¤çš„ç¨³å®šå†…æ ¸ -版本。这是指示补ä¸ä¿®å¤çš„错误的首选方法。请å‚阅 :ref:`cn_describe_changes` -æ述您的更改以了解更多详细信æ¯ã€‚ +Fixes: 指示补ä¸ä¿®å¤äº†ä¹‹å‰æ交的一个问题。它å¯ä»¥ä¾¿äºŽç¡®å®šé”™è¯¯çš„æ¥æºï¼Œè¿™æœ‰åŠ©äºŽ +检查错误修å¤ã€‚è¿™ä¸ªæ ‡ç¾è¿˜å¸®åŠ©ç¨³å®šå†…æ ¸å›¢é˜Ÿç¡®å®šåº”è¯¥æŽ¥æ”¶ä¿®å¤çš„ç¨³å®šå†…æ ¸ç‰ˆæœ¬ã€‚è¿™æ˜¯ +指示补ä¸ä¿®å¤çš„错误的首选方法。请å‚阅 :ref:`zh_describe_changes` 了解更多信æ¯ã€‚ -.. _cn_the_canonical_patch_format: +.. note:: -12ï¼‰æ ‡å‡†è¡¥ä¸æ ¼å¼ ----------------- + é™„åŠ Fixes:æ ‡ç¾ä¸ä¼šæ”¹å˜ç¨³å®šå†…æ ¸è§„åˆ™æµç¨‹ï¼Œä¹Ÿä¸æ”¹å˜æ‰€æœ‰ç¨³å®šç‰ˆè¡¥ä¸æŠ„é€ + stable@vger.kernel.orgçš„è¦æ±‚。有关更多信æ¯ï¼Œè¯·é˜…读 + Documentation/translations/zh_CN/process/stable-kernel-rules.rst 。 + +.. _zh_the_canonical_patch_format: + +æ ‡å‡†è¡¥ä¸æ ¼å¼ +------------ 本节æè¿°å¦‚ä½•æ ¼å¼åŒ–è¡¥ä¸æœ¬èº«ã€‚请注æ„,如果您的补ä¸å˜å‚¨åœ¨ ``Git`` å˜å‚¨åº“ä¸ï¼Œåˆ™ -å¯ä»¥ä½¿ç”¨ ``git format-patch`` 进行æ£ç¡®çš„è¡¥ä¸æ ¼å¼è®¾ç½®ã€‚ä½†æ˜¯ï¼Œè¿™äº›å·¥å…·æ— æ³•åˆ›å»º +å¯ä»¥ä½¿ç”¨ ``git format-patch`` 进行æ£ç¡®çš„è¡¥ä¸æ ¼å¼åŒ–ã€‚ä½†æ˜¯ï¼Œè¿™äº›å·¥å…·æ— æ³•åˆ›å»º å¿…è¦çš„æ–‡æœ¬ï¼Œå› æ¤è¯·åŠ¡å¿…阅读下é¢çš„说明。 -æ ‡å‡†çš„è¡¥ä¸ï¼Œæ ‡é¢˜è¡Œæ˜¯:: +æ ‡å‡†çš„è¡¥ä¸æ ‡é¢˜è¡Œæ˜¯:: Subject: [PATCH 001/123] å系统:一å¥è¯æ¦‚è¿° -æ ‡å‡†è¡¥ä¸çš„信体å˜åœ¨å¦‚下部分: +æ ‡å‡†è¡¥ä¸çš„信体包å«å¦‚下部分: - - 一个 "from" 行指出补ä¸ä½œè€…。åŽè·Ÿç©ºè¡Œï¼ˆä»…当å‘é€ä¿®è¡¥ç¨‹åºçš„人ä¸æ˜¯ä½œè€…æ—¶æ‰éœ€è¦ï¼‰ã€‚ + - 一个 ``from`` 行指出补ä¸ä½œè€…。åŽè·Ÿç©ºè¡Œï¼ˆä»…当å‘é€è¡¥ä¸çš„人ä¸æ˜¯ä½œè€…æ—¶æ‰éœ€è¦ï¼‰ã€‚ - - 解释的æ£æ–‡ï¼Œè¡Œä»¥75列包装,这将被å¤åˆ¶åˆ°æ°¸ä¹…å˜æ›´æ—¥å¿—æ¥æ述这个补ä¸ã€‚ + - 说明文å—,æ¯è¡Œæœ€é•¿75列,这将被å¤åˆ¶åˆ°æ°¸ä¹…å˜æ›´æ—¥å¿—æ¥æ述这个补ä¸ã€‚ - 一个空行 - - 上é¢æ述的“Signed-off-by†行,也将出现在更改日志ä¸ã€‚ + - 上述的 ``Signed-off-by:`` 行,也将出现在更改日志ä¸ã€‚ - åªåŒ…å« ``---`` çš„æ ‡è®°çº¿ã€‚ @@ -511,29 +473,29 @@ Fixes: 指示补ä¸åœ¨ä»¥å‰çš„æ交ä¸ä¿®å¤äº†ä¸€ä¸ªé—®é¢˜ã€‚它å¯ä»¥å¾ˆå®¹æ - 实际补ä¸ï¼ˆ ``diff`` 输出)。 -æ ‡é¢˜è¡Œçš„æ ¼å¼ï¼Œä½¿å¾—å¯¹æ ‡é¢˜è¡ŒæŒ‰å—æ¯åºæŽ’åºéžå¸¸çš„容易 - 很多 e-mail 客户端都 -å¯ä»¥æ”¯æŒ - å› ä¸ºåºåˆ—å·æ˜¯ç”¨é›¶å¡«å……的,所以按数å—排åºå’ŒæŒ‰å—æ¯æŽ’åºæ˜¯ä¸€æ ·çš„。 +æ ‡é¢˜è¡Œçš„æ ¼å¼ï¼Œä½¿å¾—å¯¹æ ‡é¢˜è¡ŒæŒ‰å—æ¯åºæŽ’åºéžå¸¸çš„容易——很多邮件客户端都 +å¯ä»¥æ”¯æŒâ€”â€”å› ä¸ºåºåˆ—å·æ˜¯ç”¨é›¶å¡«å……的,所以按数å—排åºå’ŒæŒ‰å—æ¯æŽ’åºæ˜¯ä¸€æ ·çš„。 -e-mail æ ‡é¢˜ä¸çš„“å系统â€æ ‡è¯†å“ªä¸ªå†…æ ¸å系统将被打补ä¸ã€‚ +é‚®ä»¶æ ‡é¢˜ä¸çš„“å系统â€æ ‡è¯†å“ªä¸ªå†…æ ¸å系统将被打补ä¸ã€‚ -e-mail æ ‡é¢˜ä¸çš„“一å¥è¯æ¦‚è¿°â€æ‰¼è¦çš„æè¿° e-mail ä¸çš„è¡¥ä¸ã€‚“一å¥è¯æ¦‚述†+é‚®ä»¶æ ‡é¢˜ä¸çš„“一å¥è¯æ¦‚è¿°â€æ‰¼è¦çš„æ述邮件ä¸çš„è¡¥ä¸ã€‚“一å¥è¯æ¦‚述†ä¸åº”该是一个文件å。对于一个补ä¸ç³»åˆ—(“补ä¸ç³»åˆ—â€æŒ‡ä¸€ç³»åˆ—的多个相关补 ä¸ï¼‰ï¼Œä¸è¦å¯¹æ¯ä¸ªè¡¥ä¸éƒ½ä½¿ç”¨åŒæ ·çš„“一å¥è¯æ¦‚è¿°â€ã€‚ -è®°ä½ e-mail 的“一å¥è¯æ¦‚è¿°â€ä¼šæˆä¸ºè¯¥è¡¥ä¸çš„å…¨å±€å”¯ä¸€æ ‡è¯†ã€‚å®ƒä¼šè”“å»¶åˆ° git +è®°ä½é‚®ä»¶çš„“一å¥è¯æ¦‚è¿°â€ä¼šæˆä¸ºè¯¥è¡¥ä¸çš„å…¨å±€å”¯ä¸€æ ‡è¯†ã€‚å®ƒä¼šè¿›å…¥ ``git`` 的改动记录里。然åŽâ€œä¸€å¥è¯æ¦‚è¿°â€ä¼šè¢«ç”¨åœ¨å¼€å‘者的讨论里,用æ¥æŒ‡ä»£è¿™ä¸ªè¡¥ -ä¸ã€‚用户将希望通过 google æ¥æœç´¢"一å¥è¯æ¦‚è¿°"æ¥æ‰¾åˆ°é‚£äº›è®¨è®ºè¿™ä¸ªè¡¥ä¸çš„æ–‡ +ä¸ã€‚用户将希望通过æœç´¢å¼•æ“Žæœç´¢â€œä¸€å¥è¯æ¦‚è¿°â€æ¥æ‰¾åˆ°é‚£äº›è®¨è®ºè¿™ä¸ªè¡¥ä¸çš„æ–‡ ç« ã€‚å½“äººä»¬åœ¨ä¸¤ä¸‰ä¸ªæœˆåŽä½¿ç”¨è¯¸å¦‚ ``gitk`` 或 ``git log --oneline`` 之类 的工具查看数åƒä¸ªè¡¥ä¸æ—¶ï¼Œä¹Ÿä¼šå¾ˆå¿«çœ‹åˆ°å®ƒã€‚ å‡ºäºŽè¿™äº›åŽŸå› ï¼Œæ¦‚è¿°å¿…é¡»ä¸è¶…过70-75个å—符,并且必须æè¿°è¡¥ä¸çš„更改以åŠä¸º -什么需è¦è¡¥ä¸ã€‚æ—¢è¦ç®€æ´åˆè¦æè¿°æ€§å¾ˆæœ‰æŒ‘æˆ˜æ€§ï¼Œä½†å†™å¾—å¥½çš„æ¦‚è¿°åº”è¯¥è¿™æ ·åšã€‚ +什么需è¦è¡¥ä¸ã€‚æ—¢è¦ç®€æ´åˆè¦æè¿°æ€§å¾ˆæœ‰æŒ‘æˆ˜æ€§ï¼Œä½†å†™å¾—å¥½çš„æ¦‚è¿°åº”è¯¥è¿™æ ·ã€‚ 概述的å‰ç¼€å¯ä»¥ç”¨æ–¹æ‹¬å·æ‹¬èµ·æ¥ï¼šâ€œSubject: [PATCH <tag>...] <概述>â€ã€‚æ ‡è®° ä¸è¢«è§†ä¸ºæ¦‚述的一部分,而是æ述应该如何处ç†è¡¥ä¸ã€‚如果补ä¸çš„å¤šä¸ªç‰ˆæœ¬å·²å‘ -é€å‡ºæ¥ä»¥å“应评审(å³â€œv1,v2,v3â€ï¼‰æˆ–“rfcâ€ï¼Œä»¥æŒ‡ç¤ºè¯„å®¡è¯·æ±‚ï¼Œé‚£ä¹ˆé€šç”¨æ ‡è®° -å¯èƒ½åŒ…括版本æ述符。如果一个补ä¸ç³»åˆ—ä¸æœ‰å››ä¸ªè¡¥ä¸ï¼Œé‚£ä¹ˆå„个补ä¸å¯ä»¥è¿™æ · -ç¼–å·ï¼š1/4ã€2/4ã€3/4ã€4/4。这å¯ä»¥ç¡®ä¿å¼€å‘人员了解补ä¸åº”用的顺åºï¼Œå¹¶ä¸”他们 +é€å‡ºæ¥ä»¥å“应评审(å³â€œv1,v2,v3â€ï¼‰åˆ™å¿…须包å«ç‰ˆæœ¬å·ï¼Œæˆ–包å«â€œRFCâ€ä»¥æŒ‡ç¤º +评审请求。如果一个补ä¸ç³»åˆ—ä¸æœ‰å››ä¸ªè¡¥ä¸ï¼Œé‚£ä¹ˆå„个补ä¸å¯ä»¥è¿™æ ·ç¼–å·ï¼š1/4ã€2/4〠+3/4ã€4/4。这å¯ä»¥ç¡®ä¿å¼€å‘人员了解补ä¸åº”用的顺åºï¼Œä¸” å·²ç»æŸ¥çœ‹æˆ–应用了补ä¸ç³»åˆ—ä¸çš„所有补ä¸ã€‚ ä¸€äº›æ ‡é¢˜çš„ä¾‹å:: @@ -541,95 +503,134 @@ e-mail æ ‡é¢˜ä¸çš„“一å¥è¯æ¦‚è¿°â€æ‰¼è¦çš„æè¿° e-mail ä¸çš„è¡¥ä¸ã€‚†Subject: [patch 2/5] ext2: improve scalability of bitmap searching Subject: [PATCHv2 001/207] x86: fix eflags tracking -"From" 行是信体里的最上é¢ä¸€è¡Œï¼Œå…·æœ‰å¦‚ä¸‹æ ¼å¼ï¼š +``From`` 行是信体里的最上é¢ä¸€è¡Œï¼Œå…·æœ‰å¦‚ä¸‹æ ¼å¼:: + From: Patch Author <author@example.com> -"From" 行指明在永久改动日志里,è°ä¼šè¢«ç¡®è®¤ä¸ºä½œè€…。如果没有 "From" 行,那 -么邮件头里的 "From: " 行会被用æ¥å†³å®šæ”¹åŠ¨æ—¥å¿—ä¸çš„作者。 +``From`` 行指明在永久改动日志里,è°ä¼šè¢«ç¡®è®¤ä¸ºä½œè€…。如果没有 ``From`` 行,那 +么邮件头里的 ``From:`` 行会被用æ¥å†³å®šæ”¹åŠ¨æ—¥å¿—ä¸çš„作者。 -说明的主题将会被æ交到永久的æºä»£ç æ”¹åŠ¨æ—¥å¿—é‡Œï¼Œå› æ¤å¯¹é‚£äº›æ—©å·²ç»ä¸è®°å¾—å’Œ -这个补ä¸ç›¸å…³çš„讨论细节的有能力的读者æ¥è¯´ï¼Œæ˜¯æœ‰æ„义的。包括补ä¸ç¨‹åºå®šä½ -é”™è¯¯çš„ï¼ˆå†…æ ¸æ—¥å¿—æ¶ˆæ¯ã€OOPS消æ¯ç‰ï¼‰ç—‡çŠ¶ï¼Œå¯¹äºŽæœç´¢æ交日志以寻找适用补ä¸çš„人 -尤其有用。如果一个补ä¸ä¿®å¤äº†ä¸€ä¸ªç¼–译失败,那么å¯èƒ½ä¸éœ€è¦åŒ…å«æ‰€æœ‰ç¼–译失败; +说明文å—将会被æ交到永久的æºä»£ç æ”¹åŠ¨æ—¥å¿—é‡Œï¼Œå› æ¤åº”针对那些早已ç»ä¸è®°å¾—和这 +个补ä¸ç›¸å…³çš„讨论细节的读者。包括补ä¸å¤„ç†çš„æ•…éšœç—‡çŠ¶ï¼ˆå†…æ ¸æ—¥å¿—æ¶ˆæ¯ã€oopsæ¶ˆæ¯ +ç‰ï¼‰ï¼Œè¿™å¯¹äºŽå¯èƒ½æ£åœ¨æœç´¢æ交日志以查找适用补ä¸çš„人特别有用。文本应该写得如 +æ¤è¯¦ç»†ï¼Œä»¥ä¾¿åœ¨æ•°å‘¨ã€æ•°æœˆç”šè‡³æ•°å¹´åŽé˜…读时,能够为读者æ供所需的细节信æ¯ï¼Œä»¥ +掌æ¡åˆ›å»ºè¡¥ä¸çš„ **åŽŸå› ** 。 + +如果一个补ä¸ä¿®å¤äº†ä¸€ä¸ªç¼–译失败,那么å¯èƒ½ä¸éœ€è¦åŒ…å« *所有* 编译失败; åªè¦è¶³å¤Ÿè®©æœç´¢è¡¥ä¸çš„äººèƒ½å¤Ÿæ‰¾åˆ°å®ƒå°±è¡Œäº†ã€‚ä¸Žæ¦‚è¿°ä¸€æ ·ï¼Œæ—¢è¦ç®€æ´åˆè¦æ述性。 -"---" æ ‡è®°è¡Œå¯¹äºŽè¡¥ä¸å¤„ç†å·¥å…·è¦æ‰¾åˆ°å“ªé‡Œæ˜¯æ”¹åŠ¨æ—¥å¿—ä¿¡æ¯çš„结æŸï¼Œæ˜¯ä¸å¯ç¼ºå°‘ +``---`` æ ‡è®°è¡Œå¯¹äºŽè¡¥ä¸å¤„ç†å·¥å…·è¦æ‰¾åˆ°å“ªé‡Œæ˜¯æ”¹åŠ¨æ—¥å¿—ä¿¡æ¯çš„结æŸï¼Œæ˜¯ä¸å¯ç¼ºå°‘ 的。 -对于 "---" æ ‡è®°ä¹‹åŽçš„é¢å¤–注解,一个好的用途就是用æ¥å†™ diffstat,用æ¥æ˜¾ -示修改了什么文件和æ¯ä¸ªæ–‡ä»¶éƒ½å¢žåŠ å’Œåˆ é™¤äº†å¤šå°‘è¡Œã€‚diffstat 对于比较大的补 -ä¸ç‰¹åˆ«æœ‰ç”¨ã€‚其余那些åªæ˜¯å’Œæ—¶åˆ»æˆ–者开å‘者相关的注解,ä¸åˆé€‚放到永久的改 -动日志里的,也应该放这里。 -使用 diffstat的选项 "-p 1 -w 70" è¿™æ ·æ–‡ä»¶åå°±ä¼šä»Žå†…æ ¸æºä»£ç æ ‘çš„ç›®å½•å¼€å§‹ +对于 ``---`` æ ‡è®°ä¹‹åŽçš„é¢å¤–注解,一个好的用途就是用æ¥å†™ ``diffstat`` ,用æ¥æ˜¾ +示修改了什么文件和æ¯ä¸ªæ–‡ä»¶éƒ½å¢žåŠ å’Œåˆ é™¤äº†å¤šå°‘è¡Œã€‚ ``diffstat`` 对于比较大的补 +ä¸ç‰¹åˆ«æœ‰ç”¨ã€‚ +使用 ``diffstat`` 的选项 ``-p 1 -w 70`` è¿™æ ·æ–‡ä»¶åå°±ä¼šä»Žå†…æ ¸æºä»£ç æ ‘çš„ç›®å½•å¼€å§‹ ,ä¸ä¼šå 用太宽的空间(很容易适åˆ80列的宽度,也许会有一些缩进。) +( ``git`` 默认会生æˆåˆé€‚çš„diffstat。) + +其余那些åªé€‚用于当时或者与维护者相关的注解,ä¸åˆé€‚放到永久的改动日志里的,也 +应该放这里。较好的例å就是 ``è¡¥ä¸æ›´æ”¹è®°å½•`` ,记录了v1å’Œv2版本补ä¸ä¹‹é—´çš„差异。 + +请将æ¤ä¿¡æ¯æ”¾åœ¨å°†å˜æ›´æ—¥å¿—与补ä¸çš„其余部分分隔开的 ``---`` è¡Œ **之åŽ** 。版本 +ä¿¡æ¯ä¸æ˜¯æ交到gitæ ‘çš„å˜æ›´æ—¥å¿—的一部分。åªæ˜¯ä¾›å®¡é˜…äººå‘˜ä½¿ç”¨çš„é™„åŠ ä¿¡æ¯ã€‚如果将 +其放置在æäº¤æ ‡è®°ä¸Šæ–¹ï¼Œåˆ™éœ€è¦æ‰‹åŠ¨äº¤äº’æ‰èƒ½å°†å…¶åˆ 除。如果它ä½äºŽåˆ†éš”线以下,则在 +应用补ä¸æ—¶ä¼šè‡ªåŠ¨å‰¥ç¦»:: + + <commit message> + ... + Signed-off-by: Author <author@mail> + --- + V2 -> V3: Removed redundant helper function + V1 -> V2: Cleaned up coding style and addressed review comments -在åŽé¢çš„å‚考资料ä¸èƒ½çœ‹åˆ°é€‚当的补ä¸æ ¼å¼çš„更多细节。 + path/to/file | 5+++-- + ... -.. _cn_explicit_in_reply_to: +在åŽé¢çš„å‚考资料ä¸èƒ½çœ‹åˆ°æ£ç¡®è¡¥ä¸æ ¼å¼çš„更多细节。 -15) 明确回å¤é‚®ä»¶å¤´(In-Reply-To) -------------------------------- +.. _zh_backtraces: -æ‰‹åŠ¨æ·»åŠ å›žå¤è¡¥ä¸çš„çš„æ ‡é¢˜å¤´(In-Reply_To:) 是有帮助的(例如,使用 ``git send-email`` ) -将补ä¸ä¸Žä»¥å‰çš„相关讨论关è”èµ·æ¥ï¼Œä¾‹å¦‚,将bugä¿®å¤ç¨‹åºé“¾æŽ¥åˆ°ç”µå邮件和bug报告。 +æ交消æ¯ä¸çš„回溯(Backtraces) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +回溯有助于记录导致问题的调用链。然而,并éžæ‰€æœ‰å›žæº¯éƒ½æœ‰å¸®åŠ©ã€‚例如,早期引导调 +用链是独特而明显的。而é€å—å¤åˆ¶å®Œæ•´çš„dmesgè¾“å‡ºåˆ™ä¼šå¢žåŠ æ—¶é—´æˆ³ã€æ¨¡å—列表ã€å¯„å˜ +å™¨å’Œå †æ ˆè½¬å‚¨ç‰åˆ†æ•£æ³¨æ„力的信æ¯ã€‚ + +å› æ¤ï¼Œæœ€æœ‰ç”¨çš„回溯应该从转储ä¸æå–相关信æ¯ï¼Œä»¥æ›´å®¹æ˜“集ä¸åœ¨çœŸå®žé—®é¢˜ä¸Šã€‚下é¢æ˜¯ +一个剪è£è‰¯å¥½çš„回溯示例:: + + unchecked MSR access error: WRMSR to 0xd51 (tried to write 0x0000000000000064) + at rIP: 0xffffffffae059994 (native_write_msr+0x4/0x20) + Call Trace: + mba_wrmsr + update_domains + rdtgroup_mkdir + +.. _zh_explicit_in_reply_to: + +明确回å¤é‚®ä»¶å¤´ï¼ˆIn-Reply-To) +----------------------------- + +æ‰‹åŠ¨æ·»åŠ å›žå¤è¡¥ä¸çš„的邮件头(In-Reply_To:)是有用的(例如,使用 ``git send-email`` ), +å¯ä»¥å°†è¡¥ä¸ä¸Žä»¥å‰çš„相关讨论关è”èµ·æ¥ï¼Œä¾‹å¦‚,将bugè¡¥ä¸é“¾æŽ¥åˆ°ç”µå邮件和bug报告。 但是,对于多补ä¸ç³»åˆ—,最好é¿å…在回å¤æ—¶ä½¿ç”¨é“¾æŽ¥åˆ°è¯¥ç³»åˆ—çš„æ—§ç‰ˆæœ¬ã€‚è¿™æ ·ï¼Œ -è¡¥ä¸çš„多个版本就ä¸ä¼šæˆä¸ºç”µå邮件客户端ä¸æ— 法管ç†çš„引用åºåˆ—。如果链接有用, +è¡¥ä¸çš„多个版本就ä¸ä¼šæˆä¸ºç”µå邮件客户端ä¸æ— 法管ç†çš„å¼•ç”¨æ ‘ã€‚å¦‚æžœé“¾æŽ¥æœ‰ç”¨ï¼Œ å¯ä»¥ä½¿ç”¨ https://lore.kernel.org/ é‡å®šå‘器(例如,在å°é¢ç”µå邮件文本ä¸ï¼‰ 链接到补ä¸ç³»åˆ—的早期版本。 -16) å‘é€git pull请求 --------------------- - -如果您有一系列补ä¸ï¼Œé‚£ä¹ˆè®©ç»´æŠ¤äººå‘˜é€šè¿‡git pullæ“作将它们直接拉入å系统å˜å‚¨ -库å¯èƒ½æ˜¯æœ€æ–¹ä¾¿çš„。但是,请注æ„,从开å‘人员那里获å–è¡¥ä¸æ¯”从邮件列表ä¸èŽ·å–è¡¥ -ä¸éœ€è¦æ›´é«˜çš„ä¿¡ä»»åº¦ã€‚å› æ¤ï¼Œè®¸å¤šå系统维护人员ä¸æ„¿æ„接å—请求,特别是æ¥è‡ªæ–°çš„ -未知开å‘人员的请求。如果有疑问,您å¯ä»¥åœ¨å°é¢é‚®ä»¶ä¸ä½¿ç”¨pull 请求作为补ä¸ç³»åˆ— -æ£å¸¸å‘布的一个选项,让维护人员å¯ä»¥é€‰æ‹©ä½¿ç”¨å…¶ä¸ä¹‹ä¸€ã€‚ - -pull 请求的主题行ä¸åº”该有[Git Pull]。请求本身应该在一行ä¸åŒ…å«å˜å‚¨åº“å称和 -感兴趣的分支;它应该看起æ¥åƒ:: +ç»™å‡ºåŸºç¡€æ ‘ä¿¡æ¯ +-------------- - Please pull from +当其他开å‘人员收到您的补ä¸å¹¶å¼€å§‹å®¡é˜…时,知é“应该将您的工作放到代ç æ ‘åŽ†å²è®°å½• +ä¸çš„什么ä½ç½®é€šå¸¸å¾ˆæœ‰ç”¨ã€‚这对于自动化æŒç»é›†æˆæµæ°´ï¼ˆCI)特别有用,这些æµæ°´çº¿è¯• +图è¿è¡Œä¸€ç³»åˆ—测试,以便在维护人员开始审阅之å‰ç¡®å®šæ交的质é‡ã€‚ - git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus +如果您使用 ``git format-patch`` 生æˆè¡¥ä¸ï¼Œåˆ™å¯ä»¥é€šè¿‡ ``--base`` æ ‡å¿—åœ¨æäº¤ä¸ +自动包å«åŸºç¡€æ ‘ä¿¡æ¯ã€‚使用æ¤é€‰é¡¹æœ€ç®€å•ã€æœ€æ–¹ä¾¿çš„方法是é…åˆä¸»é¢˜åˆ†æ”¯:: - to get these changes: + $ git checkout -t -b my-topical-branch master + Branch 'my-topical-branch' set up to track local branch 'master'. + Switched to a new branch 'my-topical-branch' + [perform your edits and commits] -pull 请求还应该包å«ä¸€æ¡æ•´ä½“消æ¯ï¼Œè¯´æ˜Žè¯·æ±‚ä¸å°†åŒ…å«ä»€ä¹ˆï¼Œä¸€ä¸ªè¡¥ä¸æœ¬èº«çš„ ``Git shortlog`` -以åŠä¸€ä¸ªæ˜¾ç¤ºè¡¥ä¸ç³»åˆ—整体效果的 ``diffstat`` 。当然,将所有这些信æ¯æ”¶é›†åœ¨ä¸€èµ· -的最简å•æ–¹æ³•æ˜¯è®© ``git`` 使用 ``git request-pull`` 命令为您完æˆè¿™äº›å·¥ä½œã€‚ + $ git format-patch --base=auto --cover-letter -o outgoing/ master + outgoing/0000-cover-letter.patch + outgoing/0001-First-Commit.patch + outgoing/... -一些维护人员(包括Linus)希望看到æ¥è‡ªå·²ç¾åæäº¤çš„è¯·æ±‚ï¼›è¿™å¢žåŠ äº†ä»–ä»¬å¯¹ä½ çš„ -请求信心。特别是,在没有ç¾åæ ‡ç¾çš„情况下,Linus ä¸ä¼šä»Žåƒ Github è¿™æ ·çš„å…¬å…± -托管站点拉请求。 +å½“ä½ ç¼–è¾‘ ``outgoing/0000-cover-letter.patch`` 时,您会注æ„到在它的最底部有一 +è¡Œ ``base-commit:`` 尾注,它为审阅者和CI工具æ供了足够的信æ¯ä»¥æ£ç¡®æ‰§è¡Œ +``git am`` 而ä¸å¿…担心冲çª:: -创建æ¤ç±»ç¾å的第一æ¥æ˜¯ç”Ÿæˆä¸€ä¸ª GNRPG å¯†é’¥ï¼Œå¹¶ç”±ä¸€ä¸ªæˆ–å¤šä¸ªæ ¸å¿ƒå†…æ ¸å¼€å‘人员对 -其进行ç¾å。这一æ¥å¯¹æ–°å¼€å‘人员æ¥è¯´å¯èƒ½å¾ˆå›°éš¾ï¼Œä½†æ²¡æœ‰åŠžæ³•ç»•è¿‡å®ƒã€‚å‚åŠ ä¼šè®®æ˜¯ -找到å¯ä»¥ç¾ç½²æ‚¨çš„密钥的开å‘人员的好方法。 + $ git checkout -b patch-review [base-commit-id] + Switched to a new branch 'patch-review' + $ git am patches.mbox + Applying: First Commit + Applying: ... -一旦您在Git ä¸å‡†å¤‡äº†ä¸€ä¸ªæ‚¨å¸Œæœ›æœ‰äººæ‹‰çš„è¡¥ä¸ç³»åˆ—,就用 ``git tag -s`` 创建一 -个ç¾åæ ‡è®°ã€‚è¿™å°†åˆ›å»ºä¸€ä¸ªæ–°æ ‡è®°ï¼Œæ ‡è¯†è¯¥ç³»åˆ—ä¸çš„最åŽä¸€æ¬¡æ交,并包å«ç”¨æ‚¨çš„ç§ -钥创建的ç¾å。您还å¯ä»¥å°†changelogæ ·å¼çš„消æ¯æ·»åŠ åˆ°æ ‡è®°ä¸ï¼›è¿™æ˜¯ä¸€ä¸ªæ述拉请求 -整体效果的ç†æƒ³ä½ç½®ã€‚ +有关æ¤é€‰é¡¹çš„更多信æ¯ï¼Œè¯·å‚阅 ``man git-format-patch`` 。 -如果维护人员将è¦ä»Žä¸æå–çš„æ ‘ä¸æ˜¯æ‚¨æ£åœ¨ä½¿ç”¨çš„å˜å‚¨åº“,请ä¸è¦å¿˜è®°å°†å·²ç¾åçš„æ ‡è®° -显å¼æŽ¨é€åˆ°å…¬å…±æ ‘。 +.. note:: -生æˆæ‹‰è¯·æ±‚时,请使用已ç¾åçš„æ ‡è®°ä½œä¸ºç›®æ ‡ã€‚è¿™æ ·çš„å‘½ä»¤å¯ä»¥å®žçŽ°:: + ``--base`` 功能是在2.9.0版gitä¸å¼•å…¥çš„。 - git request-pull master git://my.public.tree/linux.git my-signed-tag +如果您ä¸ä½¿ç”¨gitæ ¼å¼åŒ–è¡¥ä¸ï¼Œä»ç„¶å¯ä»¥åŒ…å«ç›¸åŒçš„ ``base-commit`` 尾注,以指示您 +çš„å·¥ä½œæ‰€åŸºäºŽçš„æ ‘çš„æäº¤å“ˆå¸Œã€‚ä½ åº”è¯¥åœ¨å°é¢é‚®ä»¶æˆ–系列的第一个补ä¸ä¸æ·»åŠ 它,它应 +该放在 ``---`` 行的下é¢æˆ–所有其他内容之åŽï¼Œå³åªåœ¨ä½ 的电å邮件ç¾å之å‰ã€‚ å‚考文献 -------- -Andrew Morton, "The perfect patch" (tpp). +Andrew Morton,“完美的补ä¸â€ï¼ˆtpp) <https://www.ozlabs.org/~akpm/stuff/tpp.txt> -Jeff Garzik, "Linux kernel patch submission format". +Jeff Garzik,“Linuxå†…æ ¸è¡¥ä¸æäº¤æ ¼å¼â€ <https://web.archive.org/web/20180829112450/http://linux.yyz.us/patch-format.html> -Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". +Greg Kroah-Hartman,“如何惹æ¼å†…æ ¸å系统维护人员†<http://www.kroah.com/log/linux/maintainer.html> <http://www.kroah.com/log/linux/maintainer-02.html> @@ -642,16 +643,15 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". <http://www.kroah.com/log/linux/maintainer-06.html> -NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! +ä¸ï¼ï¼ï¼åˆ«å†å‘巨型补ä¸ç‚¸å¼¹ç»™linux-kernel@vger.kernel.orgçš„äººä»¬äº†ï¼ <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net> -Kernel Documentation/process/coding-style.rst: - :ref:`Documentation/translations/zh_CN/process/coding-style.rst <cn_codingstyle>` +å†…æ ¸ Documentation/translations/zh_CN/process/coding-style.rst -Linus Torvalds's mail on the canonical patch format: +Linus Torvaldså…³äºŽæ ‡å‡†è¡¥ä¸æ ¼å¼çš„邮件 <https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org> -Andi Kleen, "On submitting kernel patches" - Some strategies to get difficult or controversial changes in. +Andi Kleen,“æ交补ä¸ä¹‹è·¯â€ + 一些帮助åˆå…¥å›°éš¾æˆ–有争议的å˜æ›´çš„ç–略。 http://halobates.de/on-submitting-patches.pdf diff --git a/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst b/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst index 26b0f36f793d..3076402406c4 100644 --- a/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst +++ b/Documentation/translations/zh_CN/scheduler/sched-design-CFS.rst @@ -80,7 +80,7 @@ p->se.vruntime。一旦p->se.vruntimeå˜å¾—足够大,其它的任务将æˆä¸ºæ CFS使用纳秒粒度的计时,ä¸ä¾èµ–于任何jiffies或HZçš„ç»†èŠ‚ã€‚å› æ¤CFS并ä¸åƒä¹‹å‰çš„è°ƒåº¦å™¨é‚£æ · 有“时间片â€çš„概念,也没有任何å¯å‘å¼çš„设计。唯一å¯è°ƒçš„å‚æ•°ï¼ˆä½ éœ€è¦æ‰“å¼€CONFIG_SCHED_DEBUG)是: - /proc/sys/kernel/sched_min_granularity_ns + /sys/kernel/debug/sched/min_granularity_ns 它å¯ä»¥ç”¨æ¥å°†è°ƒåº¦å™¨ä»Žâ€œæ¡Œé¢â€æ¨¡å¼ï¼ˆä¹Ÿå°±æ˜¯ä½Žæ—¶å»¶ï¼‰è°ƒèŠ‚为“æœåŠ¡å™¨â€ï¼ˆä¹Ÿå°±æ˜¯é«˜æ‰¹å¤„ç†ï¼‰æ¨¡å¼ã€‚ 它的默认设置是适åˆæ¡Œé¢çš„工作负载。SCHED_BATCH也被CFS调度器模å—处ç†ã€‚ diff --git a/Documentation/translations/zh_TW/oops-tracing.txt b/Documentation/translations/zh_TW/oops-tracing.txt deleted file mode 100644 index be8e59f2abaf..000000000000 --- a/Documentation/translations/zh_TW/oops-tracing.txt +++ /dev/null @@ -1,212 +0,0 @@ -Chinese translated version of Documentation/admin-guide/bug-hunting.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Traditional Chinese maintainer: Hu Haowen <src.res@email.cn> ---------------------------------------------------------------------- -Documentation/admin-guide/bug-hunting.rst çš„ç¹é«”ä¸æ–‡ç‰ˆç¿»è¯ - -如果想評論或更新本文的內容,請直接è¯ç¹«åŽŸæ–‡æª”çš„ç¶è·è€…ã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡ -交æµæœ‰å›°é›£çš„話,也å¯ä»¥å‘ç¹é«”ä¸æ–‡ç‰ˆç¶è·è€…求助。如果本翻è¯æ›´æ–°ä¸åŠæ™‚或 -者翻è¯å˜åœ¨å•é¡Œï¼Œè«‹è¯ç¹«ç¹é«”ä¸æ–‡ç‰ˆç¶è·è€…。 - -ç¹é«”ä¸æ–‡ç‰ˆç¶è·è€…: 胡皓文 Hu Haowen <src.res@email.cn> -ç¹é«”ä¸æ–‡ç‰ˆç¿»è¯è€…: 胡皓文 Hu Haowen <src.res@email.cn> -ç¹é«”ä¸æ–‡ç‰ˆæ ¡è¯è€…: 胡皓文 Hu Haowen <src.res@email.cn> - -以下爲æ£æ–‡ ---------------------------------------------------------------------- - -注æ„: ksymoops 在2.6ä¸æ˜¯æ²’有用的。 è«‹ä»¥åŽŸæœ‰æ ¼å¼ä½¿ç”¨Oops(來自dmesg,ç‰ç‰)。 -忽略任何這樣那樣關於「解碼Oopsã€æˆ–者「通éŽksymoopsé‹è¡Œã€çš„文檔。 å¦‚æžœä½ è²¼å‡ºé‹è¡ŒéŽ -ksymoops的來自2.6çš„Oops,人們åªæœƒè®“ä½ é‡è²¼ä¸€æ¬¡ã€‚ - -å¿«é€Ÿç¸½çµ -------------- - -發ç¾Oops並發é€çµ¦çœ‹ä¼¼ç›¸é—œçš„å…§æ ¸é ˜åŸŸçš„ç¶è·è€…。別太擔心å°ä¸ä¸Šè™Ÿã€‚å¦‚æžœä½ ä¸ç¢ºå®šå°±ç™¼çµ¦ -å’Œä½ æ‰€åšçš„äº‹æƒ…ç›¸é—œçš„ä»£ç¢¼çš„è² è²¬äººã€‚ 如果å¯é‡ç¾è©¦è‘—æ述怎樣é‡æ§‹ã€‚ 那甚至比oops更有 -價值。 - -å¦‚æžœä½ å°æ–¼ç™¼é€çµ¦èª°ä¸€ç„¡æ‰€çŸ¥ï¼Œ 發給linux-kernel@vger.kernel.org。感è¬ä½ 幫助Linux -儘å¯èƒ½åœ°ç©©å®šã€‚ - -Oops在哪裡? ----------------------- - -通常Oops文本由klogdå¾žå…§æ ¸ç·©è¡å€é‡Œè®€å–並傳給syslogd,由syslogd寫到syslog文件ä¸ï¼Œ -典型地是/var/log/messages(ä¾è³´æ–¼/etc/syslog.conf)。有時klogd崩潰了,這種情æ³ä¸‹ä½ -èƒ½å¤ é‹è¡Œdmesg > fileä¾†å¾žå…§æ ¸ç·©è¡å€ä¸è®€å–數據並ä¿å˜ä¸‹ä¾†ã€‚ å¦å‰‡ä½ å¯ä»¥ -cat /proc/kmsg > file, ç„¶è€Œä½ å¿…é ˆä»‹å…¥ä¸æ¢å‚³è¼¸ï¼Œ kmsg是一個「永ä¸çµæŸçš„文件ã€ã€‚如 -æžœæ©Ÿå™¨å´©æ½°å£žåˆ°ä½ ä¸èƒ½è¼¸å…¥å‘½ä»¤æˆ–者ç£ç¢Ÿä¸å¯ç”¨é‚£éº¼ä½ 有三種é¸æ“‡:- - -(1) 手抄å±å¹•ä¸Šçš„文本待機器é‡å•“後å†è¼¸å…¥è¨ˆç®—機。 麻煩但如果沒有é‡å°å´©æ½°çš„準備, -這是僅有的é¸æ“‡ã€‚ å¦å¤–ï¼Œä½ å¯ä»¥ç”¨æ•¸ä½ç›¸æ©ŸæŠŠå±å¹•æ‹ä¸‹ä¾†-ä¸å¤ªå¥½ï¼Œä½†æ¯”沒有強。 如果信 -æ¯æ»¾å‹•åˆ°äº†çµ‚端的上é¢ï¼Œä½ 會發ç¾ä»¥é«˜åˆ†è¾¯çŽ‡å•“動(比如,vga=791ï¼‰æœƒè®“ä½ è®€åˆ°æ›´å¤šçš„æ–‡ -本。(注æ„:這需è¦vesafb,所以å°ã€Žæ—©æœŸã€çš„oops沒有幫助) - -(2)用串å£çµ‚端啓動(請åƒçœ‹Documentation/admin-guide/serial-console.rst),é‹è¡Œä¸€å€‹null -modem到å¦ä¸€å°æ©Ÿå™¨ä¸¦ç”¨ä½ å–œæ¡çš„通訊工具ç²å–輸出。Minicom工作地很好。 - -(3)使用Kdump(請åƒçœ‹Documentation/admin-guide/kdump/kdump.rst), -使用在Documentation/admin-guide/kdump/gdbmacros.txtä¸å®šç¾©çš„dmesg gdbå®ï¼Œå¾žèˆŠçš„å…§å˜ä¸æå–å…§æ ¸ -環形緩è¡å€ã€‚ - -å®Œæ•´ä¿¡æ¯ ----------------- - -注æ„:以下來自於Linus的郵件é©ç”¨æ–¼2.4å…§æ ¸ã€‚ æˆ‘å› çˆ²æ·å²åŽŸå› ä¿ç•™äº†å®ƒï¼Œä¸¦ä¸”å› çˆ²å…¶ä¸ -一些信æ¯ä»ç„¶é©ç”¨ã€‚ 特別注æ„的是,請忽略任何ksymoops的引用。 - -From: Linus Torvalds <torvalds@osdl.org> - -怎樣跟蹤Oops.. [原發到linux-kernel的一å°éƒµä»¶] - -主è¦çš„竅門是有五年和這些煩人的oops消æ¯æ‰“交é“的經驗;-) - -å¯¦éš›ä¸Šï¼Œä½ æœ‰è¾¦æ³•ä½¿å®ƒæ›´ç°¡å–®ã€‚æˆ‘æœ‰å…©å€‹ä¸åŒçš„方法: - - gdb /usr/src/linux/vmlinux - gdb> disassemble <offending_function> - -那是發ç¾å•é¡Œçš„簡單辦法,至少如果bugå ±å‘Šåšçš„好的情æ³ä¸‹ï¼ˆè±¡é€™å€‹ä¸€æ¨£-é‹è¡Œksymoops -得到oops發生的函數åŠå‡½æ•¸å…§çš„å移)。 - -å“¦ï¼Œå¦‚æžœå ±å‘Šç™¼ç”Ÿçš„å…§æ ¸ä»¥ç›¸åŒçš„ç·¨è¯å™¨å’Œç›¸ä¼¼çš„é…置編è¯å®ƒæœƒæœ‰å¹«åŠ©çš„。 - -å¦ä¸€ä»¶è¦åšçš„事是å彙編bugå ±å‘Šçš„ã€ŒCodeã€éƒ¨åˆ†ï¼šksymoops也會用æ£ç¢ºçš„工具來åšé€™ä»¶äº‹ï¼Œ -ä½†å¦‚æžœæ²’æœ‰é‚£äº›å·¥å…·ä½ å¯ä»¥å¯«ä¸€å€‹å‚»ç¨‹åºï¼š - - char str[] = "\xXX\xXX\xXX..."; - main(){} - -並用gcc -gç·¨è¯å®ƒç„¶å¾ŒåŸ·è¡Œã€Œdisassemble strã€ï¼ˆXX部分是由Oopså ±å‘Šçš„å€¼-ä½ å¯ä»¥åƒ…剪切 -粘貼並用「\xã€æ›¿æ›ç©ºæ ¼-我就是這麼åšçš„ï¼Œå› çˆ²æˆ‘æ‡¶å¾—å¯«ç¨‹åºè‡ªå‹•åšé€™ä¸€åˆ‡ï¼‰ã€‚ - -å¦å¤–ï¼Œä½ å¯ä»¥ç”¨scripts/decodecode這個shell腳本。它的使用方法是: -decodecode < oops.txt - -「Codeã€ä¹‹å¾Œçš„åå…進ä½å—節å¯èƒ½ï¼ˆåœ¨æŸäº›æž¶æ§‹ä¸Šï¼‰æœ‰ä¸€äº›ç•¶å‰æŒ‡ä»¤ä¹‹å‰çš„指令å—ç¯€ä»¥åŠ -當å‰å’Œä¹‹å¾Œçš„指令å—節 - -Code: f9 0f 8d f9 00 00 00 8d 42 0c e8 dd 26 11 c7 a1 60 ea 2b f9 8b 50 08 a1 -64 ea 2b f9 8d 34 82 8b 1e 85 db 74 6d 8b 15 60 ea 2b f9 <8b> 43 04 39 42 54 -7e 04 40 89 42 54 8b 43 04 3b 05 00 f6 52 c0 - -æœ€å¾Œï¼Œå¦‚æžœä½ æƒ³çŸ¥é“ä»£ç¢¼ä¾†è‡ªå“ªè£¡ï¼Œä½ å¯ä»¥ï¼š - - cd /usr/src/linux - make fs/buffer.s # 或任何產生BUG的文件 - -ç„¶å¾Œä½ æœƒæ¯”gdbå彙編更清楚的知é“發生了什麼。 - -ç¾åœ¨ï¼Œå•é¡Œæ˜¯æŠŠä½ 所æ“有的所有數據çµåˆèµ·ä¾†ï¼šCæºç¢¼ï¼ˆé—œæ–¼å®ƒæ‡‰è©²æ€Žæ¨£çš„一般知è˜ï¼‰ï¼Œ -彙編代碼åŠå…¶å彙編得到的代碼(å¦å¤–還有從「oopsã€æ¶ˆæ¯å¾—到的寄å˜å™¨ç‹€æ…‹-å°äº†è§£æ¯€å£žçš„ -指é‡æœ‰ç”¨ï¼Œè€Œä¸”ç•¶ä½ æœ‰äº†å½™ç·¨ä»£ç¢¼ä½ ä¹Ÿèƒ½æ‹¿å…¶å®ƒçš„å¯„å˜å™¨å’Œä»»ä½•å®ƒå€‘å°æ‡‰çš„C表é”å¼åšåŒ¹é… -)。 - -å¯¦éš›ä¸Šï¼Œä½ åƒ…éœ€çœ‹çœ‹å“ªè£¡ä¸åŒ¹é…(這個例å是「Codeã€å彙編和編è¯å™¨ç”Ÿæˆçš„代碼ä¸åŒ¹é…)。 -ç„¶å¾Œä½ é ˆè¦æ‰¾å‡ºçˆ²ä»€éº¼ä¸åŒ¹é…。通常很簡單-ä½ çœ‹åˆ°ä»£ç¢¼ä½¿ç”¨äº†ç©ºæŒ‡é‡ç„¶å¾Œä½ çœ‹ä»£ç¢¼æƒ³çŸ¥é“ -空指é‡æ˜¯æ€Žéº¼å‡ºç¾çš„,還有檢查它是å¦åˆæ³•.. - -ç¾åœ¨ï¼Œå¦‚æžœæ˜Žç™½é€™æ˜¯ä¸€é …è€—æ™‚çš„å·¥ä½œè€Œä¸”éœ€è¦ä¸€ä¸é»žå…’的專心,沒錯。這就是我爲什麼大多 -åªæ˜¯å¿½ç•¥é‚£äº›æ²’有符號表信æ¯çš„å´©æ½°å ±å‘Šçš„åŽŸå› ï¼šç°¡å–®çš„èªªå¤ªé›£æŸ¥æ‰¾äº†ï¼ˆæˆ‘æœ‰ä¸€äº› -程åºç”¨æ–¼åœ¨å…§æ ¸ä»£ç¢¼æ®µä¸æœç´¢ç‰¹å®šçš„模å¼ï¼Œè€Œä¸”有時我也已經能找出那些崩潰的地方,但是 -僅僅是找出æ£ç¢ºçš„åºåˆ—也確實需è¦ç›¸ç•¶ç´®å¯¦çš„å…§æ ¸çŸ¥è˜ï¼‰ - -_有時_會發生這種情æ³ï¼Œæˆ‘僅看到崩潰ä¸çš„å彙編代碼åºåˆ—, 然後我馬上就明白å•é¡Œå‡ºåœ¨ -哪裡。這時我æ‰æ„è˜åˆ°è‡ªå·±å¹¹é€™å€‹å·¥ä½œå·²ç¶“太長時間了;-) - - Linus - - ---------------------------------------------------------------------------- -關於Oops跟蹤的註解: - -爲了幫助Linuså’Œå…¶å®ƒå…§æ ¸é–‹ç™¼è€…ï¼Œklogdç´å…¥äº†å¤§é‡çš„支æŒä¾†è™•ç†ä¿è·éŒ¯èª¤ã€‚爲了æ“æœ‰å° -地å€è§£æžçš„完整支æŒè‡³å°‘應該使用1.3-pl3çš„sysklogd包。 - -當ä¿è·éŒ¯èª¤ç™¼ç”Ÿæ™‚,klogd守è·é€²ç¨‹è‡ªå‹•æŠŠå…§æ ¸æ—¥èªŒä¿¡æ¯ä¸çš„é‡è¦åœ°å€ç¿»è¯æˆå®ƒå€‘相應的符 -號。 - -klogd執行兩種類型的地å€è§£æžã€‚首先是éœæ…‹ç¿»è¯å…¶æ¬¡æ˜¯å‹•æ…‹ç¿»è¯ã€‚éœæ…‹ç¿»è¯å’Œksymoops -一樣使用System.map文件。爲了åšéœæ…‹ç¿»è¯klogd守è·é€²ç¨‹å¿…é ˆåœ¨åˆå§‹åŒ–時能找到system -map文件。關於klogd怎樣æœç´¢map文件請åƒçœ‹klogd手冊é 。 - -動態地å€ç¿»è¯åœ¨ä½¿ç”¨å…§æ ¸å¯è£è¼‰æ¨¡å¡Šæ™‚很é‡è¦ã€‚ å› çˆ²å…§æ ¸æ¨¡å¡Šçš„å…§å˜æ˜¯å¾žå…§æ ¸å‹•æ…‹å…§å˜æ± -里分é…的,所以ä¸ç®¡æ˜¯æ¨¡å¡Šé–‹å§‹ä½ç½®é‚„是模塊ä¸å‡½æ•¸å’Œç¬¦è™Ÿçš„ä½ç½®éƒ½ä¸æ˜¯å›ºå®šçš„。 - -å…§æ ¸æ”¯æŒå…許程åºæ±ºå®šè£è¼‰å“ªäº›æ¨¡å¡Šå’Œå®ƒå€‘在內å˜ä¸ä½ç½®çš„系統調用。使用這些系統調用 -klogd守è·é€²ç¨‹ç”Ÿæˆä¸€å¼µç¬¦è™Ÿè¡¨ç”¨æ–¼èª¿è©¦ç™¼ç”Ÿåœ¨å¯è£è¼‰æ¨¡å¡Šä¸çš„ä¿è·éŒ¯èª¤ã€‚ - -至少klogd會æ供產生ä¿è·éŒ¯èª¤çš„模塊å。還å¯æœ‰é¡å¤–的符號信æ¯ä¾›å¯è£è¼‰æ¨¡å¡Šé–‹ç™¼è€…é¸æ“‡ -以從模塊ä¸è¼¸å‡ºç¬¦è™Ÿä¿¡æ¯ã€‚ - -å› çˆ²å…§æ ¸æ¨¡å¡Šç’°å¢ƒå¯èƒ½æ˜¯å‹•æ…‹çš„ï¼Œæ‰€ä»¥å¿…é ˆæœ‰ä¸€ç¨®æ©Ÿåˆ¶ç•¶æ¨¡å¡Šç’°å¢ƒç™¼ç”Ÿæ”¹è®Šæ™‚ä¾†é€šçŸ¥klogd -守è·é€²ç¨‹ã€‚ 有一些å¯ç”¨çš„命令行é¸é …å…許klogdå‘當å‰åŸ·è¡Œä¸çš„守è·é€²ç¨‹ç™¼é€ä¿¡è™Ÿï¼Œå‘ŠçŸ¥ç¬¦ -號信æ¯æ‡‰è©²è¢«åˆ·æ–°äº†ã€‚ 更多信æ¯è«‹åƒçœ‹klogd手冊é 。 - -sysklogd發布時包å«ä¸€å€‹è£œä¸ä¿®æ”¹äº†modules-2.0.0包,無論何時一個模塊è£è¼‰æˆ–者å¸è¼‰éƒ½ -會自動å‘klogd發é€ä¿¡è™Ÿã€‚打上這個補ä¸æ供了必è¦çš„å°èª¿è©¦ç™¼ç”Ÿæ–¼å…§æ ¸å¯è£è¼‰æ¨¡å¡Šçš„ä¿è· -錯誤的無縫支æŒã€‚ - -以下是被klogd處ç†éŽçš„發生在å¯è£è¼‰æ¨¡å¡Šä¸çš„一個ä¿è·éŒ¯èª¤ä¾‹å: ---------------------------------------------------------------------------- -Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc -Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000 -Aug 29 09:51:01 blizard kernel: *pde = 00000000 -Aug 29 09:51:01 blizard kernel: Oops: 0002 -Aug 29 09:51:01 blizard kernel: CPU: 0 -Aug 29 09:51:01 blizard kernel: EIP: 0010:[oops:_oops+16/3868] -Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212 -Aug 29 09:51:01 blizard kernel: eax: 315e97cc ebx: 003a6f80 ecx: 001be77b edx: 00237c0c -Aug 29 09:51:01 blizard kernel: esi: 00000000 edi: bffffdb3 ebp: 00589f90 esp: 00589f8c -Aug 29 09:51:01 blizard kernel: ds: 0018 es: 0018 fs: 002b gs: 002b ss: 0018 -Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000) -Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001 -Aug 29 09:51:01 blizard kernel: 00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00 -Aug 29 09:51:01 blizard kernel: bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036 -Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128] -Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3 ---------------------------------------------------------------------------- - -Dr. G.W. Wettstein Oncology Research Div. Computing Facility -Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com -820 4th St. N. -Fargo, ND 58122 -Phone: 701-234-7556 - - ---------------------------------------------------------------------------- -å—æ±™æŸ“çš„å…§æ ¸ - -一些oopså ±å‘Šåœ¨ç¨‹åºè¨˜æ•¸å™¨ä¹‹å¾ŒåŒ…å«å—符串'Tainted: 'ã€‚é€™è¡¨æ˜Žå…§æ ¸å·²ç¶“è¢«ä¸€äº›æ±è¥¿çµ¦æ±™ -染了。 該å—符串之後緊跟著一系列的ä½ç½®æ•æ„Ÿçš„å—符,æ¯å€‹ä»£è¡¨ä¸€å€‹ç‰¹å®šçš„汙染值。 - - 1:'G'如果所有è£è¼‰çš„模塊都有GPL或相容的許å¯è‰ï¼Œ'P'如果è£è¼‰äº†ä»»ä½•çš„專有模塊。 -沒有模塊MODULE_LICENSE或者帶有insmodèªçˆ²æ˜¯èˆ‡GPLä¸ç›¸å®¹çš„çš„MODULE_LICENSE的模塊被 -èªå®šæ˜¯å°ˆæœ‰çš„。 - - 2:'F'如果有任何通éŽã€Œinsmod -fã€è¢«å¼·åˆ¶è£è¼‰çš„模塊,' '如果所有模塊都被æ£å¸¸è£è¼‰ã€‚ - - 3:'S'如果oops發生在SMPå…§æ ¸ä¸ï¼Œé‹è¡Œæ–¼æ²’有è‰æ˜Žå®‰å…¨é‹è¡Œå¤šè™•ç†å™¨çš„硬體。 當å‰é€™ç¨® -情æ³åƒ…é™æ–¼å¹¾ç¨®ä¸æ”¯æŒSMP的速é¾è™•ç†å™¨ã€‚ - - 4:'R'如果模塊通éŽã€Œinsmod -fã€è¢«å¼·åˆ¶è£è¼‰ï¼Œ' '如果所有模塊都被æ£å¸¸è£è¼‰ã€‚ - - 5:'M'如果任何處ç†å™¨å ±å‘Šäº†æ©Ÿå™¨æª¢æŸ¥ç•°å¸¸ï¼Œ' '如果沒有發生機器檢查異常。 - - 6:'B'如果é 釋放函數發ç¾äº†ä¸€å€‹éŒ¯èª¤çš„é 引用或者一些éžé 期的é 標誌。 - - 7:'U'如果用戶或者用戶應用程å¼ç‰¹åˆ¥è«‹æ±‚è¨ç½®æ±™æŸ“標誌,å¦å‰‡' '。 - - 8:'D'å¦‚æžœå…§æ ¸å‰›å‰›æ»æŽ‰ï¼Œæ¯”如有OOPS或者BUG。 - -使用'Tainted: 'å—符串的主è¦åŽŸå› 是è¦å‘Šè¨´å…§æ ¸èª¿è©¦è€…,這是å¦æ˜¯ä¸€å€‹ä¹¾æ·¨çš„å…§æ ¸äº¦æˆ–ç™¼ -生了任何的ä¸æ£å¸¸çš„事。汙染是永久的:å³ä½¿å‡ºéŒ¯çš„模塊已經被å¸è¼‰äº†ï¼Œæ±™æŸ“值ä»ç„¶å˜åœ¨ï¼Œ -ä»¥è¡¨æ˜Žå…§æ ¸ä¸å†å€¼å¾—信任。 - diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index b8ea59493964..cec780c2f497 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: May 2022 +:Date: September 2022 The goal of Landlock is to enable to restrict ambient rights (e.g. global filesystem access) for a set of processes. Because Landlock is a stackable @@ -69,7 +69,7 @@ should try to protect users as much as possible whatever the kernel they are using. To avoid binary enforcement (i.e. either all security features or none), we can leverage a dedicated Landlock command to get the current version of the Landlock ABI and adapt the handled accesses. Let's check if we should -remove the `LANDLOCK_ACCESS_FS_REFER` access right which is only supported +remove the ``LANDLOCK_ACCESS_FS_REFER`` access right which is only supported starting with the second version of the ABI. .. code-block:: c @@ -128,7 +128,7 @@ descriptor. It may also be required to create rules following the same logic as explained for the ruleset creation, by filtering access rights according to the Landlock ABI version. In this example, this is not required because -`LANDLOCK_ACCESS_FS_REFER` is not allowed by any rule. +``LANDLOCK_ACCESS_FS_REFER`` is not allowed by any rule. We now have a ruleset with one rule allowing read access to ``/usr`` while denying all other handled accesses for the filesystem. The next step is to @@ -154,8 +154,8 @@ The current thread is now ready to sandbox itself with the ruleset. } close(ruleset_fd); -If the `landlock_restrict_self` system call succeeds, the current thread is now -restricted and this policy will be enforced on all its subsequently created +If the ``landlock_restrict_self`` system call succeeds, the current thread is +now restricted and this policy will be enforced on all its subsequently created children as well. Once a thread is landlocked, there is no way to remove its security policy; only adding more restrictions is allowed. These threads are now in a new Landlock domain, merge of their parent one (if any) with the new @@ -170,12 +170,13 @@ It is recommended setting access rights to file hierarchy leaves as much as possible. For instance, it is better to be able to have ``~/doc/`` as a read-only hierarchy and ``~/tmp/`` as a read-write hierarchy, compared to ``~/`` as a read-only hierarchy and ``~/tmp/`` as a read-write hierarchy. -Following this good practice leads to self-sufficient hierarchies that don't +Following this good practice leads to self-sufficient hierarchies that do not depend on their location (i.e. parent directories). This is particularly relevant when we want to allow linking or renaming. Indeed, having consistent access rights per directory enables to change the location of such directory without relying on the destination directory access rights (except those that -are required for this operation, see `LANDLOCK_ACCESS_FS_REFER` documentation). +are required for this operation, see ``LANDLOCK_ACCESS_FS_REFER`` +documentation). Having self-sufficient hierarchies also helps to tighten the required access rights to the minimal set of data. This also helps avoid sinkhole directories, i.e. directories where data can be linked to but not linked from. However, @@ -259,7 +260,7 @@ Backward and forward compatibility Landlock is designed to be compatible with past and future versions of the kernel. This is achieved thanks to the system call attributes and the -associated bitflags, particularly the ruleset's `handled_access_fs`. Making +associated bitflags, particularly the ruleset's ``handled_access_fs``. Making handled access right explicit enables the kernel and user space to have a clear contract with each other. This is required to make sure sandboxing will not get stricter with a system update, which could break applications. @@ -380,8 +381,8 @@ by the Documentation/admin-guide/cgroup-v1/memory.rst. Previous limitations ==================== -File renaming and linking (ABI 1) ---------------------------------- +File renaming and linking (ABI < 2) +----------------------------------- Because Landlock targets unprivileged access controls, it needs to properly handle composition of rules. Such property also implies rules nesting. @@ -394,7 +395,7 @@ according to the potentially lost constraints. To protect against privilege escalations through renaming or linking, and for the sake of simplicity, Landlock previously limited linking and renaming to the same directory. Starting with the Landlock ABI version 2, it is now possible to securely -control renaming and linking thanks to the new `LANDLOCK_ACCESS_FS_REFER` +control renaming and linking thanks to the new ``LANDLOCK_ACCESS_FS_REFER`` access right. .. _kernel_support: @@ -403,14 +404,14 @@ Kernel support ============== Landlock was first introduced in Linux 5.13 but it must be configured at build -time with `CONFIG_SECURITY_LANDLOCK=y`. Landlock must also be enabled at boot +time with ``CONFIG_SECURITY_LANDLOCK=y``. Landlock must also be enabled at boot time as the other security modules. The list of security modules enabled by -default is set with `CONFIG_LSM`. The kernel configuration should then -contains `CONFIG_LSM=landlock,[...]` with `[...]` as the list of other +default is set with ``CONFIG_LSM``. The kernel configuration should then +contains ``CONFIG_LSM=landlock,[...]`` with ``[...]`` as the list of other potentially useful security modules for the running system (see the -`CONFIG_LSM` help). +``CONFIG_LSM`` help). -If the running kernel doesn't have `landlock` in `CONFIG_LSM`, then we can +If the running kernel does not have ``landlock`` in ``CONFIG_LSM``, then we can still enable it by adding ``lsm=landlock,[...]`` to Documentation/admin-guide/kernel-parameters.rst thanks to the bootloader configuration. diff --git a/Documentation/virt/kvm/x86/mmu.rst b/Documentation/virt/kvm/x86/mmu.rst index 8739120f4300..8364afa228ec 100644 --- a/Documentation/virt/kvm/x86/mmu.rst +++ b/Documentation/virt/kvm/x86/mmu.rst @@ -377,7 +377,7 @@ Emulating cr0.wp ================ If tdp is not enabled, the host must keep cr0.wp=1 so page write protection -works for the guest kernel, not guest guest userspace. When the guest +works for the guest kernel, not guest userspace. When the guest cr0.wp=1, this does not present a problem. However when the guest cr0.wp=0, we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the semantics require allowing any guest kernel access plus user read access). diff --git a/Documentation/w1/masters/ds2490.rst b/Documentation/w1/masters/ds2490.rst index 7e5b50f9c0f5..842e7ae80424 100644 --- a/Documentation/w1/masters/ds2490.rst +++ b/Documentation/w1/masters/ds2490.rst @@ -52,7 +52,7 @@ Notes and limitations. clear the entire bulk in buffer. It would be possible to read the maximum buffer size to not run into this error condition, only extra bytes in the buffer is a logic error in the driver. The code should - should match reads and writes as well as data sizes. Reads and + match reads and writes as well as data sizes. Reads and writes are serialized and the status verifies that the chip is idle (and data is available) before the read is executed, so it should not happen. diff --git a/Documentation/w1/w1-generic.rst b/Documentation/w1/w1-generic.rst index da4e8b4e9b01..99255b6d0e53 100644 --- a/Documentation/w1/w1-generic.rst +++ b/Documentation/w1/w1-generic.rst @@ -113,7 +113,7 @@ generally only make sense when searching is disabled, as a search will redetect manually removed devices that are present and timeout manually added devices that aren't on the bus. -Bus searches occur at an interval, specified as a summ of timeout and +Bus searches occur at an interval, specified as a sum of timeout and timeout_us module parameters (either of which may be 0) for as long as w1_master_search remains greater than 0 or is -1. Each search attempt decrements w1_master_search by 1 (down to 0) and increments diff --git a/Documentation/x86/entry_64.rst b/Documentation/x86/entry_64.rst index e433e08f7018..0afdce3c06f4 100644 --- a/Documentation/x86/entry_64.rst +++ b/Documentation/x86/entry_64.rst @@ -33,8 +33,8 @@ Some of these entries are: - interrupt: An array of entries. Every IDT vector that doesn't explicitly point somewhere else gets set to the corresponding value in interrupts. These point to a whole array of - magically-generated functions that make their way to do_IRQ with - the interrupt number as a parameter. + magically-generated functions that make their way to common_interrupt() + with the interrupt number as a parameter. - APIC interrupts: Various special-purpose interrupts for things like TLB shootdown. diff --git a/Documentation/x86/microcode.rst b/Documentation/x86/microcode.rst index a320d37982ed..b627c6f36bcf 100644 --- a/Documentation/x86/microcode.rst +++ b/Documentation/x86/microcode.rst @@ -6,6 +6,7 @@ The Linux Microcode Loader :Authors: - Fenghua Yu <fenghua.yu@intel.com> - Borislav Petkov <bp@suse.de> + - Ashok Raj <ashok.raj@intel.com> The kernel has a x86 microcode loading facility which is supposed to provide microcode loading methods in the OS. Potential use cases are @@ -92,15 +93,8 @@ vendor's site. Late loading ============ -There are two legacy user space interfaces to load microcode, either through -/dev/cpu/microcode or through /sys/devices/system/cpu/microcode/reload file -in sysfs. - -The /dev/cpu/microcode method is deprecated because it needs a special -userspace tool for that. - -The easier method is simply installing the microcode packages your distro -supplies and running:: +You simply install the microcode packages your distro supplies and +run:: # echo 1 > /sys/devices/system/cpu/microcode/reload @@ -110,6 +104,110 @@ The loading mechanism looks for microcode blobs in /lib/firmware/{intel-ucode,amd-ucode}. The default distro installation packages already put them there. +Since kernel 5.19, late loading is not enabled by default. + +The /dev/cpu/microcode method has been removed in 5.19. + +Why is late loading dangerous? +============================== + +Synchronizing all CPUs +---------------------- + +The microcode engine which receives the microcode update is shared +between the two logical threads in a SMT system. Therefore, when +the update is executed on one SMT thread of the core, the sibling +"automatically" gets the update. + +Since the microcode can "simulate" MSRs too, while the microcode update +is in progress, those simulated MSRs transiently cease to exist. This +can result in unpredictable results if the SMT sibling thread happens to +be in the middle of an access to such an MSR. The usual observation is +that such MSR accesses cause #GPs to be raised to signal that former are +not present. + +The disappearing MSRs are just one common issue which is being observed. +Any other instruction that's being patched and gets concurrently +executed by the other SMT sibling, can also result in similar, +unpredictable behavior. + +To eliminate this case, a stop_machine()-based CPU synchronization was +introduced as a way to guarantee that all logical CPUs will not execute +any code but just wait in a spin loop, polling an atomic variable. + +While this took care of device or external interrupts, IPIs including +LVT ones, such as CMCI etc, it cannot address other special interrupts +that can't be shut off. Those are Machine Check (#MC), System Management +(#SMI) and Non-Maskable interrupts (#NMI). + +Machine Checks +-------------- + +Machine Checks (#MC) are non-maskable. There are two kinds of MCEs. +Fatal un-recoverable MCEs and recoverable MCEs. While un-recoverable +errors are fatal, recoverable errors can also happen in kernel context +are also treated as fatal by the kernel. + +On certain Intel machines, MCEs are also broadcast to all threads in a +system. If one thread is in the middle of executing WRMSR, a MCE will be +taken at the end of the flow. Either way, they will wait for the thread +performing the wrmsr(0x79) to rendezvous in the MCE handler and shutdown +eventually if any of the threads in the system fail to check in to the +MCE rendezvous. + +To be paranoid and get predictable behavior, the OS can choose to set +MCG_STATUS.MCIP. Since MCEs can be at most one in a system, if an +MCE was signaled, the above condition will promote to a system reset +automatically. OS can turn off MCIP at the end of the update for that +core. + +System Management Interrupt +--------------------------- + +SMIs are also broadcast to all CPUs in the platform. Microcode update +requests exclusive access to the core before writing to MSR 0x79. So if +it does happen such that, one thread is in WRMSR flow, and the 2nd got +an SMI, that thread will be stopped in the first instruction in the SMI +handler. + +Since the secondary thread is stopped in the first instruction in SMI, +there is very little chance that it would be in the middle of executing +an instruction being patched. Plus OS has no way to stop SMIs from +happening. + +Non-Maskable Interrupts +----------------------- + +When thread0 of a core is doing the microcode update, if thread1 is +pulled into NMI, that can cause unpredictable behavior due to the +reasons above. + +OS can choose a variety of methods to avoid running into this situation. + + +Is the microcode suitable for late loading? +------------------------------------------- + +Late loading is done when the system is fully operational and running +real workloads. Late loading behavior depends on what the base patch on +the CPU is before upgrading to the new patch. + +This is true for Intel CPUs. + +Consider, for example, a CPU has patch level 1 and the update is to +patch level 3. + +Between patch1 and patch3, patch2 might have deprecated a software-visible +feature. + +This is unacceptable if software is even potentially using that feature. +For instance, say MSR_X is no longer available after an update, +accessing that MSR will cause a #GP fault. + +Basically there is no way to declare a new microcode update suitable +for late-loading. This is another one of the problems that caused late +loading to be not enabled by default. + Builtin microcode ================= diff --git a/MAINTAINERS b/MAINTAINERS index e55a4d47324c..9ca84cb5ab4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -348,7 +348,6 @@ M: "Rafael J. Wysocki" <rafael@kernel.org> R: Len Brown <lenb@kernel.org> L: linux-acpi@vger.kernel.org S: Supported -W: https://01.org/linux-acpi Q: https://patchwork.kernel.org/project/linux-acpi/list/ B: https://bugzilla.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm @@ -427,7 +426,6 @@ M: Rafael J. Wysocki <rafael@kernel.org> R: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org S: Supported -W: https://01.org/linux-acpi B: https://bugzilla.kernel.org F: drivers/acpi/*thermal* @@ -2678,7 +2676,6 @@ M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: http://www.armlinux.org.uk/ -F: arch/arm/include/asm/hardware/entry-macro-iomd.S F: arch/arm/include/asm/hardware/ioc.h F: arch/arm/include/asm/hardware/iomd.h F: arch/arm/include/asm/hardware/memc.h @@ -7690,7 +7687,6 @@ R: Kees Cook <keescook@chromium.org> L: linux-mm@kvack.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve -F: arch/alpha/kernel/binfmt_loader.c F: fs/*binfmt_*.c F: fs/exec.c F: include/linux/binfmts.h @@ -8007,6 +8003,7 @@ L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: include/linux/fortify-string.h +F: lib/fortify_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b @@ -10381,7 +10378,6 @@ INTEL MENLOW THERMAL DRIVER M: Sujith Thomas <sujith.thomas@intel.com> L: linux-pm@vger.kernel.org S: Supported -W: https://01.org/linux-acpi F: drivers/thermal/intel/intel_menlow.c INTEL P-Unit IPC DRIVER @@ -14452,6 +14448,7 @@ M: Willy Tarreau <w@1wt.eu> S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git F: tools/include/nolibc/ +F: tools/testing/selftests/nolibc/ NSDEPS M: Matthias Maennich <maennich@google.com> @@ -17767,6 +17764,24 @@ F: include/rv/ F: kernel/trace/rv/ F: tools/verification/ +RUST +M: Miguel Ojeda <ojeda@kernel.org> +M: Alex Gaynor <alex.gaynor@gmail.com> +M: Wedson Almeida Filho <wedsonaf@gmail.com> +R: Boqun Feng <boqun.feng@gmail.com> +R: Gary Guo <gary@garyguo.net> +R: Björn Roy Baron <bjorn3_gh@protonmail.com> +L: rust-for-linux@vger.kernel.org +S: Supported +W: https://github.com/Rust-for-Linux/linux +B: https://github.com/Rust-for-Linux/linux/issues +T: git https://github.com/Rust-for-Linux/linux.git rust-next +F: Documentation/rust/ +F: rust/ +F: samples/rust/ +F: scripts/*rust* +K: \b(?i:rust)\b + RXRPC SOCKETS (AF_RXRPC) M: David Howells <dhowells@redhat.com> M: Marc Dionne <marc.dionne@auristor.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* @@ -120,6 +120,15 @@ endif export KBUILD_CHECKSRC +# Enable "clippy" (a linter) as part of the Rust compilation. +# +# Use 'make CLIPPY=1' to enable it. +ifeq ("$(origin CLIPPY)", "command line") + KBUILD_CLIPPY := $(CLIPPY) +endif + +export KBUILD_CLIPPY + # Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the # directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") @@ -270,14 +279,14 @@ no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ - outputmakefile + outputmakefile rustavailable rustfmt rustfmtcheck # Installation targets should not require compiler. Unfortunately, vdso_install # is an exception where build artifacts may be updated. This must be fixed. no-compiler-targets := $(no-dot-config-targets) install dtbs_install \ headers_install modules_install kernelrelease image_name no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ +single-targets := %.a %.i %.rsi %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ config-build := mixed-build := @@ -439,6 +448,7 @@ else HOSTCC = gcc HOSTCXX = g++ endif +HOSTRUSTC = rustc HOSTPKG_CONFIG = pkg-config KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ @@ -447,8 +457,26 @@ KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) KBUILD_USERLDFLAGS := $(USERLDFLAGS) +# These flags apply to all Rust code in the tree, including the kernel and +# host programs. +export rust_common_flags := --edition=2021 \ + -Zbinary_dep_depinfo=y \ + -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms \ + -Dunreachable_pub -Dnon_ascii_idents \ + -Wmissing_docs \ + -Drustdoc::missing_crate_level_docs \ + -Dclippy::correctness -Dclippy::style \ + -Dclippy::suspicious -Dclippy::complexity \ + -Dclippy::perf \ + -Dclippy::let_unit_value -Dclippy::mut_mut \ + -Dclippy::needless_bitwise_bool \ + -Dclippy::needless_continue \ + -Wclippy::dbg_macro + KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS) KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS) +KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \ + -Zallow-features= $(HOSTRUSTFLAGS) KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS) KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) @@ -473,6 +501,12 @@ OBJDUMP = $(CROSS_COMPILE)objdump READELF = $(CROSS_COMPILE)readelf STRIP = $(CROSS_COMPILE)strip endif +RUSTC = rustc +RUSTDOC = rustdoc +RUSTFMT = rustfmt +CLIPPY_DRIVER = clippy-driver +BINDGEN = bindgen +CARGO = cargo PAHOLE = pahole RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids LEX = flex @@ -498,9 +532,11 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := CFLAGS_MODULE = +RUSTFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = CFLAGS_KERNEL = +RUSTFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = @@ -529,15 +565,43 @@ KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \ -Werror=return-type -Wno-format-security \ -std=gnu11 KBUILD_CPPFLAGS := -D__KERNEL__ +KBUILD_RUSTFLAGS := $(rust_common_flags) \ + --target=$(objtree)/rust/target.json \ + -Cpanic=abort -Cembed-bitcode=n -Clto=n \ + -Cforce-unwind-tables=n -Ccodegen-units=1 \ + -Csymbol-mangling-version=v0 \ + -Crelocation-model=static \ + -Zfunction-sections=n \ + -Dclippy::float_arithmetic + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := +KBUILD_RUSTFLAGS_KERNEL := KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE +KBUILD_RUSTFLAGS_MODULE := --cfg MODULE KBUILD_LDFLAGS_MODULE := KBUILD_LDFLAGS := CLANG_FLAGS := +ifeq ($(KBUILD_CLIPPY),1) + RUSTC_OR_CLIPPY_QUIET := CLIPPY + RUSTC_OR_CLIPPY = $(CLIPPY_DRIVER) +else + RUSTC_OR_CLIPPY_QUIET := RUSTC + RUSTC_OR_CLIPPY = $(RUSTC) +endif + +ifdef RUST_LIB_SRC + export RUST_LIB_SRC +endif + +# Allows the usage of unstable features in stable compilers. +export RUSTC_BOOTSTRAP := 1 + export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG +export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO +export HOSTRUSTC KBUILD_HOSTRUSTFLAGS export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD @@ -546,9 +610,10 @@ export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +export KBUILD_RUSTFLAGS RUSTFLAGS_KERNEL RUSTFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE -export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE -export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_RUSTFLAGS_MODULE KBUILD_LDFLAGS_MODULE +export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL KBUILD_RUSTFLAGS_KERNEL export PAHOLE_FLAGS # Files to ignore in find ... statements @@ -729,7 +794,7 @@ $(KCONFIG_CONFIG): # # Do not use $(call cmd,...) here. That would suppress prompts from syncconfig, # so you cannot notice that Kconfig is waiting for the user input. -%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h: $(KCONFIG_CONFIG) +%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h %/generated/rustc_cfg: $(KCONFIG_CONFIG) $(Q)$(kecho) " SYNC $@" $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig else # !may-sync-config @@ -758,10 +823,17 @@ KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 +KBUILD_RUSTFLAGS += -Copt-level=2 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os +KBUILD_RUSTFLAGS += -Copt-level=s endif +# Always set `debug-assertions` and `overflow-checks` because their default +# depends on `opt-level` and `debug-assertions`, respectively. +KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) +KBUILD_RUSTFLAGS += -Coverflow-checks=$(if $(CONFIG_RUST_OVERFLOW_CHECKS),y,n) + # Tell gcc to never replace conditional load with a non-conditional one ifdef CONFIG_CC_IS_GCC # gcc-10 renamed --param=allow-store-data-races=0 to @@ -792,6 +864,9 @@ KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) +KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings +KBUILD_RUSTFLAGS += $(KBUILD_RUSTFLAGS-y) + ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. @@ -812,12 +887,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically # select FRAME_POINTER. However, FUNCTION_TRACER adds -pg, and this is # incompatible with -fomit-frame-pointer with current GCC, so we don't use # -fomit-frame-pointer with FUNCTION_TRACER. +# In the Rust target specification, "frame-pointer" is set explicitly +# to "may-omit". ifndef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -fomit-frame-pointer endif @@ -831,8 +909,8 @@ endif # Initialize all stack variables with a zero value. ifdef CONFIG_INIT_STACK_ALL_ZERO KBUILD_CFLAGS += -ftrivial-auto-var-init=zero -ifdef CONFIG_CC_IS_CLANG -# https://bugs.llvm.org/show_bug.cgi?id=45497 +ifdef CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER +# https://github.com/llvm/llvm-project/issues/44842 KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang endif endif @@ -882,8 +960,10 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH KBUILD_CFLAGS += -fno-inline-functions-called-once endif +# `rustc`'s `-Zfunction-sections` applies to data too (as of 1.59.0). ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections +KBUILD_RUSTFLAGS_KERNEL += -Zfunction-sections=y LDFLAGS_vmlinux += --gc-sections endif @@ -921,18 +1001,7 @@ export CC_FLAGS_LTO endif ifdef CONFIG_CFI_CLANG -CC_FLAGS_CFI := -fsanitize=cfi \ - -fsanitize-cfi-cross-dso \ - -fno-sanitize-cfi-canonical-jump-tables \ - -fno-sanitize-trap=cfi \ - -fno-sanitize-blacklist - -ifdef CONFIG_CFI_PERMISSIVE -CC_FLAGS_CFI += -fsanitize-recover=cfi -endif - -# If LTO flags are filtered out, we must also filter out CFI. -CC_FLAGS_LTO += $(CC_FLAGS_CFI) +CC_FLAGS_CFI := -fsanitize=kcfi KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif @@ -1026,10 +1095,11 @@ include $(addprefix $(srctree)/, $(include-y)) # Do not add $(call cc-option,...) below this line. When you build the kernel # from the clean source tree, the GCC plugins do not exist at this point. -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments +# Add user supplied CPPFLAGS, AFLAGS, CFLAGS and RUSTFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) KBUILD_CFLAGS += $(KCFLAGS) +KBUILD_RUSTFLAGS += $(KRUSTFLAGS) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 @@ -1104,6 +1174,7 @@ ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ core-$(CONFIG_BLOCK) += block/ core-$(CONFIG_IO_URING) += io_uring/ +core-$(CONFIG_RUST) += rust/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ @@ -1206,6 +1277,10 @@ prepare0: archprepare # All the preparing.. prepare: prepare0 +ifdef CONFIG_RUST + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh -v + $(Q)$(MAKE) $(build)=rust +endif PHONY += remove-stale-files remove-stale-files: @@ -1499,7 +1574,7 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ - compile_commands.json .thinlto-cache + compile_commands.json .thinlto-cache rust/test rust/doc # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -1510,7 +1585,8 @@ MRPROPER_FILES += include/config include/generated \ certs/signing_key.pem \ certs/x509.genkey \ vmlinux-gdb.py \ - *.spec + *.spec \ + rust/target.json rust/libmacros.so # clean - Delete most, but leave enough to build external modules # @@ -1535,6 +1611,9 @@ $(mrproper-dirs): mrproper: clean $(mrproper-dirs) $(call cmd,rmfiles) + @find . $(RCS_FIND_IGNORE) \ + \( -name '*.rmeta' \) \ + -type f -print | xargs rm -f # distclean # @@ -1622,6 +1701,24 @@ help: @echo ' kselftest-merge - Merge all the config dependencies of' @echo ' kselftest to existing .config.' @echo '' + @echo 'Rust targets:' + @echo ' rustavailable - Checks whether the Rust toolchain is' + @echo ' available and, if not, explains why.' + @echo ' rustfmt - Reformat all the Rust code in the kernel' + @echo ' rustfmtcheck - Checks if all the Rust code in the kernel' + @echo ' is formatted, printing a diff otherwise.' + @echo ' rustdoc - Generate Rust documentation' + @echo ' (requires kernel .config)' + @echo ' rusttest - Runs the Rust tests' + @echo ' (requires kernel .config; downloads external repos)' + @echo ' rust-analyzer - Generate rust-project.json rust-analyzer support file' + @echo ' (requires kernel .config)' + @echo ' dir/file.[os] - Build specified target only' + @echo ' dir/file.rsi - Build macro expanded source, similar to C preprocessing.' + @echo ' Run with RUSTFMT=n to skip reformatting if needed.' + @echo ' The output is not intended to be compilable.' + @echo ' dir/file.ll - Build the LLVM assembly file' + @echo '' @$(if $(dtstree), \ echo 'Devicetree:'; \ echo '* dtbs - Build device tree blobs for enabled boards'; \ @@ -1694,6 +1791,52 @@ PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ + +# Rust targets +# --------------------------------------------------------------------------- + +# "Is Rust available?" target +PHONY += rustavailable +rustavailable: + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh -v && echo "Rust is available!" + +# Documentation target +# +# Using the singular to avoid running afoul of `no-dot-config-targets`. +PHONY += rustdoc +rustdoc: prepare + $(Q)$(MAKE) $(build)=rust $@ + +# Testing target +PHONY += rusttest +rusttest: prepare + $(Q)$(MAKE) $(build)=rust $@ + +# Formatting targets +PHONY += rustfmt rustfmtcheck + +# We skip `rust/alloc` since we want to minimize the diff w.r.t. upstream. +# +# We match using absolute paths since `find` does not resolve them +# when matching, which is a problem when e.g. `srctree` is `..`. +# We `grep` afterwards in order to remove the directory entry itself. +rustfmt: + $(Q)find $(abs_srctree) -type f -name '*.rs' \ + -o -path $(abs_srctree)/rust/alloc -prune \ + -o -path $(abs_objtree)/rust/test -prune \ + | grep -Fv $(abs_srctree)/rust/alloc \ + | grep -Fv $(abs_objtree)/rust/test \ + | grep -Fv generated \ + | xargs $(RUSTFMT) $(rustfmt_flags) + +rustfmtcheck: rustfmt_flags = --check +rustfmtcheck: rustfmt + +# IDE support targets +PHONY += rust-analyzer +rust-analyzer: + $(Q)$(MAKE) $(build)=rust $@ + # Misc # --------------------------------------------------------------------------- @@ -1861,7 +2004,7 @@ $(clean-dirs): clean: $(clean-dirs) $(call cmd,rmfiles) @find $(or $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ - \( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \ + \( -name '*.[aios]' -o -name '*.rsi' -o -name '*.ko' -o -name '.*.cmd' \ -o -name '*.ko.*' \ -o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \ -o -name '*.dwo' -o -name '*.lst' \ diff --git a/arch/Kconfig b/arch/Kconfig index 8b311e400ec1..266862428a84 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -355,6 +355,12 @@ config HAVE_RSEQ This symbol should be selected by an architecture if it supports an implementation of restartable sequences. +config HAVE_RUST + bool + help + This symbol should be selected by an architecture if it + supports Rust. + config HAVE_FUNCTION_ARG_ACCESS_API bool help @@ -738,11 +744,13 @@ config ARCH_SUPPORTS_CFI_CLANG An architecture should select this option if it can support Clang's Control-Flow Integrity (CFI) checking. +config ARCH_USES_CFI_TRAPS + bool + config CFI_CLANG bool "Use Clang's Control Flow Integrity (CFI)" - depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG - depends on CLANG_VERSION >= 140000 - select KALLSYMS + depends on ARCH_SUPPORTS_CFI_CLANG + depends on $(cc-option,-fsanitize=kcfi) help This option enables Clang’s forward-edge Control Flow Integrity (CFI) checking, where the compiler injects a runtime check to each @@ -754,16 +762,6 @@ config CFI_CLANG https://clang.llvm.org/docs/ControlFlowIntegrity.html -config CFI_CLANG_SHADOW - bool "Use CFI shadow to speed up cross-module checks" - default y - depends on CFI_CLANG && MODULES - help - If you select this option, the kernel builds a fast look-up table of - CFI check functions in loaded modules to reduce performance overhead. - - If unsure, say Y. - config CFI_PERMISSIVE bool "Use CFI in permissive mode" depends on CFI_CLANG diff --git a/arch/alpha/include/asm/a.out.h b/arch/alpha/include/asm/a.out.h deleted file mode 100644 index d2346b7caff1..000000000000 --- a/arch/alpha/include/asm/a.out.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ALPHA_A_OUT_H__ -#define __ALPHA_A_OUT_H__ - -#include <uapi/asm/a.out.h> - - -/* Assume that start addresses below 4G belong to a TASO application. - Unfortunately, there is no proper bit in the exec header to check. - Worse, we have to notice the start address before swapping to use - /sbin/loader, which of course is _not_ a TASO application. */ -#define SET_AOUT_PERSONALITY(BFPM, EX) \ - set_personality (((BFPM->taso || EX.ah.entry < 0x100000000L \ - ? ADDR_LIMIT_32BIT : 0) | PER_OSF4)) - -#endif /* __A_OUT_GNU_H__ */ diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 5a74581bf0ee..6a274c0d53a2 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -47,10 +47,6 @@ else # Misc support obj-$(CONFIG_ALPHA_SRM) += srmcons.o -ifdef CONFIG_BINFMT_AOUT -obj-y += binfmt_loader.o -endif - # Core logic support obj-$(CONFIG_ALPHA_APECS) += core_apecs.o obj-$(CONFIG_ALPHA_CIA) += core_cia.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c deleted file mode 100644 index e4be7a543ecf..000000000000 --- a/arch/alpha/kernel/binfmt_loader.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/mm_types.h> -#include <linux/binfmts.h> -#include <linux/a.out.h> - -static int load_binary(struct linux_binprm *bprm) -{ - struct exec *eh = (struct exec *)bprm->buf; - unsigned long loader; - struct file *file; - int retval; - - if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) - return -ENOEXEC; - - if (bprm->loader) - return -ENOEXEC; - - loader = bprm->vma->vm_end - sizeof(void *); - - file = open_exec("/sbin/loader"); - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - /* Remember if the application is TASO. */ - bprm->taso = eh->ah.entry < 0x100000000UL; - - bprm->interpreter = file; - bprm->loader = loader; - return 0; -} - -static struct linux_binfmt loader_format = { - .load_binary = load_binary, -}; - -static int __init init_loader_binfmt(void) -{ - insert_binfmt(&loader_format); - return 0; -} -arch_initcall(init_loader_binfmt); diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d257293401e2..b3ad8c44c971 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1278,45 +1278,15 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } -#ifdef CONFIG_OSF4_COMPAT -/* Clear top 32 bits of iov_len in the user's buffer for - compatibility with old versions of OSF/1 where iov_len - was defined as int. */ -static int -osf_fix_iov_len(const struct iovec __user *iov, unsigned long count) -{ - unsigned long i; - - for (i = 0 ; i < count ; i++) { - int __user *iov_len_high = (int __user *)&iov[i].iov_len + 1; - - if (put_user(0, iov_len_high)) - return -EFAULT; - } - return 0; -} -#endif - SYSCALL_DEFINE3(osf_readv, unsigned long, fd, const struct iovec __user *, vector, unsigned long, count) { -#ifdef CONFIG_OSF4_COMPAT - if (unlikely(personality(current->personality) == PER_OSF4)) - if (osf_fix_iov_len(vector, count)) - return -EFAULT; -#endif - return sys_readv(fd, vector, count); } SYSCALL_DEFINE3(osf_writev, unsigned long, fd, const struct iovec __user *, vector, unsigned long, count) { -#ifdef CONFIG_OSF4_COMPAT - if (unlikely(personality(current->personality) == PER_OSF4)) - if (osf_fix_iov_len(vector, count)) - return -EFAULT; -#endif return sys_writev(fd, vector, count); } diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index cb2e069dc73f..abfed1aa2baa 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -23,7 +23,9 @@ unsigned int __machine_arch_type; #include <linux/types.h> #include <linux/linkage.h> #include "misc.h" +#ifdef CONFIG_ARCH_EP93XX #include "misc-ep93xx.h" +#endif static void putstr(const char *ptr); diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 1bcb68ac4b01..3fcb3e62dc56 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -23,6 +23,7 @@ SECTIONS *(.ARM.extab*) *(.note.*) *(.rel.*) + *(.printk_index) /* * Discard any r/w data - this produces a link error if we have any, * which is required for PIC decompression. Local data generates @@ -57,6 +58,7 @@ SECTIONS *(.rodata) *(.rodata.*) *(.data.rel.ro) + *(.data.rel.ro.*) } .piggydata : { *(.piggydata) diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 506f3378da07..6908032fbce8 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -6,7 +6,6 @@ CONFIG_UNUSED_BOARD_FILES=y CONFIG_CMDLINE="init=/linuxrc root=/dev/mtdblock3" CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=m CONFIG_MODULES=y CONFIG_MODVERSIONS=y CONFIG_PARTITION_ADVANCED=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index 1f137f74050f..df84640f4f57 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -16,7 +16,6 @@ CONFIG_MACH_HUSKY=y CONFIG_UNUSED_BOARD_FILES=y CONFIG_CMDLINE="console=ttyS0,115200n8 console=tty1 noinitrd root=/dev/mtdblock2 rootfstype=jffs2 debug" CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index 1a41391d7367..cd9ccc4e4627 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -25,7 +25,6 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m CONFIG_CPU_IDLE=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=m CONFIG_PM=y CONFIG_APM_EMULATION=y CONFIG_MODULES=y diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig index 504070812ad0..b5b56f8dda5f 100644 --- a/arch/arm/configs/footbridge_defconfig +++ b/arch/arm/configs/footbridge_defconfig @@ -9,7 +9,6 @@ CONFIG_ARCH_EBSA285_HOST=y CONFIG_ARCH_NETWINDER=y CONFIG_FPE_NWFPE=y CONFIG_FPE_NWFPE_XP=y -CONFIG_BINFMT_AOUT=y CONFIG_MODULES=y CONFIG_PARTITION_ADVANCED=y CONFIG_ACORN_PARTITION=y diff --git a/arch/arm/configs/hackkit_defconfig b/arch/arm/configs/hackkit_defconfig index b9327b2eacd3..398558c4ffa8 100644 --- a/arch/arm/configs/hackkit_defconfig +++ b/arch/arm/configs/hackkit_defconfig @@ -7,7 +7,6 @@ CONFIG_UNUSED_BOARD_FILES=y CONFIG_CMDLINE="console=ttySA0,115200 root=/dev/ram0 initrd=0xc0400000,8M init=/rootshell" CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_MODULES=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index c16e92cdfd00..19e30e790d35 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -12,7 +12,6 @@ CONFIG_MACH_N2100=y CONFIG_UNUSED_BOARD_FILES=y CONFIG_CMDLINE="console=ttyS0,115200 root=/dev/nfs ip=bootp cachepolicy=writealloc" CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y diff --git a/arch/arm/configs/jornada720_defconfig b/arch/arm/configs/jornada720_defconfig index 3dcf89d3e1f1..1a11ee6b3e24 100644 --- a/arch/arm/configs/jornada720_defconfig +++ b/arch/arm/configs/jornada720_defconfig @@ -6,7 +6,6 @@ CONFIG_SA1100_JORNADA720=y CONFIG_SA1100_JORNADA720_SSP=y CONFIG_UNUSED_BOARD_FILES=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_PM=y CONFIG_MODULES=y CONFIG_NET=y diff --git a/arch/arm/configs/lart_defconfig b/arch/arm/configs/lart_defconfig index 0c2f19d756c0..00583d64d2ea 100644 --- a/arch/arm/configs/lart_defconfig +++ b/arch/arm/configs/lart_defconfig @@ -8,7 +8,6 @@ CONFIG_CMDLINE="console=ttySA0,9600 root=/dev/ram" CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_PM=y CONFIG_MODULES=y CONFIG_NET=y diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig index 907403529e30..2d16ddb0e7ff 100644 --- a/arch/arm/configs/neponset_defconfig +++ b/arch/arm/configs/neponset_defconfig @@ -9,7 +9,6 @@ CONFIG_ZBOOT_ROM_BSS=0xc1000000 CONFIG_ZBOOT_ROM=y CONFIG_CMDLINE="console=ttySA0,38400n8 cpufreq=221200 rw root=/dev/mtdblock2 mtdparts=sa1100:512K(boot),1M(kernel),2560K(initrd),4M(root) load_ramdisk=1 prompt_ramdisk=0 mem=32M noinitrd initrd=0xc0800000,3M" CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_PM=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arm/configs/netwinder_defconfig b/arch/arm/configs/netwinder_defconfig index cf7bbcf9d98a..7a14ea1faa65 100644 --- a/arch/arm/configs/netwinder_defconfig +++ b/arch/arm/configs/netwinder_defconfig @@ -5,7 +5,6 @@ CONFIG_ARCH_NETWINDER=y CONFIG_DEPRECATED_PARAM_STRUCT=y CONFIG_CMDLINE="root=0x801" CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_PARTITION_ADVANCED=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 16d74a1f027a..b667c9d4527c 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -7,7 +7,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_RPC=y CONFIG_CPU_SA110=y CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=y CONFIG_PARTITION_ADVANCED=y CONFIG_BSD_DISKLABEL=y CONFIG_SLAB=y diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index 1284a1d92ca3..66d74653f3fb 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -13,7 +13,6 @@ CONFIG_MACH_AKITA=y CONFIG_MACH_BORZOI=y CONFIG_CMDLINE="console=ttyS0,115200n8 console=tty1 noinitrd root=/dev/mtdblock2 rootfstype=jffs2 debug" CONFIG_FPE_NWFPE=y -CONFIG_BINFMT_AOUT=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index fb688003d156..712da6a81b23 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -346,7 +346,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) addr = start + i * PMD_SIZE; domain = get_domain_name(pmd); if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) - note_page(st, addr, 3, pmd_val(*pmd), domain); + note_page(st, addr, 4, pmd_val(*pmd), domain); else walk_pte(st, pmd, addr, domain); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index a49f0b9c0f75..463fc2a8448f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -300,7 +300,11 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_XN | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, +#ifdef CONFIG_ARM_LPAE + .prot_sect = PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2, +#else .prot_sect = PMD_TYPE_SECT, +#endif .domain = DOMAIN_KERNEL, }, [MT_ROM] = { diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 7868330dd54e..ebe5558929b7 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> SHASH .req v0 @@ -350,11 +351,11 @@ CPU_LE( rev64 T1.16b, T1.16b ) * void pmull_ghash_update(int blocks, u64 dg[], const char *src, * struct ghash_key const *k, const char *head) */ -SYM_FUNC_START(pmull_ghash_update_p64) +SYM_TYPED_FUNC_START(pmull_ghash_update_p64) __pmull_ghash p64 SYM_FUNC_END(pmull_ghash_update_p64) -SYM_FUNC_START(pmull_ghash_update_p8) +SYM_TYPED_FUNC_START(pmull_ghash_update_p8) __pmull_ghash p8 SYM_FUNC_END(pmull_ghash_update_p8) diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S index ef97d3187cb7..ca70cfacd0d0 100644 --- a/arch/arm64/crypto/sm3-ce-core.S +++ b/arch/arm64/crypto/sm3-ce-core.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 @@ -73,7 +74,7 @@ * int blocks) */ .text -SYM_FUNC_START(sm3_ce_transform) +SYM_TYPED_FUNC_START(sm3_ce_transform) /* load state */ ld1 {v8.4s-v9.4s}, [x0] rev64 v8.4s, v8.4s diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index ec7720dbe2c8..6e000113e508 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -17,6 +17,7 @@ * 0x401: for compile time BRK instruction * 0x800: kernel-mode BUG() and WARN() traps * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) + * 0x8xxx: Control-Flow Integrity traps */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 @@ -28,4 +29,9 @@ #define KASAN_BRK_IMM 0x900 #define KASAN_BRK_MASK 0x0ff +#define CFI_BRK_IMM_TARGET GENMASK(4, 0) +#define CFI_BRK_IMM_TYPE GENMASK(9, 5) +#define CFI_BRK_IMM_BASE 0x8000 +#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE) + #endif diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index dbc45a4157fa..329dbbd4d50b 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -26,7 +26,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) +#define MCOUNT_ADDR ((unsigned long)_mcount) #endif /* The BL at the callsite's adjusted rec->ip */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 43f8c25b3fda..1436fa1cde24 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,4 +39,8 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + bti c ; + #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c7ccd82db1d2..d3f8b5df0c1f 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) +static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; @@ -168,7 +168,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) ttbr1 |= TTBR_CNP_BIT; } - replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); + replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); __cpu_install_idmap(idmap); replace_phys(ttbr1); diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index bfeeb5319abf..b1990e38aed0 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -99,7 +99,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu) * that read this address need to convert this address to the * Boot-Loader's endianness before jumping. */ - writeq_relaxed(__pa_symbol(function_nocfi(secondary_entry)), + writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point); writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 9bcaa5eacf16..d2c66507398d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -133,7 +133,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __nocfi __apply_alternatives(struct alt_region *region, bool is_module, +static void __apply_alternatives(struct alt_region *region, bool is_module, unsigned long *feature_mask) { struct alt_instr *alt; diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 48a8af97faa9..6b752fe89745 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -8,6 +8,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/sysreg.h> #include <asm/virt.h> @@ -28,7 +29,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -SYM_CODE_START(cpu_soft_restart) +SYM_TYPED_FUNC_START(cpu_soft_restart) mov_q x12, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround /* @@ -47,6 +48,6 @@ SYM_CODE_START(cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -SYM_CODE_END(cpu_soft_restart) +SYM_FUNC_END(cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af4de817d712..d8361691efeb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1685,7 +1685,7 @@ static phys_addr_t kpti_ng_pgd_alloc(int shift) return kpti_ng_temp_alloc; } -static void __nocfi +static void kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); @@ -1713,7 +1713,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) if (arm64_use_ng_mappings) return; - remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); if (!cpu) { alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index ea5dc7c90f46..26789865748c 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -56,7 +56,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned long pc; u32 new; - pc = (unsigned long)function_nocfi(ftrace_call); + pc = (unsigned long)ftrace_call; new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, AARCH64_INSN_BRANCH_LINK); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 19c2d487cb08..ce3d40120f72 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -204,7 +204,7 @@ void machine_kexec(struct kimage *kimage) typeof(cpu_soft_restart) *restart; cpu_install_idmap(); - restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); + restart = (void *)__pa_symbol(cpu_soft_restart); restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 0, 0); } else { diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ab7f4c476104..29a8e444db83 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -38,7 +38,7 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu) static int cpu_psci_cpu_boot(unsigned int cpu) { - phys_addr_t pa_secondary_entry = __pa_symbol(function_nocfi(secondary_entry)); + phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry); int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry); if (err) pr_err("failed to boot CPU%d (%d)\n", cpu, err); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7e1624ecab3c..49029eace3ad 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -66,7 +66,7 @@ static int smp_spin_table_cpu_init(unsigned int cpu) static int smp_spin_table_cpu_prepare(unsigned int cpu) { __le64 __iomem *release_addr; - phys_addr_t pa_holding_pen = __pa_symbol(function_nocfi(secondary_holding_pen)); + phys_addr_t pa_holding_pen = __pa_symbol(secondary_holding_pen); if (!cpu_release_addr[cpu]) return -ENODEV; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b7fed33981f7..3c026da95bbc 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/mm_types.h> #include <linux/kasan.h> +#include <linux/cfi.h> #include <asm/atomic.h> #include <asm/bug.h> @@ -991,6 +992,38 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; +#ifdef CONFIG_CFI_CLANG +static int cfi_handler(struct pt_regs *regs, unsigned long esr) +{ + unsigned long target; + u32 type; + + target = pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TARGET, esr)); + type = (u32)pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TYPE, esr)); + + switch (report_cfi_failure(regs, regs->pc, &target, type)) { + case BUG_TRAP_TYPE_BUG: + die("Oops - CFI", regs, 0); + break; + + case BUG_TRAP_TYPE_WARN: + break; + + default: + return DBG_HOOK_ERROR; + } + + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + return DBG_HOOK_HANDLED; +} + +static struct break_hook cfi_break_hook = { + .fn = cfi_handler, + .imm = CFI_BRK_IMM_BASE, + .mask = CFI_BRK_IMM_MASK, +}; +#endif /* CONFIG_CFI_CLANG */ + static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) { pr_err("%s generated an invalid instruction at %pS!\n", @@ -1052,6 +1085,9 @@ static struct break_hook kasan_break_hook = { }; #endif + +#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK) + /* * Initial handler for AArch64 BRK exceptions * This handler only used until debug_traps_init(). @@ -1059,10 +1095,12 @@ static struct break_hook kasan_break_hook = { int __init early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs) { +#ifdef CONFIG_CFI_CLANG + if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE) + return cfi_handler(regs, esr) != DBG_HOOK_HANDLED; +#endif #ifdef CONFIG_KASAN_SW_TAGS - unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; - - if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif return bug_handler(regs, esr) != DBG_HOOK_HANDLED; @@ -1071,6 +1109,9 @@ int __init early_brk64(unsigned long addr, unsigned long esr, void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); +#ifdef CONFIG_CFI_CLANG + register_kernel_break_hook(&cfi_break_hook); +#endif register_kernel_break_hook(&fault_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index bafbf78fab77..619e2dc7ee14 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -40,7 +40,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # kernel with CONFIG_WERROR enabled. CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7837a69524c5..8b9f419fcad9 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <linux/pgtable.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> @@ -185,7 +186,7 @@ SYM_FUNC_END(cpu_do_resume) * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. */ -SYM_FUNC_START(idmap_cpu_replace_ttbr1) +SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 __idmap_cpu_set_reserved_ttbr1 x1, x3 @@ -253,7 +254,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) SYM_DATA(__idmap_kpti_flag, .long 1) .popsection -SYM_FUNC_START(idmap_kpti_install_ng_mappings) +SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 temp_pte .req x0 num_cpus .req w1 diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index c7dd6ad779af..551dd99e98b8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,7 +10,6 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI - select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION diff --git a/arch/loongarch/kernel/dma.c b/arch/loongarch/kernel/dma.c index 8c9b5314a13e..7a9c6a9dd2d0 100644 --- a/arch/loongarch/kernel/dma.c +++ b/arch/loongarch/kernel/dma.c @@ -2,39 +2,29 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ -#include <linux/init.h> +#include <linux/acpi.h> #include <linux/dma-direct.h> -#include <linux/dma-mapping.h> -#include <linux/dma-map-ops.h> -#include <linux/swiotlb.h> -#include <asm/bootinfo.h> -#include <asm/dma.h> -#include <asm/loongson.h> - -/* - * We extract 4bit node id (bit 44~47) from Loongson-3's - * 48bit physical address space and embed it into 40bit. - */ - -static int node_id_offset; - -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - long nid = (paddr >> 44) & 0xf; - - return ((nid << 44) ^ paddr) | (nid << node_id_offset); -} - -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +void acpi_arch_dma_setup(struct device *dev) { - long nid = (daddr >> node_id_offset) & 0xf; + int ret; + u64 mask, end = 0; + const struct bus_dma_region *map = NULL; + + ret = acpi_dma_get_range(dev, &map); + if (!ret && map) { + const struct bus_dma_region *r = map; + + for (end = 0; r->size; r++) { + if (r->dma_start + r->size - 1 > end) + end = r->dma_start + r->size - 1; + } + + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->dma_range_map = map; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } - return ((nid << node_id_offset) ^ daddr) | (nid << 44); -} - -void __init plat_swiotlb_setup(void) -{ - swiotlb_init(true, SWIOTLB_VERBOSE); - node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF) + 36; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 8f5c2f9a1a83..d97c69dbe553 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -247,7 +247,7 @@ static void __init arch_mem_init(char **cmdline_p) sparse_init(); memblock_set_bottom_up(true); - plat_swiotlb_setup(); + swiotlb_init(true, SWIOTLB_VERBOSE); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index b06faf6c0b27..7bff88118507 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -87,7 +87,7 @@ config MMU_SUN3 config KEXEC bool "kexec system call" - depends on M68KCLASSIC + depends on M68KCLASSIC && MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index a6a886a89be2..e2038d9499e4 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -84,7 +84,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -573,9 +572,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -594,6 +593,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index bffd24c2755e..ddd201259e43 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -80,7 +80,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -530,9 +529,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -551,6 +550,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 0013425b1e08..d9f783707387 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -87,7 +87,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -550,9 +549,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -571,6 +570,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 42d969697f7f..68957c6bcff1 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -77,7 +77,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -522,9 +521,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -543,6 +542,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 97d6d9acb395..825c6a02fa9d 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -79,7 +79,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -532,9 +531,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -553,6 +552,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 8cbfc1c659a3..17f64c562bf1 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -78,7 +78,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -552,9 +551,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -573,6 +572,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 9f45fe60757f..f5f4c572b694 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -98,7 +98,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -638,9 +637,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -659,6 +658,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4736cfacf6a2..b4a0bbef7e39 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -76,7 +76,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -521,9 +520,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -542,6 +541,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 638cd38aa7d2..c6a6d5926793 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -77,7 +77,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -522,9 +521,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -543,6 +542,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ec8b6bb70ebd..49c9c89f0caf 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -78,7 +78,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -539,9 +538,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -560,6 +559,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 7d8dc578d59c..9b44eeb9c07f 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -74,7 +74,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -521,9 +520,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -542,6 +541,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 96290aee5302..d2ffb0a65b44 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -74,7 +74,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y -# CONFIG_NF_CONNTRACK_PROCFS is not set # CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m @@ -520,9 +519,9 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m @@ -541,6 +540,7 @@ CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m diff --git a/arch/m68k/include/uapi/asm/bootinfo-virt.h b/arch/m68k/include/uapi/asm/bootinfo-virt.h index b091ee9b06e0..7dbcd7bec103 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-virt.h +++ b/arch/m68k/include/uapi/asm/bootinfo-virt.h @@ -13,13 +13,8 @@ #define BI_VIRT_VIRTIO_BASE 0x8004 #define BI_VIRT_CTRL_BASE 0x8005 -/* - * A random seed used to initialize the RNG. Record format: - * - * - length [ 2 bytes, 16-bit big endian ] - * - seed data [ `length` bytes, padded to preserve 2-byte alignment ] - */ -#define BI_VIRT_RNG_SEED 0x8006 +/* No longer used -- replaced with BI_RNG_SEED -- but don't reuse this index: + * #define BI_VIRT_RNG_SEED 0x8006 */ #define VIRT_BOOTI_VERSION MK_BI_VERSION(2, 0) diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h index 95ecf3ae4c49..024e87d7095f 100644 --- a/arch/m68k/include/uapi/asm/bootinfo.h +++ b/arch/m68k/include/uapi/asm/bootinfo.h @@ -64,6 +64,13 @@ struct mem_info { /* (struct mem_info) */ #define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */ /* (string) */ +/* + * A random seed used to initialize the RNG. Record format: + * + * - length [ 2 bytes, 16-bit big endian ] + * - seed data [ `length` bytes, padded to preserve 4-byte struct alignment ] + */ +#define BI_RNG_SEED 0x0008 /* diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index e62fa8f2149b..3a2bb2e8fdad 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/nvram.h> #include <linux/initrd.h> +#include <linux/random.h> #include <asm/bootinfo.h> #include <asm/byteorder.h> @@ -109,10 +110,9 @@ extern void paging_init(void); static void __init m68k_parse_bootinfo(const struct bi_record *record) { + const struct bi_record *first_record = record; uint16_t tag; - save_bootinfo(record); - while ((tag = be16_to_cpu(record->tag)) != BI_LAST) { int unknown = 0; const void *data = record->data; @@ -148,10 +148,21 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) break; case BI_COMMAND_LINE: - strlcpy(m68k_command_line, data, + strscpy(m68k_command_line, data, sizeof(m68k_command_line)); break; + case BI_RNG_SEED: { + u16 len = be16_to_cpup(data); + add_bootloader_randomness(data + 2, len); + /* + * Zero the data to preserve forward secrecy, and zero the + * length to prevent kexec from using it. + */ + memzero_explicit((void *)data, len + 2); + break; + } + default: if (MACH_IS_AMIGA) unknown = amiga_parse_bootinfo(record); @@ -182,6 +193,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) record = (struct bi_record *)((unsigned long)record + size); } + save_bootinfo(first_record); + m68k_realnum_memory = m68k_num_memory; #ifdef CONFIG_SINGLE_MEMORY_CHUNK if (m68k_num_memory > 1) { diff --git a/arch/m68k/virt/config.c b/arch/m68k/virt/config.c index 4ab22946ff68..632ba200ad42 100644 --- a/arch/m68k/virt/config.c +++ b/arch/m68k/virt/config.c @@ -2,7 +2,6 @@ #include <linux/reboot.h> #include <linux/serial_core.h> -#include <linux/random.h> #include <clocksource/timer-goldfish.h> #include <asm/bootinfo.h> @@ -93,16 +92,6 @@ int __init virt_parse_bootinfo(const struct bi_record *record) data += 4; virt_bi_data.virtio.irq = be32_to_cpup(data); break; - case BI_VIRT_RNG_SEED: { - u16 len = be16_to_cpup(data); - add_bootloader_randomness(data + 2, len); - /* - * Zero the data to preserve forward secrecy, and zero the - * length to prevent kexec from using it. - */ - memzero_explicit((void *)data, len + 2); - break; - } default: unknown = 1; break; diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c index ab203e66ba0d..a9bea411d928 100644 --- a/arch/mips/bcm47xx/prom.c +++ b/arch/mips/bcm47xx/prom.c @@ -86,7 +86,7 @@ static __init void prom_init_mem(void) pr_debug("Assume 128MB RAM\n"); break; } - if (!memcmp(prom_init, prom_init + mem, 32)) + if (!memcmp((void *)prom_init, (void *)prom_init + mem, 32)) break; } lowmem = mem; @@ -159,7 +159,7 @@ void __init bcm47xx_prom_highmem_init(void) off = EXTVBASE + __pa(off); for (extmem = 128 << 20; extmem < 512 << 20; extmem <<= 1) { - if (!memcmp(prom_init, (void *)(off + extmem), 16)) + if (!memcmp((void *)prom_init, (void *)(off + extmem), 16)) break; } extmem -= lowmem; diff --git a/arch/mips/boot/dts/brcm/bcm63268.dtsi b/arch/mips/boot/dts/brcm/bcm63268.dtsi index c3ce49ec675f..8926417a8fbc 100644 --- a/arch/mips/boot/dts/brcm/bcm63268.dtsi +++ b/arch/mips/boot/dts/brcm/bcm63268.dtsi @@ -105,14 +105,20 @@ interrupts = <2>, <3>; }; - wdt: watchdog@1000009c { - compatible = "brcm,bcm7038-wdt"; - reg = <0x1000009c 0xc>; + timer-mfd@10000080 { + compatible = "brcm,bcm7038-twd", "simple-mfd", "syscon"; + reg = <0x10000080 0x30>; + ranges = <0x0 0x10000080 0x30>; - clocks = <&periph_osc>; - clock-names = "refclk"; + wdt: watchdog@1c { + compatible = "brcm,bcm7038-wdt"; + reg = <0x1c 0xc>; - timeout-sec = <30>; + clocks = <&periph_osc>; + clock-names = "refclk"; + + timeout-sec = <30>; + }; }; uart0: serial@10000180 { diff --git a/arch/mips/boot/dts/lantiq/Makefile b/arch/mips/boot/dts/lantiq/Makefile index f5dfc06242b9..ae6e3e21ebeb 100644 --- a/arch/mips/boot/dts/lantiq/Makefile +++ b/arch/mips/boot/dts/lantiq/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -dtb-$(CONFIG_DT_EASY50712) += easy50712.dtb +dtb-$(CONFIG_DT_EASY50712) += danube_easy50712.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/mips/boot/dts/lantiq/easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index 1ce20b7d05cb..1ce20b7d05cb 100644 --- a/arch/mips/boot/dts/lantiq/easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts diff --git a/arch/mips/cavium-octeon/oct_ilm.c b/arch/mips/cavium-octeon/oct_ilm.c index 6a4694538bb6..dc05262e85ff 100644 --- a/arch/mips/cavium-octeon/oct_ilm.c +++ b/arch/mips/cavium-octeon/oct_ilm.c @@ -28,7 +28,7 @@ struct latency_info { static struct latency_info li; static struct dentry *dir; -static int show_latency(struct seq_file *m, void *v) +static int oct_ilm_show(struct seq_file *m, void *v) { u64 cpuclk, avg, max, min; struct latency_info curr_li = li; @@ -43,18 +43,7 @@ static int show_latency(struct seq_file *m, void *v) curr_li.interrupt_cnt, avg, max, min); return 0; } - -static int oct_ilm_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_latency, NULL); -} - -static const struct file_operations oct_ilm_ops = { - .open = oct_ilm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(oct_ilm); static int reset_statistics(void *data, u64 value) { @@ -67,7 +56,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(reset_statistics_ops, NULL, reset_statistics, "%llu\n") static void init_debugfs(void) { dir = debugfs_create_dir("oct_ilm", 0); - debugfs_create_file("statistics", 0222, dir, NULL, &oct_ilm_ops); + debugfs_create_file("statistics", 0222, dir, NULL, &oct_ilm_fops); debugfs_create_file("reset", 0222, dir, NULL, &reset_statistics_ops); } diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index e7f994393ae8..a71727f7a608 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -530,7 +530,7 @@ void octeon_user_io_init(void) /* Get the current settings for CP0_CVMMEMCTL_REG */ cvmmemctl.u64 = read_c0_cvmmemctl(); /* R/W If set, marked write-buffer entries time out the same - * as as other entries; if clear, marked write-buffer entries + * as other entries; if clear, marked write-buffer entries * use the maximum timeout. */ cvmmemctl.s.dismarkwblongto = 1; /* R/W If set, a merged store does not clear the write-buffer diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index cf9c6329b807..ed4a6388791e 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -32,9 +32,6 @@ CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_MROUTE=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set @@ -117,7 +114,6 @@ CONFIG_JFFS2_SUMMARY=y CONFIG_JFFS2_COMPRESSION_OPTIONS=y CONFIG_SQUASHFS=y # CONFIG_CRYPTO_HW is not set -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_CMDLINE_BOOL=y diff --git a/arch/mips/configs/ath25_defconfig b/arch/mips/configs/ath25_defconfig index 7143441f5476..afd1c16242e9 100644 --- a/arch/mips/configs/ath25_defconfig +++ b/arch/mips/configs/ath25_defconfig @@ -29,9 +29,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -108,7 +105,6 @@ CONFIG_SQUASHFS_XZ=y # CONFIG_XZ_DEC_ARMTHUMB is not set # CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 96622a2ad333..0b741716c852 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -10,12 +10,6 @@ CONFIG_EMBEDDED=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_ATH79=y -CONFIG_ATH79_MACH_AP121=y -CONFIG_ATH79_MACH_AP136=y -CONFIG_ATH79_MACH_AP81=y -CONFIG_ATH79_MACH_DB120=y -CONFIG_ATH79_MACH_PB44=y -CONFIG_ATH79_MACH_UBNT_XM=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set CONFIG_PCI=y @@ -29,9 +23,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -92,7 +83,6 @@ CONFIG_LEDS_GPIO=y # CONFIG_DNOTIFY is not set # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_CRC_ITU_T=m -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig index 861f680184b9..34d0ca638ef0 100644 --- a/arch/mips/configs/bcm63xx_defconfig +++ b/arch/mips/configs/bcm63xx_defconfig @@ -24,9 +24,6 @@ CONFIG_PCMCIA_BCM63XX=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_CFG80211=y diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index d83e7d600b0a..d15961f00ece 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -49,8 +49,6 @@ CONFIG_IP_PIMSM_V2=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -59,7 +57,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_SIT_6RD=y CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y @@ -101,7 +98,6 @@ CONFIG_BAYCOM_SER_HDX=m CONFIG_YAM=m CONFIG_FW_LOADER=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_EEPROM_LEGACY=y CONFIG_EEPROM_MAX6875=y @@ -230,12 +226,8 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -243,7 +235,6 @@ CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/bmips_be_defconfig b/arch/mips/configs/bmips_be_defconfig index 032bb51defe8..daef132d000b 100644 --- a/arch/mips/configs/bmips_be_defconfig +++ b/arch/mips/configs/bmips_be_defconfig @@ -17,9 +17,6 @@ CONFIG_PACKET=y CONFIG_PACKET_DIAG=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_CFG80211=y CONFIG_NL80211_TESTMODE=y diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig index 5956fb95c19f..cd0dc37c3d84 100644 --- a/arch/mips/configs/bmips_stb_defconfig +++ b/arch/mips/configs/bmips_stb_defconfig @@ -12,7 +12,6 @@ CONFIG_HIGHMEM=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_NR_CPUS=4 -CONFIG_CC_STACKPROTECTOR_STRONG=y # CONFIG_SECCOMP is not set CONFIG_MIPS_O32_FP64_SUPPORT=y # CONFIG_RD_GZIP is not set @@ -21,8 +20,6 @@ CONFIG_MIPS_O32_FP64_SUPPORT=y CONFIG_RD_XZ=y # CONFIG_RD_LZO is not set # CONFIG_RD_LZ4 is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_PCI=y CONFIG_PCI_MSI=y CONFIG_PCIEASPM_POWERSAVE=y @@ -30,7 +27,6 @@ CONFIG_PCIEPORTBUS=y CONFIG_PCIE_BRCMSTB=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=y -CONFIG_CPU_FREQ_STAT_DETAILS=y CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y @@ -44,15 +40,11 @@ CONFIG_PACKET=y CONFIG_PACKET_DIAG=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_CFG80211=y CONFIG_NL80211_TESTMODE=y CONFIG_WIRELESS=y CONFIG_MAC80211=y -CONFIG_NL80211=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set @@ -70,10 +62,6 @@ CONFIG_IP_PNP_RARP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BIC=y @@ -93,7 +81,6 @@ CONFIG_NET_SWITCHDEV=y CONFIG_DMA_CMA=y CONFIG_CMA_ALIGNMENT=12 CONFIG_SPI=y -CONFIG_SPI_BRCMSTB=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y @@ -105,14 +92,11 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_ROM=y CONFIG_MTD_ABSENT=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_NAND=y CONFIG_MTD_NAND_BRCMNAND=y CONFIG_MTD_SPI_NOR=y # CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set CONFIG_MTD_UBI=y CONFIG_MTD_UBI_GLUEBI=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 @@ -120,7 +104,6 @@ CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y CONFIG_VLAN_8021Q=y @@ -135,7 +118,6 @@ CONFIG_INPUT_UINPUT=y # CONFIG_SERIO is not set CONFIG_VT=y CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y @@ -203,17 +185,14 @@ CONFIG_CMDLINE="earlycon" CONFIG_MIPS_CMDLINE_DTB_EXTEND=y # CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER is not set # CONFIG_CRYPTO_HW is not set -CONFIG_DT_BCM974XX=y CONFIG_FW_CFE=y CONFIG_ATA=y CONFIG_SATA_AHCI_PLATFORM=y -CONFIG_AHCI_BRCMSTB=y CONFIG_GENERIC_PHY=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y CONFIG_PHY_BRCM_USB=y CONFIG_PHY_BRCM_SATA=y -CONFIG_PM_RUNTIME=y CONFIG_PM_DEBUG=y CONFIG_SYSVIPC=y CONFIG_FUNCTION_GRAPH_TRACER=y @@ -227,3 +206,5 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y +CONFIG_AHCI_BRCM=y +CONFIG_MTD_RAW_NAND=y diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 97ceaf080c0c..a2311495af79 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -71,7 +71,6 @@ CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index b8bd66300996..a8b62df3c021 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -61,7 +61,6 @@ CONFIG_INET6_AH=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_IPV6_MIP6=y -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y CONFIG_IPV6_VTI=y CONFIG_IPV6_SIT_6RD=y CONFIG_IPV6_GRE=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 4044f2829759..49ec1575234e 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -37,9 +37,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -47,7 +44,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y @@ -77,7 +73,6 @@ CONFIG_NETDEVICES=y CONFIG_DECLANCE=y # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set @@ -191,12 +186,8 @@ CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m @@ -206,7 +197,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 157fc57520a7..5cec4c096e2c 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -33,9 +33,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -43,7 +40,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y @@ -73,7 +69,6 @@ CONFIG_NETDEVICES=y CONFIG_DECLANCE=y # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set @@ -186,12 +181,8 @@ CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m @@ -201,7 +192,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index f73c26ebfc83..af37e26d9b5b 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -32,9 +32,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -42,7 +39,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_NETWORK_SECMARK=y @@ -72,7 +68,6 @@ CONFIG_NETDEVICES=y CONFIG_DECLANCE=y # CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set -# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_CADENCE is not set # CONFIG_NET_VENDOR_CAVIUM is not set @@ -186,12 +181,8 @@ CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARC4=m @@ -201,7 +192,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index ba47c5e929b7..843d6a5a4f61 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -35,8 +35,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y CONFIG_NET_IPIP=m -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y @@ -92,7 +90,6 @@ CONFIG_MTD_CFI_AMDSTD=m CONFIG_MTD_CFI_STAA=m CONFIG_MTD_PHYSMAP=m CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_RAM=m CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m @@ -159,7 +156,6 @@ CONFIG_USB_MOUSE=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_OTG_PRODUCTLIST=y -CONFIG_USB_WUSB_CBAF=m CONFIG_USB_C67X00_HCD=m CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y @@ -219,15 +215,10 @@ CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=y CONFIG_CRC7=m -# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 510709565404..8cfbafa532e0 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -25,7 +25,6 @@ CONFIG_NETDEVICES=y CONFIG_NET_SWITCHDEV=y CONFIG_NET_DSA=y CONFIG_MSCC_OCELOT_SWITCH=y -CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y CONFIG_MDIO_MSCC_MIIM=y CONFIG_MICROSEMI_PHY=y diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index ce8a444957c1..eb755650f821 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -29,9 +29,6 @@ CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETFILTER=y @@ -218,9 +215,6 @@ CONFIG_HDLC_X25=m CONFIG_PCI200SYN=m CONFIG_WANXL=m CONFIG_FARSYNC=m -CONFIG_DSCC4=m -CONFIG_DSCC4_PCISYNC=y -CONFIG_DSCC4_PCI_RST=y CONFIG_LAPBETHER=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -286,7 +280,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -297,7 +290,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200 root=/dev/nfs rw ip=auto" diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 70a4ba90f491..13df29901237 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -36,9 +36,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -47,7 +44,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y @@ -226,7 +222,6 @@ CONFIG_SERIO_RAW=m CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_SERIAL_IP22_ZILOG=m # CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=m # CONFIG_HWMON is not set CONFIG_THERMAL=y CONFIG_WATCHDOG=y @@ -320,11 +315,7 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -333,7 +324,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 821630ac1be7..3e86f8106ba0 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -33,9 +33,6 @@ CONFIG_NET_KEY_MIGRATE=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -44,10 +41,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_SIT=m CONFIG_IPV6_SIT_6RD=y CONFIG_IPV6_TUNNEL=m @@ -92,7 +85,6 @@ CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_RFKILL=m CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_SCSI=y @@ -115,7 +107,6 @@ CONFIG_SCSI_AIC94XX=m # CONFIG_AIC94XX_DEBUG is not set CONFIG_SCSI_MVSAS=m # CONFIG_SCSI_MVSAS_DEBUG is not set -CONFIG_SCSI_DPT_I2O=m CONFIG_SCSI_MPT2SAS=m CONFIG_LIBFC=m CONFIG_SCSI_QLOGIC_1280=y @@ -126,8 +117,6 @@ CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m @@ -166,7 +155,6 @@ CONFIG_JME=m CONFIG_MLX4_EN=m # CONFIG_MLX4_DEBUG is not set CONFIG_KS8851_MLL=m -CONFIG_VXGE=m CONFIG_AX88796=m CONFIG_AX88796_93CX6=y CONFIG_ETHOC=m @@ -264,7 +252,6 @@ CONFIG_I2C_VIAPRO=m CONFIG_I2C_OCORES=m CONFIG_I2C_PCA_PLATFORM=m CONFIG_I2C_SIMTEC=m -CONFIG_I2C_PARPORT_LIGHT=m CONFIG_I2C_TAOS_EVM=m CONFIG_I2C_STUB=m # CONFIG_HWMON is not set @@ -309,7 +296,6 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_SQUASHFS=m CONFIG_OMFS_FS=m -CONFIG_EXOFS_FS=m CONFIG_NFS_FS=y CONFIG_SECURITYFS=y CONFIG_CRYPTO_CRYPTD=m @@ -321,12 +307,8 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -335,7 +317,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 0921ef38e9fb..ba13eea0509f 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -29,9 +29,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_TCP_MD5SIG=y # CONFIG_IPV6 is not set CONFIG_SCSI=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 74020aa3440b..8ced2224c328 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -43,7 +43,6 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NETWORK_SECMARK=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_RAID_ATTRS=y CONFIG_SCSI=y @@ -165,7 +164,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_TGR192=y CONFIG_CRYPTO_WP512=y CONFIG_CRYPTO_ANUBIS=y CONFIG_CRYPTO_ARC4=y diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 843f360da5f2..106b21cb677f 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -32,7 +32,6 @@ CONFIG_PARPORT_1284=y CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_FD=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m CONFIG_CDROM_PKTCDVD=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 791894c4d8fb..7e598d338979 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -43,9 +43,6 @@ CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BIC=y CONFIG_DEFAULT_BIC=y @@ -77,7 +74,6 @@ CONFIG_MAC80211_LEDS=y CONFIG_RFKILL=m CONFIG_RFKILL_INPUT=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_BLK_DEV_SD=y @@ -312,12 +308,8 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -326,7 +318,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig index 25e70423e17d..68207b31dc20 100644 --- a/arch/mips/configs/loongson1b_defconfig +++ b/arch/mips/configs/loongson1b_defconfig @@ -28,9 +28,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set @@ -115,7 +112,6 @@ CONFIG_NLS_ISO8859_1=m # CONFIG_CRYPTO_ECHAINIV is not set # CONFIG_CRYPTO_HW is not set CONFIG_DYNAMIC_DEBUG=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig index 3a158d4d2fab..c3910a9dee9e 100644 --- a/arch/mips/configs/loongson1c_defconfig +++ b/arch/mips/configs/loongson1c_defconfig @@ -29,9 +29,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set @@ -116,7 +113,6 @@ CONFIG_NLS_ISO8859_1=m # CONFIG_CRYPTO_ECHAINIV is not set # CONFIG_CRYPTO_HW is not set CONFIG_DYNAMIC_DEBUG=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index e948ca487e2d..728bef666f7a 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -95,7 +95,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=m CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_RAID_ATTRS=m @@ -229,7 +228,6 @@ CONFIG_SERIAL_8250_RSA=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_NONSTANDARD=y CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_GPIO_LOONGSON=y @@ -336,7 +334,6 @@ CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 25ecd15bc952..aca66a5f330d 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -143,7 +143,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=m CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_VIRTIO_BLK=y @@ -268,7 +267,6 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_NONSTANDARD=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_GPIO_LOONGSON=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 7a5bdd236a2a..265d38dffbf6 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -42,8 +42,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -229,7 +227,6 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_FD=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m @@ -237,7 +234,6 @@ CONFIG_ATA_OVER_ETH=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y @@ -408,7 +404,6 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index b5ba08d7ab57..1d2b248c7cd3 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -46,8 +46,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -233,7 +231,6 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_FD=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m @@ -241,7 +238,6 @@ CONFIG_ATA_OVER_ETH=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y @@ -415,7 +411,6 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 6fb9bc29f4a0..fd63a2b152f6 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -76,7 +76,6 @@ CONFIG_NET_ACT_POLICE=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set @@ -98,7 +97,6 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set @@ -172,7 +170,6 @@ CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index eb72df528243..1f07e354c954 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -78,7 +78,6 @@ CONFIG_NET_ACT_POLICE=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set @@ -100,7 +99,6 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set @@ -173,7 +171,6 @@ CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 1fb40d310f49..5cd3eca236de 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -79,7 +79,6 @@ CONFIG_NET_ACT_POLICE=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set @@ -99,7 +98,6 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set @@ -174,7 +172,6 @@ CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 75cb778c6149..45688e742a15 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -80,7 +80,6 @@ CONFIG_NET_ACT_POLICE=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set @@ -102,7 +101,6 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set @@ -176,7 +174,6 @@ CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 7b4f247dc60c..136f965784db 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -77,7 +77,6 @@ CONFIG_NET_ACT_POLICE=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set @@ -99,7 +98,6 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set @@ -172,7 +170,6 @@ CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 8d58653f1b4e..75b8da8d9927 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -45,8 +45,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -231,7 +229,6 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_FD=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m @@ -239,7 +236,6 @@ CONFIG_ATA_OVER_ETH=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y @@ -414,7 +410,6 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 1339c062a042..edf9634aa8ee 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -58,13 +58,9 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_TUNNEL=m CONFIG_NETWORK_SECMARK=y CONFIG_NETFILTER=y @@ -282,7 +278,6 @@ CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m CONFIG_SUNDANCE=m CONFIG_PCMCIA_FMVJ18X=m -CONFIG_HP100=m CONFIG_E100=m CONFIG_E1000=m CONFIG_IXGB=m @@ -366,9 +361,6 @@ CONFIG_HDLC_X25=m CONFIG_PCI200SYN=m CONFIG_WANXL=m CONFIG_FARSYNC=m -CONFIG_DSCC4=m -CONFIG_DSCC4_PCISYNC=y -CONFIG_DSCC4_PCI_RST=y CONFIG_LAPBETHER=m # CONFIG_KEYBOARD_ATKBD is not set CONFIG_KEYBOARD_GPIO=y @@ -681,7 +673,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m @@ -690,5 +681,4 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index fc39ddf610a9..9c34daf83563 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -35,9 +35,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig index fd567247adc7..48dd02d01ac1 100644 --- a/arch/mips/configs/pic32mzda_defconfig +++ b/arch/mips/configs/pic32mzda_defconfig @@ -45,7 +45,6 @@ CONFIG_KEYBOARD_GPIO_POLLED=m CONFIG_SERIAL_PIC32=y CONFIG_SERIAL_PIC32_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=m CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set CONFIG_HIDRAW=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 252d472387aa..93306f5e045b 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -33,9 +33,6 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET_DIAG=m CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_CUBIC=m @@ -162,5 +159,4 @@ CONFIG_SQUASHFS=y CONFIG_CRYPTO_TEST=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC16=m -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index f8212a813be7..30c195f28278 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -21,9 +21,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_MTD=y @@ -51,7 +48,6 @@ CONFIG_TC35815=y CONFIG_SERIAL_TXX9_CONSOLE=y CONFIG_SERIAL_TXX9_STDSERIAL=y CONFIG_SPI=y -CONFIG_SPI_TXX9=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_TXX9_WDT=m @@ -65,8 +61,6 @@ CONFIG_SND=m # CONFIG_SND_SPI is not set # CONFIG_SND_MIPS is not set CONFIG_SND_SOC=m -CONFIG_SND_SOC_TXX9ACLC=m -CONFIG_SND_SOC_TXX9ACLC_GENERIC=m # CONFIG_USB_SUPPORT is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -78,7 +72,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_RS5C348=y CONFIG_RTC_DRV_DS1742=y -CONFIG_RTC_DRV_TX4939=y CONFIG_DMADEVICES=y CONFIG_TXX9_DMAC=m # CONFIG_DNOTIFY is not set diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 04c681bd716e..9932a593e3c3 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -29,9 +29,6 @@ CONFIG_NET_IPIP=m CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_TCP_MD5SIG=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y @@ -39,7 +36,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y @@ -190,9 +186,7 @@ CONFIG_PARIDE_KTTI=m CONFIG_PARIDE_ON20=m CONFIG_PARIDE_ON26=m CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SX8=m CONFIG_BLK_DEV_RAM=m CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m @@ -398,7 +392,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index eb359db15dba..bf017d493002 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -35,9 +35,6 @@ CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set @@ -140,7 +137,6 @@ CONFIG_CRC32_SARWATE=y # CONFIG_XZ_DEC_ARMTHUMB is not set # CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index de94bf756a93..030186f89501 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -88,7 +88,6 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -96,7 +95,6 @@ CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index a14f8ea5c386..0722a3bf03c0 100644 --- a/arch/mips/configs/vocore2_defconfig +++ b/arch/mips/configs/vocore2_defconfig @@ -35,9 +35,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index eeb689f715cb..eb5acf1f24ae 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -37,9 +37,6 @@ CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set @@ -146,7 +143,6 @@ CONFIG_CRYPTO_ARC4=m CONFIG_CRC_ITU_T=m CONFIG_CRC32_SARWATE=y CONFIG_PRINTK_TIME=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 57561e0e6e8d..44f9824c1d8c 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -63,10 +63,6 @@ extern void do_domain_IRQ(struct irq_domain *domain, unsigned int irq); extern void arch_init_irq(void); extern void spurious_interrupt(void); -extern int allocate_irqno(void); -extern void alloc_legacy_irqno(void); -extern void free_irqno(unsigned int irq); - /* * Before R2 the timer and performance counter interrupts were both fixed to * IE7. Since R2 their number has to be read from the c0_intctl register. diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h index cbe75ade3277..1e8621a6afa3 100644 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ b/arch/mips/include/asm/mach-ar7/ar7.h @@ -104,8 +104,6 @@ struct plat_dsl_data { int reset_bit_sar; }; -extern int ar7_cpu_clock, ar7_bus_clock, ar7_dsp_clock; - static inline int ar7_is_titan(void) { return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x24)) & 0xffff) == diff --git a/arch/mips/include/asm/octeon/cvmx-fpa.h b/arch/mips/include/asm/octeon/cvmx-fpa.h index 29ae63606ab4..f6dfcca97f19 100644 --- a/arch/mips/include/asm/octeon/cvmx-fpa.h +++ b/arch/mips/include/asm/octeon/cvmx-fpa.h @@ -264,26 +264,6 @@ static inline void cvmx_fpa_free(void *ptr, uint64_t pool, } /** - * Setup a FPA pool to control a new block of memory. - * This can only be called once per pool. Make sure proper - * locking enforces this. - * - * @pool: Pool to initialize - * 0 <= pool < 8 - * @name: Constant character string to name this pool. - * String is not copied. - * @buffer: Pointer to the block of memory to use. This must be - * accessible by all processors and external hardware. - * @block_size: Size for each block controlled by the FPA - * @num_blocks: Number of blocks - * - * Returns 0 on Success, - * -1 on failure - */ -extern int cvmx_fpa_setup_pool(uint64_t pool, const char *name, void *buffer, - uint64_t block_size, uint64_t num_blocks); - -/** * Shutdown a Memory pool and validate that it had all of * the buffers originally placed in it. This should only be * called by one processor after all hardware has finished diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h index 7e714aefc76d..5c1d726c702f 100644 --- a/arch/mips/include/asm/octeon/octeon.h +++ b/arch/mips/include/asm/octeon/octeon.h @@ -43,7 +43,6 @@ extern int octeon_get_southbridge_interrupt(void); extern int octeon_get_boot_coremask(void); extern int octeon_get_boot_num_arguments(void); extern const char *octeon_get_boot_argument(int arg); -extern void octeon_hal_setup_reserved32(void); extern void octeon_user_io_init(void); extern void octeon_init_cvmcount(void); diff --git a/arch/mips/include/asm/octeon/pci-octeon.h b/arch/mips/include/asm/octeon/pci-octeon.h index b12d9a3fbfb6..2f46f6c6e3d0 100644 --- a/arch/mips/include/asm/octeon/pci-octeon.h +++ b/arch/mips/include/asm/octeon/pci-octeon.h @@ -64,6 +64,4 @@ enum octeon_dma_bar_type { extern enum octeon_dma_bar_type octeon_dma_bar_type; void octeon_pci_dma_init(void); -extern char *octeon_swiotlb; - #endif diff --git a/arch/mips/include/asm/sibyte/sb1250.h b/arch/mips/include/asm/sibyte/sb1250.h index dbde5f93f0dd..495b31925ed7 100644 --- a/arch/mips/include/asm/sibyte/sb1250.h +++ b/arch/mips/include/asm/sibyte/sb1250.h @@ -32,7 +32,6 @@ extern unsigned int soc_type; extern unsigned int periph_rev; extern unsigned int zbbus_mhz; -extern void sb1250_time_init(void); extern void sb1250_mask_irq(int cpu, int irq); extern void sb1250_unmask_irq(int cpu, int irq); diff --git a/arch/mips/include/asm/sni.h b/arch/mips/include/asm/sni.h index 7dfa297ce597..7fb6656a6bfd 100644 --- a/arch/mips/include/asm/sni.h +++ b/arch/mips/include/asm/sni.h @@ -226,9 +226,6 @@ extern void sni_pcit_cplus_irq_init(void); extern void sni_rm200_irq_init(void); extern void sni_pcimt_irq_init(void); -/* timer inits */ -extern void sni_cpu_time_init(void); - /* eisa init for RM200/400 */ #ifdef CONFIG_EISA extern int sni_eisa_root_init(void); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 7db6ff9aed7d..f88ce78e13e3 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -26,7 +26,7 @@ __init void mips_set_machine_name(const char *name) if (name == NULL) return; - strlcpy(mips_machine_name, name, sizeof(mips_machine_name)); + strscpy(mips_machine_name, name, sizeof(mips_machine_name)); pr_info("MIPS: machine is %s\n", mips_get_machine_name()); } @@ -52,9 +52,9 @@ int __init __dt_register_buses(const char *bus0, const char *bus1) if (!of_have_populated_dt()) panic("device tree not present"); - strlcpy(of_ids[0].compatible, bus0, sizeof(of_ids[0].compatible)); + strscpy(of_ids[0].compatible, bus0, sizeof(of_ids[0].compatible)); if (bus1) { - strlcpy(of_ids[1].compatible, bus1, + strscpy(of_ids[1].compatible, bus1, sizeof(of_ids[1].compatible)); } diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 56b51de2dc51..58fc8d089402 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -340,7 +340,7 @@ void *__init relocate_kernel(void) early_init_dt_scan(fdt); if (boot_command_line[0]) { /* Boot command line was passed in device tree */ - strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strscpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); } #endif /* CONFIG_USE_OF */ diff --git a/arch/mips/kernel/segment.c b/arch/mips/kernel/segment.c index 0a9bd7b0983b..24560501c70d 100644 --- a/arch/mips/kernel/segment.c +++ b/arch/mips/kernel/segment.c @@ -46,7 +46,7 @@ static void build_segment_config(char *str, unsigned int cfg) ((cfg & MIPS_SEGCFG_EU) >> MIPS_SEGCFG_EU_SHIFT)); } -static int show_segments(struct seq_file *m, void *v) +static int segments_show(struct seq_file *m, void *v) { unsigned int segcfg; char str[42]; @@ -80,18 +80,7 @@ static int show_segments(struct seq_file *m, void *v) return 0; } - -static int segments_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_segments, NULL); -} - -static const struct file_operations segments_fops = { - .open = segments_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(segments); static int __init segments_info(void) { diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 2ca156a5b231..39c79f67c7a3 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -42,6 +42,7 @@ #include <asm/setup.h> #include <asm/smp-ops.h> #include <asm/prom.h> +#include <asm/fw/fw.h> #ifdef CONFIG_MIPS_ELF_APPENDED_DTB char __section(".appended_dtb") __appended_dtb[0x100000]; @@ -756,6 +757,24 @@ static void __init prefill_possible_map(void) static inline void prefill_possible_map(void) {} #endif +static void __init setup_rng_seed(void) +{ + char *rng_seed_hex = fw_getenv("rngseed"); + u8 rng_seed[512]; + size_t len; + + if (!rng_seed_hex) + return; + + len = min(sizeof(rng_seed), strlen(rng_seed_hex) / 2); + if (hex2bin(rng_seed, rng_seed_hex, len)) + return; + + add_bootloader_randomness(rng_seed, len); + memzero_explicit(rng_seed, len); + memzero_explicit(rng_seed_hex, len * 2); +} + void __init setup_arch(char **cmdline_p) { cpu_probe(); @@ -786,6 +805,8 @@ void __init setup_arch(char **cmdline_p) paging_init(); memblock_dump_all(); + + setup_rng_seed(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index c731082a0c42..be4829cc7a3a 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -34,6 +34,14 @@ unsigned long physical_memsize = 0L; */ static struct ltq_soc_info soc_info; +/* + * These structs are used to override vsmp_init_secondary() + */ +#if defined(CONFIG_MIPS_MT_SMP) +extern const struct plat_smp_ops vsmp_smp_ops; +static struct plat_smp_ops lantiq_smp_ops; +#endif + const char *get_system_type(void) { return soc_info.sys_type; @@ -84,6 +92,17 @@ void __init plat_mem_setup(void) __dt_setup_arch(dtb); } +#if defined(CONFIG_MIPS_MT_SMP) +static void lantiq_init_secondary(void) +{ + /* + * MIPS CPU startup function vsmp_init_secondary() will only + * enable some of the interrupts for the second CPU/VPE. + */ + set_c0_status(ST0_IM); +} +#endif + void __init prom_init(void) { /* call the soc specific detetcion code and get it to fill soc_info */ @@ -95,7 +114,10 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (register_vsmp_smp_ops()) - panic("failed to register_vsmp_smp_ops()"); + if (cpu_has_mipsmt) { + lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops.init_secondary = lantiq_init_secondary; + register_smp_ops(&lantiq_smp_ops); + } #endif } diff --git a/arch/mips/lantiq/xway/vmmc.c b/arch/mips/lantiq/xway/vmmc.c index 7a14da8d9d15..2796e87dfcae 100644 --- a/arch/mips/lantiq/xway/vmmc.c +++ b/arch/mips/lantiq/xway/vmmc.c @@ -4,9 +4,10 @@ * Copyright (C) 2012 John Crispin <john@phrozen.org> */ +#include <linux/err.h> #include <linux/export.h> +#include <linux/gpio/consumer.h> #include <linux/of_platform.h> -#include <linux/of_gpio.h> #include <linux/dma-mapping.h> #include <lantiq_soc.h> @@ -25,23 +26,28 @@ EXPORT_SYMBOL(ltq_get_cp1_base); static int vmmc_probe(struct platform_device *pdev) { #define CP1_SIZE (1 << 20) + struct gpio_desc *gpio; int gpio_count; dma_addr_t dma; + int error; cp1_base = (void *) CPHYSADDR(dma_alloc_coherent(&pdev->dev, CP1_SIZE, &dma, GFP_KERNEL)); - gpio_count = of_gpio_count(pdev->dev.of_node); + gpio_count = gpiod_count(&pdev->dev, NULL); while (gpio_count > 0) { - enum of_gpio_flags flags; - int gpio = of_get_gpio_flags(pdev->dev.of_node, - --gpio_count, &flags); - if (gpio_request(gpio, "vmmc-relay")) + gpio = devm_gpiod_get_index(&pdev->dev, + NULL, --gpio_count, GPIOD_OUT_HIGH); + error = PTR_ERR_OR_ZERO(gpio); + if (error) { + dev_err(&pdev->dev, + "failed to request GPIO idx %d: %d\n", + gpio_count, error); continue; - dev_info(&pdev->dev, "requested GPIO %d\n", gpio); - gpio_direction_output(gpio, - (flags & OF_GPIO_ACTIVE_LOW) ? (0) : (1)); + } + + gpiod_set_consumer_name(gpio, "vmmc-relay"); } dev_info(&pdev->dev, "reserved %dMB at 0x%p", CP1_SIZE >> 20, cp1_base); diff --git a/arch/mips/lib/bswapdi.c b/arch/mips/lib/bswapdi.c index fcef74084492..88242dc7de17 100644 --- a/arch/mips/lib/bswapdi.c +++ b/arch/mips/lib/bswapdi.c @@ -1,17 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/compiler.h> +#include <uapi/linux/swab.h> + +/* To silence -Wmissing-prototypes. */ +unsigned long long __bswapdi2(unsigned long long u); unsigned long long notrace __bswapdi2(unsigned long long u) { - return (((u) & 0xff00000000000000ull) >> 56) | - (((u) & 0x00ff000000000000ull) >> 40) | - (((u) & 0x0000ff0000000000ull) >> 24) | - (((u) & 0x000000ff00000000ull) >> 8) | - (((u) & 0x00000000ff000000ull) << 8) | - (((u) & 0x0000000000ff0000ull) << 24) | - (((u) & 0x000000000000ff00ull) << 40) | - (((u) & 0x00000000000000ffull) << 56); + return ___constant_swab64(u); } - EXPORT_SYMBOL(__bswapdi2); diff --git a/arch/mips/lib/bswapsi.c b/arch/mips/lib/bswapsi.c index 22d8e4f6d66e..2ed655497de5 100644 --- a/arch/mips/lib/bswapsi.c +++ b/arch/mips/lib/bswapsi.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/compiler.h> +#include <uapi/linux/swab.h> + +/* To silence -Wmissing-prototypes. */ +unsigned int __bswapsi2(unsigned int u); unsigned int notrace __bswapsi2(unsigned int u) { - return (((u) & 0xff000000) >> 24) | - (((u) & 0x00ff0000) >> 8) | - (((u) & 0x0000ff00) << 8) | - (((u) & 0x000000ff) << 24); + return ___constant_swab32(u); } - EXPORT_SYMBOL(__bswapsi2); diff --git a/arch/mips/loongson2ef/common/pci.c b/arch/mips/loongson2ef/common/pci.c index 200916925e95..7d9ea51e8c01 100644 --- a/arch/mips/loongson2ef/common/pci.c +++ b/arch/mips/loongson2ef/common/pci.c @@ -73,8 +73,6 @@ static void __init setup_pcimap(void) #endif } -extern int sbx00_acpi_init(void); - static int __init pcibios_init(void) { setup_pcimap(); diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 587cf1d115e8..265bc57819df 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1032,7 +1032,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, */ if (dec_insn.micro_mips_mode) { /* - * If next instruction is a 16-bit instruction, then it + * If next instruction is a 16-bit instruction, then * it cannot be a FPU instruction. This could happen * since we can be called for non-FPU instructions. */ diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 30e0922f4cea..e17d862cfa4c 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -2,7 +2,7 @@ /* */ -/** +/* * Both AR2315 and AR2316 chips have PCI interface unit, which supports DMA * and interrupt. PCI interface supports MMIO access method, but does not * seem to support I/O ports. diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 1ca42f482130..8d16cd021f60 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -9,11 +9,11 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/gpio/consumer.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/clk.h> #include <linux/of_platform.h> -#include <linux/of_gpio.h> #include <linux/of_irq.h> #include <linux/of_pci.h> @@ -62,7 +62,7 @@ __iomem void *ltq_pci_mapped_cfg; static __iomem void *ltq_pci_membase; -static int reset_gpio; +static struct gpio_desc *reset_gpio; static struct clk *clk_pci, *clk_external; static struct resource pci_io_resource; static struct resource pci_mem_resource; @@ -95,6 +95,7 @@ static int ltq_pci_startup(struct platform_device *pdev) struct device_node *node = pdev->dev.of_node; const __be32 *req_mask, *bus_clk; u32 temp_buffer; + int error; /* get our clocks */ clk_pci = clk_get(&pdev->dev, NULL); @@ -123,17 +124,14 @@ static int ltq_pci_startup(struct platform_device *pdev) clk_disable(clk_external); /* setup reset gpio used by pci */ - reset_gpio = of_get_named_gpio(node, "gpio-reset", 0); - if (gpio_is_valid(reset_gpio)) { - int ret = devm_gpio_request(&pdev->dev, - reset_gpio, "pci-reset"); - if (ret) { - dev_err(&pdev->dev, - "failed to request gpio %d\n", reset_gpio); - return ret; - } - gpio_direction_output(reset_gpio, 1); + reset_gpio = devm_gpiod_get_optional(&pdev->dev, "reset", + GPIOD_OUT_LOW); + error = PTR_ERR_OR_ZERO(reset_gpio); + if (error) { + dev_err(&pdev->dev, "failed to request gpio: %d\n", error); + return error; } + gpiod_set_consumer_name(reset_gpio, "pci_reset"); /* enable auto-switching between PCI and EBU */ ltq_pci_w32(0xa, PCI_CR_CLK_CTRL); @@ -195,11 +193,11 @@ static int ltq_pci_startup(struct platform_device *pdev) ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_IEN) | 0x10, LTQ_EBU_PCC_IEN); /* toggle reset pin */ - if (gpio_is_valid(reset_gpio)) { - __gpio_set_value(reset_gpio, 0); + if (reset_gpio) { + gpiod_set_value_cansleep(reset_gpio, 1); wmb(); mdelay(1); - __gpio_set_value(reset_gpio, 1); + gpiod_set_value_cansleep(reset_gpio, 0); } return 0; } diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index d9c8c4e46aff..08c46cf122d7 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -44,7 +44,7 @@ void __init plat_mem_setup(void) pr_info(" builtin_cmdline : %s\n", CONFIG_CMDLINE); #endif if (dtb != __dtb_start) - strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strscpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); #ifdef CONFIG_EARLY_PRINTK fw_init_early_console(-1); diff --git a/arch/mips/ralink/bootrom.c b/arch/mips/ralink/bootrom.c index 94ca8379b83c..8c8cc0a81ed8 100644 --- a/arch/mips/ralink/bootrom.c +++ b/arch/mips/ralink/bootrom.c @@ -18,22 +18,11 @@ static int bootrom_show(struct seq_file *s, void *unused) return 0; } - -static int bootrom_open(struct inode *inode, struct file *file) -{ - return single_open(file, bootrom_show, NULL); -} - -static const struct file_operations bootrom_file_ops = { - .open = bootrom_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(bootrom); static int __init bootrom_setup(void) { - debugfs_create_file("bootrom", 0444, NULL, NULL, &bootrom_file_ops); + debugfs_create_file("bootrom", 0444, NULL, NULL, &bootrom_fops); return 0; } diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index e762886d1dda..5143d1cf8984 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -27,15 +27,18 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) { struct xtalk_bridge_platform_data *bd; struct sgi_w1_platform_data *wd; - struct platform_device *pdev; + struct platform_device *pdev_wd; + struct platform_device *pdev_bd; struct resource w1_res; unsigned long offset; offset = NODE_OFFSET(nasid); wd = kzalloc(sizeof(*wd), GFP_KERNEL); - if (!wd) - goto no_mem; + if (!wd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + return; + } snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", offset + (widget << SWIN_SIZE_BITS)); @@ -46,24 +49,35 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) w1_res.end = w1_res.start + 3; w1_res.flags = IORESOURCE_MEM; - pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(wd); - goto no_mem; + pdev_wd = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev_wd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_kfree_wd; + } + if (platform_device_add_resources(pdev_wd, &w1_res, 1)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform resources.\n", nasid, widget); + goto err_put_pdev_wd; + } + if (platform_device_add_data(pdev_wd, wd, sizeof(*wd))) { + pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget); + goto err_put_pdev_wd; + } + if (platform_device_add(pdev_wd)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget); + goto err_put_pdev_wd; } - platform_device_add_resources(pdev, &w1_res, 1); - platform_device_add_data(pdev, wd, sizeof(*wd)); /* platform_device_add_data() duplicates the data */ kfree(wd); - platform_device_add(pdev); bd = kzalloc(sizeof(*bd), GFP_KERNEL); - if (!bd) - goto no_mem; - pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(bd); - goto no_mem; + if (!bd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_unregister_pdev_wd; + } + pdev_bd = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); + if (!pdev_bd) { + pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); + goto err_kfree_bd; } @@ -84,15 +98,31 @@ static void bridge_platform_create(nasid_t nasid, int widget, int masterwid) bd->io.flags = IORESOURCE_IO; bd->io_offset = offset; - platform_device_add_data(pdev, bd, sizeof(*bd)); + if (platform_device_add_data(pdev_bd, bd, sizeof(*bd))) { + pr_warn("xtalk:n%d/%x bridge failed to add platform data.\n", nasid, widget); + goto err_put_pdev_bd; + } + if (platform_device_add(pdev_bd)) { + pr_warn("xtalk:n%d/%x bridge failed to add platform device.\n", nasid, widget); + goto err_put_pdev_bd; + } /* platform_device_add_data() duplicates the data */ kfree(bd); - platform_device_add(pdev); pr_info("xtalk:n%d/%x bridge widget\n", nasid, widget); return; -no_mem: - pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget); +err_put_pdev_bd: + platform_device_put(pdev_bd); +err_kfree_bd: + kfree(bd); +err_unregister_pdev_wd: + platform_device_unregister(pdev_wd); + return; +err_put_pdev_wd: + platform_device_put(pdev_wd); +err_kfree_wd: + kfree(wd); + return; } static int probe_one_port(nasid_t nasid, int widget, int masterwid) diff --git a/arch/mips/sgi-ip30/ip30-xtalk.c b/arch/mips/sgi-ip30/ip30-xtalk.c index 8129524421cb..7ceb2b23ea1c 100644 --- a/arch/mips/sgi-ip30/ip30-xtalk.c +++ b/arch/mips/sgi-ip30/ip30-xtalk.c @@ -40,12 +40,15 @@ static void bridge_platform_create(int widget, int masterwid) { struct xtalk_bridge_platform_data *bd; struct sgi_w1_platform_data *wd; - struct platform_device *pdev; + struct platform_device *pdev_wd; + struct platform_device *pdev_bd; struct resource w1_res; wd = kzalloc(sizeof(*wd), GFP_KERNEL); - if (!wd) - goto no_mem; + if (!wd) { + pr_warn("xtalk:%x bridge create out of memory\n", widget); + return; + } snprintf(wd->dev_id, sizeof(wd->dev_id), "bridge-%012lx", IP30_SWIN_BASE(widget)); @@ -56,24 +59,35 @@ static void bridge_platform_create(int widget, int masterwid) w1_res.end = w1_res.start + 3; w1_res.flags = IORESOURCE_MEM; - pdev = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(wd); - goto no_mem; + pdev_wd = platform_device_alloc("sgi_w1", PLATFORM_DEVID_AUTO); + if (!pdev_wd) { + pr_warn("xtalk:%x bridge create out of memory\n", widget); + goto err_kfree_wd; + } + if (platform_device_add_resources(pdev_wd, &w1_res, 1)) { + pr_warn("xtalk:%x bridge failed to add platform resources.\n", widget); + goto err_put_pdev_wd; + } + if (platform_device_add_data(pdev_wd, wd, sizeof(*wd))) { + pr_warn("xtalk:%x bridge failed to add platform data.\n", widget); + goto err_put_pdev_wd; + } + if (platform_device_add(pdev_wd)) { + pr_warn("xtalk:%x bridge failed to add platform device.\n", widget); + goto err_put_pdev_wd; } - platform_device_add_resources(pdev, &w1_res, 1); - platform_device_add_data(pdev, wd, sizeof(*wd)); /* platform_device_add_data() duplicates the data */ kfree(wd); - platform_device_add(pdev); bd = kzalloc(sizeof(*bd), GFP_KERNEL); - if (!bd) - goto no_mem; - pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); - if (!pdev) { - kfree(bd); - goto no_mem; + if (!bd) { + pr_warn("xtalk:%x bridge create out of memory\n", widget); + goto err_unregister_pdev_wd; + } + pdev_bd = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO); + if (!pdev_bd) { + pr_warn("xtalk:%x bridge create out of memory\n", widget); + goto err_kfree_bd; } bd->bridge_addr = IP30_RAW_SWIN_BASE(widget); @@ -93,15 +107,31 @@ static void bridge_platform_create(int widget, int masterwid) bd->io.flags = IORESOURCE_IO; bd->io_offset = IP30_SWIN_BASE(widget); - platform_device_add_data(pdev, bd, sizeof(*bd)); + if (platform_device_add_data(pdev_bd, bd, sizeof(*bd))) { + pr_warn("xtalk:%x bridge failed to add platform data.\n", widget); + goto err_put_pdev_bd; + } + if (platform_device_add(pdev_bd)) { + pr_warn("xtalk:%x bridge failed to add platform device.\n", widget); + goto err_put_pdev_bd; + } /* platform_device_add_data() duplicates the data */ kfree(bd); - platform_device_add(pdev); pr_info("xtalk:%x bridge widget\n", widget); return; -no_mem: - pr_warn("xtalk:%x bridge create out of memory\n", widget); +err_put_pdev_bd: + platform_device_put(pdev_bd); +err_kfree_bd: + kfree(bd); +err_unregister_pdev_wd: + platform_device_unregister(pdev_wd); + return; +err_put_pdev_wd: + platform_device_put(pdev_wd); +err_kfree_wd: + kfree(wd); + return; } static unsigned int __init xbow_widget_active(s8 wid) diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index 86f49c48fc34..2f08ad267a11 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -262,12 +262,6 @@ void __init arch_init_irq(void) __raw_writeq(tmp, IOADDR(A_IMR_REGISTER(0, R_IMR_INTERRUPT_MASK))); __raw_writeq(tmp, IOADDR(A_IMR_REGISTER(1, R_IMR_INTERRUPT_MASK))); - /* - * Note that the timer interrupts are also mapped, but this is - * done in sb1250_time_init(). Also, the profiling driver - * does its own management of IP7. - */ - /* Enable necessary IPs, disable the rest */ change_c0_status(ST0_IM, imask); } diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h index 8edb824049b9..0cb0ca149ac3 100644 --- a/arch/sh/include/asm/sections.h +++ b/arch/sh/include/asm/sections.h @@ -4,7 +4,7 @@ #include <asm-generic/sections.h> -extern long __machvec_start, __machvec_end; +extern char __machvec_start[], __machvec_end[]; extern char __uncached_start, __uncached_end; extern char __start_eh_frame[], __stop_eh_frame[]; diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index d606679a211e..57efaf5b82ae 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -20,8 +20,8 @@ #define MV_NAME_SIZE 32 #define for_each_mv(mv) \ - for ((mv) = (struct sh_machine_vector *)&__machvec_start; \ - (mv) && (unsigned long)(mv) < (unsigned long)&__machvec_end; \ + for ((mv) = (struct sh_machine_vector *)__machvec_start; \ + (mv) && (unsigned long)(mv) < (unsigned long)__machvec_end; \ (mv)++) static struct sh_machine_vector * __init get_mv_byname(const char *name) @@ -87,8 +87,8 @@ void __init sh_mv_setup(void) if (!machvec_selected) { unsigned long machvec_size; - machvec_size = ((unsigned long)&__machvec_end - - (unsigned long)&__machvec_start); + machvec_size = ((unsigned long)__machvec_end - + (unsigned long)__machvec_start); /* * Sanity check for machvec section alignment. Ensure @@ -102,7 +102,7 @@ void __init sh_mv_setup(void) * vector (usually the only one) from .machvec.init. */ if (machvec_size >= sizeof(struct sh_machine_vector)) - sh_mv = *(struct sh_machine_vector *)&__machvec_start; + sh_mv = *(struct sh_machine_vector *)__machvec_start; } pr_notice("Booting machvec: %s\n", get_system_type()); diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 856081761b0f..2cf7971d7f6c 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -33,9 +33,6 @@ extern volatile unsigned long cpu_callin_map[NR_CPUS]; extern cpumask_t smp_commenced_mask; extern struct linux_prom_registers smp_penguin_ctable; -typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long); - void cpu_panic(void); /* @@ -57,7 +54,7 @@ void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); struct sparc32_ipi_ops { - void (*cross_call)(smpfunc_t func, cpumask_t mask, unsigned long arg1, + void (*cross_call)(void *func, cpumask_t mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4); void (*resched)(int cpu); @@ -66,28 +63,28 @@ struct sparc32_ipi_ops { }; extern const struct sparc32_ipi_ops *sparc32_ipi_ops; -static inline void xc0(smpfunc_t func) +static inline void xc0(void *func) { sparc32_ipi_ops->cross_call(func, *cpu_online_mask, 0, 0, 0, 0); } -static inline void xc1(smpfunc_t func, unsigned long arg1) +static inline void xc1(void *func, unsigned long arg1) { sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); } -static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) +static inline void xc2(void *func, unsigned long arg1, unsigned long arg2) { sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); } -static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, +static inline void xc3(void *func, unsigned long arg1, unsigned long arg2, unsigned long arg3) { sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); } -static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, +static inline void xc4(void *func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { sparc32_ipi_ops->cross_call(func, *cpu_online_mask, diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 1eed26d423fb..991e9ad3d3e8 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -359,7 +359,7 @@ void leonsmp_ipi_interrupt(void) } static struct smp_funcall { - smpfunc_t func; + void *func; unsigned long arg1; unsigned long arg2; unsigned long arg3; @@ -372,7 +372,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, +static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -384,7 +384,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, { /* If you make changes here, make sure gcc generates proper code... */ - register smpfunc_t f asm("i0") = func; + register void *f asm("i0") = func; register unsigned long a1 asm("i1") = arg1; register unsigned long a2 asm("i2") = arg2; register unsigned long a3 asm("i3") = arg3; @@ -444,11 +444,13 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, /* Running cross calls. */ void leon_cross_call_irq(void) { + void (*func)(unsigned long, unsigned long, unsigned long, unsigned long, + unsigned long) = ccall_info.func; int i = smp_processor_id(); ccall_info.processors_in[i] = 1; - ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, - ccall_info.arg4, ccall_info.arg5); + func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, ccall_info.arg4, + ccall_info.arg5); ccall_info.processors_out[i] = 1; } diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index ff30f03beb7c..9a62a5cf3337 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -268,7 +268,7 @@ static void sun4d_ipi_resched(int cpu) } static struct smp_funcall { - smpfunc_t func; + void *func; unsigned long arg1; unsigned long arg2; unsigned long arg3; @@ -281,7 +281,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, +static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -296,7 +296,7 @@ static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, * If you make changes here, make sure * gcc generates proper code... */ - register smpfunc_t f asm("i0") = func; + register void *f asm("i0") = func; register unsigned long a1 asm("i1") = arg1; register unsigned long a2 asm("i2") = arg2; register unsigned long a3 asm("i3") = arg3; @@ -353,11 +353,13 @@ static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, /* Running cross calls. */ void smp4d_cross_call_irq(void) { + void (*func)(unsigned long, unsigned long, unsigned long, unsigned long, + unsigned long) = ccall_info.func; int i = hard_smp_processor_id(); ccall_info.processors_in[i] = 1; - ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, - ccall_info.arg4, ccall_info.arg5); + func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, ccall_info.arg4, + ccall_info.arg5); ccall_info.processors_out[i] = 1; } diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 228a6527082d..056df034e79e 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -157,7 +157,7 @@ static void sun4m_ipi_mask_one(int cpu) } static struct smp_funcall { - smpfunc_t func; + void *func; unsigned long arg1; unsigned long arg2; unsigned long arg3; @@ -170,7 +170,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void sun4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, +static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -230,11 +230,13 @@ static void sun4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, /* Running cross calls. */ void smp4m_cross_call_irq(void) { + void (*func)(unsigned long, unsigned long, unsigned long, unsigned long, + unsigned long) = ccall_info.func; int i = smp_processor_id(); ccall_info.processors_in[i] = 1; - ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, - ccall_info.arg4, ccall_info.arg5); + func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, ccall_info.arg4, + ccall_info.arg5); ccall_info.processors_out[i] = 1; } diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index a9aa6a92c7fe..13f027afc875 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -1636,19 +1636,19 @@ static void __init get_srmmu_type(void) /* Local cross-calls. */ static void smp_flush_page_for_dma(unsigned long page) { - xc1((smpfunc_t) local_ops->page_for_dma, page); + xc1(local_ops->page_for_dma, page); local_ops->page_for_dma(page); } static void smp_flush_cache_all(void) { - xc0((smpfunc_t) local_ops->cache_all); + xc0(local_ops->cache_all); local_ops->cache_all(); } static void smp_flush_tlb_all(void) { - xc0((smpfunc_t) local_ops->tlb_all); + xc0(local_ops->tlb_all); local_ops->tlb_all(); } @@ -1659,7 +1659,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm) cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc1((smpfunc_t) local_ops->cache_mm, (unsigned long) mm); + xc1(local_ops->cache_mm, (unsigned long)mm); local_ops->cache_mm(mm); } } @@ -1671,7 +1671,7 @@ static void smp_flush_tlb_mm(struct mm_struct *mm) cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) { - xc1((smpfunc_t) local_ops->tlb_mm, (unsigned long) mm); + xc1(local_ops->tlb_mm, (unsigned long)mm); if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); @@ -1691,8 +1691,8 @@ static void smp_flush_cache_range(struct vm_area_struct *vma, cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc3((smpfunc_t) local_ops->cache_range, - (unsigned long) vma, start, end); + xc3(local_ops->cache_range, (unsigned long)vma, start, + end); local_ops->cache_range(vma, start, end); } } @@ -1708,8 +1708,8 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma, cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc3((smpfunc_t) local_ops->tlb_range, - (unsigned long) vma, start, end); + xc3(local_ops->tlb_range, (unsigned long)vma, start, + end); local_ops->tlb_range(vma, start, end); } } @@ -1723,8 +1723,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc2((smpfunc_t) local_ops->cache_page, - (unsigned long) vma, page); + xc2(local_ops->cache_page, (unsigned long)vma, page); local_ops->cache_page(vma, page); } } @@ -1738,8 +1737,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc2((smpfunc_t) local_ops->tlb_page, - (unsigned long) vma, page); + xc2(local_ops->tlb_page, (unsigned long)vma, page); local_ops->tlb_page(vma, page); } } @@ -1753,7 +1751,7 @@ static void smp_flush_page_to_ram(unsigned long page) * XXX This experiment failed, research further... -DaveM */ #if 1 - xc1((smpfunc_t) local_ops->page_to_ram, page); + xc1(local_ops->page_to_ram, page); #endif local_ops->page_to_ram(page); } @@ -1764,8 +1762,7 @@ static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) cpumask_copy(&cpu_mask, mm_cpumask(mm)); cpumask_clear_cpu(smp_processor_id(), &cpu_mask); if (!cpumask_empty(&cpu_mask)) - xc2((smpfunc_t) local_ops->sig_insns, - (unsigned long) mm, insn_addr); + xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr); local_ops->sig_insns(mm, insn_addr); } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 78de31ac1da7..ad4ff3b0e91e 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -6,6 +6,7 @@ config UML bool default y select ARCH_EPHEMERAL_INODES + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV select ARCH_HAS_STRNCPY_FROM_USER diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index cb667c9225ab..fd575ecbcaec 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define __NO_FORTIFY #include <linux/types.h> #include <linux/module.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 089c20cefd2b..088af7c84e5d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -107,6 +107,8 @@ config X86 select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 + select ARCH_SUPPORTS_CFI_CLANG if X86_64 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_USE_BUILTIN_BSWAP @@ -257,6 +259,7 @@ config X86 select HAVE_STATIC_CALL_INLINE if HAVE_OBJTOOL select HAVE_PREEMPT_DYNAMIC_CALL select HAVE_RSEQ + select HAVE_RUST if X86_64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK @@ -449,6 +452,11 @@ config X86_X2APIC This allows 32-bit apic IDs (so it can support very large systems), and accesses the local apic via MSRs not via mmio. + Some Intel systems circa 2022 and later are locked into x2APIC mode + and can not fall back to the legacy APIC modes if SGX or TDX are + enabled in the BIOS. They will be unable to boot without enabling + this option. + If you don't know what to do here, say N. config X86_MPPARSE @@ -1920,7 +1928,7 @@ endchoice config X86_SGX bool "Software Guard eXtensions (SGX)" - depends on X86_64 && CPU_SUP_INTEL + depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC depends on CRYPTO=y depends on CRYPTO_SHA256=y select SRCU @@ -2570,7 +2578,7 @@ menuconfig APM 1) make sure that you have enough swap space and that it is enabled. - 2) pass the "no-hlt" option to the kernel + 2) pass the "idle=poll" option to the kernel 3) switch on floating point emulation in the kernel and pass the "no387" option to the kernel 4) pass the "floppy=nodma" option to the kernel diff --git a/arch/x86/Makefile b/arch/x86/Makefile index bafbd905e6e7..2d7e640674c6 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -68,6 +68,7 @@ export BITS # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 ifeq ($(CONFIG_X86_KERNEL_IBT),y) # @@ -155,8 +156,17 @@ else cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic KBUILD_CFLAGS += $(cflags-y) + rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8 + rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona + rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2 + rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom + rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic + KBUILD_RUSTFLAGS += $(rustflags-y) + KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel + KBUILD_RUSTFLAGS += -Cno-redzone=y + KBUILD_RUSTFLAGS += -Ccode-model=kernel endif # diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 02e1dea11d94..8518ae214c9b 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -19,13 +19,13 @@ static inline bool constant_test_bit(int nr, const void *addr) { - const u32 *p = (const u32 *)addr; + const u32 *p = addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } static inline bool variable_test_bit(int nr, const void *addr) { bool v; - const u32 *p = (const u32 *)addr; + const u32 *p = addr; asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr)); return v; diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 802d71582689..4a43e072d2d1 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> .file "blowfish-x86_64-asm.S" .text @@ -141,7 +142,7 @@ SYM_FUNC_START(__blowfish_enc_blk) RET; SYM_FUNC_END(__blowfish_enc_blk) -SYM_FUNC_START(blowfish_dec_blk) +SYM_TYPED_FUNC_START(blowfish_dec_blk) /* input: * %rdi: ctx * %rsi: dst @@ -332,7 +333,7 @@ SYM_FUNC_START(__blowfish_enc_blk_4way) RET; SYM_FUNC_END(__blowfish_enc_blk_4way) -SYM_FUNC_START(blowfish_dec_blk_4way) +SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) /* input: * %rdi: ctx * %rsi: dst diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 12f6c4d714cd..381d3333b996 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -91,7 +91,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),) endif endif -$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) $(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # @@ -153,6 +153,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += -fno-stack-protector KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c601939a74b1..c20d8cd47c48 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2102,6 +2102,15 @@ static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(mem-loads, mem_ld_grt, "event=0xd0,umask=0x5,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_grt, "event=0xd0,umask=0x6"); + +static struct attribute *grt_mem_attrs[] = { + EVENT_PTR(mem_ld_grt), + EVENT_PTR(mem_st_grt), + NULL +}; + static struct extra_reg intel_grt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), @@ -5975,6 +5984,36 @@ __init int intel_pmu_init(void) name = "Tremont"; break; + case INTEL_FAM6_ALDERLAKE_N: + x86_pmu.mid_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + x86_pmu.extra_regs = intel_grt_extra_regs; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + intel_pmu_pebs_data_source_grt(); + x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + x86_pmu.limit_period = spr_limit_period; + td_attr = tnt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = nhm_format_attr; + pr_cont("Gracemont events, "); + name = "gracemont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -6317,7 +6356,6 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_ALDERLAKE_N: case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: /* diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index de1f55d51784..ac973c6f82ad 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -110,13 +110,18 @@ void __init intel_pmu_pebs_data_source_skl(bool pmem) __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); } -static void __init intel_pmu_pebs_data_source_grt(u64 *data_source) +static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source) { data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); } +void __init intel_pmu_pebs_data_source_grt(void) +{ + __intel_pmu_pebs_data_source_grt(pebs_data_source); +} + void __init intel_pmu_pebs_data_source_adl(void) { u64 *data_source; @@ -127,7 +132,7 @@ void __init intel_pmu_pebs_data_source_adl(void) data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); - intel_pmu_pebs_data_source_grt(data_source); + __intel_pmu_pebs_data_source_grt(data_source); } static u64 precise_store_data(u64 status) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ba3d24a6a4ec..266143abcbd8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1516,6 +1516,8 @@ void intel_pmu_pebs_data_source_skl(bool pmem); void intel_pmu_pebs_data_source_adl(void); +void intel_pmu_pebs_data_source_grt(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h index e003a01b7c67..1dd14381bcb6 100644 --- a/arch/x86/include/asm/acrn.h +++ b/arch/x86/include/asm/acrn.h @@ -10,6 +10,15 @@ /* Bit 0 indicates whether guest VM is privileged */ #define ACRN_FEATURE_PRIVILEGED_VM BIT(0) +/* + * Timing Information. + * This leaf returns the current TSC frequency in kHz. + * + * EAX: (Virtual) TSC frequency in kHz. + * EBX, ECX, EDX: RESERVED (reserved fields are set to zero). + */ +#define ACRN_CPUID_TIMING_INFO 0x40000010 + void acrn_setup_intr_handler(void (*handler)(void)); void acrn_remove_intr_handler(void); @@ -21,6 +30,11 @@ static inline u32 acrn_cpuid_base(void) return 0; } +static inline unsigned long acrn_get_tsc_khz(void) +{ + return cpuid_eax(ACRN_CPUID_TIMING_INFO); +} + /* * Hypercalls for ACRN * diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 0fe9de58af31..2edf68475fec 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -247,17 +247,30 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) variable_test_bit(nr, addr); } +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + asm("rep; bsf %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + /** * __ffs - find first set bit in word * @word: The word to search * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __ffs(unsigned long word) +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable_ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) - : "rm" (word)); + : "r" (~word)); return word; } @@ -267,13 +280,10 @@ static __always_inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static __always_inline unsigned long ffz(unsigned long word) -{ - asm("rep; bsf %1,%0" - : "=r" (word) - : "r" (~word)); - return word; -} +#define ffz(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(~word) : \ + variable_ffz(word)) /* * __fls: find last set bit in word @@ -292,18 +302,7 @@ static __always_inline unsigned long __fls(unsigned long word) #undef ADDR #ifdef __KERNEL__ -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static __always_inline int ffs(int x) +static __always_inline int variable_ffs(int x) { int r; @@ -334,6 +333,19 @@ static __always_inline int ffs(int x) } /** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +/** * fls - find last set bit in word * @x: the word to search * diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h new file mode 100644 index 000000000000..58dacd90daef --- /dev/null +++ b/arch/x86/include/asm/cfi.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CFI_H +#define _ASM_X86_CFI_H + +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2022 Google LLC + */ + +#include <linux/cfi.h> + +#ifdef CONFIG_CFI_CLANG +enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); +#else +static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) +{ + return BUG_TRAP_TYPE_NONE; +} +#endif /* CONFIG_CFI_CLANG */ + +#endif /* _ASM_X86_CFI_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 8cbf623f0ecf..b472ef76826a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -94,4 +94,6 @@ static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1, return p1 & p2; } +extern u64 x86_read_arch_cap_msr(void); + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 73ca20049835..f484d656d34e 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -43,6 +43,18 @@ #endif /* __ASSEMBLY__ */ +#define __CFI_TYPE(name) \ + SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ + .fill 11, 1, 0x90 ASM_NL \ + .byte 0xb8 ASM_NL \ + .long __kcfi_typeid_##name ASM_NL \ + SYM_FUNC_END(__cfi_##name) + +/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + /* SYM_FUNC_START -- use for global functions */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 97198001e567..6115bb3d5795 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,7 +95,7 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(const struct timespec64 *now); +extern int mach_set_cmos_time(const struct timespec64 *now); extern void mach_get_cmos_time(struct timespec64 *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cc73061e7255..6e986088817d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -42,6 +42,7 @@ #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) +#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 0c3d3440fe27..74ecc2bd6cd0 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -9,6 +9,7 @@ struct ucode_patch { struct list_head plist; void *data; /* Intel uses only this one */ + unsigned int size; u32 patch_id; u16 equiv_cpu; }; @@ -32,9 +33,6 @@ enum ucode_state { }; struct microcode_ops { - enum ucode_state (*request_microcode_user) (int cpu, - const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, struct device *, bool refresh_fw); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6674bdb096f3..1e086b37a307 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -155,6 +155,11 @@ * Return Stack Buffer Predictions. */ +#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* + * IA32_XAPIC_DISABLE_STATUS MSR + * supported + */ + #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* * Writeback and invalidate the @@ -1054,4 +1059,12 @@ #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 +/* x2APIC locked status */ +#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD +#define LEGACY_XAPIC_DISABLED BIT(0) /* + * x2APIC mode is locked and + * disabling x2APIC will cause + * a #GP + */ + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 964442b99245..2a0b8dd4ec33 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -743,6 +743,7 @@ extern void default_banner(void); word 771b; \ .byte ptype; \ .byte 772b-771b; \ + _ASM_ALIGN; \ .popsection diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 89df6c6617f5..f3d601574730 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -294,6 +294,7 @@ extern struct paravirt_patch_template pv_ops; " .byte " type "\n" \ " .byte 772b-771b\n" \ " .short " clobber "\n" \ + _ASM_ALIGN "\n" \ ".popsection\n" /* Generate patchable code, with the default asm parameters. */ @@ -328,7 +329,7 @@ int paravirt_disable_iospace(void); * Unfortunately, this is a relatively slow operation for modern CPUs, * because it cannot necessarily determine what the destination * address is. In this case, the address is a runtime constant, so at - * the very least we can patch the call to e a simple direct call, or + * the very least we can patch the call to a simple direct call, or, * ideally, patch an inline implementation into the callsite. (Direct * calls are essentially free, because the call and return addresses * are completely predictable.) @@ -339,10 +340,10 @@ int paravirt_disable_iospace(void); * on the stack. All caller-save registers (eax,edx,ecx) are expected * to be modified (either clobbered or used for return values). * X86_64, on the other hand, already specifies a register-based calling - * conventions, returning at %rax, with parameters going on %rdi, %rsi, + * conventions, returning at %rax, with parameters going in %rdi, %rsi, * %rdx, and %rcx. Note that for this reason, x86_64 does not need any * special handling for dealing with 4 arguments, unlike i386. - * However, x86_64 also have to clobber all caller saved registers, which + * However, x86_64 also has to clobber all caller saved registers, which * unfortunately, are quite a bit (r8 - r11) * * The call instruction itself is marked by placing its start address @@ -360,22 +361,22 @@ int paravirt_disable_iospace(void); * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. * It could be extended to more arguments, but there would be little * to be gained from that. For each number of arguments, there are - * the two VCALL and CALL variants for void and non-void functions. + * two VCALL and CALL variants for void and non-void functions. * * When there is a return value, the invoker of the macro must specify * the return type. The macro then uses sizeof() on that type to - * determine whether its a 32 or 64 bit value, and places the return + * determine whether it's a 32 or 64 bit value and places the return * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * 64-bit). For x86_64 machines, it just returns in %rax regardless of * the return value size. * - * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments * in low,high order * * Small structures are passed and returned in registers. The macro * calling convention can't directly deal with this, so the wrapper - * functions must do this. + * functions must do it. * * These PVOP_* macros are only defined within this header. This * means that all uses must be wrapped in inline functions. This also @@ -414,8 +415,17 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) -/* void functions are still allowed [re]ax for scratch */ +/* + * void functions are still allowed [re]ax for scratch. + * + * The ZERO_CALL_USED REGS feature may end up zeroing out callee-saved + * registers. Make sure we model this with the appropriate clobbers. + */ +#ifdef CONFIG_ZERO_CALL_USED_REGS +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS +#else #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#endif #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index d60ed0668a59..d24b04ebf950 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -81,6 +81,15 @@ static void __resctrl_sched_in(void) } } +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + unsigned int scale = boot_cpu_data.x86_cache_occ_scale; + + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ + val /= scale; + return val * scale; +} + static inline void resctrl_sched_in(void) { if (static_branch_likely(&rdt_enable_key)) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 81a0211a372d..a73bced40e24 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,16 +21,6 @@ DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); -static inline struct cpumask *cpu_llc_shared_mask(int cpu) -{ - return per_cpu(cpu_llc_shared_map, cpu); -} - -static inline struct cpumask *cpu_l2c_shared_mask(int cpu) -{ - return per_cpu(cpu_l2c_shared_map, cpu); -} - DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); @@ -172,6 +162,16 @@ extern int safe_smp_processor_id(void); # define safe_smp_processor_id() smp_processor_id() #endif +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return per_cpu(cpu_llc_shared_map, cpu); +} + +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} + #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) @@ -179,6 +179,11 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } + +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return (struct cpumask *)cpumask_of(0); +} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 913e593a3b45..1ec6a9ea2328 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -448,7 +448,7 @@ do { \ #ifdef CONFIG_X86_32 /* - * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. + * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses * both ESI and EDI for the memory operand, compilation will fail if the error @@ -461,11 +461,12 @@ do { \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ - "mov $0, %%ecx\n\t" \ - "setz %%cl\n" \ + "mov $0, %[result]\n\t" \ + "setz %b[result]\n" \ "2:\n" \ - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ - : [result]"=c" (__result), \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[result]) \ + : [result] "=q" (__result), \ "+A" (__old), \ [ptr] "+m" (*_ptr) \ : "b" ((u32)__new), \ @@ -502,9 +503,6 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - #ifdef CONFIG_ARCH_HAS_COPY_MC unsigned long __must_check copy_mc_to_kernel(void *to, const void *from, unsigned len); @@ -526,6 +524,8 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 #ifdef CONFIG_X86_32 +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); # include <asm/uaccess_32.h> #else # include <asm/uaccess_64.h> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 45697e04d771..d13d71af5cf6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -79,4 +79,49 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) kasan_check_write(dst, size); return __copy_user_flushcache(dst, src, size); } + +/* + * Zero Userspace. + */ + +__must_check unsigned long +clear_user_original(void __user *addr, unsigned long len); +__must_check unsigned long +clear_user_rep_good(void __user *addr, unsigned long len); +__must_check unsigned long +clear_user_erms(void __user *addr, unsigned long len); + +static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) +{ + might_fault(); + stac(); + + /* + * No memory constraint because it doesn't change any memory gcc + * knows about. + */ + asm volatile( + "1:\n\t" + ALTERNATIVE_3("rep stosb", + "call clear_user_erms", ALT_NOT(X86_FEATURE_FSRM), + "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS), + "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT + : "a" (0) + /* rep_good clobbers %rdx */ + : "rdx"); + + clac(); + + return size; +} + +static __always_inline unsigned long clear_user(void __user *to, unsigned long n) +{ + if (access_ok(to, n)) + return __clear_user(to, n); + return n; +} #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a20a5ebfacd7..1286a73ebdbc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -139,6 +139,8 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o +obj-$(CONFIG_CFI_CLANG) += cfi.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 62f6b8b7c4a5..5cadcea035e0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -453,6 +453,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) return ret; i += ret; + /* + * The compiler is supposed to EMIT an INT3 after every unconditional + * JMP instruction due to AMD BTC. However, if the compiler is too old + * or SLS isn't enabled, we still need an INT3 after indirect JMPs + * even on Intel. + */ + if (op == JMP32_INSN_OPCODE && i < insn->length) + bytes[i++] = INT3_INSN_OPCODE; + for (; i < insn->length;) bytes[i++] = BYTES_NOP1; @@ -1319,22 +1328,23 @@ struct bp_patching_desc { atomic_t refs; }; -static struct bp_patching_desc *bp_desc; +static struct bp_patching_desc bp_desc; static __always_inline -struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) +struct bp_patching_desc *try_get_desc(void) { - /* rcu_dereference */ - struct bp_patching_desc *desc = __READ_ONCE(*descp); + struct bp_patching_desc *desc = &bp_desc; - if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) + if (!arch_atomic_inc_not_zero(&desc->refs)) return NULL; return desc; } -static __always_inline void put_desc(struct bp_patching_desc *desc) +static __always_inline void put_desc(void) { + struct bp_patching_desc *desc = &bp_desc; + smp_mb__before_atomic(); arch_atomic_dec(&desc->refs); } @@ -1367,15 +1377,15 @@ noinstr int poke_int3_handler(struct pt_regs *regs) /* * Having observed our INT3 instruction, we now must observe - * bp_desc: + * bp_desc with non-zero refcount: * - * bp_desc = desc INT3 + * bp_desc.refs = 1 INT3 * WMB RMB - * write INT3 if (desc) + * write INT3 if (bp_desc.refs != 0) */ smp_rmb(); - desc = try_get_desc(&bp_desc); + desc = try_get_desc(); if (!desc) return 0; @@ -1429,7 +1439,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs) ret = 1; out_put: - put_desc(desc); + put_desc(); return ret; } @@ -1460,18 +1470,20 @@ static int tp_vec_nr; */ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { - struct bp_patching_desc desc = { - .vec = tp, - .nr_entries = nr_entries, - .refs = ATOMIC_INIT(1), - }; unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; int do_sync; lockdep_assert_held(&text_mutex); - smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ + bp_desc.vec = tp; + bp_desc.nr_entries = nr_entries; + + /* + * Corresponds to the implicit memory barrier in try_get_desc() to + * ensure reading a non-zero refcount provides up to date bp_desc data. + */ + atomic_set_release(&bp_desc.refs, 1); /* * Corresponding read barrier in int3 notifier for making sure the @@ -1559,12 +1571,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries text_poke_sync(); /* - * Remove and synchronize_rcu(), except we have a very primitive - * refcount based completion. + * Remove and wait for refs to be zero. */ - WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ - if (!atomic_dec_and_test(&desc.refs)) - atomic_cond_read_acquire(&desc.refs, !VAL); + if (!atomic_dec_and_test(&bp_desc.refs)) + atomic_cond_read_acquire(&bp_desc.refs, !VAL); } static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 194d54eed537..19a0207e529f 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -53,7 +53,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * of only flushing when an mapping is reused. With it true the GART is * flushed for every mapping. Problem is that doing the lazy flush seems * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). + * has been also seen with Qlogic at least). */ static int iommu_fullflush = 1; diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 7a5630d904b2..4feaa670d578 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -36,7 +36,7 @@ /* * Using 512M as goal, in case kexec will load kernel_big * that will do the on-position decompress, and could overlap with - * with the gart aperture that is used. + * the gart aperture that is used. * Sequence: * kernel_small * ==> kexec (with kdump trigger path or gart still enabled) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6d303d1d276c..c6876d3ea4b1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -61,6 +61,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include <asm/irq_regs.h> +#include <asm/cpu.h> unsigned int num_processors; @@ -1751,11 +1752,26 @@ EXPORT_SYMBOL_GPL(x2apic_mode); enum { X2APIC_OFF, - X2APIC_ON, X2APIC_DISABLED, + /* All states below here have X2APIC enabled */ + X2APIC_ON, + X2APIC_ON_LOCKED }; static int x2apic_state; +static bool x2apic_hw_locked(void) +{ + u64 ia32_cap; + u64 msr; + + ia32_cap = x86_read_arch_cap_msr(); + if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); + return (msr & LEGACY_XAPIC_DISABLED); + } + return false; +} + static void __x2apic_disable(void) { u64 msr; @@ -1793,6 +1809,10 @@ static int __init setup_nox2apic(char *str) apicid); return 0; } + if (x2apic_hw_locked()) { + pr_warn("APIC locked in x2apic mode, can't disable\n"); + return 0; + } pr_warn("x2apic already enabled.\n"); __x2apic_disable(); } @@ -1807,10 +1827,18 @@ early_param("nox2apic", setup_nox2apic); void x2apic_setup(void) { /* - * If x2apic is not in ON state, disable it if already enabled + * Try to make the AP's APIC state match that of the BSP, but if the + * BSP is unlocked and the AP is locked then there is a state mismatch. + * Warn about the mismatch in case a GP fault occurs due to a locked AP + * trying to be turned off. + */ + if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked()) + pr_warn("x2apic lock mismatch between BSP and AP.\n"); + /* + * If x2apic is not in ON or LOCKED state, disable it if already enabled * from BIOS. */ - if (x2apic_state != X2APIC_ON) { + if (x2apic_state < X2APIC_ON) { __x2apic_disable(); return; } @@ -1831,6 +1859,11 @@ static __init void x2apic_disable(void) if (x2apic_id >= 255) panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + if (x2apic_hw_locked()) { + pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id); + return; + } + __x2apic_disable(); register_lapic_address(mp_lapic_addr); } @@ -1889,7 +1922,10 @@ void __init check_x2apic(void) if (x2apic_enabled()) { pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); x2apic_mode = 1; - x2apic_state = X2APIC_ON; + if (x2apic_hw_locked()) + x2apic_state = X2APIC_ON_LOCKED; + else + x2apic_state = X2APIC_ON; } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { x2apic_state = X2APIC_DISABLED; } diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c new file mode 100644 index 000000000000..8674a5c0c031 --- /dev/null +++ b/arch/x86/kernel/cfi.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2022 Google LLC + */ +#include <asm/cfi.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> +#include <linux/string.h> + +/* + * Returns the target address and the expected type when regs->ip points + * to a compiler-generated CFI trap. + */ +static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target, + u32 *type) +{ + char buffer[MAX_INSN_SIZE]; + struct insn insn; + int offset = 0; + + *target = *type = 0; + + /* + * The compiler generates the following instruction sequence + * for indirect call checks: + * + *  movl -<id>, %r10d ; 6 bytes + * addl -4(%reg), %r10d ; 4 bytes + * je .Ltmp1 ; 2 bytes + * ud2 ; <- regs->ip + * .Ltmp1: + * + * We can decode the expected type and the target address from the + * movl/addl instructions. + */ + if (copy_from_kernel_nofault(buffer, (void *)regs->ip - 12, MAX_INSN_SIZE)) + return false; + if (insn_decode_kernel(&insn, &buffer[offset])) + return false; + if (insn.opcode.value != 0xBA) + return false; + + *type = -(u32)insn.immediate.value; + + if (copy_from_kernel_nofault(buffer, (void *)regs->ip - 6, MAX_INSN_SIZE)) + return false; + if (insn_decode_kernel(&insn, &buffer[offset])) + return false; + if (insn.opcode.value != 0x3) + return false; + + /* Read the target address from the register. */ + offset = insn_get_modrm_rm_off(&insn, regs); + if (offset < 0) + return false; + + *target = *(unsigned long *)((void *)regs + offset); + + return true; +} + +/* + * Checks if a ud2 trap is because of a CFI failure, and handles the trap + * if needed. Returns a bug_trap_type value similarly to report_bug. + */ +enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) +{ + unsigned long target; + u32 type; + + if (!is_cfi_trap(regs->ip)) + return BUG_TRAP_TYPE_NONE; + + if (!decode_cfi_insn(regs, &target, &type)) + return report_cfi_failure_noaddr(regs, regs->ip); + + return report_cfi_failure(regs, regs->ip, &target, type); +} + +/* + * Ensure that __kcfi_typeid_ symbols are emitted for functions that may + * not be indirectly called with all configurations. + */ +__ADDRESSABLE(__memcpy) diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index 23f5f27b5a02..485441b7f030 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -28,6 +28,9 @@ static void __init acrn_init_platform(void) { /* Setup the IDT for ACRN hypervisor callback */ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback); + + x86_platform.calibrate_tsc = acrn_get_tsc_khz; + x86_platform.calibrate_cpu = acrn_get_tsc_khz; } static bool acrn_x2apic_available(void) diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 993697e71854..03851240c3e3 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/tboot.h> +#include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/msr-index.h> #include <asm/processor.h> #include <asm/vmx.h> -#include "cpu.h" #undef pr_fmt #define pr_fmt(fmt) "x86/cpu: " fmt diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index 717192915f28..8ed341714686 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -29,15 +29,26 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) { struct mce m; + int lsb; if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) return; + /* + * Even if the ->validation_bits are set for address mask, + * to be extra safe, check and reject an error radius '0', + * and fall back to the default page size. + */ + if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) + lsb = find_first_bit((void *)&mem_err->physical_addr_mask, PAGE_SHIFT); + else + lsb = PAGE_SHIFT; + mce_setup(&m); m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f; - m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT; + m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb; if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 8b2fcdfa6d31..e7410e98fc1f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -788,6 +788,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, kfree(patch); return -EINVAL; } + patch->size = *patch_size; mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); proc_id = mc_hdr->processor_rev_id; @@ -869,7 +870,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) return ret; memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + memcpy(amd_ucode_patch, p->data, min_t(u32, p->size, PATCH_MAX_SIZE)); return ret; } @@ -924,12 +925,6 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, return ret; } -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - return UCODE_ERROR; -} - static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -938,7 +933,6 @@ static void microcode_fini_cpu_amd(int cpu) } static struct microcode_ops microcode_amd_ops = { - .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_amd, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index ad57e0e4d674..6a41cee242f6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -491,7 +491,7 @@ wait_for_siblings: */ static int microcode_reload_late(void) { - int ret; + int old = boot_cpu_data.microcode, ret; pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n"); pr_err("You should switch to early loading, if possible.\n"); @@ -503,7 +503,8 @@ static int microcode_reload_late(void) if (ret == 0) microcode_check(); - pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode); + pr_info("Reload completed, microcode revision: 0x%x -> 0x%x\n", + old, boot_cpu_data.microcode); return ret; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 025c8f0cd948..1fcbd671f1df 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -916,24 +916,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return ret; } -static enum ucode_state -request_microcode_user(int cpu, const void __user *buf, size_t size) -{ - struct iov_iter iter; - struct iovec iov; - - if (is_blacklisted(cpu)) - return UCODE_NFOUND; - - iov.iov_base = (void __user *)buf; - iov.iov_len = size; - iov_iter_init(&iter, WRITE, &iov, 1, size); - - return generic_load_microcode(cpu, &iter); -} - static struct microcode_ops microcode_intel_ops = { - .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode_intel, diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index bb1c3f5f60c8..de62b0b87ced 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -147,7 +147,6 @@ static inline void cache_alloc_hsw_probe(void) r->cache.shareable_bits = 0xc0000; r->cache.min_cbm_bits = 2; r->alloc_capable = true; - r->alloc_enabled = true; rdt_alloc_capable = true; } @@ -211,7 +210,6 @@ static bool __get_mem_config_intel(struct rdt_resource *r) thread_throttle_mode_init(); r->alloc_capable = true; - r->alloc_enabled = true; return true; } @@ -242,7 +240,6 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->data_width = 4; r->alloc_capable = true; - r->alloc_enabled = true; return true; } @@ -261,7 +258,6 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->cache.shareable_bits = ebx & r->default_ctrl; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; - r->alloc_enabled = true; } static void rdt_get_cdp_config(int level) @@ -300,7 +296,7 @@ mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) * that can be written to QOS_MSRs. * There are currently no SKUs which support non linear delay values. */ -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) +static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { if (r->membw.delay_linear) return MAX_MBA_BW - bw; @@ -401,7 +397,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } -void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) +static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); int i; @@ -410,12 +406,17 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) * Initialize the Control MSRs to having no control. * For Cache Allocation: Set all bits in cbm * For Memory Allocation: Set b/w requested to 100% - * and the bandwidth in MBps to U32_MAX */ - for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) { + for (i = 0; i < hw_res->num_closid; i++, dc++) *dc = r->default_ctrl; - *dm = MBA_MAX_MBPS; - } +} + +static void domain_free(struct rdt_hw_domain *hw_dom) +{ + kfree(hw_dom->arch_mbm_total); + kfree(hw_dom->arch_mbm_local); + kfree(hw_dom->ctrl_val); + kfree(hw_dom); } static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) @@ -423,23 +424,15 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct msr_param m; - u32 *dc, *dm; + u32 *dc; dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val), GFP_KERNEL); if (!dc) return -ENOMEM; - dm = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val), - GFP_KERNEL); - if (!dm) { - kfree(dc); - return -ENOMEM; - } - hw_dom->ctrl_val = dc; - hw_dom->mbps_val = dm; - setup_default_ctrlval(r, dc, dm); + setup_default_ctrlval(r, dc); m.low = 0; m.high = hw_res->num_closid; @@ -447,39 +440,31 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) return 0; } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +/** + * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters + * @num_rmid: The size of the MBM counter array + * @hw_dom: The domain that owns the allocated arrays + */ +static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom) { size_t tsize; - if (is_llc_occupancy_enabled()) { - d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); - if (!d->rmid_busy_llc) - return -ENOMEM; - INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); - } if (is_mbm_total_enabled()) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); + tsize = sizeof(*hw_dom->arch_mbm_total); + hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_total) return -ENOMEM; - } } if (is_mbm_local_enabled()) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); + tsize = sizeof(*hw_dom->arch_mbm_local); + hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_local) { + kfree(hw_dom->arch_mbm_total); + hw_dom->arch_mbm_total = NULL; return -ENOMEM; } } - if (is_mbm_enabled()) { - INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); - mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); - } - return 0; } @@ -502,6 +487,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) struct list_head *add_pos = NULL; struct rdt_hw_domain *hw_dom; struct rdt_domain *d; + int err; d = rdt_find_domain(r, id, &add_pos); if (IS_ERR(d)) { @@ -527,25 +513,22 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) rdt_domain_reconfigure_cdp(r); if (r->alloc_capable && domain_setup_ctrlval(r, d)) { - kfree(hw_dom); + domain_free(hw_dom); return; } - if (r->mon_capable && domain_setup_mon_state(r, d)) { - kfree(hw_dom->ctrl_val); - kfree(hw_dom->mbps_val); - kfree(hw_dom); + if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { + domain_free(hw_dom); return; } list_add_tail(&d->list, add_pos); - /* - * If resctrl is mounted, add - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - mkdir_mondata_subdir_allrdtgrp(r, d); + err = resctrl_online_domain(r, d); + if (err) { + list_del(&d->list); + domain_free(hw_dom); + } } static void domain_remove_cpu(int cpu, struct rdt_resource *r) @@ -563,27 +546,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { - /* - * If resctrl is mounted, remove all the - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - rmdir_mondata_subdir_allrdtgrp(r, d->id); + resctrl_offline_domain(r, d); list_del(&d->list); - if (r->mon_capable && is_mbm_enabled()) - cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { - /* - * When a package is going down, forcefully - * decrement rmid->ebusy. There is no way to know - * that the L3 was flushed and hence may lead to - * incorrect counts in rare scenarios, but leaving - * the RMID as busy creates RMID leaks if the - * package never comes back. - */ - __check_limbo(d, true); - cancel_delayed_work(&d->cqm_limbo); - } /* * rdt_domain "d" is going to be freed below, so clear @@ -591,13 +555,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (d->plr) d->plr->d = NULL; + domain_free(hw_dom); - kfree(hw_dom->ctrl_val); - kfree(hw_dom->mbps_val); - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); - kfree(hw_dom); return; } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 87666275eed9..1dafbdc5ac31 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -61,6 +61,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_domain *d) { struct resctrl_staged_config *cfg; + u32 closid = data->rdtgrp->closid; struct rdt_resource *r = s->res; unsigned long bw_val; @@ -72,6 +73,12 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; + + if (is_mba_sc(r)) { + d->mbps_val[closid] = bw_val; + return 0; + } + cfg->new_ctrl = bw_val; cfg->have_new_ctrl = true; @@ -261,14 +268,13 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type) static bool apply_config(struct rdt_hw_domain *hw_dom, struct resctrl_staged_config *cfg, u32 idx, - cpumask_var_t cpu_mask, bool mba_sc) + cpumask_var_t cpu_mask) { struct rdt_domain *dom = &hw_dom->d_resctrl; - u32 *dc = !mba_sc ? hw_dom->ctrl_val : hw_dom->mbps_val; - if (cfg->new_ctrl != dc[idx]) { + if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); - dc[idx] = cfg->new_ctrl; + hw_dom->ctrl_val[idx] = cfg->new_ctrl; return true; } @@ -276,6 +282,27 @@ static bool apply_config(struct rdt_hw_domain *hw_dom, return false; } +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + u32 idx = get_config_index(closid, t); + struct msr_param msr_param; + + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; + + hw_dom->ctrl_val[idx] = cfg_val; + + msr_param.res = r; + msr_param.low = idx; + msr_param.high = idx + 1; + hw_res->msr_update(d, &msr_param, r); + + return 0; +} + int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; @@ -284,14 +311,12 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) enum resctrl_conf_type t; cpumask_var_t cpu_mask; struct rdt_domain *d; - bool mba_sc; int cpu; u32 idx; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - mba_sc = is_mba_sc(r); msr_param.res = NULL; list_for_each_entry(d, &r->domains, list) { hw_dom = resctrl_to_arch_dom(d); @@ -301,7 +326,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) continue; idx = get_config_index(closid, t); - if (!apply_config(hw_dom, cfg, idx, cpu_mask, mba_sc)) + if (!apply_config(hw_dom, cfg, idx, cpu_mask)) continue; if (!msr_param.res) { @@ -315,11 +340,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) } } - /* - * Avoid writing the control msr with control values when - * MBA software controller is enabled - */ - if (cpumask_empty(cpu_mask) || mba_sc) + if (cpumask_empty(cpu_mask)) goto done; cpu = get_cpu(); /* Update resource control msr on this CPU if it's in cpu_mask. */ @@ -406,6 +427,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; + + /* + * Writes to mba_sc resources update the software controller, + * not the control MSR. + */ + if (is_mba_sc(r)) + continue; + ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret) goto out; @@ -433,9 +462,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); u32 idx = get_config_index(closid, type); - if (!is_mba_sc(r)) - return hw_dom->ctrl_val[idx]; - return hw_dom->mbps_val[idx]; + return hw_dom->ctrl_val[idx]; } static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) @@ -450,8 +477,12 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo if (sep) seq_puts(s, ";"); - ctrl_val = resctrl_arch_get_config(r, dom, closid, - schema->conf_type); + if (is_mba_sc(r)) + ctrl_val = dom->mbps_val[closid]; + else + ctrl_val = resctrl_arch_get_config(r, dom, closid, + schema->conf_type); + seq_printf(s, r->format_str, dom->id, max_data_width, ctrl_val); sep = true; @@ -518,7 +549,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; - struct rdt_hw_resource *hw_res; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -538,8 +568,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) domid = md.u.domid; evtid = md.u.evtid; - hw_res = &rdt_resources_all[resid]; - r = &hw_res->r_resctrl; + r = &rdt_resources_all[resid].r_resctrl; d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; @@ -548,12 +577,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) mon_event_read(&rr, r, d, rdtgrp, evtid, false); - if (rr.val & RMID_VAL_ERROR) + if (rr.err == -EIO) seq_puts(m, "Error\n"); - else if (rr.val & RMID_VAL_UNAVAIL) + else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); else - seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale); + seq_printf(m, "%llu\n", rr.val); out: rdtgroup_kn_unlock(of->kn); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 1d647188a43b..5f7128686cfd 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -22,21 +22,12 @@ #define L2_QOS_CDP_ENABLE 0x01ULL -/* - * Event IDs are used to program IA32_QM_EVTSEL before reading event - * counter from IA32_QM_CTR - */ -#define QOS_L3_OCCUP_EVENT_ID 0x01 -#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02 -#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03 - #define CQM_LIMBOCHECK_INTERVAL 1000 #define MBM_CNTR_WIDTH_BASE 24 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 -#define MBA_MAX_MBPS U32_MAX #define MAX_MBA_BW_AMD 0x800 #define MBM_CNTR_WIDTH_OFFSET_AMD 20 @@ -74,7 +65,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); * @list: entry in &rdt_resource->evt_list */ struct mon_evt { - u32 evtid; + enum resctrl_event_id evtid; char *name; struct list_head list; }; @@ -91,9 +82,9 @@ struct mon_evt { union mon_data_bits { void *priv; struct { - unsigned int rid : 10; - unsigned int evtid : 8; - unsigned int domid : 14; + unsigned int rid : 10; + enum resctrl_event_id evtid : 8; + unsigned int domid : 14; } u; }; @@ -101,12 +92,12 @@ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; struct rdt_domain *d; - int evtid; + enum resctrl_event_id evtid; bool first; + int err; u64 val; }; -extern unsigned int resctrl_cqm_threshold; extern bool rdt_alloc_capable; extern bool rdt_mon_capable; extern unsigned int rdt_mon_features; @@ -288,35 +279,45 @@ struct rftype { /** * struct mbm_state - status for each MBM counter in each domain - * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) - * @prev_msr: Value of IA32_QM_CTR for this RMID last time we read it - * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting + * @prev_bw_bytes: Previous bytes value read for bandwidth calculation * @prev_bw: The most recent bandwidth in MBps * @delta_bw: Difference between the current and previous bandwidth * @delta_comp: Indicates whether to compute the delta_bw */ struct mbm_state { - u64 chunks; - u64 prev_msr; - u64 prev_bw_msr; + u64 prev_bw_bytes; u32 prev_bw; u32 delta_bw; bool delta_comp; }; /** + * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s + * return value. + * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) + * @prev_msr: Value of IA32_QM_CTR last time it was read for the RMID used to + * find this struct. + */ +struct arch_mbm_state { + u64 chunks; + u64 prev_msr; +}; + +/** * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share * a resource * @d_resctrl: Properties exposed to the resctrl file system * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) - * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps + * @arch_mbm_total: arch private state for MBM total bandwidth + * @arch_mbm_local: arch private state for MBM local bandwidth * * Members of this structure are accessed via helpers that provide abstraction. */ struct rdt_hw_domain { struct rdt_domain d_resctrl; u32 *ctrl_val; - u32 *mbps_val; + struct arch_mbm_state *arch_mbm_total; + struct arch_mbm_state *arch_mbm_local; }; static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) @@ -459,14 +460,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); for_each_rdt_resource(r) \ if (r->mon_capable) -#define for_each_alloc_enabled_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->alloc_enabled) - -#define for_each_mon_enabled_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->mon_enabled) - /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { struct { @@ -530,10 +523,6 @@ void free_rmid(u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id); -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_domain *d, struct rdtgroup *rdtgrp, int evtid, int first); @@ -542,8 +531,6 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, void mbm_handle_overflow(struct work_struct *work); void __init intel_rdt_mbm_apply_quirk(void); bool is_mba_sc(struct rdt_resource *r); -void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index eaf25a234ff5..efe0c30d3a12 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -16,8 +16,12 @@ */ #include <linux/module.h> +#include <linux/sizes.h> #include <linux/slab.h> + #include <asm/cpu_device_id.h> +#include <asm/resctrl.h> + #include "internal.h" struct rmid_entry { @@ -37,8 +41,8 @@ static LIST_HEAD(rmid_free_lru); * @rmid_limbo_count count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that - * may have a occupancy value > intel_cqm_threshold. User can change - * the threshold occupancy value. + * may have a occupancy value > resctrl_rmid_realloc_threshold. User can + * change the threshold occupancy value. */ static unsigned int rmid_limbo_count; @@ -59,10 +63,15 @@ bool rdt_mon_capable; unsigned int rdt_mon_features; /* - * This is the threshold cache occupancy at which we will consider an + * This is the threshold cache occupancy in bytes at which we will consider an * RMID available for re-allocation. */ -unsigned int resctrl_cqm_threshold; +unsigned int resctrl_rmid_realloc_threshold; + +/* + * This is the maximum value for the reallocation threshold, in bytes. + */ +unsigned int resctrl_rmid_realloc_limit; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) @@ -137,9 +146,54 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } -static u64 __rmid_read(u32 rmid, u32 eventid) +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, + u32 rmid, + enum resctrl_event_id eventid) { - u64 val; + switch (eventid) { + case QOS_L3_OCCUP_EVENT_ID: + return NULL; + case QOS_L3_MBM_TOTAL_EVENT_ID: + return &hw_dom->arch_mbm_total[rmid]; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return &hw_dom->arch_mbm_local[rmid]; + } + + /* Never expect to get here */ + WARN_ON_ONCE(1); + + return NULL; +} + +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid) +{ + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct arch_mbm_state *am; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + memset(am, 0, sizeof(*am)); +} + +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) +{ + u64 shift = 64 - width, chunks; + + chunks = (cur_msr << shift) - (prev_msr << shift); + return chunks >> shift; +} + +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct arch_mbm_state *am; + u64 msr_val, chunks; + + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; /* * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured @@ -150,16 +204,26 @@ static u64 __rmid_read(u32 rmid, u32 eventid) * are error bits. */ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, val); - - return val; -} + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + am->chunks += mbm_overflow_count(am->prev_msr, msr_val, + hw_res->mbm_width); + chunks = get_corrected_mbm_count(rmid, am->chunks); + am->prev_msr = msr_val; + } else { + chunks = msr_val; + } -static bool rmid_dirty(struct rmid_entry *entry) -{ - u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); + *val = chunks * hw_res->mon_scale; - return val >= resctrl_cqm_threshold; + return 0; } /* @@ -170,11 +234,11 @@ static bool rmid_dirty(struct rmid_entry *entry) */ void __check_limbo(struct rdt_domain *d, bool force_free) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; struct rmid_entry *entry; - struct rdt_resource *r; u32 crmid = 1, nrmid; - - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + bool rmid_dirty; + u64 val = 0; /* * Skip RMID 0 and start from RMID 1 and check all the RMIDs that @@ -188,7 +252,15 @@ void __check_limbo(struct rdt_domain *d, bool force_free) break; entry = __rmid_entry(nrmid); - if (force_free || !rmid_dirty(entry)) { + + if (resctrl_arch_rmid_read(r, d, entry->rmid, + QOS_L3_OCCUP_EVENT_ID, &val)) { + rmid_dirty = true; + } else { + rmid_dirty = (val >= resctrl_rmid_realloc_threshold); + } + + if (force_free || !rmid_dirty) { clear_bit(entry->rmid, d->rmid_busy_llc); if (!--entry->busy) { rmid_limbo_count--; @@ -227,19 +299,19 @@ int alloc_rmid(void) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; struct rdt_domain *d; - int cpu; - u64 val; - - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + int cpu, err; + u64 val = 0; entry->busy = 0; cpu = get_cpu(); list_for_each_entry(d, &r->domains, list) { if (cpumask_test_cpu(cpu, &d->cpu_mask)) { - val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); - if (val <= resctrl_cqm_threshold) + err = resctrl_arch_rmid_read(r, d, entry->rmid, + QOS_L3_OCCUP_EVENT_ID, + &val); + if (err || val <= resctrl_rmid_realloc_threshold) continue; } @@ -277,24 +349,18 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) +static int __mon_event_count(u32 rmid, struct rmid_read *rr) { - u64 shift = 64 - width, chunks; + struct mbm_state *m; + u64 tval = 0; - chunks = (cur_msr << shift) - (prev_msr << shift); - return chunks >> shift; -} + if (rr->first) + resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); -static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) -{ - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); - struct mbm_state *m; - u64 chunks, tval; + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval); + if (rr->err) + return rr->err; - tval = __rmid_read(rmid, rr->evtid); - if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - return tval; - } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: rr->val += tval; @@ -308,48 +374,47 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) default: /* * Code would never reach here because an invalid - * event id would fail the __rmid_read. + * event id would fail in resctrl_arch_rmid_read(). */ - return RMID_VAL_ERROR; + return -EINVAL; } if (rr->first) { memset(m, 0, sizeof(struct mbm_state)); - m->prev_bw_msr = m->prev_msr = tval; return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width); - m->chunks += chunks; - m->prev_msr = tval; - - rr->val += get_corrected_mbm_count(rmid, m->chunks); + rr->val += tval; return 0; } /* + * mbm_bw_count() - Update bw count from values previously read by + * __mon_event_count(). + * @rmid: The rmid used to identify the cached mbm_state. + * @rr: The struct rmid_read populated by __mon_event_count(). + * * Supporting function to calculate the memory bandwidth - * and delta bandwidth in MBps. + * and delta bandwidth in MBps. The chunks value previously read by + * __mon_event_count() is compared with the chunks value from the previous + * invocation. This must be called once per second to maintain values in MBps. */ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m = &rr->d->mbm_local[rmid]; - u64 tval, cur_bw, chunks; + u64 cur_bw, bytes, cur_bytes; - tval = __rmid_read(rmid, rr->evtid); - if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) - return; + cur_bytes = rr->val; + bytes = cur_bytes - m->prev_bw_bytes; + m->prev_bw_bytes = cur_bytes; - chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width); - cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20; + cur_bw = bytes / SZ_1M; if (m->delta_comp) m->delta_bw = abs(cur_bw - m->prev_bw); m->delta_comp = false; m->prev_bw = cur_bw; - m->prev_bw_msr = tval; } /* @@ -361,11 +426,11 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; - u64 ret_val; + int ret; rdtgrp = rr->rgrp; - ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); + ret = __mon_event_count(rdtgrp->mon.rmid, rr); /* * For Ctrl groups read data from child monitor groups and @@ -377,13 +442,17 @@ void mon_event_count(void *info) if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { if (__mon_event_count(entry->mon.rmid, rr) == 0) - ret_val = 0; + ret = 0; } } - /* Report error if none of rmid_reads are successful */ - if (ret_val) - rr->val = ret_val; + /* + * __mon_event_count() calls for newly created monitor groups may + * report -EINVAL/Unavailable if the monitor hasn't seen any traffic. + * Discard error if any of the monitor event reads succeeded. + */ + if (ret == 0) + rr->err = 0; } /* @@ -420,10 +489,8 @@ void mon_event_count(void *info) */ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { - u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; + u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; - struct rdt_hw_resource *hw_r_mba; - struct rdt_hw_domain *hw_dom_mba; u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; @@ -433,8 +500,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) if (!is_mbm_local_enabled()) return; - hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; - r_mba = &hw_r_mba->r_resctrl; + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + closid = rgrp->closid; rmid = rgrp->mon.rmid; pmbm_data = &dom_mbm->mbm_local[rmid]; @@ -444,16 +511,13 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) pr_warn_once("Failure to get domain for MBA update\n"); return; } - hw_dom_mba = resctrl_to_arch_dom(dom_mba); cur_bw = pmbm_data->prev_bw; - user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); + user_bw = dom_mba->mbps_val[closid]; delta_bw = pmbm_data->delta_bw; - /* - * resctrl_arch_get_config() chooses the mbps/ctrl value to return - * based on is_mba_sc(). For now, reach into the hw_dom. - */ - cur_msr_val = hw_dom_mba->ctrl_val[closid]; + + /* MBA resource doesn't support CDP */ + cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE); /* * For Ctrl groups read data from child monitor groups. @@ -488,9 +552,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) return; } - cur_msr = hw_r_mba->msr_base + closid; - wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); - hw_dom_mba->ctrl_val[closid] = new_msr_val; + resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); /* * Delta values are updated dynamically package wise for each @@ -523,10 +585,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) */ if (is_mbm_total_enabled()) { rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; + rr.val = 0; __mon_event_count(rmid, &rr); } if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; + rr.val = 0; __mon_event_count(rmid, &rr); /* @@ -686,9 +750,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - unsigned int cl_size = boot_cpu_data.x86_cache_size; + unsigned int threshold; int ret; + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; @@ -705,10 +770,14 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; + threshold = resctrl_rmid_realloc_limit / r->num_rmid; - /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - resctrl_cqm_threshold /= hw_res->mon_scale; + /* + * Because num_rmid may not be a power of two, round the value + * to the nearest multiple of hw_res->mon_scale so it matches a + * value the hardware will measure. mon_scale may not be a power of 2. + */ + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); ret = dom_data_init(r); if (ret) @@ -717,7 +786,6 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) l3_mon_evt_init(r); r->mon_capable = true; - r->mon_enabled = true; return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index db813f819ad6..d961ae3ed96e 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -420,6 +420,7 @@ static int pseudo_lock_fn(void *_rdtgrp) struct pseudo_lock_region *plr = rdtgrp->plr; u32 rmid_p, closid_p; unsigned long i; + u64 saved_msr; #ifdef CONFIG_KASAN /* * The registers used for local register variables are also used @@ -463,6 +464,7 @@ static int pseudo_lock_fn(void *_rdtgrp) * the buffer and evict pseudo-locked memory read earlier from the * cache. */ + saved_msr = __rdmsr(MSR_MISC_FEATURE_CONTROL); __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); closid_p = this_cpu_read(pqr_state.cur_closid); rmid_p = this_cpu_read(pqr_state.cur_rmid); @@ -514,7 +516,7 @@ static int pseudo_lock_fn(void *_rdtgrp) __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p); /* Re-enable the hardware prefetcher(s) */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr); local_irq_enable(); plr->thread_done = 1; @@ -835,7 +837,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) * First determine which cpus have pseudo-locked regions * associated with them. */ - for_each_alloc_enabled_rdt_resource(r) { + for_each_alloc_capable_rdt_resource(r) { list_for_each_entry(d_i, &r->domains, list) { if (d_i->plr) cpumask_or(cpu_with_psl, cpu_with_psl, @@ -871,6 +873,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d) static int measure_cycles_lat_fn(void *_plr) { struct pseudo_lock_region *plr = _plr; + u32 saved_low, saved_high; unsigned long i; u64 start, end; void *mem_r; @@ -879,6 +882,7 @@ static int measure_cycles_lat_fn(void *_plr) /* * Disable hardware prefetchers. */ + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); mem_r = READ_ONCE(plr->kmem); /* @@ -895,7 +899,7 @@ static int measure_cycles_lat_fn(void *_plr) end = rdtsc_ordered(); trace_pseudo_lock_mem_latency((u32)(end - start)); } - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); plr->thread_done = 1; wake_up_interruptible(&plr->lock_thread_wq); @@ -940,6 +944,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0; struct perf_event *miss_event, *hit_event; int hit_pmcnum, miss_pmcnum; + u32 saved_low, saved_high; unsigned int line_size; unsigned int size; unsigned long i; @@ -973,6 +978,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, /* * Disable hardware prefetchers. */ + rdmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0); /* Initialize rest of local variables */ @@ -1031,7 +1037,7 @@ static int measure_residency_fn(struct perf_event_attr *miss_attr, */ rmb(); /* Re-enable hardware prefetchers */ - wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0); + wrmsr(MSR_MISC_FEATURE_CONTROL, saved_low, saved_high); local_irq_enable(); out_hit: perf_event_release_kernel(hit_event); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index f276aff521e8..e5a48f05e787 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1030,10 +1030,7 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of, static int max_threshold_occ_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale); + seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); return 0; } @@ -1055,7 +1052,6 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_hw_resource *hw_res; unsigned int bytes; int ret; @@ -1063,11 +1059,10 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (ret) return ret; - if (bytes > (boot_cpu_data.x86_cache_size * 1024)) + if (bytes > resctrl_rmid_realloc_limit) return -EINVAL; - hw_res = resctrl_to_arch_res(of->kn->parent->priv); - resctrl_cqm_threshold = bytes / hw_res->mon_scale; + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); return nbytes; } @@ -1356,11 +1351,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct resctrl_schema *schema; + enum resctrl_conf_type type; struct rdtgroup *rdtgrp; struct rdt_resource *r; struct rdt_domain *d; unsigned int size; int ret = 0; + u32 closid; bool sep; u32 ctrl; @@ -1386,8 +1383,11 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, goto out; } + closid = rdtgrp->closid; + list_for_each_entry(schema, &resctrl_schema_all, list) { r = schema->res; + type = schema->conf_type; sep = false; seq_printf(s, "%*s:", max_name_width, schema->name); list_for_each_entry(d, &r->domains, list) { @@ -1396,9 +1396,12 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - ctrl = resctrl_arch_get_config(r, d, - rdtgrp->closid, - schema->conf_type); + if (is_mba_sc(r)) + ctrl = d->mbps_val[closid]; + else + ctrl = resctrl_arch_get_config(r, d, + closid, + type); if (r->rid == RDT_RESOURCE_MBA) size = ctrl; else @@ -1756,7 +1759,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) if (ret) goto out_destroy; - /* loop over enabled controls, these are all alloc_enabled */ + /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; fflags = r->fflags | RF_CTRL_INFO; @@ -1765,7 +1768,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { fflags = r->fflags | RF_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); @@ -1889,26 +1892,61 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) l3_qos_cfg_update(&hw_res->cdp_enabled); } +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +{ + u32 num_closid = resctrl_arch_get_num_closid(r); + int cpu = cpumask_any(&d->cpu_mask); + int i; + + d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), + GFP_KERNEL, cpu_to_node(cpu)); + if (!d->mbps_val) + return -ENOMEM; + + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + + return 0; +} + +static void mba_sc_domain_destroy(struct rdt_resource *r, + struct rdt_domain *d) +{ + kfree(d->mbps_val); + d->mbps_val = NULL; +} + /* - * Enable or disable the MBA software controller - * which helps user specify bandwidth in MBps. * MBA software controller is supported only if * MBM is supported and MBA is in linear scale. */ +static bool supports_mba_mbps(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + + return (is_mbm_local_enabled() && + r->alloc_capable && is_mba_linear()); +} + +/* + * Enable or disable the MBA software controller + * which helps user specify bandwidth in MBps. + */ static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; - struct rdt_hw_domain *hw_dom; + u32 num_closid = resctrl_arch_get_num_closid(r); struct rdt_domain *d; + int i; - if (!is_mbm_enabled() || !is_mba_linear() || - mba_sc == is_mba_sc(r)) + if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) return -EINVAL; r->membw.mba_sc = mba_sc; + list_for_each_entry(d, &r->domains, list) { - hw_dom = resctrl_to_arch_dom(d); - setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val); + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; } return 0; @@ -2106,7 +2144,7 @@ static int schemata_list_create(void) struct rdt_resource *r; int ret = 0; - for_each_alloc_enabled_rdt_resource(r) { + for_each_alloc_capable_rdt_resource(r) { if (resctrl_arch_get_cdp_enabled(r->rid)) { ret = schemata_list_add(r, CDP_CODE); if (ret) @@ -2261,7 +2299,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->enable_cdpl2 = true; return 0; case Opt_mba_mbps: - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (!supports_mba_mbps()) return -EINVAL; ctx->enable_mba_mbps = true; return 0; @@ -2452,7 +2490,7 @@ static void rdt_kill_sb(struct super_block *sb) set_mba_sc(false); /*Put everything back to default values. */ - for_each_alloc_enabled_rdt_resource(r) + for_each_alloc_capable_rdt_resource(r) reset_all_ctrls(r); cdp_disable_all(); rmdir_all_sub(); @@ -2499,14 +2537,12 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, * Remove all subdirectories of mon_data of ctrl_mon groups * and monitor groups with given domain id. */ -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + unsigned int dom_id) { struct rdtgroup *prgrp, *crgrp; char name[32]; - if (!r->mon_enabled) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { sprintf(name, "mon_%s_%02d", r->name, dom_id); kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); @@ -2565,16 +2601,13 @@ out_destroy: * Add all subdirectories of mon_data for "ctrl_mon" groups * and "monitor" groups with given domain id. */ -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) +static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; struct list_head *head; - if (!r->mon_enabled) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { parent_kn = prgrp->mon.mon_data_kn; mkdir_mondata_subdir(parent_kn, d, r, prgrp); @@ -2642,7 +2675,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, * Create the subdirectories for each domain. Note that all events * in a domain like L3 are grouped into a resource whose domain is L3 */ - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); if (ret) goto out_destroy; @@ -2786,14 +2819,19 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) } /* Initialize MBA resource with default values. */ -static void rdtgroup_init_mba(struct rdt_resource *r) +static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; struct rdt_domain *d; list_for_each_entry(d, &r->domains, list) { + if (is_mba_sc(r)) { + d->mbps_val[closid] = MBA_MAX_MBPS; + continue; + } + cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; + cfg->new_ctrl = r->default_ctrl; cfg->have_new_ctrl = true; } } @@ -2808,7 +2846,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; if (r->rid == RDT_RESOURCE_MBA) { - rdtgroup_init_mba(r); + rdtgroup_init_mba(r, rdtgrp->closid); + if (is_mba_sc(r)) + continue; } else { ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) @@ -3236,6 +3276,110 @@ out: return ret; } +static void domain_destroy_mon_state(struct rdt_domain *d) +{ + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); +} + +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + lockdep_assert_held(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + mba_sc_domain_destroy(r, d); + + if (!r->mon_capable) + return; + + /* + * If resctrl is mounted, remove all the + * per domain monitor data directories. + */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + rmdir_mondata_subdir_allrdtgrp(r, d->id); + + if (is_mbm_enabled()) + cancel_delayed_work(&d->mbm_over); + if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { + /* + * When a package is going down, forcefully + * decrement rmid->ebusy. There is no way to know + * that the L3 was flushed and hence may lead to + * incorrect counts in rare scenarios, but leaving + * the RMID as busy creates RMID leaks if the + * package never comes back. + */ + __check_limbo(d, true); + cancel_delayed_work(&d->cqm_limbo); + } + + domain_destroy_mon_state(d); +} + +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +{ + size_t tsize; + + if (is_llc_occupancy_enabled()) { + d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); + if (!d->rmid_busy_llc) + return -ENOMEM; + } + if (is_mbm_total_enabled()) { + tsize = sizeof(*d->mbm_total); + d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_total) { + bitmap_free(d->rmid_busy_llc); + return -ENOMEM; + } + } + if (is_mbm_local_enabled()) { + tsize = sizeof(*d->mbm_local); + d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_local) { + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + return -ENOMEM; + } + } + + return 0; +} + +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + int err; + + lockdep_assert_held(&rdtgroup_mutex); + + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + /* RDT_RESOURCE_MBA is never mon_capable */ + return mba_sc_domain_allocate(r, d); + + if (!r->mon_capable) + return 0; + + err = domain_setup_mon_state(r, d); + if (err) + return err; + + if (is_mbm_enabled()) { + INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); + } + + if (is_llc_occupancy_enabled()) + INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + + /* If resctrl is mounted, add per domain monitor data directories. */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + mkdir_mondata_subdir_allrdtgrp(r, d); + + return 0; +} + /* * rdtgroup_init - rdtgroup initialization * diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 8bdeae2fc309..9f13d724172e 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -12,6 +12,9 @@ #include "encls.h" #include "sgx.h" +static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); + #define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd)) /* * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to @@ -917,7 +920,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, } /** - * sgx_encl_get_backing() - Pin the backing storage + * __sgx_encl_get_backing() - Pin the backing storage * @encl: an enclave pointer * @page_index: enclave page index * @backing: data for accessing backing storage for the page @@ -929,7 +932,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, * 0 on success, * -errno otherwise. */ -static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); @@ -1004,7 +1007,7 @@ static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl) } /** - * sgx_encl_alloc_backing() - allocate a new backing storage page + * sgx_encl_alloc_backing() - create a new backing storage page * @encl: an enclave pointer * @page_index: enclave page index * @backing: data for accessing backing storage for the page @@ -1012,7 +1015,9 @@ static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl) * When called from ksgxd, sets the active memcg from one of the * mms in the enclave's mm_list prior to any backing page allocation, * in order to ensure that shmem page allocations are charged to the - * enclave. + * enclave. Create a backing page for loading data back into an EPC page with + * ELDU. This function takes a reference on a new backing page which + * must be dropped with a corresponding call to sgx_encl_put_backing(). * * Return: * 0 on success, @@ -1025,7 +1030,7 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, struct mem_cgroup *memcg = set_active_memcg(encl_memcg); int ret; - ret = sgx_encl_get_backing(encl, page_index, backing); + ret = __sgx_encl_get_backing(encl, page_index, backing); set_active_memcg(memcg); mem_cgroup_put(encl_memcg); @@ -1043,15 +1048,17 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, * It is the caller's responsibility to ensure that it is appropriate to use * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is * not used correctly, this will cause an allocation which is not accounted for. + * This function takes a reference on an existing backing page which must be + * dropped with a corresponding call to sgx_encl_put_backing(). * * Return: * 0 on success, * -errno otherwise. */ -int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, +static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { - return sgx_encl_get_backing(encl, page_index, backing); + return __sgx_encl_get_backing(encl, page_index, backing); } /** diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index a65a952116fd..f94ff14c9486 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -107,8 +107,6 @@ bool current_is_ksgxd(void); void sgx_encl_release(struct kref *ref); int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl); -int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, - struct sgx_backing *backing); int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing); void sgx_encl_put_backing(struct sgx_backing *backing); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index afae4dd77495..b3dba35f466e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -128,7 +128,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) /* No access to the user space stack of other tasks. Ignore. */ break; default: - printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", + printk("%sCode: Unable to access opcode bytes at 0x%lx.\n", loglvl, prologue); break; } diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 68b38925a74f..44f937015e1e 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -264,11 +264,11 @@ static __init void early_pci_serial_init(char *s) bar0 = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); /* - * Verify it is a UART type device + * Verify it is a 16550-UART type device */ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || - (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ { + (((classcode >> 8) & 0xff) != PCI_SERIAL_16550_COMPATIBLE)) { if (!force) return; } @@ -276,22 +276,22 @@ static __init void early_pci_serial_init(char *s) /* * Determine if it is IO or memory mapped */ - if (bar0 & 0x01) { + if ((bar0 & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { /* it is IO mapped */ serial_in = io_serial_in; serial_out = io_serial_out; - early_serial_base = bar0&0xfffffffc; + early_serial_base = bar0 & PCI_BASE_ADDRESS_IO_MASK; write_pci_config(bus, slot, func, PCI_COMMAND, - cmdreg|PCI_COMMAND_IO); + cmdreg|PCI_COMMAND_IO); } else { /* It is memory mapped - assume 32-bit alignment */ serial_in = mem32_serial_in; serial_out = mem32_serial_out; /* WARNING! assuming the address is always in the first 4G */ early_serial_base = - (unsigned long)early_ioremap(bar0 & 0xfffffff0, 0x10); + (unsigned long)early_ioremap(bar0 & PCI_BASE_ADDRESS_MEM_MASK, 0x10); write_pci_config(bus, slot, func, PCI_COMMAND, - cmdreg|PCI_COMMAND_MEMORY); + cmdreg|PCI_COMMAND_MEMORY); } /* diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 586f718b8e95..349046434513 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -4,11 +4,8 @@ */ #include <linux/platform_device.h> #include <linux/mc146818rtc.h> -#include <linux/acpi.h> -#include <linux/bcd.h> #include <linux/export.h> #include <linux/pnp.h> -#include <linux/of.h> #include <asm/vsyscall.h> #include <asm/x86_init.h> @@ -20,26 +17,23 @@ /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. It is required for NMI access to the - * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. + * CMOS/RTC registers. See arch/x86/include/asm/mc146818rtc.h for details. */ volatile unsigned long cmos_lock; EXPORT_SYMBOL(cmos_lock); #endif /* CONFIG_X86_32 */ -/* For two digit years assume time is always after that */ -#define CMOS_YEARS_OFFS 2000 - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); /* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * In order to set the CMOS clock precisely, mach_set_cmos_time has to be * called 500 ms after the second nowtime has started, because when * nowtime is written into the registers of the CMOS clock, it will * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(const struct timespec64 *now) +int mach_set_cmos_time(const struct timespec64 *now) { unsigned long long nowtime = now->tv_sec; struct rtc_time tm; @@ -62,8 +56,7 @@ int mach_set_rtc_mmss(const struct timespec64 *now) void mach_get_cmos_time(struct timespec64 *now) { - unsigned int status, year, mon, day, hour, min, sec, century = 0; - unsigned long flags; + struct rtc_time tm; /* * If pm_trace abused the RTC as storage, set the timespec to 0, @@ -74,51 +67,13 @@ void mach_get_cmos_time(struct timespec64 *now) return; } - spin_lock_irqsave(&rtc_lock, flags); - - /* - * If UIP is clear, then we have >= 244 microseconds before - * RTC registers will be updated. Spec sheet says that this - * is the reliable way to read RTC - registers. If UIP is set - * then the register access might be invalid. - */ - while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) - cpu_relax(); - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - -#ifdef CONFIG_ACPI - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && - acpi_gbl_FADT.century) - century = CMOS_READ(acpi_gbl_FADT.century); -#endif - - status = CMOS_READ(RTC_CONTROL); - WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); - - spin_unlock_irqrestore(&rtc_lock, flags); - - if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { - sec = bcd2bin(sec); - min = bcd2bin(min); - hour = bcd2bin(hour); - day = bcd2bin(day); - mon = bcd2bin(mon); - year = bcd2bin(year); + if (mc146818_get_time(&tm)) { + pr_err("Unable to read current time from RTC\n"); + now->tv_sec = now->tv_nsec = 0; + return; } - if (century) { - century = bcd2bin(century); - year += century * 100; - } else - year += CMOS_YEARS_OFFS; - - now->tv_sec = mktime64(year, mon, day, hour, min, sec); + now->tv_sec = rtc_tm_to_time64(&tm); now->tv_nsec = 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d62b2cb85cea..178015a820f0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -63,6 +63,7 @@ #include <asm/insn-eval.h> #include <asm/vdso.h> #include <asm/tdx.h> +#include <asm/cfi.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -313,7 +314,8 @@ static noinstr bool handle_bug(struct pt_regs *regs) */ if (regs->flags & X86_EFLAGS_IF) raw_local_irq_enable(); - if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || + handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { regs->ip += LEN_UD2; handled = true; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e84ee5cdbd8c..57353519bc11 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -138,7 +138,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, - .set_wallclock = mach_set_rtc_mmss, + .set_wallclock = mach_set_cmos_time, .iommu_shutdown = iommu_shutdown_noop, .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4c1c2c06e96b..2796dde06302 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -902,8 +902,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; } break; - case 9: - break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax; union cpuid10_edx edx; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ddd4367d4826..7eaf96064cb0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2570,7 +2570,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * bits which we consider mandatory enabled. * The CR0_READ_SHADOW is what L2 should have expected to read given * the specifications by L1; It's not enough to take - * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we + * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we * have more bits than L1 expected. */ vmx_set_cr0(vcpu, vmcs12->guest_cr0); diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index fe59b8ac4fcc..ecbfb4dd3b01 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> +#include <asm/asm.h> #include <asm/export.h> /* @@ -50,3 +51,140 @@ SYM_FUNC_START(clear_page_erms) RET SYM_FUNC_END(clear_page_erms) EXPORT_SYMBOL_GPL(clear_page_erms) + +/* + * Default clear user-space. + * Input: + * rdi destination + * rcx count + * + * Output: + * rcx: uncleared bytes or 0 if successful. + */ +SYM_FUNC_START(clear_user_original) + /* + * Copy only the lower 32 bits of size as that is enough to handle the rest bytes, + * i.e., no need for a 'q' suffix and thus a REX prefix. + */ + mov %ecx,%eax + shr $3,%rcx + jz .Lrest_bytes + + # do the qwords first + .p2align 4 +.Lqwords: + movq $0,(%rdi) + lea 8(%rdi),%rdi + dec %rcx + jnz .Lqwords + +.Lrest_bytes: + and $7, %eax + jz .Lexit + + # now do the rest bytes +.Lbytes: + movb $0,(%rdi) + inc %rdi + dec %eax + jnz .Lbytes + +.Lexit: + /* + * %rax still needs to be cleared in the exception case because this function is called + * from inline asm and the compiler expects %rax to be zero when exiting the inline asm, + * in case it might reuse it somewhere. + */ + xor %eax,%eax + RET + +.Lqwords_exception: + # convert remaining qwords back into bytes to return to caller + shl $3, %rcx + and $7, %eax + add %rax,%rcx + jmp .Lexit + +.Lbytes_exception: + mov %eax,%ecx + jmp .Lexit + + _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception) + _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception) +SYM_FUNC_END(clear_user_original) +EXPORT_SYMBOL(clear_user_original) + +/* + * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is + * present. + * Input: + * rdi destination + * rcx count + * + * Output: + * rcx: uncleared bytes or 0 if successful. + */ +SYM_FUNC_START(clear_user_rep_good) + # call the original thing for less than a cacheline + cmp $64, %rcx + jb clear_user_original + +.Lprep: + # copy lower 32-bits for rest bytes + mov %ecx, %edx + shr $3, %rcx + jz .Lrep_good_rest_bytes + +.Lrep_good_qwords: + rep stosq + +.Lrep_good_rest_bytes: + and $7, %edx + jz .Lrep_good_exit + +.Lrep_good_bytes: + mov %edx, %ecx + rep stosb + +.Lrep_good_exit: + # see .Lexit comment above + xor %eax, %eax + RET + +.Lrep_good_qwords_exception: + # convert remaining qwords back into bytes to return to caller + shl $3, %rcx + and $7, %edx + add %rdx, %rcx + jmp .Lrep_good_exit + + _ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception) + _ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit) +SYM_FUNC_END(clear_user_rep_good) +EXPORT_SYMBOL(clear_user_rep_good) + +/* + * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present. + * Input: + * rdi destination + * rcx count + * + * Output: + * rcx: uncleared bytes or 0 if successful. + * + */ +SYM_FUNC_START(clear_user_erms) + # call the original thing for less than a cacheline + cmp $64, %rcx + jb clear_user_original + +.Lerms_bytes: + rep stosb + +.Lerms_exit: + xorl %eax,%eax + RET + + _ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit) +SYM_FUNC_END(clear_user_erms) +EXPORT_SYMBOL(clear_user_erms) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index d0d7b9bc6cad..dd8cd8831251 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -2,6 +2,7 @@ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/errno.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -27,7 +28,7 @@ * Output: * rax original destination */ -SYM_FUNC_START(__memcpy) +SYM_TYPED_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0ae6cf804197..6c1f8ac5e721 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -14,46 +14,6 @@ * Zero Userspace */ -unsigned long __clear_user(void __user *addr, unsigned long size) -{ - long __d0; - might_fault(); - /* no memory constraint because it doesn't change any memory gcc knows - about */ - stac(); - asm volatile( - " testq %[size8],%[size8]\n" - " jz 4f\n" - " .align 16\n" - "0: movq $0,(%[dst])\n" - " addq $8,%[dst]\n" - " decl %%ecx ; jnz 0b\n" - "4: movq %[size1],%%rcx\n" - " testl %%ecx,%%ecx\n" - " jz 2f\n" - "1: movb $0,(%[dst])\n" - " incq %[dst]\n" - " decl %%ecx ; jnz 1b\n" - "2:\n" - - _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN8, %[size1]) - _ASM_EXTABLE_UA(1b, 2b) - - : [size8] "=&c"(size), [dst] "=&D" (__d0) - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr)); - clac(); - return size; -} -EXPORT_SYMBOL(__clear_user); - -unsigned long clear_user(void __user *to, unsigned long n) -{ - if (access_ok(to, n)) - return __clear_user(to, n); - return n; -} -EXPORT_SYMBOL(clear_user); - #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fa71a5d12e87..a498ae1fbe66 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -769,6 +769,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct task_struct *tsk) { const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; + /* This is a racy snapshot, but it's better than nothing. */ + int cpu = raw_smp_processor_id(); if (!unhandled_signal(tsk, SIGSEGV)) return; @@ -782,6 +784,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, print_vma_addr(KERN_CONT " in ", regs->ip); + /* + * Dump the likely CPU where the fatal segfault happened. + * This can help identify faulty hardware. + */ + printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu, + topology_core_id(cpu), topology_physical_package_id(cpu)); + + printk(KERN_CONT "\n"); show_opcodes(regs, loglvl); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5b6230779cf3..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -419,7 +419,9 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { - EMIT2(0xFF, 0xE0 + reg); + EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ + if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ } *pprog = prog; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 6e598bd78eef..ebc98a68c400 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -169,7 +169,7 @@ static void __init do_add_efi_memmap(void) } /* - * Given add_efi_memmap defaults to 0 and there there is no alternative + * Given add_efi_memmap defaults to 0 and there is no alternative * e820 mechanism for soft-reserved memory, import the full EFI memory * map if soft reservations are present and enabled. Otherwise, the * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 31c634a22818..58a200dc762d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -55,6 +55,10 @@ ifdef CONFIG_RETPOLINE PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS) endif +ifdef CONFIG_CFI_CLANG +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) +endif + CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index e2c5b296120d..2925074b9a58 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -56,6 +56,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" + "__kcfi_typeid_|" "__crc_)", /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0ed2e487a693..9b1a58dda935 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -765,6 +765,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + static const struct trap_info zero = { }; unsigned out; trace_xen_cpu_load_idt(desc); @@ -774,7 +775,7 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); out = xen_convert_trap_info(desc, traps, false); - memset(&traps[out], 0, sizeof(traps[0])); + traps[out] = zero; xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) diff --git a/block/bdev.c b/block/bdev.c index ce05175e71ce..d699ecdb3260 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -26,6 +26,7 @@ #include <linux/namei.h> #include <linux/part_stat.h> #include <linux/uaccess.h> +#include <linux/stat.h> #include "../fs/internal.h" #include "blk.h" @@ -1069,3 +1070,25 @@ void sync_bdevs(bool wait) spin_unlock(&blockdev_superblock->s_inode_list_lock); iput(old_inode); } + +/* + * Handle STATX_DIOALIGN for block devices. + * + * Note that the inode passed to this is the inode of a block device node file, + * not the block device's internal inode. Therefore it is *not* valid to use + * I_BDEV() here; the block device has to be looked up by i_rdev instead. + */ +void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +{ + struct block_device *bdev; + + bdev = blkdev_get_no_open(inode->i_rdev); + if (!bdev) + return; + + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + stat->result_mask |= STATX_DIOALIGN; + + blkdev_put_no_open(bdev); +} diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 7802d8846a8d..bc9ddd2e3b05 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -27,9 +27,6 @@ menuconfig ACPI Management (APM) specification. If both ACPI and APM support are configured, ACPI is used. - The project home page for the Linux ACPI subsystem is here: - <https://01.org/linux-acpi> - Linux support for ACPI is based on Intel Corporation's ACPI Component Architecture (ACPI CA). For more information on the ACPI CA, see: @@ -347,7 +344,6 @@ config ACPI_CUSTOM_DSDT_FILE depends on !STANDALONE help This option supports a custom DSDT by linking it into the kernel. - See Documentation/admin-guide/acpi/dsdt-override.rst Enter the full path name to the file which includes the AmlCode or dsdt_aml_code declaration. diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index c29e41bfcf35..bb9fe7984b1a 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -36,11 +36,6 @@ static int acpi_ac_add(struct acpi_device *device); static int acpi_ac_remove(struct acpi_device *device); static void acpi_ac_notify(struct acpi_device *device, u32 event); -struct acpi_ac_bl { - const char *hid; - int hrv; -}; - static const struct acpi_device_id ac_device_ids[] = { {"ACPI0003", 0}, {"", 0}, diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c index ab8a4e0191b1..f5b443ab01c2 100644 --- a/drivers/acpi/acpi_amba.c +++ b/drivers/acpi/acpi_amba.c @@ -21,6 +21,7 @@ static const struct acpi_device_id amba_id_list[] = { {"ARMH0061", 0}, /* PL061 GPIO Device */ + {"ARMH0330", 0}, /* ARM DMA Controller DMA-330 */ {"ARMHC500", 0}, /* ARM CoreSight ETM4x */ {"ARMHC501", 0}, /* ARM CoreSight ETR */ {"ARMHC502", 0}, /* ARM CoreSight STM */ @@ -48,6 +49,7 @@ static void amba_register_dummy_clk(void) static int amba_handler_attach(struct acpi_device *adev, const struct acpi_device_id *id) { + struct acpi_device *parent = acpi_dev_parent(adev); struct amba_device *dev; struct resource_entry *rentry; struct list_head resource_list; @@ -97,8 +99,8 @@ static int amba_handler_attach(struct acpi_device *adev, * attached to it, that physical device should be the parent of * the amba device we are about to create. */ - if (adev->parent) - dev->dev.parent = acpi_get_first_physical_node(adev->parent); + if (parent) + dev->dev.parent = acpi_get_first_physical_node(parent); ACPI_COMPANION_SET(&dev->dev, adev); diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index ad245bbd965e..3bbe2276cac7 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -60,12 +60,6 @@ static int acpi_apd_setup(struct apd_private_data *pdata) } #ifdef CONFIG_X86_AMD_PLATFORM_DEVICE -static int misc_check_res(struct acpi_resource *ares, void *data) -{ - struct resource res; - - return !acpi_dev_resource_memory(ares, &res); -} static int fch_misc_setup(struct apd_private_data *pdata) { @@ -82,8 +76,7 @@ static int fch_misc_setup(struct apd_private_data *pdata) return -ENOMEM; INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, misc_check_res, - NULL); + ret = acpi_dev_get_memory_resources(adev, &resource_list); if (ret < 0) return -ENOENT; diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c index 6922a44b3ce7..a2056c4c8cb7 100644 --- a/drivers/acpi/acpi_fpdt.c +++ b/drivers/acpi/acpi_fpdt.c @@ -143,6 +143,23 @@ static const struct attribute_group boot_attr_group = { static struct kobject *fpdt_kobj; +#if defined CONFIG_X86 && defined CONFIG_PHYS_ADDR_T_64BIT +#include <linux/processor.h> +static bool fpdt_address_valid(u64 address) +{ + /* + * On some systems the table contains invalid addresses + * with unsuppored high address bits set, check for this. + */ + return !(address >> boot_cpu_data.x86_phys_bits); +} +#else +static bool fpdt_address_valid(u64 address) +{ + return true; +} +#endif + static int fpdt_process_subtable(u64 address, u32 subtable_type) { struct fpdt_subtable_header *subtable_header; @@ -151,6 +168,11 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) u32 length, offset; int result; + if (!fpdt_address_valid(address)) { + pr_info(FW_BUG "invalid physical address: 0x%llx!\n", address); + return -EINVAL; + } + subtable_header = acpi_os_map_memory(address, sizeof(*subtable_header)); if (!subtable_header) return -ENOMEM; diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index c4d4d21391d7..f08ffa75f4a7 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -167,10 +167,10 @@ static struct pwm_lookup byt_pwm_lookup[] = { static void byt_pwm_setup(struct lpss_private_data *pdata) { - struct acpi_device *adev = pdata->adev; + u64 uid; /* Only call pwm_add_table for the first PWM controller */ - if (!adev->pnp.unique_id || strcmp(adev->pnp.unique_id, "1")) + if (acpi_dev_uid_to_integer(pdata->adev, &uid) || uid != 1) return; pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup)); @@ -180,14 +180,13 @@ static void byt_pwm_setup(struct lpss_private_data *pdata) static void byt_i2c_setup(struct lpss_private_data *pdata) { - const char *uid_str = acpi_device_uid(pdata->adev); acpi_handle handle = pdata->adev->handle; unsigned long long shared_host = 0; acpi_status status; - long uid = 0; + u64 uid; - /* Expected to always be true, but better safe then sorry */ - if (uid_str && !kstrtol(uid_str, 10, &uid) && uid) { + /* Expected to always be successfull, but better safe then sorry */ + if (!acpi_dev_uid_to_integer(pdata->adev, &uid) && uid) { /* Detect I2C bus shared with PUNIT and ignore its d3 status */ status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host); if (ACPI_SUCCESS(status) && shared_host) @@ -211,10 +210,10 @@ static struct pwm_lookup bsw_pwm_lookup[] = { static void bsw_pwm_setup(struct lpss_private_data *pdata) { - struct acpi_device *adev = pdata->adev; + u64 uid; /* Only call pwm_add_table for the first PWM controller */ - if (!adev->pnp.unique_id || strcmp(adev->pnp.unique_id, "1")) + if (acpi_dev_uid_to_integer(pdata->adev, &uid) || uid != 1) return; pwm_add_table(bsw_pwm_lookup, ARRAY_SIZE(bsw_pwm_lookup)); @@ -392,13 +391,6 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = { #ifdef CONFIG_X86_INTEL_LPSS -static int is_memory(struct acpi_resource *res, void *not_used) -{ - struct resource r; - - return !acpi_dev_resource_memory(res, &r); -} - /* LPSS main clock device. */ static struct platform_device *lpss_clk_dev; @@ -659,29 +651,25 @@ static int acpi_lpss_create_device(struct acpi_device *adev, return -ENOMEM; INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, is_memory, NULL); + ret = acpi_dev_get_memory_resources(adev, &resource_list); if (ret < 0) goto err_out; - list_for_each_entry(rentry, &resource_list, node) - if (resource_type(rentry->res) == IORESOURCE_MEM) { - if (dev_desc->prv_size_override) - pdata->mmio_size = dev_desc->prv_size_override; - else - pdata->mmio_size = resource_size(rentry->res); - pdata->mmio_base = ioremap(rentry->res->start, - pdata->mmio_size); - break; - } + rentry = list_first_entry_or_null(&resource_list, struct resource_entry, node); + if (rentry) { + if (dev_desc->prv_size_override) + pdata->mmio_size = dev_desc->prv_size_override; + else + pdata->mmio_size = resource_size(rentry->res); + pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); + } acpi_dev_free_resource_list(&resource_list); if (!pdata->mmio_base) { /* Avoid acpi_bus_attach() instantiating a pdev for this dev. */ adev->pnp.type.platform_id = 0; - /* Skip the device, but continue the namespace scan. */ - ret = 0; - goto err_out; + goto out_free; } pdata->adev = adev; @@ -692,11 +680,8 @@ static int acpi_lpss_create_device(struct acpi_device *adev, if (dev_desc->flags & LPSS_CLK) { ret = register_device_clock(adev, pdata); - if (ret) { - /* Skip the device, but continue the namespace scan. */ - ret = 0; - goto err_out; - } + if (ret) + goto out_free; } /* @@ -708,15 +693,19 @@ static int acpi_lpss_create_device(struct acpi_device *adev, adev->driver_data = pdata; pdev = acpi_create_platform_device(adev, dev_desc->properties); - if (!IS_ERR_OR_NULL(pdev)) { - acpi_lpss_create_device_links(adev, pdev); - return 1; + if (IS_ERR_OR_NULL(pdev)) { + adev->driver_data = NULL; + ret = PTR_ERR(pdev); + goto err_out; } - ret = PTR_ERR(pdev); - adev->driver_data = NULL; + acpi_lpss_create_device_links(adev, pdev); + return 1; - err_out: +out_free: + /* Skip the device, but continue the namespace scan */ + ret = 0; +err_out: kfree(pdata); return ret; } diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c index a12b55d81209..ee4ce5ba1fb2 100644 --- a/drivers/acpi/acpi_pcc.c +++ b/drivers/acpi/acpi_pcc.c @@ -23,6 +23,12 @@ #include <acpi/pcc.h> +/* + * Arbitrary retries in case the remote processor is slow to respond + * to PCC commands + */ +#define PCC_CMD_WAIT_RETRIES_NUM 500 + struct pcc_data { struct pcc_mbox_chan *pcc_chan; void __iomem *pcc_comm_addr; @@ -63,6 +69,7 @@ acpi_pcc_address_space_setup(acpi_handle region_handle, u32 function, if (IS_ERR(data->pcc_chan)) { pr_err("Failed to find PCC channel for subspace %d\n", ctx->subspace_id); + kfree(data); return AE_NOT_FOUND; } @@ -72,6 +79,8 @@ acpi_pcc_address_space_setup(acpi_handle region_handle, u32 function, if (!data->pcc_comm_addr) { pr_err("Failed to ioremap PCC comm region mem for %d\n", ctx->subspace_id); + pcc_mbox_free_channel(data->pcc_chan); + kfree(data); return AE_NO_MEMORY; } @@ -86,6 +95,7 @@ acpi_pcc_address_space_handler(u32 function, acpi_physical_address addr, { int ret; struct pcc_data *data = region_context; + u64 usecs_lat; reinit_completion(&data->done); @@ -96,10 +106,22 @@ acpi_pcc_address_space_handler(u32 function, acpi_physical_address addr, if (ret < 0) return AE_ERROR; - if (data->pcc_chan->mchan->mbox->txdone_irq) - wait_for_completion(&data->done); + if (data->pcc_chan->mchan->mbox->txdone_irq) { + /* + * pcc_chan->latency is just a Nominal value. In reality the remote + * processor could be much slower to reply. So add an arbitrary + * amount of wait on top of Nominal. + */ + usecs_lat = PCC_CMD_WAIT_RETRIES_NUM * data->pcc_chan->latency; + ret = wait_for_completion_timeout(&data->done, + usecs_to_jiffies(usecs_lat)); + if (ret == 0) { + pr_err("PCC command executed timeout!\n"); + return AE_TIME; + } + } - mbox_client_txdone(data->pcc_chan->mchan, ret); + mbox_chan_txdone(data->pcc_chan->mchan, ret); memcpy_fromio(value, data->pcc_comm_addr, data->ctx.length); diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index de3cbf152dee..fe00a5783f53 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -20,13 +20,13 @@ #include "internal.h" static const struct acpi_device_id forbidden_id_list[] = { + {"ACPI0009", 0}, /* IOxAPIC */ + {"ACPI000A", 0}, /* IOAPIC */ {"PNP0000", 0}, /* PIC */ {"PNP0100", 0}, /* Timer */ {"PNP0200", 0}, /* AT DMA Controller */ - {"ACPI0009", 0}, /* IOxAPIC */ - {"ACPI000A", 0}, /* IOAPIC */ {"SMB0001", 0}, /* ACPI SMBUS virtual device */ - {"", 0}, + { } }; static struct platform_device *acpi_platform_device_find_by_companion(struct acpi_device *adev) @@ -78,7 +78,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev, * If the device has parent we need to take its resources into * account as well because this device might consume part of those. */ - parent = acpi_get_first_physical_node(adev->parent); + parent = acpi_get_first_physical_node(acpi_dev_parent(adev)); if (parent && dev_is_pci(parent)) dest->parent = pci_find_resource(to_pci_dev(parent), dest); } @@ -97,6 +97,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev, struct platform_device *acpi_create_platform_device(struct acpi_device *adev, const struct property_entry *properties) { + struct acpi_device *parent = acpi_dev_parent(adev); struct platform_device *pdev = NULL; struct platform_device_info pdevinfo; struct resource_entry *rentry; @@ -113,13 +114,11 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, INIT_LIST_HEAD(&resource_list); count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); - if (count < 0) { + if (count < 0) return NULL; - } else if (count > 0) { - resources = kcalloc(count, sizeof(struct resource), - GFP_KERNEL); + if (count > 0) { + resources = kcalloc(count, sizeof(*resources), GFP_KERNEL); if (!resources) { - dev_err(&adev->dev, "No memory for resources\n"); acpi_dev_free_resource_list(&resource_list); return ERR_PTR(-ENOMEM); } @@ -137,10 +136,9 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev, * attached to it, that physical device should be the parent of the * platform device we are about to create. */ - pdevinfo.parent = adev->parent ? - acpi_get_first_physical_node(adev->parent) : NULL; + pdevinfo.parent = parent ? acpi_get_first_physical_node(parent) : NULL; pdevinfo.name = dev_name(&adev->dev); - pdevinfo.id = -1; + pdevinfo.id = PLATFORM_DEVID_NONE; pdevinfo.res = resources; pdevinfo.num_res = count; pdevinfo.fwnode = acpi_fwnode_handle(adev); diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 5cbe2196176d..6d209ea68bd8 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -496,6 +496,22 @@ static const struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE R830"), }, }, + { + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Satellite Z830", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE Z830"), + }, + }, + { + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Portege Z830", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE Z830"), + }, + }, /* * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set * but the IDs actually follow the Device ID Scheme. @@ -2030,7 +2046,7 @@ static int acpi_video_bus_add(struct acpi_device *device) acpi_status status; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, - device->parent->handle, 1, + acpi_dev_parent(device)->handle, 1, acpi_video_bus_match, NULL, device, NULL); if (status == AE_ALREADY_EXISTS) { diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 9f49272cad39..9b52482b4ed5 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -125,12 +125,9 @@ EXPORT_SYMBOL_GPL(apei_exec_write_register); int apei_exec_write_register_value(struct apei_exec_context *ctx, struct acpi_whea_header *entry) { - int rc; - ctx->value = entry->value; - rc = apei_exec_write_register(ctx, entry); - return rc; + return apei_exec_write_register(ctx, entry); } EXPORT_SYMBOL_GPL(apei_exec_write_register_value); diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 45973aa6e06d..c23eb75866d0 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -90,6 +90,9 @@ static void __init bert_print_all(struct acpi_bert_region *region, if (skipped) pr_info(HW_ERR "Skipped %d error records\n", skipped); + + if (printed + skipped) + pr_info("Total records found: %d\n", printed + skipped); } static int __init setup_bert_disable(char *str) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 31b077eedb58..247989060e29 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1020,14 +1020,10 @@ static int reader_pos; static int erst_open_pstore(struct pstore_info *psi) { - int rc; - if (erst_disable) return -ENODEV; - rc = erst_get_record_id_begin(&reader_pos); - - return rc; + return erst_get_record_id_begin(&reader_pos); } static int erst_close_pstore(struct pstore_info *psi) diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c index f16739ad3cc0..93d796531af3 100644 --- a/drivers/acpi/arm64/dma.c +++ b/drivers/acpi/arm64/dma.c @@ -4,11 +4,12 @@ #include <linux/device.h> #include <linux/dma-direct.h> -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +void acpi_arch_dma_setup(struct device *dev) { int ret; u64 end, mask; - u64 dmaaddr = 0, size = 0, offset = 0; + u64 size = 0; + const struct bus_dma_region *map = NULL; /* * If @dev is expected to be DMA-capable then the bus code that created @@ -26,7 +27,19 @@ void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) else size = 1ULL << 32; - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + ret = acpi_dma_get_range(dev, &map); + if (!ret && map) { + const struct bus_dma_region *r = map; + + for (end = 0; r->size; r++) { + if (r->dma_start + r->size - 1 > end) + end = r->dma_start + r->size - 1; + } + + size = end + 1; + dev->dma_range_map = map; + } + if (ret == -ENODEV) ret = iort_dma_get_ranges(dev, &size); if (!ret) { @@ -34,17 +47,10 @@ void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) * Limit coherent and dma mask based on size retrieved from * firmware. */ - end = dmaaddr + size - 1; + end = size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); *dev->dma_mask = min(*dev->dma_mask, mask); } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index c0d20d997891..d466c8195314 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -456,7 +456,7 @@ out_free: Notification Handling -------------------------------------------------------------------------- */ -/** +/* * acpi_bus_notify * --------------- * Callback for all 'system-level' device notifications (values 0x00-0x7F). @@ -511,7 +511,7 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) break; } - adev = acpi_bus_get_acpi_device(handle); + adev = acpi_get_acpi_dev(handle); if (!adev) goto err; @@ -524,14 +524,14 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) } if (!hotplug_event) { - acpi_bus_put_acpi_device(adev); + acpi_put_acpi_dev(adev); return; } if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) return; - acpi_bus_put_acpi_device(adev); + acpi_put_acpi_dev(adev); err: acpi_evaluate_ost(handle, type, ost_code, NULL); @@ -802,7 +802,7 @@ static bool acpi_of_modalias(struct acpi_device *adev, str = obj->string.pointer; chr = strchr(str, ','); - strlcpy(modalias, chr ? chr + 1 : str, len); + strscpy(modalias, chr ? chr + 1 : str, len); return true; } @@ -822,7 +822,7 @@ void acpi_set_modalias(struct acpi_device *adev, const char *default_id, char *modalias, size_t len) { if (!acpi_of_modalias(adev, modalias, len)) - strlcpy(modalias, default_id, len); + strscpy(modalias, default_id, len); } EXPORT_SYMBOL_GPL(acpi_set_modalias); @@ -925,12 +925,13 @@ static const void *acpi_of_device_get_match_data(const struct device *dev) const void *acpi_device_get_match_data(const struct device *dev) { + const struct acpi_device_id *acpi_ids = dev->driver->acpi_match_table; const struct acpi_device_id *match; - if (!dev->driver->acpi_match_table) + if (!acpi_ids) return acpi_of_device_get_match_data(dev); - match = acpi_match_device(dev->driver->acpi_match_table, dev); + match = acpi_match_device(acpi_ids, dev); if (!match) return NULL; @@ -948,14 +949,13 @@ EXPORT_SYMBOL(acpi_match_device_ids); bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { - if (!drv->acpi_match_table) - return acpi_of_match_device(ACPI_COMPANION(dev), - drv->of_match_table, - NULL); - - return __acpi_match_device(acpi_companion_match(dev), - drv->acpi_match_table, drv->of_match_table, - NULL, NULL); + const struct acpi_device_id *acpi_ids = drv->acpi_match_table; + const struct of_device_id *of_ids = drv->of_match_table; + + if (!acpi_ids) + return acpi_of_match_device(ACPI_COMPANION(dev), of_ids, NULL); + + return __acpi_match_device(acpi_companion_match(dev), acpi_ids, of_ids, NULL, NULL); } EXPORT_SYMBOL_GPL(acpi_driver_match_device); @@ -973,16 +973,13 @@ EXPORT_SYMBOL_GPL(acpi_driver_match_device); */ int acpi_bus_register_driver(struct acpi_driver *driver) { - int ret; - if (acpi_disabled) return -ENODEV; driver->drv.name = driver->name; driver->drv.bus = &acpi_bus_type; driver->drv.owner = driver->owner; - ret = driver_register(&driver->drv); - return ret; + return driver_register(&driver->drv); } EXPORT_SYMBOL(acpi_bus_register_driver); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 1e15a9f25ae9..093675b1a1ff 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -424,6 +424,9 @@ bool acpi_cpc_valid(void) struct cpc_desc *cpc_ptr; int cpu; + if (acpi_disabled) + return false; + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); if (!cpc_ptr) @@ -1241,6 +1244,48 @@ out_err: EXPORT_SYMBOL_GPL(cppc_get_perf_caps); /** + * cppc_perf_ctrs_in_pcc - Check if any perf counters are in a PCC region. + * + * CPPC has flexibility about how CPU performance counters are accessed. + * One of the choices is PCC regions, which can have a high access latency. This + * routine allows callers of cppc_get_perf_ctrs() to know this ahead of time. + * + * Return: true if any of the counters are in PCC regions, false otherwise + */ +bool cppc_perf_ctrs_in_pcc(void) +{ + int cpu; + + for_each_present_cpu(cpu) { + struct cpc_register_resource *ref_perf_reg; + struct cpc_desc *cpc_desc; + + cpc_desc = per_cpu(cpc_desc_ptr, cpu); + + if (CPC_IN_PCC(&cpc_desc->cpc_regs[DELIVERED_CTR]) || + CPC_IN_PCC(&cpc_desc->cpc_regs[REFERENCE_CTR]) || + CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME])) + return true; + + + ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; + + /* + * If reference perf register is not supported then we should + * use the nominal perf value + */ + if (!CPC_SUPPORTED(ref_perf_reg)) + ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; + + if (CPC_IN_PCC(ref_perf_reg)) + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(cppc_perf_ctrs_in_pcc); + +/** * cppc_get_perf_ctrs - Read a CPU's performance feedback counters. * @cpunum: CPU from which to read counters. * @perf_fb_ctrs: ptr to cppc_perf_fb_ctrs. See cppc_acpi.h diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 9dce1245689c..d594effe905f 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -75,15 +75,17 @@ static int acpi_dev_pm_explicit_get(struct acpi_device *device, int *state) int acpi_device_get_power(struct acpi_device *device, int *state) { int result = ACPI_STATE_UNKNOWN; + struct acpi_device *parent; int error; if (!device || !state) return -EINVAL; + parent = acpi_dev_parent(device); + if (!device->flags.power_manageable) { /* TBD: Non-recursive algorithm for walking up hierarchy. */ - *state = device->parent ? - device->parent->power.state : ACPI_STATE_D0; + *state = parent ? parent->power.state : ACPI_STATE_D0; goto out; } @@ -122,10 +124,10 @@ int acpi_device_get_power(struct acpi_device *device, int *state) * point, the fact that the device is in D0 implies that the parent has * to be in D0 too, except if ignore_parent is set. */ - if (!device->power.flags.ignore_parent && device->parent - && device->parent->power.state == ACPI_STATE_UNKNOWN - && result == ACPI_STATE_D0) - device->parent->power.state = ACPI_STATE_D0; + if (!device->power.flags.ignore_parent && parent && + parent->power.state == ACPI_STATE_UNKNOWN && + result == ACPI_STATE_D0) + parent->power.state = ACPI_STATE_D0; *state = result; @@ -191,13 +193,17 @@ int acpi_device_set_power(struct acpi_device *device, int state) return -ENODEV; } - if (!device->power.flags.ignore_parent && device->parent && - state < device->parent->power.state) { - acpi_handle_debug(device->handle, - "Cannot transition to %s for parent in %s\n", - acpi_power_state_string(state), - acpi_power_state_string(device->parent->power.state)); - return -ENODEV; + if (!device->power.flags.ignore_parent) { + struct acpi_device *parent; + + parent = acpi_dev_parent(device); + if (parent && state < parent->power.state) { + acpi_handle_debug(device->handle, + "Cannot transition to %s for parent in %s\n", + acpi_power_state_string(state), + acpi_power_state_string(parent->power.state)); + return -ENODEV; + } } /* @@ -497,7 +503,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) acpi_handle_debug(handle, "Wake notify\n"); - adev = acpi_bus_get_acpi_device(handle); + adev = acpi_get_acpi_dev(handle); if (!adev) return; @@ -515,7 +521,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) mutex_unlock(&acpi_pm_notifier_lock); - acpi_bus_put_acpi_device(adev); + acpi_put_acpi_dev(adev); } /** @@ -1460,7 +1466,7 @@ EXPORT_SYMBOL_GPL(acpi_storage_d3); * not valid to ask for the ACPI power state of the device in that time frame. * * This function is intended to be used in a driver's probe or remove - * function. See Documentation/firmware-guide/acpi/low-power-probe.rst for + * function. See Documentation/firmware-guide/acpi/non-d0-probe.rst for * more information. */ bool acpi_dev_state_d0(struct device *dev) diff --git a/drivers/acpi/dptf/Kconfig b/drivers/acpi/dptf/Kconfig index 1e8c7ce89bf1..4b3fdc03e4ed 100644 --- a/drivers/acpi/dptf/Kconfig +++ b/drivers/acpi/dptf/Kconfig @@ -11,9 +11,6 @@ menuconfig ACPI_DPTF a coordinated approach for different policies to effect the hardware state of a system. - For more information see: - <https://01.org/intel%C2%AE-dynamic-platform-and-thermal-framework-dptf-chromium-os/overview> - if ACPI_DPTF config DPTF_POWER diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index c95e535035a0..9b42628cf21b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -917,14 +917,10 @@ EXPORT_SYMBOL(ec_read); int ec_write(u8 addr, u8 val) { - int err; - if (!first_ec) return -ENODEV; - err = acpi_ec_write(first_ec, addr, val); - - return err; + return acpi_ec_write(first_ec, addr, val); } EXPORT_SYMBOL(ec_write); diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index b9a9a59ddcc1..52a0b303b70a 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -19,43 +19,12 @@ #include "fan.h" -MODULE_AUTHOR("Paul Diefenbaugh"); -MODULE_DESCRIPTION("ACPI Fan Driver"); -MODULE_LICENSE("GPL"); - -static int acpi_fan_probe(struct platform_device *pdev); -static int acpi_fan_remove(struct platform_device *pdev); - static const struct acpi_device_id fan_device_ids[] = { ACPI_FAN_DEVICE_IDS, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, fan_device_ids); -#ifdef CONFIG_PM_SLEEP -static int acpi_fan_suspend(struct device *dev); -static int acpi_fan_resume(struct device *dev); -static const struct dev_pm_ops acpi_fan_pm = { - .resume = acpi_fan_resume, - .freeze = acpi_fan_suspend, - .thaw = acpi_fan_resume, - .restore = acpi_fan_resume, -}; -#define FAN_PM_OPS_PTR (&acpi_fan_pm) -#else -#define FAN_PM_OPS_PTR NULL -#endif - -static struct platform_driver acpi_fan_driver = { - .probe = acpi_fan_probe, - .remove = acpi_fan_remove, - .driver = { - .name = "acpi-fan", - .acpi_match_table = fan_device_ids, - .pm = FAN_PM_OPS_PTR, - }, -}; - /* thermal cooling device callbacks */ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) @@ -459,6 +428,33 @@ static int acpi_fan_resume(struct device *dev) return result; } + +static const struct dev_pm_ops acpi_fan_pm = { + .resume = acpi_fan_resume, + .freeze = acpi_fan_suspend, + .thaw = acpi_fan_resume, + .restore = acpi_fan_resume, +}; +#define FAN_PM_OPS_PTR (&acpi_fan_pm) + +#else + +#define FAN_PM_OPS_PTR NULL + #endif +static struct platform_driver acpi_fan_driver = { + .probe = acpi_fan_probe, + .remove = acpi_fan_remove, + .driver = { + .name = "acpi-fan", + .acpi_match_table = fan_device_ids, + .pm = FAN_PM_OPS_PTR, + }, +}; + module_platform_driver(acpi_fan_driver); + +MODULE_AUTHOR("Paul Diefenbaugh"); +MODULE_DESCRIPTION("ACPI Fan Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 628bf8f18130..219c02df9a08 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -102,10 +102,10 @@ struct acpi_device_bus_id { struct list_head node; }; -int acpi_device_add(struct acpi_device *device, - void (*release)(struct device *)); void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, - int type); + int type, void (*release)(struct device *)); +int acpi_tie_acpi_dev(struct acpi_device *adev); +int acpi_device_add(struct acpi_device *device); int acpi_device_setup_files(struct acpi_device *dev); void acpi_device_remove_files(struct acpi_device *dev); void acpi_device_add_finalize(struct acpi_device *device); diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index dabe45eba055..4db5bb587599 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -118,12 +118,12 @@ acpi_get_irq_source_fwhandle(const struct acpi_resource_source *source, if (WARN_ON(ACPI_FAILURE(status))) return NULL; - device = acpi_bus_get_acpi_device(handle); + device = acpi_get_acpi_dev(handle); if (WARN_ON(!device)) return NULL; result = &device->fwnode; - acpi_bus_put_acpi_device(device); + acpi_put_acpi_dev(device); return result; } diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index c3d783aca196..23f49a2f4d14 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -9,7 +9,6 @@ */ #define pr_fmt(fmt) "acpi/hmat: " fmt -#define dev_fmt(fmt) "acpi/hmat: " fmt #include <linux/acpi.h> #include <linux/bitops.h> @@ -302,7 +301,7 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, u8 type, mem_hier; if (hmat_loc->header.length < sizeof(*hmat_loc)) { - pr_notice("HMAT: Unexpected locality header length: %u\n", + pr_notice("Unexpected locality header length: %u\n", hmat_loc->header.length); return -EINVAL; } @@ -314,12 +313,12 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header, total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds + sizeof(*inits) * ipds + sizeof(*targs) * tpds; if (hmat_loc->header.length < total_size) { - pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n", + pr_notice("Unexpected locality header length:%u, minimum required:%u\n", hmat_loc->header.length, total_size); return -EINVAL; } - pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n", + pr_info("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n", hmat_loc->flags, hmat_data_type(type), ipds, tpds, hmat_loc->entry_base_unit); @@ -363,13 +362,13 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header, u32 attrs; if (cache->header.length < sizeof(*cache)) { - pr_notice("HMAT: Unexpected cache header length: %u\n", + pr_notice("Unexpected cache header length: %u\n", cache->header.length); return -EINVAL; } attrs = cache->cache_attributes; - pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n", + pr_info("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n", cache->memory_PD, cache->cache_size, attrs, cache->number_of_SMBIOShandles); @@ -424,24 +423,24 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade struct memory_target *target = NULL; if (p->header.length != sizeof(*p)) { - pr_notice("HMAT: Unexpected address range header length: %u\n", + pr_notice("Unexpected address range header length: %u\n", p->header.length); return -EINVAL; } if (hmat_revision == 1) - pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n", + pr_info("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n", p->reserved3, p->reserved4, p->flags, p->processor_PD, p->memory_PD); else - pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n", + pr_info("Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n", p->flags, p->processor_PD, p->memory_PD); if ((hmat_revision == 1 && p->flags & ACPI_HMAT_MEMORY_PD_VALID) || hmat_revision > 1) { target = find_mem_target(p->memory_PD); if (!target) { - pr_debug("HMAT: Memory Domain missing from SRAT\n"); + pr_debug("Memory Domain missing from SRAT\n"); return -EINVAL; } } @@ -449,7 +448,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade int p_node = pxm_to_node(p->processor_PD); if (p_node == NUMA_NO_NODE) { - pr_debug("HMAT: Invalid Processor Domain\n"); + pr_debug("Invalid Processor Domain\n"); return -EINVAL; } target->processor_pxm = p->processor_PD; @@ -840,7 +839,7 @@ static __init int hmat_init(void) case 2: break; default: - pr_notice("Ignoring HMAT: Unknown revision:%d\n", hmat_revision); + pr_notice("Ignoring: Unknown revision:%d\n", hmat_revision); goto out_put; } @@ -848,7 +847,7 @@ static __init int hmat_init(void) if (acpi_table_parse_entries(ACPI_SIG_HMAT, sizeof(struct acpi_table_hmat), i, hmat_parse_subtable, 0) < 0) { - pr_notice("Ignoring HMAT: Invalid table"); + pr_notice("Ignoring: Invalid table"); goto out_put; } } diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index 9f6853809138..d4405e1ca9b9 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c @@ -44,30 +44,6 @@ osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = { {"Processor Device", true}, {"3.0 _SCP Extensions", true}, {"Processor Aggregator Device", true}, - /* - * Linux-Dell-Video is used by BIOS to disable RTD3 for NVidia graphics - * cards as RTD3 is not supported by drivers now. Systems with NVidia - * cards will hang without RTD3 disabled. - * - * Once NVidia drivers officially support RTD3, this _OSI strings can - * be removed if both new and old graphics cards are supported. - */ - {"Linux-Dell-Video", true}, - /* - * Linux-Lenovo-NV-HDMI-Audio is used by BIOS to power on NVidia's HDMI - * audio device which is turned off for power-saving in Windows OS. - * This power management feature observed on some Lenovo Thinkpad - * systems which will not be able to output audio via HDMI without - * a BIOS workaround. - */ - {"Linux-Lenovo-NV-HDMI-Audio", true}, - /* - * Linux-HPI-Hybrid-Graphics is used by BIOS to enable dGPU to - * output video directly to external monitors on HP Inc. mobile - * workstations as Nvidia and AMD VGA drivers provide limited - * hybrid graphics supports. - */ - {"Linux-HPI-Hybrid-Graphics", true}, }; static u32 acpi_osi_handler(acpi_string interface, u32 supported) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d57cf8454b93..c8385ef54c37 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -312,76 +312,25 @@ struct acpi_handle_node { */ struct pci_dev *acpi_get_pci_dev(acpi_handle handle) { - int dev, fn; - unsigned long long adr; - acpi_status status; - acpi_handle phandle; - struct pci_bus *pbus; - struct pci_dev *pdev = NULL; - struct acpi_handle_node *node, *tmp; - struct acpi_pci_root *root; - LIST_HEAD(device_list); - - /* - * Walk up the ACPI CA namespace until we reach a PCI root bridge. - */ - phandle = handle; - while (!acpi_is_root_bridge(phandle)) { - node = kzalloc(sizeof(struct acpi_handle_node), GFP_KERNEL); - if (!node) - goto out; - - INIT_LIST_HEAD(&node->node); - node->handle = phandle; - list_add(&node->node, &device_list); - - status = acpi_get_parent(phandle, &phandle); - if (ACPI_FAILURE(status)) - goto out; - } - - root = acpi_pci_find_root(phandle); - if (!root) - goto out; + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); + struct acpi_device_physical_node *pn; + struct pci_dev *pci_dev = NULL; - pbus = root->bus; - - /* - * Now, walk back down the PCI device tree until we return to our - * original handle. Assumes that everything between the PCI root - * bridge and the device we're looking for must be a P2P bridge. - */ - list_for_each_entry(node, &device_list, node) { - acpi_handle hnd = node->handle; - status = acpi_evaluate_integer(hnd, "_ADR", NULL, &adr); - if (ACPI_FAILURE(status)) - goto out; - dev = (adr >> 16) & 0xffff; - fn = adr & 0xffff; - - pdev = pci_get_slot(pbus, PCI_DEVFN(dev, fn)); - if (!pdev || hnd == handle) - break; + if (!adev) + return NULL; - pbus = pdev->subordinate; - pci_dev_put(pdev); + mutex_lock(&adev->physical_node_lock); - /* - * This function may be called for a non-PCI device that has a - * PCI parent (eg. a disk under a PCI SATA controller). In that - * case pdev->subordinate will be NULL for the parent. - */ - if (!pbus) { - dev_dbg(&pdev->dev, "Not a PCI-to-PCI bridge\n"); - pdev = NULL; + list_for_each_entry(pn, &adev->physical_node_list, node) { + if (dev_is_pci(pn->dev)) { + pci_dev = to_pci_dev(pn->dev); break; } } -out: - list_for_each_entry_safe(node, tmp, &device_list, node) - kfree(node); - return pdev; + mutex_unlock(&adev->physical_node_lock); + + return pci_dev; } EXPORT_SYMBOL_GPL(acpi_get_pci_dev); diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 8c4a73a1351e..f2588aba8421 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -944,13 +944,15 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle) return NULL; device = &resource->device; - acpi_init_device_object(device, handle, ACPI_BUS_TYPE_POWER); + acpi_init_device_object(device, handle, ACPI_BUS_TYPE_POWER, + acpi_release_power_resource); mutex_init(&resource->resource_lock); INIT_LIST_HEAD(&resource->list_node); INIT_LIST_HEAD(&resource->dependents); strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_POWER_CLASS); device->power.state = ACPI_STATE_UNKNOWN; + device->flags.match_driver = true; /* Evaluate the object to get the system level and resource order. */ status = acpi_evaluate_object(handle, NULL, NULL, &buffer); @@ -967,8 +969,11 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle) pr_info("%s [%s]\n", acpi_device_name(device), acpi_device_bid(device)); - device->flags.match_driver = true; - result = acpi_device_add(device, acpi_release_power_resource); + result = acpi_tie_acpi_dev(device); + if (result) + goto err; + + result = acpi_device_add(device); if (result) goto err; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 9f40917c49ef..acfabfe07c4f 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -804,7 +804,7 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) state = &drv->states[count]; snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i); - strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); + strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); state->exit_latency = cx->latency; state->target_residency = cx->latency * latency_factor; state->enter = acpi_idle_enter; @@ -973,7 +973,7 @@ static int acpi_processor_evaluate_lpi(acpi_handle handle, obj = pkg_elem + 9; if (obj->type == ACPI_TYPE_STRING) - strlcpy(lpi_state->desc, obj->string.pointer, + strscpy(lpi_state->desc, obj->string.pointer, ACPI_CX_DESC_LEN); lpi_state->index = state_idx; @@ -1039,7 +1039,7 @@ static bool combine_lpi_states(struct acpi_lpi_state *local, result->arch_flags = parent->arch_flags; result->index = parent->index; - strlcpy(result->desc, local->desc, ACPI_CX_DESC_LEN); + strscpy(result->desc, local->desc, ACPI_CX_DESC_LEN); strlcat(result->desc, "+", ACPI_CX_DESC_LEN); strlcat(result->desc, parent->desc, ACPI_CX_DESC_LEN); return true; @@ -1213,7 +1213,7 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) state = &drv->states[i]; snprintf(state->name, CPUIDLE_NAME_LEN, "LPI-%d", i); - strlcpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); + strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; state->target_residency = lpi->min_residency; if (lpi->arch_flags) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index d4c168ce428c..b8d9eb9a433e 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -304,8 +304,10 @@ static void acpi_init_of_compatible(struct acpi_device *adev) ret = acpi_dev_get_property(adev, "compatible", ACPI_TYPE_STRING, &of_compatible); if (ret) { - if (adev->parent - && adev->parent->flags.of_compatible_ok) + struct acpi_device *parent; + + parent = acpi_dev_parent(adev); + if (parent && parent->flags.of_compatible_ok) goto out; return; @@ -1267,10 +1269,11 @@ acpi_node_get_parent(const struct fwnode_handle *fwnode) return to_acpi_data_node(fwnode)->parent; } if (is_acpi_device_node(fwnode)) { - struct device *dev = to_acpi_device_node(fwnode)->dev.parent; + struct acpi_device *parent; - if (dev) - return acpi_fwnode_handle(to_acpi_device(dev)); + parent = acpi_dev_parent(to_acpi_device_node(fwnode)); + if (parent) + return acpi_fwnode_handle(parent); } return NULL; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 510cdec375c4..514d89656dde 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -399,6 +399,31 @@ static const struct dmi_system_id medion_laptop[] = { { } }; +static const struct dmi_system_id asus_laptop[] = { + { + .ident = "Asus Vivobook K3402ZA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "K3402ZA"), + }, + }, + { + .ident = "Asus Vivobook K3502ZA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "K3502ZA"), + }, + }, + { + .ident = "Asus Vivobook S5402ZA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "S5402ZA"), + }, + }, + { } +}; + struct irq_override_cmp { const struct dmi_system_id *system; unsigned char irq; @@ -409,6 +434,7 @@ struct irq_override_cmp { static const struct irq_override_cmp skip_override_table[] = { { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0 }, + { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0 }, }; static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, @@ -690,6 +716,9 @@ static int is_memory(struct acpi_resource *ares, void *not_used) memset(&win, 0, sizeof(win)); + if (acpi_dev_filter_resource_type(ares, IORESOURCE_MEM)) + return 1; + return !(acpi_dev_resource_memory(ares, res) || acpi_dev_resource_address_space(ares, &win) || acpi_dev_resource_ext_address_space(ares, &win)); @@ -719,6 +748,23 @@ int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list) EXPORT_SYMBOL_GPL(acpi_dev_get_dma_resources); /** + * acpi_dev_get_memory_resources - Get current memory resources of a device. + * @adev: ACPI device node to get the resources for. + * @list: Head of the resultant list of resources (must be empty). + * + * This is a helper function that locates all memory type resources of @adev + * with acpi_dev_get_resources(). + * + * The number of resources in the output list is returned on success, an error + * code reflecting the error condition is returned otherwise. + */ +int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list) +{ + return acpi_dev_get_resources(adev, list, is_memory, NULL); +} +EXPORT_SYMBOL_GPL(acpi_dev_get_memory_resources); + +/** * acpi_dev_filter_resource_type - Filter ACPI resource according to resource * types * @ares: Input ACPI resource object. diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 4938010fcac7..e6a01a8df1b8 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -632,7 +632,7 @@ static int acpi_sbs_add(struct acpi_device *device) mutex_init(&sbs->lock); - sbs->hc = acpi_driver_data(device->parent); + sbs->hc = acpi_driver_data(acpi_dev_parent(device)); sbs->device = device; strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_SBS_CLASS); diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 7c62e149a7a1..340e0b61587e 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -266,7 +266,7 @@ static int acpi_smbus_hc_add(struct acpi_device *device) mutex_init(&hc->lock); init_waitqueue_head(&hc->wait); - hc->ec = acpi_driver_data(device->parent); + hc->ec = acpi_driver_data(acpi_dev_parent(device)); hc->offset = (val >> 8) & 0xff; hc->query_bit = val & 0xff; device->driver_data = hc; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 42cec8120f18..558664d169fc 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -20,6 +20,7 @@ #include <linux/platform_data/x86/apple.h> #include <linux/pgtable.h> #include <linux/crc32.h> +#include <linux/dma-direct.h> #include "internal.h" @@ -29,8 +30,6 @@ extern struct acpi_device *acpi_root; #define ACPI_BUS_HID "LNXSYBUS" #define ACPI_BUS_DEVICE_NAME "System Bus" -#define ACPI_IS_ROOT_DEVICE(device) (!(device)->parent) - #define INVALID_ACPI_HANDLE ((acpi_handle)empty_zero_page) static const char *dummy_hid = "device"; @@ -429,7 +428,7 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) acpi_evaluate_ost(adev->handle, src, ost_code, NULL); out: - acpi_bus_put_acpi_device(adev); + acpi_put_acpi_dev(adev); mutex_unlock(&acpi_scan_lock); unlock_device_hotplug(); } @@ -599,11 +598,22 @@ static void get_acpi_device(void *dev) acpi_dev_get(dev); } -struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle) +/** + * acpi_get_acpi_dev - Retrieve ACPI device object and reference count it. + * @handle: ACPI handle associated with the requested ACPI device object. + * + * Return a pointer to the ACPI device object associated with @handle and bump + * up that object's reference counter (under the ACPI Namespace lock), if + * present, or return NULL otherwise. + * + * The ACPI device object reference acquired by this function needs to be + * dropped via acpi_dev_put(). + */ +struct acpi_device *acpi_get_acpi_dev(acpi_handle handle) { return handle_to_device(handle, get_acpi_device); } -EXPORT_SYMBOL_GPL(acpi_bus_get_acpi_device); +EXPORT_SYMBOL_GPL(acpi_get_acpi_dev); static struct acpi_device_bus_id *acpi_device_bus_id_match(const char *dev_id) { @@ -632,7 +642,7 @@ static int acpi_device_set_name(struct acpi_device *device, return 0; } -static int acpi_tie_acpi_dev(struct acpi_device *adev) +int acpi_tie_acpi_dev(struct acpi_device *adev) { acpi_handle handle = adev->handle; acpi_status status; @@ -662,8 +672,7 @@ static void acpi_store_pld_crc(struct acpi_device *adev) ACPI_FREE(pld); } -static int __acpi_device_add(struct acpi_device *device, - void (*release)(struct device *)) +int acpi_device_add(struct acpi_device *device) { struct acpi_device_bus_id *acpi_device_bus_id; int result; @@ -719,11 +728,6 @@ static int __acpi_device_add(struct acpi_device *device, mutex_unlock(&acpi_device_lock); - if (device->parent) - device->dev.parent = &device->parent->dev; - - device->dev.bus = &acpi_bus_type; - device->dev.release = release; result = device_add(&device->dev); if (result) { dev_err(&device->dev, "Error registering device\n"); @@ -750,17 +754,6 @@ err_unlock: return result; } -int acpi_device_add(struct acpi_device *adev, void (*release)(struct device *)) -{ - int ret; - - ret = acpi_tie_acpi_dev(adev); - if (ret) - return ret; - - return __acpi_device_add(adev, release); -} - /* -------------------------------------------------------------------------- Device Enumeration -------------------------------------------------------------------------- */ @@ -805,10 +798,9 @@ static const char * const acpi_honor_dep_ids[] = { NULL }; -static struct acpi_device *acpi_bus_get_parent(acpi_handle handle) +static struct acpi_device *acpi_find_parent_acpi_dev(acpi_handle handle) { - struct acpi_device *device; - acpi_status status; + struct acpi_device *adev; /* * Fixed hardware devices do not appear in the namespace and do not @@ -819,13 +811,18 @@ static struct acpi_device *acpi_bus_get_parent(acpi_handle handle) return acpi_root; do { + acpi_status status; + status = acpi_get_parent(handle, &handle); - if (ACPI_FAILURE(status)) - return status == AE_NULL_ENTRY ? NULL : acpi_root; + if (ACPI_FAILURE(status)) { + if (status != AE_NULL_ENTRY) + return acpi_root; - device = acpi_fetch_acpi_dev(handle); - } while (!device); - return device; + return NULL; + } + adev = acpi_fetch_acpi_dev(handle); + } while (!adev); + return adev; } acpi_status @@ -1112,7 +1109,7 @@ static void acpi_device_get_busid(struct acpi_device *device) * The device's Bus ID is simply the object name. * TBD: Shouldn't this value be unique (within the ACPI namespace)? */ - if (ACPI_IS_ROOT_DEVICE(device)) { + if (!acpi_dev_parent(device)) { strcpy(device->pnp.bus_id, "ACPI"); return; } @@ -1467,25 +1464,21 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) * acpi_dma_get_range() - Get device DMA parameters. * * @dev: device to configure - * @dma_addr: pointer device DMA address result - * @offset: pointer to the DMA offset result - * @size: pointer to DMA range size result + * @map: pointer to DMA ranges result * - * Evaluate DMA regions and return respectively DMA region start, offset - * and size in dma_addr, offset and size on parsing success; it does not - * update the passed in values on failure. + * Evaluate DMA regions and return pointer to DMA regions on + * parsing success; it does not update the passed in values on failure. * * Return 0 on success, < 0 on failure. */ -int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, - u64 *size) +int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { struct acpi_device *adev; LIST_HEAD(list); struct resource_entry *rentry; int ret; struct device *dma_dev = dev; - u64 len, dma_start = U64_MAX, dma_end = 0, dma_offset = 0; + struct bus_dma_region *r; /* * Walk the device tree chasing an ACPI companion with a _DMA @@ -1510,31 +1503,28 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, ret = acpi_dev_get_dma_resources(adev, &list); if (ret > 0) { + r = kcalloc(ret + 1, sizeof(*r), GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + goto out; + } + list_for_each_entry(rentry, &list, node) { - if (dma_offset && rentry->offset != dma_offset) { + if (rentry->res->start >= rentry->res->end) { + kfree(r); ret = -EINVAL; - dev_warn(dma_dev, "Can't handle multiple windows with different offsets\n"); + dev_dbg(dma_dev, "Invalid DMA regions configuration\n"); goto out; } - dma_offset = rentry->offset; - /* Take lower and upper limits */ - if (rentry->res->start < dma_start) - dma_start = rentry->res->start; - if (rentry->res->end > dma_end) - dma_end = rentry->res->end; - } - - if (dma_start >= dma_end) { - ret = -EINVAL; - dev_dbg(dma_dev, "Invalid DMA regions configuration\n"); - goto out; + r->cpu_start = rentry->res->start; + r->dma_start = rentry->res->start - rentry->offset; + r->size = resource_size(rentry->res); + r->offset = rentry->offset; + r++; } - *dma_addr = dma_start - dma_offset; - len = dma_end - dma_start; - *size = max(len, len + 1); - *offset = dma_offset; + *map = r; } out: acpi_dev_free_resource_list(&list); @@ -1624,20 +1614,19 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { const struct iommu_ops *iommu; - u64 dma_addr = 0, size = 0; if (attr == DEV_DMA_NOT_SUPPORTED) { set_dma_ops(dev, &dma_dummy_ops); return 0; } - acpi_arch_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev); iommu = acpi_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; - arch_setup_dma_ops(dev, dma_addr, size, + arch_setup_dma_ops(dev, 0, U64_MAX, iommu, attr == DEV_DMA_COHERENT); return 0; @@ -1648,7 +1637,7 @@ static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; acpi_status status; - struct acpi_device *parent = adev->parent; + struct acpi_device *parent = acpi_dev_parent(adev); if (parent && parent->flags.cca_seen) { /* @@ -1692,7 +1681,7 @@ static int acpi_check_serial_bus_slave(struct acpi_resource *ares, void *data) static bool acpi_is_indirect_io_slave(struct acpi_device *device) { - struct acpi_device *parent = device->parent; + struct acpi_device *parent = acpi_dev_parent(device); static const struct acpi_device_id indirect_io_hosts[] = { {"HISI0191", 0}, {} @@ -1762,12 +1751,16 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) } void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, - int type) + int type, void (*release)(struct device *)) { + struct acpi_device *parent = acpi_find_parent_acpi_dev(handle); + INIT_LIST_HEAD(&device->pnp.ids); device->device_type = type; device->handle = handle; - device->parent = acpi_bus_get_parent(handle); + device->dev.parent = parent ? &parent->dev : NULL; + device->dev.release = release; + device->dev.bus = &acpi_bus_type; fwnode_init(&device->fwnode, &acpi_device_fwnode_ops); acpi_set_device_status(device, ACPI_STA_DEFAULT); acpi_device_get_busid(device); @@ -1821,7 +1814,7 @@ static int acpi_add_single_object(struct acpi_device **child, if (!device) return -ENOMEM; - acpi_init_device_object(device, handle, type); + acpi_init_device_object(device, handle, type, acpi_device_release); /* * Getting the status is delayed till here so that we can call * acpi_bus_get_status() and use its quirk handling. Note that @@ -1851,7 +1844,7 @@ static int acpi_add_single_object(struct acpi_device **child, mutex_unlock(&acpi_dep_list_lock); if (!result) - result = __acpi_device_add(device, acpi_device_release); + result = acpi_device_add(device); if (result) { acpi_device_release(&device->dev); @@ -1862,8 +1855,8 @@ static int acpi_add_single_object(struct acpi_device **child, acpi_device_add_finalize(device); acpi_handle_debug(handle, "Added as %s, parent %s\n", - dev_name(&device->dev), device->parent ? - dev_name(&device->parent->dev) : "(null)"); + dev_name(&device->dev), device->dev.parent ? + dev_name(device->dev.parent) : "(null)"); *child = device; return 0; @@ -2235,11 +2228,24 @@ ok: return 0; } -static int acpi_dev_get_first_consumer_dev_cb(struct acpi_dep_data *dep, void *data) +static int acpi_dev_get_next_consumer_dev_cb(struct acpi_dep_data *dep, void *data) { - struct acpi_device *adev; + struct acpi_device **adev_p = data; + struct acpi_device *adev = *adev_p; - adev = acpi_bus_get_acpi_device(dep->consumer); + /* + * If we're passed a 'previous' consumer device then we need to skip + * any consumers until we meet the previous one, and then NULL @data + * so the next one can be returned. + */ + if (adev) { + if (dep->consumer == adev->handle) + *adev_p = NULL; + + return 0; + } + + adev = acpi_get_acpi_dev(dep->consumer); if (adev) { *(struct acpi_device **)data = adev; return 1; @@ -2292,7 +2298,7 @@ static bool acpi_scan_clear_dep_queue(struct acpi_device *adev) static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data) { - struct acpi_device *adev = acpi_bus_get_acpi_device(dep->consumer); + struct acpi_device *adev = acpi_get_acpi_dev(dep->consumer); if (adev) { adev->dep_unmet--; @@ -2368,25 +2374,32 @@ bool acpi_dev_ready_for_enumeration(const struct acpi_device *device) EXPORT_SYMBOL_GPL(acpi_dev_ready_for_enumeration); /** - * acpi_dev_get_first_consumer_dev - Return ACPI device dependent on @supplier + * acpi_dev_get_next_consumer_dev - Return the next adev dependent on @supplier * @supplier: Pointer to the dependee device + * @start: Pointer to the current dependent device * - * Returns the first &struct acpi_device which declares itself dependent on + * Returns the next &struct acpi_device which declares itself dependent on * @supplier via the _DEP buffer, parsed from the acpi_dep_list. * - * The caller is responsible for putting the reference to adev when it is no - * longer needed. + * If the returned adev is not passed as @start to this function, the caller is + * responsible for putting the reference to adev when it is no longer needed. */ -struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier) +struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier, + struct acpi_device *start) { - struct acpi_device *adev = NULL; + struct acpi_device *adev = start; acpi_walk_dep_device_list(supplier->handle, - acpi_dev_get_first_consumer_dev_cb, &adev); + acpi_dev_get_next_consumer_dev_cb, &adev); + + acpi_dev_put(start); + + if (adev == start) + return NULL; return adev; } -EXPORT_SYMBOL_GPL(acpi_dev_get_first_consumer_dev); +EXPORT_SYMBOL_GPL(acpi_dev_get_next_consumer_dev); /** * acpi_bus_scan - Add ACPI device node objects in a given namespace scope. diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 5a7b8065e77f..2ea14648a661 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -794,6 +794,30 @@ bool acpi_dev_hid_uid_match(struct acpi_device *adev, EXPORT_SYMBOL(acpi_dev_hid_uid_match); /** + * acpi_dev_uid_to_integer - treat ACPI device _UID as integer + * @adev: ACPI device to get _UID from + * @integer: output buffer for integer + * + * Considers _UID as integer and converts it to @integer. + * + * Returns 0 on success, or negative error code otherwise. + */ +int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) +{ + const char *uid; + + if (!adev) + return -ENODEV; + + uid = acpi_device_uid(adev); + if (!uid) + return -ENODATA; + + return kstrtou64(uid, 0, integer); +} +EXPORT_SYMBOL(acpi_dev_uid_to_integer); + +/** * acpi_dev_found - Detect presence of a given ACPI device in the namespace. * @hid: Hardware ID of the device. * @@ -878,7 +902,7 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) struct acpi_dev_match_info match = {}; struct device *dev; - strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + strscpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); match.uid = uid; match.hrv = hrv; @@ -911,7 +935,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha struct acpi_dev_match_info match = {}; struct device *dev; - strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + strscpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); match.uid = uid; match.hrv = hrv; @@ -961,7 +985,7 @@ EXPORT_SYMBOL(acpi_video_backlight_string); static int __init acpi_backlight(char *str) { - strlcpy(acpi_video_backlight_string, str, + strscpy(acpi_video_backlight_string, str, sizeof(acpi_video_backlight_string)); return 1; } diff --git a/drivers/acpi/x86/apple.c b/drivers/acpi/x86/apple.c index c285c91a5e9c..8812ecd03d55 100644 --- a/drivers/acpi/x86/apple.c +++ b/drivers/acpi/x86/apple.c @@ -8,6 +8,7 @@ #include <linux/bitmap.h> #include <linux/platform_data/x86/apple.h> #include <linux/uuid.h> +#include "../internal.h" /* Apple _DSM device properties GUID */ static const guid_t apple_prp_guid = diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index f9ac12b778e6..0155c1d2d608 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -17,6 +17,7 @@ #include <linux/acpi.h> #include <linux/device.h> +#include <linux/dmi.h> #include <linux/suspend.h> #include "../sleep.h" @@ -27,6 +28,10 @@ static bool sleep_no_lps0 __read_mostly; module_param(sleep_no_lps0, bool, 0644); MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface"); +static bool prefer_microsoft_dsm_guid __read_mostly; +module_param(prefer_microsoft_dsm_guid, bool, 0644); +MODULE_PARM_DESC(prefer_microsoft_dsm_guid, "Prefer using Microsoft GUID in LPS0 device _DSM evaluation"); + static const struct acpi_device_id lps0_device_ids[] = { {"PNP0D80", }, {"", }, @@ -363,40 +368,132 @@ out: return ret; } +struct amd_lps0_hid_device_data { + const unsigned int rev_id; + const bool check_off_by_one; + const bool prefer_amd_guid; +}; + +static const struct amd_lps0_hid_device_data amd_picasso = { + .rev_id = 0, + .check_off_by_one = true, + .prefer_amd_guid = false, +}; + +static const struct amd_lps0_hid_device_data amd_cezanne = { + .rev_id = 0, + .check_off_by_one = false, + .prefer_amd_guid = false, +}; + +static const struct amd_lps0_hid_device_data amd_rembrandt = { + .rev_id = 2, + .check_off_by_one = false, + .prefer_amd_guid = true, +}; + +static const struct acpi_device_id amd_hid_ids[] = { + {"AMD0004", (kernel_ulong_t)&amd_picasso, }, + {"AMD0005", (kernel_ulong_t)&amd_picasso, }, + {"AMDI0005", (kernel_ulong_t)&amd_picasso, }, + {"AMDI0006", (kernel_ulong_t)&amd_cezanne, }, + {"AMDI0007", (kernel_ulong_t)&amd_rembrandt, }, + {} +}; + +static int lps0_prefer_microsoft(const struct dmi_system_id *id) +{ + pr_debug("Preferring Microsoft GUID.\n"); + prefer_microsoft_dsm_guid = true; + return 0; +} + +static const struct dmi_system_id s2idle_dmi_table[] __initconst = { + { + /* + * ASUS TUF Gaming A17 FA707RE + * https://bugzilla.kernel.org/show_bug.cgi?id=216101 + */ + .callback = lps0_prefer_microsoft, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS TUF Gaming A17"), + }, + }, + { + /* ASUS ROG Zephyrus G14 (2022) */ + .callback = lps0_prefer_microsoft, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ROG Zephyrus G14 GA402"), + }, + }, + { + /* + * Lenovo Yoga Slim 7 Pro X 14ARH7 + * https://bugzilla.kernel.org/show_bug.cgi?id=216473 : 82V2 + * https://bugzilla.kernel.org/show_bug.cgi?id=216438 : 82TL + */ + .callback = lps0_prefer_microsoft, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82"), + }, + }, + { + /* + * ASUSTeK COMPUTER INC. ROG Flow X13 GV301RE_GV301RE + * https://gitlab.freedesktop.org/drm/amd/-/issues/2148 + */ + .callback = lps0_prefer_microsoft, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X13 GV301"), + }, + }, + { + /* + * ASUSTeK COMPUTER INC. ROG Flow X16 GV601RW_GV601RW + * https://gitlab.freedesktop.org/drm/amd/-/issues/2148 + */ + .callback = lps0_prefer_microsoft, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X16 GV601"), + }, + }, + {} +}; + static int lps0_device_attach(struct acpi_device *adev, const struct acpi_device_id *not_used) { if (lps0_device_handle) return 0; + lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, + ACPI_LPS0_DSM_UUID_MICROSOFT, 0, + &lps0_dsm_guid_microsoft); if (acpi_s2idle_vendor_amd()) { - /* AMD0004, AMD0005, AMDI0005: - * - Should use rev_id 0x0 - * - function mask > 0x3: Should use AMD method, but has off by one bug - * - function mask = 0x3: Should use Microsoft method - * AMDI0006: - * - should use rev_id 0x0 - * - function mask = 0x3: Should use Microsoft method - * AMDI0007: - * - Should use rev_id 0x2 - * - Should only use AMD method - */ - const char *hid = acpi_device_hid(adev); - rev_id = strcmp(hid, "AMDI0007") ? 0 : 2; + static const struct acpi_device_id *dev_id; + const struct amd_lps0_hid_device_data *data; + + for (dev_id = &amd_hid_ids[0]; dev_id->id[0]; dev_id++) + if (acpi_dev_hid_uid_match(adev, dev_id->id, NULL)) + break; + if (dev_id->id[0]) + data = (const struct amd_lps0_hid_device_data *) dev_id->driver_data; + else + data = &amd_rembrandt; + rev_id = data->rev_id; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); - lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, - ACPI_LPS0_DSM_UUID_MICROSOFT, 0, - &lps0_dsm_guid_microsoft); - if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") || - !strcmp(hid, "AMD0005") || - !strcmp(hid, "AMDI0005"))) { + if (lps0_dsm_func_mask > 0x3 && data->check_off_by_one) { lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); - } else if (lps0_dsm_func_mask_microsoft > 0 && - (!strcmp(hid, "AMDI0007") || - !strcmp(hid, "AMDI0008"))) { + } else if (lps0_dsm_func_mask_microsoft > 0 && data->prefer_amd_guid && + !prefer_microsoft_dsm_guid) { lps0_dsm_func_mask_microsoft = -EINVAL; acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); } @@ -404,7 +501,8 @@ static int lps0_device_attach(struct acpi_device *adev, rev_id = 1; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID, rev_id, &lps0_dsm_guid); - lps0_dsm_func_mask_microsoft = -EINVAL; + if (!prefer_microsoft_dsm_guid) + lps0_dsm_func_mask_microsoft = -EINVAL; } if (lps0_dsm_func_mask < 0 && lps0_dsm_func_mask_microsoft < 0) @@ -533,8 +631,9 @@ static const struct platform_s2idle_ops acpi_s2idle_ops_lps0 = { .end = acpi_s2idle_end, }; -void acpi_s2idle_setup(void) +void __init acpi_s2idle_setup(void) { + dmi_check_system(s2idle_dmi_table); acpi_scan_add_handler(&lps0_handler); s2idle_set_ops(&acpi_s2idle_ops_lps0); } diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index 664070fc8349..f8a2cbdc0ce2 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -207,9 +207,26 @@ static const struct x86_cpu_id storage_d3_cpu_ids[] = { {} }; +static const struct dmi_system_id force_storage_d3_dmi[] = { + { + /* + * _ADR is ambiguous between GPP1.DEV0 and GPP1.NVME + * but .NVME is needed to get StorageD3Enable node + * https://bugzilla.kernel.org/show_bug.cgi?id=216440 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14 7425 2-in-1"), + } + }, + {} +}; + bool force_storage_d3(void) { - return x86_match_cpu(storage_d3_cpu_ids); + const struct dmi_system_id *dmi_id = dmi_first_match(force_storage_d3_dmi); + + return dmi_id || x86_match_cpu(storage_d3_cpu_ids); } /* @@ -351,11 +368,17 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s struct acpi_device *adev = ACPI_COMPANION(controller_parent); const struct dmi_system_id *dmi_id; long quirks = 0; + u64 uid; + int ret; *skip = false; - /* !dev_is_platform() to not match on PNP enumerated debug UARTs */ - if (!adev || !adev->pnp.unique_id || !dev_is_platform(controller_parent)) + ret = acpi_dev_uid_to_integer(adev, &uid); + if (ret) + return 0; + + /* to not match on PNP enumerated debug UARTs */ + if (!dev_is_platform(controller_parent)) return 0; dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids); @@ -363,10 +386,10 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s quirks = (unsigned long)dmi_id->driver_data; if (quirks & ACPI_QUIRK_UART1_TTY_UART2_SKIP) { - if (!strcmp(adev->pnp.unique_id, "1")) + if (uid == 1) return -ENODEV; /* Create tty cdev instead of serdev */ - if (!strcmp(adev->pnp.unique_id, "2")) + if (uid == 2) *skip = true; } diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 79aa9f285312..b734e069034d 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -327,7 +327,7 @@ static int read_adc_sum(void *dev, u16 rtune_ctl_reg, void __iomem * mmio) } /* SATA AHCI temperature monitor */ -static int sata_ahci_read_temperature(void *dev, int *temp) +static int __sata_ahci_read_temperature(void *dev, int *temp) { u16 mpll_test_reg, rtune_ctl_reg, dac_ctl_reg, read_sum; u32 str1, str2, str3, str4; @@ -416,6 +416,11 @@ static int sata_ahci_read_temperature(void *dev, int *temp) return 0; } +static int sata_ahci_read_temperature(struct thermal_zone_device *tz, int *temp) +{ + return __sata_ahci_read_temperature(tz->devdata, temp); +} + static ssize_t sata_ahci_show_temp(struct device *dev, struct device_attribute *da, char *buf) @@ -423,14 +428,14 @@ static ssize_t sata_ahci_show_temp(struct device *dev, unsigned int temp = 0; int err; - err = sata_ahci_read_temperature(dev, &temp); + err = __sata_ahci_read_temperature(dev, &temp); if (err < 0) return err; return sprintf(buf, "%u\n", temp); } -static const struct thermal_zone_of_device_ops fsl_sata_ahci_of_thermal_ops = { +static const struct thermal_zone_device_ops fsl_sata_ahci_of_thermal_ops = { .get_temp = sata_ahci_read_temperature, }; @@ -1131,8 +1136,8 @@ static int imx_ahci_probe(struct platform_device *pdev) ret = PTR_ERR(hwmon_dev); goto disable_clk; } - devm_thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev, - &fsl_sata_ahci_of_thermal_ops); + devm_thermal_of_zone_register(hwmon_dev, 0, hwmon_dev, + &fsl_sata_ahci_of_thermal_ops); dev_info(dev, "%s: sensor 'sata_ahci'\n", dev_name(hwmon_dev)); } diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 46cbe4471e78..dd90591e51ba 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -353,7 +353,7 @@ void topology_init_cpu_capacity_cppc(void) struct cppc_perf_caps perf_caps; int cpu; - if (likely(acpi_disabled || !acpi_cpc_valid())) + if (likely(!acpi_cpc_valid())) return; raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity), diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 997be3ac20a7..b52049098d4e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -792,10 +792,13 @@ static int rpm_resume(struct device *dev, int rpmflags) DEFINE_WAIT(wait); if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { - if (dev->power.runtime_status == RPM_SUSPENDING) + if (dev->power.runtime_status == RPM_SUSPENDING) { dev->power.deferred_resume = true; - else + if (rpmflags & RPM_NOWAIT) + retval = -EINPROGRESS; + } else { retval = -EINPROGRESS; + } goto out; } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index e3befa2c1b66..7cc0c0cf8eaa 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -944,6 +944,8 @@ void pm_system_irq_wakeup(unsigned int irq_number) else irq_number = 0; + pm_pr_dbg("Triggering wakeup from IRQ %d\n", irq_number); + raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); if (irq_number) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 30255fcaf181..dd9a05174726 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -322,14 +322,14 @@ static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(status)) return status; - blk_mq_start_request(req); - vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); if (unlikely(vbr->sg_table.nents < 0)) { virtblk_cleanup_cmd(req); return BLK_STS_RESOURCE; } + blk_mq_start_request(req); + return BLK_STS_OK; } @@ -391,8 +391,7 @@ static bool virtblk_prep_rq_batch(struct request *req) } static bool virtblk_add_req_batch(struct virtio_blk_vq *vq, - struct request **rqlist, - struct request **requeue_list) + struct request **rqlist) { unsigned long flags; int err; @@ -408,7 +407,7 @@ static bool virtblk_add_req_batch(struct virtio_blk_vq *vq, if (err) { virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); - rq_list_add(requeue_list, req); + blk_mq_requeue_request(req, true); } } @@ -436,7 +435,7 @@ static void virtio_queue_rqs(struct request **rqlist) if (!next || req->mq_hctx != next->mq_hctx) { req->rq_next = NULL; - kick = virtblk_add_req_batch(vq, rqlist, &requeue_list); + kick = virtblk_add_req_batch(vq, rqlist); if (kick) virtqueue_notify(vq->vq); diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 1a098db12062..680f9d8d357c 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -726,6 +726,7 @@ void iproc_pll_clk_setup(struct device_node *node, const char *parent_name; struct iproc_clk *iclk_array; struct clk_hw_onecell_data *clk_data; + const char *clk_name; if (WARN_ON(!pll_ctrl) || WARN_ON(!clk_ctrl)) return; @@ -773,7 +774,12 @@ void iproc_pll_clk_setup(struct device_node *node, iclk = &iclk_array[0]; iclk->pll = pll; - init.name = node->name; + ret = of_property_read_string_index(node, "clock-output-names", + 0, &clk_name); + if (WARN_ON(ret)) + goto err_pll_register; + + init.name = clk_name; init.ops = &iproc_pll_ops; init.flags = 0; parent_name = of_clk_get_parent_name(node, 0); @@ -793,13 +799,11 @@ void iproc_pll_clk_setup(struct device_node *node, goto err_pll_register; clk_data->hws[0] = &iclk->hw; + parent_name = clk_name; /* now initialize and register all leaf clocks */ for (i = 1; i < num_clks; i++) { - const char *clk_name; - memset(&init, 0, sizeof(init)); - parent_name = node->name; ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); diff --git a/drivers/clk/clk-tps68470.c b/drivers/clk/clk-tps68470.c index e5fbefd6ac2d..38f44b5b9b1b 100644 --- a/drivers/clk/clk-tps68470.c +++ b/drivers/clk/clk-tps68470.c @@ -200,7 +200,9 @@ static int tps68470_clk_probe(struct platform_device *pdev) .flags = CLK_SET_RATE_GATE, }; struct tps68470_clkdata *tps68470_clkdata; + struct tps68470_clk_consumer *consumer; int ret; + int i; tps68470_clkdata = devm_kzalloc(&pdev->dev, sizeof(*tps68470_clkdata), GFP_KERNEL); @@ -223,10 +225,13 @@ static int tps68470_clk_probe(struct platform_device *pdev) return ret; if (pdata) { - ret = devm_clk_hw_register_clkdev(&pdev->dev, - &tps68470_clkdata->clkout_hw, - pdata->consumer_con_id, - pdata->consumer_dev_name); + for (i = 0; i < pdata->n_consumers; i++) { + consumer = &pdata->consumers[i]; + ret = devm_clk_hw_register_clkdev(&pdev->dev, + &tps68470_clkdata->clkout_hw, + consumer->consumer_con_id, + consumer->consumer_dev_name); + } } return ret; diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c index fc1bd23d4583..598f3cf4eba4 100644 --- a/drivers/clk/imx/clk-imx6sx.c +++ b/drivers/clk/imx/clk-imx6sx.c @@ -280,13 +280,13 @@ static void __init imx6sx_clocks_init(struct device_node *ccm_node) hws[IMX6SX_CLK_SSI3_SEL] = imx_clk_hw_mux("ssi3_sel", base + 0x1c, 14, 2, ssi_sels, ARRAY_SIZE(ssi_sels)); hws[IMX6SX_CLK_SSI2_SEL] = imx_clk_hw_mux("ssi2_sel", base + 0x1c, 12, 2, ssi_sels, ARRAY_SIZE(ssi_sels)); hws[IMX6SX_CLK_SSI1_SEL] = imx_clk_hw_mux("ssi1_sel", base + 0x1c, 10, 2, ssi_sels, ARRAY_SIZE(ssi_sels)); - hws[IMX6SX_CLK_QSPI1_SEL] = imx_clk_hw_mux_flags("qspi1_sel", base + 0x1c, 7, 3, qspi1_sels, ARRAY_SIZE(qspi1_sels), CLK_SET_RATE_PARENT); + hws[IMX6SX_CLK_QSPI1_SEL] = imx_clk_hw_mux("qspi1_sel", base + 0x1c, 7, 3, qspi1_sels, ARRAY_SIZE(qspi1_sels)); hws[IMX6SX_CLK_PERCLK_SEL] = imx_clk_hw_mux("perclk_sel", base + 0x1c, 6, 1, perclk_sels, ARRAY_SIZE(perclk_sels)); hws[IMX6SX_CLK_VID_SEL] = imx_clk_hw_mux("vid_sel", base + 0x20, 21, 3, vid_sels, ARRAY_SIZE(vid_sels)); hws[IMX6SX_CLK_ESAI_SEL] = imx_clk_hw_mux("esai_sel", base + 0x20, 19, 2, audio_sels, ARRAY_SIZE(audio_sels)); hws[IMX6SX_CLK_CAN_SEL] = imx_clk_hw_mux("can_sel", base + 0x20, 8, 2, can_sels, ARRAY_SIZE(can_sels)); hws[IMX6SX_CLK_UART_SEL] = imx_clk_hw_mux("uart_sel", base + 0x24, 6, 1, uart_sels, ARRAY_SIZE(uart_sels)); - hws[IMX6SX_CLK_QSPI2_SEL] = imx_clk_hw_mux_flags("qspi2_sel", base + 0x2c, 15, 3, qspi2_sels, ARRAY_SIZE(qspi2_sels), CLK_SET_RATE_PARENT); + hws[IMX6SX_CLK_QSPI2_SEL] = imx_clk_hw_mux("qspi2_sel", base + 0x2c, 15, 3, qspi2_sels, ARRAY_SIZE(qspi2_sels)); hws[IMX6SX_CLK_SPDIF_SEL] = imx_clk_hw_mux("spdif_sel", base + 0x30, 20, 2, audio_sels, ARRAY_SIZE(audio_sels)); hws[IMX6SX_CLK_AUDIO_SEL] = imx_clk_hw_mux("audio_sel", base + 0x30, 7, 2, audio_sels, ARRAY_SIZE(audio_sels)); hws[IMX6SX_CLK_ENET_PRE_SEL] = imx_clk_hw_mux("enet_pre_sel", base + 0x34, 15, 3, enet_pre_sels, ARRAY_SIZE(enet_pre_sels)); diff --git a/drivers/clk/imx/clk-imx93.c b/drivers/clk/imx/clk-imx93.c index f5c9fa40491c..dcc41d178238 100644 --- a/drivers/clk/imx/clk-imx93.c +++ b/drivers/clk/imx/clk-imx93.c @@ -332,7 +332,7 @@ static struct platform_driver imx93_clk_driver = { .driver = { .name = "imx93-ccm", .suppress_bind_attrs = true, - .of_match_table = of_match_ptr(imx93_clk_of_match), + .of_match_table = imx93_clk_of_match, }, }; module_platform_driver(imx93_clk_driver); diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c index 201bf6e6b6e0..d5544cbc5c48 100644 --- a/drivers/clk/ingenic/tcu.c +++ b/drivers/clk/ingenic/tcu.c @@ -101,15 +101,11 @@ static bool ingenic_tcu_enable_regs(struct clk_hw *hw) bool enabled = false; /* - * If the SoC has no global TCU clock, we must ungate the channel's - * clock to be able to access its registers. - * If we have a TCU clock, it will be enabled automatically as it has - * been attached to the regmap. + * According to the programming manual, a timer channel's registers can + * only be accessed when the channel's stop bit is clear. */ - if (!tcu->clk) { - enabled = !!ingenic_tcu_is_enabled(hw); - regmap_write(tcu->map, TCU_REG_TSCR, BIT(info->gate_bit)); - } + enabled = !!ingenic_tcu_is_enabled(hw); + regmap_write(tcu->map, TCU_REG_TSCR, BIT(info->gate_bit)); return enabled; } @@ -120,8 +116,7 @@ static void ingenic_tcu_disable_regs(struct clk_hw *hw) const struct ingenic_tcu_clk_info *info = tcu_clk->info; struct ingenic_tcu *tcu = tcu_clk->tcu; - if (!tcu->clk) - regmap_write(tcu->map, TCU_REG_TSSR, BIT(info->gate_bit)); + regmap_write(tcu->map, TCU_REG_TSSR, BIT(info->gate_bit)); } static u8 ingenic_tcu_get_parent(struct clk_hw *hw) diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 070c3b896559..b6b89413e090 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -239,6 +239,11 @@ static const struct clk_ops mpfs_clk_cfg_ops = { .hw.init = CLK_HW_INIT(_name, _parent, &mpfs_clk_cfg_ops, 0), \ } +#define CLK_CPU_OFFSET 0u +#define CLK_AXI_OFFSET 1u +#define CLK_AHB_OFFSET 2u +#define CLK_RTCREF_OFFSET 3u + static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = { CLK_CFG(CLK_CPU, "clk_cpu", "clk_msspll", 0, 2, mpfs_div_cpu_axi_table, 0, REG_CLOCK_CONFIG_CR), @@ -362,7 +367,7 @@ static const struct clk_ops mpfs_periph_clk_ops = { _flags), \ } -#define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT].hw) +#define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT##_OFFSET].hw) /* * Critical clocks: @@ -370,6 +375,8 @@ static const struct clk_ops mpfs_periph_clk_ops = { * trap handler * - CLK_MMUART0: reserved by the hss * - CLK_DDRC: provides clock to the ddr subsystem + * - CLK_RTC: the onboard RTC's AHB bus clock must be kept running as the rtc will stop + * if the AHB interface clock is disabled * - CLK_FICx: these provide the processor side clocks to the "FIC" (Fabric InterConnect) * clock domain crossers which provide the interface to the FPGA fabric. Disabling them * causes the FPGA fabric to go into reset. @@ -394,7 +401,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_CAN0, "clk_periph_can0", PARENT_CLK(AHB), 14, 0), CLK_PERIPH(CLK_CAN1, "clk_periph_can1", PARENT_CLK(AHB), 15, 0), CLK_PERIPH(CLK_USB, "clk_periph_usb", PARENT_CLK(AHB), 16, 0), - CLK_PERIPH(CLK_RTC, "clk_periph_rtc", PARENT_CLK(AHB), 18, 0), + CLK_PERIPH(CLK_RTC, "clk_periph_rtc", PARENT_CLK(AHB), 18, CLK_IS_CRITICAL), CLK_PERIPH(CLK_QSPI, "clk_periph_qspi", PARENT_CLK(AHB), 19, 0), CLK_PERIPH(CLK_GPIO0, "clk_periph_gpio0", PARENT_CLK(AHB), 20, 0), CLK_PERIPH(CLK_GPIO1, "clk_periph_gpio1", PARENT_CLK(AHB), 21, 0), diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index 30056da3e0af..42568c616181 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c @@ -1191,9 +1191,13 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev) if (IS_ERR(reg)) return PTR_ERR(reg); - /* Force PLL_GPU output divider bits to 0 */ + /* + * Force PLL_GPU output divider bits to 0 and adjust + * multiplier to sensible default value of 432 MHz. + */ val = readl(reg + SUN50I_H6_PLL_GPU_REG); - val &= ~BIT(0); + val &= ~(GENMASK(15, 8) | BIT(0)); + val |= 17 << 8; writel(val, reg + SUN50I_H6_PLL_GPU_REG); /* Force GPU_CLK divider bits to 0 */ diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9ac75c1cde9c..f52b8f2fe529 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -46,8 +46,8 @@ #include <asm/cpu_device_id.h> #include "amd-pstate-trace.h" -#define AMD_PSTATE_TRANSITION_LATENCY 0x20000 -#define AMD_PSTATE_TRANSITION_DELAY 500 +#define AMD_PSTATE_TRANSITION_LATENCY 20000 +#define AMD_PSTATE_TRANSITION_DELAY 1000 /* * TODO: We need more time to fine tune processors with shared memory solution @@ -120,7 +120,7 @@ struct amd_cpudata { struct amd_aperf_mperf cur; struct amd_aperf_mperf prev; - u64 freq; + u64 freq; bool boost_supported; }; @@ -152,6 +152,7 @@ static inline int amd_pstate_enable(bool enable) static int pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; + u32 highest_perf; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); @@ -163,7 +164,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) * * CPPC entry doesn't indicate the highest performance in some ASICs. */ - WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); + highest_perf = amd_get_highest_perf(); + if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); + + WRITE_ONCE(cpudata->highest_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); @@ -175,12 +180,17 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) static int cppc_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + u32 highest_perf; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf()); + highest_perf = amd_get_highest_perf(); + if (highest_perf > cppc_perf.highest_perf) + highest_perf = cppc_perf.highest_perf; + + WRITE_ONCE(cpudata->highest_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, @@ -269,6 +279,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); value &= ~AMD_CPPC_MIN_PERF(~0L); value |= AMD_CPPC_MIN_PERF(min_perf); @@ -312,7 +323,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy, return -ENODEV; cap_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); max_perf = cap_perf; freqs.old = policy->cur; @@ -357,8 +368,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (max_perf < min_perf) max_perf = min_perf; - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); } @@ -555,9 +564,7 @@ free_cpudata1: static int amd_pstate_cpu_exit(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata; - - cpudata = policy->driver_data; + struct amd_cpudata *cpudata = policy->driver_data; freq_qos_remove_request(&cpudata->req[1]); freq_qos_remove_request(&cpudata->req[0]); @@ -599,9 +606,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { int max_freq; - struct amd_cpudata *cpudata; - - cpudata = policy->driver_data; + struct amd_cpudata *cpudata = policy->driver_data; max_freq = amd_get_max_freq(cpudata); if (max_freq < 0) @@ -614,9 +619,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli char *buf) { int freq; - struct amd_cpudata *cpudata; - - cpudata = policy->driver_data; + struct amd_cpudata *cpudata = policy->driver_data; freq = amd_get_lowest_nonlinear_freq(cpudata); if (freq < 0) @@ -662,7 +665,7 @@ static struct cpufreq_driver amd_pstate_driver = { .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, .name = "amd-pstate", - .attr = amd_pstate_attr, + .attr = amd_pstate_attr, }; static int __init amd_pstate_init(void) @@ -673,7 +676,7 @@ static int __init amd_pstate_init(void) return -ENODEV; if (!acpi_cpc_valid()) { - pr_debug("the _CPC object is not present in SBIOS\n"); + pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); return -ENODEV; } diff --git a/drivers/cpufreq/bmips-cpufreq.c b/drivers/cpufreq/bmips-cpufreq.c index f7c23fa468f0..39221a9a187a 100644 --- a/drivers/cpufreq/bmips-cpufreq.c +++ b/drivers/cpufreq/bmips-cpufreq.c @@ -156,7 +156,7 @@ static struct cpufreq_driver bmips_cpufreq_driver = { .name = BMIPS_CPUFREQ_PREFIX, }; -static int __init bmips_cpufreq_probe(void) +static int __init bmips_cpufreq_driver_init(void) { struct cpufreq_compat *cc; struct device_node *np; @@ -176,7 +176,13 @@ static int __init bmips_cpufreq_probe(void) return cpufreq_register_driver(&bmips_cpufreq_driver); } -device_initcall(bmips_cpufreq_probe); +module_init(bmips_cpufreq_driver_init); + +static void __exit bmips_cpufreq_driver_exit(void) +{ + cpufreq_unregister_driver(&bmips_cpufreq_driver); +} +module_exit(bmips_cpufreq_driver_exit); MODULE_AUTHOR("Markus Mayer <mmayer@broadcom.com>"); MODULE_DESCRIPTION("CPUfreq driver for Broadcom BMIPS SoCs"); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 24eaf0ec344d..432dfb4e8027 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -63,7 +63,15 @@ static struct cppc_workaround_oem_info wa_info[] = { static struct cpufreq_driver cppc_cpufreq_driver; +static enum { + FIE_UNSET = -1, + FIE_ENABLED, + FIE_DISABLED +} fie_disabled = FIE_UNSET; + #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE +module_param(fie_disabled, int, 0444); +MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)"); /* Frequency invariance support */ struct cppc_freq_invariance { @@ -158,7 +166,7 @@ static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) struct cppc_freq_invariance *cppc_fi; int cpu, ret; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; for_each_cpu(cpu, policy->cpus) { @@ -199,7 +207,7 @@ static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) struct cppc_freq_invariance *cppc_fi; int cpu; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; /* policy->cpus will be empty here, use related_cpus instead */ @@ -229,7 +237,15 @@ static void __init cppc_freq_invariance_init(void) }; int ret; - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled != FIE_ENABLED && fie_disabled != FIE_DISABLED) { + fie_disabled = FIE_ENABLED; + if (cppc_perf_ctrs_in_pcc()) { + pr_info("FIE not enabled on systems with registers in PCC\n"); + fie_disabled = FIE_DISABLED; + } + } + + if (fie_disabled) return; kworker_fie = kthread_create_worker(0, "cppc_fie"); @@ -247,7 +263,7 @@ static void __init cppc_freq_invariance_init(void) static void cppc_freq_invariance_exit(void) { - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + if (fie_disabled) return; kthread_destroy_worker(kworker_fie); @@ -936,6 +952,7 @@ static void cppc_check_hisi_workaround(void) wa_info[i].oem_revision == tbl->oem_revision) { /* Overwrite the get() callback */ cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate; + fie_disabled = FIE_DISABLED; break; } } @@ -947,7 +964,7 @@ static int __init cppc_cpufreq_init(void) { int ret; - if ((acpi_disabled) || !acpi_cpc_valid()) + if (!acpi_cpc_valid()) return -ENODEV; cppc_check_hisi_workaround(); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2c96de3f2d83..6ac3800db450 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -146,6 +146,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sc8280xp", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index ac57cddc5f2f..a45864701143 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -55,7 +55,7 @@ static struct notifier_block hb_cpufreq_clk_nb = { .notifier_call = hb_cpufreq_clk_notify, }; -static int hb_cpufreq_driver_init(void) +static int __init hb_cpufreq_driver_init(void) { struct platform_device_info devinfo = { .name = "cpufreq-dt", }; struct device *cpu_dev; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 57cdb3679885..fc3ebeb0bbe5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2416,6 +2416,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(COMETLAKE, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), + X86_MATCH(TIGERLAKE, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index d5ef3c66c762..833589bc95e4 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -13,6 +13,7 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/pm_opp.h> +#include <linux/pm_qos.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/units.h> @@ -56,6 +57,8 @@ struct qcom_cpufreq_data { struct cpufreq_policy *policy; bool per_core_dcvs; + + struct freq_qos_request throttle_freq_req; }; static unsigned long cpu_hw_rate, xo_rate; @@ -316,14 +319,16 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) if (IS_ERR(opp)) { dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp); } else { - throttled_freq = freq_hz / HZ_PER_KHZ; - - /* Update thermal pressure (the boost frequencies are accepted) */ - arch_update_thermal_pressure(policy->related_cpus, throttled_freq); - dev_pm_opp_put(opp); } + throttled_freq = freq_hz / HZ_PER_KHZ; + + freq_qos_update_request(&data->throttle_freq_req, throttled_freq); + + /* Update thermal pressure (the boost frequencies are accepted) */ + arch_update_thermal_pressure(policy->related_cpus, throttled_freq); + /* * In the unlikely case policy is unregistered do not enable * polling or h/w interrupt @@ -413,6 +418,14 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) if (data->throttle_irq < 0) return data->throttle_irq; + ret = freq_qos_add_request(&policy->constraints, + &data->throttle_freq_req, FREQ_QOS_MAX, + FREQ_QOS_MAX_DEFAULT_VALUE); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to add freq constraint (%d)\n", ret); + return ret; + } + data->cancel_throttle = false; data->policy = policy; @@ -479,6 +492,7 @@ static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) if (data->throttle_irq <= 0) return; + freq_qos_remove_request(&data->throttle_freq_req); free_irq(data->throttle_irq, data); } diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index a67df90848c2..1a63aeea8711 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -252,7 +252,7 @@ static int sti_cpufreq_fetch_syscon_registers(void) return 0; } -static int sti_cpufreq_init(void) +static int __init sti_cpufreq_init(void) { int ret; diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 1216046cf4c2..c2004cae3f02 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -38,14 +38,6 @@ /* cpufreq transisition latency */ #define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */ -enum cluster { - CLUSTER0, - CLUSTER1, - CLUSTER2, - CLUSTER3, - MAX_CLUSTERS, -}; - struct tegra_cpu_ctr { u32 cpu; u32 coreclk_cnt, last_coreclk_cnt; @@ -67,12 +59,12 @@ struct tegra_cpufreq_ops { struct tegra_cpufreq_soc { struct tegra_cpufreq_ops *ops; int maxcpus_per_cluster; + unsigned int num_clusters; phys_addr_t actmon_cntr_base; }; struct tegra194_cpufreq_data { void __iomem *regs; - size_t num_clusters; struct cpufreq_frequency_table **tables; const struct tegra_cpufreq_soc *soc; }; @@ -166,6 +158,14 @@ static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = { .ops = &tegra234_cpufreq_ops, .actmon_cntr_base = 0x9000, .maxcpus_per_cluster = 4, + .num_clusters = 3, +}; + +static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = { + .ops = &tegra234_cpufreq_ops, + .actmon_cntr_base = 0x4000, + .maxcpus_per_cluster = 8, + .num_clusters = 1, }; static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid) @@ -314,11 +314,7 @@ static void tegra194_get_cpu_ndiv_sysreg(void *ndiv) static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv) { - int ret; - - ret = smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, &ndiv, true); - - return ret; + return smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, &ndiv, true); } static void tegra194_set_cpu_ndiv_sysreg(void *data) @@ -382,7 +378,7 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); - if (clusterid >= data->num_clusters || !data->tables[clusterid]) + if (clusterid >= data->soc->num_clusters || !data->tables[clusterid]) return -EINVAL; start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); @@ -433,6 +429,7 @@ static struct tegra_cpufreq_ops tegra194_cpufreq_ops = { static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = { .ops = &tegra194_cpufreq_ops, .maxcpus_per_cluster = 2, + .num_clusters = 4, }; static void tegra194_cpufreq_free_resources(void) @@ -525,15 +522,14 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) soc = of_device_get_match_data(&pdev->dev); - if (soc->ops && soc->maxcpus_per_cluster) { + if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) { data->soc = soc; } else { dev_err(&pdev->dev, "soc data missing\n"); return -EINVAL; } - data->num_clusters = MAX_CLUSTERS; - data->tables = devm_kcalloc(&pdev->dev, data->num_clusters, + data->tables = devm_kcalloc(&pdev->dev, data->soc->num_clusters, sizeof(*data->tables), GFP_KERNEL); if (!data->tables) return -ENOMEM; @@ -558,7 +554,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) goto put_bpmp; } - for (i = 0; i < data->num_clusters; i++) { + for (i = 0; i < data->soc->num_clusters; i++) { data->tables[i] = init_freq_table(pdev, bpmp, i); if (IS_ERR(data->tables[i])) { err = PTR_ERR(data->tables[i]); @@ -590,6 +586,7 @@ static int tegra194_cpufreq_remove(struct platform_device *pdev) static const struct of_device_id tegra194_cpufreq_of_match[] = { { .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc }, { .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc }, + { .compatible = "nvidia,tegra239-ccplex-cluster", .data = &tegra239_cpufreq_soc }, { /* sentinel */ } }; diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index df85a77d476b..f64180dd2005 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -398,7 +398,7 @@ fail_put_node: return ret; } -static int ti_cpufreq_init(void) +static int __init ti_cpufreq_init(void) { const struct of_device_id *match; diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 74068742cef3..9acde71558d5 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -54,7 +54,7 @@ * variable is not locked. It is only written from the cpu that * it stores (or by the on/offlining cpu if that cpu is offline), * and only read after all the cpus are ready for the coupled idle - * state are are no longer updating it. + * state are no longer updating it. * * Three atomic counters are used. alive_count tracks the number * of cpus in the coupled set that are currently or soon will be diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index c32c600b3cf8..0b5461b3d7dd 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -233,8 +233,8 @@ static inline void add_powernv_state(int index, const char *name, unsigned int exit_latency, u64 psscr_val, u64 psscr_mask) { - strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN); - strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN); + strscpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN); + strscpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN); powernv_states[index].flags = flags; powernv_states[index].target_residency = target_residency; powernv_states[index].exit_latency = exit_latency; diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 29acaf48e575..0d0f9751ff8f 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -63,12 +63,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov) cpuidle_curr_governor = gov; - if (gov) { - list_for_each_entry(dev, &cpuidle_detected_devices, device_list) - cpuidle_enable_device(dev); - cpuidle_install_idle_handler(); - printk(KERN_INFO "cpuidle: using governor %s\n", gov->name); - } + list_for_each_entry(dev, &cpuidle_detected_devices, device_list) + cpuidle_enable_device(dev); + + cpuidle_install_idle_handler(); + pr_info("cpuidle: using governor %s\n", gov->name); return 0; } diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 2a60d0525cde..168195672e2e 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -56,6 +56,10 @@ static void virtio_crypto_akcipher_finalize_req( struct virtio_crypto_akcipher_request *vc_akcipher_req, struct akcipher_request *req, int err) { + kfree(vc_akcipher_req->src_buf); + kfree(vc_akcipher_req->dst_buf); + vc_akcipher_req->src_buf = NULL; + vc_akcipher_req->dst_buf = NULL; virtcrypto_clear_request(&vc_akcipher_req->base); crypto_finalize_akcipher_request(vc_akcipher_req->base.dataq->engine, req, err); diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 9a88faaf8b27..39ac069cabc7 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -189,10 +189,9 @@ static int rockchip_dfi_probe(struct platform_device *pdev) return PTR_ERR(data->regs); data->clk = devm_clk_get(dev, "pclk_ddr_mon"); - if (IS_ERR(data->clk)) { - dev_err(dev, "Cannot get the clk dmc_clk\n"); - return PTR_ERR(data->clk); - } + if (IS_ERR(data->clk)) + return dev_err_probe(dev, PTR_ERR(data->clk), + "Cannot get the clk pclk_ddr_mon\n"); /* try to find the optional reference to the pmu syscon */ node = of_parse_phandle(np, "rockchip,pmu", 0); diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c index 71abb3fbd042..e5458ada5197 100644 --- a/drivers/devfreq/mtk-cci-devfreq.c +++ b/drivers/devfreq/mtk-cci-devfreq.c @@ -291,9 +291,13 @@ static int mtk_ccifreq_probe(struct platform_device *pdev) } drv->sram_reg = devm_regulator_get_optional(dev, "sram"); - if (IS_ERR(drv->sram_reg)) + if (IS_ERR(drv->sram_reg)) { + ret = PTR_ERR(drv->sram_reg); + if (ret == -EPROBE_DEFER) + goto out_free_resources; + drv->sram_reg = NULL; - else { + } else { ret = regulator_enable(drv->sram_reg); if (ret) { dev_err(dev, "failed to enable sram regulator\n"); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index eb58644bb019..6faeb2ab3960 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -103,7 +103,6 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm) edac_dbg(4, " dimm->label = '%s'\n", dimm->label); edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); edac_dbg(4, " dimm->grain = %d\n", dimm->grain); - edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 96f6de0c8ff6..50ed9f2425bb 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -28,13 +28,9 @@ void edac_mc_sysfs_exit(void); extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); -extern int edac_get_log_ue(void); -extern int edac_get_log_ce(void); -extern int edac_get_panic_on_ue(void); extern int edac_mc_get_log_ue(void); extern int edac_mc_get_log_ce(void); extern int edac_mc_get_panic_on_ue(void); -extern int edac_get_poll_msec(void); extern unsigned int edac_mc_get_poll_msec(void); unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 6cf50ee0b77c..a22ea053f8e1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -74,31 +74,47 @@ static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; static int retry_rd_err_log; +static int decoding_via_mca; +static bool mem_cfg_2lm; static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; +static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; +static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; +static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable) +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, + u32 *offsets_scrub, u32 *offsets_demand, + u32 *offsets_demand2) { - u32 s, d; + u32 s, d, d2; - if (!imc->mbase) - return; - - s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]); + s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); + d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); + if (offsets_demand2) + d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); if (enable) { /* Save default configurations */ imc->chan[chan].retry_rd_err_log_s = s; imc->chan[chan].retry_rd_err_log_d = d; + if (offsets_demand2) + imc->chan[chan].retry_rd_err_log_d2 = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; d |= RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + d2 &= ~RETRY_RD_ERR_LOG_UC; + d2 |= RETRY_RD_ERR_LOG_NOOVER; + d2 |= RETRY_RD_ERR_LOG_EN; + } } else { /* Restore default configurations */ if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) @@ -113,23 +129,55 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable d |= RETRY_RD_ERR_LOG_NOOVER; if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + d2 |= RETRY_RD_ERR_LOG_UC; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + d2 &= ~RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + d2 &= ~RETRY_RD_ERR_LOG_EN; + } } - I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d); + I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); + I10NM_SET_REG32(imc, chan, offsets_demand[0], d); + if (offsets_demand2) + I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); } static void enable_retry_rd_err_log(bool enable) { + struct skx_imc *imc; struct skx_dev *d; int i, j; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) - for (i = 0; i < I10NM_NUM_IMC; i++) - for (j = 0; j < I10NM_NUM_CHANNELS; j++) - __enable_retry_rd_err_log(&d->imc[i], j, enable); + for (i = 0; i < I10NM_NUM_IMC; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + for (j = 0; j < I10NM_NUM_CHANNELS; j++) { + if (imc->hbm_mc) { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm0, + res_cfg->offsets_demand_hbm0, + NULL); + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm1, + res_cfg->offsets_demand_hbm1, + NULL); + } else { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub, + res_cfg->offsets_demand, + res_cfg->offsets_demand2); + } + } + } } static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, @@ -138,14 +186,33 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, struct skx_imc *imc = &res->dev->imc[res->imc]; u32 log0, log1, log2, log3, log4; u32 corr0, corr1, corr2, corr3; + u32 lxg0, lxg1, lxg3, lxg4; + u32 *xffsets = NULL; u64 log2a, log5; + u64 lxg2a, lxg5; u32 *offsets; - int n; + int n, pch; if (!imc->mbase) return; - offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand; + if (imc->hbm_mc) { + pch = res->cs & 1; + + if (pch) + offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : + res_cfg->offsets_demand_hbm1; + else + offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : + res_cfg->offsets_demand_hbm0; + } else { + if (scrub_err) { + offsets = res_cfg->offsets_scrub; + } else { + offsets = res_cfg->offsets_demand; + xffsets = res_cfg->offsets_demand2; + } + } log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); @@ -153,20 +220,52 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); + if (xffsets) { + lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); + lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); + lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); + lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); + lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); + } + if (res_cfg->type == SPR) { log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]", + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", log0, log1, log2a, log3, log4, log5); + + if (len - n > 0) { + if (xffsets) { + lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); + n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", + lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); + } else { + n += snprintf(msg + n, len - n, "]"); + } + } } else { log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", log0, log1, log2, log3, log4, log5); } - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + if (imc->hbm_mc) { + if (pch) { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + } + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + } if (len - n > 0) snprintf(msg + n, len - n, @@ -177,9 +276,16 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, corr3 & 0xffff, corr3 >> 16); /* Clear status bits */ - if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + if (retry_rd_err_log == 2) { + if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { + log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + } + + if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { + lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); + } } } @@ -231,6 +337,103 @@ static bool i10nm_check_2lm(struct res_config *cfg) return false; } +/* + * Check whether the error comes from DDRT by ICX/Tremont model specific error code. + * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. + */ +static bool i10nm_mscod_is_ddrt(u32 mscod) +{ + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + return false; +} + +static bool i10nm_mc_decode_available(struct mce *mce) +{ + u8 bank; + + if (!decoding_via_mca || mem_cfg_2lm) + return false; + + if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + return false; + + bank = mce->bank; + + switch (res_cfg->type) { + case I10NM: + if (bank < 13 || bank > 26) + return false; + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + /* Check whether one of {13,14,17,18,21,22,25,26} */ + return ((bank - 13) & BIT(1)) == 0; + default: + return false; + } +} + +static bool i10nm_mc_decode(struct decoded_addr *res) +{ + struct mce *m = res->mce; + struct skx_dev *d; + u8 bank; + + if (!i10nm_mc_decode_available(m)) + return false; + + list_for_each_entry(d, i10nm_edac_list, list) { + if (d->imc[0].src_id == m->socketid) { + res->socket = m->socketid; + res->dev = d; + break; + } + } + + switch (res_cfg->type) { + case I10NM: + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + break; + default: + return false; + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + m->socketid, res->imc); + return false; + } + + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + + return true; +} + static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; @@ -420,7 +623,12 @@ static struct res_config spr_cfg = { .sad_all_devfn = PCI_DEVFN(10, 0), .sad_all_offset = 0x300, .offsets_scrub = offsets_scrub_spr, + .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, + .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, .offsets_demand = offsets_demand_spr, + .offsets_demand2 = offsets_demand2_spr, + .offsets_demand_hbm0 = offsets_demand_spr_hbm0, + .offsets_demand_hbm1 = offsets_demand_spr_hbm1, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -574,7 +782,8 @@ static int __init i10nm_init(void) return -ENODEV; } - skx_set_mem_cfg(i10nm_check_2lm(cfg)); + mem_cfg_2lm = i10nm_check_2lm(cfg); + skx_set_mem_cfg(mem_cfg_2lm); rc = i10nm_get_ddr_munits(); @@ -626,9 +835,11 @@ static int __init i10nm_init(void) setup_i10nm_debug(); if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { - skx_set_decode(NULL, show_retry_rd_err_log); + skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); + } else { + skx_set_decode(i10nm_mc_decode, NULL); } i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -658,6 +869,34 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + + if (ret || val > 1) + return -EINVAL; + + if (val && mem_cfg_2lm) { + i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n"); + return -EIO; + } + + ret = param_set_int(buf, kp); + + return ret; +} + +static const struct kernel_param_ops decoding_via_mca_param_ops = { + .set = set_decoding_via_mca, + .get = param_get_int, +}; + +module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); +MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable"); + module_param(retry_rd_err_log, int, 0444); MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 4f28b8c8d378..61adaa872ba7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -1193,7 +1193,7 @@ static int __init i7300_init(void) } /** - * i7300_init() - Unregisters the driver + * i7300_exit() - Unregisters the driver */ static void __exit i7300_exit(void) { diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 9a9ff5ad611a..9ef13570f2e5 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -20,11 +20,15 @@ * 0c08: Xeon E3-1200 v3 Processor DRAM Controller * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers + * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers * * Based on Intel specification: * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/desktop-6th-gen-core-family-datasheet-vol-2.pdf + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v6-vol-2-datasheet.pdf * https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html * https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html * @@ -53,15 +57,17 @@ #define ie31200_printk(level, fmt, arg...) \ edac_printk(level, "ie31200", fmt, ##arg) -#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c -#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c -#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918 /* Coffee Lake-S */ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 @@ -80,6 +86,8 @@ #define DEVICE_ID_SKYLAKE_OR_LATER(did) \ (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) @@ -577,6 +585,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 0bc670778c99..046969b4e82e 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -178,11 +178,6 @@ struct ppc4xx_ecc_status { u32 wmirq; }; -/* Function Prototypes */ - -static int ppc4xx_edac_probe(struct platform_device *device); -static int ppc4xx_edac_remove(struct platform_device *device); - /* Global Variables */ /* @@ -197,15 +192,6 @@ static const struct of_device_id ppc4xx_edac_match[] = { }; MODULE_DEVICE_TABLE(of, ppc4xx_edac_match); -static struct platform_driver ppc4xx_edac_driver = { - .probe = ppc4xx_edac_probe, - .remove = ppc4xx_edac_remove, - .driver = { - .name = PPC4XX_EDAC_MODULE_NAME, - .of_match_table = ppc4xx_edac_match, - }, -}; - /* * TODO: The row and channel parameters likely need to be dynamically * set based on the aforementioned variant controller realizations. @@ -1391,6 +1377,15 @@ ppc4xx_edac_opstate_init(void) EDAC_OPSTATE_UNKNOWN_STR))); } +static struct platform_driver ppc4xx_edac_driver = { + .probe = ppc4xx_edac_probe, + .remove = ppc4xx_edac_remove, + .driver = { + .name = PPC4XX_EDAC_MODULE_NAME, + .of_match_table = ppc4xx_edac_match, + }, +}; + /** * ppc4xx_edac_init - driver/module insertion entry point * diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 9678ab97c7ac..8e39370fdb5c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -335,6 +335,12 @@ struct sbridge_info { struct sbridge_channel { u32 ranks; u32 dimms; + struct dimm { + u32 rowbits; + u32 colbits; + u32 bank_xor_enable; + u32 amap_fine; + } dimm[MAX_DIMMS]; }; struct pci_id_descr { @@ -1603,7 +1609,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, banks = 8; for (i = 0; i < channels; i++) { - u32 mtr; + u32 mtr, amap = 0; int max_dimms_per_channel; @@ -1615,6 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, max_dimms_per_channel = ARRAY_SIZE(mtr_regs); if (!pvt->pci_tad[i]) continue; + pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap); } for (j = 0; j < max_dimms_per_channel; j++) { @@ -1627,6 +1634,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, mtr_regs[j], &mtr); } edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) { if (!IS_ECC_ENABLED(pvt->info.mcmtr)) { sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n", @@ -1661,6 +1669,11 @@ static int __populate_dimms(struct mem_ctl_info *mci, dimm->dtype = pvt->info.get_width(pvt, mtr); dimm->mtype = mtype; dimm->edac_mode = mode; + pvt->channel[i].dimm[j].rowbits = order_base_2(rows); + pvt->channel[i].dimm[j].colbits = order_base_2(cols); + pvt->channel[i].dimm[j].bank_xor_enable = + GET_BITFIELD(pvt->info.mcmtr, 9, 9); + pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j); @@ -1922,6 +1935,99 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha) return NULL; } +static u8 sb_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; + +static u8 sb_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; + +static u8 sb_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; + +static u8 sb_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; + +static u8 sb_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int sb_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + int dimmno = 0; + int row, col, bank_address, bank_group; + struct sbridge_pvt *pvt; + u32 bg0 = 0, rowbits = 0, colbits = 0; + u32 amap_fine = 0, bank_xor_enable = 0; + + dimmno = (rank < 12) ? rank / 4 : 2; + pvt = mci->pvt_info; + amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine; + bg0 = amap_fine ? 6 : 13; + rowbits = pvt->channel[ch].dimm[dimmno].rowbits; + colbits = pvt->channel[ch].dimm[dimmno].colbits; + bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable; + + if (pvt->is_lockstep) { + pr_warn_once("LockStep row/column decode is not supported yet!\n"); + msg[0] = '\0'; + return false; + } + + if (pvt->is_close_pg) { + row = sb_bits(rank_addr, rowbits, sb_close_row); + col = sb_bits(rank_addr, colbits, sb_close_column); + col |= 0x400; /* C10 is autoprecharge, always set */ + bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28); + bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21); + } else { + row = sb_bits(rank_addr, rowbits, sb_open_row); + if (amap_fine) + col = sb_bits(rank_addr, colbits, sb_open_fine_column); + else + col = sb_bits(rank_addr, colbits, sb_open_column); + bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23); + bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21); + } + + row &= (1u << rowbits) - 1; + + sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d", + row, col, bank_address, bank_group); + return true; +} + +static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + pr_warn_once("DDR3 row/column decode not support yet!\n"); + msg[0] = '\0'; + return false; +} + static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, u8 *socket, u8 *ha, @@ -1937,12 +2043,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci, int interleave_mode, shiftup = 0; unsigned int sad_interleave[MAX_INTERLEAVE]; u32 reg, dram_rule; - u8 ch_way, sck_way, pkg, sad_ha = 0; + u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0; u32 tad_offset; u32 rir_way; u32 mb, gb; u64 ch_addr, offset, limit = 0, prv = 0; - + u64 rank_addr; + enum mem_type mtype; /* * Step 0) Check if the address is at special memory ranges @@ -2226,6 +2333,28 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®); *rank = RIR_RNK_TGT(pvt->info.type, reg); + if (pvt->info.type == BROADWELL) { + if (pvt->is_close_pg) + shiftup = 6; + else + shiftup = 13; + + rank_addr = ch_addr >> shiftup; + rank_addr /= (1 << rir_way); + rank_addr <<= shiftup; + rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0); + rank_addr -= RIR_OFFSET(pvt->info.type, reg); + + mtype = pvt->info.get_memory_type(pvt); + rankid = *rank; + if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) + sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg); + else + sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg); + } else { + msg[0] = '\0'; + } + edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, ch_addr, @@ -2950,7 +3079,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *optype, msg[256]; + char *optype, msg[256], msg_full[512]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -3089,18 +3218,17 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, */ if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg) channel = first_channel; - - snprintf(msg, sizeof(msg), - "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d", + snprintf(msg_full, sizeof(msg_full), + "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", area_type, mscod, errcode, socket, ha, channel_mask, - rank); + rank, msg); - edac_dbg(0, "%s\n", msg); + edac_dbg(0, "%s\n", msg_full); /* FIXME: need support for channel mask */ @@ -3111,7 +3239,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, dimm, -1, - optype, msg); + optype, msg_full); return; err_parsing: edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 1abc020d49ab..7e2762f62eec 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -714,8 +714,13 @@ static int __init skx_init(void) skx_set_decode(skx_decode, skx_show_retry_rd_err_log); - if (nvdimm_count && skx_adxl_get() == -ENODEV) - skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + if (nvdimm_count && skx_adxl_get() != -ENODEV) { + skx_set_decode(NULL, skx_show_retry_rd_err_log); + } else { + if (nvdimm_count) + skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + skx_set_decode(skx_decode, skx_show_retry_rd_err_log); + } /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 19c17c5198c5..f0f8e98f6efb 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -27,9 +27,11 @@ static const char * const component_names[] = { [INDEX_MEMCTRL] = "MemoryControllerId", [INDEX_CHANNEL] = "ChannelId", [INDEX_DIMM] = "DimmSlotId", + [INDEX_CS] = "ChipSelect", [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", [INDEX_NM_CHANNEL] = "NmChannelId", [INDEX_NM_DIMM] = "NmDimmSlotId", + [INDEX_NM_CS] = "NmChipSelect", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -40,7 +42,7 @@ static char *adxl_msg; static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; -static skx_decode_f skx_decode; +static skx_decode_f driver_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); @@ -139,10 +141,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? + (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; } else { res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + res->cs = (int)adxl_values[component_indices[INDEX_CS]]; } if (res->imc > NUM_IMC - 1 || res->imc < 0) { @@ -173,6 +178,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me break; } + res->decoded_by_adxl = true; + return true; } @@ -183,7 +190,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { - skx_decode = decode; + driver_decode = decode; skx_show_retry_rd_err_log = show_retry_log; } @@ -591,19 +598,19 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; } } - if (adxl_component_count) { + if (res->decoded_by_adxl) { len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { len = snprintf(skx_msg, MSG_SIZE, - "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x", + "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, res->socket, res->imc, res->rank, - res->bank_group, res->bank_address, res->row, res->column); + res->row, res->column, res->bank_address, res->bank_group); } if (skx_show_retry_rd_err_log) @@ -649,13 +656,14 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, return NOTIFY_DONE; memset(&res, 0, sizeof(res)); + res.mce = mce; res.addr = mce->addr; - if (adxl_component_count) { - if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) + /* Try driver decoder first */ + if (!(driver_decode && driver_decode(&res))) { + /* Then try firmware decoder (ACPI DSM methods) */ + if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) return NOTIFY_DONE; - } else if (!skx_decode || !skx_decode(&res)) { - return NOTIFY_DONE; } mci = res.dev->imc[res.imc].mci; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 03ac067a80b9..0cbadd3d2cd3 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -10,6 +10,7 @@ #define _SKX_COMM_EDAC_H #include <linux/bits.h> +#include <asm/mce.h> #define MSG_SIZE 1024 @@ -52,6 +53,9 @@ #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) +#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) +#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -82,6 +86,7 @@ struct skx_dev { struct pci_dev *edev; u32 retry_rd_err_log_s; u32 retry_rd_err_log_d; + u32 retry_rd_err_log_d2; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -108,18 +113,22 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_CS, INDEX_NM_FIRST, INDEX_NM_MEMCTRL = INDEX_NM_FIRST, INDEX_NM_CHANNEL, INDEX_NM_DIMM, + INDEX_NM_CS, INDEX_MAX }; #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) +#define BIT_NM_CS BIT_ULL(INDEX_NM_CS) struct decoded_addr { + struct mce *mce; struct skx_dev *dev; u64 addr; int socket; @@ -129,6 +138,7 @@ struct decoded_addr { int sktways; int chanways; int dimm; + int cs; int rank; int channel_rank; u64 rank_address; @@ -136,6 +146,7 @@ struct decoded_addr { int column; int bank_address; int bank_group; + bool decoded_by_adxl; }; struct res_config { @@ -154,7 +165,12 @@ struct res_config { int sad_all_offset; /* Offsets of retry_rd_err_log registers */ u32 *offsets_scrub; + u32 *offsets_scrub_hbm0; + u32 *offsets_scrub_hbm1; u32 *offsets_demand; + u32 *offsets_demand2; + u32 *offsets_demand_hbm0; + u32 *offsets_demand_hbm1; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index d021d287eaec..ad3f516627c5 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -37,7 +37,6 @@ int edac_workqueue_setup(void) void edac_workqueue_teardown(void) { - flush_workqueue(wq); destroy_workqueue(wq); wq = NULL; } diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index eb9c65f97841..f80d87c199c3 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -15,9 +15,11 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, struct device *parent, struct device **child) { - char hid[ACPI_ID_LEN], uid[11]; /* UINT_MAX + null byte */ struct acpi_device *adev; struct device *phys_dev; + char hid[ACPI_ID_LEN]; + u64 uid; + int ret; if (node->header.length != 12) return -EINVAL; @@ -27,12 +29,12 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, 'A' + ((node->acpi.hid >> 5) & 0x1f) - 1, 'A' + ((node->acpi.hid >> 0) & 0x1f) - 1, node->acpi.hid >> 16); - sprintf(uid, "%u", node->acpi.uid); for_each_acpi_dev_match(adev, hid, NULL, -1) { - if (adev->pnp.unique_id && !strcmp(adev->pnp.unique_id, uid)) + ret = acpi_dev_uid_to_integer(adev, &uid); + if (ret == 0 && node->acpi.uid == uid) break; - if (!adev->pnp.unique_id && node->acpi.uid == 0) + if (ret == -ENODATA && node->acpi.uid == 0) break; } if (!adev) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 2c67f71f2375..b43fdb319fd4 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -46,6 +46,8 @@ KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS), $(KBUILD_CFLAGS)) # remove SCS flags from all objects in this directory KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) +# disable CFI +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index cfb448eabdaa..bc6b5a12bf74 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -274,7 +274,7 @@ static void set_conduit(enum arm_smccc_conduit conduit) psci_conduit = conduit; } -static int get_set_conduit_method(struct device_node *np) +static int get_set_conduit_method(const struct device_node *np) { const char *method; @@ -334,7 +334,7 @@ static int __init psci_features(u32 psci_func_id) static int psci_suspend_finisher(unsigned long state) { u32 power_state = state; - phys_addr_t pa_cpu_resume = __pa_symbol(function_nocfi(cpu_resume)); + phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume); return psci_ops.cpu_suspend(power_state, pa_cpu_resume); } @@ -359,7 +359,7 @@ int psci_cpu_suspend_enter(u32 state) static int psci_system_suspend(unsigned long unused) { - phys_addr_t pa_cpu_resume = __pa_symbol(function_nocfi(cpu_resume)); + phys_addr_t pa_cpu_resume = __pa_symbol(cpu_resume); return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), pa_cpu_resume, 0, 0); @@ -528,7 +528,7 @@ typedef int (*psci_initcall_t)(const struct device_node *); * * Probe based on PSCI PSCI_VERSION function */ -static int __init psci_0_2_init(struct device_node *np) +static int __init psci_0_2_init(const struct device_node *np) { int err; @@ -549,7 +549,7 @@ static int __init psci_0_2_init(struct device_node *np) /* * PSCI < v0.2 get PSCI Function IDs via DT. */ -static int __init psci_0_1_init(struct device_node *np) +static int __init psci_0_1_init(const struct device_node *np) { u32 id; int err; @@ -585,7 +585,7 @@ static int __init psci_0_1_init(struct device_node *np) return 0; } -static int __init psci_1_0_init(struct device_node *np) +static int __init psci_1_0_init(const struct device_node *np) { int err; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index aa126ab80f0c..1bb317b8dcce 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -790,8 +790,12 @@ static int mvebu_pwm_probe(struct platform_device *pdev, u32 offset; u32 set; - if (of_device_is_compatible(mvchip->chip.of_node, - "marvell,armada-370-gpio")) { + if (mvchip->soc_variant == MVEBU_GPIO_SOC_VARIANT_A8K) { + int ret = of_property_read_u32(dev->of_node, + "marvell,pwm-offset", &offset); + if (ret < 0) + return 0; + } else { /* * There are only two sets of PWM configuration registers for * all the GPIO lines on those SoCs which this driver reserves @@ -801,13 +805,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev, if (!platform_get_resource_byname(pdev, IORESOURCE_MEM, "pwm")) return 0; offset = 0; - } else if (mvchip->soc_variant == MVEBU_GPIO_SOC_VARIANT_A8K) { - int ret = of_property_read_u32(dev->of_node, - "marvell,pwm-offset", &offset); - if (ret < 0) - return 0; - } else { - return 0; } if (IS_ERR(mvchip->clk)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 130060834b4e..48bd660ddb85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1050,6 +1050,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) return false; + + if (amdgpu_sriov_vf(adev)) + return false; + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index be7aff2d4a57..25e1f5ed7ead 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3152,7 +3152,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { @@ -4064,12 +4065,20 @@ static void amdgpu_device_evict_resources(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); + int r = 0; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; adev->in_suspend = true; + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_virt_request_full_gpu(adev, false); + if (r) + return r; + } + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); @@ -4093,6 +4102,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_device_ip_suspend_phase2(adev); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, false); + return 0; } @@ -4111,6 +4123,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return r; + } + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -4125,6 +4143,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } r = amdgpu_device_ip_resume(adev); + + /* no matter what r is, always need to properly release full GPU */ + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_release_full_gpu(adev, true); + } + if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8adeb7469f1e..d0d99ed607dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -400,7 +400,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) /* We are not protected by ring lock when reading the last sequence * but it's ok to report slightly wrong fence count here. */ - amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); emitted += READ_ONCE(ring->fence_drv.sync_seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 23a696d38390..de5b936b016d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -304,6 +304,10 @@ struct amdgpu_gfx { uint32_t rlc_srlg_feature_version; uint32_t rlc_srls_fw_version; uint32_t rlc_srls_feature_version; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; bool mec_fw_write_wait; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7b46f6bf4187..ad980f4b66e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -222,6 +222,8 @@ struct mes_add_queue_input { uint64_t tba_addr; uint64_t tma_addr; uint32_t is_kfd_process; + uint32_t is_aql_queue; + uint32_t queue_size; }; struct mes_remove_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 6373bfb47d55..e23f6192c50e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -272,3 +272,267 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); } + +static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev) +{ + const struct common_firmware_header *common_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + struct amdgpu_firmware_info *info; + unsigned int *tmp; + unsigned int i; + + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n"); + return -ENOMEM; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + if (info->fw) { + common_hdr = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE); + } + } + + return 0; +} + +static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_2 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); + adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); + adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); + adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_3 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; + adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version); + adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version); + adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); + adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); + + adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version); + adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version); + adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); + adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlcp_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_P; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlcv_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_V; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + } +} + +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor) +{ + int err; + + if (version_major < 2) { + /* only support rlc_hdr v2.x and onwards */ + dev_err(adev->dev, "unsupported rlc fw hdr\n"); + return -EINVAL; + } + + /* is_rlc_v2_1 is still used in APU code path */ + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + if (version_minor >= 0) { + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; + } + } + if (version_minor >= 1) + amdgpu_gfx_rlc_init_microcode_v2_1(adev); + if (version_minor >= 2) + amdgpu_gfx_rlc_init_microcode_v2_2(adev); + if (version_minor == 3) + amdgpu_gfx_rlc_init_microcode_v2_3(adev); + if (version_minor == 4) + amdgpu_gfx_rlc_init_microcode_v2_4(adev); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 03ac36b2c2cf..23f060db9255 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -267,5 +267,7 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); - +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 96b6cf4c4d54..59edf32f775e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -260,8 +260,12 @@ struct rlc_firmware_header_v2_2 { /* version_major=2, version_minor=3 */ struct rlc_firmware_header_v2_3 { struct rlc_firmware_header_v2_2 v2_2; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; uint32_t rlcp_ucode_size_bytes; uint32_t rlcp_ucode_offset_bytes; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t rlcv_ucode_size_bytes; uint32_t rlcv_ucode_offset_bytes; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f36e4f08db6d..0b52af415b28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -191,7 +191,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) fw_name = FIRMWARE_VCN4_0_2; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) - adev->vcn.indirect_sram = false; + adev->vcn.indirect_sram = true; break; case IP_VERSION(4, 0, 4): fw_name = FIRMWARE_VCN4_0_4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f6b1bb40e503..daf8ba8235cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -474,49 +474,6 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } -static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - -static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_2 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); - adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); - adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); - adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); -} - -static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_3 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); - adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); - adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); - adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); -} - static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; @@ -527,8 +484,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *cp_hdr; const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; @@ -583,58 +538,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (version_major == 2) { - if (version_minor >= 1) - gfx_v11_0_init_rlc_ext_microcode(adev); - if (version_minor >= 2) - gfx_v11_0_init_rlc_iram_dram_microcode(adev); - if (version_minor == 3) - gfx_v11_0_init_rlcp_rlcv_microcode(adev); - } } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); @@ -769,60 +680,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); } - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - if (info->fw) { - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - if (adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlc_iram_ucode_size_bytes && - adev->gfx.rlc.rlc_dram_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcp_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_P; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcv_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_V; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); - } } out: @@ -5260,6 +5117,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { u32 reg, data; + amdgpu_gfx_off_ctrl(adev, false); + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); @@ -5273,6 +5132,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); else WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + + amdgpu_gfx_off_ctrl(adev, true); } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fc9c1043244c..037af8352677 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5597,7 +5597,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne BUG_ON(offset > ring->buf_mask); BUG_ON(ring->ring[offset] != 0x55aa55aa); - cur = (ring->wptr & ring->buf_mask) - 1; + cur = (ring->wptr - 1) & ring->buf_mask; if (likely(cur > offset)) ring->ring[offset] = cur - offset; else diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index cc3fdbbcd314..f92744b8d79d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -185,6 +185,10 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; mes_add_queue_pkt.trap_en = 1; + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 2e50db3b761e..6e564b549b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -625,6 +625,7 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e83725a28106..007a3db69df1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -205,6 +205,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, } queue_input.is_kfd_process = 1; + queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); + queue_input.queue_size = q->properties.queue_size >> 2; queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index a6fcbeeb7428..0d53f6067422 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -350,11 +350,11 @@ static void event_interrupt_wq_v11(struct kfd_dev *dev, print_sq_intr_info_inst(context_id0, context_id1); sq_int_priv = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); - if (sq_int_priv /*&& (kfd_set_dbg_ev_from_interrupt(dev, pasid, + /*if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_CTXID0_TRAP_CODE(context_id0), - NULL, 0))*/) - return; + NULL, 0))) + return;*/ break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: print_sq_intr_info_error(context_id0, context_id1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index b8e14c2cc295..3ae350220d42 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -126,6 +126,10 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF; m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index c8da18e45b0e..8ca10ab3dfc1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -170,7 +170,13 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) &stream, 1, ¶ms); - power_opt |= psr_power_opt_z10_static_screen; + /* + * Only enable static-screen optimizations for PSR1. For PSR SU, this + * causes vstartup interrupt issues, used by amdgpu_dm to send vblank + * events. + */ + if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) + power_opt |= psr_power_opt_z10_static_screen; return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 8559dcd80af0..4a15aa7a375f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -130,11 +130,20 @@ static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; + if (disable) { + if (stream_enc && stream_enc->funcs->disable_fifo) + pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); - } else + } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); + + if (stream_enc && stream_enc->funcs->enable_fifo) + pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); + } } } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index c6785969eb1a..f0f3f66629cc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -156,12 +156,14 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); unsigned int num_levels; + unsigned int num_dcfclk_levels, num_dtbclk_levels, num_dispclk_levels; memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks)); clk_mgr_base->clks.p_state_change_support = true; clk_mgr_base->clks.prev_p_state_change_support = true; clk_mgr_base->clks.fclk_prev_p_state_change_support = true; clk_mgr->smu_present = false; + clk_mgr->dpm_present = false; if (!clk_mgr_base->bw_params) return; @@ -179,6 +181,7 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) dcn32_init_single_clock(clk_mgr, PPCLK_DCFCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz, &num_levels); + num_dcfclk_levels = num_levels; /* SOCCLK */ dcn32_init_single_clock(clk_mgr, PPCLK_SOCCLK, @@ -189,11 +192,16 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) dcn32_init_single_clock(clk_mgr, PPCLK_DTBCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_levels); + num_dtbclk_levels = num_levels; /* DISPCLK */ dcn32_init_single_clock(clk_mgr, PPCLK_DISPCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz, &num_levels); + num_dispclk_levels = num_levels; + + if (num_dcfclk_levels && num_dtbclk_levels && num_dispclk_levels) + clk_mgr->dpm_present = true; if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) { unsigned int i; @@ -658,6 +666,12 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &num_levels); clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; + if (clk_mgr->dpm_present && !num_levels) + clk_mgr->dpm_present = false; + + if (!clk_mgr->dpm_present) + dcn32_patch_dpm_table(clk_mgr_base->bw_params); + DC_FP_START(); /* Refresh bounding box */ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index aea49334021c..38a67051d470 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2164,8 +2164,7 @@ static void dce110_setup_audio_dto( continue; if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A) continue; - if (pipe_ctx->stream_res.audio != NULL && - pipe_ctx->stream_res.audio->enabled == false) { + if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output; build_audio_output(context, pipe_ctx, &audio_output); @@ -2205,8 +2204,7 @@ static void dce110_setup_audio_dto( if (!dc_is_dp_signal(pipe_ctx->stream->signal)) continue; - if (pipe_ctx->stream_res.audio != NULL && - pipe_ctx->stream_res.audio->enabled == false) { + if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output; build_audio_output(context, pipe_ctx, &audio_output); diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 232cc15979dd..fb729674953b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -45,6 +45,48 @@ #define DC_LOGGER \ dccg->ctx->logger +static void dccg314_get_pixel_rate_div( + struct dccg *dccg, + uint32_t otg_inst, + enum pixel_rate_div *k1, + enum pixel_rate_div *k2) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA; + + *k1 = PIXEL_RATE_DIV_NA; + *k2 = PIXEL_RATE_DIV_NA; + + switch (otg_inst) { + case 0: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG0_PIXEL_RATE_DIVK1, &val_k1, + OTG0_PIXEL_RATE_DIVK2, &val_k2); + break; + case 1: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG1_PIXEL_RATE_DIVK1, &val_k1, + OTG1_PIXEL_RATE_DIVK2, &val_k2); + break; + case 2: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG2_PIXEL_RATE_DIVK1, &val_k1, + OTG2_PIXEL_RATE_DIVK2, &val_k2); + break; + case 3: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG3_PIXEL_RATE_DIVK1, &val_k1, + OTG3_PIXEL_RATE_DIVK2, &val_k2); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + *k1 = (enum pixel_rate_div)val_k1; + *k2 = (enum pixel_rate_div)val_k2; +} + static void dccg314_set_pixel_rate_div( struct dccg *dccg, uint32_t otg_inst, @@ -52,6 +94,11 @@ static void dccg314_set_pixel_rate_div( enum pixel_rate_div k2) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) + return; switch (otg_inst) { case 0: diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index 06d8638db696..8c0ab013764e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -56,7 +56,8 @@ static void enc314_enable_fifo(struct stream_encoder *enc) /* TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON */ REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); - REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); @@ -261,6 +262,16 @@ static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing) return two_pix; } +void enc314_stream_encoder_dp_blank( + struct dc_link *link, + struct stream_encoder *enc) +{ + /* New to DCN314 - disable the FIFO before VID stream disable. */ + enc314_disable_fifo(enc); + + enc1_stream_encoder_dp_blank(link, enc); +} + static void enc314_stream_encoder_dp_unblank( struct dc_link *link, struct stream_encoder *enc, @@ -316,15 +327,11 @@ static void enc314_stream_encoder_dp_unblank( /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen * that it overflows during mode transition, and sometimes doesn't recover. */ - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1); udelay(10); REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); - /* DIG Resync FIFO now needs to be explicitly enabled. */ - enc314_enable_fifo(enc); - /* wait 100us for DIG/DP logic to prime * (i.e. a few video lines) */ @@ -340,6 +347,12 @@ static void enc314_stream_encoder_dp_unblank( REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true); + /* + * DIG Resync FIFO now needs to be explicitly enabled. + * This should come after DP_VID_STREAM_ENABLE per HW docs. + */ + enc314_enable_fifo(enc); + dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM); } @@ -408,7 +421,7 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = { .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, .dp_blank = - enc1_stream_encoder_dp_blank, + enc314_stream_encoder_dp_blank, .dp_unblank = enc314_stream_encoder_dp_unblank, .audio_mute_control = enc3_audio_mute_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index 0d5e8a441512..6640d0ac4304 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -42,6 +42,48 @@ #define DC_LOGGER \ dccg->ctx->logger +static void dccg32_get_pixel_rate_div( + struct dccg *dccg, + uint32_t otg_inst, + enum pixel_rate_div *k1, + enum pixel_rate_div *k2) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA; + + *k1 = PIXEL_RATE_DIV_NA; + *k2 = PIXEL_RATE_DIV_NA; + + switch (otg_inst) { + case 0: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG0_PIXEL_RATE_DIVK1, &val_k1, + OTG0_PIXEL_RATE_DIVK2, &val_k2); + break; + case 1: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG1_PIXEL_RATE_DIVK1, &val_k1, + OTG1_PIXEL_RATE_DIVK2, &val_k2); + break; + case 2: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG2_PIXEL_RATE_DIVK1, &val_k1, + OTG2_PIXEL_RATE_DIVK2, &val_k2); + break; + case 3: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG3_PIXEL_RATE_DIVK1, &val_k1, + OTG3_PIXEL_RATE_DIVK2, &val_k2); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + *k1 = (enum pixel_rate_div)val_k1; + *k2 = (enum pixel_rate_div)val_k2; +} + static void dccg32_set_pixel_rate_div( struct dccg *dccg, uint32_t otg_inst, @@ -50,6 +92,17 @@ static void dccg32_set_pixel_rate_div( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + + // Don't program 0xF into the register field. Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + return; + + dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); + if (k1 == cur_k1 && k2 == cur_k2) + return; + switch (otg_inst) { case 0: REG_UPDATE_2(OTG_PIXEL_RATE_DIV, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c index 99eb239bbc7b..9aebc1be2f59 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c @@ -98,9 +98,13 @@ static void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigne default: break; } - /* Should never be hit, if it is we have an erroneous hw config*/ - ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size - + hubbub2->det3_size + hubbub2->compbuf_size_segments <= hubbub2->crb_size_segs); + if (hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size + + hubbub2->det3_size + hubbub2->compbuf_size_segments > hubbub2->crb_size_segs) { + /* This may happen during seamless transition from ODM 2:1 to ODM4:1 */ + DC_LOG_WARNING("CRB Config Warning: DET size (%d,%d,%d,%d) + Compbuf size (%d) > CRB segments (%d)\n", + hubbub2->det0_size, hubbub2->det1_size, hubbub2->det2_size, hubbub2->det3_size, + hubbub2->compbuf_size_segments, hubbub2->crb_size_segs); + } } static void dcn32_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index f43686997917..e573e706430d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -121,8 +121,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { }, }, .num_states = 1, - .sr_exit_time_us = 20.16, - .sr_enter_plus_exit_time_us = 27.13, + .sr_exit_time_us = 42.97, + .sr_enter_plus_exit_time_us = 49.94, .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, @@ -1926,6 +1926,45 @@ static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); } +void dcn32_patch_dpm_table(struct clk_bw_params *bw_params) +{ + int i; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + /* Scan through clock values we currently have and if they are 0, + * then populate it with dcn3_2_soc.clock_limits[] value. + * + * Do it for DCFCLK, DISPCLK, DTBCLK and UCLK as any of those being + * 0, will cause it to skip building the clock table. + */ + if (max_dcfclk_mhz == 0) + bw_params->clk_table.entries[0].dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (max_dispclk_mhz == 0) + bw_params->clk_table.entries[0].dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (max_dtbclk_mhz == 0) + bw_params->clk_table.entries[0].dtbclk_mhz = dcn3_2_soc.clock_limits[0].dtbclk_mhz; + if (max_uclk_mhz == 0) + bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16; +} + static int build_synthetic_soc_states(struct clk_bw_params *bw_params, struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 6ce221098979..e1b79e2aab8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -77,4 +77,6 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, int pipe_cnt, int vlevel); +void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 68c2ed434d2c..cff5fd55a0ad 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -340,6 +340,8 @@ struct clk_mgr_internal { bool smu_present; void *wm_range_table; long long wm_range_table_addr; + + bool dpm_present; }; struct clk_mgr_internal_funcs { diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 50bfa513cb35..7e85cdc5bd34 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -269,7 +269,8 @@ union MESAPI__ADD_QUEUE { uint32_t map_kiq_utility_queue : 1; uint32_t is_kfd_process : 1; uint32_t trap_en : 1; - uint32_t reserved : 21; + uint32_t is_aql_queue : 1; + uint32_t reserved : 20; }; struct MES_API_STATUS api_status; uint64_t tma_addr; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 096327513dd0..1d454485e0d9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -239,82 +239,47 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t num) { struct amdgpu_device *adev = smu->adev; + u32 smu_version; if (num > 2) return -EINVAL; - memset(feature_mask, 0, sizeof(uint32_t) * num); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DATA_READ_BIT); + memset(feature_mask, 0xff, sizeof(uint32_t) * num); - if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) { - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT); + if (!(adev->pm.pp_feature & PP_SCLK_DPM_MASK)) { + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT); + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_GFX_IMU_BIT); } - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MM_DPM_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_VCN_BIT); - - if ((adev->pg_flags & AMD_PG_SUPPORT_ATHUB) && - (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ATHUB_MMHUB_PG_BIT); + if (!(adev->pg_flags & AMD_PG_SUPPORT_ATHUB) || + !(adev->pg_flags & AMD_PG_SUPPORT_MMHUB)) + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_ATHUB_MMHUB_PG_BIT); - if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT); - -#if 0 - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFXOFF_BIT); -#endif + if (!(adev->pm.pp_feature & PP_SOCCLK_DPM_MASK)) + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_THROTTLERS_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FAN_CONTROL_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DF_CSTATE_BIT); - - if (adev->pm.pp_feature & PP_MCLK_DPM_MASK) { - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VMEMP_SCALING_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT); + /* PMFW 78.58 contains a critical fix for gfxoff feature */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if ((smu_version < 0x004e3a00) || + !(adev->pm.pp_feature & PP_GFXOFF_MASK)) + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_GFXOFF_BIT); + + if (!(adev->pm.pp_feature & PP_MCLK_DPM_MASK)) { + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DPM_UCLK_BIT); + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_VMEMP_SCALING_BIT); + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_VDDIO_MEM_SCALING_BIT); } - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MEM_TEMP_READ_BIT); - - if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_SOCCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_MPCLK_DS_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT); + if (!(adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)) + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DS_GFXCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_FCLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_DCFCLK_BIT); - - if (adev->pm.pp_feature & PP_PCIE_DPM_MASK) { - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_LINK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT); + if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DPM_LINK_BIT); + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_DS_LCLK_BIT); } - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_DSTATE_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT); - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_FCLK_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FW_CTF_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_UCLK_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VR0HOT_BIT); - - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT); - - if (adev->pm.pp_feature & PP_ULV_MASK) - *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT); + if (!(adev->pm.pp_feature & PP_ULV_MASK)) + *(uint64_t *)feature_mask &= ~FEATURE_MASK(FEATURE_GFX_ULV_BIT); return 0; } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 8aadcc0aa90b..df9370e0ff23 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1864,12 +1864,6 @@ EXPORT_SYMBOL_GPL(analogix_dp_remove); int analogix_dp_suspend(struct analogix_dp_device *dp) { clk_disable_unprepare(dp->clock); - - if (dp->plat_data->panel) { - if (drm_panel_unprepare(dp->plat_data->panel)) - DRM_ERROR("failed to turnoff the panel\n"); - } - return 0; } EXPORT_SYMBOL_GPL(analogix_dp_suspend); @@ -1884,13 +1878,6 @@ int analogix_dp_resume(struct analogix_dp_device *dp) return ret; } - if (dp->plat_data->panel) { - if (drm_panel_prepare(dp->plat_data->panel)) { - DRM_ERROR("failed to setup the panel\n"); - return -EBUSY; - } - } - return 0; } EXPORT_SYMBOL_GPL(analogix_dp_resume); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 28bad30dc4e5..5968f4af190b 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -188,7 +188,7 @@ static int lt8912_write_lvds_config(struct lt8912 *lt) {0x03, 0xff}, }; - return regmap_multi_reg_write(lt->regmap[I2C_CEC_DSI], seq, ARRAY_SIZE(seq)); + return regmap_multi_reg_write(lt->regmap[I2C_MAIN], seq, ARRAY_SIZE(seq)); }; static inline struct lt8912 *bridge_to_lt8912(struct drm_bridge *b) @@ -268,7 +268,7 @@ static int lt8912_video_setup(struct lt8912 *lt) u32 hactive, h_total, hpw, hfp, hbp; u32 vactive, v_total, vpw, vfp, vbp; u8 settle = 0x08; - int ret; + int ret, hsync_activehigh, vsync_activehigh; if (!lt) return -EINVAL; @@ -278,12 +278,14 @@ static int lt8912_video_setup(struct lt8912 *lt) hpw = lt->mode.hsync_len; hbp = lt->mode.hback_porch; h_total = hactive + hfp + hpw + hbp; + hsync_activehigh = lt->mode.flags & DISPLAY_FLAGS_HSYNC_HIGH; vactive = lt->mode.vactive; vfp = lt->mode.vfront_porch; vpw = lt->mode.vsync_len; vbp = lt->mode.vback_porch; v_total = vactive + vfp + vpw + vbp; + vsync_activehigh = lt->mode.flags & DISPLAY_FLAGS_VSYNC_HIGH; if (vactive <= 600) settle = 0x04; @@ -317,6 +319,13 @@ static int lt8912_video_setup(struct lt8912 *lt) ret |= regmap_write(lt->regmap[I2C_CEC_DSI], 0x3e, hfp & 0xff); ret |= regmap_write(lt->regmap[I2C_CEC_DSI], 0x3f, hfp >> 8); + ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xab, BIT(0), + vsync_activehigh ? BIT(0) : 0); + ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xab, BIT(1), + hsync_activehigh ? BIT(1) : 0); + ret |= regmap_update_bits(lt->regmap[I2C_MAIN], 0xb2, BIT(0), + lt->connector.display_info.is_hdmi ? BIT(0) : 0); + return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 633a7e5dba3b..6b5d4ea22b67 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -166,6 +166,21 @@ struct intel_engine_execlists { struct timer_list preempt; /** + * @preempt_target: active request at the time of the preemption request + * + * We force a preemption to occur if the pending contexts have not + * been promoted to active upon receipt of the CS ack event within + * the timeout. This timeout maybe chosen based on the target, + * using a very short timeout if the context is no longer schedulable. + * That short timeout may not be applicable to other contexts, so + * if a context switch should happen within before the preemption + * timeout, we may shoot early at an innocent context. To prevent this, + * we record which context was active at the time of the preemption + * request and only reset that context upon the timeout. + */ + const struct i915_request *preempt_target; + + /** * @ccid: identifier for contexts submitted to this engine */ u32 ccid; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4b909cb88cdf..c718e6dc40b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1241,6 +1241,9 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, if (!rq) return 0; + /* Only allow ourselves to force reset the currently active context */ + engine->execlists.preempt_target = rq; + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS; @@ -2427,8 +2430,24 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); if (unlikely(preempt_timeout(engine))) { + const struct i915_request *rq = *engine->execlists.active; + + /* + * If after the preempt-timeout expired, we are still on the + * same active request/context as before we initiated the + * preemption, reset the engine. + * + * However, if we have processed a CS event to switch contexts, + * but not yet processed the CS event for the pending + * preemption, reset the timer allowing the new context to + * gracefully exit. + */ cancel_timer(&engine->execlists.preempt); - engine->execlists.error_interrupt |= ERROR_PREEMPT; + if (rq == engine->execlists.preempt_target) + engine->execlists.error_interrupt |= ERROR_PREEMPT; + else + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine, rq)); } if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 73a8b46e0234..d09a0e845d09 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -545,8 +545,7 @@ static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, RATL_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, VR_THERMALERT_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK); -static const struct attribute *freq_attrs[] = { - &dev_attr_punit_req_freq_mhz.attr, +static const struct attribute *throttle_reason_attrs[] = { &attr_throttle_reason_status.attr, &attr_throttle_reason_pl1.attr, &attr_throttle_reason_pl2.attr, @@ -763,12 +762,20 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (!is_object_gt(kobj)) return; - ret = sysfs_create_files(kobj, freq_attrs); + ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr); if (ret) drm_warn(>->i915->drm, - "failed to create gt%u throttle sysfs files (%pe)", + "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", gt->info.id, ERR_PTR(ret)); + if (GRAPHICS_VER(gt->i915) >= 11) { + ret = sysfs_create_files(kobj, throttle_reason_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u throttle sysfs files (%pe)", + gt->info.id, ERR_PTR(ret)); + } + if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(>->uc)) { ret = sysfs_create_files(kobj, media_perf_power_attrs); if (ret) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 3c833ea60db6..7b9f3fc3adf7 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2453,7 +2453,8 @@ static int vmbus_acpi_add(struct acpi_device *device) * Some ancestor of the vmbus acpi device (Gen1 or Gen2 * firmware) is the VMOD that has the mmio ranges. Get that. */ - for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + for (ancestor = acpi_dev_parent(device); ancestor; + ancestor = acpi_dev_parent(ancestor)) { result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index d2545a1be9fc..44e04c75d0d3 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -598,7 +598,7 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource) continue; /* Create a symlink to domain objects */ - obj = acpi_bus_get_acpi_device(element->reference.handle); + obj = acpi_get_acpi_dev(element->reference.handle); resource->domain_devices[i] = obj; if (!obj) continue; diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 2e2cd79d89eb..4218750d5a66 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -151,9 +151,9 @@ static DEFINE_IDA(hwmon_ida); * between hwmon and thermal_sys modules. */ #ifdef CONFIG_THERMAL_OF -static int hwmon_thermal_get_temp(void *data, int *temp) +static int hwmon_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct hwmon_thermal_data *tdata = data; + struct hwmon_thermal_data *tdata = tz->devdata; struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); int ret; long t; @@ -168,9 +168,9 @@ static int hwmon_thermal_get_temp(void *data, int *temp) return 0; } -static int hwmon_thermal_set_trips(void *data, int low, int high) +static int hwmon_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct hwmon_thermal_data *tdata = data; + struct hwmon_thermal_data *tdata = tz->devdata; struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); const struct hwmon_chip_info *chip = hwdev->chip; const struct hwmon_channel_info **info = chip->info; @@ -203,7 +203,7 @@ static int hwmon_thermal_set_trips(void *data, int low, int high) return 0; } -static const struct thermal_zone_of_device_ops hwmon_thermal_ops = { +static const struct thermal_zone_device_ops hwmon_thermal_ops = { .get_temp = hwmon_thermal_get_temp, .set_trips = hwmon_thermal_set_trips, }; @@ -227,8 +227,8 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index) tdata->dev = dev; tdata->index = index; - tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_of_zone_register(dev, index, tdata, + &hwmon_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) != -ENODEV) return PTR_ERR(tzd); diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 81d3f91dd204..7ec04934747e 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1270,9 +1270,9 @@ struct pmbus_thermal_data { struct pmbus_sensor *sensor; }; -static int pmbus_thermal_get_temp(void *data, int *temp) +static int pmbus_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct pmbus_thermal_data *tdata = data; + struct pmbus_thermal_data *tdata = tz->devdata; struct pmbus_sensor *sensor = tdata->sensor; struct pmbus_data *pmbus_data = tdata->pmbus_data; struct i2c_client *client = to_i2c_client(pmbus_data->dev); @@ -1296,7 +1296,7 @@ static int pmbus_thermal_get_temp(void *data, int *temp) return ret; } -static const struct thermal_zone_of_device_ops pmbus_thermal_ops = { +static const struct thermal_zone_device_ops pmbus_thermal_ops = { .get_temp = pmbus_thermal_get_temp, }; @@ -1314,8 +1314,8 @@ static int pmbus_thermal_add_sensor(struct pmbus_data *pmbus_data, tdata->sensor = sensor; tdata->pmbus_data = pmbus_data; - tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, - &pmbus_thermal_ops); + tzd = devm_thermal_of_zone_register(dev, index, tdata, + &pmbus_thermal_ops); /* * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, * so ignore that error but forward any other error. diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c index 5187c6dd5a4f..4d75385f7d5e 100644 --- a/drivers/hwmon/scpi-hwmon.c +++ b/drivers/hwmon/scpi-hwmon.c @@ -62,9 +62,9 @@ static void scpi_scale_reading(u64 *value, struct sensor_data *sensor) } } -static int scpi_read_temp(void *dev, int *temp) +static int scpi_read_temp(struct thermal_zone_device *tz, int *temp) { - struct scpi_thermal_zone *zone = dev; + struct scpi_thermal_zone *zone = tz->devdata; struct scpi_sensors *scpi_sensors = zone->scpi_sensors; struct scpi_ops *scpi_ops = scpi_sensors->scpi_ops; struct sensor_data *sensor = &scpi_sensors->data[zone->sensor_id]; @@ -121,7 +121,7 @@ scpi_show_label(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "%s\n", sensor->info.name); } -static const struct thermal_zone_of_device_ops scpi_sensor_ops = { +static const struct thermal_zone_device_ops scpi_sensor_ops = { .get_temp = scpi_read_temp, }; @@ -275,10 +275,10 @@ static int scpi_hwmon_probe(struct platform_device *pdev) zone->sensor_id = i; zone->scpi_sensors = scpi_sensors; - z = devm_thermal_zone_of_sensor_register(dev, - sensor->info.sensor_id, - zone, - &scpi_sensor_ops); + z = devm_thermal_of_zone_register(dev, + sensor->info.sensor_id, + zone, + &scpi_sensor_ops); /* * The call to thermal_zone_of_sensor_register returns * an error for sensors that are not associated with diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index 84b7e6cbc67b..423fe0c8a471 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -244,14 +244,18 @@ static const struct i2c_adapter_quirks amd_i2c_dev_quirks = { static int i2c_amd_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; int ret; struct amd_i2c_dev *i2c_dev; - struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); struct amd_mp2_dev *mp2_dev; - const char *uid; + u64 uid; - if (!adev) - return -ENODEV; + ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid); + if (ret) + return dev_err_probe(dev, ret, "missing UID/bus id!\n"); + if (uid >= 2) + return dev_err_probe(dev, -EINVAL, "incorrect UID/bus id \"%llu\"!\n", uid); + dev_dbg(dev, "bus id is %llu\n", uid); /* The ACPI namespace doesn't contain information about which MP2 PCI * device an AMDI0011 ACPI device is related to, so assume that there's @@ -266,6 +270,7 @@ static int i2c_amd_probe(struct platform_device *pdev) if (!i2c_dev) return -ENOMEM; + i2c_dev->common.bus_id = uid; i2c_dev->common.mp2_dev = mp2_dev; i2c_dev->pdev = pdev; platform_set_drvdata(pdev, i2c_dev); @@ -276,20 +281,6 @@ static int i2c_amd_probe(struct platform_device *pdev) i2c_dev->common.resume = &i2c_amd_resume; #endif - uid = adev->pnp.unique_id; - if (!uid) { - dev_err(&pdev->dev, "missing UID/bus id!\n"); - return -EINVAL; - } else if (strcmp(uid, "0") == 0) { - i2c_dev->common.bus_id = 0; - } else if (strcmp(uid, "1") == 0) { - i2c_dev->common.bus_id = 1; - } else { - dev_err(&pdev->dev, "incorrect UID/bus id \"%s\"!\n", uid); - return -EINVAL; - } - dev_dbg(&pdev->dev, "bus id is %u\n", i2c_dev->common.bus_id); - /* Register the adapter */ amd_mp2_pm_runtime_get(mp2_dev); diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 471c47db546b..c836cf884185 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -823,7 +823,7 @@ static int davinci_i2c_probe(struct platform_device *pdev) r = pm_runtime_resume_and_get(dev->dev); if (r < 0) { dev_err(dev->dev, "failed to runtime_get device: %d\n", r); - return r; + goto err_pm; } i2c_davinci_init(dev); @@ -882,6 +882,7 @@ static int davinci_i2c_probe(struct platform_device *pdev) err_unuse_clocks: pm_runtime_dont_use_autosuspend(dev->dev); pm_runtime_put_sync(dev->dev); +err_pm: pm_runtime_disable(dev->dev); return r; diff --git a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c index ad5efd7497d1..60908c5737f1 100644 --- a/drivers/i2c/busses/i2c-mlxbf.c +++ b/drivers/i2c/busses/i2c-mlxbf.c @@ -2215,35 +2215,27 @@ MODULE_DEVICE_TABLE(acpi, mlxbf_i2c_acpi_ids); static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) { const struct acpi_device_id *aid; - struct acpi_device *adev; - unsigned long bus_id = 0; - const char *uid; + u64 bus_id; int ret; if (acpi_disabled) return -ENOENT; - adev = ACPI_COMPANION(dev); - if (!adev) - return -ENXIO; - aid = acpi_match_device(mlxbf_i2c_acpi_ids, dev); if (!aid) return -ENODEV; priv->chip = (struct mlxbf_i2c_chip_info *)aid->driver_data; - uid = acpi_device_uid(adev); - if (!uid || !(*uid)) { + ret = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &bus_id); + if (ret) { dev_err(dev, "Cannot retrieve UID\n"); - return -ENODEV; + return ret; } - ret = kstrtoul(uid, 0, &bus_id); - if (!ret) - priv->bus = bus_id; + priv->bus = bus_id; - return ret; + return 0; } #else static int mlxbf_i2c_acpi_probe(struct device *dev, struct mlxbf_i2c_priv *priv) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3e101719689a..cfeb24d40d37 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -928,6 +928,51 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state adl_n_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 195, + .target_residency = 585, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C8", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 260, + .target_residency = 1040, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 660, + .target_residency = 1980, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1", @@ -1309,6 +1354,10 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = { .state_table = adl_l_cstates, }; +static const struct idle_cpu idle_cpu_adl_n __initconst = { + .state_table = adl_n_cstates, +}; + static const struct idle_cpu idle_cpu_spr __initconst = { .state_table = spr_cstates, .disable_promotion_to_c1e = true, @@ -1379,6 +1428,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), @@ -1507,7 +1557,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) state = &drv->states[drv->state_count++]; snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); - strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); + strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); state->exit_latency = cx->latency; /* * For C1-type C-states use the same number for both the exit @@ -1816,6 +1866,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) break; case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_ALDERLAKE_N: adl_idle_state_table_update(); break; } diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c index 2d393a4dfff6..a6ade70dedf8 100644 --- a/drivers/iio/adc/sun4i-gpadc-iio.c +++ b/drivers/iio/adc/sun4i-gpadc-iio.c @@ -412,9 +412,9 @@ static int sun4i_gpadc_runtime_resume(struct device *dev) return 0; } -static int sun4i_gpadc_get_temp(void *data, int *temp) +static int sun4i_gpadc_get_temp(struct thermal_zone_device *tz, int *temp) { - struct sun4i_gpadc_iio *info = data; + struct sun4i_gpadc_iio *info = tz->devdata; int val, scale, offset; if (sun4i_gpadc_temp_read(info->indio_dev, &val)) @@ -428,7 +428,7 @@ static int sun4i_gpadc_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops sun4i_ts_tz_ops = { +static const struct thermal_zone_device_ops sun4i_ts_tz_ops = { .get_temp = &sun4i_gpadc_get_temp, }; @@ -637,9 +637,9 @@ static int sun4i_gpadc_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); if (IS_ENABLED(CONFIG_THERMAL_OF)) { - info->tzd = thermal_zone_of_sensor_register(info->sensor_device, - 0, info, - &sun4i_ts_tz_ops); + info->tzd = devm_thermal_of_zone_register(info->sensor_device, + 0, info, + &sun4i_ts_tz_ops); /* * Do not fail driver probing when failing to register in * thermal because no thermal DT node is found. @@ -681,8 +681,6 @@ static int sun4i_gpadc_remove(struct platform_device *pdev) if (!IS_ENABLED(CONFIG_THERMAL_OF)) return 0; - thermal_zone_of_sensor_unregister(info->sensor_device, info->tzd); - if (!info->no_irq) iio_map_array_unregister(indio_dev); diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index 742a7e96c1b5..73eb8f80be6e 100644 --- a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -192,12 +192,12 @@ static int sun4i_get_temp(const struct sun4i_ts_data *ts, int *temp) return 0; } -static int sun4i_get_tz_temp(void *data, int *temp) +static int sun4i_get_tz_temp(struct thermal_zone_device *tz, int *temp) { - return sun4i_get_temp(data, temp); + return sun4i_get_temp(tz->devdata, temp); } -static const struct thermal_zone_of_device_ops sun4i_ts_tz_ops = { +static const struct thermal_zone_device_ops sun4i_ts_tz_ops = { .get_temp = sun4i_get_tz_temp, }; @@ -356,8 +356,8 @@ static int sun4i_ts_probe(struct platform_device *pdev) if (IS_ERR(hwmon)) return PTR_ERR(hwmon); - thermal = devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, - &sun4i_ts_tz_ops); + thermal = devm_thermal_of_zone_register(ts->dev, 0, ts, + &sun4i_ts_tz_ops); if (IS_ERR(thermal)) return PTR_ERR(thermal); diff --git a/drivers/md/dm-verity-loadpin.c b/drivers/md/dm-verity-loadpin.c index 387ec43aef72..4f78cc55c251 100644 --- a/drivers/md/dm-verity-loadpin.c +++ b/drivers/md/dm-verity-loadpin.c @@ -14,6 +14,7 @@ LIST_HEAD(dm_verity_loadpin_trusted_root_digests); static bool is_trusted_verity_target(struct dm_target *ti) { + int verity_mode; u8 *root_digest; unsigned int digest_size; struct dm_verity_loadpin_trusted_root_digest *trd; @@ -22,6 +23,13 @@ static bool is_trusted_verity_target(struct dm_target *ti) if (!dm_is_verity_target(ti)) return false; + verity_mode = dm_verity_get_mode(ti); + + if ((verity_mode != DM_VERITY_MODE_EIO) && + (verity_mode != DM_VERITY_MODE_RESTART) && + (verity_mode != DM_VERITY_MODE_PANIC)) + return false; + if (dm_verity_get_root_digest(ti, &root_digest, &digest_size)) return false; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 94b6cb599db4..8a00cc42e498 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1447,6 +1447,22 @@ bool dm_is_verity_target(struct dm_target *ti) } /* + * Get the verity mode (error behavior) of a verity target. + * + * Returns the verity mode of the target, or -EINVAL if 'ti' is not a verity + * target. + */ +int dm_verity_get_mode(struct dm_target *ti) +{ + struct dm_verity *v = ti->private; + + if (!dm_is_verity_target(ti)) + return -EINVAL; + + return v->mode; +} + +/* * Get the root digest of a verity target. * * Returns a copy of the root digest, the caller is responsible for diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 45455de1b4bc..98f306ec6a33 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -134,6 +134,7 @@ extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); extern bool dm_is_verity_target(struct dm_target *ti); +extern int dm_verity_get_mode(struct dm_target *ti); extern int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned int *digest_size); diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 742b2349fea3..10e0c5381d01 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -876,8 +876,8 @@ static int join(struct mddev *mddev, int nodes) memset(str, 0, 64); sprintf(str, "%pU", mddev->uuid); ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name, - DLM_LSFL_FS, LVB_SIZE, - &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace); + 0, LVB_SIZE, &md_ls_ops, mddev, + &ops_rv, &cinfo->lockspace); if (ret) goto err; wait_for_completion(&cinfo->completion); diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c index a1bd6d9c9223..909df82fed33 100644 --- a/drivers/media/dvb-core/dvb_vb2.c +++ b/drivers/media/dvb-core/dvb_vb2.c @@ -354,6 +354,12 @@ int dvb_vb2_reqbufs(struct dvb_vb2_ctx *ctx, struct dmx_requestbuffers *req) int dvb_vb2_querybuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) { + struct vb2_queue *q = &ctx->vb_q; + + if (b->index >= q->num_buffers) { + dprintk(1, "[%s] buffer index out of range\n", ctx->name); + return -EINVAL; + } vb2_core_querybuf(&ctx->vb_q, b->index, b); dprintk(3, "[%s] index=%d\n", ctx->name, b->index); return 0; @@ -378,8 +384,13 @@ int dvb_vb2_expbuf(struct dvb_vb2_ctx *ctx, struct dmx_exportbuffer *exp) int dvb_vb2_qbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) { + struct vb2_queue *q = &ctx->vb_q; int ret; + if (b->index >= q->num_buffers) { + dprintk(1, "[%s] buffer index out of range\n", ctx->name); + return -EINVAL; + } ret = vb2_core_qbuf(&ctx->vb_q, b->index, b, NULL); if (ret) { dprintk(1, "[%s] index=%d errno=%d\n", ctx->name, diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c index 95e8c29ccc65..d2f5f30582a9 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c @@ -228,7 +228,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) { struct mtk_vcodec_dev *dev; struct video_device *vfd_enc; - struct resource *res; phandle rproc_phandle; enum mtk_vcodec_fw_type fw_type; int ret; @@ -272,14 +271,12 @@ static int mtk_vcodec_probe(struct platform_device *pdev) goto err_res; } - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (res == NULL) { - dev_err(&pdev->dev, "failed to get irq resource"); - ret = -ENOENT; + dev->enc_irq = platform_get_irq(pdev, 0); + if (dev->enc_irq < 0) { + ret = dev->enc_irq; goto err_res; } - dev->enc_irq = platform_get_irq(pdev, 0); irq_set_status_flags(dev->enc_irq, IRQ_NOAUTOEN); ret = devm_request_irq(&pdev->dev, dev->enc_irq, mtk_vcodec_enc_irq_handler, diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 9c05776f11d1..d509a4a2f08e 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -2740,7 +2740,7 @@ static const struct usb_device_id uvc_ids[] = { .idProduct = 0x4034, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, - .bInterfaceProtocol = 0, + .bInterfaceProtocol = UVC_PC_PROTOCOL_15, .driver_info = (kernel_ulong_t)&uvc_ctrl_power_line_limited }, /* LogiLink Wireless Webcam */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 0f3d6b5667b0..55c26e7d370e 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1040,6 +1040,8 @@ int v4l2_compat_get_array_args(struct file *file, void *mbuf, { int err = 0; + memset(mbuf, 0, array_size); + switch (cmd) { case VIDIOC_G_FMT32: case VIDIOC_S_FMT32: diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index c314025d977e..e6fd355a2e92 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2872,9 +2872,9 @@ static const struct v4l2_ioctl_info v4l2_ioctls[] = { IOCTL_INFO(VIDIOC_S_PRIORITY, v4l_s_priority, v4l_print_u32, INFO_FL_PRIO), IOCTL_INFO(VIDIOC_G_SLICED_VBI_CAP, v4l_g_sliced_vbi_cap, v4l_print_sliced_vbi_cap, INFO_FL_CLEAR(v4l2_sliced_vbi_cap, type)), IOCTL_INFO(VIDIOC_LOG_STATUS, v4l_log_status, v4l_print_newline, 0), - IOCTL_INFO(VIDIOC_G_EXT_CTRLS, v4l_g_ext_ctrls, v4l_print_ext_controls, INFO_FL_CTRL), - IOCTL_INFO(VIDIOC_S_EXT_CTRLS, v4l_s_ext_ctrls, v4l_print_ext_controls, INFO_FL_PRIO | INFO_FL_CTRL), - IOCTL_INFO(VIDIOC_TRY_EXT_CTRLS, v4l_try_ext_ctrls, v4l_print_ext_controls, INFO_FL_CTRL), + IOCTL_INFO(VIDIOC_G_EXT_CTRLS, v4l_g_ext_ctrls, v4l_print_ext_controls, INFO_FL_CTRL | INFO_FL_ALWAYS_COPY), + IOCTL_INFO(VIDIOC_S_EXT_CTRLS, v4l_s_ext_ctrls, v4l_print_ext_controls, INFO_FL_PRIO | INFO_FL_CTRL | INFO_FL_ALWAYS_COPY), + IOCTL_INFO(VIDIOC_TRY_EXT_CTRLS, v4l_try_ext_ctrls, v4l_print_ext_controls, INFO_FL_CTRL | INFO_FL_ALWAYS_COPY), IOCTL_INFO(VIDIOC_ENUM_FRAMESIZES, v4l_stub_enum_framesizes, v4l_print_frmsizeenum, INFO_FL_CLEAR(v4l2_frmsizeenum, pixel_format)), IOCTL_INFO(VIDIOC_ENUM_FRAMEINTERVALS, v4l_stub_enum_frameintervals, v4l_print_frmivalenum, INFO_FL_CLEAR(v4l2_frmivalenum, height)), IOCTL_INFO(VIDIOC_G_ENC_INDEX, v4l_stub_g_enc_index, v4l_print_enc_idx, 0), @@ -3367,8 +3367,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, array_buf = kvmalloc(array_size, GFP_KERNEL); err = -ENOMEM; if (array_buf == NULL) - goto out_array_args; - err = -EFAULT; + goto out; if (in_compat_syscall()) err = v4l2_compat_get_array_args(file, array_buf, user_ptr, array_size, @@ -3377,7 +3376,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, err = copy_from_user(array_buf, user_ptr, array_size) ? -EFAULT : 0; if (err) - goto out_array_args; + goto out; *kernel_ptr = array_buf; } @@ -3395,6 +3394,13 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, trace_v4l2_qbuf(video_devdata(file)->minor, parg); } + /* + * Some ioctls can return an error, but still have valid + * results that must be returned. + */ + if (err < 0 && !always_copy) + goto out; + if (has_array_args) { *kernel_ptr = (void __force *)user_ptr; if (in_compat_syscall()) { @@ -3409,16 +3415,8 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, } else if (copy_to_user(user_ptr, array_buf, array_size)) { err = -EFAULT; } - goto out_array_args; } - /* - * Some ioctls can return an error, but still have valid - * results that must be returned. - */ - if (err < 0 && !always_copy) - goto out; -out_array_args: if (video_put_user((void __user *)arg, parg, cmd, orig_cmd)) err = -EFAULT; out: diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c index 71483cb1e422..5245cf6013c9 100644 --- a/drivers/misc/lkdtm/cfi.c +++ b/drivers/misc/lkdtm/cfi.c @@ -20,6 +20,13 @@ static noinline int lkdtm_increment_int(int *counter) return *counter; } + +/* Don't allow the compiler to inline the calls. */ +static noinline void lkdtm_indirect_call(void (*func)(int *)) +{ + func(&called_count); +} + /* * This tries to call an indirect function with a mismatched prototype. */ @@ -29,15 +36,11 @@ static void lkdtm_CFI_FORWARD_PROTO(void) * Matches lkdtm_increment_void()'s prototype, but not * lkdtm_increment_int()'s prototype. */ - void (*func)(int *); - pr_info("Calling matched prototype ...\n"); - func = lkdtm_increment_void; - func(&called_count); + lkdtm_indirect_call(lkdtm_increment_void); pr_info("Calling mismatched prototype ...\n"); - func = (void *)lkdtm_increment_int; - func(&called_count); + lkdtm_indirect_call((void *)lkdtm_increment_int); pr_err("FAIL: survived mismatched prototype function call!\n"); pr_expected_config(CONFIG_CFI_CLANG); diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c index 080293fa3c52..015927665678 100644 --- a/drivers/misc/lkdtm/fortify.c +++ b/drivers/misc/lkdtm/fortify.c @@ -10,28 +10,31 @@ static volatile int fortify_scratch_space; -static void lkdtm_FORTIFIED_OBJECT(void) +static void lkdtm_FORTIFY_STR_OBJECT(void) { struct target { char a[10]; - } target[2] = {}; + int foo; + } target[3] = {}; /* * Using volatile prevents the compiler from determining the value of * 'size' at compile time. Without that, we would get a compile error * rather than a runtime error. */ - volatile int size = 11; + volatile int size = 20; + + pr_info("trying to strcmp() past the end of a struct\n"); - pr_info("trying to read past the end of a struct\n"); + strncpy(target[0].a, target[1].a, size); /* Store result to global to prevent the code from being eliminated */ - fortify_scratch_space = memcmp(&target[0], &target[1], size); + fortify_scratch_space = target[0].a[3]; - pr_err("FAIL: fortify did not block an object overread!\n"); + pr_err("FAIL: fortify did not block a strncpy() object write overflow!\n"); pr_expected_config(CONFIG_FORTIFY_SOURCE); } -static void lkdtm_FORTIFIED_SUBOBJECT(void) +static void lkdtm_FORTIFY_STR_MEMBER(void) { struct target { char a[10]; @@ -44,7 +47,7 @@ static void lkdtm_FORTIFIED_SUBOBJECT(void) strscpy(src, "over ten bytes", size); size = strlen(src) + 1; - pr_info("trying to strncpy past the end of a member of a struct\n"); + pr_info("trying to strncpy() past the end of a struct member...\n"); /* * strncpy(target.a, src, 20); will hit a compile error because the @@ -56,7 +59,72 @@ static void lkdtm_FORTIFIED_SUBOBJECT(void) /* Store result to global to prevent the code from being eliminated */ fortify_scratch_space = target.a[3]; - pr_err("FAIL: fortify did not block an sub-object overrun!\n"); + pr_err("FAIL: fortify did not block a strncpy() struct member write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); + + kfree(src); +} + +static void lkdtm_FORTIFY_MEM_OBJECT(void) +{ + int before[10]; + struct target { + char a[10]; + int foo; + } target = {}; + int after[10]; + /* + * Using volatile prevents the compiler from determining the value of + * 'size' at compile time. Without that, we would get a compile error + * rather than a runtime error. + */ + volatile int size = 20; + + memset(before, 0, sizeof(before)); + memset(after, 0, sizeof(after)); + fortify_scratch_space = before[5]; + fortify_scratch_space = after[5]; + + pr_info("trying to memcpy() past the end of a struct\n"); + + pr_info("0: %zu\n", __builtin_object_size(&target, 0)); + pr_info("1: %zu\n", __builtin_object_size(&target, 1)); + pr_info("s: %d\n", size); + memcpy(&target, &before, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target.a[3]; + + pr_err("FAIL: fortify did not block a memcpy() object write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); +} + +static void lkdtm_FORTIFY_MEM_MEMBER(void) +{ + struct target { + char a[10]; + char b[10]; + } target; + volatile int size = 20; + char *src; + + src = kmalloc(size, GFP_KERNEL); + strscpy(src, "over ten bytes", size); + size = strlen(src) + 1; + + pr_info("trying to memcpy() past the end of a struct member...\n"); + + /* + * strncpy(target.a, src, 20); will hit a compile error because the + * compiler knows at build time that target.a < 20 bytes. Use a + * volatile to force a runtime error. + */ + memcpy(target.a, src, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target.a[3]; + + pr_err("FAIL: fortify did not block a memcpy() struct member write overflow!\n"); pr_expected_config(CONFIG_FORTIFY_SOURCE); kfree(src); @@ -67,7 +135,7 @@ static void lkdtm_FORTIFIED_SUBOBJECT(void) * strscpy and generate a panic because there is a write overflow (i.e. src * length is greater than dst length). */ -static void lkdtm_FORTIFIED_STRSCPY(void) +static void lkdtm_FORTIFY_STRSCPY(void) { char *src; char dst[5]; @@ -136,9 +204,11 @@ static void lkdtm_FORTIFIED_STRSCPY(void) } static struct crashtype crashtypes[] = { - CRASHTYPE(FORTIFIED_OBJECT), - CRASHTYPE(FORTIFIED_SUBOBJECT), - CRASHTYPE(FORTIFIED_STRSCPY), + CRASHTYPE(FORTIFY_STR_OBJECT), + CRASHTYPE(FORTIFY_STR_MEMBER), + CRASHTYPE(FORTIFY_MEM_OBJECT), + CRASHTYPE(FORTIFY_MEM_MEMBER), + CRASHTYPE(FORTIFY_STRSCPY), }; struct crashtype_category fortify_crashtypes = { diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index 6215ec995cd3..67db57249a34 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -330,7 +330,7 @@ static void lkdtm_USERCOPY_KERNEL(void) pr_info("attempting bad copy_to_user from kernel text: %px\n", vm_mmap); - if (copy_to_user((void __user *)user_addr, function_nocfi(vm_mmap), + if (copy_to_user((void __user *)user_addr, vm_mmap, unconst + PAGE_SIZE)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 3548fe1df7c8..987fe5c9d5a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -21,7 +21,6 @@ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) -#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MIN_STATE 2 #define MLXSW_THERMAL_MAX_DUTY 255 @@ -101,8 +100,6 @@ struct mlxsw_thermal { struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; - unsigned int tz_highest_score; - struct thermal_zone_device *tz_highest_dev; struct mlxsw_thermal_area line_cards[]; }; @@ -193,34 +190,6 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } -static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, - struct thermal_zone_device *tzdev, - struct mlxsw_thermal_trip *trips, - int temp) -{ - struct mlxsw_thermal_trip *trip = trips; - unsigned int score, delta, i, shift = 1; - - /* Calculate thermal zone score, if temperature is above the hot - * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. - */ - score = MLXSW_THERMAL_TEMP_SCORE_MAX; - for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; - i++, trip++) { - if (temp < trip->temp) { - delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); - score = delta * shift; - break; - } - shift *= 256; - } - - if (score > thermal->tz_highest_score) { - thermal->tz_highest_score = score; - thermal->tz_highest_dev = tzdev; - } -} - static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -286,9 +255,6 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); - if (temp > 0) - mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, - temp); *p_temp = temp; return 0; @@ -349,21 +315,6 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } -static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, - int trip, enum thermal_trend *trend) -{ - struct mlxsw_thermal *thermal = tzdev->devdata; - - if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) - return -EINVAL; - - if (tzdev == thermal->tz_highest_dev) - return 1; - - *trend = THERMAL_TREND_STABLE; - return 0; -} - static struct thermal_zone_params mlxsw_thermal_params = { .no_hwmon = true, }; @@ -377,7 +328,6 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_temp = mlxsw_thermal_set_trip_temp, .get_trip_hyst = mlxsw_thermal_get_trip_hyst, .set_trip_hyst = mlxsw_thermal_set_trip_hyst, - .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, @@ -463,7 +413,6 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, int temp, crit_temp, emerg_temp; struct device *dev; u16 sensor_index; - int err; dev = thermal->bus_info->dev; sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; @@ -479,10 +428,8 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, return 0; /* Update trip points. */ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz, - crit_temp, emerg_temp); - if (!err && temp > 0) - mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); + mlxsw_thermal_module_trips_update(dev, thermal->core, tz, + crit_temp, emerg_temp); return 0; } @@ -546,22 +493,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, return 0; } -static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev, - int trip, enum thermal_trend *trend) -{ - struct mlxsw_thermal_module *tz = tzdev->devdata; - struct mlxsw_thermal *thermal = tz->parent; - - if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) - return -EINVAL; - - if (tzdev == thermal->tz_highest_dev) - return 1; - - *trend = THERMAL_TREND_STABLE; - return 0; -} - static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .bind = mlxsw_thermal_module_bind, .unbind = mlxsw_thermal_module_unbind, @@ -571,7 +502,6 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, - .get_trend = mlxsw_thermal_module_trend_get, }; static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, @@ -592,8 +522,6 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, return err; mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); - if (temp > 0) - mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); *p_temp = temp; return 0; @@ -608,7 +536,6 @@ static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, - .get_trend = mlxsw_thermal_module_trend_get, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 66446f1e06cf..8d5a7ae19844 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2162,14 +2162,14 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, static int nvme_pr_clear(struct block_device *bdev, u64 key) { - u32 cdw10 = 1 | (key ? 1 << 3 : 0); + u32 cdw10 = 1 | (key ? 0 : 1 << 3); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 98864b853eef..67d3335e9cc8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3470,6 +3470,10 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1987, 0x5019), /* phison E19 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1987, 0x5021), /* Phison E21 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index a36698a90d2f..4a15c86f45ef 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -639,6 +639,7 @@ static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask) static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask) { #ifdef CONFIG_ACPI + struct acpi_device *parent_adev = acpi_dev_parent(ACPI_COMPANION(dev)); int cpu; /* @@ -653,8 +654,7 @@ static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask) continue; acpi_dev = ACPI_COMPANION(cpu_dev); - if (acpi_dev && - acpi_dev->parent == ACPI_COMPANION(dev)->parent) + if (acpi_dev && acpi_dev_parent(acpi_dev) == parent_adev) cpumask_set_cpu(cpu, mask); } #endif diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 30234c261b05..aaca6db7d8f6 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -840,16 +840,16 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev->parent); struct platform_device *sdev = to_platform_device(dev); - struct acpi_device *adev = ACPI_COMPANION(dev); struct l2cache_pmu *l2cache_pmu = data; struct cluster_pmu *cluster; - unsigned long fw_cluster_id; + u64 fw_cluster_id; int err; int irq; - if (!adev || kstrtoul(adev->pnp.unique_id, 10, &fw_cluster_id) < 0) { + err = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &fw_cluster_id); + if (err) { dev_err(&pdev->dev, "unable to read ACPI uid\n"); - return -ENODEV; + return err; } cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL); @@ -879,7 +879,7 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) } dev_info(&pdev->dev, - "Registered L2 cache PMU cluster %ld\n", fw_cluster_id); + "Registered L2 cache PMU cluster %lld\n", fw_cluster_id); spin_lock_init(&cluster->pmu_lock); diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 1ff2ff6582bf..346311a05460 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -742,7 +742,8 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL); name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s", - acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id); + acpi_dev_parent(acpi_dev)->pnp.unique_id, + acpi_dev->pnp.unique_id); if (!l3pmu || !name) return -ENOMEM; diff --git a/drivers/platform/x86/intel/int3472/common.c b/drivers/platform/x86/intel/int3472/common.c index 77cf058e4168..9db2bb0bbba4 100644 --- a/drivers/platform/x86/intel/int3472/common.c +++ b/drivers/platform/x86/intel/int3472/common.c @@ -62,7 +62,7 @@ int skl_int3472_get_sensor_adev_and_name(struct device *dev, struct acpi_device *sensor; int ret = 0; - sensor = acpi_dev_get_first_consumer_dev(adev); + sensor = acpi_dev_get_next_consumer_dev(adev, NULL); if (!sensor) { dev_err(dev, "INT3472 seems to have no dependents.\n"); return -ENODEV; diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c index 22f61b47f9e5..49fc379fe680 100644 --- a/drivers/platform/x86/intel/int3472/tps68470.c +++ b/drivers/platform/x86/intel/int3472/tps68470.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Author: Dan Scally <djrscally@gmail.com> */ +#include <linux/acpi.h> #include <linux/i2c.h> #include <linux/kernel.h> #include <linux/mfd/core.h> @@ -95,20 +96,65 @@ static int skl_int3472_tps68470_calc_type(struct acpi_device *adev) return DESIGNED_FOR_WINDOWS; } +/* + * Return the size of the flexible array member, because we'll need that later + * on to pass .pdata_size to cells. + */ +static int +skl_int3472_fill_clk_pdata(struct device *dev, struct tps68470_clk_platform_data **clk_pdata) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + struct acpi_device *consumer; + unsigned int n_consumers = 0; + const char *sensor_name; + unsigned int i = 0; + + for_each_acpi_consumer_dev(adev, consumer) + n_consumers++; + + if (!n_consumers) { + dev_err(dev, "INT3472 seems to have no dependents\n"); + return -ENODEV; + } + + *clk_pdata = devm_kzalloc(dev, struct_size(*clk_pdata, consumers, n_consumers), + GFP_KERNEL); + if (!*clk_pdata) + return -ENOMEM; + + (*clk_pdata)->n_consumers = n_consumers; + i = 0; + + for_each_acpi_consumer_dev(adev, consumer) { + sensor_name = devm_kasprintf(dev, GFP_KERNEL, I2C_DEV_NAME_FORMAT, + acpi_dev_name(consumer)); + if (!sensor_name) + return -ENOMEM; + + (*clk_pdata)->consumers[i].consumer_dev_name = sensor_name; + i++; + } + + acpi_dev_put(consumer); + + return n_consumers; +} + static int skl_int3472_tps68470_probe(struct i2c_client *client) { struct acpi_device *adev = ACPI_COMPANION(&client->dev); const struct int3472_tps68470_board_data *board_data; - struct tps68470_clk_platform_data clk_pdata = {}; + struct tps68470_clk_platform_data *clk_pdata; struct mfd_cell *cells; struct regmap *regmap; + int n_consumers; int device_type; int ret; + int i; - ret = skl_int3472_get_sensor_adev_and_name(&client->dev, NULL, - &clk_pdata.consumer_dev_name); - if (ret) - return ret; + n_consumers = skl_int3472_fill_clk_pdata(&client->dev, &clk_pdata); + if (n_consumers < 0) + return n_consumers; regmap = devm_regmap_init_i2c(client, &tps68470_regmap_config); if (IS_ERR(regmap)) { @@ -142,22 +188,25 @@ static int skl_int3472_tps68470_probe(struct i2c_client *client) * the clk + regulators must be ready when this happens. */ cells[0].name = "tps68470-clk"; - cells[0].platform_data = &clk_pdata; - cells[0].pdata_size = sizeof(clk_pdata); + cells[0].platform_data = clk_pdata; + cells[0].pdata_size = struct_size(clk_pdata, consumers, n_consumers); cells[1].name = "tps68470-regulator"; cells[1].platform_data = (void *)board_data->tps68470_regulator_pdata; cells[1].pdata_size = sizeof(struct tps68470_regulator_platform_data); cells[2].name = "tps68470-gpio"; - gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_table); + for (i = 0; i < board_data->n_gpiod_lookups; i++) + gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_tables[i]); ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE, cells, TPS68470_WIN_MFD_CELL_COUNT, NULL, 0, NULL); kfree(cells); - if (ret) - gpiod_remove_lookup_table(board_data->tps68470_gpio_lookup_table); + if (ret) { + for (i = 0; i < board_data->n_gpiod_lookups; i++) + gpiod_remove_lookup_table(board_data->tps68470_gpio_lookup_tables[i]); + } break; case DESIGNED_FOR_CHROMEOS: @@ -181,10 +230,13 @@ static int skl_int3472_tps68470_probe(struct i2c_client *client) static int skl_int3472_tps68470_remove(struct i2c_client *client) { const struct int3472_tps68470_board_data *board_data; + int i; board_data = int3472_tps68470_get_board_data(dev_name(&client->dev)); - if (board_data) - gpiod_remove_lookup_table(board_data->tps68470_gpio_lookup_table); + if (board_data) { + for (i = 0; i < board_data->n_gpiod_lookups; i++) + gpiod_remove_lookup_table(board_data->tps68470_gpio_lookup_tables[i]); + } return 0; } diff --git a/drivers/platform/x86/intel/int3472/tps68470.h b/drivers/platform/x86/intel/int3472/tps68470.h index cfd33eb62740..35915e701593 100644 --- a/drivers/platform/x86/intel/int3472/tps68470.h +++ b/drivers/platform/x86/intel/int3472/tps68470.h @@ -16,8 +16,9 @@ struct tps68470_regulator_platform_data; struct int3472_tps68470_board_data { const char *dev_name; - struct gpiod_lookup_table *tps68470_gpio_lookup_table; const struct tps68470_regulator_platform_data *tps68470_regulator_pdata; + unsigned int n_gpiod_lookups; + struct gpiod_lookup_table *tps68470_gpio_lookup_tables[]; }; const struct int3472_tps68470_board_data *int3472_tps68470_get_board_data(const char *dev_name); diff --git a/drivers/platform/x86/intel/int3472/tps68470_board_data.c b/drivers/platform/x86/intel/int3472/tps68470_board_data.c index 525f09a3b5ff..309eab9c0558 100644 --- a/drivers/platform/x86/intel/int3472/tps68470_board_data.c +++ b/drivers/platform/x86/intel/int3472/tps68470_board_data.c @@ -30,6 +30,15 @@ static struct regulator_consumer_supply int347a_vcm_consumer_supplies[] = { static struct regulator_consumer_supply int347a_vsio_consumer_supplies[] = { REGULATOR_SUPPLY("dovdd", "i2c-INT347A:00"), REGULATOR_SUPPLY("vsio", "i2c-INT347A:00-VCM"), + REGULATOR_SUPPLY("vddd", "i2c-INT347E:00"), +}; + +static struct regulator_consumer_supply int347a_aux1_consumer_supplies[] = { + REGULATOR_SUPPLY("vdda", "i2c-INT347E:00"), +}; + +static struct regulator_consumer_supply int347a_aux2_consumer_supplies[] = { + REGULATOR_SUPPLY("vdddo", "i2c-INT347E:00"), }; static const struct regulator_init_data surface_go_tps68470_core_reg_init_data = { @@ -86,6 +95,28 @@ static const struct regulator_init_data surface_go_tps68470_vsio_reg_init_data = .consumer_supplies = int347a_vsio_consumer_supplies, }; +static const struct regulator_init_data surface_go_tps68470_aux1_reg_init_data = { + .constraints = { + .min_uV = 2815200, + .max_uV = 2815200, + .apply_uV = 1, + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(int347a_aux1_consumer_supplies), + .consumer_supplies = int347a_aux1_consumer_supplies, +}; + +static const struct regulator_init_data surface_go_tps68470_aux2_reg_init_data = { + .constraints = { + .min_uV = 1800600, + .max_uV = 1800600, + .apply_uV = 1, + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, + .num_consumer_supplies = ARRAY_SIZE(int347a_aux2_consumer_supplies), + .consumer_supplies = int347a_aux2_consumer_supplies, +}; + static const struct tps68470_regulator_platform_data surface_go_tps68470_pdata = { .reg_init_data = { [TPS68470_CORE] = &surface_go_tps68470_core_reg_init_data, @@ -93,10 +124,12 @@ static const struct tps68470_regulator_platform_data surface_go_tps68470_pdata = [TPS68470_VCM] = &surface_go_tps68470_vcm_reg_init_data, [TPS68470_VIO] = &surface_go_tps68470_vio_reg_init_data, [TPS68470_VSIO] = &surface_go_tps68470_vsio_reg_init_data, + [TPS68470_AUX1] = &surface_go_tps68470_aux1_reg_init_data, + [TPS68470_AUX2] = &surface_go_tps68470_aux2_reg_init_data, }, }; -static struct gpiod_lookup_table surface_go_tps68470_gpios = { +static struct gpiod_lookup_table surface_go_int347a_gpios = { .dev_id = "i2c-INT347A:00", .table = { GPIO_LOOKUP("tps68470-gpio", 9, "reset", GPIO_ACTIVE_LOW), @@ -105,16 +138,31 @@ static struct gpiod_lookup_table surface_go_tps68470_gpios = { } }; +static struct gpiod_lookup_table surface_go_int347e_gpios = { + .dev_id = "i2c-INT347E:00", + .table = { + GPIO_LOOKUP("tps68470-gpio", 5, "enable", GPIO_ACTIVE_HIGH), + { } + } +}; + static const struct int3472_tps68470_board_data surface_go_tps68470_board_data = { .dev_name = "i2c-INT3472:05", - .tps68470_gpio_lookup_table = &surface_go_tps68470_gpios, .tps68470_regulator_pdata = &surface_go_tps68470_pdata, + .n_gpiod_lookups = 2, + .tps68470_gpio_lookup_tables = { + &surface_go_int347a_gpios, + &surface_go_int347e_gpios, + }, }; static const struct int3472_tps68470_board_data surface_go3_tps68470_board_data = { .dev_name = "i2c-INT3472:01", - .tps68470_gpio_lookup_table = &surface_go_tps68470_gpios, .tps68470_regulator_pdata = &surface_go_tps68470_pdata, + .n_gpiod_lookups = 1, + .tps68470_gpio_lookup_tables = { + &surface_go_int347a_gpios + }, }; static const struct dmi_system_id int3472_tps68470_board_data_table[] = { diff --git a/drivers/pnp/pnpbios/pnpbios.h b/drivers/pnp/pnpbios/pnpbios.h index 2ce739ff9c1a..f3302006842e 100644 --- a/drivers/pnp/pnpbios/pnpbios.h +++ b/drivers/pnp/pnpbios/pnpbios.h @@ -153,7 +153,6 @@ extern int pnpbios_dont_use_current_config; extern int pnpbios_parse_data_stream(struct pnp_dev *dev, struct pnp_bios_node * node); extern int pnpbios_read_resources_from_node(struct pnp_dev *dev, struct pnp_bios_node *node); extern int pnpbios_write_resources_to_node(struct pnp_dev *dev, struct pnp_bios_node *node); -extern void pnpid32_to_pnpid(u32 id, char *str); extern void pnpbios_print_status(const char * module, u16 status); extern void pnpbios_calls_init(union pnp_bios_install_struct * header); diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 21d624f9f5fb..26d00b1853b4 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -994,6 +994,9 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, y = value & 0x1f; value = (1 << y) * (4 + f) * rp->time_unit / 4; } else { + if (value < rp->time_unit) + return 0; + do_div(value, rp->time_unit); y = ilog2(value); f = div64_u64(4 * (value - (1 << y)), 1 << y); @@ -1035,7 +1038,6 @@ static const struct rapl_defaults rapl_defaults_spr_server = { .check_unit = rapl_check_unit_core, .set_floor_freq = set_floor_freq_default, .compute_time_window = rapl_compute_time_window_core, - .dram_domain_energy_unit = 15300, .psys_domain_energy_unit = 1000000000, .spr_psys_bits = true, }; @@ -1110,6 +1112,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 42f2fc0bc8a9..321af498ee11 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -556,6 +556,14 @@ static int __init cec_init(void) if (ce_arr.disabled) return -ENODEV; + /* + * Intel systems may avoid uncorrectable errors + * if pages with corrected errors are aggressively + * taken offline. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + action_threshold = 2; + ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL); if (!ce_arr.array) { pr_err("Error allocating CE array page!\n"); diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c index fdcb0f508984..596cc36aaff6 100644 --- a/drivers/regulator/max8973-regulator.c +++ b/drivers/regulator/max8973-regulator.c @@ -434,9 +434,9 @@ static int max8973_init_dcdc(struct max8973_chip *max, return ret; } -static int max8973_thermal_read_temp(void *data, int *temp) +static int max8973_thermal_read_temp(struct thermal_zone_device *tz, int *temp) { - struct max8973_chip *mchip = data; + struct max8973_chip *mchip = tz->devdata; unsigned int val; int ret; @@ -465,7 +465,7 @@ static irqreturn_t max8973_thermal_irq(int irq, void *data) return IRQ_HANDLED; } -static const struct thermal_zone_of_device_ops max77621_tz_ops = { +static const struct thermal_zone_device_ops max77621_tz_ops = { .get_temp = max8973_thermal_read_temp, }; @@ -479,8 +479,8 @@ static int max8973_thermal_init(struct max8973_chip *mchip) if (mchip->id != MAX77621) return 0; - tzd = devm_thermal_zone_of_sensor_register(mchip->dev, 0, mchip, - &max77621_tz_ops); + tzd = devm_thermal_of_zone_register(mchip->dev, 0, mchip, + &max77621_tz_ops); if (IS_ERR(tzd)) { ret = PTR_ERR(tzd); dev_err(mchip->dev, "Failed to register thermal sensor: %d\n", diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 838d12e65144..c8e079d7e541 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1441,31 +1441,6 @@ static const struct of_device_id pxa2xx_spi_of_match[] = { }; MODULE_DEVICE_TABLE(of, pxa2xx_spi_of_match); -#ifdef CONFIG_ACPI - -static int pxa2xx_spi_get_port_id(struct device *dev) -{ - struct acpi_device *adev; - unsigned int devid; - int port_id = -1; - - adev = ACPI_COMPANION(dev); - if (adev && adev->pnp.unique_id && - !kstrtouint(adev->pnp.unique_id, 0, &devid)) - port_id = devid; - return port_id; -} - -#else /* !CONFIG_ACPI */ - -static int pxa2xx_spi_get_port_id(struct device *dev) -{ - return -1; -} - -#endif /* CONFIG_ACPI */ - - #ifdef CONFIG_PCI static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param) @@ -1479,13 +1454,16 @@ static struct pxa2xx_spi_controller * pxa2xx_spi_init_pdata(struct platform_device *pdev) { struct pxa2xx_spi_controller *pdata; + struct device *dev = &pdev->dev; + struct device *parent = dev->parent; struct ssp_device *ssp; struct resource *res; - struct device *parent = pdev->dev.parent; struct pci_dev *pcidev = dev_is_pci(parent) ? to_pci_dev(parent) : NULL; const struct pci_device_id *pcidev_id = NULL; enum pxa_ssp_type type; const void *match; + int status; + u64 uid; if (pcidev) pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match, pcidev); @@ -1529,7 +1507,12 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) ssp->type = type; ssp->dev = &pdev->dev; - ssp->port_id = pxa2xx_spi_get_port_id(&pdev->dev); + + status = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &uid); + if (status) + ssp->port_id = -1; + else + ssp->port_id = uid; pdata->is_slave = device_property_read_bool(&pdev->dev, "spi-slave"); pdata->num_chipselect = 1; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 32c01e684af3..30e82f968639 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4374,7 +4374,7 @@ static int acpi_spi_notify(struct notifier_block *nb, unsigned long value, switch (value) { case ACPI_RECONFIG_DEVICE_ADD: - ctlr = acpi_spi_find_controller_by_adev(adev->parent); + ctlr = acpi_spi_find_controller_by_adev(acpi_dev_parent(adev)); if (!ctlr) break; diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c index 4af5a831bde0..4fc167b42cf0 100644 --- a/drivers/staging/media/rkvdec/rkvdec-h264.c +++ b/drivers/staging/media/rkvdec/rkvdec-h264.c @@ -1162,8 +1162,8 @@ static int rkvdec_h264_run(struct rkvdec_ctx *ctx) schedule_delayed_work(&rkvdec->watchdog_work, msecs_to_jiffies(2000)); - writel(0xffffffff, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN); - writel(0xffffffff, rkvdec->regs + RKVDEC_REG_H264_ERR_E); + writel(0, rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN); + writel(0, rkvdec->regs + RKVDEC_REG_H264_ERR_E); writel(1, rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND); writel(1, rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND); diff --git a/drivers/thermal/amlogic_thermal.c b/drivers/thermal/amlogic_thermal.c index e61b91d14ad1..d30cb791e63c 100644 --- a/drivers/thermal/amlogic_thermal.c +++ b/drivers/thermal/amlogic_thermal.c @@ -179,12 +179,12 @@ static int amlogic_thermal_disable(struct amlogic_thermal *data) return 0; } -static int amlogic_thermal_get_temp(void *data, int *temp) +static int amlogic_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { unsigned int tval; - struct amlogic_thermal *pdata = data; + struct amlogic_thermal *pdata = tz->devdata; - if (!data) + if (!pdata) return -EINVAL; regmap_read(pdata->regmap, TSENSOR_STAT0, &tval); @@ -195,7 +195,7 @@ static int amlogic_thermal_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops amlogic_thermal_ops = { +static const struct thermal_zone_device_ops amlogic_thermal_ops = { .get_temp = amlogic_thermal_get_temp, }; @@ -276,10 +276,10 @@ static int amlogic_thermal_probe(struct platform_device *pdev) return PTR_ERR(pdata->sec_ao_map); } - pdata->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - 0, - pdata, - &amlogic_thermal_ops); + pdata->tzd = devm_thermal_of_zone_register(&pdev->dev, + 0, + pdata, + &amlogic_thermal_ops); if (IS_ERR(pdata->tzd)) { ret = PTR_ERR(pdata->tzd); dev_err(dev, "Failed to register tsensor: %d\n", ret); diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index c2ebfb5be4b3..52d63b3997fe 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -420,9 +420,9 @@ static struct thermal_zone_device_ops legacy_ops = { .get_temp = armada_get_temp_legacy, }; -static int armada_get_temp(void *_sensor, int *temp) +static int armada_get_temp(struct thermal_zone_device *tz, int *temp) { - struct armada_thermal_sensor *sensor = _sensor; + struct armada_thermal_sensor *sensor = tz->devdata; struct armada_thermal_priv *priv = sensor->priv; int ret; @@ -450,7 +450,7 @@ unlock_mutex: return ret; } -static const struct thermal_zone_of_device_ops of_ops = { +static const struct thermal_zone_device_ops of_ops = { .get_temp = armada_get_temp, }; @@ -928,9 +928,9 @@ static int armada_thermal_probe(struct platform_device *pdev) /* Register the sensor */ sensor->priv = priv; sensor->id = sensor_id; - tz = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->id, sensor, - &of_ops); + tz = devm_thermal_of_zone_register(&pdev->dev, + sensor->id, sensor, + &of_ops); if (IS_ERR(tz)) { dev_info(&pdev->dev, "Thermal sensor %d unavailable\n", sensor_id); diff --git a/drivers/thermal/broadcom/bcm2711_thermal.c b/drivers/thermal/broadcom/bcm2711_thermal.c index e9bef5c3414b..1f8651d15160 100644 --- a/drivers/thermal/broadcom/bcm2711_thermal.c +++ b/drivers/thermal/broadcom/bcm2711_thermal.c @@ -31,11 +31,11 @@ struct bcm2711_thermal_priv { struct thermal_zone_device *thermal; }; -static int bcm2711_get_temp(void *data, int *temp) +static int bcm2711_get_temp(struct thermal_zone_device *tz, int *temp) { - struct bcm2711_thermal_priv *priv = data; - int slope = thermal_zone_get_slope(priv->thermal); - int offset = thermal_zone_get_offset(priv->thermal); + struct bcm2711_thermal_priv *priv = tz->devdata; + int slope = thermal_zone_get_slope(tz); + int offset = thermal_zone_get_offset(tz); u32 val; int ret; @@ -54,7 +54,7 @@ static int bcm2711_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops bcm2711_thermal_of_ops = { +static const struct thermal_zone_device_ops bcm2711_thermal_of_ops = { .get_temp = bcm2711_get_temp, }; @@ -88,8 +88,8 @@ static int bcm2711_thermal_probe(struct platform_device *pdev) } priv->regmap = regmap; - thermal = devm_thermal_zone_of_sensor_register(dev, 0, priv, - &bcm2711_thermal_of_ops); + thermal = devm_thermal_of_zone_register(dev, 0, priv, + &bcm2711_thermal_of_ops); if (IS_ERR(thermal)) { ret = PTR_ERR(thermal); dev_err(dev, "could not register sensor: %d\n", ret); diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index c8e4344d5a3d..2c67841a1115 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -88,9 +88,9 @@ static int bcm2835_thermal_temp2adc(int temp, int offset, int slope) return temp; } -static int bcm2835_thermal_get_temp(void *d, int *temp) +static int bcm2835_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct bcm2835_thermal_data *data = d; + struct bcm2835_thermal_data *data = tz->devdata; u32 val = readl(data->regs + BCM2835_TS_TSENSSTAT); if (!(val & BCM2835_TS_TSENSSTAT_VALID)) @@ -135,7 +135,7 @@ static void bcm2835_thermal_debugfs(struct platform_device *pdev) debugfs_create_regset32("regset", 0444, data->debugfsdir, regset); } -static const struct thermal_zone_of_device_ops bcm2835_thermal_ops = { +static const struct thermal_zone_device_ops bcm2835_thermal_ops = { .get_temp = bcm2835_thermal_get_temp, }; @@ -206,8 +206,8 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) data->clk, rate); /* register of thermal sensor and get info from DT */ - tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data, - &bcm2835_thermal_ops); + tz = devm_thermal_of_zone_register(&pdev->dev, 0, data, + &bcm2835_thermal_ops); if (IS_ERR(tz)) { err = PTR_ERR(tz); dev_err(&pdev->dev, @@ -277,7 +277,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) return 0; err_tz: - thermal_zone_of_sensor_unregister(&pdev->dev, tz); + thermal_of_zone_unregister(tz); err_clk: clk_disable_unprepare(data->clk); @@ -290,7 +290,7 @@ static int bcm2835_thermal_remove(struct platform_device *pdev) struct thermal_zone_device *tz = data->tz; debugfs_remove_recursive(data->debugfsdir); - thermal_zone_of_sensor_unregister(&pdev->dev, tz); + thermal_of_zone_unregister(tz); clk_disable_unprepare(data->clk); return 0; diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c index 0cedb8b4f00a..c79c6cfdd74d 100644 --- a/drivers/thermal/broadcom/brcmstb_thermal.c +++ b/drivers/thermal/broadcom/brcmstb_thermal.c @@ -105,7 +105,7 @@ static struct avs_tmon_trip avs_tmon_trips[] = { struct brcmstb_thermal_params { unsigned int offset; unsigned int mult; - const struct thermal_zone_of_device_ops *of_ops; + const struct thermal_zone_device_ops *of_ops; }; struct brcmstb_thermal_priv { @@ -150,9 +150,9 @@ static inline u32 avs_tmon_temp_to_code(struct brcmstb_thermal_priv *priv, return (u32)((offset - temp) / mult); } -static int brcmstb_get_temp(void *data, int *temp) +static int brcmstb_get_temp(struct thermal_zone_device *tz, int *temp) { - struct brcmstb_thermal_priv *priv = data; + struct brcmstb_thermal_priv *priv = tz->devdata; u32 val; long t; @@ -260,9 +260,9 @@ static irqreturn_t brcmstb_tmon_irq_thread(int irq, void *data) return IRQ_HANDLED; } -static int brcmstb_set_trips(void *data, int low, int high) +static int brcmstb_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct brcmstb_thermal_priv *priv = data; + struct brcmstb_thermal_priv *priv = tz->devdata; dev_dbg(priv->dev, "set trips %d <--> %d\n", low, high); @@ -288,7 +288,7 @@ static int brcmstb_set_trips(void *data, int low, int high) return 0; } -static const struct thermal_zone_of_device_ops brcmstb_16nm_of_ops = { +static const struct thermal_zone_device_ops brcmstb_16nm_of_ops = { .get_temp = brcmstb_get_temp, }; @@ -298,7 +298,7 @@ static const struct brcmstb_thermal_params brcmstb_16nm_params = { .of_ops = &brcmstb_16nm_of_ops, }; -static const struct thermal_zone_of_device_ops brcmstb_28nm_of_ops = { +static const struct thermal_zone_device_ops brcmstb_28nm_of_ops = { .get_temp = brcmstb_get_temp, .set_trips = brcmstb_set_trips, }; @@ -318,7 +318,7 @@ MODULE_DEVICE_TABLE(of, brcmstb_thermal_id_table); static int brcmstb_thermal_probe(struct platform_device *pdev) { - const struct thermal_zone_of_device_ops *of_ops; + const struct thermal_zone_device_ops *of_ops; struct thermal_zone_device *thermal; struct brcmstb_thermal_priv *priv; struct resource *res; @@ -341,8 +341,8 @@ static int brcmstb_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); of_ops = priv->temp_params->of_ops; - thermal = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, priv, - of_ops); + thermal = devm_thermal_of_zone_register(&pdev->dev, 0, priv, + of_ops); if (IS_ERR(thermal)) { ret = PTR_ERR(thermal); dev_err(&pdev->dev, "could not register sensor: %d\n", ret); diff --git a/drivers/thermal/broadcom/ns-thermal.c b/drivers/thermal/broadcom/ns-thermal.c index c9468ba9d449..07a8a3f49bd0 100644 --- a/drivers/thermal/broadcom/ns-thermal.c +++ b/drivers/thermal/broadcom/ns-thermal.c @@ -14,19 +14,14 @@ #define PVTMON_CONTROL0_SEL_TEST_MODE 0x0000000e #define PVTMON_STATUS 0x08 -struct ns_thermal { - struct thermal_zone_device *tz; - void __iomem *pvtmon; -}; - -static int ns_thermal_get_temp(void *data, int *temp) +static int ns_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct ns_thermal *ns_thermal = data; - int offset = thermal_zone_get_offset(ns_thermal->tz); - int slope = thermal_zone_get_slope(ns_thermal->tz); + void __iomem *pvtmon = tz->devdata; + int offset = thermal_zone_get_offset(tz); + int slope = thermal_zone_get_slope(tz); u32 val; - val = readl(ns_thermal->pvtmon + PVTMON_CONTROL0); + val = readl(pvtmon + PVTMON_CONTROL0); if ((val & PVTMON_CONTROL0_SEL_MASK) != PVTMON_CONTROL0_SEL_TEMP_MONITOR) { /* Clear current mode selection */ val &= ~PVTMON_CONTROL0_SEL_MASK; @@ -34,50 +29,47 @@ static int ns_thermal_get_temp(void *data, int *temp) /* Set temp monitor mode (it's the default actually) */ val |= PVTMON_CONTROL0_SEL_TEMP_MONITOR; - writel(val, ns_thermal->pvtmon + PVTMON_CONTROL0); + writel(val, pvtmon + PVTMON_CONTROL0); } - val = readl(ns_thermal->pvtmon + PVTMON_STATUS); + val = readl(pvtmon + PVTMON_STATUS); *temp = slope * val + offset; return 0; } -static const struct thermal_zone_of_device_ops ns_thermal_ops = { +static const struct thermal_zone_device_ops ns_thermal_ops = { .get_temp = ns_thermal_get_temp, }; static int ns_thermal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ns_thermal *ns_thermal; - - ns_thermal = devm_kzalloc(dev, sizeof(*ns_thermal), GFP_KERNEL); - if (!ns_thermal) - return -ENOMEM; + struct thermal_zone_device *tz; + void __iomem *pvtmon; - ns_thermal->pvtmon = of_iomap(dev_of_node(dev), 0); - if (WARN_ON(!ns_thermal->pvtmon)) + pvtmon = of_iomap(dev_of_node(dev), 0); + if (WARN_ON(!pvtmon)) return -ENOENT; - ns_thermal->tz = devm_thermal_zone_of_sensor_register(dev, 0, - ns_thermal, - &ns_thermal_ops); - if (IS_ERR(ns_thermal->tz)) { - iounmap(ns_thermal->pvtmon); - return PTR_ERR(ns_thermal->tz); + tz = devm_thermal_of_zone_register(dev, 0, + pvtmon, + &ns_thermal_ops); + if (IS_ERR(tz)) { + iounmap(pvtmon); + return PTR_ERR(tz); } - platform_set_drvdata(pdev, ns_thermal); + platform_set_drvdata(pdev, pvtmon); return 0; } static int ns_thermal_remove(struct platform_device *pdev) { - struct ns_thermal *ns_thermal = platform_get_drvdata(pdev); + void __iomem *pvtmon = platform_get_drvdata(pdev); - iounmap(ns_thermal->pvtmon); + iounmap(pvtmon); return 0; } diff --git a/drivers/thermal/broadcom/sr-thermal.c b/drivers/thermal/broadcom/sr-thermal.c index 85ab9edd580c..2b93502543ff 100644 --- a/drivers/thermal/broadcom/sr-thermal.c +++ b/drivers/thermal/broadcom/sr-thermal.c @@ -19,7 +19,6 @@ #define SR_TMON_MAX_LIST 6 struct sr_tmon { - struct thermal_zone_device *tz; unsigned int crit_temp; unsigned int tmon_id; struct sr_thermal *priv; @@ -31,9 +30,9 @@ struct sr_thermal { struct sr_tmon tmon[SR_TMON_MAX_LIST]; }; -static int sr_get_temp(void *data, int *temp) +static int sr_get_temp(struct thermal_zone_device *tz, int *temp) { - struct sr_tmon *tmon = data; + struct sr_tmon *tmon = tz->devdata; struct sr_thermal *sr_thermal = tmon->priv; *temp = readl(sr_thermal->regs + SR_TMON_TEMP_BASE(tmon->tmon_id)); @@ -41,13 +40,14 @@ static int sr_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops sr_tz_ops = { +static const struct thermal_zone_device_ops sr_tz_ops = { .get_temp = sr_get_temp, }; static int sr_thermal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct thermal_zone_device *tz; struct sr_thermal *sr_thermal; struct sr_tmon *tmon; struct resource *res; @@ -84,10 +84,10 @@ static int sr_thermal_probe(struct platform_device *pdev) writel(0, sr_thermal->regs + SR_TMON_TEMP_BASE(i)); tmon->tmon_id = i; tmon->priv = sr_thermal; - tmon->tz = devm_thermal_zone_of_sensor_register(dev, i, tmon, - &sr_tz_ops); - if (IS_ERR(tmon->tz)) - return PTR_ERR(tmon->tz); + tz = devm_thermal_of_zone_register(dev, i, tmon, + &sr_tz_ops); + if (IS_ERR(tz)) + return PTR_ERR(tz); dev_dbg(dev, "thermal sensor %d registered\n", i); } diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index b76293cc989c..9f8b438fcf8f 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -475,7 +475,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, /** * __cpufreq_cooling_register - helper function to create cpufreq cooling device - * @np: a valid struct device_node to the cooling device device tree node + * @np: a valid struct device_node to the cooling device tree node * @policy: cpufreq policy * Normally this should be same as cpufreq policy->related_cpus. * @em: Energy Model of the cpufreq policy @@ -501,17 +501,17 @@ __cpufreq_cooling_register(struct device_node *np, struct thermal_cooling_device_ops *cooling_ops; char *name; + if (IS_ERR_OR_NULL(policy)) { + pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy); + return ERR_PTR(-EINVAL); + } + dev = get_cpu_device(policy->cpu); if (unlikely(!dev)) { pr_warn("No cpu device for cpu %d\n", policy->cpu); return ERR_PTR(-ENODEV); } - if (IS_ERR_OR_NULL(policy)) { - pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy); - return ERR_PTR(-EINVAL); - } - i = cpufreq_table_count_valid_entries(policy); if (!i) { pr_debug("%s: CPUFreq table not found or has no valid entries\n", diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c index 180edec34e07..7dcfde7a9f2c 100644 --- a/drivers/thermal/da9062-thermal.c +++ b/drivers/thermal/da9062-thermal.c @@ -248,10 +248,9 @@ static int da9062_thermal_probe(struct platform_device *pdev) jiffies_to_msecs(thermal->zone->passive_delay_jiffies)); ret = platform_get_irq_byname(pdev, "THERMAL"); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to get platform IRQ.\n"); + if (ret < 0) goto err_zone; - } + thermal->irq = ret; ret = request_threaded_irq(thermal->irq, NULL, diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 121cf853e545..cb10e280681f 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -58,9 +58,9 @@ struct db8500_thermal_zone { }; /* Callback to get current temperature */ -static int db8500_thermal_get_temp(void *data, int *temp) +static int db8500_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct db8500_thermal_zone *th = data; + struct db8500_thermal_zone *th = tz->devdata; /* * TODO: There is no PRCMU interface to get temperature data currently, @@ -72,7 +72,7 @@ static int db8500_thermal_get_temp(void *data, int *temp) return 0; } -static struct thermal_zone_of_device_ops thdev_ops = { +static const struct thermal_zone_device_ops thdev_ops = { .get_temp = db8500_thermal_get_temp, }; @@ -182,7 +182,7 @@ static int db8500_thermal_probe(struct platform_device *pdev) } /* register of thermal sensor and get info from DT */ - th->tz = devm_thermal_zone_of_sensor_register(dev, 0, th, &thdev_ops); + th->tz = devm_thermal_of_zone_register(dev, 0, th, &thdev_ops); if (IS_ERR(th->tz)) { dev_err(dev, "register thermal zone sensor failed\n"); return PTR_ERR(th->tz); diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 991a1c54296d..a08bbe33be96 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -31,8 +31,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trip, trip_temp, tz->temperature, trip_hyst); - mutex_lock(&tz->lock); - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if (instance->trip != trip) continue; @@ -65,8 +63,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) instance->cdev->updated = false; /* cdev needs update */ mutex_unlock(&instance->cdev->lock); } - - mutex_unlock(&tz->lock); } /** @@ -100,15 +96,13 @@ static int bang_bang_control(struct thermal_zone_device *tz, int trip) { struct thermal_instance *instance; - thermal_zone_trip_update(tz, trip); + lockdep_assert_held(&tz->lock); - mutex_lock(&tz->lock); + thermal_zone_trip_update(tz, trip); list_for_each_entry(instance, &tz->thermal_instances, tz_node) thermal_cdev_update(instance->cdev); - mutex_unlock(&tz->lock); - return 0; } diff --git a/drivers/thermal/gov_fair_share.c b/drivers/thermal/gov_fair_share.c index 6a2abcfc648f..a4ee4661e9cc 100644 --- a/drivers/thermal/gov_fair_share.c +++ b/drivers/thermal/gov_fair_share.c @@ -82,7 +82,7 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip) int total_instance = 0; int cur_trip_level = get_trip_level(tz); - mutex_lock(&tz->lock); + lockdep_assert_held(&tz->lock); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if (instance->trip != trip) @@ -112,7 +112,6 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip) mutex_unlock(&cdev->lock); } - mutex_unlock(&tz->lock); return 0; } diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 1d5052470967..2d1aeaba38a8 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -392,8 +392,6 @@ static int allocate_power(struct thermal_zone_device *tz, int i, num_actors, total_weight, ret = 0; int trip_max_desired_temperature = params->trip_max_desired_temperature; - mutex_lock(&tz->lock); - num_actors = 0; total_weight = 0; list_for_each_entry(instance, &tz->thermal_instances, tz_node) { @@ -404,10 +402,8 @@ static int allocate_power(struct thermal_zone_device *tz, } } - if (!num_actors) { - ret = -ENODEV; - goto unlock; - } + if (!num_actors) + return -ENODEV; /* * We need to allocate five arrays of the same size: @@ -421,10 +417,8 @@ static int allocate_power(struct thermal_zone_device *tz, BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power)); req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL); - if (!req_power) { - ret = -ENOMEM; - goto unlock; - } + if (!req_power) + return -ENOMEM; max_power = &req_power[num_actors]; granted_power = &req_power[2 * num_actors]; @@ -496,8 +490,6 @@ static int allocate_power(struct thermal_zone_device *tz, control_temp - tz->temperature); kfree(req_power); -unlock: - mutex_unlock(&tz->lock); return ret; } @@ -576,7 +568,6 @@ static void allow_maximum_power(struct thermal_zone_device *tz, bool update) struct power_allocator_params *params = tz->governor_data; u32 req_power; - mutex_lock(&tz->lock); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { struct thermal_cooling_device *cdev = instance->cdev; @@ -598,7 +589,6 @@ static void allow_maximum_power(struct thermal_zone_device *tz, bool update) mutex_unlock(&instance->cdev->lock); } - mutex_unlock(&tz->lock); } /** @@ -712,6 +702,8 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) struct power_allocator_params *params = tz->governor_data; bool update; + lockdep_assert_held(&tz->lock); + /* * We get called for every trip point but we only need to do * our calculations once diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c index 9729b46d0258..cdd3354bc27f 100644 --- a/drivers/thermal/gov_step_wise.c +++ b/drivers/thermal/gov_step_wise.c @@ -117,8 +117,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n", trip, trip_type, trip_temp, trend, throttle); - mutex_lock(&tz->lock); - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if (instance->trip != trip) continue; @@ -145,8 +143,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) instance->cdev->updated = false; /* cdev needs update */ mutex_unlock(&instance->cdev->lock); } - - mutex_unlock(&tz->lock); } /** @@ -164,15 +160,13 @@ static int step_wise_throttle(struct thermal_zone_device *tz, int trip) { struct thermal_instance *instance; - thermal_zone_trip_update(tz, trip); + lockdep_assert_held(&tz->lock); - mutex_lock(&tz->lock); + thermal_zone_trip_update(tz, trip); list_for_each_entry(instance, &tz->thermal_instances, tz_node) thermal_cdev_update(instance->cdev); - mutex_unlock(&tz->lock); - return 0; } diff --git a/drivers/thermal/gov_user_space.c b/drivers/thermal/gov_user_space.c index a62a4e90bd3f..8bc1c22aaf03 100644 --- a/drivers/thermal/gov_user_space.c +++ b/drivers/thermal/gov_user_space.c @@ -34,7 +34,8 @@ static int notify_user_space(struct thermal_zone_device *tz, int trip) char *thermal_prop[5]; int i; - mutex_lock(&tz->lock); + lockdep_assert_held(&tz->lock); + thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", tz->type); thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", tz->temperature); thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=%d", trip); @@ -43,7 +44,7 @@ static int notify_user_space(struct thermal_zone_device *tz, int trip) kobject_uevent_env(&tz->device.kobj, KOBJ_CHANGE, thermal_prop); for (i = 0; i < 4; ++i) kfree(thermal_prop[i]); - mutex_unlock(&tz->lock); + return 0; } diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 19a242c69ce6..d6974db7aaf7 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -434,9 +434,9 @@ static int hi3660_thermal_probe(struct hisi_thermal_data *data) return 0; } -static int hisi_thermal_get_temp(void *__data, int *temp) +static int hisi_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct hisi_thermal_sensor *sensor = __data; + struct hisi_thermal_sensor *sensor = tz->devdata; struct hisi_thermal_data *data = sensor->data; *temp = data->ops->get_temp(sensor); @@ -447,7 +447,7 @@ static int hisi_thermal_get_temp(void *__data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { +static const struct thermal_zone_device_ops hisi_of_thermal_ops = { .get_temp = hisi_thermal_get_temp, }; @@ -459,7 +459,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) data->ops->irq_handler(sensor); - hisi_thermal_get_temp(sensor, &temp); + temp = data->ops->get_temp(sensor); if (temp >= sensor->thres_temp) { dev_crit(&data->pdev->dev, @@ -484,9 +484,9 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, int ret, i; const struct thermal_trip *trip; - sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->id, sensor, - &hisi_of_thermal_ops); + sensor->tzd = devm_thermal_of_zone_register(&pdev->dev, + sensor->id, sensor, + &hisi_of_thermal_ops); if (IS_ERR(sensor->tzd)) { ret = PTR_ERR(sensor->tzd); sensor->tzd = NULL; diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c index af666bd9e8d4..e2c2673025a7 100644 --- a/drivers/thermal/imx8mm_thermal.c +++ b/drivers/thermal/imx8mm_thermal.c @@ -96,15 +96,15 @@ static int imx8mp_tmu_get_temp(void *data, int *temp) return 0; } -static int tmu_get_temp(void *data, int *temp) +static int tmu_get_temp(struct thermal_zone_device *tz, int *temp) { - struct tmu_sensor *sensor = data; + struct tmu_sensor *sensor = tz->devdata; struct imx8mm_tmu *tmu = sensor->priv; - return tmu->socdata->get_temp(data, temp); + return tmu->socdata->get_temp(sensor, temp); } -static struct thermal_zone_of_device_ops tmu_tz_ops = { +static const struct thermal_zone_device_ops tmu_tz_ops = { .get_temp = tmu_get_temp, }; @@ -165,9 +165,9 @@ static int imx8mm_tmu_probe(struct platform_device *pdev) for (i = 0; i < data->num_sensors; i++) { tmu->sensors[i].priv = tmu; tmu->sensors[i].tzd = - devm_thermal_zone_of_sensor_register(&pdev->dev, i, - &tmu->sensors[i], - &tmu_tz_ops); + devm_thermal_of_zone_register(&pdev->dev, i, + &tmu->sensors[i], + &tmu_tz_ops); if (IS_ERR(tmu->sensors[i].tzd)) { ret = PTR_ERR(tmu->sensors[i].tzd); dev_err(&pdev->dev, diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c index 331a241eb0ef..10bfa6507eb4 100644 --- a/drivers/thermal/imx_sc_thermal.c +++ b/drivers/thermal/imx_sc_thermal.c @@ -43,11 +43,11 @@ struct imx_sc_msg_misc_get_temp { } data; } __packed __aligned(4); -static int imx_sc_thermal_get_temp(void *data, int *temp) +static int imx_sc_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { struct imx_sc_msg_misc_get_temp msg; struct imx_sc_rpc_msg *hdr = &msg.hdr; - struct imx_sc_sensor *sensor = data; + struct imx_sc_sensor *sensor = tz->devdata; int ret; msg.data.req.resource_id = sensor->resource_id; @@ -70,7 +70,7 @@ static int imx_sc_thermal_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops imx_sc_thermal_ops = { +static const struct thermal_zone_device_ops imx_sc_thermal_ops = { .get_temp = imx_sc_thermal_get_temp, }; @@ -109,10 +109,10 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) break; } - sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->resource_id, - sensor, - &imx_sc_thermal_ops); + sensor->tzd = devm_thermal_of_zone_register(&pdev->dev, + sensor->resource_id, + sensor, + &imx_sc_thermal_ops); if (IS_ERR(sensor->tzd)) { dev_err(&pdev->dev, "failed to register thermal zone\n"); ret = PTR_ERR(sensor->tzd); diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 365489bf4b8c..db8a6f63657d 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -614,9 +614,8 @@ static int int3400_thermal_probe(struct platform_device *pdev) free_sysfs: cleanup_odvp(priv); - if (priv->data_vault) { - if (!ZERO_OR_NULL_PTR(priv->data_vault)) - sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); + if (!ZERO_OR_NULL_PTR(priv->data_vault)) { + sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); kfree(priv->data_vault); } free_uuid: diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index c2dc4c158b9d..bf1b1cdfade4 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -373,18 +373,7 @@ static struct pci_driver proc_thermal_pci_driver = { .driver.pm = &proc_thermal_pci_pm, }; -static int __init proc_thermal_init(void) -{ - return pci_register_driver(&proc_thermal_pci_driver); -} - -static void __exit proc_thermal_exit(void) -{ - pci_unregister_driver(&proc_thermal_pci_driver); -} - -module_init(proc_thermal_init); -module_exit(proc_thermal_exit); +module_pci_driver(proc_thermal_pci_driver); MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver"); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c index 4571a1a53b84..09e032f822f3 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c @@ -151,18 +151,7 @@ static struct pci_driver proc_thermal_pci_driver = { .driver.pm = &proc_thermal_pci_pm, }; -static int __init proc_thermal_init(void) -{ - return pci_register_driver(&proc_thermal_pci_driver); -} - -static void __exit proc_thermal_exit(void) -{ - pci_unregister_driver(&proc_thermal_pci_driver); -} - -module_init(proc_thermal_init); -module_exit(proc_thermal_exit); +module_pci_driver(proc_thermal_pci_driver); MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver"); diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index c841ab37e7c6..2a5570b9799a 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -62,8 +62,7 @@ static struct dentry *debug_dir; static unsigned int set_target_ratio; static unsigned int current_ratio; static bool should_skip; -static bool reduce_irq; -static atomic_t idle_wakeup_counter; + static unsigned int control_cpu; /* The cpu assigned to collect stat and update * control parameters. default to BSP but BSP * can be offlined. @@ -285,9 +284,6 @@ static unsigned int get_compensation(int ratio) cal_data[ratio + 1].steady_comp) / 3; } - /* REVISIT: simple penalty of double idle injection */ - if (reduce_irq) - comp = ratio; /* do not exceed limit */ if (comp + ratio >= MAX_TARGET_RATIO) comp = MAX_TARGET_RATIO - ratio - 1; @@ -301,13 +297,9 @@ static void adjust_compensation(int target_ratio, unsigned int win) struct powerclamp_calibration_data *d = &cal_data[target_ratio]; /* - * adjust compensations if confidence level has not been reached or - * there are too many wakeups during the last idle injection period, we - * cannot trust the data for compensation. + * adjust compensations if confidence level has not been reached. */ - if (d->confidence >= CONFIDENCE_OK || - atomic_read(&idle_wakeup_counter) > - win * num_online_cpus()) + if (d->confidence >= CONFIDENCE_OK) return; delta = set_target_ratio - current_ratio; @@ -347,14 +339,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, tsc_last = tsc_now; adjust_compensation(target_ratio, win); - /* - * too many external interrupts, set flag such - * that we can take measure later. - */ - reduce_irq = atomic_read(&idle_wakeup_counter) >= - 2 * win * num_online_cpus(); - atomic_set(&idle_wakeup_counter, 0); /* if we are above target+guard, skip */ return set_target_ratio + guard <= current_ratio; } @@ -532,8 +517,10 @@ static int start_power_clamp(void) /* prefer BSP */ control_cpu = 0; - if (!cpu_online(control_cpu)) - control_cpu = smp_processor_id(); + if (!cpu_online(control_cpu)) { + control_cpu = get_cpu(); + put_cpu(); + } clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); diff --git a/drivers/thermal/k3_bandgap.c b/drivers/thermal/k3_bandgap.c index 5d0b3ffc6f46..22c9bcb899c3 100644 --- a/drivers/thermal/k3_bandgap.c +++ b/drivers/thermal/k3_bandgap.c @@ -139,9 +139,9 @@ static int k3_bgp_read_temp(struct k3_thermal_data *devdata, return 0; } -static int k3_thermal_get_temp(void *devdata, int *temp) +static int k3_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct k3_thermal_data *data = devdata; + struct k3_thermal_data *data = tz->devdata; int ret = 0; ret = k3_bgp_read_temp(data, temp); @@ -151,7 +151,7 @@ static int k3_thermal_get_temp(void *devdata, int *temp) return ret; } -static const struct thermal_zone_of_device_ops k3_of_thermal_ops = { +static const struct thermal_zone_device_ops k3_of_thermal_ops = { .get_temp = k3_thermal_get_temp, }; @@ -213,9 +213,9 @@ static int k3_bandgap_probe(struct platform_device *pdev) writel(val, data[id].bgp->base + data[id].ctrl_offset); data[id].tzd = - devm_thermal_zone_of_sensor_register(dev, id, - &data[id], - &k3_of_thermal_ops); + devm_thermal_of_zone_register(dev, id, + &data[id], + &k3_of_thermal_ops); if (IS_ERR(data[id].tzd)) { dev_err(dev, "thermal zone device is NULL\n"); ret = PTR_ERR(data[id].tzd); diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c index 115a44eb4fbf..16b6bcf1bf4f 100644 --- a/drivers/thermal/k3_j72xx_bandgap.c +++ b/drivers/thermal/k3_j72xx_bandgap.c @@ -247,9 +247,9 @@ static inline int k3_bgp_read_temp(struct k3_thermal_data *devdata, } /* Get temperature callback function for thermal zone */ -static int k3_thermal_get_temp(void *devdata, int *temp) +static int k3_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct k3_thermal_data *data = devdata; + struct k3_thermal_data *data = tz->devdata; int ret = 0; ret = k3_bgp_read_temp(data, temp); @@ -259,7 +259,7 @@ static int k3_thermal_get_temp(void *devdata, int *temp) return ret; } -static const struct thermal_zone_of_device_ops k3_of_thermal_ops = { +static const struct thermal_zone_device_ops k3_of_thermal_ops = { .get_temp = k3_thermal_get_temp, }; @@ -474,10 +474,8 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev) writel(val, data[id].bgp->cfg2_base + data[id].ctrl_offset); bgp->ts_data[id] = &data[id]; - ti_thermal = - devm_thermal_zone_of_sensor_register(bgp->dev, id, - &data[id], - &k3_of_thermal_ops); + ti_thermal = devm_thermal_of_zone_register(bgp->dev, id, &data[id], + &k3_of_thermal_ops); if (IS_ERR(ti_thermal)) { dev_err(bgp->dev, "thermal zone device is NULL\n"); ret = PTR_ERR(ti_thermal); diff --git a/drivers/thermal/max77620_thermal.c b/drivers/thermal/max77620_thermal.c index 82d06c7411eb..6451a55eb582 100644 --- a/drivers/thermal/max77620_thermal.c +++ b/drivers/thermal/max77620_thermal.c @@ -44,9 +44,9 @@ struct max77620_therm_info { * Return 0 on success otherwise error number to show reason of failure. */ -static int max77620_thermal_read_temp(void *data, int *temp) +static int max77620_thermal_read_temp(struct thermal_zone_device *tz, int *temp) { - struct max77620_therm_info *mtherm = data; + struct max77620_therm_info *mtherm = tz->devdata; unsigned int val; int ret; @@ -66,7 +66,7 @@ static int max77620_thermal_read_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops max77620_thermal_ops = { +static const struct thermal_zone_device_ops max77620_thermal_ops = { .get_temp = max77620_thermal_read_temp, }; @@ -114,7 +114,7 @@ static int max77620_thermal_probe(struct platform_device *pdev) */ device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent); - mtherm->tz_device = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, + mtherm->tz_device = devm_thermal_of_zone_register(&pdev->dev, 0, mtherm, &max77620_thermal_ops); if (IS_ERR(mtherm->tz_device)) { ret = PTR_ERR(mtherm->tz_device); diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index ede94eadddda..8440692e3890 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -679,9 +679,9 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank) return max; } -static int mtk_read_temp(void *data, int *temperature) +static int mtk_read_temp(struct thermal_zone_device *tz, int *temperature) { - struct mtk_thermal *mt = data; + struct mtk_thermal *mt = tz->devdata; int i; int tempmax = INT_MIN; @@ -700,7 +700,7 @@ static int mtk_read_temp(void *data, int *temperature) return 0; } -static const struct thermal_zone_of_device_ops mtk_thermal_ops = { +static const struct thermal_zone_device_ops mtk_thermal_ops = { .get_temp = mtk_read_temp, }; @@ -1082,8 +1082,8 @@ static int mtk_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mt); - tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt, - &mtk_thermal_ops); + tzdev = devm_thermal_of_zone_register(&pdev->dev, 0, mt, + &mtk_thermal_ops); if (IS_ERR(tzdev)) { ret = PTR_ERR(tzdev); goto err_disable_clk_peri_therm; diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index 073943cbcc2b..af68adf720cc 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -357,9 +357,9 @@ static irqreturn_t adc_tm5_gen2_isr(int irq, void *data) return IRQ_HANDLED; } -static int adc_tm5_get_temp(void *data, int *temp) +static int adc_tm5_get_temp(struct thermal_zone_device *tz, int *temp) { - struct adc_tm5_channel *channel = data; + struct adc_tm5_channel *channel = tz->devdata; int ret; if (!channel || !channel->iio) @@ -639,9 +639,9 @@ config_fail: return ret; } -static int adc_tm5_set_trips(void *data, int low, int high) +static int adc_tm5_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct adc_tm5_channel *channel = data; + struct adc_tm5_channel *channel = tz->devdata; struct adc_tm5_chip *chip; int ret; @@ -660,7 +660,7 @@ static int adc_tm5_set_trips(void *data, int low, int high) return ret; } -static struct thermal_zone_of_device_ops adc_tm5_thermal_ops = { +static const struct thermal_zone_device_ops adc_tm5_thermal_ops = { .get_temp = adc_tm5_get_temp, .set_trips = adc_tm5_set_trips, }; @@ -672,11 +672,10 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm) for (i = 0; i < adc_tm->nchannels; i++) { adc_tm->channels[i].chip = adc_tm; - - tzd = devm_thermal_zone_of_sensor_register(adc_tm->dev, - adc_tm->channels[i].channel, - &adc_tm->channels[i], - &adc_tm5_thermal_ops); + tzd = devm_thermal_of_zone_register(adc_tm->dev, + adc_tm->channels[i].channel, + &adc_tm->channels[i], + &adc_tm5_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) == -ENODEV) { dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n", @@ -1026,10 +1025,8 @@ static int adc_tm5_probe(struct platform_device *pdev) adc_tm->base = reg; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(dev, "get_irq failed: %d\n", irq); + if (irq < 0) return irq; - } ret = adc_tm5_get_dt_data(adc_tm, node); if (ret) { diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c index 770f82cc9bca..be785ab37e53 100644 --- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c +++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c @@ -186,9 +186,9 @@ static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip) return 0; } -static int qpnp_tm_get_temp(void *data, int *temp) +static int qpnp_tm_get_temp(struct thermal_zone_device *tz, int *temp) { - struct qpnp_tm_chip *chip = data; + struct qpnp_tm_chip *chip = tz->devdata; int ret, mili_celsius; if (!temp) @@ -263,9 +263,9 @@ skip: return qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg); } -static int qpnp_tm_set_trip_temp(void *data, int trip, int temp) +static int qpnp_tm_set_trip_temp(struct thermal_zone_device *tz, int trip, int temp) { - struct qpnp_tm_chip *chip = data; + struct qpnp_tm_chip *chip = tz->devdata; const struct thermal_trip *trip_points; int ret; @@ -283,7 +283,7 @@ static int qpnp_tm_set_trip_temp(void *data, int trip, int temp) return ret; } -static const struct thermal_zone_of_device_ops qpnp_tm_sensor_ops = { +static const struct thermal_zone_device_ops qpnp_tm_sensor_ops = { .get_temp = qpnp_tm_get_temp, .set_trip_temp = qpnp_tm_set_trip_temp, }; @@ -446,7 +446,7 @@ static int qpnp_tm_probe(struct platform_device *pdev) * read the trip points. get_temp() returns the default temperature * before the hardware initialization is completed. */ - chip->tz_dev = devm_thermal_zone_of_sensor_register( + chip->tz_dev = devm_thermal_of_zone_register( &pdev->dev, 0, chip, &qpnp_tm_sensor_ops); if (IS_ERR(chip->tz_dev)) { dev_err(&pdev->dev, "failed to register sensor\n"); diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index e49f58e83513..b1b10005fb28 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -532,9 +532,9 @@ static irqreturn_t tsens_irq_thread(int irq, void *data) return IRQ_HANDLED; } -static int tsens_set_trips(void *_sensor, int low, int high) +static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct tsens_sensor *s = _sensor; + struct tsens_sensor *s = tz->devdata; struct tsens_priv *priv = s->priv; struct device *dev = priv->dev; struct tsens_irq_data d; @@ -925,9 +925,9 @@ err_put_device: return ret; } -static int tsens_get_temp(void *data, int *temp) +static int tsens_get_temp(struct thermal_zone_device *tz, int *temp) { - struct tsens_sensor *s = data; + struct tsens_sensor *s = tz->devdata; struct tsens_priv *priv = s->priv; return priv->ops->get_temp(s, temp); @@ -991,7 +991,7 @@ static const struct of_device_id tsens_table[] = { }; MODULE_DEVICE_TABLE(of, tsens_table); -static const struct thermal_zone_of_device_ops tsens_of_ops = { +static const struct thermal_zone_device_ops tsens_of_ops = { .get_temp = tsens_get_temp, .set_trips = tsens_set_trips, }; @@ -1044,9 +1044,9 @@ static int tsens_register(struct tsens_priv *priv) for (i = 0; i < priv->num_sensors; i++) { priv->sensor[i].priv = priv; - tzd = devm_thermal_zone_of_sensor_register(priv->dev, priv->sensor[i].hw_id, - &priv->sensor[i], - &tsens_of_ops); + tzd = devm_thermal_of_zone_register(priv->dev, priv->sensor[i].hw_id, + &priv->sensor[i], + &tsens_of_ops); if (IS_ERR(tzd)) continue; priv->sensor[i].tzd = tzd; diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 73049f9bea25..d111e218f362 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -82,9 +82,9 @@ static struct qoriq_tmu_data *qoriq_sensor_to_data(struct qoriq_sensor *s) return container_of(s, struct qoriq_tmu_data, sensor[s->id]); } -static int tmu_get_temp(void *p, int *temp) +static int tmu_get_temp(struct thermal_zone_device *tz, int *temp) { - struct qoriq_sensor *qsensor = p; + struct qoriq_sensor *qsensor = tz->devdata; struct qoriq_tmu_data *qdata = qoriq_sensor_to_data(qsensor); u32 val; /* @@ -122,7 +122,7 @@ static int tmu_get_temp(void *p, int *temp) return 0; } -static const struct thermal_zone_of_device_ops tmu_tz_ops = { +static const struct thermal_zone_device_ops tmu_tz_ops = { .get_temp = tmu_get_temp, }; @@ -146,9 +146,9 @@ static int qoriq_tmu_register_tmu_zone(struct device *dev, sensor->id = id; - tzd = devm_thermal_zone_of_sensor_register(dev, id, - sensor, - &tmu_tz_ops); + tzd = devm_thermal_of_zone_register(dev, id, + sensor, + &tmu_tz_ops); ret = PTR_ERR_OR_ZERO(tzd); if (ret) { if (ret == -ENODEV) diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index cda7c52f2319..4c1c6f89aa2f 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -164,9 +164,9 @@ static int rcar_gen3_thermal_round(int temp) return result * RCAR3_THERMAL_GRAN; } -static int rcar_gen3_thermal_get_temp(void *devdata, int *temp) +static int rcar_gen3_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct rcar_gen3_thermal_tsc *tsc = devdata; + struct rcar_gen3_thermal_tsc *tsc = tz->devdata; int mcelsius, val; int reg; @@ -203,9 +203,9 @@ static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc, return INT_FIXPT(val); } -static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high) +static int rcar_gen3_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct rcar_gen3_thermal_tsc *tsc = devdata; + struct rcar_gen3_thermal_tsc *tsc = tz->devdata; u32 irqmsk = 0; if (low != -INT_MAX) { @@ -225,7 +225,7 @@ static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high) return 0; } -static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = { +static struct thermal_zone_device_ops rcar_gen3_tz_of_ops = { .get_temp = rcar_gen3_thermal_get_temp, .set_trips = rcar_gen3_thermal_set_trips, }; @@ -508,8 +508,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) for (i = 0; i < priv->num_tscs; i++) { struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i]; - zone = devm_thermal_zone_of_sensor_register(dev, i, tsc, - &rcar_gen3_tz_of_ops); + zone = devm_thermal_of_zone_register(dev, i, tsc, + &rcar_gen3_tz_of_ops); if (IS_ERR(zone)) { dev_err(dev, "Sensor %u: Can't register thermal zone\n", i); ret = PTR_ERR(zone); @@ -560,7 +560,7 @@ static int __maybe_unused rcar_gen3_thermal_resume(struct device *dev) priv->thermal_init(tsc); if (zone->ops->set_trips) - rcar_gen3_thermal_set_trips(tsc, zone->prev_low_trip, + rcar_gen3_thermal_set_trips(zone, zone->prev_low_trip, zone->prev_high_trip); } diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 1d729ed4d685..4df42d70d867 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -271,13 +271,6 @@ static int rcar_thermal_get_current_temp(struct rcar_thermal_priv *priv, return 0; } -static int rcar_thermal_of_get_temp(void *data, int *temp) -{ - struct rcar_thermal_priv *priv = data; - - return rcar_thermal_get_current_temp(priv, temp); -} - static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp) { struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); @@ -323,8 +316,8 @@ static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, return 0; } -static const struct thermal_zone_of_device_ops rcar_thermal_zone_of_ops = { - .get_temp = rcar_thermal_of_get_temp, +static struct thermal_zone_device_ops rcar_thermal_zone_of_ops = { + .get_temp = rcar_thermal_get_temp, }; static struct thermal_zone_device_ops rcar_thermal_zone_ops = { @@ -534,7 +527,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) goto error_unregister; if (chip->use_of_thermal) { - priv->zone = devm_thermal_zone_of_sensor_register( + priv->zone = devm_thermal_of_zone_register( dev, i, priv, &rcar_thermal_zone_of_ops); } else { diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index dc3a9c276a09..819e059cde71 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -1211,9 +1211,9 @@ static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) return IRQ_HANDLED; } -static int rockchip_thermal_set_trips(void *_sensor, int low, int high) +static int rockchip_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct rockchip_thermal_sensor *sensor = _sensor; + struct rockchip_thermal_sensor *sensor = tz->devdata; struct rockchip_thermal_data *thermal = sensor->thermal; const struct rockchip_tsadc_chip *tsadc = thermal->chip; @@ -1224,9 +1224,9 @@ static int rockchip_thermal_set_trips(void *_sensor, int low, int high) sensor->id, thermal->regs, high); } -static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) +static int rockchip_thermal_get_temp(struct thermal_zone_device *tz, int *out_temp) { - struct rockchip_thermal_sensor *sensor = _sensor; + struct rockchip_thermal_sensor *sensor = tz->devdata; struct rockchip_thermal_data *thermal = sensor->thermal; const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip; int retval; @@ -1239,7 +1239,7 @@ static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) return retval; } -static const struct thermal_zone_of_device_ops rockchip_of_thermal_ops = { +static const struct thermal_zone_device_ops rockchip_of_thermal_ops = { .get_temp = rockchip_thermal_get_temp, .set_trips = rockchip_thermal_set_trips, }; @@ -1326,8 +1326,8 @@ rockchip_thermal_register_sensor(struct platform_device *pdev, sensor->thermal = thermal; sensor->id = id; - sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, id, - sensor, &rockchip_of_thermal_ops); + sensor->tzd = devm_thermal_of_zone_register(&pdev->dev, id, sensor, + &rockchip_of_thermal_ops); if (IS_ERR(sensor->tzd)) { error = PTR_ERR(sensor->tzd); dev_err(&pdev->dev, "failed to register sensor %d: %d\n", diff --git a/drivers/thermal/rzg2l_thermal.c b/drivers/thermal/rzg2l_thermal.c index 51ae80eda6af..2e0649f38506 100644 --- a/drivers/thermal/rzg2l_thermal.c +++ b/drivers/thermal/rzg2l_thermal.c @@ -73,9 +73,9 @@ static inline void rzg2l_thermal_write(struct rzg2l_thermal_priv *priv, u32 reg, iowrite32(data, priv->base + reg); } -static int rzg2l_thermal_get_temp(void *devdata, int *temp) +static int rzg2l_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct rzg2l_thermal_priv *priv = devdata; + struct rzg2l_thermal_priv *priv = tz->devdata; u32 result = 0, dsensor, ts_code_ave; int val, i; @@ -114,7 +114,7 @@ static int rzg2l_thermal_get_temp(void *devdata, int *temp) return 0; } -static const struct thermal_zone_of_device_ops rzg2l_tz_of_ops = { +static const struct thermal_zone_device_ops rzg2l_tz_of_ops = { .get_temp = rzg2l_thermal_get_temp, }; @@ -207,8 +207,8 @@ static int rzg2l_thermal_probe(struct platform_device *pdev) goto err; } - zone = devm_thermal_zone_of_sensor_register(dev, 0, priv, - &rzg2l_tz_of_ops); + zone = devm_thermal_of_zone_register(dev, 0, priv, + &rzg2l_tz_of_ops); if (IS_ERR(zone)) { dev_err(dev, "Can't register thermal zone"); ret = PTR_ERR(zone); diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index f4ab4c5b4b62..51874d0a284c 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -650,9 +650,9 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) writel(con, data->base + EXYNOS_TMU_REG_CONTROL); } -static int exynos_get_temp(void *p, int *temp) +static int exynos_get_temp(struct thermal_zone_device *tz, int *temp) { - struct exynos_tmu_data *data = p; + struct exynos_tmu_data *data = tz->devdata; int value, ret = 0; if (!data || !data->tmu_read) @@ -728,9 +728,9 @@ static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, writel(val, data->base + emul_con); } -static int exynos_tmu_set_emulation(void *drv_data, int temp) +static int exynos_tmu_set_emulation(struct thermal_zone_device *tz, int temp) { - struct exynos_tmu_data *data = drv_data; + struct exynos_tmu_data *data = tz->devdata; int ret = -EINVAL; if (data->soc == SOC_ARCH_EXYNOS4210) @@ -750,7 +750,7 @@ out: } #else #define exynos4412_tmu_set_emulation NULL -static int exynos_tmu_set_emulation(void *drv_data, int temp) +static int exynos_tmu_set_emulation(struct thermal_zone_device *tz, int temp) { return -EINVAL; } #endif /* CONFIG_THERMAL_EMULATION */ @@ -997,7 +997,7 @@ static int exynos_map_dt_data(struct platform_device *pdev) return 0; } -static const struct thermal_zone_of_device_ops exynos_sensor_ops = { +static const struct thermal_zone_device_ops exynos_sensor_ops = { .get_temp = exynos_get_temp, .set_emul_temp = exynos_tmu_set_emulation, }; @@ -1091,8 +1091,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) * data->tzd must be registered before calling exynos_tmu_initialize(), * requesting irq and calling exynos_tmu_control(). */ - data->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, data, - &exynos_sensor_ops); + data->tzd = devm_thermal_of_zone_register(&pdev->dev, 0, data, + &exynos_sensor_ops); if (IS_ERR(data->tzd)) { ret = PTR_ERR(data->tzd); if (ret != -EPROBE_DEFER) @@ -1104,21 +1104,19 @@ static int exynos_tmu_probe(struct platform_device *pdev) ret = exynos_tmu_initialize(pdev); if (ret) { dev_err(&pdev->dev, "Failed to initialize TMU\n"); - goto err_thermal; + goto err_sclk; } ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, IRQF_TRIGGER_RISING | IRQF_SHARED, dev_name(&pdev->dev), data); if (ret) { dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); - goto err_thermal; + goto err_sclk; } exynos_tmu_control(pdev, true); return 0; -err_thermal: - thermal_zone_of_sensor_unregister(&pdev->dev, data->tzd); err_sclk: clk_disable_unprepare(data->sclk); err_clk: @@ -1136,9 +1134,7 @@ err_sensor: static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); - struct thermal_zone_device *tzd = data->tzd; - thermal_zone_of_sensor_unregister(&pdev->dev, tzd); exynos_tmu_control(pdev, false); clk_disable_unprepare(data->sclk); diff --git a/drivers/thermal/sprd_thermal.c b/drivers/thermal/sprd_thermal.c index fff80fc18002..ac884514f116 100644 --- a/drivers/thermal/sprd_thermal.c +++ b/drivers/thermal/sprd_thermal.c @@ -204,9 +204,9 @@ static int sprd_thm_temp_to_rawdata(int temp, struct sprd_thermal_sensor *sen) return clamp(val, val, (u32)(SPRD_THM_RAW_DATA_HIGH - 1)); } -static int sprd_thm_read_temp(void *devdata, int *temp) +static int sprd_thm_read_temp(struct thermal_zone_device *tz, int *temp) { - struct sprd_thermal_sensor *sen = devdata; + struct sprd_thermal_sensor *sen = tz->devdata; u32 data; data = readl(sen->data->base + SPRD_THM_TEMP(sen->id)) & @@ -217,7 +217,7 @@ static int sprd_thm_read_temp(void *devdata, int *temp) return 0; } -static const struct thermal_zone_of_device_ops sprd_thm_ops = { +static const struct thermal_zone_device_ops sprd_thm_ops = { .get_temp = sprd_thm_read_temp, }; @@ -408,10 +408,10 @@ static int sprd_thm_probe(struct platform_device *pdev) sprd_thm_sensor_init(thm, sen); - sen->tzd = devm_thermal_zone_of_sensor_register(sen->dev, - sen->id, - sen, - &sprd_thm_ops); + sen->tzd = devm_thermal_of_zone_register(sen->dev, + sen->id, + sen, + &sprd_thm_ops); if (IS_ERR(sen->tzd)) { dev_err(&pdev->dev, "register thermal zone failed %d\n", sen->id); @@ -523,8 +523,8 @@ static int sprd_thm_remove(struct platform_device *pdev) for (i = 0; i < thm->nr_sensors; i++) { sprd_thm_toggle_sensor(thm->sensor[i], false); - devm_thermal_zone_of_sensor_unregister(&pdev->dev, - thm->sensor[i]->tzd); + devm_thermal_of_zone_unregister(&pdev->dev, + thm->sensor[i]->tzd); } clk_disable_unprepare(thm->clk); diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c index 5fd3fb8912a6..78feb802a87d 100644 --- a/drivers/thermal/st/stm_thermal.c +++ b/drivers/thermal/st/stm_thermal.c @@ -302,9 +302,9 @@ static int stm_disable_irq(struct stm_thermal_sensor *sensor) return 0; } -static int stm_thermal_set_trips(void *data, int low, int high) +static int stm_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) { - struct stm_thermal_sensor *sensor = data; + struct stm_thermal_sensor *sensor = tz->devdata; u32 itr1, th; int ret; @@ -350,9 +350,9 @@ static int stm_thermal_set_trips(void *data, int low, int high) } /* Callback to get temperature from HW */ -static int stm_thermal_get_temp(void *data, int *temp) +static int stm_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct stm_thermal_sensor *sensor = data; + struct stm_thermal_sensor *sensor = tz->devdata; u32 periods; int freqM, ret; @@ -474,7 +474,7 @@ static int stm_thermal_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(stm_thermal_pm_ops, stm_thermal_suspend, stm_thermal_resume); -static const struct thermal_zone_of_device_ops stm_tz_ops = { +static const struct thermal_zone_device_ops stm_tz_ops = { .get_temp = stm_thermal_get_temp, .set_trips = stm_thermal_set_trips, }; @@ -539,9 +539,9 @@ static int stm_thermal_probe(struct platform_device *pdev) return ret; } - sensor->th_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, - sensor, - &stm_tz_ops); + sensor->th_dev = devm_thermal_of_zone_register(&pdev->dev, 0, + sensor, + &stm_tz_ops); if (IS_ERR(sensor->th_dev)) { dev_err(&pdev->dev, "%s: thermal zone sensor registering KO\n", @@ -572,7 +572,6 @@ static int stm_thermal_probe(struct platform_device *pdev) return 0; err_tz: - thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev); return ret; } @@ -582,7 +581,6 @@ static int stm_thermal_remove(struct platform_device *pdev) stm_thermal_sensor_off(sensor); thermal_remove_hwmon_sysfs(sensor->th_dev); - thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev); return 0; } diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c index 212c87e63a66..e64d06d1328c 100644 --- a/drivers/thermal/sun8i_thermal.c +++ b/drivers/thermal/sun8i_thermal.c @@ -108,9 +108,9 @@ static int sun50i_h5_calc_temp(struct ths_device *tmdev, return -1590 * reg / 10 + 276000; } -static int sun8i_ths_get_temp(void *data, int *temp) +static int sun8i_ths_get_temp(struct thermal_zone_device *tz, int *temp) { - struct tsensor *s = data; + struct tsensor *s = tz->devdata; struct ths_device *tmdev = s->tmdev; int val = 0; @@ -135,7 +135,7 @@ static int sun8i_ths_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops ths_ops = { +static const struct thermal_zone_device_ops ths_ops = { .get_temp = sun8i_ths_get_temp, }; @@ -468,10 +468,10 @@ static int sun8i_ths_register(struct ths_device *tmdev) tmdev->sensor[i].tmdev = tmdev; tmdev->sensor[i].id = i; tmdev->sensor[i].tzd = - devm_thermal_zone_of_sensor_register(tmdev->dev, - i, - &tmdev->sensor[i], - &ths_ops); + devm_thermal_of_zone_register(tmdev->dev, + i, + &tmdev->sensor[i], + &ths_ops); if (IS_ERR(tmdev->sensor[i].tzd)) return PTR_ERR(tmdev->sensor[i].tzd); diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c index 825eab526619..1efe470f31e9 100644 --- a/drivers/thermal/tegra/soctherm.c +++ b/drivers/thermal/tegra/soctherm.c @@ -421,9 +421,9 @@ static int translate_temp(u16 val) return t; } -static int tegra_thermctl_get_temp(void *data, int *out_temp) +static int tegra_thermctl_get_temp(struct thermal_zone_device *tz, int *out_temp) { - struct tegra_thermctl_zone *zone = data; + struct tegra_thermctl_zone *zone = tz->devdata; u32 val; val = readl(zone->reg); @@ -582,10 +582,9 @@ static int tsensor_group_thermtrip_get(struct tegra_soctherm *ts, int id) return temp; } -static int tegra_thermctl_set_trip_temp(void *data, int trip, int temp) +static int tegra_thermctl_set_trip_temp(struct thermal_zone_device *tz, int trip, int temp) { - struct tegra_thermctl_zone *zone = data; - struct thermal_zone_device *tz = zone->tz; + struct tegra_thermctl_zone *zone = tz->devdata; struct tegra_soctherm *ts = zone->ts; const struct tegra_tsensor_group *sg = zone->sg; struct device *dev = zone->dev; @@ -657,9 +656,9 @@ static void thermal_irq_disable(struct tegra_thermctl_zone *zn) mutex_unlock(&zn->ts->thermctl_lock); } -static int tegra_thermctl_set_trips(void *data, int lo, int hi) +static int tegra_thermctl_set_trips(struct thermal_zone_device *tz, int lo, int hi) { - struct tegra_thermctl_zone *zone = data; + struct tegra_thermctl_zone *zone = tz->devdata; u32 r; thermal_irq_disable(zone); @@ -682,7 +681,7 @@ static int tegra_thermctl_set_trips(void *data, int lo, int hi) return 0; } -static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = { +static const struct thermal_zone_device_ops tegra_of_thermal_ops = { .get_temp = tegra_thermctl_get_temp, .set_trip_temp = tegra_thermctl_set_trip_temp, .set_trips = tegra_thermctl_set_trips, @@ -2194,9 +2193,9 @@ static int tegra_soctherm_probe(struct platform_device *pdev) zone->sg = soc->ttgs[i]; zone->ts = tegra; - z = devm_thermal_zone_of_sensor_register(&pdev->dev, - soc->ttgs[i]->id, zone, - &tegra_of_thermal_ops); + z = devm_thermal_of_zone_register(&pdev->dev, + soc->ttgs[i]->id, zone, + &tegra_of_thermal_ops); if (IS_ERR(z)) { err = PTR_ERR(z); dev_err(&pdev->dev, "failed to register sensor: %d\n", diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c index 5affc3d196be..eb84f0b9dc7c 100644 --- a/drivers/thermal/tegra/tegra-bpmp-thermal.c +++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c @@ -30,9 +30,9 @@ struct tegra_bpmp_thermal { struct tegra_bpmp_thermal_zone **zones; }; -static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp) +static int __tegra_bpmp_thermal_get_temp(struct tegra_bpmp_thermal_zone *zone, + int *out_temp) { - struct tegra_bpmp_thermal_zone *zone = data; struct mrq_thermal_host_to_bpmp_request req; union mrq_thermal_bpmp_to_host_response reply; struct tegra_bpmp_message msg; @@ -60,9 +60,14 @@ static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp) return 0; } -static int tegra_bpmp_thermal_set_trips(void *data, int low, int high) +static int tegra_bpmp_thermal_get_temp(struct thermal_zone_device *tz, int *out_temp) { - struct tegra_bpmp_thermal_zone *zone = data; + return __tegra_bpmp_thermal_get_temp(tz->devdata, out_temp); +} + +static int tegra_bpmp_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) +{ + struct tegra_bpmp_thermal_zone *zone = tz->devdata; struct mrq_thermal_host_to_bpmp_request req; struct tegra_bpmp_message msg; int err; @@ -157,7 +162,7 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp, return 0; } -static const struct thermal_zone_of_device_ops tegra_bpmp_of_thermal_ops = { +static const struct thermal_zone_device_ops tegra_bpmp_of_thermal_ops = { .get_temp = tegra_bpmp_thermal_get_temp, .set_trips = tegra_bpmp_thermal_set_trips, }; @@ -200,13 +205,13 @@ static int tegra_bpmp_thermal_probe(struct platform_device *pdev) zone->idx = i; zone->tegra = tegra; - err = tegra_bpmp_thermal_get_temp(zone, &temp); + err = __tegra_bpmp_thermal_get_temp(zone, &temp); if (err < 0) { devm_kfree(&pdev->dev, zone); continue; } - tzd = devm_thermal_zone_of_sensor_register( + tzd = devm_thermal_of_zone_register( &pdev->dev, i, zone, &tegra_bpmp_of_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) == -EPROBE_DEFER) diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c index 05886684f429..c34501287e96 100644 --- a/drivers/thermal/tegra/tegra30-tsensor.c +++ b/drivers/thermal/tegra/tegra30-tsensor.c @@ -159,9 +159,9 @@ static void devm_tegra_tsensor_hw_disable(void *data) tegra_tsensor_hw_disable(ts); } -static int tegra_tsensor_get_temp(void *data, int *temp) +static int tegra_tsensor_get_temp(struct thermal_zone_device *tz, int *temp) { - const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor_channel *tsc = tz->devdata; const struct tegra_tsensor *ts = tsc->ts; int err, c1, c2, c3, c4, counter; u32 val; @@ -217,9 +217,9 @@ static int tegra_tsensor_temp_to_counter(const struct tegra_tsensor *ts, int tem return DIV_ROUND_CLOSEST(c2 * 1000000 - ts->calib.b, ts->calib.a); } -static int tegra_tsensor_set_trips(void *data, int low, int high) +static int tegra_tsensor_set_trips(struct thermal_zone_device *tz, int low, int high) { - const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor_channel *tsc = tz->devdata; const struct tegra_tsensor *ts = tsc->ts; u32 val; @@ -240,7 +240,7 @@ static int tegra_tsensor_set_trips(void *data, int low, int high) return 0; } -static const struct thermal_zone_of_device_ops ops = { +static const struct thermal_zone_device_ops ops = { .get_temp = tegra_tsensor_get_temp, .set_trips = tegra_tsensor_set_trips, }; @@ -516,7 +516,7 @@ static int tegra_tsensor_register_channel(struct tegra_tsensor *ts, tsc->id = id; tsc->regs = ts->regs + 0x40 * (hw_id + 1); - tsc->tzd = devm_thermal_zone_of_sensor_register(ts->dev, id, tsc, &ops); + tsc->tzd = devm_thermal_of_zone_register(ts->dev, id, tsc, &ops); if (IS_ERR(tsc->tzd)) { if (PTR_ERR(tsc->tzd) != -ENODEV) return dev_err_probe(ts->dev, PTR_ERR(tsc->tzd), diff --git a/drivers/thermal/thermal-generic-adc.c b/drivers/thermal/thermal-generic-adc.c index 73665c3ccfe0..323e273e3298 100644 --- a/drivers/thermal/thermal-generic-adc.c +++ b/drivers/thermal/thermal-generic-adc.c @@ -52,9 +52,9 @@ static int gadc_thermal_adc_to_temp(struct gadc_thermal_info *gti, int val) return temp; } -static int gadc_thermal_get_temp(void *data, int *temp) +static int gadc_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct gadc_thermal_info *gti = data; + struct gadc_thermal_info *gti = tz->devdata; int val; int ret; @@ -68,7 +68,7 @@ static int gadc_thermal_get_temp(void *data, int *temp) return 0; } -static const struct thermal_zone_of_device_ops gadc_thermal_ops = { +static const struct thermal_zone_device_ops gadc_thermal_ops = { .get_temp = gadc_thermal_get_temp, }; @@ -143,8 +143,8 @@ static int gadc_thermal_probe(struct platform_device *pdev) gti->dev = &pdev->dev; platform_set_drvdata(pdev, gti); - gti->tz_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, gti, - &gadc_thermal_ops); + gti->tz_dev = devm_thermal_of_zone_register(&pdev->dev, 0, gti, + &gadc_thermal_ops); if (IS_ERR(gti->tz_dev)) { ret = PTR_ERR(gti->tz_dev); if (ret != -EPROBE_DEFER) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 50d50cec7774..7e669b60a065 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -295,27 +295,14 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, cancel_delayed_work(&tz->poll_queue); } -static inline bool should_stop_polling(struct thermal_zone_device *tz) -{ - return !thermal_zone_device_is_enabled(tz); -} - static void monitor_thermal_zone(struct thermal_zone_device *tz) { - bool stop; - - stop = should_stop_polling(tz); - - mutex_lock(&tz->lock); - - if (!stop && tz->passive) + if (tz->mode != THERMAL_DEVICE_ENABLED) + thermal_zone_device_set_polling(tz, 0); + else if (tz->passive) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); - else if (!stop && tz->polling_delay_jiffies) + else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); - else - thermal_zone_device_set_polling(tz, 0); - - mutex_unlock(&tz->lock); } static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip) @@ -383,18 +370,13 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) handle_critical_trips(tz, trip, trip_temp, type); else handle_non_critical_trips(tz, trip); - /* - * Alright, we handled this trip successfully. - * So, start monitoring again. - */ - monitor_thermal_zone(tz); } static void update_temperature(struct thermal_zone_device *tz) { int temp, ret; - ret = thermal_zone_get_temp(tz, &temp); + ret = __thermal_zone_get_temp(tz, &temp); if (ret) { if (ret != -EAGAIN) dev_warn(&tz->device, @@ -403,10 +385,8 @@ static void update_temperature(struct thermal_zone_device *tz) return; } - mutex_lock(&tz->lock); tz->last_temperature = tz->temperature; tz->temperature = temp; - mutex_unlock(&tz->lock); trace_thermal_temperature(tz); @@ -469,15 +449,9 @@ EXPORT_SYMBOL_GPL(thermal_zone_device_disable); int thermal_zone_device_is_enabled(struct thermal_zone_device *tz) { - enum thermal_device_mode mode; - - mutex_lock(&tz->lock); - - mode = tz->mode; + lockdep_assert_held(&tz->lock); - mutex_unlock(&tz->lock); - - return mode == THERMAL_DEVICE_ENABLED; + return tz->mode == THERMAL_DEVICE_ENABLED; } void thermal_zone_device_update(struct thermal_zone_device *tz, @@ -485,9 +459,6 @@ void thermal_zone_device_update(struct thermal_zone_device *tz, { int count; - if (should_stop_polling(tz)) - return; - if (atomic_read(&in_suspend)) return; @@ -495,14 +466,23 @@ void thermal_zone_device_update(struct thermal_zone_device *tz, "'get_temp' ops set\n", __func__)) return; + mutex_lock(&tz->lock); + + if (!thermal_zone_device_is_enabled(tz)) + goto out; + update_temperature(tz); - thermal_zone_set_trips(tz); + __thermal_zone_set_trips(tz); tz->notify_event = event; for (count = 0; count < tz->num_trips; count++) handle_thermal_trip(tz, count); + + monitor_thermal_zone(tz); +out: + mutex_unlock(&tz->lock); } EXPORT_SYMBOL_GPL(thermal_zone_device_update); @@ -1212,7 +1192,20 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t return ERR_PTR(-EINVAL); } - if (num_trips > THERMAL_MAX_TRIPS || num_trips < 0 || mask >> num_trips) { + /* + * Max trip count can't exceed 31 as the "mask >> num_trips" condition. + * For example, shifting by 32 will result in compiler warning: + * warning: right shift count >= width of type [-Wshift-count- overflow] + * + * Also "mask >> num_trips" will always be true with 32 bit shift. + * E.g. mask = 0x80000000 for trip id 31 to be RW. Then + * mask >> 32 = 0x80000000 + * This will result in failure for the below condition. + * + * Check will be true when the bit 31 of the mask is set. + * 32 bit shift will cause overflow of 4 byte integer. + */ + if (num_trips > (BITS_PER_TYPE(int) - 1) || num_trips < 0 || mask >> num_trips) { pr_err("Incorrect number of thermal trips\n"); return ERR_PTR(-EINVAL); } @@ -1239,7 +1232,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t } tz->id = id; - strlcpy(tz->type, type, sizeof(tz->type)); + strscpy(tz->type, type, sizeof(tz->type)); result = dev_set_name(&tz->device, "thermal_zone%d", tz->id); if (result) @@ -1458,9 +1451,6 @@ static int thermal_pm_notify(struct notifier_block *nb, case PM_POST_SUSPEND: atomic_set(&in_suspend, 0); list_for_each_entry(tz, &thermal_tz_list, node) { - if (!thermal_zone_device_is_enabled(tz)) - continue; - thermal_zone_device_init(tz); thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); @@ -1492,10 +1482,6 @@ static int __init thermal_init(void) if (result) goto unregister_governors; - result = of_parse_thermal_zones(); - if (result) - goto unregister_class; - result = register_pm_notifier(&thermal_pm_nb); if (result) pr_warn("Thermal: Can not register suspend notifier, return %d\n", @@ -1503,8 +1489,6 @@ static int __init thermal_init(void) return 0; -unregister_class: - class_unregister(&thermal_class); unregister_governors: thermal_unregister_governors(); error: diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index c991bb290512..1571917bd3c8 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -112,6 +112,8 @@ int thermal_build_list_of_policies(char *buf); /* Helpers */ void thermal_zone_set_trips(struct thermal_zone_device *tz); +void __thermal_zone_set_trips(struct thermal_zone_device *tz); +int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); /* sysfs I/F */ int thermal_zone_create_device_groups(struct thermal_zone_device *, int); @@ -135,13 +137,11 @@ thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, /* device tree support */ #ifdef CONFIG_THERMAL_OF -int of_parse_thermal_zones(void); int of_thermal_get_ntrips(struct thermal_zone_device *); bool of_thermal_is_trip_valid(struct thermal_zone_device *, int); const struct thermal_trip * of_thermal_get_trip_points(struct thermal_zone_device *); #else -static inline int of_parse_thermal_zones(void) { return 0; } static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz) { return 0; diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index 690890f054a3..c65cdce8f856 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -64,27 +64,17 @@ get_thermal_instance(struct thermal_zone_device *tz, } EXPORT_SYMBOL(get_thermal_instance); -/** - * thermal_zone_get_temp() - returns the temperature of a thermal zone - * @tz: a valid pointer to a struct thermal_zone_device - * @temp: a valid pointer to where to store the resulting temperature. - * - * When a valid thermal zone reference is passed, it will fetch its - * temperature and fill @temp. - * - * Return: On success returns 0, an error code otherwise - */ -int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) +int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { int ret = -EINVAL; int count; int crit_temp = INT_MAX; enum thermal_trip_type type; - if (!tz || IS_ERR(tz) || !tz->ops->get_temp) - goto exit; + lockdep_assert_held(&tz->lock); - mutex_lock(&tz->lock); + if (!tz || IS_ERR(tz) || !tz->ops->get_temp) + return -EINVAL; ret = tz->ops->get_temp(tz, temp); @@ -107,35 +97,42 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) *temp = tz->emul_temperature; } - mutex_unlock(&tz->lock); -exit: return ret; } -EXPORT_SYMBOL_GPL(thermal_zone_get_temp); /** - * thermal_zone_set_trips - Computes the next trip points for the driver - * @tz: a pointer to a thermal zone device structure + * thermal_zone_get_temp() - returns the temperature of a thermal zone + * @tz: a valid pointer to a struct thermal_zone_device + * @temp: a valid pointer to where to store the resulting temperature. * - * The function computes the next temperature boundaries by browsing - * the trip points. The result is the closer low and high trip points - * to the current temperature. These values are passed to the backend - * driver to let it set its own notification mechanism (usually an - * interrupt). + * When a valid thermal zone reference is passed, it will fetch its + * temperature and fill @temp. * - * It does not return a value + * Return: On success returns 0, an error code otherwise */ -void thermal_zone_set_trips(struct thermal_zone_device *tz) +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) +{ + int ret; + + mutex_lock(&tz->lock); + ret = __thermal_zone_get_temp(tz, temp); + mutex_unlock(&tz->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(thermal_zone_get_temp); + +void __thermal_zone_set_trips(struct thermal_zone_device *tz) { int low = -INT_MAX; int high = INT_MAX; int trip_temp, hysteresis; int i, ret; - mutex_lock(&tz->lock); + lockdep_assert_held(&tz->lock); if (!tz->ops->set_trips || !tz->ops->get_trip_hyst) - goto exit; + return; for (i = 0; i < tz->num_trips; i++) { int trip_low; @@ -154,7 +151,7 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) /* No need to change trip points */ if (tz->prev_low_trip == low && tz->prev_high_trip == high) - goto exit; + return; tz->prev_low_trip = low; tz->prev_high_trip = high; @@ -169,8 +166,24 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) ret = tz->ops->set_trips(tz, low, high); if (ret) dev_err(&tz->device, "Failed to set trips: %d\n", ret); +} -exit: +/** + * thermal_zone_set_trips - Computes the next trip points for the driver + * @tz: a pointer to a thermal zone device structure + * + * The function computes the next temperature boundaries by browsing + * the trip points. The result is the closer low and high trip points + * to the current temperature. These values are passed to the backend + * driver to let it set its own notification mechanism (usually an + * interrupt). + * + * It does not return a value + */ +void thermal_zone_set_trips(struct thermal_zone_device *tz) +{ + mutex_lock(&tz->lock); + __thermal_zone_set_trips(tz); mutex_unlock(&tz->lock); } diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 09e49ec8b6f4..f53f4ceb6a5d 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -147,7 +147,7 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return -ENOMEM; INIT_LIST_HEAD(&hwmon->tz_list); - strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); + strscpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); strreplace(hwmon->type, '-', '_'); hwmon->device = hwmon_device_register_for_thermal(&tz->device, hwmon->type, hwmon); diff --git a/drivers/thermal/thermal_mmio.c b/drivers/thermal/thermal_mmio.c index 360b0dfdc3b0..39c921415989 100644 --- a/drivers/thermal/thermal_mmio.c +++ b/drivers/thermal/thermal_mmio.c @@ -20,11 +20,10 @@ static u32 thermal_mmio_readb(void __iomem *mmio_base) return readb(mmio_base); } -static int thermal_mmio_get_temperature(void *private, int *temp) +static int thermal_mmio_get_temperature(struct thermal_zone_device *tz, int *temp) { int t; - struct thermal_mmio *sensor = - (struct thermal_mmio *)private; + struct thermal_mmio *sensor = tz->devdata; t = sensor->read_mmio(sensor->mmio_base) & sensor->mask; t *= sensor->factor; @@ -34,7 +33,7 @@ static int thermal_mmio_get_temperature(void *private, int *temp) return 0; } -static const struct thermal_zone_of_device_ops thermal_mmio_ops = { +static const struct thermal_zone_device_ops thermal_mmio_ops = { .get_temp = thermal_mmio_get_temperature, }; @@ -68,10 +67,10 @@ static int thermal_mmio_probe(struct platform_device *pdev) } } - thermal_zone = devm_thermal_zone_of_sensor_register(&pdev->dev, - 0, - sensor, - &thermal_mmio_ops); + thermal_zone = devm_thermal_of_zone_register(&pdev->dev, + 0, + sensor, + &thermal_mmio_ops); if (IS_ERR(thermal_zone)) { dev_err(&pdev->dev, "failed to register sensor (%ld)\n", @@ -79,7 +78,7 @@ static int thermal_mmio_probe(struct platform_device *pdev) return PTR_ERR(thermal_zone); } - thermal_mmio_get_temperature(sensor, &temperature); + thermal_mmio_get_temperature(thermal_zone, &temperature); dev_info(&pdev->dev, "thermal mmio sensor %s registered, current temperature: %d\n", pdev->name, temperature); @@ -107,7 +106,7 @@ static struct platform_driver thermal_mmio_driver = { .probe = thermal_mmio_probe, .driver = { .name = "thermal-mmio", - .of_match_table = of_match_ptr(thermal_mmio_id_table), + .of_match_table = thermal_mmio_id_table, }, }; diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 802c30b72a92..fd2fb84bf246 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -19,93 +19,6 @@ #include "thermal_core.h" -/*** Private data structures to represent thermal device tree data ***/ - -/** - * struct __thermal_cooling_bind_param - a cooling device for a trip point - * @cooling_device: a pointer to identify the referred cooling device - * @min: minimum cooling state used at this trip point - * @max: maximum cooling state used at this trip point - */ - -struct __thermal_cooling_bind_param { - struct device_node *cooling_device; - unsigned long min; - unsigned long max; -}; - -/** - * struct __thermal_bind_params - a match between trip and cooling device - * @tcbp: a pointer to an array of cooling devices - * @count: number of elements in array - * @trip_id: the trip point index - * @usage: the percentage (from 0 to 100) of cooling contribution - */ - -struct __thermal_bind_params { - struct __thermal_cooling_bind_param *tcbp; - unsigned int count; - unsigned int trip_id; - unsigned int usage; -}; - -/** - * struct __thermal_zone - internal representation of a thermal zone - * @passive_delay: polling interval while passive cooling is activated - * @polling_delay: zone polling interval - * @slope: slope of the temperature adjustment curve - * @offset: offset of the temperature adjustment curve - * @ntrips: number of trip points - * @trips: an array of trip points (0..ntrips - 1) - * @num_tbps: number of thermal bind params - * @tbps: an array of thermal bind params (0..num_tbps - 1) - * @sensor_data: sensor private data used while reading temperature and trend - * @ops: set of callbacks to handle the thermal zone based on DT - */ - -struct __thermal_zone { - int passive_delay; - int polling_delay; - int slope; - int offset; - - /* trip data */ - int ntrips; - struct thermal_trip *trips; - - /* cooling binding data */ - int num_tbps; - struct __thermal_bind_params *tbps; - - /* sensor interface */ - void *sensor_data; - const struct thermal_zone_of_device_ops *ops; -}; - -/*** DT thermal zone device callbacks ***/ - -static int of_thermal_get_temp(struct thermal_zone_device *tz, - int *temp) -{ - struct __thermal_zone *data = tz->devdata; - - if (!data->ops || !data->ops->get_temp) - return -EINVAL; - - return data->ops->get_temp(data->sensor_data, temp); -} - -static int of_thermal_set_trips(struct thermal_zone_device *tz, - int low, int high) -{ - struct __thermal_zone *data = tz->devdata; - - if (!data->ops || !data->ops->set_trips) - return -EINVAL; - - return data->ops->set_trips(data->sensor_data, low, high); -} - /** * of_thermal_get_ntrips - function to export number of available trip * points. @@ -158,114 +71,6 @@ of_thermal_get_trip_points(struct thermal_zone_device *tz) } EXPORT_SYMBOL_GPL(of_thermal_get_trip_points); -/** - * of_thermal_set_emul_temp - function to set emulated temperature - * - * @tz: pointer to a thermal zone - * @temp: temperature to set - * - * This function gives the ability to set emulated value of temperature, - * which is handy for debugging - * - * Return: zero on success, error code otherwise - */ -static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, - int temp) -{ - struct __thermal_zone *data = tz->devdata; - - if (!data->ops || !data->ops->set_emul_temp) - return -EINVAL; - - return data->ops->set_emul_temp(data->sensor_data, temp); -} - -static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, - enum thermal_trend *trend) -{ - struct __thermal_zone *data = tz->devdata; - - if (!data->ops || !data->ops->get_trend) - return -EINVAL; - - return data->ops->get_trend(data->sensor_data, trip, trend); -} - -static int of_thermal_change_mode(struct thermal_zone_device *tz, - enum thermal_device_mode mode) -{ - struct __thermal_zone *data = tz->devdata; - - return data->ops->change_mode(data->sensor_data, mode); -} - -static int of_thermal_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct __thermal_zone *data = thermal->devdata; - struct __thermal_bind_params *tbp; - struct __thermal_cooling_bind_param *tcbp; - int i, j; - - if (!data || IS_ERR(data)) - return -ENODEV; - - /* find where to bind */ - for (i = 0; i < data->num_tbps; i++) { - tbp = data->tbps + i; - - for (j = 0; j < tbp->count; j++) { - tcbp = tbp->tcbp + j; - - if (tcbp->cooling_device == cdev->np) { - int ret; - - ret = thermal_zone_bind_cooling_device(thermal, - tbp->trip_id, cdev, - tcbp->max, - tcbp->min, - tbp->usage); - if (ret) - return ret; - } - } - } - - return 0; -} - -static int of_thermal_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - struct __thermal_zone *data = thermal->devdata; - struct __thermal_bind_params *tbp; - struct __thermal_cooling_bind_param *tcbp; - int i, j; - - if (!data || IS_ERR(data)) - return -ENODEV; - - /* find where to unbind */ - for (i = 0; i < data->num_tbps; i++) { - tbp = data->tbps + i; - - for (j = 0; j < tbp->count; j++) { - tcbp = tbp->tcbp + j; - - if (tcbp->cooling_device == cdev->np) { - int ret; - - ret = thermal_zone_unbind_cooling_device(thermal, - tbp->trip_id, cdev); - if (ret) - return ret; - } - } - } - - return 0; -} - static int of_thermal_get_trip_type(struct thermal_zone_device *tz, int trip, enum thermal_trip_type *type) { @@ -288,28 +93,6 @@ static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip, return 0; } -static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, - int temp) -{ - struct __thermal_zone *data = tz->devdata; - - if (trip >= tz->num_trips || trip < 0) - return -EDOM; - - if (data->ops && data->ops->set_trip_temp) { - int ret; - - ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); - if (ret) - return ret; - } - - /* thermal framework should take care of data->mask & (1 << trip) */ - tz->trips[trip].temperature = temp; - - return 0; -} - static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip, int *hyst) { @@ -347,62 +130,6 @@ static int of_thermal_get_crit_temp(struct thermal_zone_device *tz, return -EINVAL; } -static struct thermal_zone_device_ops of_thermal_ops = { - .get_trip_type = of_thermal_get_trip_type, - .get_trip_temp = of_thermal_get_trip_temp, - .set_trip_temp = of_thermal_set_trip_temp, - .get_trip_hyst = of_thermal_get_trip_hyst, - .set_trip_hyst = of_thermal_set_trip_hyst, - .get_crit_temp = of_thermal_get_crit_temp, - - .bind = of_thermal_bind, - .unbind = of_thermal_unbind, -}; - -/*** sensor API ***/ - -static struct thermal_zone_device * -thermal_zone_of_add_sensor(struct device_node *zone, - struct device_node *sensor, void *data, - const struct thermal_zone_of_device_ops *ops) -{ - struct thermal_zone_device *tzd; - struct __thermal_zone *tz; - - tzd = thermal_zone_get_zone_by_name(zone->name); - if (IS_ERR(tzd)) - return ERR_PTR(-EPROBE_DEFER); - - tz = tzd->devdata; - - if (!ops) - return ERR_PTR(-EINVAL); - - mutex_lock(&tzd->lock); - tz->ops = ops; - tz->sensor_data = data; - - tzd->ops->get_temp = of_thermal_get_temp; - tzd->ops->get_trend = of_thermal_get_trend; - - /* - * The thermal zone core will calculate the window if they have set the - * optional set_trips pointer. - */ - if (ops->set_trips) - tzd->ops->set_trips = of_thermal_set_trips; - - if (ops->set_emul_temp) - tzd->ops->set_emul_temp = of_thermal_set_emul_temp; - - if (ops->change_mode) - tzd->ops->change_mode = of_thermal_change_mode; - - mutex_unlock(&tzd->lock); - - return tzd; -} - /** * thermal_zone_of_get_sensor_id - get sensor ID from a DT thermal zone * @tz_np: a valid thermal zone device node. @@ -447,207 +174,6 @@ int thermal_zone_of_get_sensor_id(struct device_node *tz_np, } EXPORT_SYMBOL_GPL(thermal_zone_of_get_sensor_id); -/** - * thermal_zone_of_sensor_register - registers a sensor to a DT thermal zone - * @dev: a valid struct device pointer of a sensor device. Must contain - * a valid .of_node, for the sensor node. - * @sensor_id: a sensor identifier, in case the sensor IP has more - * than one sensors - * @data: a private pointer (owned by the caller) that will be passed - * back, when a temperature reading is needed. - * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp. - * - * This function will search the list of thermal zones described in device - * tree and look for the zone that refer to the sensor device pointed by - * @dev->of_node as temperature providers. For the zone pointing to the - * sensor node, the sensor will be added to the DT thermal zone device. - * - * The thermal zone temperature is provided by the @get_temp function - * pointer. When called, it will have the private pointer @data back. - * - * The thermal zone temperature trend is provided by the @get_trend function - * pointer. When called, it will have the private pointer @data back. - * - * TODO: - * 01 - This function must enqueue the new sensor instead of using - * it as the only source of temperature values. - * - * 02 - There must be a way to match the sensor with all thermal zones - * that refer to it. - * - * Return: On success returns a valid struct thermal_zone_device, - * otherwise, it returns a corresponding ERR_PTR(). Caller must - * check the return value with help of IS_ERR() helper. - */ -struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data, - const struct thermal_zone_of_device_ops *ops) -{ - struct device_node *np, *child, *sensor_np; - struct thermal_zone_device *tzd = ERR_PTR(-ENODEV); - - np = of_find_node_by_name(NULL, "thermal-zones"); - if (!np) - return ERR_PTR(-ENODEV); - - if (!dev || !dev->of_node) { - of_node_put(np); - return ERR_PTR(-ENODEV); - } - - sensor_np = of_node_get(dev->of_node); - - for_each_available_child_of_node(np, child) { - int ret, id; - - /* For now, thermal framework supports only 1 sensor per zone */ - ret = thermal_zone_of_get_sensor_id(child, sensor_np, &id); - if (ret) - continue; - - if (id == sensor_id) { - tzd = thermal_zone_of_add_sensor(child, sensor_np, - data, ops); - if (!IS_ERR(tzd)) - thermal_zone_device_enable(tzd); - - of_node_put(child); - goto exit; - } - } -exit: - of_node_put(sensor_np); - of_node_put(np); - - return tzd; -} -EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_register); - -/** - * thermal_zone_of_sensor_unregister - unregisters a sensor from a DT thermal zone - * @dev: a valid struct device pointer of a sensor device. Must contain - * a valid .of_node, for the sensor node. - * @tzd: a pointer to struct thermal_zone_device where the sensor is registered. - * - * This function removes the sensor callbacks and private data from the - * thermal zone device registered with thermal_zone_of_sensor_register() - * API. It will also silent the zone by remove the .get_temp() and .get_trend() - * thermal zone device callbacks. - * - * TODO: When the support to several sensors per zone is added, this - * function must search the sensor list based on @dev parameter. - * - */ -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tzd) -{ - struct __thermal_zone *tz; - - if (!dev || !tzd || !tzd->devdata) - return; - - tz = tzd->devdata; - - /* no __thermal_zone, nothing to be done */ - if (!tz) - return; - - /* stop temperature polling */ - thermal_zone_device_disable(tzd); - - mutex_lock(&tzd->lock); - tzd->ops->get_temp = NULL; - tzd->ops->get_trend = NULL; - tzd->ops->set_emul_temp = NULL; - tzd->ops->change_mode = NULL; - - tz->ops = NULL; - tz->sensor_data = NULL; - mutex_unlock(&tzd->lock); -} -EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister); - -static void devm_thermal_zone_of_sensor_release(struct device *dev, void *res) -{ - thermal_zone_of_sensor_unregister(dev, - *(struct thermal_zone_device **)res); -} - -static int devm_thermal_zone_of_sensor_match(struct device *dev, void *res, - void *data) -{ - struct thermal_zone_device **r = res; - - if (WARN_ON(!r || !*r)) - return 0; - - return *r == data; -} - -/** - * devm_thermal_zone_of_sensor_register - Resource managed version of - * thermal_zone_of_sensor_register() - * @dev: a valid struct device pointer of a sensor device. Must contain - * a valid .of_node, for the sensor node. - * @sensor_id: a sensor identifier, in case the sensor IP has more - * than one sensors - * @data: a private pointer (owned by the caller) that will be passed - * back, when a temperature reading is needed. - * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp. - * - * Refer thermal_zone_of_sensor_register() for more details. - * - * Return: On success returns a valid struct thermal_zone_device, - * otherwise, it returns a corresponding ERR_PTR(). Caller must - * check the return value with help of IS_ERR() helper. - * Registered thermal_zone_device device will automatically be - * released when device is unbounded. - */ -struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int sensor_id, - void *data, const struct thermal_zone_of_device_ops *ops) -{ - struct thermal_zone_device **ptr, *tzd; - - ptr = devres_alloc(devm_thermal_zone_of_sensor_release, sizeof(*ptr), - GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - tzd = thermal_zone_of_sensor_register(dev, sensor_id, data, ops); - if (IS_ERR(tzd)) { - devres_free(ptr); - return tzd; - } - - *ptr = tzd; - devres_add(dev, ptr); - - return tzd; -} -EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_register); - -/** - * devm_thermal_zone_of_sensor_unregister - Resource managed version of - * thermal_zone_of_sensor_unregister(). - * @dev: Device for which which resource was allocated. - * @tzd: a pointer to struct thermal_zone_device where the sensor is registered. - * - * This function removes the sensor callbacks and private data from the - * thermal zone device registered with devm_thermal_zone_of_sensor_register() - * API. It will also silent the zone by remove the .get_temp() and .get_trend() - * thermal zone device callbacks. - * Normally this function will not need to be called and the resource - * management code will ensure that the resource is freed. - */ -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tzd) -{ - WARN_ON(devres_release(dev, devm_thermal_zone_of_sensor_release, - devm_thermal_zone_of_sensor_match, tzd)); -} -EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_unregister); - /*** functions parsing device tree nodes ***/ static int of_find_trip_id(struct device_node *np, struct device_node *trip) @@ -679,98 +205,6 @@ out: return i; } -/** - * thermal_of_populate_bind_params - parse and fill cooling map data - * @np: DT node containing a cooling-map node - * @__tbp: data structure to be filled with cooling map info - * @trips: array of thermal zone trip points - * @ntrips: number of trip points inside trips. - * - * This function parses a cooling-map type of node represented by - * @np parameter and fills the read data into @__tbp data structure. - * It needs the already parsed array of trip points of the thermal zone - * in consideration. - * - * Return: 0 on success, proper error code otherwise - */ -static int thermal_of_populate_bind_params(struct device_node *tz_np, - struct device_node *np, - struct __thermal_bind_params *__tbp) -{ - struct of_phandle_args cooling_spec; - struct __thermal_cooling_bind_param *__tcbp; - struct device_node *trip; - int ret, i, count; - int trip_id; - u32 prop; - - /* Default weight. Usage is optional */ - __tbp->usage = THERMAL_WEIGHT_DEFAULT; - ret = of_property_read_u32(np, "contribution", &prop); - if (ret == 0) - __tbp->usage = prop; - - trip = of_parse_phandle(np, "trip", 0); - if (!trip) { - pr_err("missing trip property\n"); - return -ENODEV; - } - - trip_id = of_find_trip_id(tz_np, trip); - if (trip_id < 0) { - ret = trip_id; - goto end; - } - - __tbp->trip_id = trip_id; - - count = of_count_phandle_with_args(np, "cooling-device", - "#cooling-cells"); - if (count <= 0) { - pr_err("Add a cooling_device property with at least one device\n"); - ret = -ENOENT; - goto end; - } - - __tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL); - if (!__tcbp) { - ret = -ENOMEM; - goto end; - } - - for (i = 0; i < count; i++) { - ret = of_parse_phandle_with_args(np, "cooling-device", - "#cooling-cells", i, &cooling_spec); - if (ret < 0) { - pr_err("Invalid cooling-device entry\n"); - goto free_tcbp; - } - - __tcbp[i].cooling_device = cooling_spec.np; - - if (cooling_spec.args_count >= 2) { /* at least min and max */ - __tcbp[i].min = cooling_spec.args[0]; - __tcbp[i].max = cooling_spec.args[1]; - } else { - pr_err("wrong reference to cooling device, missing limits\n"); - } - } - - __tbp->tcbp = __tcbp; - __tbp->count = count; - - goto end; - -free_tcbp: - for (i = i - 1; i >= 0; i--) - of_node_put(__tcbp[i].cooling_device); - kfree(__tcbp); -end: - of_node_put(trip); - - return ret; -} - /* * It maps 'enum thermal_trip_type' found in include/linux/thermal.h * into the device tree binding of 'trip', property type. @@ -811,16 +245,6 @@ static int thermal_of_get_trip_type(struct device_node *np, return -ENODEV; } -/** - * thermal_of_populate_trip - parse and fill one trip point data - * @np: DT node containing a trip point node - * @trip: trip point data structure to be filled up - * - * This function parses a trip point type of node represented by - * @np parameter and fills the read data into @trip data structure. - * - * Return: 0 on success, proper error code otherwise - */ static int thermal_of_populate_trip(struct device_node *np, struct thermal_trip *trip) { @@ -897,258 +321,458 @@ out_of_node_put: return ERR_PTR(ret); } -/** - * thermal_of_build_thermal_zone - parse and fill one thermal zone data - * @np: DT node containing a thermal zone node - * - * This function parses a thermal zone type of node represented by - * @np parameter and fills the read data into a __thermal_zone data structure - * and return this pointer. - * - * TODO: Missing properties to parse: thermal-sensor-names - * - * Return: On success returns a valid struct __thermal_zone, - * otherwise, it returns a corresponding ERR_PTR(). Caller must - * check the return value with help of IS_ERR() helper. - */ -static struct __thermal_zone -__init *thermal_of_build_thermal_zone(struct device_node *np) +static struct device_node *of_thermal_zone_find(struct device_node *sensor, int id) { - struct device_node *child = NULL, *gchild; - struct __thermal_zone *tz; - int ret, i; - u32 prop, coef[2]; + struct device_node *np, *tz; + struct of_phandle_args sensor_specs; + np = of_find_node_by_name(NULL, "thermal-zones"); if (!np) { - pr_err("no thermal zone np\n"); - return ERR_PTR(-EINVAL); + pr_debug("No thermal zones description\n"); + return ERR_PTR(-ENODEV); } - tz = kzalloc(sizeof(*tz), GFP_KERNEL); - if (!tz) - return ERR_PTR(-ENOMEM); + /* + * Search for each thermal zone, a defined sensor + * corresponding to the one passed as parameter + */ + for_each_available_child_of_node(np, tz) { + + int count, i; + + count = of_count_phandle_with_args(tz, "thermal-sensors", + "#thermal-sensor-cells"); + if (count <= 0) { + pr_err("%pOFn: missing thermal sensor\n", tz); + tz = ERR_PTR(-EINVAL); + goto out; + } + + for (i = 0; i < count; i++) { + + int ret; - ret = of_property_read_u32(np, "polling-delay-passive", &prop); + ret = of_parse_phandle_with_args(tz, "thermal-sensors", + "#thermal-sensor-cells", + i, &sensor_specs); + if (ret < 0) { + pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", tz, ret); + tz = ERR_PTR(ret); + goto out; + } + + if ((sensor == sensor_specs.np) && id == (sensor_specs.args_count ? + sensor_specs.args[0] : 0)) { + pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, tz); + goto out; + } + } + } + tz = ERR_PTR(-ENODEV); +out: + of_node_put(np); + return tz; +} + +static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay) +{ + int ret; + + ret = of_property_read_u32(np, "polling-delay-passive", pdelay); if (ret < 0) { pr_err("%pOFn: missing polling-delay-passive property\n", np); - goto free_tz; + return ret; } - tz->passive_delay = prop; - ret = of_property_read_u32(np, "polling-delay", &prop); + ret = of_property_read_u32(np, "polling-delay", delay); if (ret < 0) { pr_err("%pOFn: missing polling-delay property\n", np); - goto free_tz; + return ret; } - tz->polling_delay = prop; + + return 0; +} + +static struct thermal_zone_params *thermal_of_parameters_init(struct device_node *np) +{ + struct thermal_zone_params *tzp; + int coef[2]; + int ncoef = ARRAY_SIZE(coef); + int prop, ret; + + tzp = kzalloc(sizeof(*tzp), GFP_KERNEL); + if (!tzp) + return ERR_PTR(-ENOMEM); + + tzp->no_hwmon = true; + + if (!of_property_read_u32(np, "sustainable-power", &prop)) + tzp->sustainable_power = prop; /* - * REVIST: for now, the thermal framework supports only - * one sensor per thermal zone. Thus, we are considering - * only the first two values as slope and offset. + * For now, the thermal framework supports only one sensor per + * thermal zone. Thus, we are considering only the first two + * values as slope and offset. */ - ret = of_property_read_u32_array(np, "coefficients", coef, 2); - if (ret == 0) { - tz->slope = coef[0]; - tz->offset = coef[1]; - } else { - tz->slope = 1; - tz->offset = 0; + ret = of_property_read_u32_array(np, "coefficients", coef, ncoef); + if (ret) { + coef[0] = 1; + coef[1] = 0; + } + + tzp->slope = coef[0]; + tzp->offset = coef[1]; + + return tzp; +} + +static struct device_node *thermal_of_zone_get_by_name(struct thermal_zone_device *tz) +{ + struct device_node *np, *tz_np; + + np = of_find_node_by_name(NULL, "thermal-zones"); + if (!np) + return ERR_PTR(-ENODEV); + + tz_np = of_get_child_by_name(np, tz->type); + + of_node_put(np); + + if (!tz_np) + return ERR_PTR(-ENODEV); + + return tz_np; +} + +static int __thermal_of_unbind(struct device_node *map_np, int index, int trip_id, + struct thermal_zone_device *tz, struct thermal_cooling_device *cdev) +{ + struct of_phandle_args cooling_spec; + int ret; + + ret = of_parse_phandle_with_args(map_np, "cooling-device", "#cooling-cells", + index, &cooling_spec); + + of_node_put(cooling_spec.np); + + if (ret < 0) { + pr_err("Invalid cooling-device entry\n"); + return ret; } - tz->trips = thermal_of_trips_init(np, &tz->ntrips); - if (IS_ERR(tz->trips)) { - ret = PTR_ERR(tz->trips); - goto finish; + if (cooling_spec.args_count < 2) { + pr_err("wrong reference to cooling device, missing limits\n"); + return -EINVAL; } - /* cooling-maps */ - child = of_get_child_by_name(np, "cooling-maps"); + if (cooling_spec.np != cdev->np) + return 0; - /* cooling-maps not provided */ - if (!child) - goto finish; + ret = thermal_zone_unbind_cooling_device(tz, trip_id, cdev); + if (ret) + pr_err("Failed to unbind '%s' with '%s': %d\n", tz->type, cdev->type, ret); - tz->num_tbps = of_get_child_count(child); - if (tz->num_tbps == 0) - goto finish; + return ret; +} - tz->tbps = kcalloc(tz->num_tbps, sizeof(*tz->tbps), GFP_KERNEL); - if (!tz->tbps) { - ret = -ENOMEM; - goto free_trips; +static int __thermal_of_bind(struct device_node *map_np, int index, int trip_id, + struct thermal_zone_device *tz, struct thermal_cooling_device *cdev) +{ + struct of_phandle_args cooling_spec; + int ret, weight = THERMAL_WEIGHT_DEFAULT; + + of_property_read_u32(map_np, "contribution", &weight); + + ret = of_parse_phandle_with_args(map_np, "cooling-device", "#cooling-cells", + index, &cooling_spec); + + of_node_put(cooling_spec.np); + + if (ret < 0) { + pr_err("Invalid cooling-device entry\n"); + return ret; } - i = 0; - for_each_child_of_node(child, gchild) { - ret = thermal_of_populate_bind_params(np, gchild, &tz->tbps[i++]); - if (ret) { - of_node_put(gchild); - goto free_tbps; - } + if (cooling_spec.args_count < 2) { + pr_err("wrong reference to cooling device, missing limits\n"); + return -EINVAL; } -finish: - of_node_put(child); + if (cooling_spec.np != cdev->np) + return 0; + + ret = thermal_zone_bind_cooling_device(tz, trip_id, cdev, cooling_spec.args[1], + cooling_spec.args[0], + weight); + if (ret) + pr_err("Failed to bind '%s' with '%s': %d\n", tz->type, cdev->type, ret); - return tz; + return ret; +} -free_tbps: - for (i = i - 1; i >= 0; i--) { - struct __thermal_bind_params *tbp = tz->tbps + i; - int j; +static int thermal_of_for_each_cooling_device(struct device_node *tz_np, struct device_node *map_np, + struct thermal_zone_device *tz, struct thermal_cooling_device *cdev, + int (*action)(struct device_node *, int, int, + struct thermal_zone_device *, struct thermal_cooling_device *)) +{ + struct device_node *tr_np; + int count, i, trip_id; - for (j = 0; j < tbp->count; j++) - of_node_put(tbp->tcbp[j].cooling_device); + tr_np = of_parse_phandle(map_np, "trip", 0); + if (!tr_np) + return -ENODEV; - kfree(tbp->tcbp); + trip_id = of_find_trip_id(tz_np, tr_np); + if (trip_id < 0) + return trip_id; + + count = of_count_phandle_with_args(map_np, "cooling-device", "#cooling-cells"); + if (count <= 0) { + pr_err("Add a cooling_device property with at least one device\n"); + return -ENOENT; } - kfree(tz->tbps); -free_trips: - kfree(tz->trips); -free_tz: - kfree(tz); - of_node_put(child); + /* + * At this point, we don't want to bail out when there is an + * error, we will try to bind/unbind as many as possible + * cooling devices + */ + for (i = 0; i < count; i++) + action(map_np, i, trip_id, tz, cdev); - return ERR_PTR(ret); + return 0; } -static __init void of_thermal_free_zone(struct __thermal_zone *tz) +static int thermal_of_for_each_cooling_maps(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev, + int (*action)(struct device_node *, int, int, + struct thermal_zone_device *, struct thermal_cooling_device *)) { - struct __thermal_bind_params *tbp; - int i, j; + struct device_node *tz_np, *cm_np, *child; + int ret = 0; - for (i = 0; i < tz->num_tbps; i++) { - tbp = tz->tbps + i; + tz_np = thermal_of_zone_get_by_name(tz); + if (IS_ERR(tz_np)) { + pr_err("Failed to get node tz by name\n"); + return PTR_ERR(tz_np); + } - for (j = 0; j < tbp->count; j++) - of_node_put(tbp->tcbp[j].cooling_device); + cm_np = of_get_child_by_name(tz_np, "cooling-maps"); + if (!cm_np) + goto out; - kfree(tbp->tcbp); + for_each_child_of_node(cm_np, child) { + ret = thermal_of_for_each_cooling_device(tz_np, child, tz, cdev, action); + if (ret) + break; } - kfree(tz->tbps); - kfree(tz->trips); - kfree(tz); + of_node_put(cm_np); +out: + of_node_put(tz_np); + + return ret; +} + +static int thermal_of_bind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + return thermal_of_for_each_cooling_maps(tz, cdev, __thermal_of_bind); +} + +static int thermal_of_unbind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + return thermal_of_for_each_cooling_maps(tz, cdev, __thermal_of_unbind); } /** - * of_thermal_destroy_zones - remove all zones parsed and allocated resources + * thermal_of_zone_unregister - Cleanup the specific allocated ressources * - * Finds all zones parsed and added to the thermal framework and remove them - * from the system, together with their resources. + * This function disables the thermal zone and frees the different + * ressources allocated specific to the thermal OF. * + * @tz: a pointer to the thermal zone structure */ -static __init void of_thermal_destroy_zones(void) +void thermal_of_zone_unregister(struct thermal_zone_device *tz) { - struct device_node *np, *child; + struct thermal_trip *trips = tz->trips; + struct thermal_zone_params *tzp = tz->tzp; + struct thermal_zone_device_ops *ops = tz->ops; - np = of_find_node_by_name(NULL, "thermal-zones"); - if (!np) { - pr_debug("unable to find thermal zones\n"); - return; + thermal_zone_device_disable(tz); + thermal_zone_device_unregister(tz); + kfree(trips); + kfree(tzp); + kfree(ops); +} +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister); + +/** + * thermal_of_zone_register - Register a thermal zone with device node + * sensor + * + * The thermal_of_zone_register() parses a device tree given a device + * node sensor and identifier. It searches for the thermal zone + * associated to the couple sensor/id and retrieves all the thermal + * zone properties and registers new thermal zone with those + * properties. + * + * @sensor: A device node pointer corresponding to the sensor in the device tree + * @id: An integer as sensor identifier + * @data: A private data to be stored in the thermal zone dedicated private area + * @ops: A set of thermal sensor ops + * + * Return: a valid thermal zone structure pointer on success. + * - EINVAL: if the device tree thermal description is malformed + * - ENOMEM: if one structure can not be allocated + * - Other negative errors are returned by the underlying called functions + */ +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops) +{ + struct thermal_zone_device *tz; + struct thermal_trip *trips; + struct thermal_zone_params *tzp; + struct thermal_zone_device_ops *of_ops; + struct device_node *np; + int delay, pdelay; + int ntrips, mask; + int ret; + + of_ops = kmemdup(ops, sizeof(*ops), GFP_KERNEL); + if (!of_ops) + return ERR_PTR(-ENOMEM); + + np = of_thermal_zone_find(sensor, id); + if (IS_ERR(np)) { + if (PTR_ERR(np) != -ENODEV) + pr_err("Failed to find thermal zone for %pOFn id=%d\n", sensor, id); + return ERR_CAST(np); } - for_each_available_child_of_node(np, child) { - struct thermal_zone_device *zone; + trips = thermal_of_trips_init(np, &ntrips); + if (IS_ERR(trips)) { + pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); + return ERR_CAST(trips); + } - zone = thermal_zone_get_zone_by_name(child->name); - if (IS_ERR(zone)) - continue; + ret = thermal_of_monitor_init(np, &delay, &pdelay); + if (ret) { + pr_err("Failed to initialize monitoring delays from %pOFn\n", np); + goto out_kfree_trips; + } - thermal_zone_device_unregister(zone); - kfree(zone->tzp); - kfree(zone->ops); - of_thermal_free_zone(zone->devdata); + tzp = thermal_of_parameters_init(np); + if (IS_ERR(tzp)) { + ret = PTR_ERR(tzp); + pr_err("Failed to initialize parameter from %pOFn: %d\n", np, ret); + goto out_kfree_trips; } - of_node_put(np); + + of_ops->get_trip_type = of_ops->get_trip_type ? : of_thermal_get_trip_type; + of_ops->get_trip_temp = of_ops->get_trip_temp ? : of_thermal_get_trip_temp; + of_ops->get_trip_hyst = of_ops->get_trip_hyst ? : of_thermal_get_trip_hyst; + of_ops->set_trip_hyst = of_ops->set_trip_hyst ? : of_thermal_set_trip_hyst; + of_ops->get_crit_temp = of_ops->get_crit_temp ? : of_thermal_get_crit_temp; + of_ops->bind = thermal_of_bind; + of_ops->unbind = thermal_of_unbind; + + mask = GENMASK_ULL((ntrips) - 1, 0); + + tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips, + mask, data, of_ops, tzp, + pdelay, delay); + if (IS_ERR(tz)) { + ret = PTR_ERR(tz); + pr_err("Failed to register thermal zone %pOFn: %d\n", np, ret); + goto out_kfree_tzp; + } + + ret = thermal_zone_device_enable(tz); + if (ret) { + pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n", + tz->type, tz->id, ret); + thermal_of_zone_unregister(tz); + return ERR_PTR(ret); + } + + return tz; + +out_kfree_tzp: + kfree(tzp); +out_kfree_trips: + kfree(trips); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(thermal_of_zone_register); + +static void devm_thermal_of_zone_release(struct device *dev, void *res) +{ + thermal_of_zone_unregister(*(struct thermal_zone_device **)res); +} + +static int devm_thermal_of_zone_match(struct device *dev, void *res, + void *data) +{ + struct thermal_zone_device **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; } /** - * of_parse_thermal_zones - parse device tree thermal data + * devm_thermal_of_zone_register - register a thermal tied with the sensor life cycle * - * Initialization function that can be called by machine initialization - * code to parse thermal data and populate the thermal framework - * with hardware thermal zones info. This function only parses thermal zones. - * Cooling devices and sensor devices nodes are supposed to be parsed - * by their respective drivers. - * - * Return: 0 on success, proper error code otherwise + * This function is the device version of the thermal_of_zone_register() function. * + * @dev: a device structure pointer to sensor to be tied with the thermal zone OF life cycle + * @sensor_id: the sensor identifier + * @data: a pointer to a private data to be stored in the thermal zone 'devdata' field + * @ops: a pointer to the ops structure associated with the sensor */ -int __init of_parse_thermal_zones(void) +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int sensor_id, void *data, + const struct thermal_zone_device_ops *ops) { - struct device_node *np, *child; - struct __thermal_zone *tz; - struct thermal_zone_device_ops *ops; - - np = of_find_node_by_name(NULL, "thermal-zones"); - if (!np) { - pr_debug("unable to find thermal zones\n"); - return 0; /* Run successfully on systems without thermal DT */ - } - - for_each_available_child_of_node(np, child) { - struct thermal_zone_device *zone; - struct thermal_zone_params *tzp; - int i, mask = 0; - u32 prop; - - tz = thermal_of_build_thermal_zone(child); - if (IS_ERR(tz)) { - pr_err("failed to build thermal zone %pOFn: %ld\n", - child, - PTR_ERR(tz)); - continue; - } - - ops = kmemdup(&of_thermal_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) - goto exit_free; + struct thermal_zone_device **ptr, *tzd; - tzp = kzalloc(sizeof(*tzp), GFP_KERNEL); - if (!tzp) { - kfree(ops); - goto exit_free; - } + ptr = devres_alloc(devm_thermal_of_zone_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); - /* No hwmon because there might be hwmon drivers registering */ - tzp->no_hwmon = true; - - if (!of_property_read_u32(child, "sustainable-power", &prop)) - tzp->sustainable_power = prop; - - for (i = 0; i < tz->ntrips; i++) - mask |= 1 << i; - - /* these two are left for temperature drivers to use */ - tzp->slope = tz->slope; - tzp->offset = tz->offset; - - zone = thermal_zone_device_register_with_trips(child->name, tz->trips, tz->ntrips, - mask, tz, ops, tzp, tz->passive_delay, - tz->polling_delay); - if (IS_ERR(zone)) { - pr_err("Failed to build %pOFn zone %ld\n", child, - PTR_ERR(zone)); - kfree(tzp); - kfree(ops); - of_thermal_free_zone(tz); - /* attempting to build remaining zones still */ - } + tzd = thermal_of_zone_register(dev->of_node, sensor_id, data, ops); + if (IS_ERR(tzd)) { + devres_free(ptr); + return tzd; } - of_node_put(np); - - return 0; -exit_free: - of_node_put(child); - of_node_put(np); - of_thermal_free_zone(tz); + *ptr = tzd; + devres_add(dev, ptr); - /* no memory available, so free what we have built */ - of_thermal_destroy_zones(); + return tzd; +} +EXPORT_SYMBOL_GPL(devm_thermal_of_zone_register); - return -ENOMEM; +/** + * devm_thermal_of_zone_unregister - Resource managed version of + * thermal_of_zone_unregister(). + * @dev: Device for which which resource was allocated. + * @tz: a pointer to struct thermal_zone where the sensor is registered. + * + * This function removes the sensor callbacks and private data from the + * thermal zone device registered with devm_thermal_zone_of_sensor_register() + * API. It will also silent the zone by remove the .get_temp() and .get_trend() + * thermal zone device callbacks. + * Normally this function will not need to be called and the resource + * management code will ensure that the resource is freed. + */ +void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz) +{ + WARN_ON(devres_release(dev, devm_thermal_of_zone_release, + devm_thermal_of_zone_match, tz)); } +EXPORT_SYMBOL_GPL(devm_thermal_of_zone_unregister); diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 3a8d6e747c25..78c5841bdfae 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -49,7 +49,11 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_zone_device *tz = to_thermal_zone(dev); - int enabled = thermal_zone_device_is_enabled(tz); + int enabled; + + mutex_lock(&tz->lock); + enabled = thermal_zone_device_is_enabled(tz); + mutex_unlock(&tz->lock); return sprintf(buf, "%s\n", enabled ? "enabled" : "disabled"); } @@ -115,7 +119,7 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, int temperature, hyst = 0; enum thermal_trip_type type; - if (!tz->ops->set_trip_temp) + if (!tz->ops->set_trip_temp && !tz->trips) return -EPERM; if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip) != 1) @@ -128,6 +132,9 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + if (tz->trips) + tz->trips[trip].temperature = temperature; + if (tz->ops->get_trip_hyst) { ret = tz->ops->get_trip_hyst(tz, trip, &hyst); if (ret) diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 703039d8b937..8a9055bd376e 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -65,10 +65,10 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c) /* thermal zone ops */ /* Get temperature callback function for thermal zone */ -static inline int __ti_thermal_get_temp(void *devdata, int *temp) +static inline int __ti_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { struct thermal_zone_device *pcb_tz = NULL; - struct ti_thermal_data *data = devdata; + struct ti_thermal_data *data = tz->devdata; struct ti_bandgap *bgp; const struct ti_temp_sensor *s; int ret, tmp, slope, constant; @@ -85,8 +85,8 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp) return ret; /* Default constants */ - slope = thermal_zone_get_slope(data->ti_thermal); - constant = thermal_zone_get_offset(data->ti_thermal); + slope = thermal_zone_get_slope(tz); + constant = thermal_zone_get_offset(tz); pcb_tz = data->pcb_tz; /* In case pcb zone is available, use the extrapolation rule with it */ @@ -107,9 +107,9 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp) return ret; } -static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend) +static int __ti_thermal_get_trend(struct thermal_zone_device *tz, int trip, enum thermal_trend *trend) { - struct ti_thermal_data *data = p; + struct ti_thermal_data *data = tz->devdata; struct ti_bandgap *bgp; int id, tr, ret = 0; @@ -130,7 +130,7 @@ static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend) return 0; } -static const struct thermal_zone_of_device_ops ti_of_thermal_ops = { +static const struct thermal_zone_device_ops ti_of_thermal_ops = { .get_temp = __ti_thermal_get_temp, .get_trend = __ti_thermal_get_trend, }; @@ -170,7 +170,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, return -EINVAL; /* in case this is specified by DT */ - data->ti_thermal = devm_thermal_zone_of_sensor_register(bgp->dev, id, + data->ti_thermal = devm_thermal_of_zone_register(bgp->dev, id, data, &ti_of_thermal_ops); if (IS_ERR(data->ti_thermal)) { dev_err(bgp->dev, "thermal zone device is NULL\n"); diff --git a/drivers/thermal/uniphier_thermal.c b/drivers/thermal/uniphier_thermal.c index 4cae5561a2a3..4111d99ef50e 100644 --- a/drivers/thermal/uniphier_thermal.c +++ b/drivers/thermal/uniphier_thermal.c @@ -187,9 +187,9 @@ static void uniphier_tm_disable_sensor(struct uniphier_tm_dev *tdev) usleep_range(1000, 2000); /* The spec note says at least 1ms */ } -static int uniphier_tm_get_temp(void *data, int *out_temp) +static int uniphier_tm_get_temp(struct thermal_zone_device *tz, int *out_temp) { - struct uniphier_tm_dev *tdev = data; + struct uniphier_tm_dev *tdev = tz->devdata; struct regmap *map = tdev->regmap; int ret; u32 temp; @@ -204,7 +204,7 @@ static int uniphier_tm_get_temp(void *data, int *out_temp) return 0; } -static const struct thermal_zone_of_device_ops uniphier_of_thermal_ops = { +static const struct thermal_zone_device_ops uniphier_of_thermal_ops = { .get_temp = uniphier_tm_get_temp, }; @@ -289,8 +289,8 @@ static int uniphier_tm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tdev); - tdev->tz_dev = devm_thermal_zone_of_sensor_register(dev, 0, tdev, - &uniphier_of_thermal_ops); + tdev->tz_dev = devm_thermal_of_zone_register(dev, 0, tdev, + &uniphier_of_thermal_ops); if (IS_ERR(tdev->tz_dev)) { dev_err(dev, "failed to register sensor device\n"); return PTR_ERR(tdev->tz_dev); diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c index b1f0dc8df47c..7a8adf5ad5a0 100644 --- a/drivers/thunderbolt/acpi.c +++ b/drivers/thunderbolt/acpi.c @@ -42,7 +42,7 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, */ dev = acpi_get_first_physical_node(adev); while (!dev) { - adev = adev->parent; + adev = acpi_dev_parent(adev); if (!adev) break; dev = acpi_get_first_physical_node(adev); diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index c63c1f4ff9dc..77d7f07ca075 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2413,6 +2413,7 @@ int tb_switch_configure(struct tb_switch *sw) * additional capabilities. */ sw->config.cmuv = USB4_VERSION_1_0; + sw->config.plug_events_delay = 0xa; /* Enumerate the switch */ ret = tb_sw_write(sw, (u32 *)&sw->config + 1, TB_CFG_SWITCH, diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 166b5bde45cb..6c14a79279f9 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -251,7 +251,7 @@ static int st_dwc3_probe(struct platform_device *pdev) /* Manage SoftReset */ reset_control_deassert(dwc3_data->rstc_rst); - child = of_get_child_by_name(node, "dwc3"); + child = of_get_child_by_name(node, "usb"); if (!child) { dev_err(&pdev->dev, "failed to find dwc3 core node\n"); ret = -ENODEV; diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 23ab3b048d9b..251778d14e2d 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -52,6 +52,13 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */ +UNUSUAL_DEV(0x090c, 0x2000, 0x0000, 0x9999, + "Hiksemi", + "External HDD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI * commands in UAS mode. Observed with the 1.28 firmware; are there others? @@ -76,6 +83,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), +/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */ +UNUSUAL_DEV(0x0bda, 0x9210, 0x0000, 0x9999, + "Hiksemi", + "External HDD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", @@ -118,6 +132,13 @@ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */ +UNUSUAL_DEV(0x17ef, 0x3899, 0x0000, 0x9999, + "Thinkplus", + "External HDD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Hans de Goede <hdegoede@redhat.com> */ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, "VIA", diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index a8e273fe204a..e1f4df7238bf 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -569,15 +569,6 @@ err_unregister_switch: return ret; } -static int is_memory(struct acpi_resource *res, void *data) -{ - struct resource_win win = {}; - struct resource *r = &win.res; - - return !(acpi_dev_resource_memory(res, r) || - acpi_dev_resource_address_space(res, &win)); -} - /* IOM ACPI IDs and IOM_PORT_STATUS_OFFSET */ static const struct acpi_device_id iom_acpi_ids[] = { /* TigerLake */ @@ -611,7 +602,7 @@ static int pmc_usb_probe_iom(struct pmc_usb *pmc) return -ENODEV; INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, is_memory, NULL); + ret = acpi_dev_get_memory_resources(adev, &resource_list); if (ret < 0) return ret; diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 7f2624f42724..6364f0d467ea 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -588,8 +588,6 @@ static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner, num_pdos * sizeof(u32)); if (ret < 0 && ret != -ETIMEDOUT) dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret); - if (ret == 0 && offset == 0) - dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); return ret; } diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 75a703b803a2..3e4486bfa0b7 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -323,7 +323,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) u32 q_pair_id; ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / hw->nr_vring; + q_pair_id = qid / 2; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; last_avail_idx = vp_ioread16(avail_idx_addr); @@ -337,7 +337,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) u32 q_pair_id; ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / hw->nr_vring; + q_pair_id = qid / 2; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; hw->vring[qid].last_avail_idx = num; vp_iowrite16(num, avail_idx_addr); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index ed100a35e596..90913365def4 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1320,6 +1320,8 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * static int create_rqt(struct mlx5_vdpa_net *ndev) { + int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); + int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); __be32 *list; void *rqtc; int inlen; @@ -1327,7 +1329,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) int i, j; int err; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1336,12 +1338,12 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); - MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size); + MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) + for (i = 0, j = 0; i < act_sz; i++, j += 2) list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); - MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); + MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); kfree(in); if (err) @@ -1354,6 +1356,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) { + int act_sz = roundup_pow_of_two(num / 2); __be32 *list; void *rqtc; int inlen; @@ -1361,7 +1364,7 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) int i, j; int err; - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1372,10 +1375,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2) + for (i = 0, j = 0; i < act_sz; i++, j = j + 2) list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); - MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size); + MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); kfree(in); if (err) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 41c0b29739f1..35dceee3ed56 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -673,10 +673,15 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vduse_dev *dev = vdpa_to_vduse(vdpa); - if (offset > dev->config_size || - len > dev->config_size - offset) + /* Initialize the buffer in case of partial copy. */ + memset(buf, 0, len); + + if (offset > dev->config_size) return; + if (len > dev->config_size - offset) + len = dev->config_size - offset; + memcpy(buf, dev->config + offset, len); } diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 21e154516bf2..f14478643b91 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -142,39 +142,6 @@ config BINFMT_ZFLAT help Support FLAT format compressed binaries -config HAVE_AOUT - def_bool n - -config BINFMT_AOUT - tristate "Kernel support for a.out and ECOFF binaries" - depends on HAVE_AOUT - help - A.out (Assembler.OUTput) is a set of formats for libraries and - executables used in the earliest versions of UNIX. Linux used - the a.out formats QMAGIC and ZMAGIC until they were replaced - with the ELF format. - - The conversion to ELF started in 1995. This option is primarily - provided for historical interest and for the benefit of those - who need to run binaries from that era. - - Most people should answer N here. If you think you may have - occasional use for this format, enable module support above - and answer M here to compile this support as a module called - binfmt_aout. - - If any crucial components of your system (such as /sbin/init - or /lib/ld.so) are still in a.out format, you will have to - say Y here. - -config OSF4_COMPAT - bool "OSF/1 v4 readv/writev compatibility" - depends on ALPHA && BINFMT_AOUT - help - Say Y if you are using OSF/1 binaries (like Netscape and Acrobat) - with v4 shared libraries freely available from Compaq. If you're - going to use shared libraries from Tru64 version 5.0 or later, say N. - config BINFMT_MISC tristate "Kernel support for MISC binaries" help diff --git a/fs/Makefile b/fs/Makefile index 93b80529f8e8..4dea17840761 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o -obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c deleted file mode 100644 index 0dcfc691e7e2..000000000000 --- a/fs/binfmt_aout.c +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/fs/binfmt_aout.c - * - * Copyright (C) 1991, 1992, 1996 Linus Torvalds - */ - -#include <linux/module.h> - -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/a.out.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/binfmts.h> -#include <linux/personality.h> -#include <linux/init.h> -#include <linux/coredump.h> -#include <linux/slab.h> -#include <linux/sched/task_stack.h> - -#include <linux/uaccess.h> -#include <asm/cacheflush.h> - -static int load_aout_binary(struct linux_binprm *); -static int load_aout_library(struct file*); - -static struct linux_binfmt aout_format = { - .module = THIS_MODULE, - .load_binary = load_aout_binary, - .load_shlib = load_aout_library, -}; - -#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) - -static int set_brk(unsigned long start, unsigned long end) -{ - start = PAGE_ALIGN(start); - end = PAGE_ALIGN(end); - if (end > start) - return vm_brk(start, end - start); - return 0; -} - -/* - * create_aout_tables() parses the env- and arg-strings in new user - * memory and creates the pointer tables from them, and puts their - * addresses on the "stack", returning the new stack pointer value. - */ -static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm) -{ - char __user * __user *argv; - char __user * __user *envp; - unsigned long __user *sp; - int argc = bprm->argc; - int envc = bprm->envc; - - sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); -#ifdef __alpha__ -/* whee.. test-programs are so much fun. */ - put_user(0, --sp); - put_user(0, --sp); - if (bprm->loader) { - put_user(0, --sp); - put_user(1003, --sp); - put_user(bprm->loader, --sp); - put_user(1002, --sp); - } - put_user(bprm->exec, --sp); - put_user(1001, --sp); -#endif - sp -= envc+1; - envp = (char __user * __user *) sp; - sp -= argc+1; - argv = (char __user * __user *) sp; -#ifndef __alpha__ - put_user((unsigned long) envp,--sp); - put_user((unsigned long) argv,--sp); -#endif - put_user(argc,--sp); - current->mm->arg_start = (unsigned long) p; - while (argc-->0) { - char c; - put_user(p,argv++); - do { - get_user(c,p++); - } while (c); - } - put_user(NULL,argv); - current->mm->arg_end = current->mm->env_start = (unsigned long) p; - while (envc-->0) { - char c; - put_user(p,envp++); - do { - get_user(c,p++); - } while (c); - } - put_user(NULL,envp); - current->mm->env_end = (unsigned long) p; - return sp; -} - -/* - * These are the functions used to load a.out style executables and shared - * libraries. There is no binary dependent code anywhere else. - */ - -static int load_aout_binary(struct linux_binprm * bprm) -{ - struct pt_regs *regs = current_pt_regs(); - struct exec ex; - unsigned long error; - unsigned long fd_offset; - unsigned long rlim; - int retval; - - ex = *((struct exec *) bprm->buf); /* exec-header */ - if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && - N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || - N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - return -ENOEXEC; - } - - /* - * Requires a mmap handler. This prevents people from using a.out - * as part of an exploit attack against /proc-related vulnerabilities. - */ - if (!bprm->file->f_op->mmap) - return -ENOEXEC; - - fd_offset = N_TXTOFF(ex); - - /* Check initial limits. This avoids letting people circumvent - * size limits imposed on them by creating programs with large - * arrays in the data or bss. - */ - rlim = rlimit(RLIMIT_DATA); - if (rlim >= RLIM_INFINITY) - rlim = ~0; - if (ex.a_data + ex.a_bss > rlim) - return -ENOMEM; - - /* Flush all traces of the currently running executable */ - retval = begin_new_exec(bprm); - if (retval) - return retval; - - /* OK, This is the point of no return */ -#ifdef __alpha__ - SET_AOUT_PERSONALITY(bprm, ex); -#else - set_personality(PER_LINUX); -#endif - setup_new_exec(bprm); - - current->mm->end_code = ex.a_text + - (current->mm->start_code = N_TXTADDR(ex)); - current->mm->end_data = ex.a_data + - (current->mm->start_data = N_DATADDR(ex)); - current->mm->brk = ex.a_bss + - (current->mm->start_brk = N_BSSADDR(ex)); - - retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) - return retval; - - - if (N_MAGIC(ex) == OMAGIC) { - unsigned long text_addr, map_size; - loff_t pos; - - text_addr = N_TXTADDR(ex); - -#ifdef __alpha__ - pos = fd_offset; - map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; -#else - pos = 32; - map_size = ex.a_text+ex.a_data; -#endif - error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error) - return error; - - error = read_code(bprm->file, text_addr, pos, - ex.a_text+ex.a_data); - if ((signed long)error < 0) - return error; - } else { - if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) - { - printk(KERN_NOTICE "executable not page aligned\n"); - } - - if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit()) - { - printk(KERN_WARNING - "fd_offset is not page aligned. Please convert program: %pD\n", - bprm->file); - } - - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { - error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - if (error) - return error; - - read_code(bprm->file, N_TXTADDR(ex), fd_offset, - ex.a_text + ex.a_data); - goto beyond_if; - } - - error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, - PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, - fd_offset); - - if (error != N_TXTADDR(ex)) - return error; - - error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, - fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) - return error; - } -beyond_if: - set_binfmt(&aout_format); - - retval = set_brk(current->mm->start_brk, current->mm->brk); - if (retval < 0) - return retval; - - current->mm->start_stack = - (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); -#ifdef __alpha__ - regs->gp = ex.a_gpvalue; -#endif - finalize_exec(bprm); - start_thread(regs, ex.a_entry, current->mm->start_stack); - return 0; -} - -static int load_aout_library(struct file *file) -{ - struct inode * inode; - unsigned long bss, start_addr, len; - unsigned long error; - int retval; - struct exec ex; - loff_t pos = 0; - - inode = file_inode(file); - - retval = -ENOEXEC; - error = kernel_read(file, &ex, sizeof(ex), &pos); - if (error != sizeof(ex)) - goto out; - - /* We come in here for the regular a.out style of shared libraries */ - if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || - N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - goto out; - } - - /* - * Requires a mmap handler. This prevents people from using a.out - * as part of an exploit attack against /proc-related vulnerabilities. - */ - if (!file->f_op->mmap) - goto out; - - if (N_FLAGS(ex)) - goto out; - - /* For QMAGIC, the starting address is 0x20 into the page. We mask - this off to get the starting address for the page */ - - start_addr = ex.a_entry & 0xfffff000; - - if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - if (printk_ratelimit()) - { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. Please convert library: %pD\n", - file); - } - retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - if (retval) - goto out; - - read_code(file, start_addr, N_TXTOFF(ex), - ex.a_text + ex.a_data); - retval = 0; - goto out; - } - /* Now use mmap to map the library into memory. */ - error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, - N_TXTOFF(ex)); - retval = error; - if (error != start_addr) - goto out; - - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { - retval = vm_brk(start_addr + len, bss - len); - if (retval) - goto out; - } - retval = 0; -out: - return retval; -} - -static int __init init_aout_binfmt(void) -{ - register_binfmt(&aout_format); - return 0; -} - -static void __exit exit_aout_binfmt(void) -{ - unregister_binfmt(&aout_format); -} - -core_initcall(init_aout_binfmt); -module_exit(exit_aout_binfmt); -MODULE_LICENSE("GPL"); diff --git a/fs/coredump.c b/fs/coredump.c index 9f4aae202109..3538f3a63965 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -832,6 +832,39 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) } } +static int dump_emit_page(struct coredump_params *cprm, struct page *page) +{ + struct bio_vec bvec = { + .bv_page = page, + .bv_offset = 0, + .bv_len = PAGE_SIZE, + }; + struct iov_iter iter; + struct file *file = cprm->file; + loff_t pos; + ssize_t n; + + if (cprm->to_skip) { + if (!__dump_skip(cprm, cprm->to_skip)) + return 0; + cprm->to_skip = 0; + } + if (cprm->written + PAGE_SIZE > cprm->limit) + return 0; + if (dump_interrupted()) + return 0; + pos = file->f_pos; + iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE); + n = __kernel_write_iter(cprm->file, &iter, &pos); + if (n != PAGE_SIZE) + return 0; + file->f_pos = pos; + cprm->written += PAGE_SIZE; + cprm->pos += PAGE_SIZE; + + return 1; +} + int dump_emit(struct coredump_params *cprm, const void *addr, int nr) { if (cprm->to_skip) { @@ -863,7 +896,6 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page; - int stop; /* * To avoid having to allocate page tables for virtual address @@ -874,10 +906,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, */ page = get_dump_page(addr); if (page) { - void *kaddr = kmap_local_page(page); - - stop = !dump_emit(cprm, kaddr, PAGE_SIZE); - kunmap_local(kaddr); + int stop = !dump_emit_page(cprm, page); put_page(page); if (stop) return 0; diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 2217fe5ece6f..1b4403136d05 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -25,21 +25,25 @@ * then this function isn't applicable. This function may sleep, so it must be * called from a workqueue rather than from the bio's bi_end_io callback. * - * This function sets PG_error on any pages that contain any blocks that failed - * to be decrypted. The filesystem must not mark such pages uptodate. + * Return: %true on success; %false on failure. On failure, bio->bi_status is + * also set to an error status. */ -void fscrypt_decrypt_bio(struct bio *bio) +bool fscrypt_decrypt_bio(struct bio *bio) { struct bio_vec *bv; struct bvec_iter_all iter_all; bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, + int err = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, bv->bv_offset); - if (ret) - SetPageError(page); + + if (err) { + bio->bi_status = errno_to_blk_status(err); + return false; + } } + return true; } EXPORT_SYMBOL(fscrypt_decrypt_bio); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 3afdaa084773..d5f68a0c5d15 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -184,7 +184,7 @@ struct fscrypt_symlink_data { struct fscrypt_prepared_key { struct crypto_skcipher *tfm; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - struct fscrypt_blk_crypto_key *blk_key; + struct blk_crypto_key *blk_key; #endif }; @@ -225,7 +225,7 @@ struct fscrypt_info { * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. */ - struct key *ci_master_key; + struct fscrypt_master_key *ci_master_key; /* * Link in list of inodes that were unlocked with the master key. @@ -344,7 +344,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_info *ci); -void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key); +void fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key); /* * Check whether the crypto transform or blk-crypto key has been allocated in @@ -390,7 +391,8 @@ fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, } static inline void -fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { } @@ -437,6 +439,40 @@ struct fscrypt_master_key_secret { struct fscrypt_master_key { /* + * Back-pointer to the super_block of the filesystem to which this + * master key has been added. Only valid if ->mk_active_refs > 0. + */ + struct super_block *mk_sb; + + /* + * Link in ->mk_sb->s_master_keys->key_hashtable. + * Only valid if ->mk_active_refs > 0. + */ + struct hlist_node mk_node; + + /* Semaphore that protects ->mk_secret and ->mk_users */ + struct rw_semaphore mk_sem; + + /* + * Active and structural reference counts. An active ref guarantees + * that the struct continues to exist, continues to be in the keyring + * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g. + * ->mk_direct_keys) that have been prepared continue to exist. + * A structural ref only guarantees that the struct continues to exist. + * + * There is one active ref associated with ->mk_secret being present, + * and one active ref for each inode in ->mk_decrypted_inodes. + * + * There is one structural ref associated with the active refcount being + * nonzero. Finding a key in the keyring also takes a structural ref, + * which is then held temporarily while the key is operated on. + */ + refcount_t mk_active_refs; + refcount_t mk_struct_refs; + + struct rcu_head mk_rcu_head; + + /* * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is * executed, this is wiped and no new inodes can be unlocked with this * key; however, there may still be inodes in ->mk_decrypted_inodes @@ -444,7 +480,10 @@ struct fscrypt_master_key { * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again. * - * Locking: protected by this master key's key->sem. + * While ->mk_secret is present, one ref in ->mk_active_refs is held. + * + * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs + * associated with this field is protected by ->mk_sem as well. */ struct fscrypt_master_key_secret mk_secret; @@ -465,23 +504,13 @@ struct fscrypt_master_key { * * This is NULL for v1 policy keys; those can only be added by root. * - * Locking: in addition to this keyring's own semaphore, this is - * protected by this master key's key->sem, so we can do atomic - * search+insert. It can also be searched without taking any locks, but - * in that case the returned key may have already been removed. + * Locking: protected by ->mk_sem. (We don't just rely on the keyrings + * subsystem semaphore ->mk_users->sem, as we need support for atomic + * search+insert along with proper synchronization with ->mk_secret.) */ struct key *mk_users; /* - * Length of ->mk_decrypted_inodes, plus one if mk_secret is present. - * Once this goes to 0, the master key is removed from ->s_master_keys. - * The 'struct fscrypt_master_key' will continue to live as long as the - * 'struct key' whose payload it is, but we won't let this reference - * count rise again. - */ - refcount_t mk_refcount; - - /* * List of inodes that were unlocked using this key. This allows the * inodes to be evicted efficiently if the key is removed. */ @@ -506,10 +535,10 @@ static inline bool is_master_key_secret_present(const struct fscrypt_master_key_secret *secret) { /* - * The READ_ONCE() is only necessary for fscrypt_drop_inode() and - * fscrypt_key_describe(). These run in atomic context, so they can't - * take the key semaphore and thus 'secret' can change concurrently - * which would be a data race. But they only need to know whether the + * The READ_ONCE() is only necessary for fscrypt_drop_inode(). + * fscrypt_drop_inode() runs in atomic context, so it can't take the key + * semaphore and thus 'secret' can change concurrently which would be a + * data race. But fscrypt_drop_inode() only need to know whether the * secret *was* present at the time of check, so READ_ONCE() suffices. */ return READ_ONCE(secret->size) != 0; @@ -538,7 +567,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) return 0; } -struct key * +void fscrypt_put_master_key(struct fscrypt_master_key *mk); + +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk); + +struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec); @@ -569,7 +602,8 @@ extern struct fscrypt_mode fscrypt_modes[]; int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_info *ci); -void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); +void fscrypt_destroy_prepared_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key); int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 7c01025879b3..7b8c5a1104b5 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -5,8 +5,6 @@ * Encryption hooks for higher-level filesystem operations. */ -#include <linux/key.h> - #include "fscrypt_private.h" /** @@ -142,7 +140,6 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -158,14 +155,13 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; - key = ci->ci_master_key; - mk = key->payload.data[0]; - down_read(&key->sem); + mk = ci->ci_master_key; + down_read(&mk->mk_sem); if (is_master_key_secret_present(&mk->mk_secret)) err = fscrypt_derive_dirhash_key(ci, mk); else err = -ENOKEY; - up_read(&key->sem); + up_read(&mk->mk_sem); return err; } return 0; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 90f3e68f166e..cea8b14007e6 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -21,26 +21,22 @@ #include "fscrypt_private.h" -struct fscrypt_blk_crypto_key { - struct blk_crypto_key base; - int num_devs; - struct request_queue *devs[]; -}; - -static int fscrypt_get_num_devices(struct super_block *sb) +static struct block_device **fscrypt_get_devices(struct super_block *sb, + unsigned int *num_devs) { - if (sb->s_cop->get_num_devices) - return sb->s_cop->get_num_devices(sb); - return 1; -} + struct block_device **devs; -static void fscrypt_get_devices(struct super_block *sb, int num_devs, - struct request_queue **devs) -{ - if (num_devs == 1) - devs[0] = bdev_get_queue(sb->s_bdev); - else - sb->s_cop->get_devices(sb, devs); + if (sb->s_cop->get_devices) { + devs = sb->s_cop->get_devices(sb, num_devs); + if (devs) + return devs; + } + devs = kmalloc(sizeof(*devs), GFP_KERNEL); + if (!devs) + return ERR_PTR(-ENOMEM); + devs[0] = sb->s_bdev; + *num_devs = 1; + return devs; } static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) @@ -74,15 +70,17 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) * helpful for debugging problems where the "wrong" implementation is used. */ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode, - struct request_queue **devs, - int num_devs, + struct block_device **devs, + unsigned int num_devs, const struct blk_crypto_config *cfg) { - int i; + unsigned int i; for (i = 0; i < num_devs; i++) { + struct request_queue *q = bdev_get_queue(devs[i]); + if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || - __blk_crypto_cfg_supported(devs[i]->crypto_profile, cfg)) { + __blk_crypto_cfg_supported(q->crypto_profile, cfg)) { if (!xchg(&mode->logged_blk_crypto_native, 1)) pr_info("fscrypt: %s using blk-crypto (native)\n", mode->friendly_name); @@ -99,9 +97,9 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci) const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; struct blk_crypto_config crypto_cfg; - int num_devs; - struct request_queue **devs; - int i; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; /* The file must need contents encryption, not filenames encryption */ if (!S_ISREG(inode->i_mode)) @@ -129,20 +127,20 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci) return 0; /* - * On all the filesystem's devices, blk-crypto must support the crypto - * configuration that the file would use. + * On all the filesystem's block devices, blk-crypto must support the + * crypto configuration that the file would use. */ crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode; crypto_cfg.data_unit_size = sb->s_blocksize; crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci); - num_devs = fscrypt_get_num_devices(sb); - devs = kmalloc_array(num_devs, sizeof(*devs), GFP_KERNEL); - if (!devs) - return -ENOMEM; - fscrypt_get_devices(sb, num_devs, devs); + + devs = fscrypt_get_devices(sb, &num_devs); + if (IS_ERR(devs)) + return PTR_ERR(devs); for (i = 0; i < num_devs; i++) { - if (!blk_crypto_config_supported(devs[i], &crypto_cfg)) + if (!blk_crypto_config_supported(bdev_get_queue(devs[i]), + &crypto_cfg)) goto out_free_devs; } @@ -162,49 +160,41 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; - int num_devs = fscrypt_get_num_devices(sb); - int queue_refs = 0; - struct fscrypt_blk_crypto_key *blk_key; + struct blk_crypto_key *blk_key; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; int err; - int i; - blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_KERNEL); + blk_key = kmalloc(sizeof(*blk_key), GFP_KERNEL); if (!blk_key) return -ENOMEM; - blk_key->num_devs = num_devs; - fscrypt_get_devices(sb, num_devs, blk_key->devs); - - err = blk_crypto_init_key(&blk_key->base, raw_key, crypto_mode, + err = blk_crypto_init_key(blk_key, raw_key, crypto_mode, fscrypt_get_dun_bytes(ci), sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; } - /* - * We have to start using blk-crypto on all the filesystem's devices. - * We also have to save all the request_queue's for later so that the - * key can be evicted from them. This is needed because some keys - * aren't destroyed until after the filesystem was already unmounted - * (namely, the per-mode keys in struct fscrypt_master_key). - */ + /* Start using blk-crypto on all the filesystem's block devices. */ + devs = fscrypt_get_devices(sb, &num_devs); + if (IS_ERR(devs)) { + err = PTR_ERR(devs); + goto fail; + } for (i = 0; i < num_devs; i++) { - if (!blk_get_queue(blk_key->devs[i])) { - fscrypt_err(inode, "couldn't get request_queue"); - err = -EAGAIN; - goto fail; - } - queue_refs++; - - err = blk_crypto_start_using_key(&blk_key->base, - blk_key->devs[i]); - if (err) { - fscrypt_err(inode, - "error %d starting to use blk-crypto", err); - goto fail; - } + err = blk_crypto_start_using_key(blk_key, + bdev_get_queue(devs[i])); + if (err) + break; } + kfree(devs); + if (err) { + fscrypt_err(inode, "error %d starting to use blk-crypto", err); + goto fail; + } + /* * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). * I.e., here we publish ->blk_key with a RELEASE barrier so that @@ -215,24 +205,29 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, return 0; fail: - for (i = 0; i < queue_refs; i++) - blk_put_queue(blk_key->devs[i]); kfree_sensitive(blk_key); return err; } -void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +void fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { - struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key; - int i; + struct blk_crypto_key *blk_key = prep_key->blk_key; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; - if (blk_key) { - for (i = 0; i < blk_key->num_devs; i++) { - blk_crypto_evict_key(blk_key->devs[i], &blk_key->base); - blk_put_queue(blk_key->devs[i]); - } - kfree_sensitive(blk_key); + if (!blk_key) + return; + + /* Evict the key from all the filesystem's block devices. */ + devs = fscrypt_get_devices(sb, &num_devs); + if (!IS_ERR(devs)) { + for (i = 0; i < num_devs; i++) + blk_crypto_evict_key(bdev_get_queue(devs[i]), blk_key); + kfree(devs); } + kfree_sensitive(blk_key); } bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) @@ -282,7 +277,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, ci = inode->i_crypt_info; fscrypt_generate_dun(ci, first_lblk, dun); - bio_crypt_set_ctx(bio, &ci->ci_enc_key.blk_key->base, dun, gfp_mask); + bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask); } EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx); @@ -369,7 +364,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, * uses the same pointer. I.e., there's currently no need to support * merging requests where the keys are the same but the pointers differ. */ - if (bc->bc_key != &inode->i_crypt_info->ci_enc_key.blk_key->base) + if (bc->bc_key != inode->i_crypt_info->ci_enc_key.blk_key) return false; fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); @@ -401,46 +396,45 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); /** - * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is - * supported as far as encryption is concerned - * @iocb: the file and position the I/O is targeting - * @iter: the I/O data segment(s) + * fscrypt_dio_supported() - check whether DIO (direct I/O) is supported on an + * inode, as far as encryption is concerned + * @inode: the inode in question * * Return: %true if there are no encryption constraints that prevent DIO from * being supported; %false if DIO is unsupported. (Note that in the * %true case, the filesystem might have other, non-encryption-related - * constraints that prevent DIO from actually being supported.) + * constraints that prevent DIO from actually being supported. Also, on + * encrypted files the filesystem is still responsible for only allowing + * DIO when requests are filesystem-block-aligned.) */ -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter) +bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - const unsigned int blocksize = i_blocksize(inode); + int err; /* If the file is unencrypted, no veto from us. */ if (!fscrypt_needs_contents_encryption(inode)) return true; - /* We only support DIO with inline crypto, not fs-layer crypto. */ - if (!fscrypt_inode_uses_inline_crypto(inode)) - return false; - /* - * Since the granularity of encryption is filesystem blocks, the file - * position and total I/O length must be aligned to the filesystem block - * size -- not just to the block device's logical block size as is - * traditionally the case for DIO on many filesystems. + * We only support DIO with inline crypto, not fs-layer crypto. * - * We require that the user-provided memory buffers be filesystem block - * aligned too. It is simpler to have a single alignment value required - * for all properties of the I/O, as is normally the case for DIO. - * Also, allowing less aligned buffers would imply that data units could - * cross bvecs, which would greatly complicate the I/O stack, which - * assumes that bios can be split at any bvec boundary. + * To determine whether the inode is using inline crypto, we have to set + * up the key if it wasn't already done. This is because in the current + * design of fscrypt, the decision of whether to use inline crypto or + * not isn't made until the inode's encryption key is being set up. In + * the DIO read/write case, the key will always be set up already, since + * the file will be open. But in the case of statx(), the key might not + * be set up yet, as the file might not have been opened yet. */ - if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize)) + err = fscrypt_require_key(inode); + if (err) { + /* + * Key unavailable or couldn't be set up. This edge case isn't + * worth worrying about; just report that DIO is unsupported. + */ return false; - - return true; + } + return fscrypt_inode_uses_inline_crypto(inode); } EXPORT_SYMBOL_GPL(fscrypt_dio_supported); diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index caee9f8620dd..1cca09aa43f8 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -18,6 +18,7 @@ * information about these ioctls. */ +#include <asm/unaligned.h> #include <crypto/skcipher.h> #include <linux/key-type.h> #include <linux/random.h> @@ -25,6 +26,18 @@ #include "fscrypt_private.h" +/* The master encryption keys for a filesystem (->s_master_keys) */ +struct fscrypt_keyring { + /* + * Lock that protects ->key_hashtable. It does *not* protect the + * fscrypt_master_key structs themselves. + */ + spinlock_t lock; + + /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */ + struct hlist_head key_hashtable[128]; +}; + static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret) { fscrypt_destroy_hkdf(&secret->hkdf); @@ -38,66 +51,81 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst, memzero_explicit(src, sizeof(*src)); } -static void free_master_key(struct fscrypt_master_key *mk) +static void fscrypt_free_master_key(struct rcu_head *head) { - size_t i; - - wipe_master_key_secret(&mk->mk_secret); - - for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { - fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); - fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); - fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]); - } - - key_put(mk->mk_users); + struct fscrypt_master_key *mk = + container_of(head, struct fscrypt_master_key, mk_rcu_head); + /* + * The master key secret and any embedded subkeys should have already + * been wiped when the last active reference to the fscrypt_master_key + * struct was dropped; doing it here would be unnecessarily late. + * Nevertheless, use kfree_sensitive() in case anything was missed. + */ kfree_sensitive(mk); } -static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) +void fscrypt_put_master_key(struct fscrypt_master_key *mk) { - if (spec->__reserved) - return false; - return master_key_spec_len(spec) != 0; + if (!refcount_dec_and_test(&mk->mk_struct_refs)) + return; + /* + * No structural references left, so free ->mk_users, and also free the + * fscrypt_master_key struct itself after an RCU grace period ensures + * that concurrent keyring lookups can no longer find it. + */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 0); + key_put(mk->mk_users); + mk->mk_users = NULL; + call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key); } -static int fscrypt_key_instantiate(struct key *key, - struct key_preparsed_payload *prep) +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk) { - key->payload.data[0] = (struct fscrypt_master_key *)prep->data; - return 0; -} + struct super_block *sb = mk->mk_sb; + struct fscrypt_keyring *keyring = sb->s_master_keys; + size_t i; -static void fscrypt_key_destroy(struct key *key) -{ - free_master_key(key->payload.data[0]); -} + if (!refcount_dec_and_test(&mk->mk_active_refs)) + return; + /* + * No active references left, so complete the full removal of this + * fscrypt_master_key struct by removing it from the keyring and + * destroying any subkeys embedded in it. + */ -static void fscrypt_key_describe(const struct key *key, struct seq_file *m) -{ - seq_puts(m, key->description); + spin_lock(&keyring->lock); + hlist_del_rcu(&mk->mk_node); + spin_unlock(&keyring->lock); - if (key_is_positive(key)) { - const struct fscrypt_master_key *mk = key->payload.data[0]; + /* + * ->mk_active_refs == 0 implies that ->mk_secret is not present and + * that ->mk_decrypted_inodes is empty. + */ + WARN_ON(is_master_key_secret_present(&mk->mk_secret)); + WARN_ON(!list_empty(&mk->mk_decrypted_inodes)); - if (!is_master_key_secret_present(&mk->mk_secret)) - seq_puts(m, ": secret removed"); + for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { + fscrypt_destroy_prepared_key( + sb, &mk->mk_direct_keys[i]); + fscrypt_destroy_prepared_key( + sb, &mk->mk_iv_ino_lblk_64_keys[i]); + fscrypt_destroy_prepared_key( + sb, &mk->mk_iv_ino_lblk_32_keys[i]); } + memzero_explicit(&mk->mk_ino_hash_key, + sizeof(mk->mk_ino_hash_key)); + mk->mk_ino_hash_key_initialized = false; + + /* Drop the structural ref associated with the active refs. */ + fscrypt_put_master_key(mk); } -/* - * Type of key in ->s_master_keys. Each key of this type represents a master - * key which has been added to the filesystem. Its payload is a - * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents - * users from adding keys of this type via the keyrings syscalls rather than via - * the intended method of FS_IOC_ADD_ENCRYPTION_KEY. - */ -static struct key_type key_type_fscrypt = { - .name = "._fscrypt", - .instantiate = fscrypt_key_instantiate, - .destroy = fscrypt_key_destroy, - .describe = fscrypt_key_describe, -}; +static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) +{ + if (spec->__reserved) + return false; + return master_key_spec_len(spec) != 0; +} static int fscrypt_user_key_instantiate(struct key *key, struct key_preparsed_payload *prep) @@ -131,32 +159,6 @@ static struct key_type key_type_fscrypt_user = { .describe = fscrypt_user_key_describe, }; -/* Search ->s_master_keys or ->mk_users */ -static struct key *search_fscrypt_keyring(struct key *keyring, - struct key_type *type, - const char *description) -{ - /* - * We need to mark the keyring reference as "possessed" so that we - * acquire permission to search it, via the KEY_POS_SEARCH permission. - */ - key_ref_t keyref = make_key_ref(keyring, true /* possessed */); - - keyref = keyring_search(keyref, type, description, false); - if (IS_ERR(keyref)) { - if (PTR_ERR(keyref) == -EAGAIN || /* not found */ - PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ - keyref = ERR_PTR(-ENOKEY); - return ERR_CAST(keyref); - } - return key_ref_to_ptr(keyref); -} - -#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \ - (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id)) - -#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1) - #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \ (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \ CONST_STRLEN("-users") + 1) @@ -164,21 +166,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring, #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \ (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1) -static void format_fs_keyring_description( - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE], - const struct super_block *sb) -{ - sprintf(description, "fscrypt-%s", sb->s_id); -} - -static void format_mk_description( - char description[FSCRYPT_MK_DESCRIPTION_SIZE], - const struct fscrypt_key_specifier *mk_spec) -{ - sprintf(description, "%*phN", - master_key_spec_len(mk_spec), (u8 *)&mk_spec->u); -} - static void format_mk_users_keyring_description( char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE], const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) @@ -199,20 +186,15 @@ static void format_mk_user_description( /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */ static int allocate_filesystem_keyring(struct super_block *sb) { - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE]; - struct key *keyring; + struct fscrypt_keyring *keyring; if (sb->s_master_keys) return 0; - format_fs_keyring_description(description, sb); - keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - current_cred(), KEY_POS_SEARCH | - KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); - if (IS_ERR(keyring)) - return PTR_ERR(keyring); - + keyring = kzalloc(sizeof(*keyring), GFP_KERNEL); + if (!keyring) + return -ENOMEM; + spin_lock_init(&keyring->lock); /* * Pairs with the smp_load_acquire() in fscrypt_find_master_key(). * I.e., here we publish ->s_master_keys with a RELEASE barrier so that @@ -222,21 +204,75 @@ static int allocate_filesystem_keyring(struct super_block *sb) return 0; } -void fscrypt_sb_free(struct super_block *sb) +/* + * This is called at unmount time to release all encryption keys that have been + * added to the filesystem, along with the keyring that contains them. + * + * Note that besides clearing and freeing memory, this might need to evict keys + * from the keyslots of an inline crypto engine. Therefore, this must be called + * while the filesystem's underlying block device(s) are still available. + */ +void fscrypt_sb_delete(struct super_block *sb) { - key_put(sb->s_master_keys); + struct fscrypt_keyring *keyring = sb->s_master_keys; + size_t i; + + if (!keyring) + return; + + for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) { + struct hlist_head *bucket = &keyring->key_hashtable[i]; + struct fscrypt_master_key *mk; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { + /* + * Since all inodes were already evicted, every key + * remaining in the keyring should have an empty inode + * list, and should only still be in the keyring due to + * the single active ref associated with ->mk_secret. + * There should be no structural refs beyond the one + * associated with the active ref. + */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 1); + WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); + WARN_ON(!is_master_key_secret_present(&mk->mk_secret)); + wipe_master_key_secret(&mk->mk_secret); + fscrypt_put_master_key_activeref(mk); + } + } + kfree_sensitive(keyring); sb->s_master_keys = NULL; } +static struct hlist_head * +fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring, + const struct fscrypt_key_specifier *mk_spec) +{ + /* + * Since key specifiers should be "random" values, it is sufficient to + * use a trivial hash function that just takes the first several bits of + * the key specifier. + */ + unsigned long i = get_unaligned((unsigned long *)&mk_spec->u); + + return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)]; +} + /* - * Find the specified master key in ->s_master_keys. - * Returns ERR_PTR(-ENOKEY) if not found. + * Find the specified master key struct in ->s_master_keys and take a structural + * ref to it. The structural ref guarantees that the key struct continues to + * exist, but it does *not* guarantee that ->s_master_keys continues to contain + * the key struct. The structural ref needs to be dropped by + * fscrypt_put_master_key(). Returns NULL if the key struct is not found. */ -struct key *fscrypt_find_master_key(struct super_block *sb, - const struct fscrypt_key_specifier *mk_spec) +struct fscrypt_master_key * +fscrypt_find_master_key(struct super_block *sb, + const struct fscrypt_key_specifier *mk_spec) { - struct key *keyring; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; + struct fscrypt_keyring *keyring; + struct hlist_head *bucket; + struct fscrypt_master_key *mk; /* * Pairs with the smp_store_release() in allocate_filesystem_keyring(). @@ -246,10 +282,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb, */ keyring = smp_load_acquire(&sb->s_master_keys); if (keyring == NULL) - return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */ - - format_mk_description(description, mk_spec); - return search_fscrypt_keyring(keyring, &key_type_fscrypt, description); + return NULL; /* No keyring yet, so no keys yet. */ + + bucket = fscrypt_mk_hash_bucket(keyring, mk_spec); + rcu_read_lock(); + switch (mk_spec->type) { + case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + memcmp(mk->mk_spec.u.descriptor, + mk_spec->u.descriptor, + FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && + memcmp(mk->mk_spec.u.identifier, + mk_spec->u.identifier, + FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + } + mk = NULL; +out: + rcu_read_unlock(); + return mk; } static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) @@ -277,17 +341,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) static struct key *find_master_key_user(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; + key_ref_t keyref; format_mk_user_description(description, mk->mk_spec.u.identifier); - return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user, - description); + + /* + * We need to mark the keyring reference as "possessed" so that we + * acquire permission to search it, via the KEY_POS_SEARCH permission. + */ + keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/), + &key_type_fscrypt_user, description, false); + if (IS_ERR(keyref)) { + if (PTR_ERR(keyref) == -EAGAIN || /* not found */ + PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ + keyref = ERR_PTR(-ENOKEY); + return ERR_CAST(keyref); + } + return key_ref_to_ptr(keyref); } /* * Give the current user a "key" in ->mk_users. This charges the user's quota * and marks the master key as added by the current user, so that it cannot be - * removed by another user with the key. Either the master key's key->sem must - * be held for write, or the master key must be still undergoing initialization. + * removed by another user with the key. Either ->mk_sem must be held for + * write, or the master key must be still undergoing initialization. */ static int add_master_key_user(struct fscrypt_master_key *mk) { @@ -309,7 +386,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk) /* * Remove the current user's "key" from ->mk_users. - * The master key's key->sem must be held for write. + * ->mk_sem must be held for write. * * Returns 0 if removed, -ENOKEY if not found, or another -errno code. */ @@ -327,63 +404,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk) } /* - * Allocate a new fscrypt_master_key which contains the given secret, set it as - * the payload of a new 'struct key' of type fscrypt, and link the 'struct key' - * into the given keyring. Synchronized by fscrypt_add_key_mutex. + * Allocate a new fscrypt_master_key, transfer the given secret over to it, and + * insert it into sb->s_master_keys. */ -static int add_new_master_key(struct fscrypt_master_key_secret *secret, - const struct fscrypt_key_specifier *mk_spec, - struct key *keyring) +static int add_new_master_key(struct super_block *sb, + struct fscrypt_master_key_secret *secret, + const struct fscrypt_key_specifier *mk_spec) { + struct fscrypt_keyring *keyring = sb->s_master_keys; struct fscrypt_master_key *mk; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; - struct key *key; int err; mk = kzalloc(sizeof(*mk), GFP_KERNEL); if (!mk) return -ENOMEM; + mk->mk_sb = sb; + init_rwsem(&mk->mk_sem); + refcount_set(&mk->mk_struct_refs, 1); mk->mk_spec = *mk_spec; - move_master_key_secret(&mk->mk_secret, secret); - - refcount_set(&mk->mk_refcount, 1); /* secret is present */ INIT_LIST_HEAD(&mk->mk_decrypted_inodes); spin_lock_init(&mk->mk_decrypted_inodes_lock); if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { err = allocate_master_key_users_keyring(mk); if (err) - goto out_free_mk; + goto out_put; err = add_master_key_user(mk); if (err) - goto out_free_mk; + goto out_put; } - /* - * Note that we don't charge this key to anyone's quota, since when - * ->mk_users is in use those keys are charged instead, and otherwise - * (when ->mk_users isn't in use) only root can add these keys. - */ - format_mk_description(description, mk_spec); - key = key_alloc(&key_type_fscrypt, description, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), - KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto out_free_mk; - } - err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL); - key_put(key); - if (err) - goto out_free_mk; + move_master_key_secret(&mk->mk_secret, secret); + refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */ + spin_lock(&keyring->lock); + hlist_add_head_rcu(&mk->mk_node, + fscrypt_mk_hash_bucket(keyring, mk_spec)); + spin_unlock(&keyring->lock); return 0; -out_free_mk: - free_master_key(mk); +out_put: + fscrypt_put_master_key(mk); return err; } @@ -392,42 +455,34 @@ out_free_mk: static int add_existing_master_key(struct fscrypt_master_key *mk, struct fscrypt_master_key_secret *secret) { - struct key *mk_user; - bool rekey; int err; /* * If the current user is already in ->mk_users, then there's nothing to - * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.) + * do. Otherwise, we need to add the user to ->mk_users. (Neither is + * applicable for v1 policy keys, which have NULL ->mk_users.) */ if (mk->mk_users) { - mk_user = find_master_key_user(mk); + struct key *mk_user = find_master_key_user(mk); + if (mk_user != ERR_PTR(-ENOKEY)) { if (IS_ERR(mk_user)) return PTR_ERR(mk_user); key_put(mk_user); return 0; } - } - - /* If we'll be re-adding ->mk_secret, try to take the reference. */ - rekey = !is_master_key_secret_present(&mk->mk_secret); - if (rekey && !refcount_inc_not_zero(&mk->mk_refcount)) - return KEY_DEAD; - - /* Add the current user to ->mk_users, if applicable. */ - if (mk->mk_users) { err = add_master_key_user(mk); - if (err) { - if (rekey && refcount_dec_and_test(&mk->mk_refcount)) - return KEY_DEAD; + if (err) return err; - } } /* Re-add the secret if needed. */ - if (rekey) + if (!is_master_key_secret_present(&mk->mk_secret)) { + if (!refcount_inc_not_zero(&mk->mk_active_refs)) + return KEY_DEAD; move_master_key_secret(&mk->mk_secret, secret); + } + return 0; } @@ -436,38 +491,36 @@ static int do_add_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec) { static DEFINE_MUTEX(fscrypt_add_key_mutex); - struct key *key; + struct fscrypt_master_key *mk; int err; mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */ -retry: - key = fscrypt_find_master_key(sb, mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); - if (err != -ENOKEY) - goto out_unlock; + + mk = fscrypt_find_master_key(sb, mk_spec); + if (!mk) { /* Didn't find the key in ->s_master_keys. Add it. */ err = allocate_filesystem_keyring(sb); - if (err) - goto out_unlock; - err = add_new_master_key(secret, mk_spec, sb->s_master_keys); + if (!err) + err = add_new_master_key(sb, secret, mk_spec); } else { /* * Found the key in ->s_master_keys. Re-add the secret if * needed, and add the user to ->mk_users if needed. */ - down_write(&key->sem); - err = add_existing_master_key(key->payload.data[0], secret); - up_write(&key->sem); + down_write(&mk->mk_sem); + err = add_existing_master_key(mk, secret); + up_write(&mk->mk_sem); if (err == KEY_DEAD) { - /* Key being removed or needs to be removed */ - key_invalidate(key); - key_put(key); - goto retry; + /* + * We found a key struct, but it's already been fully + * removed. Ignore the old struct and add a new one. + * fscrypt_add_key_mutex means we don't need to worry + * about concurrent adds. + */ + err = add_new_master_key(sb, secret, mk_spec); } - key_put(key); + fscrypt_put_master_key(mk); } -out_unlock: mutex_unlock(&fscrypt_add_key_mutex); return err; } @@ -771,19 +824,19 @@ int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { struct fscrypt_key_specifier mk_spec; - struct key *key, *mk_user; struct fscrypt_master_key *mk; + struct key *mk_user; int err; mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); - key = fscrypt_find_master_key(sb, &mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &mk_spec); + if (!mk) { + err = -ENOKEY; goto out; } - mk = key->payload.data[0]; + down_read(&mk->mk_sem); mk_user = find_master_key_user(mk); if (IS_ERR(mk_user)) { err = PTR_ERR(mk_user); @@ -791,7 +844,8 @@ int fscrypt_verify_key_added(struct super_block *sb, key_put(mk_user); err = 0; } - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (err == -ENOKEY && capable(CAP_FOWNER)) err = 0; @@ -953,11 +1007,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_remove_key_arg __user *uarg = _uarg; struct fscrypt_remove_key_arg arg; - struct key *key; struct fscrypt_master_key *mk; u32 status_flags = 0; int err; - bool dead; + bool inodes_remain; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; @@ -977,12 +1030,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) return -EACCES; /* Find the key being removed. */ - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) - return PTR_ERR(key); - mk = key->payload.data[0]; - - down_write(&key->sem); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) + return -ENOKEY; + down_write(&mk->mk_sem); /* If relevant, remove current user's (or all users) claim to the key */ if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -991,7 +1042,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) else err = remove_master_key_user(mk); if (err) { - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } if (mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -1003,26 +1054,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) status_flags |= FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS; err = 0; - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } } /* No user claims remaining. Go ahead and wipe the secret. */ - dead = false; + err = -ENOKEY; if (is_master_key_secret_present(&mk->mk_secret)) { wipe_master_key_secret(&mk->mk_secret); - dead = refcount_dec_and_test(&mk->mk_refcount); - } - up_write(&key->sem); - if (dead) { - /* - * No inodes reference the key, and we wiped the secret, so the - * key object is free to be removed from the keyring. - */ - key_invalidate(key); + fscrypt_put_master_key_activeref(mk); err = 0; - } else { + } + inodes_remain = refcount_read(&mk->mk_active_refs) > 0; + up_write(&mk->mk_sem); + + if (inodes_remain) { /* Some inodes still reference this key; try to evict them. */ err = try_to_lock_encrypted_files(sb, mk); if (err == -EBUSY) { @@ -1038,7 +1085,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) * has been fully removed including all files locked. */ out_put_key: - key_put(key); + fscrypt_put_master_key(mk); if (err == 0) err = put_user(status_flags, &uarg->removal_status_flags); return err; @@ -1085,7 +1132,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_get_key_status_arg arg; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -1102,19 +1148,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) arg.user_count = 0; memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved)); - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY)) - return PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) { arg.status = FSCRYPT_KEY_STATUS_ABSENT; err = 0; goto out; } - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); if (!is_master_key_secret_present(&mk->mk_secret)) { - arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED; + arg.status = refcount_read(&mk->mk_active_refs) > 0 ? + FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED : + FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */; err = 0; goto out_release_key; } @@ -1136,8 +1181,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) } err = 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (!err && copy_to_user(uarg, &arg, sizeof(arg))) err = -EFAULT; @@ -1149,13 +1194,9 @@ int __init fscrypt_init_keyring(void) { int err; - err = register_key_type(&key_type_fscrypt); - if (err) - return err; - err = register_key_type(&key_type_fscrypt_user); if (err) - goto err_unregister_fscrypt; + return err; err = register_key_type(&key_type_fscrypt_provisioning); if (err) @@ -1165,7 +1206,5 @@ int __init fscrypt_init_keyring(void) err_unregister_fscrypt_user: unregister_key_type(&key_type_fscrypt_user); -err_unregister_fscrypt: - unregister_key_type(&key_type_fscrypt); return err; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index fbc71abdabe3..f7407071a952 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -9,7 +9,6 @@ */ #include <crypto/skcipher.h> -#include <linux/key.h> #include <linux/random.h> #include "fscrypt_private.h" @@ -155,10 +154,12 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, } /* Destroy a crypto transform object and/or blk-crypto key. */ -void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) +void fscrypt_destroy_prepared_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { crypto_free_skcipher(prep_key->tfm); - fscrypt_destroy_inline_crypt_key(prep_key); + fscrypt_destroy_inline_crypt_key(sb, prep_key); + memzero_explicit(prep_key, sizeof(*prep_key)); } /* Given a per-file encryption key, set up the file's crypto transform object */ @@ -412,20 +413,18 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, /* * Find the master key, then set up the inode's actual encryption key. * - * If the master key is found in the filesystem-level keyring, then the - * corresponding 'struct key' is returned in *master_key_ret with its semaphore - * read-locked. This is needed to ensure that only one task links the - * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create - * an fscrypt_info for the same inode), and to synchronize the master key being - * removed with a new inode starting to use it. + * If the master key is found in the filesystem-level keyring, then it is + * returned in *mk_ret with its semaphore read-locked. This is needed to ensure + * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as + * multiple tasks may race to create an fscrypt_info for the same inode), and to + * synchronize the master key being removed with a new inode starting to use it. */ static int setup_file_encryption_key(struct fscrypt_info *ci, bool need_dirhash_key, - struct key **master_key_ret) + struct fscrypt_master_key **mk_ret) { - struct key *key; - struct fscrypt_master_key *mk = NULL; struct fscrypt_key_specifier mk_spec; + struct fscrypt_master_key *mk; int err; err = fscrypt_select_encryption_impl(ci); @@ -436,11 +435,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, if (err) return err; - key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY) || - ci->ci_policy.version != FSCRYPT_POLICY_V1) - return PTR_ERR(key); + mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); + if (!mk) { + if (ci->ci_policy.version != FSCRYPT_POLICY_V1) + return -ENOKEY; /* * As a legacy fallback for v1 policies, search for the key in @@ -450,9 +448,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, */ return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci); } - - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */ if (!is_master_key_secret_present(&mk->mk_secret)) { @@ -480,18 +476,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, if (err) goto out_release_key; - *master_key_ret = key; + *mk_ret = mk; return 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); return err; } static void put_crypt_info(struct fscrypt_info *ci) { - struct key *key; + struct fscrypt_master_key *mk; if (!ci) return; @@ -499,26 +495,21 @@ static void put_crypt_info(struct fscrypt_info *ci) if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); else if (ci->ci_owns_key) - fscrypt_destroy_prepared_key(&ci->ci_enc_key); - - key = ci->ci_master_key; - if (key) { - struct fscrypt_master_key *mk = key->payload.data[0]; + fscrypt_destroy_prepared_key(ci->ci_inode->i_sb, + &ci->ci_enc_key); + mk = ci->ci_master_key; + if (mk) { /* * Remove this inode from the list of inodes that were unlocked - * with the master key. - * - * In addition, if we're removing the last inode from a key that - * already had its secret removed, invalidate the key so that it - * gets removed from ->s_master_keys. + * with the master key. In addition, if we're removing the last + * inode from a master key struct that already had its secret + * removed, then complete the full removal of the struct. */ spin_lock(&mk->mk_decrypted_inodes_lock); list_del(&ci->ci_master_key_link); spin_unlock(&mk->mk_decrypted_inodes_lock); - if (refcount_dec_and_test(&mk->mk_refcount)) - key_invalidate(key); - key_put(key); + fscrypt_put_master_key_activeref(mk); } memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_info_cachep, ci); @@ -532,7 +523,7 @@ fscrypt_setup_encryption_info(struct inode *inode, { struct fscrypt_info *crypt_info; struct fscrypt_mode *mode; - struct key *master_key = NULL; + struct fscrypt_master_key *mk = NULL; int res; res = fscrypt_initialize(inode->i_sb->s_cop->flags); @@ -555,8 +546,7 @@ fscrypt_setup_encryption_info(struct inode *inode, WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); crypt_info->ci_mode = mode; - res = setup_file_encryption_key(crypt_info, need_dirhash_key, - &master_key); + res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); if (res) goto out; @@ -571,12 +561,9 @@ fscrypt_setup_encryption_info(struct inode *inode, * We won the race and set ->i_crypt_info to our crypt_info. * Now link it into the master key's inode list. */ - if (master_key) { - struct fscrypt_master_key *mk = - master_key->payload.data[0]; - - refcount_inc(&mk->mk_refcount); - crypt_info->ci_master_key = key_get(master_key); + if (mk) { + crypt_info->ci_master_key = mk; + refcount_inc(&mk->mk_active_refs); spin_lock(&mk->mk_decrypted_inodes_lock); list_add(&crypt_info->ci_master_key_link, &mk->mk_decrypted_inodes); @@ -586,9 +573,9 @@ fscrypt_setup_encryption_info(struct inode *inode, } res = 0; out: - if (master_key) { - up_read(&master_key->sem); - key_put(master_key); + if (mk) { + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); } put_crypt_info(crypt_info); return res; @@ -753,7 +740,6 @@ EXPORT_SYMBOL(fscrypt_free_inode); int fscrypt_drop_inode(struct inode *inode) { const struct fscrypt_info *ci = fscrypt_get_info(inode); - const struct fscrypt_master_key *mk; /* * If ci is NULL, then the inode doesn't have an encryption key set up @@ -763,7 +749,6 @@ int fscrypt_drop_inode(struct inode *inode) */ if (!ci || !ci->ci_master_key) return 0; - mk = ci->ci_master_key->payload.data[0]; /* * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes @@ -782,6 +767,6 @@ int fscrypt_drop_inode(struct inode *inode) * then the thread removing the key will either evict the inode itself * or will correctly detect that it wasn't evicted due to the race. */ - return !is_master_key_secret_present(&mk->mk_secret); + return !is_master_key_secret_present(&ci->ci_master_key->mk_secret); } EXPORT_SYMBOL_GPL(fscrypt_drop_inode); diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 2762c5350432..75dabd9b27f9 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -143,6 +143,7 @@ invalid: /* Master key referenced by DIRECT_KEY policy */ struct fscrypt_direct_key { + struct super_block *dk_sb; struct hlist_node dk_node; refcount_t dk_refcount; const struct fscrypt_mode *dk_mode; @@ -154,7 +155,7 @@ struct fscrypt_direct_key { static void free_direct_key(struct fscrypt_direct_key *dk) { if (dk) { - fscrypt_destroy_prepared_key(&dk->dk_key); + fscrypt_destroy_prepared_key(dk->dk_sb, &dk->dk_key); kfree_sensitive(dk); } } @@ -231,6 +232,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) dk = kzalloc(sizeof(*dk), GFP_KERNEL); if (!dk) return ERR_PTR(-ENOMEM); + dk->dk_sb = ci->ci_inode->i_sb; refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 80b8ca0f340b..46757c3052ef 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -744,12 +744,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data) * delayed key setup that requires the inode number. */ if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && - (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { - const struct fscrypt_master_key *mk = - ci->ci_master_key->payload.data[0]; - - fscrypt_hash_inode_number(ci, mk); - } + (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) + fscrypt_hash_inode_number(ci, ci->ci_master_key); return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data); } @@ -833,19 +829,6 @@ bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, } EXPORT_SYMBOL_GPL(fscrypt_dummy_policies_equal); -/* Deprecated, do not use */ -int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, - struct fscrypt_dummy_policy *dummy_policy) -{ - struct fs_parameter param = { - .type = fs_value_is_string, - .string = arg ? (char *)arg : "", - }; - return fscrypt_parse_test_dummy_encryption(¶m, dummy_policy) ?: - fscrypt_add_test_dummy_key(sb, dummy_policy); -} -EXPORT_SYMBOL_GPL(fscrypt_set_test_dummy_encryption); - /** * fscrypt_show_test_dummy_encryption() - show '-o test_dummy_encryption' * @seq: the seq_file to print the option to diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 19ef136f9e4f..d60a8d8f109d 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -200,13 +200,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, if (!prev_seq) { kref_get(&lkb->lkb_ref); + mutex_lock(&ls->ls_cb_mutex); if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { - mutex_lock(&ls->ls_cb_mutex); list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); - mutex_unlock(&ls->ls_cb_mutex); } else { queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); } + mutex_unlock(&ls->ls_cb_mutex); } out: mutex_unlock(&lkb->lkb_cb_mutex); @@ -288,10 +288,13 @@ void dlm_callback_stop(struct dlm_ls *ls) void dlm_callback_suspend(struct dlm_ls *ls) { - set_bit(LSFL_CB_DELAY, &ls->ls_flags); + if (ls->ls_callback_wq) { + mutex_lock(&ls->ls_cb_mutex); + set_bit(LSFL_CB_DELAY, &ls->ls_flags); + mutex_unlock(&ls->ls_cb_mutex); - if (ls->ls_callback_wq) flush_workqueue(ls->ls_callback_wq); + } } #define MAX_CB_QUEUE 25 @@ -302,11 +305,11 @@ void dlm_callback_resume(struct dlm_ls *ls) int count = 0, sum = 0; bool empty; - clear_bit(LSFL_CB_DELAY, &ls->ls_flags); - if (!ls->ls_callback_wq) return; + clear_bit(LSFL_CB_DELAY, &ls->ls_flags); + more: mutex_lock(&ls->ls_cb_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h index 181ad7d20c4d..e5e05fcc5813 100644 --- a/fs/dlm/ast.h +++ b/fs/dlm/ast.h @@ -11,7 +11,6 @@ #ifndef __ASTD_DOT_H__ #define __ASTD_DOT_H__ -void dlm_del_ast(struct dlm_lkb *lkb); int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq); int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 8aca8085d24e..e34c3d2639a5 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -661,7 +661,7 @@ struct dlm_ls { spinlock_t ls_recover_idr_lock; wait_queue_head_t ls_wait_general; wait_queue_head_t ls_recover_lock_wait; - struct mutex ls_clear_proc_locks; + spinlock_t ls_clear_proc_locks; struct list_head ls_root_list; /* root resources */ struct rw_semaphore ls_root_sem; /* protect root_list */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index dac7eb75dba9..94a72ede5764 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -401,7 +401,7 @@ static int pre_rsb_struct(struct dlm_ls *ls) unlock any spinlocks, go back and call pre_rsb_struct again. Otherwise, take an rsb off the list and return it. */ -static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, +static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len, struct dlm_rsb **r_ret) { struct dlm_rsb *r; @@ -412,7 +412,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, count = ls->ls_new_rsb_count; spin_unlock(&ls->ls_new_rsb_spin); log_debug(ls, "find_rsb retry %d %d %s", - count, dlm_config.ci_new_rsb_count, name); + count, dlm_config.ci_new_rsb_count, + (const char *)name); return -EAGAIN; } @@ -448,7 +449,7 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); } -int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, +int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len, struct dlm_rsb **r_ret) { struct rb_node *node = tree->rb_node; @@ -546,7 +547,7 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) * while that rsb has a potentially stale master.) */ -static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, +static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len, uint32_t hash, uint32_t b, int dir_nodeid, int from_nodeid, unsigned int flags, struct dlm_rsb **r_ret) @@ -724,7 +725,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, dlm_recover_locks) before we've made ourself master (in dlm_recover_masters). */ -static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, +static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len, uint32_t hash, uint32_t b, int dir_nodeid, int from_nodeid, unsigned int flags, struct dlm_rsb **r_ret) @@ -818,8 +819,9 @@ static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, return error; } -static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid, - unsigned int flags, struct dlm_rsb **r_ret) +static int find_rsb(struct dlm_ls *ls, const void *name, int len, + int from_nodeid, unsigned int flags, + struct dlm_rsb **r_ret) { uint32_t hash, b; int dir_nodeid; @@ -2864,17 +2866,9 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_args *args) { - int rv = -EINVAL; + int rv = -EBUSY; if (args->flags & DLM_LKF_CONVERT) { - if (lkb->lkb_flags & DLM_IFL_MSTCPY) - goto out; - - if (args->flags & DLM_LKF_QUECVT && - !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) - goto out; - - rv = -EBUSY; if (lkb->lkb_status != DLM_LKSTS_GRANTED) goto out; @@ -2884,6 +2878,14 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, if (is_overlap(lkb)) goto out; + + rv = -EINVAL; + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_QUECVT && + !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) + goto out; } lkb->lkb_exflags = args->flags; @@ -2900,11 +2902,25 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, #endif rv = 0; out: - if (rv) - log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s", + switch (rv) { + case 0: + break; + case -EINVAL: + /* annoy the user because dlm usage is wrong */ + WARN_ON(1); + log_error(ls, "%s %d %x %x %x %d %d %s", __func__, + rv, lkb->lkb_id, lkb->lkb_flags, args->flags, + lkb->lkb_status, lkb->lkb_wait_type, + lkb->lkb_resource->res_name); + break; + default: + log_debug(ls, "%s %d %x %x %x %d %d %s", __func__, rv, lkb->lkb_id, lkb->lkb_flags, args->flags, lkb->lkb_status, lkb->lkb_wait_type, lkb->lkb_resource->res_name); + break; + } + return rv; } @@ -2918,23 +2934,12 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; - int rv = -EINVAL; - - if (lkb->lkb_flags & DLM_IFL_MSTCPY) { - log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id); - dlm_print_lkb(lkb); - goto out; - } - - /* an lkb may still exist even though the lock is EOL'ed due to a - cancel, unlock or failed noqueue request; an app can't use these - locks; return same error as if the lkid had not been found at all */ + int rv = -EBUSY; - if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) { - log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id); - rv = -ENOENT; + /* normal unlock not allowed if there's any op in progress */ + if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) && + (lkb->lkb_wait_type || lkb->lkb_wait_count)) goto out; - } /* an lkb may be waiting for an rsb lookup to complete where the lookup was initiated by another lock */ @@ -2949,7 +2954,24 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) unhold_lkb(lkb); /* undoes create_lkb() */ } /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */ - rv = -EBUSY; + goto out; + } + + rv = -EINVAL; + if (lkb->lkb_flags & DLM_IFL_MSTCPY) { + log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id); + dlm_print_lkb(lkb); + goto out; + } + + /* an lkb may still exist even though the lock is EOL'ed due to a + * cancel, unlock or failed noqueue request; an app can't use these + * locks; return same error as if the lkid had not been found at all + */ + + if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) { + log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id); + rv = -ENOENT; goto out; } @@ -3022,14 +3044,8 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) goto out; } /* add_to_waiters() will set OVERLAP_UNLOCK */ - goto out_ok; } - /* normal unlock not allowed if there's any op in progress */ - rv = -EBUSY; - if (lkb->lkb_wait_type || lkb->lkb_wait_count) - goto out; - out_ok: /* an overlapping op shouldn't blow away exflags from other op */ lkb->lkb_exflags |= args->flags; @@ -3037,11 +3053,25 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) lkb->lkb_astparam = args->astparam; rv = 0; out: - if (rv) - log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv, + switch (rv) { + case 0: + break; + case -EINVAL: + /* annoy the user because dlm usage is wrong */ + WARN_ON(1); + log_error(ls, "%s %d %x %x %x %x %d %s", __func__, rv, + lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags, + args->flags, lkb->lkb_wait_type, + lkb->lkb_resource->res_name); + break; + default: + log_debug(ls, "%s %d %x %x %x %x %d %s", __func__, rv, lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags, args->flags, lkb->lkb_wait_type, lkb->lkb_resource->res_name); + break; + } + return rv; } @@ -3292,8 +3322,9 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) * request_lock(), convert_lock(), unlock_lock(), cancel_lock() */ -static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, - int len, struct dlm_args *args) +static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + const void *name, int len, + struct dlm_args *args) { struct dlm_rsb *r; int error; @@ -3392,7 +3423,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*ast) (void *astarg), @@ -3438,7 +3469,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, if (error == -EINPROGRESS) error = 0; out_put: - trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error); + trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, true); if (convert || error) __put_lkb(ls, lkb); @@ -3623,7 +3654,7 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, case cpu_to_le32(DLM_MSG_REQUEST_REPLY): case cpu_to_le32(DLM_MSG_CONVERT_REPLY): case cpu_to_le32(DLM_MSG_GRANT): - if (!lkb->lkb_lvbptr) + if (!lkb->lkb_lvbptr || !(lkb->lkb_exflags & DLM_LKF_VALBLK)) break; memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); break; @@ -5080,8 +5111,11 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) down_read(&ls->ls_recv_active); if (hd->h_cmd == DLM_MSG) dlm_receive_message(ls, &p->message, nodeid); - else + else if (hd->h_cmd == DLM_RCOM) dlm_receive_rcom(ls, &p->rcom, nodeid); + else + log_error(ls, "invalid h_cmd %d from %d lockspace %x", + hd->h_cmd, nodeid, le32_to_cpu(hd->u.h_lockspace)); up_read(&ls->ls_recv_active); dlm_put_lockspace(ls); @@ -5801,6 +5835,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, { struct dlm_lkb *lkb; struct dlm_args args; + bool do_put = true; int error; dlm_lock_recovery(ls); @@ -5811,13 +5846,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, goto out; } + trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags); + if (flags & DLM_LKF_VALBLK) { ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS); if (!ua->lksb.sb_lvbptr) { kfree(ua); - __put_lkb(ls, lkb); error = -ENOMEM; - goto out; + goto out_put; } } #ifdef CONFIG_DLM_DEPRECATED_API @@ -5831,8 +5867,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, kfree(ua->lksb.sb_lvbptr); ua->lksb.sb_lvbptr = NULL; kfree(ua); - __put_lkb(ls, lkb); - goto out; + goto out_put; } /* After ua is attached to lkb it will be freed by dlm_free_lkb(). @@ -5851,8 +5886,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, error = 0; fallthrough; default: - __put_lkb(ls, lkb); - goto out; + goto out_put; } /* add this new lkb to the per-process list of locks */ @@ -5860,6 +5894,11 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, hold_lkb(lkb); list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); spin_unlock(&ua->proc->locks_spin); + do_put = false; + out_put: + trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, false); + if (do_put) + __put_lkb(ls, lkb); out: dlm_unlock_recovery(ls); return error; @@ -5885,6 +5924,8 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_lock_start(ls, lkb, NULL, 0, mode, flags); + /* user can change the params on its lock when it converts it, or add an lvb that didn't exist before */ @@ -5922,6 +5963,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK) error = 0; out_put: + trace_dlm_lock_end(ls, lkb, NULL, 0, mode, flags, error, false); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6014,6 +6056,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; if (lvb_in && ua->lksb.sb_lvbptr) @@ -6042,6 +6086,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6063,6 +6108,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; if (ua_tmp->castparam) ua->castparam = ua_tmp->castparam; @@ -6080,6 +6127,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error == -EBUSY) error = 0; out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6101,6 +6149,8 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; error = set_unlock_args(flags, ua, &args); @@ -6129,6 +6179,7 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) if (error == -EBUSY) error = 0; out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6184,7 +6235,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, { struct dlm_lkb *lkb = NULL; - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); if (list_empty(&proc->locks)) goto out; @@ -6196,7 +6247,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, else lkb->lkb_flags |= DLM_IFL_DEAD; out: - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); return lkb; } @@ -6233,7 +6284,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); /* in-progress unlocks */ list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { @@ -6249,7 +6300,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); dlm_unlock_recovery(ls); } diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index a7b6474f009d..40c76b5544da 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -36,7 +36,7 @@ static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { } int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len, unsigned int flags, int *r_nodeid, int *result); -int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, +int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len, struct dlm_rsb **r_ret); void dlm_recover_purge(struct dlm_ls *ls); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 3972f4d86c75..bae050df7abf 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -416,7 +416,7 @@ static int new_lockspace(const char *name, const char *cluster, if (namelen > DLM_LOCKSPACE_LEN || namelen == 0) return -EINVAL; - if (!lvblen || (lvblen % 8)) + if (lvblen % 8) return -EINVAL; if (!try_module_get(THIS_MODULE)) @@ -584,7 +584,7 @@ static int new_lockspace(const char *name, const char *cluster, atomic_set(&ls->ls_requestqueue_cnt, 0); init_waitqueue_head(&ls->ls_requestqueue_wait); mutex_init(&ls->ls_requestqueue_mutex); - mutex_init(&ls->ls_clear_proc_locks); + spin_lock_init(&ls->ls_clear_proc_locks); /* Due backwards compatibility with 3.1 we need to use maximum * possible dlm message size to be sure the message will fit and @@ -703,10 +703,11 @@ static int new_lockspace(const char *name, const char *cluster, return error; } -int dlm_new_lockspace(const char *name, const char *cluster, - uint32_t flags, int lvblen, - const struct dlm_lockspace_ops *ops, void *ops_arg, - int *ops_result, dlm_lockspace_t **lockspace) +static int __dlm_new_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) { int error = 0; @@ -732,6 +733,25 @@ int dlm_new_lockspace(const char *name, const char *cluster, return error; } +int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags, + int lvblen, const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) +{ + return __dlm_new_lockspace(name, cluster, flags | DLM_LSFL_FS, lvblen, + ops, ops_arg, ops_result, lockspace); +} + +int dlm_new_user_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) +{ + return __dlm_new_lockspace(name, cluster, flags, lvblen, ops, + ops_arg, ops_result, lockspace); +} + static int lkb_idr_is_local(int id, void *p, void *data) { struct dlm_lkb *lkb = p; diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 306fc4f4ea15..03f4a4a3a871 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h @@ -12,6 +12,14 @@ #ifndef __LOCKSPACE_DOT_H__ #define __LOCKSPACE_DOT_H__ +/* DLM_LSFL_FS + * The lockspace user is in the kernel (i.e. filesystem). Enables + * direct bast/cast callbacks. + * + * internal lockspace flag - will be removed in future + */ +#define DLM_LSFL_FS 0x00000004 + int dlm_lockspace_init(void); void dlm_lockspace_exit(void); struct dlm_ls *dlm_find_lockspace_global(uint32_t id); @@ -20,6 +28,11 @@ struct dlm_ls *dlm_find_lockspace_device(int minor); void dlm_put_lockspace(struct dlm_ls *ls); void dlm_stop_lockspaces(void); void dlm_stop_lockspaces_check(void); +int dlm_new_user_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace); #endif /* __LOCKSPACE_DOT_H__ */ diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index a4e84e8d94c8..59f64c596233 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1336,6 +1336,8 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, return NULL; } + /* for dlm_lowcomms_commit_msg() */ + kref_get(&msg->ref); /* we assume if successful commit must called */ msg->idx = idx; return msg; @@ -1375,6 +1377,8 @@ void dlm_lowcomms_commit_msg(struct dlm_msg *msg) { _dlm_lowcomms_commit_msg(msg); srcu_read_unlock(&connections_srcu, msg->idx); + /* because dlm_lowcomms_new_msg() */ + kref_put(&msg->ref, dlm_msg_release); } #endif diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 99e8f0744513..c5d27bccc3dc 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -16,6 +16,8 @@ #include <linux/slab.h> #include <linux/sched/signal.h> +#include <trace/events/dlm.h> + #include "dlm_internal.h" #include "lockspace.h" #include "lock.h" @@ -184,7 +186,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, return; ls = lkb->lkb_resource->res_ls; - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed @@ -230,7 +232,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, spin_unlock(&proc->locks_spin); } out: - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); } static int device_user_lock(struct dlm_user_proc *proc, @@ -421,9 +423,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags, - DLM_USER_LVB_LEN, NULL, NULL, NULL, - &lockspace); + error = dlm_new_user_lockspace(params->name, dlm_config.ci_cluster_name, + params->flags, DLM_USER_LVB_LEN, NULL, + NULL, NULL, &lockspace); if (error) return error; @@ -882,7 +884,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, goto try_another; } - if (cb.flags & DLM_CB_CAST) { + if (cb.flags & DLM_CB_BAST) { + trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb.mode); + } else if (cb.flags & DLM_CB_CAST) { new_mode = cb.mode; if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr && @@ -891,6 +895,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, lkb->lkb_lksb->sb_status = cb.sb_status; lkb->lkb_lksb->sb_flags = cb.sb_flags; + trace_dlm_ast(lkb->lkb_resource->res_ls, lkb); } rv = copy_result_to_user(lkb->lkb_ua, diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 2d55569f96ac..51b7ac7166d9 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -317,52 +317,61 @@ dstmap_out: return ret; } -static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, - struct page **pagepool) +static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, + struct page **pagepool) { - const unsigned int nrpages_out = + const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; + const unsigned int outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; const unsigned int righthalf = min_t(unsigned int, rq->outputsize, PAGE_SIZE - rq->pageofs_out); const unsigned int lefthalf = rq->outputsize - righthalf; + const unsigned int interlaced_offset = + rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out; unsigned char *src, *dst; - if (nrpages_out > 2) { + if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) { DBG_BUGON(1); - return -EIO; + return -EFSCORRUPTED; } if (rq->out[0] == *rq->in) { - DBG_BUGON(nrpages_out != 1); + DBG_BUGON(rq->pageofs_out); return 0; } - src = kmap_atomic(*rq->in) + rq->pageofs_in; + src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in; if (rq->out[0]) { - dst = kmap_atomic(rq->out[0]); - memcpy(dst + rq->pageofs_out, src, righthalf); - kunmap_atomic(dst); + dst = kmap_local_page(rq->out[0]); + memcpy(dst + rq->pageofs_out, src + interlaced_offset, + righthalf); + kunmap_local(dst); } - if (nrpages_out == 2) { - DBG_BUGON(!rq->out[1]); - if (rq->out[1] == *rq->in) { + if (outpages > inpages) { + DBG_BUGON(!rq->out[outpages - 1]); + if (rq->out[outpages - 1] != rq->in[inpages - 1]) { + dst = kmap_local_page(rq->out[outpages - 1]); + memcpy(dst, interlaced_offset ? src : + (src + righthalf), lefthalf); + kunmap_local(dst); + } else if (!interlaced_offset) { memmove(src, src + righthalf, lefthalf); - } else { - dst = kmap_atomic(rq->out[1]); - memcpy(dst, src + righthalf, lefthalf); - kunmap_atomic(dst); } } - kunmap_atomic(src); + kunmap_local(src); return 0; } static struct z_erofs_decompressor decompressors[] = { [Z_EROFS_COMPRESSION_SHIFTED] = { - .decompress = z_erofs_shifted_transform, + .decompress = z_erofs_transform_plain, .name = "shifted" }, + [Z_EROFS_COMPRESSION_INTERLACED] = { + .decompress = z_erofs_transform_plain, + .name = "interlaced" + }, [Z_EROFS_COMPRESSION_LZ4] = { .decompress = z_erofs_lz4_decompress, .name = "lz4" diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 5e59b3f523eb..091fd5adf818 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -217,6 +217,9 @@ again: strm->buf.out_size = min_t(u32, outlen, PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; + if (!rq->out[no] && rq->fillgaps) /* deduped */ + rq->out[no] = erofs_allocpage(pagepool, + GFP_KERNEL | __GFP_NOFAIL); if (rq->out[no]) strm->buf.out = kmap(rq->out[no]) + pageofs; pageofs = 0; diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 2b48373f690b..dbcd24371002 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -25,6 +25,8 @@ #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 +#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 +#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 #define EROFS_ALL_FEATURE_INCOMPAT \ (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ @@ -32,7 +34,9 @@ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ - EROFS_FEATURE_INCOMPAT_ZTAILPACKING) + EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ + EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ + EROFS_FEATURE_INCOMPAT_DEDUPE) #define EROFS_SB_EXTSLOT_SIZE 16 @@ -71,7 +75,9 @@ struct erofs_super_block { } __packed u1; __le16 extra_devices; /* # of devices besides the primary device */ __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ - __u8 reserved2[38]; + __u8 reserved[6]; + __le64 packed_nid; /* nid of the special packed inode */ + __u8 reserved2[24]; }; /* @@ -295,16 +301,27 @@ struct z_erofs_lzma_cfgs { * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) + * bit 4 : interlaced plain pcluster (0 - off; 1 - on) + * bit 5 : fragment pcluster (0 - off; 1 - on) */ #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 #define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 +#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 +#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 +#define Z_EROFS_FRAGMENT_INODE_BIT 7 struct z_erofs_map_header { - __le16 h_reserved1; - /* indicates the encoded size of tailpacking data */ - __le16 h_idata_size; + union { + /* fragment data offset in the packed inode */ + __le32 h_fragmentoff; + struct { + __le16 h_reserved1; + /* indicates the encoded size of tailpacking data */ + __le16 h_idata_size; + }; + }; __le16 h_advise; /* * bit 0-3 : algorithm type of head 1 (logical cluster type 01); @@ -313,7 +330,8 @@ struct z_erofs_map_header { __u8 h_algorithmtype; /* * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; - * bit 3-7 : reserved. + * bit 3-6 : reserved; + * bit 7 : move the whole file into packed inode or not. */ __u8 h_clusterbits; }; @@ -355,6 +373,9 @@ enum { #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 +/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ +#define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15) + /* * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the * compressed block count of a compressed extent (in logical clusters, aka. @@ -402,6 +423,10 @@ struct erofs_dirent { /* check the EROFS on-disk layout strictly at compile time */ static inline void erofs_check_ondisk_layout_definitions(void) { + const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) { + .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT + }; + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); @@ -419,6 +444,9 @@ static inline void erofs_check_ondisk_layout_definitions(void) BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); + /* exclude old compiler versions like gcc 7.5.0 */ + BUILD_BUG_ON(__builtin_constant_p(fmh) ? + fmh != cpu_to_le64(1ULL << 63) : 0); } #endif diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index b5fd9d71e67f..998cd26a1b3b 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -1,10 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2022, Alibaba Cloud + * Copyright (C) 2022, Bytedance Inc. All rights reserved. */ #include <linux/fscache.h> #include "internal.h" +static DEFINE_MUTEX(erofs_domain_list_lock); +static DEFINE_MUTEX(erofs_domain_cookies_lock); +static LIST_HEAD(erofs_domain_list); +static struct vfsmount *erofs_pseudo_mnt; + static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping, loff_t start, size_t len) { @@ -234,113 +240,111 @@ out: return ret; } -static int erofs_fscache_read_folio_inline(struct folio *folio, - struct erofs_map_blocks *map) -{ - struct super_block *sb = folio_mapping(folio)->host->i_sb; - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - erofs_blk_t blknr; - size_t offset, len; - void *src, *dst; - - /* For tail packing layout, the offset may be non-zero. */ - offset = erofs_blkoff(map->m_pa); - blknr = erofs_blknr(map->m_pa); - len = map->m_llen; - - src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = kmap_local_folio(folio, 0); - memcpy(dst, src + offset, len); - memset(dst + len, 0, PAGE_SIZE - len); - kunmap_local(dst); - - erofs_put_metabuf(&buf); - return 0; -} - -static int erofs_fscache_read_folio(struct file *file, struct folio *folio) +/* + * Read into page cache in the range described by (@pos, @len). + * + * On return, the caller is responsible for page unlocking if the output @unlock + * is true, or the callee will take this responsibility through netfs_io_request + * interface. + * + * The return value is the number of bytes successfully handled, or negative + * error code on failure. The only exception is that, the length of the range + * instead of the error code is returned on failure after netfs_io_request is + * allocated, so that .readahead() could advance rac accordingly. + */ +static int erofs_fscache_data_read(struct address_space *mapping, + loff_t pos, size_t len, bool *unlock) { - struct inode *inode = folio_mapping(folio)->host; + struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; + struct netfs_io_request *rreq; struct erofs_map_blocks map; struct erofs_map_dev mdev; - struct netfs_io_request *rreq; - erofs_off_t pos; - loff_t pstart; + struct iov_iter iter; + size_t count; int ret; - DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); + *unlock = true; - pos = folio_pos(folio); map.m_la = pos; - ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); if (ret) - goto out_unlock; + return ret; - if (!(map.m_flags & EROFS_MAP_MAPPED)) { - folio_zero_range(folio, 0, folio_size(folio)); - goto out_uptodate; + if (map.m_flags & EROFS_MAP_META) { + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + erofs_blk_t blknr; + size_t offset, size; + void *src; + + /* For tail packing layout, the offset may be non-zero. */ + offset = erofs_blkoff(map.m_pa); + blknr = erofs_blknr(map.m_pa); + size = map.m_llen; + + src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); + if (IS_ERR(src)) + return PTR_ERR(src); + + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE); + if (copy_to_iter(src + offset, size, &iter) != size) + return -EFAULT; + iov_iter_zero(PAGE_SIZE - size, &iter); + erofs_put_metabuf(&buf); + return PAGE_SIZE; } - if (map.m_flags & EROFS_MAP_META) { - ret = erofs_fscache_read_folio_inline(folio, &map); - goto out_uptodate; + count = min_t(size_t, map.m_llen - (pos - map.m_la), len); + DBG_BUGON(!count || count % PAGE_SIZE); + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count); + iov_iter_zero(count, &iter); + return count; } mdev = (struct erofs_map_dev) { .m_deviceid = map.m_deviceid, .m_pa = map.m_pa, }; - ret = erofs_map_dev(sb, &mdev); if (ret) - goto out_unlock; + return ret; + rreq = erofs_fscache_alloc_request(mapping, pos, count); + if (IS_ERR(rreq)) + return PTR_ERR(rreq); - rreq = erofs_fscache_alloc_request(folio_mapping(folio), - folio_pos(folio), folio_size(folio)); - if (IS_ERR(rreq)) { - ret = PTR_ERR(rreq); - goto out_unlock; - } - - pstart = mdev.m_pa + (pos - map.m_la); - return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - rreq, pstart); - -out_uptodate: - if (!ret) - folio_mark_uptodate(folio); -out_unlock: - folio_unlock(folio); - return ret; + *unlock = false; + erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa + (pos - map.m_la)); + return count; } -static void erofs_fscache_advance_folios(struct readahead_control *rac, - size_t len, bool unlock) +static int erofs_fscache_read_folio(struct file *file, struct folio *folio) { - while (len) { - struct folio *folio = readahead_folio(rac); - len -= folio_size(folio); - if (unlock) { + bool unlock; + int ret; + + DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); + + ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio), + folio_size(folio), &unlock); + if (unlock) { + if (ret > 0) folio_mark_uptodate(folio); - folio_unlock(folio); - } + folio_unlock(folio); } + return ret < 0 ? ret : 0; } static void erofs_fscache_readahead(struct readahead_control *rac) { - struct inode *inode = rac->mapping->host; - struct super_block *sb = inode->i_sb; - size_t len, count, done = 0; - erofs_off_t pos; - loff_t start, offset; - int ret; + struct folio *folio; + size_t len, done = 0; + loff_t start, pos; + bool unlock; + int ret, size; if (!readahead_count(rac)) return; @@ -349,67 +353,22 @@ static void erofs_fscache_readahead(struct readahead_control *rac) len = readahead_length(rac); do { - struct erofs_map_blocks map; - struct erofs_map_dev mdev; - struct netfs_io_request *rreq; - pos = start + done; - map.m_la = pos; - - ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); - if (ret) + ret = erofs_fscache_data_read(rac->mapping, pos, + len - done, &unlock); + if (ret <= 0) return; - offset = start + done; - count = min_t(size_t, map.m_llen - (pos - map.m_la), - len - done); - - if (!(map.m_flags & EROFS_MAP_MAPPED)) { - struct iov_iter iter; - - iov_iter_xarray(&iter, READ, &rac->mapping->i_pages, - offset, count); - iov_iter_zero(count, &iter); - - erofs_fscache_advance_folios(rac, count, true); - ret = count; - continue; - } - - if (map.m_flags & EROFS_MAP_META) { - struct folio *folio = readahead_folio(rac); - - ret = erofs_fscache_read_folio_inline(folio, &map); - if (!ret) { + size = ret; + while (size) { + folio = readahead_folio(rac); + size -= folio_size(folio); + if (unlock) { folio_mark_uptodate(folio); - ret = folio_size(folio); + folio_unlock(folio); } - - folio_unlock(folio); - continue; } - - mdev = (struct erofs_map_dev) { - .m_deviceid = map.m_deviceid, - .m_pa = map.m_pa, - }; - ret = erofs_map_dev(sb, &mdev); - if (ret) - return; - - rreq = erofs_fscache_alloc_request(rac->mapping, offset, count); - if (IS_ERR(rreq)) - return; - /* - * Drop the ref of folios here. Unlock them in - * rreq_unlock_folios() when rreq complete. - */ - erofs_fscache_advance_folios(rac, count, false); - ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - rreq, mdev.m_pa + (pos - map.m_la)); - if (!ret) - ret = count; - } while (ret > 0 && ((done += ret) < len)); + } while ((done += ret) < len); } static const struct address_space_operations erofs_fscache_meta_aops = { @@ -421,9 +380,114 @@ const struct address_space_operations erofs_fscache_access_aops = { .readahead = erofs_fscache_readahead, }; -int erofs_fscache_register_cookie(struct super_block *sb, - struct erofs_fscache **fscache, - char *name, bool need_inode) +static void erofs_fscache_domain_put(struct erofs_domain *domain) +{ + if (!domain) + return; + mutex_lock(&erofs_domain_list_lock); + if (refcount_dec_and_test(&domain->ref)) { + list_del(&domain->list); + if (list_empty(&erofs_domain_list)) { + kern_unmount(erofs_pseudo_mnt); + erofs_pseudo_mnt = NULL; + } + mutex_unlock(&erofs_domain_list_lock); + fscache_relinquish_volume(domain->volume, NULL, false); + kfree(domain->domain_id); + kfree(domain); + return; + } + mutex_unlock(&erofs_domain_list_lock); +} + +static int erofs_fscache_register_volume(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + char *domain_id = sbi->opt.domain_id; + struct fscache_volume *volume; + char *name; + int ret = 0; + + name = kasprintf(GFP_KERNEL, "erofs,%s", + domain_id ? domain_id : sbi->opt.fsid); + if (!name) + return -ENOMEM; + + volume = fscache_acquire_volume(name, NULL, NULL, 0); + if (IS_ERR_OR_NULL(volume)) { + erofs_err(sb, "failed to register volume for %s", name); + ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; + volume = NULL; + } + + sbi->volume = volume; + kfree(name); + return ret; +} + +static int erofs_fscache_init_domain(struct super_block *sb) +{ + int err; + struct erofs_domain *domain; + struct erofs_sb_info *sbi = EROFS_SB(sb); + + domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL); + if (!domain) + return -ENOMEM; + + domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL); + if (!domain->domain_id) { + kfree(domain); + return -ENOMEM; + } + + err = erofs_fscache_register_volume(sb); + if (err) + goto out; + + if (!erofs_pseudo_mnt) { + erofs_pseudo_mnt = kern_mount(&erofs_fs_type); + if (IS_ERR(erofs_pseudo_mnt)) { + err = PTR_ERR(erofs_pseudo_mnt); + goto out; + } + } + + domain->volume = sbi->volume; + refcount_set(&domain->ref, 1); + list_add(&domain->list, &erofs_domain_list); + sbi->domain = domain; + return 0; +out: + kfree(domain->domain_id); + kfree(domain); + return err; +} + +static int erofs_fscache_register_domain(struct super_block *sb) +{ + int err; + struct erofs_domain *domain; + struct erofs_sb_info *sbi = EROFS_SB(sb); + + mutex_lock(&erofs_domain_list_lock); + list_for_each_entry(domain, &erofs_domain_list, list) { + if (!strcmp(domain->domain_id, sbi->opt.domain_id)) { + sbi->domain = domain; + sbi->volume = domain->volume; + refcount_inc(&domain->ref); + mutex_unlock(&erofs_domain_list_lock); + return 0; + } + } + err = erofs_fscache_init_domain(sb); + mutex_unlock(&erofs_domain_list_lock); + return err; +} + +static +struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb, + char *name, bool need_inode) { struct fscache_volume *volume = EROFS_SB(sb)->volume; struct erofs_fscache *ctx; @@ -432,7 +496,7 @@ int erofs_fscache_register_cookie(struct super_block *sb, ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) - return -ENOMEM; + return ERR_PTR(-ENOMEM); cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE, name, strlen(name), NULL, 0, 0); @@ -462,63 +526,146 @@ int erofs_fscache_register_cookie(struct super_block *sb, ctx->inode = inode; } - *fscache = ctx; - return 0; + return ctx; err_cookie: fscache_unuse_cookie(ctx->cookie, NULL, NULL); fscache_relinquish_cookie(ctx->cookie, false); - ctx->cookie = NULL; err: kfree(ctx); - return ret; + return ERR_PTR(ret); } -void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) +static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx) { - struct erofs_fscache *ctx = *fscache; - - if (!ctx) - return; - fscache_unuse_cookie(ctx->cookie, NULL, NULL); fscache_relinquish_cookie(ctx->cookie, false); - ctx->cookie = NULL; - iput(ctx->inode); - ctx->inode = NULL; - + kfree(ctx->name); kfree(ctx); - *fscache = NULL; +} + +static +struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb, + char *name, bool need_inode) +{ + int err; + struct inode *inode; + struct erofs_fscache *ctx; + struct erofs_domain *domain = EROFS_SB(sb)->domain; + + ctx = erofs_fscache_acquire_cookie(sb, name, need_inode); + if (IS_ERR(ctx)) + return ctx; + + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) { + err = -ENOMEM; + goto out; + } + + inode = new_inode(erofs_pseudo_mnt->mnt_sb); + if (!inode) { + err = -ENOMEM; + goto out; + } + + ctx->domain = domain; + ctx->anon_inode = inode; + inode->i_private = ctx; + refcount_inc(&domain->ref); + return ctx; +out: + erofs_fscache_relinquish_cookie(ctx); + return ERR_PTR(err); +} + +static +struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, + char *name, bool need_inode) +{ + struct inode *inode; + struct erofs_fscache *ctx; + struct erofs_domain *domain = EROFS_SB(sb)->domain; + struct super_block *psb = erofs_pseudo_mnt->mnt_sb; + + mutex_lock(&erofs_domain_cookies_lock); + list_for_each_entry(inode, &psb->s_inodes, i_sb_list) { + ctx = inode->i_private; + if (!ctx || ctx->domain != domain || strcmp(ctx->name, name)) + continue; + igrab(inode); + mutex_unlock(&erofs_domain_cookies_lock); + return ctx; + } + ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode); + mutex_unlock(&erofs_domain_cookies_lock); + return ctx; +} + +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, bool need_inode) +{ + if (EROFS_SB(sb)->opt.domain_id) + return erofs_domain_register_cookie(sb, name, need_inode); + return erofs_fscache_acquire_cookie(sb, name, need_inode); +} + +void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx) +{ + bool drop; + struct erofs_domain *domain; + + if (!ctx) + return; + domain = ctx->domain; + if (domain) { + mutex_lock(&erofs_domain_cookies_lock); + drop = atomic_read(&ctx->anon_inode->i_count) == 1; + iput(ctx->anon_inode); + mutex_unlock(&erofs_domain_cookies_lock); + if (!drop) + return; + } + + erofs_fscache_relinquish_cookie(ctx); + erofs_fscache_domain_put(domain); } int erofs_fscache_register_fs(struct super_block *sb) { + int ret; struct erofs_sb_info *sbi = EROFS_SB(sb); - struct fscache_volume *volume; - char *name; - int ret = 0; + struct erofs_fscache *fscache; - name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid); - if (!name) - return -ENOMEM; + if (sbi->opt.domain_id) + ret = erofs_fscache_register_domain(sb); + else + ret = erofs_fscache_register_volume(sb); + if (ret) + return ret; - volume = fscache_acquire_volume(name, NULL, NULL, 0); - if (IS_ERR_OR_NULL(volume)) { - erofs_err(sb, "failed to register volume for %s", name); - ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; - volume = NULL; - } + /* acquired domain/volume will be relinquished in kill_sb() on error */ + fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true); + if (IS_ERR(fscache)) + return PTR_ERR(fscache); - sbi->volume = volume; - kfree(name); - return ret; + sbi->s_fscache = fscache; + return 0; } void erofs_fscache_unregister_fs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - fscache_relinquish_volume(sbi->volume, NULL, false); + erofs_fscache_unregister_cookie(sbi->s_fscache); + + if (sbi->domain) + erofs_fscache_domain_put(sbi->domain); + else + fscache_relinquish_volume(sbi->volume, NULL, false); + + sbi->s_fscache = NULL; sbi->volume = NULL; + sbi->domain = NULL; } diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 95a403720e8c..ad2a82f2eb4c 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -214,7 +214,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, /* if it cannot be handled with fast symlink scheme */ if (vi->datalayout != EROFS_INODE_FLAT_INLINE || - inode->i_size >= EROFS_BLKSIZ) { + inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) { inode->i_op = &erofs_symlink_iops; return 0; } @@ -241,7 +241,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, return 0; } -static int erofs_fill_inode(struct inode *inode, int isdir) +static int erofs_fill_inode(struct inode *inode) { struct erofs_inode *vi = EROFS_I(inode); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; @@ -249,7 +249,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) unsigned int ofs; int err = 0; - trace_erofs_fill_inode(inode, isdir); + trace_erofs_fill_inode(inode); /* read inode base data from disk */ kaddr = erofs_read_inode(&buf, inode, &ofs); @@ -324,21 +324,13 @@ static int erofs_iget_set_actor(struct inode *inode, void *opaque) return 0; } -static inline struct inode *erofs_iget_locked(struct super_block *sb, - erofs_nid_t nid) +struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) { const unsigned long hashval = erofs_inode_hash(nid); + struct inode *inode; - return iget5_locked(sb, hashval, erofs_ilookup_test_actor, + inode = iget5_locked(sb, hashval, erofs_ilookup_test_actor, erofs_iget_set_actor, &nid); -} - -struct inode *erofs_iget(struct super_block *sb, - erofs_nid_t nid, - bool isdir) -{ - struct inode *inode = erofs_iget_locked(sb, nid); - if (!inode) return ERR_PTR(-ENOMEM); @@ -348,10 +340,10 @@ struct inode *erofs_iget(struct super_block *sb, vi->nid = nid; - err = erofs_fill_inode(inode, isdir); - if (!err) + err = erofs_fill_inode(inode); + if (!err) { unlock_new_inode(inode); - else { + } else { iget_failed(inode); inode = ERR_PTR(err); } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a01cc82795a2..1701df48c446 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -76,6 +76,7 @@ struct erofs_mount_opts { #endif unsigned int mount_opt; char *fsid; + char *domain_id; }; struct erofs_dev_context { @@ -98,9 +99,19 @@ struct erofs_sb_lz4_info { u16 max_pclusterblks; }; +struct erofs_domain { + refcount_t ref; + struct list_head list; + struct fscache_volume *volume; + char *domain_id; +}; + struct erofs_fscache { struct fscache_cookie *cookie; struct inode *inode; + struct inode *anon_inode; + struct erofs_domain *domain; + char *name; }; struct erofs_sb_info { @@ -120,6 +131,7 @@ struct erofs_sb_info { struct inode *managed_cache; struct erofs_sb_lz4_info lz4; + struct inode *packed_inode; #endif /* CONFIG_EROFS_FS_ZIP */ struct erofs_dev_context *devs; struct dax_device *dax_dev; @@ -157,6 +169,7 @@ struct erofs_sb_info { /* fscache support */ struct fscache_volume *volume; struct erofs_fscache *s_fscache; + struct erofs_domain *domain; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -183,7 +196,6 @@ enum { EROFS_ZIP_CACHE_READAROUND }; -#ifdef CONFIG_EROFS_FS_ZIP #define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) /* basic unit of the workstation of a super_block */ @@ -223,7 +235,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) return atomic_cond_read_relaxed(&grp->refcount, VAL != EROFS_LOCKED_MAGIC); } -#endif /* !CONFIG_EROFS_FS_ZIP */ /* we strictly follow PAGE_SIZE and no buffer head yet */ #define LOG_BLOCK_SIZE PAGE_SHIFT @@ -277,6 +288,8 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) +EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) +EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) /* atomic flag definitions */ @@ -312,8 +325,13 @@ struct erofs_inode { unsigned char z_algorithmtype[2]; unsigned char z_logical_clusterbits; unsigned long z_tailextent_headlcn; - erofs_off_t z_idataoff; - unsigned short z_idata_size; + union { + struct { + erofs_off_t z_idataoff; + unsigned short z_idata_size; + }; + erofs_off_t z_fragmentoff; + }; }; #endif /* CONFIG_EROFS_FS_ZIP */ }; @@ -364,6 +382,7 @@ struct page *erofs_grab_cache_page_nowait(struct address_space *mapping, } extern const struct super_operations erofs_sops; +extern struct file_system_type erofs_fs_type; extern const struct address_space_operations erofs_raw_access_aops; extern const struct address_space_operations z_erofs_aops; @@ -371,6 +390,8 @@ extern const struct address_space_operations z_erofs_aops; enum { BH_Encoded = BH_PrivateStart, BH_FullMapped, + BH_Fragment, + BH_Partialref, }; /* Has a disk mapping */ @@ -381,6 +402,10 @@ enum { #define EROFS_MAP_ENCODED (1 << BH_Encoded) /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) +/* Located in the special packed inode */ +#define EROFS_MAP_FRAGMENT (1 << BH_Fragment) +/* The extent refers to partial decompressed data */ +#define EROFS_MAP_PARTIAL_REF (1 << BH_Partialref) struct erofs_map_blocks { struct erofs_buf buf; @@ -402,11 +427,12 @@ struct erofs_map_blocks { #define EROFS_GET_BLOCKS_FIEMAP 0x0002 /* Used to map the whole extent if non-negligible data is requested for LZMA */ #define EROFS_GET_BLOCKS_READMORE 0x0004 -/* Used to map tail extent for tailpacking inline pcluster */ +/* Used to map tail extent for tailpacking inline or fragment pcluster */ #define EROFS_GET_BLOCKS_FINDTAIL 0x0008 enum { Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, + Z_EROFS_COMPRESSION_INTERLACED, Z_EROFS_COMPRESSION_RUNTIME_MAX }; @@ -466,7 +492,7 @@ extern const struct inode_operations erofs_generic_iops; extern const struct inode_operations erofs_symlink_iops; extern const struct inode_operations erofs_fast_symlink_iops; -struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); +struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); @@ -581,27 +607,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb, int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); -int erofs_fscache_register_cookie(struct super_block *sb, - struct erofs_fscache **fscache, - char *name, bool need_inode); -void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, bool need_inode); +void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); extern const struct address_space_operations erofs_fscache_access_aops; #else static inline int erofs_fscache_register_fs(struct super_block *sb) { - return 0; + return -EOPNOTSUPP; } static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} -static inline int erofs_fscache_register_cookie(struct super_block *sb, - struct erofs_fscache **fscache, - char *name, bool need_inode) +static inline +struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, + char *name, bool need_inode) { - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); } -static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) +static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) { } #endif diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index fd75506799c4..0dc34721080c 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -185,7 +185,6 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid, if (IS_ERR(de)) return PTR_ERR(de); - /* the target page has been mapped */ if (ndirents) de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); @@ -197,9 +196,7 @@ int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid, return PTR_ERR_OR_ZERO(de); } -/* NOTE: i_mutex is already held by vfs */ -static struct dentry *erofs_lookup(struct inode *dir, - struct dentry *dentry, +static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int err; @@ -207,17 +204,11 @@ static struct dentry *erofs_lookup(struct inode *dir, unsigned int d_type; struct inode *inode; - DBG_BUGON(!d_really_is_negative(dentry)); - /* dentry must be unhashed in lookup, no need to worry about */ - DBG_BUGON(!d_unhashed(dentry)); - trace_erofs_lookup(dir, dentry, flags); - /* file name exceeds fs limit */ if (dentry->d_name.len > EROFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - /* false uninitialized warnings on gcc 4.8.x */ err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); if (err == -ENOENT) { @@ -228,7 +219,7 @@ static struct dentry *erofs_lookup(struct inode *dir, } else { erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__, dentry, nid, d_type); - inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR); + inode = erofs_iget(dir->i_sb, nid); } return d_splice_alias(inode, dentry); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 3173debeaa5a..2cf96ce1c32e 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -224,10 +224,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, struct erofs_device_info *dif, erofs_off_t *pos) { struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_fscache *fscache; struct erofs_deviceslot *dis; struct block_device *bdev; void *ptr; - int ret; ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); if (IS_ERR(ptr)) @@ -245,10 +245,10 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, } if (erofs_is_fscache_mode(sb)) { - ret = erofs_fscache_register_cookie(sb, &dif->fscache, - dif->path, false); - if (ret) - return ret; + fscache = erofs_fscache_register_cookie(sb, dif->path, false); + if (IS_ERR(fscache)) + return PTR_ERR(fscache); + dif->fscache = fscache; } else { bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, sb->s_type); @@ -381,6 +381,17 @@ static int erofs_read_superblock(struct super_block *sb) #endif sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); sbi->root_nid = le16_to_cpu(dsb->root_nid); +#ifdef CONFIG_EROFS_FS_ZIP + sbi->packed_inode = NULL; + if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) { + sbi->packed_inode = + erofs_iget(sb, le64_to_cpu(dsb->packed_nid)); + if (IS_ERR(sbi->packed_inode)) { + ret = PTR_ERR(sbi->packed_inode); + goto out; + } + } +#endif sbi->inos = le64_to_cpu(dsb->inos); sbi->build_time = le64_to_cpu(dsb->build_time); @@ -411,6 +422,10 @@ static int erofs_read_superblock(struct super_block *sb) erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); if (erofs_is_fscache_mode(sb)) erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); + if (erofs_sb_has_fragments(sbi)) + erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); + if (erofs_sb_has_dedupe(sbi)) + erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!"); out: erofs_put_metabuf(&buf); return ret; @@ -440,6 +455,7 @@ enum { Opt_dax_enum, Opt_device, Opt_fsid, + Opt_domain_id, Opt_err }; @@ -465,6 +481,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), fsparam_string("device", Opt_device), fsparam_string("fsid", Opt_fsid), + fsparam_string("domain_id", Opt_domain_id), {} }; @@ -570,6 +587,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, errorfc(fc, "fsid option not supported"); #endif break; + case Opt_domain_id: +#ifdef CONFIG_EROFS_FS_ONDEMAND + kfree(ctx->opt.domain_id); + ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); + if (!ctx->opt.domain_id) + return -ENOMEM; +#else + errorfc(fc, "domain_id option not supported"); +#endif + break; default: return -ENOPARAM; } @@ -641,7 +668,7 @@ static int erofs_init_managed_cache(struct super_block *sb) { return 0; } static struct inode *erofs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { - return erofs_iget(sb, ino, false); + return erofs_iget(sb, ino); } static struct dentry *erofs_fh_to_dentry(struct super_block *sb, @@ -667,7 +694,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); if (err) return ERR_PTR(err); - return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR)); + return d_obtain_alias(erofs_iget(child->d_sb, nid)); } static const struct export_operations erofs_export_ops = { @@ -676,6 +703,13 @@ static const struct export_operations erofs_export_ops = { .get_parent = erofs_get_parent, }; +static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) +{ + static const struct tree_descr empty_descr = {""}; + + return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); +} + static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -695,6 +729,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_fs_info = sbi; sbi->opt = ctx->opt; ctx->opt.fsid = NULL; + ctx->opt.domain_id = NULL; sbi->devs = ctx->devs; ctx->devs = NULL; @@ -706,11 +741,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - err = erofs_fscache_register_cookie(sb, &sbi->s_fscache, - sbi->opt.fsid, true); - if (err) - return err; - err = super_setup_bdi(sb); if (err) return err; @@ -752,7 +782,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) #endif /* get the root inode */ - inode = erofs_iget(sb, ROOT_NID(sbi), true); + inode = erofs_iget(sb, ROOT_NID(sbi)); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -781,6 +811,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return 0; } +static int erofs_fc_anon_get_tree(struct fs_context *fc) +{ + return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); +} + static int erofs_fc_get_tree(struct fs_context *fc) { struct erofs_fs_context *ctx = fc->fs_private; @@ -817,7 +852,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data) fs_put_dax(dif->dax_dev, NULL); if (dif->bdev) blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); - erofs_fscache_unregister_cookie(&dif->fscache); + erofs_fscache_unregister_cookie(dif->fscache); + dif->fscache = NULL; kfree(dif->path); kfree(dif); return 0; @@ -838,6 +874,7 @@ static void erofs_fc_free(struct fs_context *fc) erofs_free_dev_context(ctx->devs); kfree(ctx->opt.fsid); + kfree(ctx->opt.domain_id); kfree(ctx); } @@ -848,10 +885,21 @@ static const struct fs_context_operations erofs_context_ops = { .free = erofs_fc_free, }; +static const struct fs_context_operations erofs_anon_context_ops = { + .get_tree = erofs_fc_anon_get_tree, +}; + static int erofs_init_fs_context(struct fs_context *fc) { - struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + struct erofs_fs_context *ctx; + + /* pseudo mount for anon inodes */ + if (fc->sb_flags & SB_KERNMOUNT) { + fc->ops = &erofs_anon_context_ops; + return 0; + } + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); @@ -878,8 +926,14 @@ static void erofs_kill_sb(struct super_block *sb) WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); + /* pseudo mount for anon inodes */ + if (sb->s_flags & SB_KERNMOUNT) { + kill_anon_super(sb); + return; + } + if (erofs_is_fscache_mode(sb)) - generic_shutdown_super(sb); + kill_anon_super(sb); else kill_block_super(sb); @@ -889,9 +943,9 @@ static void erofs_kill_sb(struct super_block *sb) erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); - erofs_fscache_unregister_cookie(&sbi->s_fscache); erofs_fscache_unregister_fs(sb); kfree(sbi->opt.fsid); + kfree(sbi->opt.domain_id); kfree(sbi); sb->s_fs_info = NULL; } @@ -908,11 +962,13 @@ static void erofs_put_super(struct super_block *sb) #ifdef CONFIG_EROFS_FS_ZIP iput(sbi->managed_cache); sbi->managed_cache = NULL; + iput(sbi->packed_inode); + sbi->packed_inode = NULL; #endif - erofs_fscache_unregister_cookie(&sbi->s_fscache); + erofs_fscache_unregister_fs(sb); } -static struct file_system_type erofs_fs_type = { +struct file_system_type erofs_fs_type = { .owner = THIS_MODULE, .name = "erofs", .init_fs_context = erofs_init_fs_context, @@ -1044,6 +1100,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_EROFS_FS_ONDEMAND if (opt->fsid) seq_printf(seq, ",fsid=%s", opt->fsid); + if (opt->domain_id) + seq_printf(seq, ",domain_id=%s", opt->domain_id); #endif return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index c1383e508bbe..783bb7b21b51 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -76,6 +76,8 @@ EROFS_ATTR_FEATURE(device_table); EROFS_ATTR_FEATURE(compr_head2); EROFS_ATTR_FEATURE(sb_chksum); EROFS_ATTR_FEATURE(ztailpacking); +EROFS_ATTR_FEATURE(fragments); +EROFS_ATTR_FEATURE(dedupe); static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), @@ -86,6 +88,8 @@ static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(compr_head2), ATTR_LIST(sb_chksum), ATTR_LIST(ztailpacking), + ATTR_LIST(fragments), + ATTR_LIST(dedupe), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); @@ -201,12 +205,27 @@ static struct kobject erofs_feat = { int erofs_register_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); + char *name; + char *str = NULL; int err; + if (erofs_is_fscache_mode(sb)) { + if (sbi->opt.domain_id) { + str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id, + sbi->opt.fsid); + if (!str) + return -ENOMEM; + name = str; + } else { + name = sbi->opt.fsid; + } + } else { + name = sb->s_id; + } sbi->s_kobj.kset = &erofs_root; init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", - erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id); + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", name); + kfree(str); if (err) goto put_sb_kobj; return 0; diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 332462c59f11..0a43c9ee9f8f 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -39,9 +39,7 @@ static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi, #ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; -#ifdef CONFIG_EROFS_FS_SECURITY extern const struct xattr_handler erofs_xattr_security_handler; -#endif static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx) { diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5792ca9e0d5e..cce56dde135c 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -650,6 +650,35 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, la < fe->headoffset; } +static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, + struct page *page, unsigned int pageofs, + unsigned int len) +{ + struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + u8 *src, *dst; + unsigned int i, cnt; + + pos += EROFS_I(inode)->z_fragmentoff; + for (i = 0; i < len; i += cnt) { + cnt = min_t(unsigned int, len - i, + EROFS_BLKSIZ - erofs_blkoff(pos)); + src = erofs_bread(&buf, packed_inode, + erofs_blknr(pos), EROFS_KMAP); + if (IS_ERR(src)) { + erofs_put_metabuf(&buf); + return PTR_ERR(src); + } + + dst = kmap_local_page(page); + memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt); + kunmap_local(dst); + pos += cnt; + } + erofs_put_metabuf(&buf); + return 0; +} + static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct page *page, struct page **pagepool) { @@ -688,7 +717,8 @@ repeat: /* didn't get a valid pcluster previously (very rare) */ } - if (!(map->m_flags & EROFS_MAP_MAPPED)) + if (!(map->m_flags & EROFS_MAP_MAPPED) || + map->m_flags & EROFS_MAP_FRAGMENT) goto hitted; err = z_erofs_collector_begin(fe); @@ -735,6 +765,24 @@ hitted: zero_user_segment(page, cur, end); goto next_part; } + if (map->m_flags & EROFS_MAP_FRAGMENT) { + unsigned int pageofs, skip, len; + + if (offset > map->m_la) { + pageofs = 0; + skip = offset - map->m_la; + } else { + pageofs = map->m_la & ~PAGE_MASK; + skip = 0; + } + len = min_t(unsigned int, map->m_llen - skip, end - cur); + err = z_erofs_read_fragment(inode, skip, page, pageofs, len); + if (err) + goto out; + ++spiltted; + tight = false; + goto next_part; + } exclusive = (!cur && (!spiltted || tight)); if (cur) @@ -766,6 +814,7 @@ retry: fe->pcl->multibases = true; if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && + !(map->m_flags & EROFS_MAP_PARTIAL_REF) && fe->pcl->length == map->m_llen) fe->pcl->partial = false; if (fe->pcl->length < offset + end - map->m_la) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index d58549ca1df9..44c27ef39c43 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -17,7 +17,7 @@ int z_erofs_fill_inode(struct inode *inode) struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); if (!erofs_sb_has_big_pcluster(sbi) && - !erofs_sb_has_ztailpacking(sbi) && + !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) && vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { vi->z_advise = 0; vi->z_algorithmtype[0] = 0; @@ -55,10 +55,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; - DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && - !erofs_sb_has_ztailpacking(EROFS_SB(sb)) && - vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); - pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), @@ -69,6 +65,16 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) } h = kaddr + erofs_blkoff(pos); + /* + * if the highest bit of the 8-byte map header is set, the whole file + * is stored in the packed inode. The rest bits keeps z_fragmentoff. + */ + if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { + vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; + vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); + vi->z_tailextent_headlcn = 0; + goto unmap_done; + } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; @@ -123,6 +129,20 @@ unmap_done: if (err < 0) goto out_unlock; } + + if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && + !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { + struct erofs_map_blocks map = { + .buf = __EROFS_BUF_INITIALIZER + }; + + vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); + err = z_erofs_do_map_blocks(inode, &map, + EROFS_GET_BLOCKS_FINDTAIL); + erofs_put_metabuf(&map.buf); + if (err < 0) + goto out_unlock; + } /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); @@ -143,20 +163,9 @@ struct z_erofs_maprecorder { u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; + bool partialref; }; -static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, - erofs_blk_t eblk) -{ - struct super_block *const sb = m->inode->i_sb; - - m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk, - EROFS_KMAP_ATOMIC); - if (IS_ERR(m->kaddr)) - return PTR_ERR(m->kaddr); - return 0; -} - static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned long lcn) { @@ -169,11 +178,11 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, lcn * sizeof(struct z_erofs_vle_decompressed_index); struct z_erofs_vle_decompressed_index *di; unsigned int advise, type; - int err; - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); - if (err) - return err; + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, + erofs_blknr(pos), EROFS_KMAP_ATOMIC); + if (IS_ERR(m->kaddr)) + return PTR_ERR(m->kaddr); m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index); m->lcn = lcn; @@ -201,6 +210,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + if (advise & Z_EROFS_VLE_DI_PARTIAL_REF) + m->partialref = true; m->clusterofs = le16_to_cpu(di->di_clusterofs); m->pblk = le32_to_cpu(di->di_u.blkaddr); break; @@ -370,7 +381,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, unsigned int compacted_4b_initial, compacted_2b; unsigned int amortizedshift; erofs_off_t pos; - int err; if (lclusterbits != 12) return -EOPNOTSUPP; @@ -407,9 +417,10 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, amortizedshift = 2; out: pos += lcn * (1 << amortizedshift); - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); - if (err) - return err; + m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, + erofs_blknr(pos), EROFS_KMAP_ATOMIC); + if (IS_ERR(m->kaddr)) + return PTR_ERR(m->kaddr); return unpack_compacted_index(m, amortizedshift, pos, lookahead); } @@ -598,6 +609,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, { struct erofs_inode *const vi = EROFS_I(inode); bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; + bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; struct z_erofs_maprecorder m = { .inode = inode, .map = map, @@ -663,15 +675,23 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EOPNOTSUPP; goto unmap_out; } - + if (m.partialref) + map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_llen = end - map->m_la; - if (flags & EROFS_GET_BLOCKS_FINDTAIL) + if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; + /* for non-compact indexes, fragmentoff is 64 bits */ + if (fragment && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) + vi->z_fragmentoff |= (u64)m.pblk << 32; + } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_META; map->m_pa = vi->z_idataoff; map->m_plen = vi->z_idata_size; + } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { + map->m_flags |= EROFS_MAP_FRAGMENT; } else { map->m_pa = blknr_to_addr(m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -679,12 +699,18 @@ static int z_erofs_do_map_blocks(struct inode *inode, goto out; } - if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) - map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; - else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) + if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) { + if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) + map->m_algorithmformat = + Z_EROFS_COMPRESSION_INTERLACED; + else + map->m_algorithmformat = + Z_EROFS_COMPRESSION_SHIFTED; + } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { map->m_algorithmformat = vi->z_algorithmtype[1]; - else + } else { map->m_algorithmformat = vi->z_algorithmtype[0]; + } if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && @@ -705,10 +731,10 @@ out: return err; } -int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, +int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) { + struct erofs_inode *const vi = EROFS_I(inode); int err = 0; trace_z_erofs_map_blocks_iter_enter(inode, map, flags); @@ -725,6 +751,15 @@ int z_erofs_map_blocks_iter(struct inode *inode, if (err) goto out; + if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && + !vi->z_tailextent_headlcn) { + map->m_la = 0; + map->m_llen = inode->i_size; + map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | + EROFS_MAP_FRAGMENT; + goto out; + } + err = z_erofs_do_map_blocks(inode, map, flags); out: trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); @@ -751,7 +786,8 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_pa; + iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; diff --git a/fs/exec.c b/fs/exec.c index d046dbb9cbd0..69a572fc57db 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -957,8 +957,7 @@ struct file *open_exec(const char *name) } EXPORT_SYMBOL(open_exec); -#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \ - defined(CONFIG_BINFMT_ELF_FDPIC) +#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC) ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3bf9a6926798..e5f2f5ca5120 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2977,6 +2977,7 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct user_namespace *, struct dentry *, struct iattr *); +extern u32 ext4_dio_alignment(struct inode *inode); extern int ext4_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 109d07629f81..8bb1c35fd6dd 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -36,19 +36,34 @@ #include "acl.h" #include "truncate.h" -static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) +/* + * Returns %true if the given DIO request should be attempted with DIO, or + * %false if it should fall back to buffered I/O. + * + * DIO isn't well specified; when it's unsupported (either due to the request + * being misaligned, or due to the file not supporting DIO at all), filesystems + * either fall back to buffered I/O or return EINVAL. For files that don't use + * any special features like encryption or verity, ext4 has traditionally + * returned EINVAL for misaligned DIO. iomap_dio_rw() uses this convention too. + * In this case, we should attempt the DIO, *not* fall back to buffered I/O. + * + * In contrast, in cases where DIO is unsupported due to ext4 features, ext4 + * traditionally falls back to buffered I/O. + * + * This function implements the traditional ext4 behavior in all these cases. + */ +static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); + u32 dio_align = ext4_dio_alignment(inode); - if (!fscrypt_dio_supported(iocb, iter)) - return false; - if (fsverity_active(inode)) + if (dio_align == 0) return false; - if (ext4_should_journal_data(inode)) - return false; - if (ext4_has_inline_data(inode)) - return false; - return true; + + if (dio_align == 1) + return true; + + return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); } static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -63,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); } - if (!ext4_dio_supported(iocb, to)) { + if (!ext4_should_use_dio(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on @@ -511,7 +526,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) } /* Fallback to buffered I/O if the inode does not support direct I/O. */ - if (!ext4_dio_supported(iocb, from)) { + if (!ext4_should_use_dio(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 601214453c3a..364774230d87 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5550,6 +5550,22 @@ err_out: return error; } +u32 ext4_dio_alignment(struct inode *inode) +{ + if (fsverity_active(inode)) + return 0; + if (ext4_should_journal_data(inode)) + return 0; + if (ext4_has_inline_data(inode)) + return 0; + if (IS_ENCRYPTED(inode)) { + if (!fscrypt_dio_supported(inode)) + return 0; + return i_blocksize(inode); + } + return 1; /* use the iomap defaults */ +} + int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -5565,6 +5581,27 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->btime.tv_nsec = ei->i_crtime.tv_nsec; } + /* + * Return the DIO alignment restrictions if requested. We only return + * this information when requested, since on encrypted files it might + * take a fair bit of work to get if the file wasn't opened recently. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + u32 dio_align = ext4_dio_alignment(inode); + + stat->result_mask |= STATX_DIOALIGN; + if (dio_align == 1) { + struct block_device *bdev = inode->i_sb->s_bdev; + + /* iomap defaults */ + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + } else { + stat->dio_mem_align = dio_align; + stat->dio_offset_align = dio_align; + } + } + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (flags & EXT4_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index e02a5f14e021..3d21eae267fc 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -75,7 +75,7 @@ static void __read_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, iter_all) { page = bv->bv_page; - /* PG_error was set if any post_read step failed */ + /* PG_error was set if verity failed. */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); /* will re-read again later */ @@ -96,10 +96,12 @@ static void decrypt_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; - fscrypt_decrypt_bio(ctx->bio); - - bio_post_read_processing(ctx); + if (fscrypt_decrypt_bio(bio)) + bio_post_read_processing(ctx); + else + __read_end_io(bio); } static void verity_work(struct work_struct *work) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa3ccddfa037..93cc2ec51c2a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -139,7 +139,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) continue; } - /* PG_error was set if decryption or verity failed. */ + /* PG_error was set if verity failed. */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); /* will re-read again later */ @@ -185,7 +185,7 @@ static void f2fs_verify_bio(struct work_struct *work) struct page *page = bv->bv_page; if (!f2fs_is_compressed_page(page) && - !PageError(page) && !fsverity_verify_page(page)) + !fsverity_verify_page(page)) SetPageError(page); } } else { @@ -236,10 +236,9 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bio_for_each_segment_all(bv, ctx->bio, iter_all) { struct page *page = bv->bv_page; - /* PG_error was set if decryption failed. */ if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, PageError(page), - blkaddr, in_task); + f2fs_end_read_compressed_page(page, false, blkaddr, + in_task); else all_compressed = false; @@ -259,14 +258,17 @@ static void f2fs_post_read_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; - if (ctx->enabled_steps & STEP_DECRYPT) - fscrypt_decrypt_bio(ctx->bio); + if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) { + f2fs_finish_read_bio(bio, true); + return; + } if (ctx->enabled_steps & STEP_DECOMPRESS) f2fs_handle_step_decompress(ctx, true); - f2fs_verify_and_finish_bio(ctx->bio, true); + f2fs_verify_and_finish_bio(bio, true); } static void f2fs_read_end_io(struct bio *bio) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3c7cdb70fe2e..aea816a133a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4471,17 +4471,6 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode, f2fs_mark_inode_dirty_sync(inode, true); } -static inline int block_unaligned_IO(struct inode *inode, - struct kiocb *iocb, struct iov_iter *iter) -{ - unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); - unsigned int blocksize_mask = (1 << i_blkbits) - 1; - loff_t offset = iocb->ki_pos; - unsigned long align = offset | iov_iter_alignment(iter); - - return align & blocksize_mask; -} - static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, int flag) { @@ -4492,35 +4481,6 @@ static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, return sbi->aligned_blksize; } -static inline bool f2fs_force_buffered_io(struct inode *inode, - struct kiocb *iocb, struct iov_iter *iter) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int rw = iov_iter_rw(iter); - - if (!fscrypt_dio_supported(iocb, iter)) - return true; - if (fsverity_active(inode)) - return true; - if (f2fs_compressed_file(inode)) - return true; - - /* disallow direct IO if any of devices has unaligned blksize */ - if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) - return true; - - if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { - if (block_unaligned_IO(inode, iocb, iter)) - return true; - if (F2FS_IO_ALIGNED(sbi)) - return true; - } - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) - return true; - - return false; -} - static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) { return fsverity_active(inode) && diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce4905a073b3..791770507328 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -808,6 +808,29 @@ int f2fs_truncate(struct inode *inode) return 0; } +static bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!fscrypt_dio_supported(inode)) + return true; + if (fsverity_active(inode)) + return true; + if (f2fs_compressed_file(inode)) + return true; + + /* disallow direct IO if any of devices has unaligned blksize */ + if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) + return true; + + if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi)) + return true; + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + return true; + + return false; +} + int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -824,6 +847,24 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } + /* + * Return the DIO alignment restrictions if requested. We only return + * this information when requested, since on encrypted files it might + * take a fair bit of work to get if the file wasn't opened recently. + * + * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN + * cannot represent that, so in that case we report no DIO support. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + unsigned int bsize = i_blocksize(inode); + + stat->result_mask |= STATX_DIOALIGN; + if (!f2fs_force_buffered_io(inode, WRITE)) { + stat->dio_mem_align = bsize; + stat->dio_offset_align = bsize; + } + } + flags = fi->i_flags; if (flags & F2FS_COMPR_FL) stat->attributes |= STATX_ATTR_COMPRESSED; @@ -4182,7 +4223,7 @@ static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, if (!(iocb->ki_flags & IOCB_DIRECT)) return false; - if (f2fs_force_buffered_io(inode, iocb, iter)) + if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) return false; /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2451623c05a7..26817b5aeac7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3039,23 +3039,24 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, *lblk_bits_ret = 8 * sizeof(block_t); } -static int f2fs_get_num_devices(struct super_block *sb) +static struct block_device **f2fs_get_devices(struct super_block *sb, + unsigned int *num_devs) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct block_device **devs; + int i; - if (f2fs_is_multi_device(sbi)) - return sbi->s_ndevs; - return 1; -} + if (!f2fs_is_multi_device(sbi)) + return NULL; -static void f2fs_get_devices(struct super_block *sb, - struct request_queue **devs) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - int i; + devs = kmalloc_array(sbi->s_ndevs, sizeof(*devs), GFP_KERNEL); + if (!devs) + return ERR_PTR(-ENOMEM); for (i = 0; i < sbi->s_ndevs; i++) - devs[i] = bdev_get_queue(FDEV(i).bdev); + devs[i] = FDEV(i).bdev; + *num_devs = sbi->s_ndevs; + return devs; } static const struct fscrypt_operations f2fs_cryptops = { @@ -3066,7 +3067,6 @@ static const struct fscrypt_operations f2fs_cryptops = { .empty_dir = f2fs_empty_dir, .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, - .get_num_devices = f2fs_get_num_devices, .get_devices = f2fs_get_devices, }; #endif diff --git a/fs/fat/file.c b/fs/fat/file.c index 3e4eb3467cb4..8a6b493b5b5f 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -461,8 +461,9 @@ static int fat_allow_set_time(struct user_namespace *mnt_userns, { umode_t allow_utime = sbi->options.allow_utime; - if (!uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) { - if (in_group_p(i_gid_into_mnt(mnt_userns, inode))) + if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), + current_fsuid())) { + if (vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode))) allow_utime >>= 3; if (allow_utime & MAY_WRITE) return 1; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 6ce369b096d4..71911bf9ab34 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1302,7 +1302,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) memcpy(cluster, table, strlen(table) - strlen(fsname)); fsname++; - flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; + flags = DLM_LSFL_NEWEXCL; /* * create/join lockspace diff --git a/fs/internal.h b/fs/internal.h index 87e96b9024ce..3e206d3e317c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -16,6 +16,7 @@ struct shrink_control; struct fs_context; struct user_namespace; struct pipe_inode_info; +struct iov_iter; /* * block/bdev.c @@ -221,3 +222,5 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns, int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct xattr_ctx *ctx); + +ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f802223e71ab..cdc8e12cdac4 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -164,7 +164,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; host->h_cred = get_cred(ni->cred); - strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); + strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index bf274f23969b..284b019cb652 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -521,6 +521,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "NULL", @@ -530,6 +531,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_testres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, .pc_name = "TEST", @@ -539,6 +541,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "LOCK", @@ -548,6 +551,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "CANCEL", @@ -557,6 +561,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "UNLOCK", @@ -566,6 +571,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "GRANTED", @@ -575,6 +581,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_MSG", @@ -584,6 +591,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_MSG", @@ -593,6 +601,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_MSG", @@ -602,6 +611,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_MSG", @@ -611,6 +621,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_MSG", @@ -620,6 +631,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_RES", @@ -629,6 +641,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_RES", @@ -638,6 +651,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_RES", @@ -647,6 +661,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_RES", @@ -656,6 +671,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_res, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_RES", @@ -665,6 +681,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_reboot, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), + .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "SM_NOTIFY", @@ -674,6 +691,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -683,6 +701,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -692,6 +711,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "UNUSED", @@ -701,6 +721,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "SHARE", @@ -710,6 +731,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "UNSHARE", @@ -719,6 +741,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "NM_LOCK", @@ -728,6 +751,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_notify, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "FREE_ALL", diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index b09ca35b527c..e35c05e27806 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -555,6 +555,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "NULL", @@ -564,6 +565,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_testres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, .pc_name = "TEST", @@ -573,6 +575,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "LOCK", @@ -582,6 +585,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "CANCEL", @@ -591,6 +595,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "UNLOCK", @@ -600,6 +605,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "GRANTED", @@ -609,6 +615,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_MSG", @@ -618,6 +625,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_MSG", @@ -627,6 +635,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_MSG", @@ -636,6 +645,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_MSG", @@ -645,6 +655,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_MSG", @@ -654,6 +665,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "TEST_RES", @@ -663,6 +675,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "LOCK_RES", @@ -672,6 +685,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "CANCEL_RES", @@ -681,6 +695,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNLOCK_RES", @@ -690,6 +705,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_res, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), + .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "GRANTED_RES", @@ -699,6 +715,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_reboot, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), + .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "SM_NOTIFY", @@ -708,6 +725,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -717,6 +735,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -726,6 +745,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), + .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, .pc_name = "UNUSED", @@ -735,6 +755,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "SHARE", @@ -744,6 +765,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, .pc_name = "UNSHARE", @@ -753,6 +775,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, .pc_name = "NM_LOCK", @@ -762,6 +785,7 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_notify, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), + .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, .pc_name = "FREE_ALL", diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 8dcb08e1a885..d0cccddb7d08 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -1065,6 +1065,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = { .pc_func = nfs4_callback_compound, .pc_encode = nfs4_encode_void, .pc_argsize = 256, + .pc_argzero = 256, .pc_ressize = 256, .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, .pc_name = "COMPOUND", diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 65c331f75e9c..f21259ead64b 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,6 +84,6 @@ int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -int nfsd_reply_cache_stats_open(struct inode *, struct file *); +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index eeed4ae5b4ad..d5c57360b418 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1212,7 +1212,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) +int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { unsigned long releases = 0, pages_flushed = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; @@ -1259,8 +1259,3 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } - -int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, nfsd_file_cache_stats_show, NULL); -} diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 8e8c0c47d67d..357832bac736 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,5 +60,5 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -int nfsd_file_cache_stats_open(struct inode *, struct file *); +int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ffe17743cc74..8c854ba3285b 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -192,6 +192,10 @@ struct nfsd_net { atomic_t nfs4_client_count; int nfs4_max_clients; + + atomic_t nfsd_courtesy_clients; + struct shrinker nfsd_client_shrinker; + struct delayed_work nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9edd3c1a30fb..13e6e6897f6c 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -331,6 +331,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -342,6 +343,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_getaclres, .pc_release = nfsaclsvc_release_getacl, .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), @@ -353,6 +355,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -364,6 +367,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -375,6 +379,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_accessres, .pc_release = nfsaclsvc_release_access, .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 9446c6743664..2fb9ee356455 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -252,6 +252,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -263,6 +264,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_getaclres, .pc_release = nfs3svc_release_getacl, .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), @@ -274,6 +276,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_setaclres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd3_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a41cca619338..923d9a80df92 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -150,7 +150,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) { struct nfsd3_readargs *argp = rqstp->rq_argp; struct nfsd3_readres *resp = rqstp->rq_resp; - u32 max_blocksize = svc_max_payload(rqstp); unsigned int len; int v; @@ -159,7 +158,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp) (unsigned long) argp->count, (unsigned long long) argp->offset); - argp->count = min_t(u32, argp->count, max_blocksize); + argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); + argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); if (argp->offset > (u64)OFFSET_MAX) argp->offset = (u64)OFFSET_MAX; if (argp->offset + argp->count > (u64)OFFSET_MAX) @@ -563,25 +563,18 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - - count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); + unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, + svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = count - XDR_UNIT * 2; + buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); + buf->buflen -= XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* This is xdr_init_encode(), but it assumes that - * the head kvec has already been consumed. */ - xdr_set_scratch_buffer(xdr, NULL, 0); - xdr->buf = buf; - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); - xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -808,6 +801,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, @@ -819,6 +813,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_getattrres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_attrstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -830,6 +825,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_argzero = sizeof(struct nfsd3_sattrargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -841,6 +837,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_lookupres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+pAT+pAT, @@ -852,6 +849,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_accessres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1, @@ -863,6 +861,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readlinkres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, @@ -874,6 +873,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_argzero = sizeof(struct nfsd3_readargs), .pc_ressize = sizeof(struct nfsd3_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, @@ -885,6 +885,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_writeres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_argzero = sizeof(struct nfsd3_writeargs), .pc_ressize = sizeof(struct nfsd3_writeres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+4, @@ -896,6 +897,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_argzero = sizeof(struct nfsd3_createargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -907,6 +909,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_argzero = sizeof(struct nfsd3_mkdirargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -918,6 +921,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_argzero = sizeof(struct nfsd3_symlinkargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -929,6 +933,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_argzero = sizeof(struct nfsd3_mknodargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, @@ -940,6 +945,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -951,6 +957,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, @@ -962,6 +969,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_renameres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_argzero = sizeof(struct nfsd3_renameargs), .pc_ressize = sizeof(struct nfsd3_renameres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+WC, @@ -973,6 +981,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_linkres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_argzero = sizeof(struct nfsd3_linkargs), .pc_ressize = sizeof(struct nfsd3_linkres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+pAT+WC, @@ -984,6 +993,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_argzero = sizeof(struct nfsd3_readdirargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIR", @@ -994,6 +1004,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_argzero = sizeof(struct nfsd3_readdirplusargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIRPLUS", @@ -1003,6 +1014,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsstatres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+2*6+1, @@ -1013,6 +1025,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsinfores, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsinfores), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+12, @@ -1023,6 +1036,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_pathconfres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_pathconfres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+6, @@ -1034,6 +1048,7 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_commitres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_argzero = sizeof(struct nfsd3_commitargs), .pc_ressize = sizeof(struct nfsd3_commitres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+WC+2, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0293b8d65f10..3308dd671ef0 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -571,10 +571,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) args->count = max_blocksize; args->len = max_blocksize; } - if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) - return false; - return true; + return xdr_stream_subsegment(xdr, &args->payload, args->count); } bool @@ -616,8 +614,6 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - size_t remaining; if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen)) return false; @@ -626,16 +622,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) return false; - /* request sanity */ - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->tlen)) - return false; - - args->first.iov_base = xdr->p; + /* symlink_data */ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - - return true; + args->first.iov_base = xdr_inline_decode(xdr, args->tlen); + return args->first.iov_base != NULL; } bool diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4ce328209f61..f0e69edf5f0f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1371,11 +1371,21 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_holds_slot = false; } -void nfsd4_run_cb(struct nfsd4_callback *cb) +/** + * nfsd4_run_cb - queue up a callback job to run + * @cb: callback to queue + * + * Kick off a callback to do its thing. Returns false if it was already + * on a queue, true otherwise. + */ +bool nfsd4_run_cb(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + bool queued; nfsd41_cb_inflight_begin(clp); - if (!nfsd4_queue_cb(cb)) + queued = nfsd4_queue_cb(cb); + if (!queued) nfsd41_cb_inflight_end(clp); + return queued; } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index f92161ce1f97..e70a1a2999b7 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -82,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->id = itm->id; new->type = itm->type; - strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->authname)); + strscpy(new->name, itm->name, sizeof(new->name)); + strscpy(new->authname, itm->authname, sizeof(new->authname)); } static void @@ -548,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) return nfserr_badowner; @@ -584,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) return encode_ascii_id(xdr, id); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2c05692a9abf..3564d1c6f610 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -658,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; - + trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); switch (task->tk_status) { case 0: case -NFS4ERR_DELAY: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a72ab97f77ef..8beb2bc4c328 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -141,7 +141,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) static __be32 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) { - __be32 status; if (open->op_truncate && !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) @@ -156,9 +155,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs if (open->op_share_deny & NFS4_SHARE_DENY_READ) accmode |= NFSD_MAY_WRITE; - status = fh_verify(rqstp, current_fh, S_IFREG, accmode); - - return status; + return fh_verify(rqstp, current_fh, S_IFREG, accmode); } static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) @@ -454,7 +451,6 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { struct svc_fh *current_fh = &cstate->current_fh; - __be32 status; int accmode = 0; /* We don't know the target directory, and therefore can not @@ -479,9 +475,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, accmode); - - return status; + return do_open_permission(rqstp, current_fh, open, accmode); } static void @@ -668,11 +662,9 @@ static __be32 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - __be32 status; - fh_put(&cstate->current_fh); - status = exp_pseudoroot(rqstp, &cstate->current_fh); - return status; + + return exp_pseudoroot(rqstp, &cstate->current_fh); } static __be32 @@ -1343,7 +1335,7 @@ try_again: return 0; } if (work) { - strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); + strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); @@ -1621,6 +1613,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, struct rpc_task *task) { + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); + + trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); return 1; } @@ -1768,7 +1764,13 @@ static int nfsd4_do_async_copy(void *data) filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, ©->stateid); if (IS_ERR(filp)) { - nfserr = nfserr_offload_denied; + switch (PTR_ERR(filp)) { + case -EBADF: + nfserr = nfserr_wrong_type; + break; + default: + nfserr = nfserr_offload_denied; + } nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } @@ -1826,7 +1828,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); - memcpy(©->cp_res.cb_stateid, ©->cp_stateid.stid, + memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid, sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, @@ -1862,7 +1864,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1916,7 +1918,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cps = nfs4_alloc_init_cpntf_state(nn, stid); if (!cps) goto out; - memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t)); memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); @@ -2633,9 +2635,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = nfserr_minor_vers_mismatch; if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) goto out; - status = nfserr_resource; - if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - goto out; status = nfs41_check_op_ordering(args); if (status) { @@ -2648,10 +2647,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; + if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { + /* If there are still more operations to process, + * stop here and report NFS4ERR_RESOURCE. */ + if (cstate->minorversion == 0 && + args->client_opcnt > resp->opcnt) { + op->status = nfserr_resource; + goto encode_op; + } + } + /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2727,8 +2736,8 @@ encode_op: status = op->status; } - trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, - nfsd4_op_name(op->opnum)); + trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, + status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); @@ -2762,28 +2771,49 @@ out: #define op_encode_channel_attrs_maxsz (6 + 1 + 1) -static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +/* + * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which + * is called before sunrpc sets rq_res.buflen. Thus we have to compute + * the maximum payload size here, based on transport limits and the size + * of the remaining space in the rq_pages array. + */ +static u32 nfsd4_max_payload(const struct svc_rqst *rqstp) +{ + u32 buflen; + + buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE; + buflen -= rqstp->rq_auth_slack; + buflen -= rqstp->rq_res.head[0].iov_len; + return min_t(u32, buflen, svc_max_payload(rqstp)); +} + +static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size) * sizeof(__be32); } -static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } -static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { /* ac_supported, ac_resp_access */ return (op_encode_hdr_size + 2)* sizeof(__be32); } -static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -2794,17 +2824,17 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op * the op prematurely if the estimate is too large. We may turn off splice * reads unnecessarily. */ -static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 *bmap = op->u.getattr.ga_bmval; + const u32 *bmap = op->u.getattr.ga_bmval; u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; u32 ret = 0; if (bmap0 & FATTR4_WORD0_ACL) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap1 & FATTR4_WORD1_OWNER) { ret += IDMAP_NAMESZ + 4; @@ -2832,24 +2862,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, return ret; } -static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; } -static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_lock_denied_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz + op_encode_change_info_maxsz + 1 @@ -2857,20 +2891,18 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + op_encode_delegation_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.read.rd_length, maxcount); + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = svc_max_payload(rqstp); - u32 rlen = min(op->u.read.rd_length, maxcount); + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); /* * If we detect that the file changed during hole encoding, then we * recover by encoding the remaining reply as data. This means we need @@ -2881,70 +2913,77 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.readdir.rd_maxcount, maxcount); + u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; } -static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } -static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) * sizeof(__be32); } -static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); } -static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * sizeof(__be32); } -static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1 + 1 + /* eir_flags, spr_how */\ @@ -2958,14 +2997,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); } -static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); } -static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ @@ -2974,7 +3015,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd op_encode_channel_attrs_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* wr_callback */ + @@ -2986,16 +3028,16 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1 /* cr_synchronous */) * sizeof(__be32); } -static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 /* osr_count */ + 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } -static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3 /* cnr_lease_time */ + @@ -3010,12 +3052,10 @@ static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, } #ifdef CONFIG_NFSD_PNFS -static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); + u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 1 /* gd_layout_type*/ + @@ -3028,7 +3068,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4 * so we need to define an arbitrary upper bound here. */ #define MAX_LAYOUT_SIZE 128 -static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* logr_return_on_close */ + @@ -3037,14 +3078,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op MAX_LAYOUT_SIZE) * sizeof(__be32); } -static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* locr_newsize */ + 2 /* ns_size */) * sizeof(__be32); } -static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* lrs_stateid */ + @@ -3053,41 +3096,36 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_ #endif /* CONFIG_NFSD_PNFS */ -static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3) * sizeof(__be32); } -static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount, rlen; - - maxcount = svc_max_payload(rqstp); - rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); + u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount, rlen; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); + u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); @@ -3576,6 +3614,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 1, @@ -3586,6 +3625,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_decode = nfs4svc_decode_compoundargs, .pc_encode = nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_argzero = offsetof(struct nfsd4_compoundargs, iops), .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c634483d85d2..5d680045fa2c 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -807,16 +807,18 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; name.data = memdup_user(&ci->cc_name.cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) { princhash.data = memdup_user( &ci->cc_princhash.cp_data, princhashlen); - if (IS_ERR_OR_NULL(princhash.data)) - return -EFAULT; + if (IS_ERR(princhash.data)) { + kfree(name.data); + return PTR_ERR(princhash.data); + } princhash.len = princhashlen; } else princhash.len = 0; @@ -827,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &cnm->cn_len)) return -EFAULT; name.data = memdup_user(&cnm->cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; } if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c5d199d7e6b4..198d7abf34e4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -160,6 +160,13 @@ static bool is_client_expired(struct nfs4_client *clp) return clp->cl_time == 0; } +static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn, + struct nfs4_client *clp) +{ + if (clp->cl_state != NFSD4_ACTIVE) + atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); +} + static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -169,6 +176,7 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); + nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } @@ -190,6 +198,7 @@ renew_client_locked(struct nfs4_client *clp) list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); + nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; } @@ -357,6 +366,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { + trace_nfsd_cb_notify_lock_done(&zero_stateid, task); + /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and @@ -963,19 +974,19 @@ out_free: * Create a unique stateid_t to represent each COPY. */ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, - unsigned char sc_type) + unsigned char cs_type) { int new_id; - stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; - stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; - stid->sc_type = sc_type; + stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; + stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->cs_type = cs_type; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); - stid->stid.si_opaque.so_id = new_id; - stid->stid.si_generation = 1; + stid->cs_stid.si_opaque.so_id = new_id; + stid->cs_stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) @@ -997,7 +1008,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); - refcount_set(&cps->cp_stateid.sc_count, 1); + refcount_set(&cps->cp_stateid.cs_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); @@ -1013,11 +1024,11 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; - WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); + WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, - copy->cp_stateid.stid.si_opaque.so_id); + copy->cp_stateid.cs_stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } @@ -1049,6 +1060,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { + struct nfs4_delegation *dp = delegstateid(stid); + + WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); + WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); + WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); + WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -1462,6 +1479,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); + WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(stateid_slab, stid); } @@ -2233,6 +2251,7 @@ __destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); atomic_add_unless(&nn->nfs4_client_count, -1, 0); + nfsd4_dec_courtesy_client_count(nn, clp); free_client(clp); wake_up_all(&expiry_wq); } @@ -2478,7 +2497,7 @@ static const char *cb_state2str(int state) static int client_info_show(struct seq_file *m, void *v) { - struct inode *inode = m->private; + struct inode *inode = file_inode(m->file); struct nfs4_client *clp; u64 clid; @@ -2518,17 +2537,7 @@ static int client_info_show(struct seq_file *m, void *v) return 0; } -static int client_info_open(struct inode *inode, struct file *file) -{ - return single_open(file, client_info_show, inode); -} - -static const struct file_operations client_info_fops = { - .open = client_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(client_info); static void *states_start(struct seq_file *s, loff_t *pos) __acquires(&clp->cl_lock) @@ -4337,7 +4346,27 @@ out: return -ENOMEM; } -void nfsd4_init_leases_net(struct nfsd_net *nn) +static unsigned long +nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cnt; + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_client_shrinker); + + cnt = atomic_read(&nn->nfsd_courtesy_clients); + if (cnt > 0) + mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + return (unsigned long)cnt; +} + +static unsigned long +nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return SHRINK_STOP; +} + +int +nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; u64 max_clients; @@ -4356,6 +4385,18 @@ void nfsd4_init_leases_net(struct nfsd_net *nn) max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); max_clients *= NFS4_CLIENTS_PER_GB; nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); + + atomic_set(&nn->nfsd_courtesy_clients, 0); + nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; + nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); +} + +void +nfsd4_leases_net_shutdown(struct nfsd_net *nn) +{ + unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -4715,6 +4756,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } +static bool nfsd4_deleg_present(const struct inode *inode) +{ + struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + + return ctx && !list_empty_careful(&ctx->flc_lease); +} + +/** + * nfsd_wait_for_delegreturn - wait for delegations to be returned + * @rqstp: the RPC transaction being executed + * @inode: in-core inode of the file being waited for + * + * The timeout prevents deadlock if all nfsd threads happen to be + * tied up waiting for returning delegations. + * + * Return values: + * %true: delegation was returned + * %false: timed out waiting for delegreturn + */ +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode) +{ + long __maybe_unused timeo; + + timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode), + NFSD_DELEGRETURN_TIMEOUT); + trace_nfsd_delegret_wakeup(rqstp, inode, timeo); + return timeo > 0; +} + static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); @@ -4743,6 +4813,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, { struct nfs4_delegation *dp = cb_to_delegation(cb); + trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); + if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID || dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) return 1; @@ -4788,18 +4860,17 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the - * i_lock) we know the server hasn't removed the lease yet, and + * flc_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); } -/* Called from break_lease() with i_lock held. */ +/* Called from break_lease() with flc_lock held. */ static bool nfsd_break_deleg_cb(struct file_lock *fl) { - bool ret = false; struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; struct nfs4_client *clp = dp->dl_stid.sc_client; @@ -4825,7 +4896,7 @@ nfsd_break_deleg_cb(struct file_lock *fl) fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); - return ret; + return false; } /** @@ -5878,8 +5949,11 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, goto exp_client; if (!state_expired(lt, clp->cl_time)) break; - if (!atomic_read(&clp->cl_rpc_users)) + if (!atomic_read(&clp->cl_rpc_users)) { + if (clp->cl_state == NFSD4_ACTIVE) + atomic_inc(&nn->nfsd_courtesy_clients); clp->cl_state = NFSD4_COURTESY; + } if (!client_has_state(clp)) goto exp_client; if (!nfs4_anylock_blockers(clp)) @@ -5894,10 +5968,49 @@ exp_client: spin_unlock(&nn->client_lock); } +static void +nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, + struct list_head *reaplist) +{ + unsigned int maxreap = 0, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; + INIT_LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_ACTIVE) + break; + if (reapcnt >= maxreap) + break; + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static void +nfs4_process_client_reaplist(struct list_head *reaplist) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + + list_for_each_safe(pos, next, reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + trace_nfsd_clid_purged(&clp->cl_clientid); + list_del_init(&clp->cl_lru); + expire_client(clp); + } +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { - struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; @@ -5920,18 +6033,14 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && state_expired(<, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); nfs4_get_client_reaplist(nn, &reaplist, <); - list_for_each_safe(pos, next, &reaplist) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - trace_nfsd_clid_purged(&clp->cl_clientid); - list_del_init(&clp->cl_lru); - expire_client(clp); - } + nfs4_process_client_reaplist(&reaplist); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); @@ -6014,6 +6123,18 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } +static void +courtesy_client_reaper(struct work_struct *reaper) +{ + struct list_head reaplist; + struct delayed_work *dwork = to_delayed_work(reaper); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + nfsd_shrinker_work); + + nfs4_get_courtesy_client_reaplist(nn, &reaplist); + nfs4_process_client_reaplist(&reaplist); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -6149,6 +6270,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + struct nfs4_stid *stid; bool return_revoked = false; /* @@ -6171,15 +6293,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, } if (status) return status; - *s = find_stateid_by_type(cstate->clp, stateid, typemask); - if (!*s) + stid = find_stateid_by_type(cstate->clp, stateid, typemask); + if (!stid) return nfserr_bad_stateid; - if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { - nfs4_put_stid(*s); + if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(stid); if (cstate->minorversion) return nfserr_deleg_revoked; return nfserr_bad_stateid; } + *s = stid; return nfs_ok; } @@ -6244,12 +6367,12 @@ out: static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { - WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); - if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, - cps->cp_stateid.stid.si_opaque.so_id); + cps->cp_stateid.cs_stid.si_opaque.so_id); kfree(cps); } /* @@ -6271,12 +6394,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) - refcount_inc(&state->cp_stateid.sc_count); + refcount_inc(&state->cp_stateid.cs_count); else _free_cpntf_state_locked(nn, state); } @@ -6684,6 +6807,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; LIST_HEAD(reaplist); + struct nfs4_ol_stateid *stp; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -6692,6 +6816,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) if (unhashed) put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); + list_for_each_entry(stp, &reaplist, st_locks) + nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); @@ -6775,6 +6901,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: nfs4_put_stid(&dp->dl_stid); @@ -7830,6 +7957,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); get_net(net); return 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1e9690a061ec..bcfeb1a922c0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -42,6 +42,8 @@ #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> #include <linux/xattr.h> +#include <linux/vmalloc.h> + #include <uapi/linux/xattr.h> #include "idmap.h" @@ -791,6 +793,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) return nfserr_bad_xdr; + memset(&commit->co_verf, 0, sizeof(commit->co_verf)); return nfs_ok; } @@ -799,6 +802,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create { __be32 *p, status; + memset(create, 0, sizeof(*create)); if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0) return nfserr_bad_xdr; switch (create->cr_type) { @@ -848,6 +852,7 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu static inline __be32 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { + memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } @@ -855,6 +860,7 @@ nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *geta static __be32 nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) { + memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -903,6 +909,7 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) static __be32 nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { + memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) @@ -919,6 +926,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) static __be32 nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) { + memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) @@ -1140,11 +1148,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) __be32 status; u32 dummy; - memset(open->op_bmval, 0, sizeof(open->op_bmval)); - open->op_iattr.ia_valid = 0; - open->op_openowner = NULL; + memset(open, 0, sizeof(*open)); - open->op_xdr_error = 0; if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0) return nfserr_bad_xdr; /* deleg_want is ignored */ @@ -1179,6 +1184,8 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0) return nfserr_bad_xdr; + memset(&open_conf->oc_resp_stateid, 0, + sizeof(open_conf->oc_resp_stateid)); return nfs_ok; } @@ -1187,6 +1194,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d { __be32 status; + memset(open_down, 0, sizeof(*open_down)); status = nfsd4_decode_stateid4(argp, &open_down->od_stateid); if (status) return status; @@ -1216,6 +1224,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) if (!putfh->pf_fhval) return nfserr_jukebox; + putfh->no_verify = false; return nfs_ok; } @@ -1232,6 +1241,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { __be32 status; + memset(read, 0, sizeof(*read)); status = nfsd4_decode_stateid4(argp, &read->rd_stateid); if (status) return status; @@ -1248,6 +1258,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read { __be32 status; + memset(readdir, 0, sizeof(*readdir)); if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0) return nfserr_bad_xdr; status = nfsd4_decode_verifier4(argp, &readdir->rd_verf); @@ -1267,6 +1278,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read static __be32 nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) { + memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } @@ -1275,6 +1287,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename { __be32 status; + memset(rename, 0, sizeof(*rename)); status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen); if (status) return status; @@ -1291,6 +1304,7 @@ static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, struct nfsd4_secinfo *secinfo) { + secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } @@ -1299,6 +1313,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta { __be32 status; + memset(setattr, 0, sizeof(*setattr)); status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid); if (status) return status; @@ -1313,6 +1328,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { __be32 *p, status; + memset(setclientid, 0, sizeof(*setclientid)); + if (argp->minorversion >= 1) return nfserr_notsupp; @@ -1369,6 +1386,8 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify { __be32 *p, status; + memset(verify, 0, sizeof(*verify)); + status = nfsd4_decode_bitmap4(argp, verify->ve_bmval, ARRAY_SIZE(verify->ve_bmval)); if (status) @@ -1408,6 +1427,9 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) return nfserr_bad_xdr; + write->wr_bytes_written = 0; + write->wr_how_written = 0; + memset(&write->wr_verifier, 0, sizeof(write->wr_verifier)); return nfs_ok; } @@ -1432,6 +1454,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) { + memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); @@ -1442,6 +1465,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, u32 use_conn_in_rdma_mode; __be32 status; + memset(bcts, 0, sizeof(*bcts)); status = nfsd4_decode_sessionid4(argp, &bcts->sessionid); if (status) return status; @@ -1583,6 +1607,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, { __be32 status; + memset(exid, 0, sizeof(*exid)); status = nfsd4_decode_verifier4(argp, &exid->verifier); if (status) return status; @@ -1635,6 +1660,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, { __be32 status; + memset(sess, 0, sizeof(*sess)); status = nfsd4_decode_clientid4(argp, &sess->clientid); if (status) return status; @@ -1650,11 +1676,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, return status; if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0) return nfserr_bad_xdr; - status = nfsd4_decode_cb_sec(argp, &sess->cb_sec); - if (status) - return status; - - return nfs_ok; + return nfsd4_decode_cb_sec(argp, &sess->cb_sec); } static __be32 @@ -1678,6 +1700,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, { __be32 status; + memset(gdev, 0, sizeof(*gdev)); status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); if (status) return status; @@ -1698,6 +1721,7 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, { __be32 *p, status; + memset(lcp, 0, sizeof(*lcp)); if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0) @@ -1733,6 +1757,7 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, { __be32 status; + memset(lgp, 0, sizeof(*lgp)); if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0) @@ -1758,6 +1783,7 @@ static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, struct nfsd4_layoutreturn *lrp) { + memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0) @@ -1773,6 +1799,8 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, { if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; + + sin->sin_exp = NULL; return nfs_ok; } @@ -1793,6 +1821,7 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, seq->maxslots = be32_to_cpup(p++); seq->cachethis = be32_to_cpup(p); + seq->status_flags = 0; return nfs_ok; } @@ -1803,6 +1832,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta __be32 status; u32 i; + memset(test_stateid, 0, sizeof(*test_stateid)); if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0) return nfserr_bad_xdr; @@ -1900,6 +1930,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) struct nl4_server *ns_dummy; __be32 status; + memset(copy, 0, sizeof(*copy)); status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); if (status) return status; @@ -1955,6 +1986,7 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, { __be32 status; + memset(cn, 0, sizeof(*cn)); cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); if (cn->cpn_src == NULL) return nfserr_jukebox; @@ -1972,6 +2004,8 @@ static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, struct nfsd4_offload_status *os) { + os->count = 0; + os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } @@ -1988,6 +2022,8 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0) return nfserr_bad_xdr; + seek->seek_eof = 0; + seek->seek_pos = 0; return nfs_ok; } @@ -2123,6 +2159,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, __be32 status; u32 maxcount; + memset(getxattr, 0, sizeof(*getxattr)); status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); if (status) return status; @@ -2131,8 +2168,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); getxattr->getxa_len = maxcount; - - return status; + return nfs_ok; } static __be32 @@ -2142,6 +2178,8 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, u32 flags, maxcount, size; __be32 status; + memset(setxattr, 0, sizeof(*setxattr)); + if (xdr_stream_decode_u32(argp->xdr, &flags) < 0) return nfserr_bad_xdr; @@ -2180,6 +2218,8 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, { u32 maxcount; + memset(listxattrs, 0, sizeof(*listxattrs)); + if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0) return nfserr_bad_xdr; @@ -2207,6 +2247,7 @@ static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, struct nfsd4_removexattr *removexattr) { + memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } @@ -2357,22 +2398,15 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) + if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) return false; - - /* - * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS - * here, so we return success at the xdr level so that - * nfsd4_proc can handle this is an NFS-level error. - */ - if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - return true; + argp->opcnt = min_t(u32, argp->client_opcnt, + NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); if (!argp->ops) { argp->ops = argp->iops; - dprintk("nfsd: couldn't allocate room for COMPOUND\n"); return false; } } @@ -2774,9 +2808,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 } -static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) { struct path path = exp->ex_path; + struct kstat stat; int err; path_get(&path); @@ -2784,8 +2819,10 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); path_put(&path); + if (!err) + *pino = stat.ino; return err; } @@ -3282,22 +3319,21 @@ out_acl: *p++ = cpu_to_be32(stat.btime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - struct kstat parent_stat; u64 ino = stat.ino; p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; /* - * Get parent's attributes if not ignoring crossmount - * and this is the root of a cross-mounted filesystem. + * Get ino of mountpoint in parent filesystem, if not ignoring + * crossmount and this is the root of a cross-mounted + * filesystem. */ if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) { - err = get_parent_attributes(exp, &parent_stat); + err = nfsd4_get_mounted_on_ino(exp, &ino); if (err) goto out_nfserr; - ino = parent_stat.ino; } p = xdr_encode_hyper(p, ino); } @@ -3994,7 +4030,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } if (resp->xdr->buf->page_len && splice_ok) { WARN_ON_ONCE(1); - return nfserr_resource; + return nfserr_serverfault; } xdr_commit_encode(xdr); @@ -5394,7 +5430,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) struct nfsd4_compoundargs *args = rqstp->rq_argp; if (args->ops != args->iops) { - kfree(args->ops); + vfree(args->ops); args->ops = args->iops; } while (args->to_free) { @@ -5423,12 +5459,8 @@ bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = xdr->buf; __be32 *p; - WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + - buf->tail[0].iov_len); - /* * Send buffer space for the following items is reserved * at the top of nfsd4_proc_compound(). diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 9b31e1103e7b..3e64a3d50a1c 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -604,9 +604,10 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - struct nfsd_net *nn = m->private; + struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info, + nfsd_net_id); seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", @@ -626,11 +627,3 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } - -int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) -{ - struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, - nfsd_net_id); - - return single_open(file, nfsd_reply_cache_stats_show, nn); -} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 917fa1892fd2..6a29bcfc9390 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -185,17 +185,7 @@ static int export_features_show(struct seq_file *m, void *v) return 0; } -static int export_features_open(struct inode *inode, struct file *file) -{ - return single_open(file, export_features_show, NULL); -} - -static const struct file_operations export_features_operations = { - .open = export_features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(export_features); #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) @@ -204,17 +194,7 @@ static int supported_enctypes_show(struct seq_file *m, void *v) return 0; } -static int supported_enctypes_open(struct inode *inode, struct file *file) -{ - return single_open(file, supported_enctypes_show, NULL); -} - -static const struct file_operations supported_enctypes_ops = { - .open = supported_enctypes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(supported_enctypes); #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ static const struct file_operations pool_stats_operations = { @@ -224,19 +204,9 @@ static const struct file_operations pool_stats_operations = { .release = nfsd_pool_stats_release, }; -static const struct file_operations reply_cache_stats_operations = { - .open = nfsd_reply_cache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); -static const struct file_operations filecache_ops = { - .open = nfsd_file_cache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); /*----------------------------------------------------------------------------*/ /* @@ -1365,7 +1335,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) /* Per-export io stats use same ops as exports file */ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", - &export_features_operations, S_IRUGO}, + &export_features_fops, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", @@ -1374,14 +1344,16 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, - [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", + &nfsd_reply_cache_stats_fops, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, - [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO}, + [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) - [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", + &supported_enctypes_fops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, @@ -1481,11 +1453,12 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + retval = nfsd4_init_leases_net(nn); + if (retval) + goto out_drc_error; retval = nfsd_reply_cache_init(nn); if (retval) goto out_drc_error; - nfsd4_init_leases_net(nn); - get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); @@ -1507,6 +1480,7 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); + nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 57a468ed85c3..09726c5b9a31 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -164,6 +164,7 @@ char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); int nfsd4_create_laundry_wq(void); void nfsd4_destroy_laundry_wq(void); +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode); #else static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } @@ -179,6 +180,11 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) } static inline int nfsd4_create_laundry_wq(void) { return 0; }; static inline void nfsd4_destroy_laundry_wq(void) {}; +static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, + struct inode *inode) +{ + return false; +} #endif /* @@ -343,6 +349,7 @@ void nfsd_lockd_shutdown(void); #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 #define NFS4_CLIENTS_PER_GB 1024 +#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ /* * The following attributes are currently not supported by the NFSv4 server: @@ -498,7 +505,8 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern void nfsd4_init_leases_net(struct nfsd_net *nn); +extern int nfsd4_init_leases_net(struct nfsd_net *nn); +extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -506,7 +514,8 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {}; +static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; +static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a5b71526cee0..d73434200df9 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -392,13 +392,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(rqstp, exp, dentry, access); - - if (error) { - dprintk("fh_verify: %pd2 permission failure, " - "acc=%x, error=%d\n", - dentry, - access, ntohl(error)); - } + trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); out: if (error == nfserr_stale) nfsd_stats_fh_stale_inc(exp); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 7381972f1677..82b3ddeacc33 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -185,6 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) argp->count, argp->offset); argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); + argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); v = 0; len = argp->count; @@ -390,9 +391,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) resp->status = nfs_ok; if (!inode) { /* File doesn't exist. Create it and set attrs */ - resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name, - argp->len, &attrs, type, rdev, - newfhp); + resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type, + rdev, newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", argp->name, attr->ia_valid, (long) attr->ia_size); @@ -567,24 +567,15 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); - memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = count - XDR_UNIT * 2; + buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); + buf->buflen -= XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; - /* This is xdr_init_encode(), but it assumes that - * the head kvec has already been consumed. */ - xdr_set_scratch_buffer(xdr, NULL, 0); - xdr->buf = buf; - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); - xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -646,6 +637,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -657,6 +649,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, @@ -668,6 +661,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_argzero = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, @@ -678,6 +672,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -689,6 +684,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+AT, @@ -699,6 +695,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_readlinkres, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, @@ -710,6 +707,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_readres, .pc_release = nfssvc_release_readres, .pc_argsize = sizeof(struct nfsd_readargs), + .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, @@ -720,6 +718,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_voidarg, .pc_encode = nfssvc_encode_voidres, .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, @@ -731,6 +730,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_attrstatres, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_argzero = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, @@ -742,6 +742,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, @@ -752,6 +753,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_diropargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -762,6 +764,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_renameargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_renameargs), + .pc_argzero = sizeof(struct nfsd_renameargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -772,6 +775,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_linkargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_linkargs), + .pc_argzero = sizeof(struct nfsd_linkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -782,6 +786,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_symlinkargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_symlinkargs), + .pc_argzero = sizeof(struct nfsd_symlinkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -793,6 +798,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, @@ -803,6 +809,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_diropargs, .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, @@ -813,6 +820,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_readdirargs, .pc_encode = nfssvc_encode_readdirres, .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_argzero = sizeof(struct nfsd_readdirargs), .pc_ressize = sizeof(struct nfsd_readdirres), .pc_cachetype = RC_NOCACHE, .pc_name = "READDIR", @@ -822,6 +830,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_statfsres, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_statfsres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+5, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 4bb5baa17040..bfbd9f672f59 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -799,7 +799,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - strlcpy(nn->nfsd_name, utsname()->nodename, + strscpy(nn->nfsd_name, utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index aba8520b4b8b..caf6355b18fa 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -338,10 +338,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (args->len > NFSSVC_MAXBLKSIZE_V2) return false; - if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) - return false; - return true; + return xdr_stream_subsegment(xdr, &args->payload, args->len); } bool diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ae596dbf8667..e2daef3cc003 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -57,11 +57,11 @@ typedef struct { } stateid_t; typedef struct { - stateid_t stid; + stateid_t cs_stid; #define NFS4_COPY_STID 1 #define NFS4_COPYNOTIFY_STID 2 - unsigned char sc_type; - refcount_t sc_count; + unsigned char cs_type; + refcount_t cs_count; } copy_stateid_t; struct nfsd4_callback { @@ -175,7 +175,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 16 +#define NFSD_MAX_OPS_PER_COMPOUND 50 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -692,12 +692,11 @@ extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); -extern void nfsd4_run_cb(struct nfsd4_callback *cb); +extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); -extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index a8c5a02a84f0..777e24e5da33 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -32,7 +32,7 @@ struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; -static int nfsd_proc_show(struct seq_file *seq, void *v) +static int nfsd_show(struct seq_file *seq, void *v) { int i; @@ -72,17 +72,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) return 0; } -static int nfsd_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, nfsd_proc_show, NULL); -} - -static const struct proc_ops nfsd_proc_ops = { - .proc_open = nfsd_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, -}; +DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9ebd67d461f9..06a96e955bd0 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -84,19 +84,26 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) TRACE_EVENT(nfsd_compound, - TP_PROTO(const struct svc_rqst *rqst, - u32 args_opcnt), - TP_ARGS(rqst, args_opcnt), + TP_PROTO( + const struct svc_rqst *rqst, + const char *tag, + u32 taglen, + u32 opcnt + ), + TP_ARGS(rqst, tag, taglen, opcnt), TP_STRUCT__entry( __field(u32, xid) - __field(u32, args_opcnt) + __field(u32, opcnt) + __string_len(tag, tag, taglen) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->args_opcnt = args_opcnt; + __entry->opcnt = opcnt; + __assign_str_len(tag, tag, taglen); ), - TP_printk("xid=0x%08x opcnt=%u", - __entry->xid, __entry->args_opcnt) + TP_printk("xid=0x%08x opcnt=%u tag=%s", + __entry->xid, __entry->opcnt, __get_str(tag) + ) ) TRACE_EVENT(nfsd_compound_status, @@ -195,7 +202,7 @@ TRACE_EVENT(nfsd_fh_verify, __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) __field(u32, xid) __field(u32, fh_hash) - __field(void *, inode) + __field(const void *, inode) __field(unsigned long, type) __field(unsigned long, access) ), @@ -211,13 +218,55 @@ TRACE_EVENT(nfsd_fh_verify, __entry->type = type; __entry->access = access; ), - TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s", - __entry->xid, __entry->fh_hash, __entry->inode, + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s", + __entry->xid, __entry->fh_hash, show_fs_file_type(__entry->type), show_nfsd_may_flags(__entry->access) ) ); +TRACE_EVENT_CONDITION(nfsd_fh_verify_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access, + __be32 error + ), + TP_ARGS(rqstp, fhp, type, access, error), + TP_CONDITION(error), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(const void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + __field(int, error) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + __entry->error = be32_to_cpu(error); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access), + __entry->error + ) +); + DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -489,6 +538,29 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); #include "filecache.h" #include "vfs.h" +TRACE_EVENT(nfsd_delegret_wakeup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + long timeo + ), + TP_ARGS(rqstp, inode, timeo), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(long, timeo) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->timeo = timeo; + ), + TP_printk("xid=0x%08x inode=%p%s", + __entry->xid, __entry->inode, + __entry->timeo == 0 ? " (timed out)" : "" + ) +); + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), @@ -1399,6 +1471,45 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +DECLARE_EVENT_CLASS(nfsd_cb_done_class, + TP_PROTO( + const stateid_t *stp, + const struct rpc_task *task + ), + TP_ARGS(stp, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + __field(int, status) + ), + TP_fast_assign( + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + __entry->status = task->tk_status; + ), + TP_printk("client %08x:%08x stateid %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->si_id, + __entry->si_generation, __entry->status + ) +); + +#define DEFINE_NFSD_CB_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_done_class, name, \ + TP_PROTO( \ + const stateid_t *stp, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(stp, task)) + +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fc17b0ac8729..83be89905cbf 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -343,8 +343,61 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserrno(get_write_access(inode)); } -/* - * Set various file attributes. After this call fhp needs an fh_put. +static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) +{ + int host_err; + + if (iap->ia_valid & ATTR_SIZE) { + /* + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. + * + * (and similar for the older RFCs) + */ + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + if (iap->ia_size < 0) + return -EFBIG; + + host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + if (host_err) + return host_err; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + return 0; + } + + if (!iap->ia_valid) + return 0; + + iap->ia_valid |= ATTR_CTIME; + return notify_change(&init_user_ns, dentry, iap, NULL); +} + +/** + * nfsd_setattr - Set various file attributes. + * @rqstp: controlling RPC transaction + * @fhp: filehandle of target + * @attr: attributes to set + * @check_guard: set to 1 if guardtime is a valid timestamp + * @guardtime: do not act if ctime.tv_sec does not match this timestamp + * + * This call may adjust the contents of @attr (in particular, this + * call may change the bits in the na_iattr.ia_valid field). + * + * Returns nfs_ok on success, otherwise an NFS status code is + * returned. Caller must release @fhp by calling fh_put in either + * case. */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -357,9 +410,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; - int host_err = 0; + int host_err; bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); + int retries; if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -414,43 +468,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, } inode_lock(inode); - if (size_change) { - /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. - * - * (and similar for the older RFCs) - */ - struct iattr size_attr = { - .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, - .ia_size = iap->ia_size, - }; - - host_err = -EFBIG; - if (iap->ia_size < 0) - goto out_unlock; - - host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); - if (host_err) - goto out_unlock; - iap->ia_valid &= ~ATTR_SIZE; - - /* - * Avoid the additional setattr call below if the only other - * attribute that the client sends is the mtime, as we update - * it as part of the size change above. - */ - if ((iap->ia_valid & ~ATTR_MTIME) == 0) - goto out_unlock; - } - - if (iap->ia_valid) { - iap->ia_valid |= ATTR_CTIME; - host_err = notify_change(&init_user_ns, dentry, iap, NULL); + for (retries = 1;;) { + host_err = __nfsd_setattr(dentry, iap); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, inode)) + break; } - -out_unlock: if (attr->na_seclabel && attr->na_seclabel->len) attr->na_labelerr = security_inode_setsecctx(dentry, attr->na_seclabel->data, attr->na_seclabel->len); @@ -1255,7 +1279,7 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct nfsd_attrs *attrs, + struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; @@ -1382,8 +1406,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out_unlock; fh_fill_pre_attrs(fhp); - err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, - rdev, resfhp); + err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); fh_fill_post_attrs(fhp); out_unlock: inode_unlock(dentry->d_inode); @@ -1673,7 +1696,15 @@ retry: .new_dir = tdir, .new_dentry = ndentry, }; - host_err = vfs_rename(&rd); + int retries; + + for (retries = 1;;) { + host_err = vfs_rename(&rd); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry))) + break; + } if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1757,9 +1788,18 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { + int retries; + if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) nfsd_close_cached_files(rdentry); - host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + + for (retries = 1;;) { + host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, rinode)) + break; + } } else { host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index c95cd414b4bb..120521bc7b24 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -79,8 +79,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct nfsd_attrs *attrs, - int type, dev_t rdev, struct svc_fh *res); + struct nfsd_attrs *attrs, int type, dev_t rdev, + struct svc_fh *res); __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 96267258e629..0eb00105d845 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -717,13 +717,13 @@ struct nfsd4_compoundargs { struct svcxdr_tmpbuf *to_free; struct svc_rqst *rqstp; - u32 taglen; char * tag; + u32 taglen; u32 minorversion; + u32 client_opcnt; u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; - int cachetype; }; struct nfsd4_compoundres { @@ -732,8 +732,8 @@ struct nfsd4_compoundres { struct svc_rqst * rqstp; __be32 *statusp; - u32 taglen; char * tag; + u32 taglen; u32 opcnt; struct nfsd4_compound_state cstate; @@ -888,7 +888,8 @@ struct nfsd4_operation { u32 op_flags; char *op_name; /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + u32 (*op_rsize_bop)(const struct svc_rqst *rqstp, + const struct nfsd4_op *op); void (*op_get_currentstateid)(struct nfsd4_compound_state *, union nfsd4_op_u *); void (*op_set_currentstateid)(struct nfsd4_compound_state *, diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 51363d4e8636..26a76ebfe58f 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1927,8 +1927,6 @@ const struct inode_operations ntfs_link_inode_operations = { .setattr = ntfs3_setattr, .listxattr = ntfs_listxattr, .permission = ntfs_permission, - .get_acl = ntfs_get_acl, - .set_acl = ntfs_set_acl, }; const struct address_space_operations ntfs_aops = { diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 6ae1f56b7358..7de8718c68a9 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -625,67 +625,6 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); } -static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, - struct inode *inode, int type, void *buffer, - size_t size) -{ - struct posix_acl *acl; - int err; - - if (!(inode->i_sb->s_flags & SB_POSIXACL)) { - ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); - return -EOPNOTSUPP; - } - - acl = ntfs_get_acl(inode, type, false); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (!acl) - return -ENODATA; - - err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - - return err; -} - -static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, - struct inode *inode, int type, const void *value, - size_t size) -{ - struct posix_acl *acl; - int err; - - if (!(inode->i_sb->s_flags & SB_POSIXACL)) { - ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); - return -EOPNOTSUPP; - } - - if (!inode_owner_or_capable(mnt_userns, inode)) - return -EPERM; - - if (!value) { - acl = NULL; - } else { - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (acl) { - err = posix_acl_valid(&init_user_ns, acl); - if (err) - goto release_and_out; - } - } - - err = ntfs_set_acl(mnt_userns, inode, acl, type); - -release_and_out: - posix_acl_release(acl); - return err; -} - /* * ntfs_init_acl - Initialize the ACLs of a new inode. * @@ -852,23 +791,6 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, goto out; } -#ifdef CONFIG_NTFS3_FS_POSIX_ACL - if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && - !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, - sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || - (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && - !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, - sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { - /* TODO: init_user_ns? */ - err = ntfs_xattr_get_acl( - &init_user_ns, inode, - name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 - ? ACL_TYPE_ACCESS - : ACL_TYPE_DEFAULT, - buffer, size); - goto out; - } -#endif /* Deal with NTFS extended attribute. */ err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL); @@ -981,22 +903,6 @@ set_new_fa: goto out; } -#ifdef CONFIG_NTFS3_FS_POSIX_ACL - if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && - !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, - sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || - (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && - !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, - sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { - err = ntfs_xattr_set_acl( - mnt_userns, inode, - name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 - ? ACL_TYPE_ACCESS - : ACL_TYPE_DEFAULT, - value, size); - goto out; - } -#endif /* Deal with NTFS extended attribute. */ err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); @@ -1086,7 +992,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry) } // clang-format off -static const struct xattr_handler ntfs_xattr_handler = { +static const struct xattr_handler ntfs_other_xattr_handler = { .prefix = "", .get = ntfs_getxattr, .set = ntfs_setxattr, @@ -1094,7 +1000,11 @@ static const struct xattr_handler ntfs_xattr_handler = { }; const struct xattr_handler *ntfs_xattr_handlers[] = { - &ntfs_xattr_handler, +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif + &ntfs_other_xattr_handler, NULL, }; // clang-format on diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index a75e2b7d67f5..64e6ddcfe329 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -991,7 +991,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) lc->oc_type = NO_CONTROLD; rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name, - DLM_LSFL_FS | DLM_LSFL_NEWEXCL, DLM_LVB_LEN, + DLM_LSFL_NEWEXCL, DLM_LVB_LEN, &ocfs2_ls_ops, conn, &ops_rv, &fsdlm); if (rc) { if (rc == -EEXIST || rc == -EPROTO) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 87759165d32b..ee93c825b06b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -250,7 +250,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry, size_t size, int flags) { int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name, - (void *)value, size, flags); + value, size, flags); pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n", dentry, name, min((int)size, 48), value, size, flags, err); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ec746d447f1b..5da771b218d1 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1022,7 +1022,20 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, /* Check that everything is OK before copy-up */ if (value) { - acl = posix_acl_from_xattr(&init_user_ns, value, size); + /* The above comment can be understood in two ways: + * + * 1. We just want to check whether the basic POSIX ACL format + * is ok. For example, if the header is correct and the size + * is sane. + * 2. We want to know whether the ACL_{GROUP,USER} entries can + * be mapped according to the underlying filesystem. + * + * Currently, we only check 1. If we wanted to check 2. we + * would need to pass the mnt_userns and the fs_userns of the + * underlying filesystem. But frankly, I think checking 1. is + * enough to start the copy-up. + */ + acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 5af33800743e..b4f109875e79 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -710,9 +710,9 @@ EXPORT_SYMBOL(posix_acl_update_mode); /* * Fix up the uids and gids in posix acl extended attributes in place. */ -static int posix_acl_fix_xattr_common(void *value, size_t size) +static int posix_acl_fix_xattr_common(const void *value, size_t size) { - struct posix_acl_xattr_header *header = value; + const struct posix_acl_xattr_header *header = value; int count; if (!header) @@ -720,13 +720,13 @@ static int posix_acl_fix_xattr_common(void *value, size_t size) if (size < sizeof(struct posix_acl_xattr_header)) return -EINVAL; if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) - return -EINVAL; + return -EOPNOTSUPP; count = posix_acl_xattr_count(size); if (count < 0) return -EINVAL; if (count == 0) - return -EINVAL; + return 0; return count; } @@ -748,7 +748,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, return; count = posix_acl_fix_xattr_common(value, size); - if (count < 0) + if (count <= 0) return; for (end = entry + count; entry != end; entry++) { @@ -771,46 +771,6 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, } } -void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size) -{ - struct posix_acl_xattr_header *header = value; - struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; - struct user_namespace *fs_userns = i_user_ns(inode); - int count; - vfsuid_t vfsuid; - vfsgid_t vfsgid; - kuid_t uid; - kgid_t gid; - - if (no_idmapping(mnt_userns, i_user_ns(inode))) - return; - - count = posix_acl_fix_xattr_common(value, size); - if (count < 0) - return; - - for (end = entry + count; entry != end; entry++) { - switch (le16_to_cpu(entry->e_tag)) { - case ACL_USER: - uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsuid = VFSUIDT_INIT(uid); - uid = from_vfsuid(mnt_userns, fs_userns, vfsuid); - entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid)); - break; - case ACL_GROUP: - gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsgid = VFSGIDT_INIT(gid); - gid = from_vfsgid(mnt_userns, fs_userns, vfsgid); - entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid)); - break; - default: - break; - } - } -} - static void posix_acl_fix_xattr_userns( struct user_namespace *to, struct user_namespace *from, void *value, size_t size) @@ -822,7 +782,7 @@ static void posix_acl_fix_xattr_userns( kgid_t gid; count = posix_acl_fix_xattr_common(value, size); - if (count < 0) + if (count <= 0) return; for (end = entry + count; entry != end; entry++) { @@ -857,12 +817,32 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size) posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size); } -/* - * Convert from extended attribute to in-memory representation. +/** + * make_posix_acl - convert POSIX ACLs from uapi to VFS format using the + * provided callbacks to map ACL_{GROUP,USER} entries into the + * appropriate format + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @value: the uapi representation of POSIX ACLs + * @size: the size of @void + * @uid_cb: callback to use for mapping the uid stored in ACL_USER entries + * @gid_cb: callback to use for mapping the gid stored in ACL_GROUP entries + * + * The make_posix_acl() helper is an abstraction to translate from uapi format + * into the VFS format allowing the caller to specific callbacks to map + * ACL_{GROUP,USER} entries into the expected format. This is used in + * posix_acl_from_xattr() and vfs_set_acl_prepare() and avoids pointless code + * duplication. + * + * Return: Allocated struct posix_acl on success, NULL for a valid header but + * without actual POSIX ACL entries, or ERR_PTR() encoded error code. */ -struct posix_acl * -posix_acl_from_xattr(struct user_namespace *user_ns, - const void *value, size_t size) +static struct posix_acl *make_posix_acl(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, const void *value, size_t size, + kuid_t (*uid_cb)(struct user_namespace *, struct user_namespace *, + const struct posix_acl_xattr_entry *), + kgid_t (*gid_cb)(struct user_namespace *, struct user_namespace *, + const struct posix_acl_xattr_entry *)) { const struct posix_acl_xattr_header *header = value; const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end; @@ -870,16 +850,9 @@ posix_acl_from_xattr(struct user_namespace *user_ns, struct posix_acl *acl; struct posix_acl_entry *acl_e; - if (!value) - return NULL; - if (size < sizeof(struct posix_acl_xattr_header)) - return ERR_PTR(-EINVAL); - if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) - return ERR_PTR(-EOPNOTSUPP); - - count = posix_acl_xattr_count(size); + count = posix_acl_fix_xattr_common(value, size); if (count < 0) - return ERR_PTR(-EINVAL); + return ERR_PTR(count); if (count == 0) return NULL; @@ -900,16 +873,12 @@ posix_acl_from_xattr(struct user_namespace *user_ns, break; case ACL_USER: - acl_e->e_uid = - make_kuid(user_ns, - le32_to_cpu(entry->e_id)); + acl_e->e_uid = uid_cb(mnt_userns, fs_userns, entry); if (!uid_valid(acl_e->e_uid)) goto fail; break; case ACL_GROUP: - acl_e->e_gid = - make_kgid(user_ns, - le32_to_cpu(entry->e_id)); + acl_e->e_gid = gid_cb(mnt_userns, fs_userns, entry); if (!gid_valid(acl_e->e_gid)) goto fail; break; @@ -924,6 +893,181 @@ fail: posix_acl_release(acl); return ERR_PTR(-EINVAL); } + +/** + * vfs_set_acl_prepare_kuid - map ACL_USER uid according to mount- and + * filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @e: a ACL_USER entry in POSIX ACL uapi format + * + * The uid stored as ACL_USER entry in @e is a kuid_t stored as a raw {g,u}id + * value. The vfs_set_acl_prepare_kuid() will recover the kuid_t through + * KUIDT_INIT() and then map it according to the idmapped mount. The resulting + * kuid_t is the value which the filesystem can map up into a raw backing store + * id in the filesystem's idmapping. + * + * This is used in vfs_set_acl_prepare() to generate the proper VFS + * representation of POSIX ACLs with ACL_USER entries during setxattr(). + * + * Return: A kuid in @fs_userns for the uid stored in @e. + */ +static inline kuid_t +vfs_set_acl_prepare_kuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const struct posix_acl_xattr_entry *e) +{ + kuid_t kuid = KUIDT_INIT(le32_to_cpu(e->e_id)); + return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); +} + +/** + * vfs_set_acl_prepare_kgid - map ACL_GROUP gid according to mount- and + * filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @e: a ACL_GROUP entry in POSIX ACL uapi format + * + * The gid stored as ACL_GROUP entry in @e is a kgid_t stored as a raw {g,u}id + * value. The vfs_set_acl_prepare_kgid() will recover the kgid_t through + * KGIDT_INIT() and then map it according to the idmapped mount. The resulting + * kgid_t is the value which the filesystem can map up into a raw backing store + * id in the filesystem's idmapping. + * + * This is used in vfs_set_acl_prepare() to generate the proper VFS + * representation of POSIX ACLs with ACL_GROUP entries during setxattr(). + * + * Return: A kgid in @fs_userns for the gid stored in @e. + */ +static inline kgid_t +vfs_set_acl_prepare_kgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const struct posix_acl_xattr_entry *e) +{ + kgid_t kgid = KGIDT_INIT(le32_to_cpu(e->e_id)); + return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); +} + +/** + * vfs_set_acl_prepare - convert POSIX ACLs from uapi to VFS format taking + * mount and filesystem idmappings into account + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @value: the uapi representation of POSIX ACLs + * @size: the size of @void + * + * When setting POSIX ACLs with ACL_{GROUP,USER} entries they need to be + * mapped according to the relevant mount- and filesystem idmapping. It is + * important that the ACL_{GROUP,USER} entries in struct posix_acl will be + * mapped into k{g,u}id_t that are supposed to be mapped up in the filesystem + * idmapping. This is crucial since the resulting struct posix_acl might be + * cached filesystem wide. The vfs_set_acl_prepare() function will take care to + * perform all necessary idmappings. + * + * Note, that since basically forever the {g,u}id values encoded as + * ACL_{GROUP,USER} entries in the uapi POSIX ACLs passed via @value contain + * values that have been mapped according to the caller's idmapping. In other + * words, POSIX ACLs passed in uapi format as @value during setxattr() contain + * {g,u}id values in their ACL_{GROUP,USER} entries that should actually have + * been stored as k{g,u}id_t. + * + * This means, vfs_set_acl_prepare() needs to first recover the k{g,u}id_t by + * calling K{G,U}IDT_INIT(). Afterwards they can be interpreted as vfs{g,u}id_t + * through from_vfs{g,u}id() to account for any idmapped mounts. The + * vfs_set_acl_prepare_k{g,u}id() helpers will take care to generate the + * correct k{g,u}id_t. + * + * The filesystem will then receive the POSIX ACLs ready to be cached + * filesystem wide and ready to be written to the backing store taking the + * filesystem's idmapping into account. + * + * Return: Allocated struct posix_acl on success, NULL for a valid header but + * without actual POSIX ACL entries, or ERR_PTR() encoded error code. + */ +struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const void *value, size_t size) +{ + return make_posix_acl(mnt_userns, fs_userns, value, size, + vfs_set_acl_prepare_kuid, + vfs_set_acl_prepare_kgid); +} +EXPORT_SYMBOL(vfs_set_acl_prepare); + +/** + * posix_acl_from_xattr_kuid - map ACL_USER uid into filesystem idmapping + * @mnt_userns: unused + * @fs_userns: the filesystem's idmapping + * @e: a ACL_USER entry in POSIX ACL uapi format + * + * Map the uid stored as ACL_USER entry in @e into the filesystem's idmapping. + * This is used in posix_acl_from_xattr() to generate the proper VFS + * representation of POSIX ACLs with ACL_USER entries. + * + * Return: A kuid in @fs_userns for the uid stored in @e. + */ +static inline kuid_t +posix_acl_from_xattr_kuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const struct posix_acl_xattr_entry *e) +{ + return make_kuid(fs_userns, le32_to_cpu(e->e_id)); +} + +/** + * posix_acl_from_xattr_kgid - map ACL_GROUP gid into filesystem idmapping + * @mnt_userns: unused + * @fs_userns: the filesystem's idmapping + * @e: a ACL_GROUP entry in POSIX ACL uapi format + * + * Map the gid stored as ACL_GROUP entry in @e into the filesystem's idmapping. + * This is used in posix_acl_from_xattr() to generate the proper VFS + * representation of POSIX ACLs with ACL_GROUP entries. + * + * Return: A kgid in @fs_userns for the gid stored in @e. + */ +static inline kgid_t +posix_acl_from_xattr_kgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const struct posix_acl_xattr_entry *e) +{ + return make_kgid(fs_userns, le32_to_cpu(e->e_id)); +} + +/** + * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format + * @fs_userns: the filesystem's idmapping + * @value: the uapi representation of POSIX ACLs + * @size: the size of @void + * + * Filesystems that store POSIX ACLs in the unaltered uapi format should use + * posix_acl_from_xattr() when reading them from the backing store and + * converting them into the struct posix_acl VFS format. The helper is + * specifically intended to be called from the ->get_acl() inode operation. + * + * The posix_acl_from_xattr() function will map the raw {g,u}id values stored + * in ACL_{GROUP,USER} entries into the filesystem idmapping in @fs_userns. The + * posix_acl_from_xattr_k{g,u}id() helpers will take care to generate the + * correct k{g,u}id_t. The returned struct posix_acl can be cached. + * + * Note that posix_acl_from_xattr() does not take idmapped mounts into account. + * If it did it calling is from the ->get_acl() inode operation would return + * POSIX ACLs mapped according to an idmapped mount which would mean that the + * value couldn't be cached for the filesystem. Idmapped mounts are taken into + * account on the fly during permission checking or right at the VFS - + * userspace boundary before reporting them to the user. + * + * Return: Allocated struct posix_acl on success, NULL for a valid header but + * without actual POSIX ACL entries, or ERR_PTR() encoded error code. + */ +struct posix_acl * +posix_acl_from_xattr(struct user_namespace *fs_userns, + const void *value, size_t size) +{ + return make_posix_acl(&init_user_ns, fs_userns, value, size, + posix_acl_from_xattr_kuid, + posix_acl_from_xattr_kgid); +} EXPORT_SYMBOL (posix_acl_from_xattr); /* @@ -1027,7 +1171,17 @@ posix_acl_xattr_set(const struct xattr_handler *handler, int ret; if (value) { - acl = posix_acl_from_xattr(&init_user_ns, value, size); + /* + * By the time we end up here the {g,u}ids stored in + * ACL_{GROUP,USER} have already been mapped according to the + * caller's idmapping. The vfs_set_acl_prepare() helper will + * recover them and take idmapped mounts into account. The + * filesystem will receive the POSIX ACLs in the correct + * format ready to be cached or written to the backing store + * taking the filesystem idmapping into account. + */ + acl = vfs_set_acl_prepare(mnt_userns, i_user_ns(inode), + value, size); if (IS_ERR(acl)) return PTR_ERR(acl); } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b2fd3c20e7c2..0c034ea39954 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -28,14 +28,11 @@ #include <linux/crypto.h> #include <linux/string.h> #include <linux/timer.h> -#include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/jiffies.h> #include <linux/workqueue.h> -#include <crypto/acompress.h> - #include "internal.h" /* @@ -93,8 +90,7 @@ module_param(compress, charp, 0444); MODULE_PARM_DESC(compress, "compression to use"); /* Compression parameters */ -static struct crypto_acomp *tfm; -static struct acomp_req *creq; +static struct crypto_comp *tfm; struct pstore_zbackend { int (*zbufsize)(size_t size); @@ -272,21 +268,12 @@ static const struct pstore_zbackend zbackends[] = { static int pstore_compress(const void *in, void *out, unsigned int inlen, unsigned int outlen) { - struct scatterlist src, dst; int ret; if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS)) return -EINVAL; - sg_init_table(&src, 1); - sg_set_buf(&src, in, inlen); - - sg_init_table(&dst, 1); - sg_set_buf(&dst, out, outlen); - - acomp_request_set_params(creq, &src, &dst, inlen, outlen); - - ret = crypto_acomp_compress(creq); + ret = crypto_comp_compress(tfm, in, inlen, out, &outlen); if (ret) { pr_err("crypto_comp_compress failed, ret = %d!\n", ret); return ret; @@ -297,7 +284,7 @@ static int pstore_compress(const void *in, void *out, static void allocate_buf_for_compression(void) { - struct crypto_acomp *acomp; + struct crypto_comp *ctx; int size; char *buf; @@ -309,7 +296,7 @@ static void allocate_buf_for_compression(void) if (!psinfo || tfm) return; - if (!crypto_has_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC)) { + if (!crypto_has_comp(zbackend->name, 0, 0)) { pr_err("Unknown compression: %s\n", zbackend->name); return; } @@ -328,24 +315,16 @@ static void allocate_buf_for_compression(void) return; } - acomp = crypto_alloc_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(acomp)) { + ctx = crypto_alloc_comp(zbackend->name, 0, 0); + if (IS_ERR_OR_NULL(ctx)) { kfree(buf); pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name, - PTR_ERR(acomp)); - return; - } - - creq = acomp_request_alloc(acomp); - if (!creq) { - crypto_free_acomp(acomp); - kfree(buf); - pr_err("acomp_request_alloc('%s') failed\n", zbackend->name); + PTR_ERR(ctx)); return; } /* A non-NULL big_oops_buf indicates compression is available. */ - tfm = acomp; + tfm = ctx; big_oops_buf_sz = size; big_oops_buf = buf; @@ -355,8 +334,7 @@ static void allocate_buf_for_compression(void) static void free_buf_for_compression(void) { if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) { - acomp_request_free(creq); - crypto_free_acomp(tfm); + crypto_free_comp(tfm); tfm = NULL; } kfree(big_oops_buf); @@ -693,8 +671,6 @@ static void decompress_record(struct pstore_record *record) int ret; int unzipped_len; char *unzipped, *workspace; - struct acomp_req *dreq; - struct scatterlist src, dst; if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed) return; @@ -718,30 +694,16 @@ static void decompress_record(struct pstore_record *record) if (!workspace) return; - dreq = acomp_request_alloc(tfm); - if (!dreq) { - kfree(workspace); - return; - } - - sg_init_table(&src, 1); - sg_set_buf(&src, record->buf, record->size); - - sg_init_table(&dst, 1); - sg_set_buf(&dst, workspace, unzipped_len); - - acomp_request_set_params(dreq, &src, &dst, record->size, unzipped_len); - /* After decompression "unzipped_len" is almost certainly smaller. */ - ret = crypto_acomp_decompress(dreq); + ret = crypto_comp_decompress(tfm, record->buf, record->size, + workspace, &unzipped_len); if (ret) { - pr_err("crypto_acomp_decompress failed, ret = %d!\n", ret); + pr_err("crypto_comp_decompress failed, ret = %d!\n", ret); kfree(workspace); return; } /* Append ECC notice to decompressed buffer. */ - unzipped_len = dreq->dlen; memcpy(workspace + unzipped_len, record->buf + record->size, record->ecc_notice_size); @@ -749,7 +711,6 @@ static void decompress_record(struct pstore_record *record) unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size, GFP_KERNEL); kfree(workspace); - acomp_request_free(dreq); if (!unzipped) return; diff --git a/fs/read_write.c b/fs/read_write.c index 1a261dcf1778..328ce8cf9a85 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -496,14 +496,9 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t } /* caller is responsible for file_start_write/file_end_write */ -ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) +ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos) { - struct kvec iov = { - .iov_base = (void *)buf, - .iov_len = min_t(size_t, count, MAX_RW_COUNT), - }; struct kiocb kiocb; - struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) @@ -519,8 +514,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos ? *pos : 0; - iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); - ret = file->f_op->write_iter(&kiocb, &iter); + ret = file->f_op->write_iter(&kiocb, from); if (ret > 0) { if (pos) *pos = kiocb.ki_pos; @@ -530,6 +524,18 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t inc_syscw(current); return ret; } + +/* caller is responsible for file_start_write/file_end_write */ +ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) +{ + struct kvec iov = { + .iov_base = (void *)buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct iov_iter iter; + iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); + return __kernel_write_iter(file, &iter, pos); +} /* * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", * but autofs is one of the few internal kernel users that actually diff --git a/fs/stat.c b/fs/stat.c index 9ced8860e0f3..ef50573c72a2 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -5,6 +5,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +#include <linux/blkdev.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/errno.h> @@ -230,11 +231,22 @@ retry: goto out; error = vfs_getattr(&path, stat, request_mask, flags); + stat->mnt_id = real_mount(path.mnt)->mnt_id; stat->result_mask |= STATX_MNT_ID; + if (path.mnt->mnt_root == path.dentry) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; + + /* Handle STATX_DIOALIGN for block devices. */ + if (request_mask & STATX_DIOALIGN) { + struct inode *inode = d_backing_inode(path.dentry); + + if (S_ISBLK(inode->i_mode)) + bdev_statx_dioalign(inode, stat); + } + path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -611,6 +623,8 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_mnt_id = stat->mnt_id; + tmp.stx_dio_mem_align = stat->dio_mem_align; + tmp.stx_dio_offset_align = stat->dio_offset_align; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } diff --git a/fs/super.c b/fs/super.c index 734ed584a946..6a82660e1adb 100644 --- a/fs/super.c +++ b/fs/super.c @@ -291,7 +291,6 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); - fscrypt_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); @@ -480,6 +479,7 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); /* only nonzero refcount inodes can have marks */ fsnotify_sb_delete(sb); + fscrypt_sb_delete(sb); security_sb_delete(sb); if (sb->s_dio_done_wq) { diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 175de70e3adf..0c1d33c4f74c 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -991,7 +991,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new, int fd; fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new, - O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); + O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); if (fd < 0) return fd; @@ -2094,7 +2094,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) mmgrab(ctx->mm); fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx, - O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); + O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); if (fd < 0) { mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); diff --git a/fs/verity/read_metadata.c b/fs/verity/read_metadata.c index 6ee849dc7bc1..2aefc5565152 100644 --- a/fs/verity/read_metadata.c +++ b/fs/verity/read_metadata.c @@ -53,14 +53,14 @@ static int fsverity_read_merkle_tree(struct inode *inode, break; } - virt = kmap(page); + virt = kmap_local_page(page); if (copy_to_user(buf, virt + offs_in_page, bytes_to_copy)) { - kunmap(page); + kunmap_local(virt); put_page(page); err = -EFAULT; break; } - kunmap(page); + kunmap_local(virt); put_page(page); retval += bytes_to_copy; diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 14e2fb49cff5..bde8c9b7d25f 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -39,16 +39,6 @@ static void hash_at_level(const struct merkle_tree_params *params, (params->log_blocksize - params->log_arity); } -/* Extract a hash from a hash page */ -static void extract_hash(struct page *hpage, unsigned int hoffset, - unsigned int hsize, u8 *out) -{ - void *virt = kmap_atomic(hpage); - - memcpy(out, virt + hoffset, hsize); - kunmap_atomic(virt); -} - static inline int cmp_hashes(const struct fsverity_info *vi, const u8 *want_hash, const u8 *real_hash, pgoff_t index, int level) @@ -129,7 +119,7 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, } if (PageChecked(hpage)) { - extract_hash(hpage, hoffset, hsize, _want_hash); + memcpy_from_page(_want_hash, hpage, hoffset, hsize); want_hash = _want_hash; put_page(hpage); pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n", @@ -158,7 +148,7 @@ descend: if (err) goto out; SetPageChecked(hpage); - extract_hash(hpage, hoffset, hsize, _want_hash); + memcpy_from_page(_want_hash, hpage, hoffset, hsize); want_hash = _want_hash; put_page(hpage); pr_debug("Verified hash page at level %d, now want %s:%*phN\n", diff --git a/fs/xattr.c b/fs/xattr.c index a1f4998bc6be..61107b6bbed2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -290,7 +290,7 @@ static inline bool is_posix_acl_xattr(const char *name) int vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, - const char *name, void *value, size_t size, int flags) + const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; struct inode *delegated_inode = NULL; @@ -298,16 +298,12 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, int error; if (size && strcmp(name, XATTR_NAME_CAPS) == 0) { - error = cap_convert_nscap(mnt_userns, dentry, - (const void **)&value, size); + error = cap_convert_nscap(mnt_userns, dentry, &value, size); if (error < 0) return error; size = error; } - if (size && is_posix_acl_xattr(name)) - posix_acl_setxattr_idmapped_mnt(mnt_userns, inode, value, size); - retry_deleg: inode_lock(inode); error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size, @@ -587,9 +583,7 @@ int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) static void setxattr_convert(struct user_namespace *mnt_userns, struct dentry *d, struct xattr_ctx *ctx) { - if (ctx->size && - ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || - (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) + if (ctx->size && is_posix_acl_xattr(ctx->kname->name)) posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size); } @@ -705,8 +699,7 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d, error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size); if (error > 0) { - if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || - (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) + if (is_posix_acl_xattr(kname)) posix_acl_fix_xattr_to_user(ctx->kvalue, error); if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) error = -EFAULT; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 45518b8c613c..f51c60d7e205 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -604,6 +604,16 @@ xfs_vn_getattr( stat->blksize = BLKDEV_IOSIZE; stat->rdev = inode->i_rdev; break; + case S_IFREG: + if (request_mask & STATX_DIOALIGN) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct block_device *bdev = target->bt_bdev; + + stat->result_mask |= STATX_DIOALIGN; + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + } + fallthrough; default: stat->blksize = xfs_stat_blksize(ip); stat->rdev = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e7d27373ff71..c09d72986968 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -365,7 +365,6 @@ struct acpi_device { int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; - struct acpi_device *parent; struct list_head wakeup_list; struct list_head del_list; struct acpi_device_status status; @@ -458,6 +457,14 @@ static inline void *acpi_driver_data(struct acpi_device *d) #define to_acpi_device(d) container_of(d, struct acpi_device, dev) #define to_acpi_driver(d) container_of(d, struct acpi_driver, drv) +static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) +{ + if (adev->dev.parent) + return to_acpi_device(adev->dev.parent); + + return NULL; +} + static inline void acpi_set_device_status(struct acpi_device *adev, u32 sta) { *((u32 *)&adev->status) = sta; @@ -512,7 +519,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); * External Functions */ -struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); @@ -613,8 +619,7 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, struct fwnode_handle *fwnode, const struct iommu_ops *ops); -int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, - u64 *size); +int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); static inline int acpi_dma_configure(struct device *dev, @@ -733,10 +738,24 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev) } bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2); +int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer); void acpi_dev_clear_dependencies(struct acpi_device *supplier); bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); -struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier); +struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier, + struct acpi_device *start); + +/** + * for_each_acpi_consumer_dev - iterate over the consumer ACPI devices for a + * given supplier + * @supplier: Pointer to the supplier's ACPI device + * @consumer: Pointer to &struct acpi_device to hold the consumer, initially NULL + */ +#define for_each_acpi_consumer_dev(supplier, consumer) \ + for (consumer = acpi_dev_get_next_consumer_dev(supplier, NULL); \ + consumer; \ + consumer = acpi_dev_get_next_consumer_dev(supplier, consumer)) + struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); struct acpi_device * @@ -767,9 +786,10 @@ static inline void acpi_dev_put(struct acpi_device *adev) put_device(&adev->dev); } -struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); +struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); +struct acpi_device *acpi_get_acpi_dev(acpi_handle handle); -static inline void acpi_bus_put_acpi_device(struct acpi_device *adev) +static inline void acpi_put_acpi_dev(struct acpi_device *adev) { acpi_dev_put(adev); } diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f73d357ecdf5..c5614444031f 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -140,6 +140,7 @@ extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); +extern bool cppc_perf_ctrs_in_pcc(void); extern bool acpi_cpc_valid(void); extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); @@ -173,6 +174,10 @@ static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps) { return -ENOTSUPP; } +static inline bool cppc_perf_ctrs_in_pcc(void) +{ + return false; +} static inline bool acpi_cpc_valid(void) { return false; diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index ba1f860af38b..4050b191e1a9 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -220,22 +220,6 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...); # define WARN_ON_SMP(x) ({0;}) #endif -/* - * WARN_ON_FUNCTION_MISMATCH() warns if a value doesn't match a - * function address, and can be useful for catching issues with - * callback functions, for example. - * - * With CONFIG_CFI_CLANG, the warning is disabled because the - * compiler replaces function addresses taken in C code with - * local jump table addresses, which breaks cross-module function - * address equality. - */ -#if defined(CONFIG_CFI_CLANG) && defined(CONFIG_MODULES) -# define WARN_ON_FUNCTION_MISMATCH(x, fn) ({ 0; }) -#else -# define WARN_ON_FUNCTION_MISMATCH(x, fn) WARN_ON_ONCE((x) != (fn)) -#endif - #endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 2a1a7c10cbd9..a232f518d98a 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,6 +430,22 @@ #endif /* + * .kcfi_traps contains a list KCFI trap locations. + */ +#ifndef KCFI_TRAPS +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +#define KCFI_TRAPS \ + __kcfi_traps : AT(ADDR(__kcfi_traps) - LOAD_OFFSET) { \ + __start___kcfi_traps = .; \ + KEEP(*(.kcfi_traps)) \ + __stop___kcfi_traps = .; \ + } +#else +#define KCFI_TRAPS +#endif +#endif + +/* * Read only Data */ #define RO_DATA(align) \ @@ -537,6 +553,8 @@ __stop___modver = .; \ } \ \ + KCFI_TRAPS \ + \ RO_EXCEPTION_TABLE \ NOTES \ BTF \ @@ -546,20 +564,6 @@ /* - * .text..L.cfi.jumptable.* contain Control-Flow Integrity (CFI) - * jump table entries. - */ -#ifdef CONFIG_CFI_CLANG -#define TEXT_CFI_JT \ - ALIGN_FUNCTION(); \ - __cfi_jt_start = .; \ - *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \ - __cfi_jt_end = .; -#else -#define TEXT_CFI_JT -#endif - -/* * Non-instrumentable text section */ #define NOINSTR_TEXT \ @@ -586,7 +590,6 @@ *(.text..refcount) \ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ - TEXT_CFI_JT \ MEM_KEEP(init.text*) \ MEM_KEEP(exit.text*) \ @@ -1015,8 +1018,7 @@ * keep any .init_array.* sections. * https://bugs.llvm.org/show_bug.cgi?id=46478 */ -#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) || \ - defined(CONFIG_CFI_CLANG) +#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) # ifdef CONFIG_CONSTRUCTORS # define SANITIZER_DISCARDS \ *(.eh_frame) diff --git a/include/linux/a.out.h b/include/linux/a.out.h deleted file mode 100644 index 600cf45645c6..000000000000 --- a/include/linux/a.out.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __A_OUT_GNU_H__ -#define __A_OUT_GNU_H__ - -#include <uapi/linux/a.out.h> - -#ifndef __ASSEMBLY__ -#ifdef linux -#include <asm/page.h> -#if defined(__i386__) || defined(__mc68000__) -#else -#ifndef SEGMENT_SIZE -#define SEGMENT_SIZE PAGE_SIZE -#endif -#endif -#endif -#endif /*__ASSEMBLY__ */ -#endif /* __A_OUT_GNU_H__ */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6f64b2f3dc54..2f9193b8dfc1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,14 +279,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); +#if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) +void acpi_arch_dma_setup(struct device *dev); +#else +static inline void acpi_arch_dma_setup(struct device *dev) { } +#endif + #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } -static inline void -acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); @@ -506,6 +509,7 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); +int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); @@ -798,6 +802,11 @@ acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *u return false; } +static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) +{ + return -ENODEV; +} + static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { @@ -977,8 +986,7 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, - u64 *offset, u64 *size) +static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 3dc20c4f394c..8d51f69f9f5e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -43,9 +43,6 @@ struct linux_binprm { * original userspace. */ point_of_no_return:1; -#ifdef __alpha__ - unsigned int taso:1; -#endif struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b13fdd34a7..8038c5fbde40 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1498,6 +1498,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) @@ -1514,6 +1515,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } +static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +{ +} static inline void printk_all_partitions(void) { } diff --git a/include/linux/cfi.h b/include/linux/cfi.h index c6dfc1ed0626..5e134f4ce8b7 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -2,49 +2,38 @@ /* * Clang Control Flow Integrity (CFI) support. * - * Copyright (C) 2021 Google LLC + * Copyright (C) 2022 Google LLC */ #ifndef _LINUX_CFI_H #define _LINUX_CFI_H -#ifdef CONFIG_CFI_CLANG -typedef void (*cfi_check_fn)(uint64_t id, void *ptr, void *diag); - -/* Compiler-generated function in each module, and the kernel */ -extern void __cfi_check(uint64_t id, void *ptr, void *diag); - -/* - * Force the compiler to generate a CFI jump table entry for a function - * and store the jump table address to __cfi_jt_<function>. - */ -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn - -#ifdef CONFIG_CFI_CLANG_SHADOW - -extern void cfi_module_add(struct module *mod, unsigned long base_addr); -extern void cfi_module_remove(struct module *mod, unsigned long base_addr); - -#else +#include <linux/bug.h> +#include <linux/module.h> -static inline void cfi_module_add(struct module *mod, unsigned long base_addr) {} -static inline void cfi_module_remove(struct module *mod, unsigned long base_addr) {} - -#endif /* CONFIG_CFI_CLANG_SHADOW */ - -#else /* !CONFIG_CFI_CLANG */ - -#ifdef CONFIG_X86_KERNEL_IBT - -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn +#ifdef CONFIG_CFI_CLANG +enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, + unsigned long *target, u32 type); -#endif /* CONFIG_X86_KERNEL_IBT */ +static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, + unsigned long addr) +{ + return report_cfi_failure(regs, addr, NULL, 0); +} +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +bool is_cfi_trap(unsigned long addr); +#endif #endif /* CONFIG_CFI_CLANG */ -#ifndef __CFI_ADDRESSABLE -#define __CFI_ADDRESSABLE(fn, __attr) -#endif +#ifdef CONFIG_MODULES +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod); +#else +static inline void module_cfi_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) {} +#endif /* CONFIG_ARCH_USES_CFI_TRAPS */ +#endif /* CONFIG_MODULES */ #endif /* _LINUX_CFI_H */ diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index c84fec767445..42e55579d649 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -66,17 +66,9 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#define __nocfi __attribute__((__no_sanitize__("cfi"))) -#define __cficanonical __attribute__((__cfi_canonical_jump_table__)) - -#if defined(CONFIG_CFI_CLANG) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function address - * references with the address of the function's CFI jump table - * entry. The function_nocfi macro always returns the address of the - * actual function instead. - */ -#define function_nocfi(x) __builtin_function_start(x) +#if __has_feature(kcfi) +/* Disable CFI checking inside a function. */ +#define __nocfi __attribute__((__no_sanitize__("kcfi"))) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7713d7bcdaea..973a1bfd7ef5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -203,16 +203,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function addresses in - * instrumented C code with jump table addresses. Architectures that - * support CFI can define this macro to return the actual function address - * when needed. - */ -#ifndef function_nocfi -#define function_nocfi(x) (x) -#endif - #endif /* __KERNEL__ */ /* @@ -221,9 +211,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * otherwise, or eliminated entirely due to lack of references that are * visible to the compiler. */ -#define __ADDRESSABLE(sym) \ - static void * __section(".discard.addressable") __used \ +#define ___ADDRESSABLE(sym, __attrs) \ + static void * __used __attrs \ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; +#define __ADDRESSABLE(sym) \ + ___ADDRESSABLE(sym, __section(".discard.addressable")) /** * offset_to_ptr - convert a relative memory offset to an absolute pointer diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4f2a819fd60a..d9d98e8a9a3b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,8 +4,12 @@ #ifndef __ASSEMBLY__ +/* + * Skipped when running bindgen due to a libclang issue; + * see https://github.com/rust-lang/rust-bindgen/issues/2244. + */ #if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ - __has_attribute(btf_type_tag) + __has_attribute(btf_type_tag) && !defined(__BINDGEN__) # define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value))) #else # define BTF_TYPE_TAG(value) /* nothing */ @@ -265,10 +269,6 @@ struct ftrace_likely_data { # define __nocfi #endif -#ifndef __cficanonical -# define __cficanonical -#endif - /* * Any place that could be marked with the "alloc_size" attribute is also * a place to be marked with the "malloc" attribute. Do this as part of the diff --git a/include/linux/dlm.h b/include/linux/dlm.h index ff951e9f6f20..c6bc2b5ee7e6 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -56,9 +56,6 @@ struct dlm_lockspace_ops { * DLM_LSFL_TIMEWARN * The dlm should emit netlink messages if locks have been waiting * for a configurable amount of time. (Unused.) - * DLM_LSFL_FS - * The lockspace user is in the kernel (i.e. filesystem). Enables - * direct bast/cast callbacks. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. * @@ -134,7 +131,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*lockast) (void *astarg), diff --git a/include/linux/edac.h b/include/linux/edac.h index e730b3468719..fa4bda2a70f6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -231,21 +231,21 @@ enum mem_type { #define MEM_FLAG_DDR BIT(MEM_DDR) #define MEM_FLAG_RDDR BIT(MEM_RDDR) #define MEM_FLAG_RMBS BIT(MEM_RMBS) -#define MEM_FLAG_DDR2 BIT(MEM_DDR2) -#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) -#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) -#define MEM_FLAG_XDR BIT(MEM_XDR) -#define MEM_FLAG_DDR3 BIT(MEM_DDR3) -#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) -#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) -#define MEM_FLAG_DDR4 BIT(MEM_DDR4) -#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) -#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) -#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) -#define MEM_FLAG_DDR5 BIT(MEM_DDR5) -#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) -#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) -#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_DDR2 BIT(MEM_DDR2) +#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) +#define MEM_FLAG_XDR BIT(MEM_XDR) +#define MEM_FLAG_DDR3 BIT(MEM_DDR3) +#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) +#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) +#define MEM_FLAG_DDR4 BIT(MEM_DDR4) +#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) +#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) +#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 3b401fa0f374..b62c90cfafaf 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bug.h> #include <linux/const.h> +#include <linux/limits.h> #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) @@ -17,9 +19,10 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" #define __compiletime_strlen(p) \ ({ \ unsigned char *__p = (unsigned char *)(p); \ - size_t __ret = (size_t)-1; \ - size_t __p_size = __builtin_object_size(p, 1); \ - if (__p_size != (size_t)-1) { \ + size_t __ret = SIZE_MAX; \ + size_t __p_size = __member_size(p); \ + if (__p_size != SIZE_MAX && \ + __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ if (__builtin_constant_p(__p[__p_len]) && \ __p[__p_len] == '\0') \ @@ -69,20 +72,59 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __underlying_memcpy(dst, src, bytes) /* - * Clang's use of __builtin_object_size() within inlines needs hinting via - * __pass_object_size(). The preference is to only ever use type 1 (member + * Clang's use of __builtin_*object_size() within inlines needs hinting via + * __pass_*object_size(). The preference is to only ever use type 1 (member * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ -#define POS __pass_object_size(1) -#define POS0 __pass_object_size(0) +#define POS __pass_object_size(1) +#define POS0 __pass_object_size(0) +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#define __compiletime_lessthan(bounds, length) ( \ + __builtin_constant_p((bounds) < (length)) && \ + (bounds) < (length) \ +) + +/** + * strncpy - Copy a string to memory with non-guaranteed NUL padding + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * @size: bytes to write at @p + * + * If strlen(@q) >= @size, the copy of @q will stop after @size bytes, + * and @p will NOT be NUL-terminated + * + * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes + * will be written to @p until @size total bytes have been written. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * over-reads of @q, it cannot defend against writing unterminated + * results to @p. Using strncpy() remains ambiguous and fragile. + * Instead, please choose an alternative, so that the expectation + * of @p's contents is unambiguous: + * + * +--------------------+-----------------+------------+ + * | @p needs to be: | padded to @size | not padded | + * +====================+=================+============+ + * | NUL-terminated | strscpy_pad() | strscpy() | + * +--------------------+-----------------+------------+ + * | not NUL-terminated | strtomem_pad() | strtomem() | + * +--------------------+-----------------+------------+ + * + * Note strscpy*()'s differing return values for detecting truncation, + * and strtomem*()'s expectation that the destination is marked with + * __nonstring when it is a character array. + * + */ __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) fortify_panic(__func__); @@ -92,9 +134,9 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) fortify_panic(__func__); @@ -104,12 +146,12 @@ char *strcat(char * const POS p, const char *q) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ - if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) { + if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) { /* If p is const, we can use its compile-time-known len. */ if (maxlen >= p_size) return p_len; @@ -134,10 +176,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); /* Give up if we don't know how large p is. */ - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) @@ -149,12 +191,12 @@ __kernel_size_t __fortify_strlen(const char * const POS p) extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t q_len; /* Full count of source string length. */ size_t len; /* Count of characters going into destination. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strlcpy(p, q, size); q_len = strlen(q); len = (q_len >= size) ? size - 1 : q_len; @@ -178,18 +220,18 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s { size_t len; /* Use string size rather than possible enclosing struct size. */ - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); /* If we cannot get size of p and q default to call strscpy. */ - if (p_size == (size_t) -1 && q_size == (size_t) -1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strscpy(p, q, size); /* * If size can be known at compile time and is greater than * p_size, generate a compile time write overflow error. */ - if (__builtin_constant_p(size) && size > p_size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* @@ -224,10 +266,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); @@ -246,15 +288,16 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); /* Warn when write size is larger than dest field. */ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); } /* @@ -268,10 +311,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if (p_size != (size_t)(-1) && p_size < size) + if (p_size != SIZE_MAX && p_size < size) fortify_panic("memset"); } @@ -282,11 +325,11 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ - __builtin_object_size(p, 0), __builtin_object_size(p, 1)) + __struct_size(p), __member_size(p)) /* * To make sure the compiler can enforce protection against buffer overflows, @@ -319,7 +362,7 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * V = vulnerable to run-time overflow (will need refactoring to solve) * */ -__FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t p_size, const size_t q_size, const size_t p_size_field, @@ -330,25 +373,28 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); - if (q_size > q_size_field && q_size < size) + if (__compiletime_lessthan(q_size_field, q_size) && + __compiletime_lessthan(q_size, size)) __read_overflow2(); /* Warn when write size argument larger than dest field. */ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); /* * Warn for source field over-read when building with W=1 * or when an over-write happened, so both can be fixed at * the same time. */ - if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || p_size_field < size) && - q_size_field < size) + if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || + __compiletime_lessthan(p_size_field, size)) && + __compiletime_lessthan(q_size_field, size)) __read_overflow2_field(q_size_field, size); } /* @@ -362,41 +408,104 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != (size_t)(-1) && p_size < size) || - (q_size != (size_t)(-1) && q_size < size)) + if ((p_size != SIZE_MAX && p_size < size) || + (q_size != SIZE_MAX && q_size < size)) fortify_panic(func); + + /* + * Warn when writing beyond destination field size. + * + * We must ignore p_size_field == 0 for existing 0-element + * fake flexible arrays, until they are all converted to + * proper flexible arrays. + * + * The implementation of __builtin_*object_size() behaves + * like sizeof() when not directly referencing a flexible + * array member, which means there will be many bounds checks + * that will appear at run-time, without a way for them to be + * detected at compile-time (as can be done when the destination + * is specifically the flexible array member). + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 + */ + if (p_size_field != 0 && p_size_field != SIZE_MAX && + p_size != p_size_field && p_size_field < size) + return true; + + return false; } #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ size_t __fortify_size = (size_t)(size); \ - fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ + p_size_field, q_size_field, #op), \ + #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ + __fortify_size, \ + "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * Notes about compile-time buffer size detection: + * + * With these types... + * + * struct middle { + * u16 a; + * u8 middle_buf[16]; + * int b; + * }; + * struct end { + * u16 a; + * u8 end_buf[16]; + * }; + * struct flex { + * int a; + * u8 flex_buf[]; + * }; + * + * void func(TYPE *ptr) { ... } + * + * Cases where destination size cannot be currently detected: + * - the size of ptr's object (seemingly by design, gcc & clang fail): + * __builtin_object_size(ptr, 1) == SIZE_MAX + * - the size of flexible arrays in ptr's obj (by design, dynamic size): + * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX + * - the size of ANY array at the end of ptr's obj (gcc and clang bug): + * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836 + * + * Cases where destination size is currently detected: + * - the size of non-array members within ptr's object: + * __builtin_object_size(ptr->a, 1) == 2 + * - the size of non-flexible-array in the middle of ptr's obj: + * __builtin_object_size(ptr->middle_buf, 1) == 16 + * + */ + +/* + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memcpy) #define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -406,13 +515,13 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __struct_size(p); + size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { - if (p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); - if (q_size < size) + if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } if (p_size < size || q_size < size) @@ -423,9 +532,9 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -435,9 +544,9 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -447,9 +556,9 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -460,16 +569,18 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (__builtin_constant_p(p_size) && + __builtin_constant_p(q_size) && + p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strcpy(p, q); size = strlen(q) + 1; /* Compile-time check for const size overflow. */ - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eced4cc286e..0830486f47ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1472,7 +1472,7 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; - struct key *s_master_keys; /* master crypto keys in use */ + struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7d2f1e0f23b1..cad78b569c7e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -161,24 +161,21 @@ struct fscrypt_operations { int *ino_bits_ret, int *lblk_bits_ret); /* - * Return the number of block devices to which the filesystem may write - * encrypted file contents. + * Return an array of pointers to the block devices to which the + * filesystem may write encrypted file contents, NULL if the filesystem + * only has a single such block device, or an ERR_PTR() on error. + * + * On successful non-NULL return, *num_devs is set to the number of + * devices in the returned array. The caller must free the returned + * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. */ - int (*get_num_devices)(struct super_block *sb); - - /* - * If ->get_num_devices() returns a value greater than 1, then this - * function is called to get the array of request_queues that the - * filesystem is using -- one per block device. (There may be duplicate - * entries in this array, as block devices can share a request_queue.) - */ - void (*get_devices)(struct super_block *sb, - struct request_queue **devs); + struct block_device **(*get_devices)(struct super_block *sb, + unsigned int *num_devs); }; static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) @@ -295,8 +292,6 @@ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); -int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, - struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline bool @@ -312,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_free(struct super_block *sb); +void fscrypt_sb_delete(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_add_test_dummy_key(struct super_block *sb, const struct fscrypt_dummy_policy *dummy_policy); @@ -353,7 +348,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ -void fscrypt_decrypt_bio(struct bio *bio); +bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); @@ -526,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_free(struct super_block *sb) +static inline void fscrypt_sb_delete(struct super_block *sb) { } @@ -646,8 +641,9 @@ static inline int fscrypt_d_revalidate(struct dentry *dentry, } /* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) +static inline bool fscrypt_decrypt_bio(struct bio *bio) { + return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, @@ -768,7 +764,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); +bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); @@ -801,11 +797,8 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, return true; } -static inline bool fscrypt_dio_supported(struct kiocb *iocb, - struct iov_iter *iter) +static inline bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - return !fscrypt_needs_contents_encryption(inode); } diff --git a/include/linux/init.h b/include/linux/init.h index baf0b29a7010..a0a90cd73ebe 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline __nocfi +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") @@ -220,8 +220,8 @@ extern bool initcall_debug; __initcall_name(initstub, __iid, id) #define __define_initcall_stub(__stub, fn) \ - int __init __cficanonical __stub(void); \ - int __init __cficanonical __stub(void) \ + int __init __stub(void); \ + int __init __stub(void) \ { \ return fn(); \ } \ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ad39636e0c3f..649faac31ddb 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -15,7 +15,7 @@ #include <asm/sections.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 60fff133c0b1..f8715ddbfcf4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) +LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, u32 *secid) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3aa6030302f5..4ec80b96c22e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -806,6 +806,10 @@ * security attributes, e.g. for /proc/pid inodes. * @p contains the task_struct for the task. * @inode contains the inode structure for the inode. + * @userns_create: + * Check permission prior to creating a new user namespace. + * @cred points to prepared creds. + * Return 0 if successful, otherwise < 0 error code. * * Security hooks for Netlink messaging. * diff --git a/include/linux/module.h b/include/linux/module.h index 518296ea7f73..ec61fb53979a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -27,7 +27,6 @@ #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> -#include <linux/cfi.h> #include <linux/percpu.h> #include <asm/module.h> @@ -132,7 +131,7 @@ extern void cleanup_module(void); { return initfn; } \ int init_module(void) __copy(initfn) \ __attribute__((alias(#initfn))); \ - __CFI_ADDRESSABLE(init_module, __initdata); + ___ADDRESSABLE(init_module, __initdata); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ @@ -140,7 +139,7 @@ extern void cleanup_module(void); { return exitfn; } \ void cleanup_module(void) __copy(exitfn) \ __attribute__((alias(#exitfn))); \ - __CFI_ADDRESSABLE(cleanup_module, __exitdata); + ___ADDRESSABLE(cleanup_module, __exitdata); #endif @@ -387,8 +386,9 @@ struct module { const s32 *crcs; unsigned int num_syms; -#ifdef CONFIG_CFI_CLANG - cfi_check_fn cfi_check; +#ifdef CONFIG_ARCH_USES_CFI_TRAPS + s32 *kcfi_traps; + s32 *kcfi_traps_end; #endif /* Kernel parameters. */ diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0eb3b192f07a..19dfdd74835e 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -51,40 +51,50 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -/* - * For simplicity and code hygiene, the fallback code below insists on - * a, b and *d having the same type (similar to the min() and max() - * macros), whereas gcc's type-generic overflow checkers accept - * different types. Hence we don't just make check_add_overflow an - * alias for __builtin_add_overflow, but add type checks similar to - * below. +/** check_add_overflow() - Calculate addition with overflow checking + * + * @a: first addend + * @b: second addend + * @d: pointer to store sum + * + * Returns 0 on success. + * + * *@d holds the results of the attempted addition, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * sum has overflowed or been truncated. */ -#define check_add_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_add_overflow(__a, __b, __d); \ -})) +#define check_add_overflow(a, b, d) \ + __must_check_overflow(__builtin_add_overflow(a, b, d)) -#define check_sub_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_sub_overflow(__a, __b, __d); \ -})) +/** check_sub_overflow() - Calculate subtraction with overflow checking + * + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * @d: pointer to store difference + * + * Returns 0 on success. + * + * *@d holds the results of the attempted subtraction, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * difference has underflowed or been truncated. + */ +#define check_sub_overflow(a, b, d) \ + __must_check_overflow(__builtin_sub_overflow(a, b, d)) -#define check_mul_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_mul_overflow(__a, __b, __d); \ -})) +/** check_mul_overflow() - Calculate multiplication with overflow checking + * + * @a: first factor + * @b: second factor + * @d: pointer to store product + * + * Returns 0 on success. + * + * *@d holds the results of the attempted multiplication, but is not + * considered "safe for use" on a non-zero return value, which indicates + * that the product has overflowed or been truncated. + */ +#define check_mul_overflow(a, b, d) \ + __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** check_shl_overflow() - Calculate a left-shifted value and check overflow * diff --git a/include/linux/pci.h b/include/linux/pci.h index 060af91bafcd..5da0846aa3c1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2019,8 +2019,8 @@ enum pci_fixup_pass { #ifdef CONFIG_LTO_CLANG #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ class_shift, hook, stub) \ - void __cficanonical stub(struct pci_dev *dev); \ - void __cficanonical stub(struct pci_dev *dev) \ + void stub(struct pci_dev *dev); \ + void stub(struct pci_dev *dev) \ { \ hook(dev); \ } \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 15b49e655ce3..b362d90eb9b0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -75,6 +75,9 @@ #define PCI_CLASS_COMMUNICATION_MODEM 0x0703 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +/* Interface for SERIAL/MODEM */ +#define PCI_SERIAL_16550_COMPATIBLE 0x02 + #define PCI_BASE_CLASS_SYSTEM 0x08 #define PCI_CLASS_SYSTEM_PIC 0x0800 #define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h index 126d082c3f2e..e605a2cab07f 100644 --- a/include/linux/platform_data/tps68470.h +++ b/include/linux/platform_data/tps68470.h @@ -27,9 +27,14 @@ struct tps68470_regulator_platform_data { const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS]; }; -struct tps68470_clk_platform_data { +struct tps68470_clk_consumer { const char *consumer_dev_name; const char *consumer_con_id; }; +struct tps68470_clk_platform_data { + unsigned int n_consumers; + struct tps68470_clk_consumer consumers[]; +}; + #endif diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index b6bd3eac2bcc..8163dd48c430 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -38,9 +38,6 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size); void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, const struct inode *inode, void *value, size_t size); -void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { @@ -54,18 +51,15 @@ posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, size_t size) { } -static inline void -posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, void *value, - size_t size) -{ -} #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); +struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const void *value, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f527f27e6438..08605ce7379d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -42,7 +42,31 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); + +struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); + +// Maximum number of unsigned long values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_OLDSTATE 2 + +/** + * same_state_synchronize_rcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or + * get_completed_synchronize_rcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} #ifdef CONFIG_PREEMPT_RCU @@ -496,13 +520,21 @@ do { \ * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. + * Within an RCU read-side critical section, there is little reason to + * use rcu_access_pointer(). + * + * It is usually best to test the rcu_access_pointer() return value + * directly in order to avoid accidental dereferences being introduced + * by later inattentive changes. In other words, assigning the + * rcu_access_pointer() return value to a local variable results in an + * accident waiting to happen. * * It is also permissible to use rcu_access_pointer() when read-side - * access to the pointer was removed at least one grace period ago, as - * is the case in the context of the RCU callback that is freeing up - * the data, or after a synchronize_rcu() returns. This can be useful - * when tearing down multi-linked structures after a grace period - * has elapsed. + * access to the pointer was removed at least one grace period ago, as is + * the case in the context of the RCU callback that is freeing up the data, + * or after a synchronize_rcu() returns. This can be useful when tearing + * down multi-linked structures after a grace period has elapsed. However, + * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 62815c0a2dce..768196a5f39d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,25 +14,75 @@ #include <asm/param.h> /* for HZ */ +struct rcu_gp_oldstate { + unsigned long rgos_norm; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 2 + +/* + * Are the two oldstate values the same? See the Tree RCU version for + * docbook header. + */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm; +} + unsigned long get_state_synchronize_rcu(void); + +static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = get_state_synchronize_rcu(); +} + unsigned long start_poll_synchronize_rcu(void); + +static inline void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu(); +} + bool poll_state_synchronize_rcu(unsigned long oldstate); +static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + return poll_state_synchronize_rcu(rgosp->rgos_norm); +} + static inline void cond_synchronize_rcu(unsigned long oldstate) { might_sleep(); } +static inline void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu(rgosp->rgos_norm); +} + static inline unsigned long start_poll_synchronize_rcu_expedited(void) { return start_poll_synchronize_rcu(); } +static inline void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu_expedited(); +} + static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) { cond_synchronize_rcu(oldstate); } +static inline void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu_expedited(rgosp->rgos_norm); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 47eaa4cb0df7..5efb51486e8a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,12 +40,52 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); + +struct rcu_gp_oldstate { + unsigned long rgos_norm; + unsigned long rgos_exp; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 4 + +/** + * same_state_synchronize_rcu_full - Are two old-state values identical? + * @rgosp1: First old-state value. + * @rgosp2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or get_completed_synchronize_rcu_full(). Returns @true if the two + * values are identical and @false otherwise. This allows structures + * whose lifetimes are tracked by old-state values to push these values + * to a list header, allowing those structures to be slightly smaller. + * + * Note that equality is judged on a bitwise basis, so that an + * @rcu_gp_oldstate structure with an already-completed state in one field + * will compare not-equal to a structure with an already-completed state + * in the other field. After all, the @rcu_gp_oldstate structure is opaque + * so how did such a situation come to pass in the first place? + */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm && rgosp1->rgos_exp == rgosp2->rgos_exp; +} + unsigned long start_poll_synchronize_rcu_expedited(void); +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); unsigned long get_state_synchronize_rcu(void); +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool rcu_is_idle_cpu(int cpu); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 21deb5212bbd..0cf5b20c6ddf 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m, #endif +/* max value for struct rdt_domain's mbps_val */ +#define MBA_MAX_MBPS U32_MAX + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -29,6 +32,16 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded @@ -53,6 +66,9 @@ struct resctrl_staged_config { * @cqm_work_cpu: worker CPU for CQM h/w counters * @plr: pseudo-locked region (if any) associated with domain * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid */ struct rdt_domain { struct list_head list; @@ -67,6 +83,7 @@ struct rdt_domain { int cqm_work_cpu; struct pseudo_lock_region *plr; struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; }; /** @@ -130,8 +147,6 @@ struct resctrl_schema; /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available @@ -150,8 +165,6 @@ struct resctrl_schema; */ struct rdt_resource { int rid; - bool alloc_enabled; - bool mon_enabled; bool alloc_capable; bool mon_capable; int num_rmid; @@ -194,7 +207,50 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); + +/* + * Update the ctrl_val and apply this config right now. + * Must be called on one of the domain's CPUs. + */ +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val); + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); + +/** + * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid + * for this resource and domain. + * @r: resource that the counter should be read from. + * @d: domain that the counter should be read from. + * @rmid: rmid of the counter to read. + * @eventid: eventid to read, e.g. L3 occupancy. + * @val: result of the counter read in bytes. + * + * Call from process context on a CPU that belongs to domain @d. + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_rmid() - Reset any private state associated with rmid + * and eventid. + * @r: The domain's resource. + * @d: The rmid's domain. + * @rmid: The rmid whose counter values should be reset. + * @eventid: The eventid whose counter values should be reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid); + +extern unsigned int resctrl_rmid_realloc_threshold; +extern unsigned int resctrl_rmid_realloc_limit; #endif /* _RESCTRL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 7bd0c490703d..3480f61e1b2d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); +int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); int security_msg_msg_alloc(struct msg_msg *msg); @@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2, static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { } +static inline int security_create_user_ns(const struct cred *cred) +{ + return 0; +} + static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 6cfaa0a9a9b9..5aa5e0faf6a1 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -15,10 +15,10 @@ struct srcu_struct { short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ - unsigned short srcu_idx; /* Current reader array element in bit 0x2. */ - unsigned short srcu_idx_max; /* Furthest future srcu_idx request. */ u8 srcu_gp_running; /* GP workqueue running? */ u8 srcu_gp_waiting; /* GP waiting for readers? */ + unsigned long srcu_idx; /* Current reader array element in bit 0x2. */ + unsigned long srcu_idx_max; /* Furthest future srcu_idx request. */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ @@ -82,10 +82,12 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, int idx; idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1; - pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd) gp: %lu->%lu\n", tt, tf, idx, data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])), - data_race(READ_ONCE(ssp->srcu_lock_nesting[idx]))); + data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])), + data_race(READ_ONCE(ssp->srcu_idx)), + data_race(READ_ONCE(ssp->srcu_idx_max))); } #endif diff --git a/include/linux/stat.h b/include/linux/stat.h index 7df06931f25d..ff277ced50e9 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,6 +50,8 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; + u32 dio_mem_align; + u32 dio_offset_align; }; #endif diff --git a/include/linux/string.h b/include/linux/string.h index 61ec7e4f6311..cf7607b32102 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -261,6 +261,49 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad); /** + * strtomem_pad - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * @pad: Padding character to fill any remaining bytes of @dest after copy + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * an explicit padding character. If padding is not required, use strtomem(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. + */ +#define strtomem_pad(dest, src, pad) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ +} while (0) + +/** + * strtomem - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * without trailing padding. If padding is required, use strtomem_pad(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. + */ +#define strtomem(dest, src) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ +} while (0) + +/** * memset_after - Set a value after a struct member to the end of a struct * * @obj: Address of target struct instance diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index daecb009c05b..88de45491376 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -472,6 +472,7 @@ struct svc_procedure { /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ @@ -544,16 +545,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) } /** - * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * svcxdr_init_decode - Prepare an xdr_stream for Call decoding * @rqstp: controlling server RPC transaction context * + * This function currently assumes the RPC header in rq_arg has + * already been decoded. Upon return, xdr->p points to the + * location of the upper layer header. */ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct kvec *argv = rqstp->rq_arg.head; + struct xdr_buf *buf = &rqstp->rq_arg; + struct kvec *argv = buf->head; - xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + /* + * svc_getnl() and friends do not keep the xdr_buf's ::len + * field up to date. Refresh that field before initializing + * the argument decoding stream. + */ + buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; + + xdr_init_decode(xdr, buf, argv->iov_base, NULL); xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } @@ -576,7 +588,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; buf->len = resv->iov_len; xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); + buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); buf->buflen -= rqstp->rq_auth_slack; xdr->rqst = NULL; } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 69175029abbb..f84e2a1358e1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); +extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 70f2921e2e70..23a253df7f6b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -75,7 +75,7 @@ extern struct suspend_stats suspend_stats; static inline void dpm_save_failed_dev(const char *name) { - strlcpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], + strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1386c713885d..6f1ec4fb7ef8 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -17,8 +17,6 @@ #include <linux/workqueue.h> #include <uapi/linux/thermal.h> -#define THERMAL_MAX_TRIPS 12 - /* invalid cooling state */ #define THERMAL_CSTATE_INVALID -1UL @@ -296,82 +294,53 @@ struct thermal_zone_params { int offset; }; -/** - * struct thermal_zone_of_device_ops - callbacks for handling DT based zones - * - * Mandatory: - * @get_temp: a pointer to a function that reads the sensor temperature. - * - * Optional: - * @get_trend: a pointer to a function that reads the sensor temperature trend. - * @set_trips: a pointer to a function that sets a temperature window. When - * this window is left the driver must inform the thermal core via - * thermal_zone_device_update. - * @set_emul_temp: a pointer to a function that sets sensor emulated - * temperature. - * @set_trip_temp: a pointer to a function that sets the trip temperature on - * hardware. - * @change_mode: a pointer to a function that notifies the thermal zone - * mode change. - */ -struct thermal_zone_of_device_ops { - int (*get_temp)(void *, int *); - int (*get_trend)(void *, int, enum thermal_trend *); - int (*set_trips)(void *, int, int); - int (*set_emul_temp)(void *, int); - int (*set_trip_temp)(void *, int, int); - int (*change_mode) (void *, enum thermal_device_mode); -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops); + +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + +void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + int thermal_zone_of_get_sensor_id(struct device_node *tz_np, struct device_node *sensor_np, u32 *id); -struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); -struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); #else - -static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id) -{ - return -ENOENT; -} -static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops) { - return ERR_PTR(-ENODEV); + return ERR_PTR(-ENOTSUPP); } static inline -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops) { + return ERR_PTR(-ENOTSUPP); } -static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz) { - return ERR_PTR(-ENODEV); } -static inline -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +static inline void devm_thermal_of_zone_unregister(struct device *dev, + struct thermal_zone_device *tz) { } +static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, + struct device_node *sensor_np, + u32 *id) +{ + return -ENOENT; +} #endif #ifdef CONFIG_THERMAL diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 979a9d3e5bfb..4c379d23ec6e 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, - void *, size_t, int); + const void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, const char *, struct inode **); diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h index bad21222130e..da0eaae98fa3 100644 --- a/include/trace/events/dlm.h +++ b/include/trace/events/dlm.h @@ -49,7 +49,7 @@ /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */ TRACE_EVENT(dlm_lock_start, - TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name, + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, unsigned int namelen, int mode, __u32 flags), TP_ARGS(ls, lkb, name, namelen, mode, flags), @@ -91,10 +91,11 @@ TRACE_EVENT(dlm_lock_start, TRACE_EVENT(dlm_lock_end, - TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name, - unsigned int namelen, int mode, __u32 flags, int error), + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, + unsigned int namelen, int mode, __u32 flags, int error, + bool kernel_lock), - TP_ARGS(ls, lkb, name, namelen, mode, flags, error), + TP_ARGS(ls, lkb, name, namelen, mode, flags, error, kernel_lock), TP_STRUCT__entry( __field(__u32, ls_id) @@ -113,6 +114,7 @@ TRACE_EVENT(dlm_lock_end, __entry->lkb_id = lkb->lkb_id; __entry->mode = mode; __entry->flags = flags; + __entry->error = error; r = lkb->lkb_resource; if (r) @@ -122,14 +124,14 @@ TRACE_EVENT(dlm_lock_end, memcpy(__get_dynamic_array(res_name), name, __get_dynamic_array_len(res_name)); - /* return value will be zeroed in those cases by dlm_lock() - * we do it here again to not introduce more overhead if - * trace isn't running and error reflects the return value. - */ - if (error == -EAGAIN || error == -EDEADLK) - __entry->error = 0; - else - __entry->error = error; + if (kernel_lock) { + /* return value will be zeroed in those cases by dlm_lock() + * we do it here again to not introduce more overhead if + * trace isn't running and error reflects the return value. + */ + if (error == -EAGAIN || error == -EDEADLK) + __entry->error = 0; + } ), diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index 57de057bd503..4f4c44ea3a65 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -53,15 +53,14 @@ TRACE_EVENT(erofs_lookup, ); TRACE_EVENT(erofs_fill_inode, - TP_PROTO(struct inode *inode, int isdir), - TP_ARGS(inode, isdir), + TP_PROTO(struct inode *inode), + TP_ARGS(inode), TP_STRUCT__entry( __field(dev_t, dev ) __field(erofs_nid_t, nid ) __field(erofs_blk_t, blkaddr ) __field(unsigned int, ofs ) - __field(int, isdir ) ), TP_fast_assign( @@ -69,13 +68,11 @@ TRACE_EVENT(erofs_fill_inode, __entry->nid = EROFS_I(inode)->nid; __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); - __entry->isdir = isdir; ), - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u", show_dev_nid(__entry), - __entry->blkaddr, __entry->ofs, - __entry->isdir) + __entry->blkaddr, __entry->ofs) ); TRACE_EVENT(erofs_readpage, diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index 0d2eca287567..1923f4f3b05e 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -69,7 +69,6 @@ struct dlm_lksb { /* dlm_new_lockspace() flags */ #define DLM_LSFL_TIMEWARN 0x00000002 -#define DLM_LSFL_FS 0x00000004 #define DLM_LSFL_NEWEXCL 0x00000008 diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 23df4e0e8ace..9c4bcc37a455 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -26,7 +26,7 @@ struct landlock_ruleset_attr { * Landlock filesystem access rights that are not part of * handled_access_fs are allowed. This is needed for backward * compatibility reasons. One exception is the - * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly + * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly * handled, but must still be explicitly handled to add new rules with * this access right. */ @@ -128,11 +128,11 @@ struct landlock_path_beneath_attr { * hierarchy must also always have the same or a superset of restrictions of * the source hierarchy. If it is not the case, or if the domain doesn't * handle this access right, such actions are denied by default with errno - * set to EXDEV. Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access - * right on the destination directory, and renaming also requires a - * LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or + * set to ``EXDEV``. Linking also requires a ``LANDLOCK_ACCESS_FS_MAKE_*`` + * access right on the destination directory, and renaming also requires a + * ``LANDLOCK_ACCESS_FS_REMOVE_*`` access right on the source's (file or * directory) parent. Otherwise, such actions are denied with errno set to - * EACCES. The EACCES errno prevails over EXDEV to let user space + * ``EACCES``. The ``EACCES`` errno prevails over ``EXDEV`` to let user space * efficiently deal with an unrecoverable error. * * .. warning:: diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 1500a0f58041..7cab2c65d3d7 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -124,7 +124,8 @@ struct statx { __u32 stx_dev_minor; /* 0x90 */ __u64 stx_mnt_id; - __u64 __spare2; + __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ /* 0xa0 */ __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ @@ -152,6 +153,7 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ +#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/init/Kconfig b/init/Kconfig index 532362fcfe31..a078cb026523 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -60,6 +60,17 @@ config LLD_VERSION default $(ld-version) if LD_IS_LLD default 0 +config RUST_IS_AVAILABLE + def_bool $(success,$(srctree)/scripts/rust_is_available.sh) + help + This shows whether a suitable Rust toolchain is available (found). + + Please see Documentation/rust/quick-start.rst for instructions on how + to satify the build requirements of Rust support. + + In particular, the Makefile target 'rustavailable' is useful to check + why the Rust toolchain is not being detected. + config CC_CAN_LINK bool default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT @@ -147,7 +158,8 @@ config WERROR default COMPILE_TEST help A kernel build should not cause any compiler warnings, and this - enables the '-Werror' flag to enforce that rule by default. + enables the '-Werror' (for C) and '-Dwarnings' (for Rust) flags + to enforce that rule by default. However, if you have a new (or very old) compiler with odd and unusual warnings, or you have some architecture with problems, @@ -1899,6 +1911,38 @@ config PROFILING Say Y here to enable the extended profiling support mechanisms used by profilers. +config RUST + bool "Rust support" + depends on HAVE_RUST + depends on RUST_IS_AVAILABLE + depends on !MODVERSIONS + depends on !GCC_PLUGINS + depends on !RANDSTRUCT + depends on !DEBUG_INFO_BTF + select CONSTRUCTORS + help + Enables Rust support in the kernel. + + This allows other Rust-related options, like drivers written in Rust, + to be selected. + + It is also required to be able to load external kernel modules + written in Rust. + + See Documentation/rust/ for more information. + + If unsure, say N. + +config RUSTC_VERSION_TEXT + string + depends on RUST + default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + +config BINDGEN_VERSION_TEXT + string + depends on RUST + default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version || echo n) + # # Place an empty function call at each tracepoint site. Can be # dynamically changed for a probe function. diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2965b354efc8..242d896c00f3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3357,6 +3357,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, goto err; } + if (ctx->flags & IORING_SETUP_SINGLE_ISSUER + && !(ctx->flags & IORING_SETUP_R_DISABLED)) + ctx->submitter_task = get_task_struct(current); + file = io_uring_get_file(ctx); if (IS_ERR(file)) { ret = PTR_ERR(file); @@ -3548,6 +3552,9 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (!(ctx->flags & IORING_SETUP_R_DISABLED)) return -EBADFD; + if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) + ctx->submitter_task = get_task_struct(current); + if (ctx->restrictions.registered) ctx->restricted = 1; diff --git a/io_uring/poll.c b/io_uring/poll.c index d5bad0bea6e4..0d9f49c575e0 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -857,7 +857,7 @@ int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->buf_index || sqe->off || sqe->addr) return -EINVAL; flags = READ_ONCE(sqe->len); - if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL)) + if (flags & ~IORING_POLL_ADD_MULTI) return -EINVAL; if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) return -EINVAL; diff --git a/kernel/audit.c b/kernel/audit.c index a75978ae38ad..9bc0b0301198 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -321,7 +321,6 @@ static inline int audit_rate_check(void) static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; - unsigned long elapsed; int retval = 0; if (!audit_rate_limit) return 1; @@ -330,9 +329,8 @@ static inline int audit_rate_check(void) if (++messages < audit_rate_limit) { retval = 1; } else { - now = jiffies; - elapsed = now - last_check; - if (elapsed > HZ) { + now = jiffies; + if (time_after(now, last_check + HZ)) { last_check = now; messages = 0; retval = 1; @@ -366,7 +364,7 @@ void audit_log_lost(const char *message) if (!print) { spin_lock_irqsave(&lock, flags); now = jiffies; - if (now - last_msg > HZ) { + if (time_after(now, last_msg + HZ)) { print = 1; last_msg = now; } diff --git a/kernel/audit.h b/kernel/audit.h index 58b66543b4d5..c57b008b9914 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -133,7 +133,7 @@ struct audit_context { struct sockaddr_storage *sockaddr; size_t sockaddr_len; /* Save things to print about task_struct */ - pid_t pid, ppid; + pid_t ppid; kuid_t uid, euid, suid, fsuid; kgid_t gid, egid, sgid, fsgid; unsigned long personality; @@ -245,8 +245,6 @@ struct audit_netlink_list { int audit_send_list_thread(void *_dest); -extern int selinux_audit_rule_update(void); - extern struct mutex audit_filter_mutex; extern int audit_del_rule(struct audit_entry *entry); extern void audit_free_rule_rcu(struct rcu_head *head); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 79a5da1bc5bb..9f8c05228d6d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -965,7 +965,7 @@ static void audit_reset_context(struct audit_context *ctx) if (!ctx) return; - /* if ctx is non-null, reset the "ctx->state" regardless */ + /* if ctx is non-null, reset the "ctx->context" regardless */ ctx->context = AUDIT_CTX_UNUSED; if (ctx->dummy) return; @@ -1002,7 +1002,7 @@ static void audit_reset_context(struct audit_context *ctx) kfree(ctx->sockaddr); ctx->sockaddr = NULL; ctx->sockaddr_len = 0; - ctx->pid = ctx->ppid = 0; + ctx->ppid = 0; ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0); ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0); ctx->personality = 0; @@ -1016,7 +1016,6 @@ static void audit_reset_context(struct audit_context *ctx) WARN_ON(!list_empty(&ctx->killed_trees)); audit_free_module(ctx); ctx->fds[0] = -1; - audit_proctitle_free(ctx); ctx->type = 0; /* reset last for audit_free_*() */ } @@ -1077,6 +1076,7 @@ static inline void audit_free_context(struct audit_context *context) { /* resetting is extra work, but it is likely just noise */ audit_reset_context(context); + audit_proctitle_free(context); free_tree_refs(context); kfree(context->filterkey); kfree(context); @@ -1833,7 +1833,7 @@ void __audit_free(struct task_struct *tsk) /* We are called either by do_exit() or the fork() error handling code; * in the former case tsk == current and in the latter tsk is a - * random task_struct that doesn't doesn't have any meaningful data we + * random task_struct that doesn't have any meaningful data we * need to log via audit_log_exit(). */ if (tsk == current && !context->dummy) { @@ -2069,7 +2069,7 @@ void __audit_syscall_exit(int success, long return_code) /* run through both filters to ensure we set the filterkey properly */ audit_filter_syscall(current, context); audit_filter_inodes(current, context); - if (context->current_state < AUDIT_STATE_RECORD) + if (context->current_state != AUDIT_STATE_RECORD) goto out; audit_log_exit(); diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 4fd845bc5a12..d6c9b3705f24 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -340,6 +340,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) BTF_ID(func, bpf_lsm_task_to_inode) +BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) bool bpf_lsm_is_sleepable_hook(u32 btf_id) diff --git a/kernel/cfi.c b/kernel/cfi.c index 2046276ee234..08caad776717 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -1,339 +1,101 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Clang Control Flow Integrity (CFI) error and slowpath handling. + * Clang Control Flow Integrity (CFI) error handling. * - * Copyright (C) 2021 Google LLC + * Copyright (C) 2022 Google LLC */ -#include <linux/hardirq.h> -#include <linux/kallsyms.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/printk.h> -#include <linux/ratelimit.h> -#include <linux/rcupdate.h> -#include <linux/vmalloc.h> -#include <asm/cacheflush.h> -#include <asm/set_memory.h> +#include <linux/cfi.h> -/* Compiler-defined handler names */ -#ifdef CONFIG_CFI_PERMISSIVE -#define cfi_failure_handler __ubsan_handle_cfi_check_fail -#else -#define cfi_failure_handler __ubsan_handle_cfi_check_fail_abort -#endif - -static inline void handle_cfi_failure(void *ptr) +enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, + unsigned long *target, u32 type) { - if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) - WARN_RATELIMIT(1, "CFI failure (target: %pS):\n", ptr); + if (target) + pr_err("CFI failure at %pS (target: %pS; expected type: 0x%08x)\n", + (void *)addr, (void *)*target, type); else - panic("CFI failure (target: %pS)\n", ptr); -} - -#ifdef CONFIG_MODULES -#ifdef CONFIG_CFI_CLANG_SHADOW -/* - * Index type. A 16-bit index can address at most (2^16)-2 pages (taking - * into account SHADOW_INVALID), i.e. ~256M with 4k pages. - */ -typedef u16 shadow_t; -#define SHADOW_INVALID ((shadow_t)~0UL) - -struct cfi_shadow { - /* Page index for the beginning of the shadow */ - unsigned long base; - /* An array of __cfi_check locations (as indices to the shadow) */ - shadow_t shadow[1]; -} __packed; - -/* - * The shadow covers ~128M from the beginning of the module region. If - * the region is larger, we fall back to __module_address for the rest. - */ -#define __SHADOW_RANGE (_UL(SZ_128M) >> PAGE_SHIFT) - -/* The in-memory size of struct cfi_shadow, always at least one page */ -#define __SHADOW_PAGES ((__SHADOW_RANGE * sizeof(shadow_t)) >> PAGE_SHIFT) -#define SHADOW_PAGES max(1UL, __SHADOW_PAGES) -#define SHADOW_SIZE (SHADOW_PAGES << PAGE_SHIFT) - -/* The actual size of the shadow array, minus metadata */ -#define SHADOW_ARR_SIZE (SHADOW_SIZE - offsetof(struct cfi_shadow, shadow)) -#define SHADOW_ARR_SLOTS (SHADOW_ARR_SIZE / sizeof(shadow_t)) - -static DEFINE_MUTEX(shadow_update_lock); -static struct cfi_shadow __rcu *cfi_shadow __read_mostly; + pr_err("CFI failure at %pS (no target information)\n", + (void *)addr); -/* Returns the index in the shadow for the given address */ -static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr) -{ - unsigned long index; - unsigned long page = ptr >> PAGE_SHIFT; - - if (unlikely(page < s->base)) - return -1; /* Outside of module area */ - - index = page - s->base; - - if (index >= SHADOW_ARR_SLOTS) - return -1; /* Cannot be addressed with shadow */ - - return (int)index; -} - -/* Returns the page address for an index in the shadow */ -static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s, - int index) -{ - if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS)) - return 0; - - return (s->base + index) << PAGE_SHIFT; -} - -/* Returns the __cfi_check function address for the given shadow location */ -static inline unsigned long shadow_to_check_fn(const struct cfi_shadow *s, - int index) -{ - if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS)) - return 0; - - if (unlikely(s->shadow[index] == SHADOW_INVALID)) - return 0; - - /* __cfi_check is always page aligned */ - return (s->base + s->shadow[index]) << PAGE_SHIFT; -} - -static void prepare_next_shadow(const struct cfi_shadow __rcu *prev, - struct cfi_shadow *next) -{ - int i, index, check; - - /* Mark everything invalid */ - memset(next->shadow, 0xFF, SHADOW_ARR_SIZE); - - if (!prev) - return; /* No previous shadow */ - - /* If the base address didn't change, an update is not needed */ - if (prev->base == next->base) { - memcpy(next->shadow, prev->shadow, SHADOW_ARR_SIZE); - return; - } - - /* Convert the previous shadow to the new address range */ - for (i = 0; i < SHADOW_ARR_SLOTS; ++i) { - if (prev->shadow[i] == SHADOW_INVALID) - continue; - - index = ptr_to_shadow(next, shadow_to_ptr(prev, i)); - if (index < 0) - continue; - - check = ptr_to_shadow(next, - shadow_to_check_fn(prev, prev->shadow[i])); - if (check < 0) - continue; - - next->shadow[index] = (shadow_t)check; - } -} - -static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod, - unsigned long min_addr, unsigned long max_addr) -{ - int check_index; - unsigned long check = (unsigned long)mod->cfi_check; - unsigned long ptr; - - if (unlikely(!PAGE_ALIGNED(check))) { - pr_warn("cfi: not using shadow for module %s\n", mod->name); - return; + if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) { + __warn(NULL, 0, (void *)addr, 0, regs, NULL); + return BUG_TRAP_TYPE_WARN; } - check_index = ptr_to_shadow(s, check); - if (check_index < 0) - return; /* Module not addressable with shadow */ - - /* For each page, store the check function index in the shadow */ - for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) { - int index = ptr_to_shadow(s, ptr); - - if (index >= 0) { - /* Each page must only contain one module */ - WARN_ON_ONCE(s->shadow[index] != SHADOW_INVALID); - s->shadow[index] = (shadow_t)check_index; - } - } + return BUG_TRAP_TYPE_BUG; } -static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod, - unsigned long min_addr, unsigned long max_addr) +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +static inline unsigned long trap_address(s32 *p) { - unsigned long ptr; - - for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) { - int index = ptr_to_shadow(s, ptr); - - if (index >= 0) - s->shadow[index] = SHADOW_INVALID; - } + return (unsigned long)((long)p + (long)*p); } -typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *, - unsigned long min_addr, unsigned long max_addr); - -static void update_shadow(struct module *mod, unsigned long base_addr, - update_shadow_fn fn) +static bool is_trap(unsigned long addr, s32 *start, s32 *end) { - struct cfi_shadow *prev; - struct cfi_shadow *next; - unsigned long min_addr, max_addr; - - next = vmalloc(SHADOW_SIZE); - - mutex_lock(&shadow_update_lock); - prev = rcu_dereference_protected(cfi_shadow, - mutex_is_locked(&shadow_update_lock)); - - if (next) { - next->base = base_addr >> PAGE_SHIFT; - prepare_next_shadow(prev, next); + s32 *p; - min_addr = (unsigned long)mod->core_layout.base; - max_addr = min_addr + mod->core_layout.text_size; - fn(next, mod, min_addr & PAGE_MASK, max_addr & PAGE_MASK); - - set_memory_ro((unsigned long)next, SHADOW_PAGES); - } - - rcu_assign_pointer(cfi_shadow, next); - mutex_unlock(&shadow_update_lock); - synchronize_rcu(); - - if (prev) { - set_memory_rw((unsigned long)prev, SHADOW_PAGES); - vfree(prev); + for (p = start; p < end; ++p) { + if (trap_address(p) == addr) + return true; } -} -void cfi_module_add(struct module *mod, unsigned long base_addr) -{ - update_shadow(mod, base_addr, add_module_to_shadow); + return false; } -void cfi_module_remove(struct module *mod, unsigned long base_addr) -{ - update_shadow(mod, base_addr, remove_module_from_shadow); -} - -static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s, - unsigned long ptr) +#ifdef CONFIG_MODULES +/* Populates `kcfi_trap(_end)?` fields in `struct module`. */ +void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { - int index; - - if (unlikely(!s)) - return NULL; /* No shadow available */ - - index = ptr_to_shadow(s, ptr); - if (index < 0) - return NULL; /* Cannot be addressed with shadow */ + char *secstrings; + unsigned int i; - return (cfi_check_fn)shadow_to_check_fn(s, index); -} - -static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) -{ - cfi_check_fn fn; + mod->kcfi_traps = NULL; + mod->kcfi_traps_end = NULL; - rcu_read_lock_sched_notrace(); - fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr); - rcu_read_unlock_sched_notrace(); + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - return fn; -} - -#else /* !CONFIG_CFI_CLANG_SHADOW */ + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, "__kcfi_traps")) + continue; -static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) -{ - return NULL; + mod->kcfi_traps = (s32 *)sechdrs[i].sh_addr; + mod->kcfi_traps_end = (s32 *)(sechdrs[i].sh_addr + sechdrs[i].sh_size); + break; + } } -#endif /* CONFIG_CFI_CLANG_SHADOW */ - -static inline cfi_check_fn find_module_check_fn(unsigned long ptr) +static bool is_module_cfi_trap(unsigned long addr) { - cfi_check_fn fn = NULL; struct module *mod; + bool found = false; rcu_read_lock_sched_notrace(); - mod = __module_address(ptr); - if (mod) - fn = mod->cfi_check; - rcu_read_unlock_sched_notrace(); - - return fn; -} - -static inline cfi_check_fn find_check_fn(unsigned long ptr) -{ - cfi_check_fn fn = NULL; - unsigned long flags; - bool rcu_idle; - - if (is_kernel_text(ptr)) - return __cfi_check; - /* - * Indirect call checks can happen when RCU is not watching. Both - * the shadow and __module_address use RCU, so we need to wake it - * up if necessary. - */ - rcu_idle = !rcu_is_watching(); - if (rcu_idle) { - local_irq_save(flags); - ct_irq_enter(); - } - - if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) - fn = find_shadow_check_fn(ptr); - if (!fn) - fn = find_module_check_fn(ptr); + mod = __module_address(addr); + if (mod) + found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); - if (rcu_idle) { - ct_irq_exit(); - local_irq_restore(flags); - } + rcu_read_unlock_sched_notrace(); - return fn; + return found; } - -void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag) +#else /* CONFIG_MODULES */ +static inline bool is_module_cfi_trap(unsigned long addr) { - cfi_check_fn fn = find_check_fn((unsigned long)ptr); - - if (likely(fn)) - fn(id, ptr, diag); - else /* Don't allow unchecked modules */ - handle_cfi_failure(ptr); + return false; } -EXPORT_SYMBOL(__cfi_slowpath_diag); +#endif /* CONFIG_MODULES */ -#else /* !CONFIG_MODULES */ +extern s32 __start___kcfi_traps[]; +extern s32 __stop___kcfi_traps[]; -void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag) +bool is_cfi_trap(unsigned long addr) { - handle_cfi_failure(ptr); /* No modules */ -} -EXPORT_SYMBOL(__cfi_slowpath_diag); + if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps)) + return true; -#endif /* CONFIG_MODULES */ - -void cfi_failure_handler(void *data, void *ptr, void *vtable) -{ - handle_cfi_failure(ptr); + return is_module_cfi_trap(addr); } -EXPORT_SYMBOL(cfi_failure_handler); +#endif /* CONFIG_ARCH_USES_CFI_TRAPS */ diff --git a/kernel/configs/rust.config b/kernel/configs/rust.config new file mode 100644 index 000000000000..38a7c5362c9c --- /dev/null +++ b/kernel/configs/rust.config @@ -0,0 +1 @@ +CONFIG_RUST=y diff --git a/kernel/events/core.c b/kernel/events/core.c index 2621fd24ad26..ff4bffc502c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6893,9 +6893,16 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; + unsigned long flags; u64 values[6]; int n = 0; + /* + * Disabling interrupts avoids all counter scheduling + * (context switches, timer based rotation and IPIs). + */ + local_irq_save(flags); + values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -6931,6 +6938,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } + + local_irq_restore(flags); } #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 3e7e2c2ad2f7..60c20f301a6b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -50,12 +50,20 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, data = &kallsyms_names[off]; len = *data; data++; + off++; + + /* If MSB is 1, it is a "big" symbol, so needs an additional byte. */ + if ((len & 0x80) != 0) { + len = (len & 0x7F) | (*data << 7); + data++; + off++; + } /* * Update the offset to return the offset for the next symbol on * the compressed stream. */ - off += len + 1; + off += len; /* * For every byte on the compressed symbol data, copy the table @@ -108,7 +116,7 @@ static char kallsyms_get_symbol_type(unsigned int off) static unsigned int get_symbol_offset(unsigned long pos) { const u8 *name; - int i; + int i, len; /* * Use the closest marker we have. We have markers every 256 positions, @@ -122,8 +130,18 @@ static unsigned int get_symbol_offset(unsigned long pos) * so we just need to add the len to the current pointer for every * symbol we wish to skip. */ - for (i = 0; i < (pos & 0xFF); i++) - name = name + (*name) + 1; + for (i = 0; i < (pos & 0xFF); i++) { + len = *name; + + /* + * If MSB is 1, it is a "big" symbol, so we need to look into + * the next byte (and skip it, too). + */ + if ((len & 0x80) != 0) + len = ((len & 0x7F) | (name[1] << 7)) + 1; + + name = name + len + 1; + } return name - kallsyms_names; } @@ -159,7 +177,6 @@ static bool cleanup_symbol_name(char *s) * character in an identifier in C. Suffixes observed: * - foo.llvm.[0-9a-f]+ * - foo.[0-9a-f]+ - * - foo.[0-9a-f]+.cfi_jt */ res = strchr(s, '.'); if (res) { @@ -167,22 +184,6 @@ static bool cleanup_symbol_name(char *s) return true; } - if (!IS_ENABLED(CONFIG_CFI_CLANG) || - !IS_ENABLED(CONFIG_LTO_CLANG_THIN) || - CONFIG_CLANG_VERSION >= 130000) - return false; - - /* - * Prior to LLVM 13, the following suffixes were observed when thinLTO - * and CFI are both enabled: - * - foo$[0-9]+ - */ - res = strrchr(s, '$'); - if (res) { - *res = '\0'; - return true; - } - return false; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 3c677918d8f2..28a6b7ab4a0f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1050,8 +1050,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker, struct timer_list *timer = &dwork->timer; struct kthread_work *work = &dwork->work; - WARN_ON_FUNCTION_MISMATCH(timer->function, - kthread_delayed_work_timer_fn); + WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn); /* * If @delay is 0, queue @dwork->work immediately. This is for diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bc475e62279d..ec06ce59d728 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -213,7 +213,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, * we use the smallest/strictest upper bound possible (56, based on * the current definition of MODULE_NAME_LEN) to prevent overflows. */ - BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 512); relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ @@ -227,7 +227,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, /* Format: .klp.sym.sym_objname.sym_name,sympos */ cnt = sscanf(strtab + sym->st_name, - ".klp.sym.%55[^.].%127[^,],%lu", + ".klp.sym.%55[^.].%511[^,],%lu", sym_objname, sym_name, &sympos); if (cnt != 3) { pr_err("symbol %s has an incorrectly formatted name\n", diff --git a/kernel/module/main.c b/kernel/module/main.c index a4e4d84b6f4e..70c0b2c6fef8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -53,6 +53,7 @@ #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> +#include <linux/cfi.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1144,8 +1145,6 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static void cfi_cleanup(struct module *mod); - /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1190,9 +1189,6 @@ static void free_module(struct module *mod) mod->name); mutex_unlock(&module_mutex); - /* Clean up CFI for the module. */ - cfi_cleanup(mod); - /* This may be empty, but that's OK */ module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); @@ -2602,8 +2598,9 @@ static int complete_formation(struct module *mod, struct load_info *info) if (err < 0) goto out; - /* This relies on module_mutex for list integrity. */ + /* These rely on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); + module_cfi_finalize(info->hdr, info->sechdrs, mod); if (module_check_misalignment(mod)) goto out_misaligned; @@ -2665,8 +2662,6 @@ static int unknown_module_param_cb(char *param, char *val, const char *modname, return 0; } -static void cfi_init(struct module *mod); - /* * Allocate and load the module: note that size of section 0 is always * zero, and we rely on this for optional sections. @@ -2796,9 +2791,6 @@ static int load_module(struct load_info *info, const char __user *uargs, flush_module_icache(mod); - /* Setup CFI for the module. */ - cfi_init(mod); - /* Now copy in args */ mod->args = strndup_user(uargs, ~0UL >> 1); if (IS_ERR(mod->args)) { @@ -2875,7 +2867,6 @@ static int load_module(struct load_info *info, const char __user *uargs, synchronize_rcu(); kfree(mod->args); free_arch_cleanup: - cfi_cleanup(mod); module_arch_cleanup(mod); free_modinfo: free_modinfo(mod); @@ -2961,41 +2952,6 @@ static inline int within(unsigned long addr, void *start, unsigned long size) return ((void *)addr >= start && (void *)addr < start + size); } -static void cfi_init(struct module *mod) -{ -#ifdef CONFIG_CFI_CLANG - initcall_t *init; -#ifdef CONFIG_MODULE_UNLOAD - exitcall_t *exit; -#endif - - rcu_read_lock_sched(); - mod->cfi_check = (cfi_check_fn) - find_kallsyms_symbol_value(mod, "__cfi_check"); - init = (initcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_init_module"); - /* Fix init/exit functions to point to the CFI jump table */ - if (init) - mod->init = *init; -#ifdef CONFIG_MODULE_UNLOAD - exit = (exitcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); - if (exit) - mod->exit = *exit; -#endif - rcu_read_unlock_sched(); - - cfi_module_add(mod, mod_tree.addr_min); -#endif -} - -static void cfi_cleanup(struct module *mod) -{ -#ifdef CONFIG_CFI_CLANG - cfi_module_remove(mod, mod_tree.addr_min); -#endif -} - /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */ char *module_flags(struct module *mod, char *buf, bool show_state) { diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d8e1b270a065..503c2aa845a4 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -84,10 +84,15 @@ torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress test torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()"); torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives"); +torture_param(bool, gp_cond_full, false, "Use conditional/async full-state GP wait primitives"); +torture_param(bool, gp_cond_exp_full, false, + "Use conditional/async full-stateexpedited GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); torture_param(bool, gp_poll, false, "Use polling GP wait primitives"); torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives"); +torture_param(bool, gp_poll_full, false, "Use polling full-state GP wait primitives"); +torture_param(bool, gp_poll_exp_full, false, "Use polling full-state expedited GP wait primitives"); torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers"); @@ -194,16 +199,24 @@ static int rcu_torture_writer_state; #define RTWS_DEF_FREE 3 #define RTWS_EXP_SYNC 4 #define RTWS_COND_GET 5 -#define RTWS_COND_GET_EXP 6 -#define RTWS_COND_SYNC 7 -#define RTWS_COND_SYNC_EXP 8 -#define RTWS_POLL_GET 9 -#define RTWS_POLL_GET_EXP 10 -#define RTWS_POLL_WAIT 11 -#define RTWS_POLL_WAIT_EXP 12 -#define RTWS_SYNC 13 -#define RTWS_STUTTER 14 -#define RTWS_STOPPING 15 +#define RTWS_COND_GET_FULL 6 +#define RTWS_COND_GET_EXP 7 +#define RTWS_COND_GET_EXP_FULL 8 +#define RTWS_COND_SYNC 9 +#define RTWS_COND_SYNC_FULL 10 +#define RTWS_COND_SYNC_EXP 11 +#define RTWS_COND_SYNC_EXP_FULL 12 +#define RTWS_POLL_GET 13 +#define RTWS_POLL_GET_FULL 14 +#define RTWS_POLL_GET_EXP 15 +#define RTWS_POLL_GET_EXP_FULL 16 +#define RTWS_POLL_WAIT 17 +#define RTWS_POLL_WAIT_FULL 18 +#define RTWS_POLL_WAIT_EXP 19 +#define RTWS_POLL_WAIT_EXP_FULL 20 +#define RTWS_SYNC 21 +#define RTWS_STUTTER 22 +#define RTWS_STOPPING 23 static const char * const rcu_torture_writer_state_names[] = { "RTWS_FIXED_DELAY", "RTWS_DELAY", @@ -211,13 +224,21 @@ static const char * const rcu_torture_writer_state_names[] = { "RTWS_DEF_FREE", "RTWS_EXP_SYNC", "RTWS_COND_GET", + "RTWS_COND_GET_FULL", "RTWS_COND_GET_EXP", + "RTWS_COND_GET_EXP_FULL", "RTWS_COND_SYNC", + "RTWS_COND_SYNC_FULL", "RTWS_COND_SYNC_EXP", + "RTWS_COND_SYNC_EXP_FULL", "RTWS_POLL_GET", + "RTWS_POLL_GET_FULL", "RTWS_POLL_GET_EXP", + "RTWS_POLL_GET_EXP_FULL", "RTWS_POLL_WAIT", + "RTWS_POLL_WAIT_FULL", "RTWS_POLL_WAIT_EXP", + "RTWS_POLL_WAIT_EXP_FULL", "RTWS_SYNC", "RTWS_STUTTER", "RTWS_STOPPING", @@ -332,13 +353,21 @@ struct rcu_torture_ops { void (*exp_sync)(void); unsigned long (*get_gp_state_exp)(void); unsigned long (*start_gp_poll_exp)(void); + void (*start_gp_poll_exp_full)(struct rcu_gp_oldstate *rgosp); bool (*poll_gp_state_exp)(unsigned long oldstate); void (*cond_sync_exp)(unsigned long oldstate); + void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*get_gp_state)(void); + void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*get_gp_completed)(void); + void (*get_gp_completed_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*start_gp_poll)(void); + void (*start_gp_poll_full)(struct rcu_gp_oldstate *rgosp); bool (*poll_gp_state)(unsigned long oldstate); + bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp); + bool (*poll_need_2gp)(bool poll, bool poll_full); void (*cond_sync)(unsigned long oldstate); + void (*cond_sync_full)(struct rcu_gp_oldstate *rgosp); call_rcu_func_t call; void (*cb_barrier)(void); void (*fqs)(void); @@ -489,6 +518,11 @@ static void rcu_sync_torture_init(void) INIT_LIST_HEAD(&rcu_torture_removed); } +static bool rcu_poll_need_2gp(bool poll, bool poll_full) +{ + return poll; +} + static struct rcu_torture_ops rcu_ops = { .ttype = RCU_FLAVOR, .init = rcu_sync_torture_init, @@ -502,12 +536,19 @@ static struct rcu_torture_ops rcu_ops = { .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .get_gp_state = get_state_synchronize_rcu, + .get_gp_state_full = get_state_synchronize_rcu_full, .get_gp_completed = get_completed_synchronize_rcu, + .get_gp_completed_full = get_completed_synchronize_rcu_full, .start_gp_poll = start_poll_synchronize_rcu, + .start_gp_poll_full = start_poll_synchronize_rcu_full, .poll_gp_state = poll_state_synchronize_rcu, + .poll_gp_state_full = poll_state_synchronize_rcu_full, + .poll_need_2gp = rcu_poll_need_2gp, .cond_sync = cond_synchronize_rcu, + .cond_sync_full = cond_synchronize_rcu_full, .get_gp_state_exp = get_state_synchronize_rcu, .start_gp_poll_exp = start_poll_synchronize_rcu_expedited, + .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full, .poll_gp_state_exp = poll_state_synchronize_rcu, .cond_sync_exp = cond_synchronize_rcu_expedited, .call = call_rcu, @@ -709,6 +750,9 @@ static struct rcu_torture_ops srcud_ops = { .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, .exp_sync = srcu_torture_synchronize_expedited, + .get_gp_state = srcu_torture_get_gp_state, + .start_gp_poll = srcu_torture_start_gp_poll, + .poll_gp_state = srcu_torture_poll_gp_state, .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, @@ -1148,15 +1192,35 @@ static int nsynctypes; */ static void rcu_torture_write_types(void) { - bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_exp1 = gp_exp; - bool gp_poll_exp1 = gp_poll_exp, gp_normal1 = gp_normal, gp_poll1 = gp_poll; - bool gp_sync1 = gp_sync; + bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_cond_full1 = gp_cond_full; + bool gp_cond_exp_full1 = gp_cond_exp_full, gp_exp1 = gp_exp, gp_poll_exp1 = gp_poll_exp; + bool gp_poll_exp_full1 = gp_poll_exp_full, gp_normal1 = gp_normal, gp_poll1 = gp_poll; + bool gp_poll_full1 = gp_poll_full, gp_sync1 = gp_sync; /* Initialize synctype[] array. If none set, take default. */ - if (!gp_cond1 && !gp_cond_exp1 && !gp_exp1 && !gp_poll_exp && - !gp_normal1 && !gp_poll1 && !gp_sync1) - gp_cond1 = gp_cond_exp1 = gp_exp1 = gp_poll_exp1 = - gp_normal1 = gp_poll1 = gp_sync1 = true; + if (!gp_cond1 && + !gp_cond_exp1 && + !gp_cond_full1 && + !gp_cond_exp_full1 && + !gp_exp1 && + !gp_poll_exp1 && + !gp_poll_exp_full1 && + !gp_normal1 && + !gp_poll1 && + !gp_poll_full1 && + !gp_sync1) { + gp_cond1 = true; + gp_cond_exp1 = true; + gp_cond_full1 = true; + gp_cond_exp_full1 = true; + gp_exp1 = true; + gp_poll_exp1 = true; + gp_poll_exp_full1 = true; + gp_normal1 = true; + gp_poll1 = true; + gp_poll_full1 = true; + gp_sync1 = true; + } if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) { synctype[nsynctypes++] = RTWS_COND_GET; pr_info("%s: Testing conditional GPs.\n", __func__); @@ -1169,6 +1233,19 @@ static void rcu_torture_write_types(void) } else if (gp_cond_exp && (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp)) { pr_alert("%s: gp_cond_exp without primitives.\n", __func__); } + if (gp_cond_full1 && cur_ops->get_gp_state && cur_ops->cond_sync_full) { + synctype[nsynctypes++] = RTWS_COND_GET_FULL; + pr_info("%s: Testing conditional full-state GPs.\n", __func__); + } else if (gp_cond_full && (!cur_ops->get_gp_state || !cur_ops->cond_sync_full)) { + pr_alert("%s: gp_cond_full without primitives.\n", __func__); + } + if (gp_cond_exp_full1 && cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp_full) { + synctype[nsynctypes++] = RTWS_COND_GET_EXP_FULL; + pr_info("%s: Testing conditional full-state expedited GPs.\n", __func__); + } else if (gp_cond_exp_full && + (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp_full)) { + pr_alert("%s: gp_cond_exp_full without primitives.\n", __func__); + } if (gp_exp1 && cur_ops->exp_sync) { synctype[nsynctypes++] = RTWS_EXP_SYNC; pr_info("%s: Testing expedited GPs.\n", __func__); @@ -1187,12 +1264,25 @@ static void rcu_torture_write_types(void) } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) { pr_alert("%s: gp_poll without primitives.\n", __func__); } + if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) { + synctype[nsynctypes++] = RTWS_POLL_GET_FULL; + pr_info("%s: Testing polling full-state GPs.\n", __func__); + } else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) { + pr_alert("%s: gp_poll_full without primitives.\n", __func__); + } if (gp_poll_exp1 && cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp) { synctype[nsynctypes++] = RTWS_POLL_GET_EXP; pr_info("%s: Testing polling expedited GPs.\n", __func__); } else if (gp_poll_exp && (!cur_ops->start_gp_poll_exp || !cur_ops->poll_gp_state_exp)) { pr_alert("%s: gp_poll_exp without primitives.\n", __func__); } + if (gp_poll_exp_full1 && cur_ops->start_gp_poll_exp_full && cur_ops->poll_gp_state_full) { + synctype[nsynctypes++] = RTWS_POLL_GET_EXP_FULL; + pr_info("%s: Testing polling full-state expedited GPs.\n", __func__); + } else if (gp_poll_exp_full && + (!cur_ops->start_gp_poll_exp_full || !cur_ops->poll_gp_state_full)) { + pr_alert("%s: gp_poll_exp_full without primitives.\n", __func__); + } if (gp_sync1 && cur_ops->sync) { synctype[nsynctypes++] = RTWS_SYNC; pr_info("%s: Testing normal GPs.\n", __func__); @@ -1202,6 +1292,40 @@ static void rcu_torture_write_types(void) } /* + * Do the specified rcu_torture_writer() synchronous grace period, + * while also testing out the polled APIs. Note well that the single-CPU + * grace-period optimizations must be accounted for. + */ +static void do_rtws_sync(struct torture_random_state *trsp, void (*sync)(void)) +{ + unsigned long cookie; + struct rcu_gp_oldstate cookie_full; + bool dopoll; + bool dopoll_full; + unsigned long r = torture_random(trsp); + + dopoll = cur_ops->get_gp_state && cur_ops->poll_gp_state && !(r & 0x300); + dopoll_full = cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full && !(r & 0xc00); + if (dopoll || dopoll_full) + cpus_read_lock(); + if (dopoll) + cookie = cur_ops->get_gp_state(); + if (dopoll_full) + cur_ops->get_gp_state_full(&cookie_full); + if (cur_ops->poll_need_2gp && cur_ops->poll_need_2gp(dopoll, dopoll_full)) + sync(); + sync(); + WARN_ONCE(dopoll && !cur_ops->poll_gp_state(cookie), + "%s: Cookie check 3 failed %pS() online %*pbl.", + __func__, sync, cpumask_pr_args(cpu_online_mask)); + WARN_ONCE(dopoll_full && !cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 4 failed %pS() online %*pbl", + __func__, sync, cpumask_pr_args(cpu_online_mask)); + if (dopoll || dopoll_full) + cpus_read_unlock(); +} + +/* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure * after a series of grace periods (the "pipeline"). @@ -1212,8 +1336,10 @@ rcu_torture_writer(void *arg) bool boot_ended; bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal(); unsigned long cookie; + struct rcu_gp_oldstate cookie_full; int expediting = 0; unsigned long gp_snap; + struct rcu_gp_oldstate gp_snap_full; int i; int idx; int oldnice = task_nice(current); @@ -1261,11 +1387,12 @@ rcu_torture_writer(void *arg) atomic_inc(&rcu_torture_wcount[i]); WRITE_ONCE(old_rp->rtort_pipe_count, old_rp->rtort_pipe_count + 1); + + // Make sure readers block polled grace periods. if (cur_ops->get_gp_state && cur_ops->poll_gp_state) { idx = cur_ops->readlock(); cookie = cur_ops->get_gp_state(); - WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE && - cur_ops->poll_gp_state(cookie), + WARN_ONCE(cur_ops->poll_gp_state(cookie), "%s: Cookie check 1 failed %s(%d) %lu->%lu\n", __func__, rcu_torture_writer_state_getname(), @@ -1277,6 +1404,21 @@ rcu_torture_writer(void *arg) } cur_ops->readunlock(idx); } + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) { + idx = cur_ops->readlock(); + cur_ops->get_gp_state_full(&cookie_full); + WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 5 failed %s(%d) online %*pbl\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cpumask_pr_args(cpu_online_mask)); + if (cur_ops->get_gp_completed_full) { + cur_ops->get_gp_completed_full(&cookie_full); + WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full)); + } + cur_ops->readunlock(idx); + } switch (synctype[torture_random(&rand) % nsynctypes]) { case RTWS_DEF_FREE: rcu_torture_writer_state = RTWS_DEF_FREE; @@ -1284,12 +1426,7 @@ rcu_torture_writer(void *arg) break; case RTWS_EXP_SYNC: rcu_torture_writer_state = RTWS_EXP_SYNC; - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); - cur_ops->exp_sync(); - cur_ops->exp_sync(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); + do_rtws_sync(&rand, cur_ops->exp_sync); rcu_torture_pipe_update(old_rp); break; case RTWS_COND_GET: @@ -1308,6 +1445,22 @@ rcu_torture_writer(void *arg) cur_ops->cond_sync_exp(gp_snap); rcu_torture_pipe_update(old_rp); break; + case RTWS_COND_GET_FULL: + rcu_torture_writer_state = RTWS_COND_GET_FULL; + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + rcu_torture_writer_state = RTWS_COND_SYNC_FULL; + cur_ops->cond_sync_full(&gp_snap_full); + rcu_torture_pipe_update(old_rp); + break; + case RTWS_COND_GET_EXP_FULL: + rcu_torture_writer_state = RTWS_COND_GET_EXP_FULL; + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + rcu_torture_writer_state = RTWS_COND_SYNC_EXP_FULL; + cur_ops->cond_sync_exp_full(&gp_snap_full); + rcu_torture_pipe_update(old_rp); + break; case RTWS_POLL_GET: rcu_torture_writer_state = RTWS_POLL_GET; gp_snap = cur_ops->start_gp_poll(); @@ -1317,6 +1470,15 @@ rcu_torture_writer(void *arg) &rand); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET_FULL: + rcu_torture_writer_state = RTWS_POLL_GET_FULL; + cur_ops->start_gp_poll_full(&gp_snap_full); + rcu_torture_writer_state = RTWS_POLL_WAIT_FULL; + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_POLL_GET_EXP: rcu_torture_writer_state = RTWS_POLL_GET_EXP; gp_snap = cur_ops->start_gp_poll_exp(); @@ -1326,14 +1488,18 @@ rcu_torture_writer(void *arg) &rand); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET_EXP_FULL: + rcu_torture_writer_state = RTWS_POLL_GET_EXP_FULL; + cur_ops->start_gp_poll_exp_full(&gp_snap_full); + rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL; + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_SYNC: rcu_torture_writer_state = RTWS_SYNC; - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); - cur_ops->sync(); - cur_ops->sync(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); + do_rtws_sync(&rand, cur_ops->sync); rcu_torture_pipe_update(old_rp); break; default: @@ -1400,6 +1566,7 @@ static int rcu_torture_fakewriter(void *arg) { unsigned long gp_snap; + struct rcu_gp_oldstate gp_snap_full; DEFINE_TORTURE_RANDOM(rand); VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started"); @@ -1438,6 +1605,16 @@ rcu_torture_fakewriter(void *arg) torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); cur_ops->cond_sync_exp(gp_snap); break; + case RTWS_COND_GET_FULL: + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + cur_ops->cond_sync_full(&gp_snap_full); + break; + case RTWS_COND_GET_EXP_FULL: + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + cur_ops->cond_sync_exp_full(&gp_snap_full); + break; case RTWS_POLL_GET: gp_snap = cur_ops->start_gp_poll(); while (!cur_ops->poll_gp_state(gp_snap)) { @@ -1445,6 +1622,13 @@ rcu_torture_fakewriter(void *arg) &rand); } break; + case RTWS_POLL_GET_FULL: + cur_ops->start_gp_poll_full(&gp_snap_full); + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) { + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + } + break; case RTWS_POLL_GET_EXP: gp_snap = cur_ops->start_gp_poll_exp(); while (!cur_ops->poll_gp_state_exp(gp_snap)) { @@ -1452,6 +1636,13 @@ rcu_torture_fakewriter(void *arg) &rand); } break; + case RTWS_POLL_GET_EXP_FULL: + cur_ops->start_gp_poll_exp_full(&gp_snap_full); + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) { + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + } + break; case RTWS_SYNC: cur_ops->sync(); break; @@ -1715,7 +1906,9 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp, */ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) { + bool checkpolling = !(torture_random(trsp) & 0xfff); unsigned long cookie; + struct rcu_gp_oldstate cookie_full; int i; unsigned long started; unsigned long completed; @@ -1731,8 +1924,12 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) WARN_ON_ONCE(!rcu_is_watching()); newstate = rcutorture_extend_mask(readstate, trsp); rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); + if (checkpolling) { + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + cookie = cur_ops->get_gp_state(); + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) + cur_ops->get_gp_state_full(&cookie_full); + } started = cur_ops->get_gp_seq(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, @@ -1766,13 +1963,22 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ONCE(cur_ops->poll_gp_state(cookie), - "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", - __func__, - rcu_torture_writer_state_getname(), - rcu_torture_writer_state, - cookie, cur_ops->get_gp_state()); + if (checkpolling) { + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + WARN_ONCE(cur_ops->poll_gp_state(cookie), + "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cookie, cur_ops->get_gp_state()); + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) + WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 6 failed %s(%d) online %*pbl\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cpumask_pr_args(cpu_online_mask)); + } rcutorture_one_extend(&readstate, 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -2600,12 +2806,12 @@ static int rcutorture_oom_notify(struct notifier_block *self, for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); - rcu_barrier(); + cur_ops->cb_barrier(); ncbs = 0; for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); - rcu_barrier(); + cur_ops->cb_barrier(); ncbs = 0; for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 92c002d65482..33adafdad261 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -117,7 +117,7 @@ void srcu_drive_gp(struct work_struct *wp) struct srcu_struct *ssp; ssp = container_of(wp, struct srcu_struct, srcu_work); - if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) + if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ @@ -150,17 +150,17 @@ void srcu_drive_gp(struct work_struct *wp) * straighten that out. */ WRITE_ONCE(ssp->srcu_gp_running, false); - if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) + if (ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { - unsigned short cookie; + unsigned long cookie; cookie = get_state_synchronize_srcu(ssp); - if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) + if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) return; WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { @@ -215,7 +215,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) barrier(); ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1; barrier(); - return ret & USHRT_MAX; + return ret; } EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); @@ -240,10 +240,10 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); */ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { - bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie); + unsigned long cur_s = READ_ONCE(ssp->srcu_idx); barrier(); - return ret; + return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3); } EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 83c7e6620d40..f5bf6fb430da 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -560,7 +560,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, + WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); // If the grace-period kthread is running, use it. @@ -1500,6 +1500,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop) if (rcu_tasks_trace_pertask_prep(t, true)) trc_add_holdout(t, hop); rcu_read_unlock(); + cond_resched_tasks_rcu_qs(); } // Only after all running tasks have been accounted for is it @@ -1520,6 +1521,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop) raw_spin_lock_irqsave_rcu_node(rtpcp, flags); } raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); + cond_resched_tasks_rcu_qs(); } // Re-enable CPU hotplug now that the holdout list is populated. @@ -1619,6 +1621,7 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, trc_del_holdout(t); else if (needreport) show_stalled_task_trace(t, firstreport); + cond_resched_tasks_rcu_qs(); } // Re-enable CPU hotplug now that the holdout list scan has completed. diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index f0561ee16b9c..a33a8d4942c3 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -158,6 +158,10 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +static void tiny_rcu_leak_callback(struct rcu_head *rhp) +{ +} + /* * Post an RCU callback to be invoked after the end of an RCU grace * period. But since we have but one CPU, that would be after any @@ -165,9 +169,20 @@ EXPORT_SYMBOL_GPL(synchronize_rcu); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { + static atomic_t doublefrees; unsigned long flags; - debug_rcu_head_queue(head); + if (debug_rcu_head_queue(head)) { + if (atomic_inc_return(&doublefrees) < 4) { + pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); + mem_dump_obj(head); + } + + if (!__is_kvfree_rcu_offset((unsigned long)head->func)) + WRITE_ONCE(head->func, tiny_rcu_leak_callback); + return; + } + head->func = func; head->next = NULL; @@ -184,6 +199,16 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) EXPORT_SYMBOL_GPL(call_rcu); /* + * Store a grace-period-counter "cookie". For more information, + * see the Tree RCU header comment. + */ +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; +} +EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full); + +/* * Return a grace-period-counter "cookie". For more information, * see the Tree RCU header comment. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 79aea7df4345..6bb8e72bc815 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -76,6 +76,7 @@ /* Data structures. */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { + .gpwrap = true, #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_RCU_CORE, #endif @@ -1755,6 +1756,8 @@ static noinline void rcu_gp_cleanup(void) dump_blkd_tasks(rnp, 10); WARN_ON_ONCE(rnp->qsmask); WRITE_ONCE(rnp->gp_seq, new_gp_seq); + if (!rnp->parent) + smp_mb(); // Order against failing poll_state_synchronize_rcu_full(). rdp = this_cpu_ptr(&rcu_data); if (rnp == rdp->mynode) needgp = __note_gp_changes(rnp, rdp) || needgp; @@ -2341,8 +2344,8 @@ void rcu_sched_clock_irq(int user) rcu_flavor_sched_clock_irq(user); if (rcu_pending(user)) invoke_rcu_core(); - if (user) - rcu_tasks_classic_qs(current, false); + if (user || rcu_is_cpu_rrupt_from_idle()) + rcu_note_voluntary_context_switch(current); lockdep_assert_irqs_disabled(); trace_rcu_utilization(TPS("End scheduler-tick")); @@ -2832,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. */ -#define KFREE_DRAIN_JIFFIES (HZ / 50) +#define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 #define FREE_N_CHANNELS 2 @@ -3093,6 +3096,21 @@ need_offload_krc(struct kfree_rcu_cpu *krcp) return !!krcp->head; } +static void +schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) +{ + long delay, delay_left; + + delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES; + if (delayed_work_pending(&krcp->monitor_work)) { + delay_left = krcp->monitor_work.timer.expires - jiffies; + if (delay < delay_left) + mod_delayed_work(system_wq, &krcp->monitor_work, delay); + return; + } + queue_delayed_work(system_wq, &krcp->monitor_work, delay); +} + /* * This function is invoked after the KFREE_DRAIN_JIFFIES timeout. */ @@ -3150,7 +3168,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // work to repeat an attempt. Because previous batches are // still in progress. if (need_offload_krc(krcp)) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } @@ -3183,15 +3201,16 @@ static void fill_page_cache_func(struct work_struct *work) bnode = (struct kvfree_rcu_bulk_data *) __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (bnode) { - raw_spin_lock_irqsave(&krcp->lock, flags); - pushed = put_cached_bnode(krcp, bnode); - raw_spin_unlock_irqrestore(&krcp->lock, flags); + if (!bnode) + break; - if (!pushed) { - free_page((unsigned long) bnode); - break; - } + raw_spin_lock_irqsave(&krcp->lock, flags); + pushed = put_cached_bnode(krcp, bnode); + raw_spin_unlock_irqrestore(&krcp->lock, flags); + + if (!pushed) { + free_page((unsigned long) bnode); + break; } } @@ -3338,7 +3357,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) // Set timer to drain after KFREE_DRAIN_JIFFIES. if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3371,7 +3390,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) atomic_set(&krcp->backoff_page_cache_fill, 1); } - return count; + return count == 0 ? SHRINK_EMPTY : count; } static unsigned long @@ -3414,49 +3433,27 @@ void __init kfree_rcu_scheduler_running(void) raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) - schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } /* * During early boot, any blocking grace-period wait automatically - * implies a grace period. Later on, this is never the case for PREEMPTION. + * implies a grace period. * - * However, because a context switch is a grace period for !PREEMPTION, any - * blocking grace-period wait automatically implies a grace period if - * there is only one CPU online at any point time during execution of - * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to - * occasionally incorrectly indicate that there are multiple CPUs online - * when there was in fact only one the whole time, as this just adds some - * overhead: RCU still operates correctly. + * Later on, this could in theory be the case for kernels built with + * CONFIG_SMP=y && CONFIG_PREEMPTION=y running on a single CPU, but this + * is not a common case. Furthermore, this optimization would cause + * the rcu_gp_oldstate structure to expand by 50%, so this potential + * grace-period optimization is ignored once the scheduler is running. */ static int rcu_blocking_is_gp(void) { - int ret; - - // Invoking preempt_model_*() too early gets a splat. - if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE || - preempt_model_full() || preempt_model_rt()) - return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE; + if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE) + return false; might_sleep(); /* Check for RCU read-side critical section. */ - preempt_disable(); - /* - * If the rcu_state.n_online_cpus counter is equal to one, - * there is only one CPU, and that CPU sees all prior accesses - * made by any CPU that was online at the time of its access. - * Furthermore, if this counter is equal to one, its value cannot - * change until after the preempt_enable() below. - * - * Furthermore, if rcu_state.n_online_cpus is equal to one here, - * all later CPUs (both this one and any that come online later - * on) are guaranteed to see all accesses prior to this point - * in the code, without the need for additional memory barriers. - * Those memory barriers are provided by CPU-hotplug code. - */ - ret = READ_ONCE(rcu_state.n_online_cpus) <= 1; - preempt_enable(); - return ret; + return true; } /** @@ -3499,30 +3496,59 @@ static int rcu_blocking_is_gp(void) */ void synchronize_rcu(void) { + unsigned long flags; + struct rcu_node *rnp; + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (rcu_blocking_is_gp()) { - // Note well that this code runs with !PREEMPT && !SMP. - // In addition, all code that advances grace periods runs at - // process level. Therefore, this normal GP overlaps with - // other normal GPs only by being fully nested within them, - // which allows reuse of ->gp_seq_polled_snap. - rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap); - rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap); - if (rcu_init_invoked()) - cond_resched_tasks_rcu_qs(); - return; // Context allows vacuous grace periods. + if (!rcu_blocking_is_gp()) { + if (rcu_gp_is_expedited()) + synchronize_rcu_expedited(); + else + wait_rcu_gp(call_rcu); + return; } - if (rcu_gp_is_expedited()) - synchronize_rcu_expedited(); - else - wait_rcu_gp(call_rcu); + + // Context allows vacuous grace periods. + // Note well that this code runs with !PREEMPT && !SMP. + // In addition, all code that advances grace periods runs at + // process level. Therefore, this normal GP overlaps with other + // normal GPs only by being fully nested within them, which allows + // reuse of ->gp_seq_polled_snap. + rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap); + rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap); + + // Update the normal grace-period counters to record + // this grace period, but only those used by the boot CPU. + // The rcu_scheduler_starting() will take care of the rest of + // these counters. + local_irq_save(flags); + WARN_ON_ONCE(num_online_cpus() > 1); + rcu_state.gp_seq += (1 << RCU_SEQ_CTR_SHIFT); + for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent) + rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(synchronize_rcu); /** + * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie + * @rgosp: Place to put state cookie + * + * Stores into @rgosp a value that will always be treated by functions + * like poll_state_synchronize_rcu_full() as a cookie whose grace period + * has already completed. + */ +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; + rgosp->rgos_exp = RCU_GET_STATE_COMPLETED; +} +EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full); + +/** * get_state_synchronize_rcu - Snapshot current RCU state * * Returns a cookie that is used by a later call to cond_synchronize_rcu() @@ -3541,21 +3567,42 @@ unsigned long get_state_synchronize_rcu(void) EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); /** - * start_poll_synchronize_rcu - Snapshot and start RCU grace period + * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited + * @rgosp: location to place combined normal/expedited grace-period state * - * Returns a cookie that is used by a later call to cond_synchronize_rcu() - * or poll_state_synchronize_rcu() to determine whether or not a full - * grace period has elapsed in the meantime. If the needed grace period - * is not already slated to start, notifies RCU core of the need for that - * grace period. + * Places the normal and expedited grace-period states in @rgosp. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * The rcu_gp_oldstate structure takes up twice the memory of an unsigned + * long, but is guaranteed to see all grace periods. In contrast, the + * combined state occupies less memory, but can sometimes fail to take + * grace periods into account. * - * Interrupts must be enabled for the case where it is necessary to awaken - * the grace-period kthread. + * This does not guarantee that the needed grace period will actually + * start. */ -unsigned long start_poll_synchronize_rcu(void) +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + struct rcu_node *rnp = rcu_get_root(); + + /* + * Any prior manipulation of RCU-protected data must happen + * before the loads from ->gp_seq and ->expedited_sequence. + */ + smp_mb(); /* ^^^ */ + rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq); + rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full); + +/* + * Helper function for start_poll_synchronize_rcu() and + * start_poll_synchronize_rcu_full(). + */ +static void start_poll_synchronize_rcu_common(void) { unsigned long flags; - unsigned long gp_seq = get_state_synchronize_rcu(); bool needwake; struct rcu_data *rdp; struct rcu_node *rnp; @@ -3575,17 +3622,57 @@ unsigned long start_poll_synchronize_rcu(void) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (needwake) rcu_gp_kthread_wake(); +} + +/** + * start_poll_synchronize_rcu - Snapshot and start RCU grace period + * + * Returns a cookie that is used by a later call to cond_synchronize_rcu() + * or poll_state_synchronize_rcu() to determine whether or not a full + * grace period has elapsed in the meantime. If the needed grace period + * is not already slated to start, notifies RCU core of the need for that + * grace period. + * + * Interrupts must be enabled for the case where it is necessary to awaken + * the grace-period kthread. + */ +unsigned long start_poll_synchronize_rcu(void) +{ + unsigned long gp_seq = get_state_synchronize_rcu(); + + start_poll_synchronize_rcu_common(); return gp_seq; } EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu); /** - * poll_state_synchronize_rcu - Conditionally wait for an RCU grace period + * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full() * + * Places the normal and expedited grace-period states in *@rgos. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * If the needed grace period is not already slated to start, notifies + * RCU core of the need for that grace period. + * + * Interrupts must be enabled for the case where it is necessary to awaken + * the grace-period kthread. + */ +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + get_state_synchronize_rcu_full(rgosp); + + start_poll_synchronize_rcu_common(); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full); + +/** + * poll_state_synchronize_rcu - Has the specified RCU grace period completed? * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu() * * If a full RCU grace period has elapsed since the earlier call from - * which oldstate was obtained, return @true, otherwise return @false. + * which @oldstate was obtained, return @true, otherwise return @false. * If @false is returned, it is the caller's responsibility to invoke this * function later on until it does return @true. Alternatively, the caller * can explicitly wait for a grace period, for example, by passing @oldstate @@ -3594,10 +3681,11 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu); * Yes, this function does not take counter wrap into account. * But counter wrap is harmless. If the counter wraps, we have waited for * more than a billion grace periods (and way more on a 64-bit system!). - * Those needing to keep oldstate values for very long time periods - * (many hours even on 32-bit systems) should check them occasionally - * and either refresh them or set a flag indicating that the grace period - * has completed. + * Those needing to keep old state values for very long time periods + * (many hours even on 32-bit systems) should check them occasionally and + * either refresh them or set a flag indicating that the grace period has + * completed. Alternatively, they can use get_completed_synchronize_rcu() + * to get a guaranteed-completed grace-period state. * * This function provides the same memory-ordering guarantees that * would be provided by a synchronize_rcu() that was invoked at the call @@ -3616,8 +3704,56 @@ bool poll_state_synchronize_rcu(unsigned long oldstate) EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu); /** - * cond_synchronize_rcu - Conditionally wait for an RCU grace period + * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed? + * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full() * + * If a full RCU grace period has elapsed since the earlier call from + * which *rgosp was obtained, return @true, otherwise return @false. + * If @false is returned, it is the caller's responsibility to invoke this + * function later on until it does return @true. Alternatively, the caller + * can explicitly wait for a grace period, for example, by passing @rgosp + * to cond_synchronize_rcu() or by directly invoking synchronize_rcu(). + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited + * for more than a billion grace periods (and way more on a 64-bit + * system!). Those needing to keep rcu_gp_oldstate values for very + * long time periods (many hours even on 32-bit systems) should check + * them occasionally and either refresh them or set a flag indicating + * that the grace period has completed. Alternatively, they can use + * get_completed_synchronize_rcu_full() to get a guaranteed-completed + * grace-period state. + * + * This function provides the same memory-ordering guarantees that would + * be provided by a synchronize_rcu() that was invoked at the call to + * the function that provided @rgosp, and that returned at the end of this + * function. And this guarantee requires that the root rcu_node structure's + * ->gp_seq field be checked instead of that of the rcu_state structure. + * The problem is that the just-ending grace-period's callbacks can be + * invoked between the time that the root rcu_node structure's ->gp_seq + * field is updated and the time that the rcu_state structure's ->gp_seq + * field is updated. Therefore, if a single synchronize_rcu() is to + * cause a subsequent poll_state_synchronize_rcu_full() to return @true, + * then the root rcu_node structure is the one that needs to be polled. + */ +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + struct rcu_node *rnp = rcu_get_root(); + + smp_mb(); // Order against root rcu_node structure grace-period cleanup. + if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED || + rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) || + rgosp->rgos_exp == RCU_GET_STATE_COMPLETED || + rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) { + smp_mb(); /* Ensure GP ends before subsequent accesses. */ + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full); + +/** + * cond_synchronize_rcu - Conditionally wait for an RCU grace period * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited() * * If a full RCU grace period has elapsed since the earlier call to @@ -3641,6 +3777,33 @@ void cond_synchronize_rcu(unsigned long oldstate) } EXPORT_SYMBOL_GPL(cond_synchronize_rcu); +/** + * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full() + * + * If a full RCU grace period has elapsed since the call to + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was + * obtained, just return. Otherwise, invoke synchronize_rcu() to wait + * for a full grace period. + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for a couple of additional grace periods should be just fine. + * + * This function provides the same memory-ordering guarantees that + * would be provided by a synchronize_rcu() that was invoked at the call + * to the function that provided @rgosp and that returned at the end of + * this function. + */ +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + if (!poll_state_synchronize_rcu_full(rgosp)) + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_rcu_full); + /* * Check to see if there is any immediate RCU-related work to be done by * the current CPU, returning 1 if so and zero otherwise. The checks are @@ -4312,9 +4475,20 @@ early_initcall(rcu_spawn_gp_kthread); */ void rcu_scheduler_starting(void) { + unsigned long flags; + struct rcu_node *rnp; + WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); rcu_test_sync_prims(); + + // Fix up the ->gp_seq counters. + local_irq_save(flags); + rcu_for_each_node_breadth_first(rnp) + rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; + local_irq_restore(flags); + + // Switch out of early boot mode. rcu_scheduler_active = RCU_SCHEDULER_INIT; rcu_test_sync_prims(); } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index be667583a554..18e9b4cd78ef 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -828,11 +828,13 @@ static void rcu_exp_handler(void *unused) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; + bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)); if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) return; - if (rcu_is_cpu_rrupt_from_idle()) { + if (rcu_is_cpu_rrupt_from_idle() || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) && preempt_bh_enabled)) { rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); return; } @@ -906,6 +908,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) void synchronize_rcu_expedited(void) { bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; unsigned long s; @@ -924,8 +927,11 @@ void synchronize_rcu_expedited(void) // them, which allows reuse of ->gp_seq_polled_exp_snap. rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap); rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap); - if (rcu_init_invoked()) - cond_resched(); + + local_irq_save(flags); + WARN_ON_ONCE(num_online_cpus() > 1); + rcu_state.expedited_sequence += (1 << RCU_SEQ_CTR_SHIFT); + local_irq_restore(flags); return; // Context allows vacuous grace periods. } @@ -1028,6 +1034,24 @@ unsigned long start_poll_synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited); /** + * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period + * @rgosp: Place to put snapshot of grace-period state + * + * Places the normal and expedited grace-period states in rgosp. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * If the needed expedited grace period is not already slated to start, + * initiates that grace period. + */ +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + get_state_synchronize_rcu_full(rgosp); + (void)start_poll_synchronize_rcu_expedited(); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited_full); + +/** * cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period * * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited() @@ -1053,3 +1077,30 @@ void cond_synchronize_rcu_expedited(unsigned long oldstate) synchronize_rcu_expedited(); } EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited); + +/** + * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full() + * + * If a full RCU grace period has elapsed since the call to + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was + * obtained, just return. Otherwise, invoke synchronize_rcu_expedited() + * to wait for a full grace period. + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for a couple of additional grace periods should be just fine. + * + * This function provides the same memory-ordering guarantees that + * would be provided by a synchronize_rcu() that was invoked at the call + * to the function that provided @rgosp and that returned at the end of + * this function. + */ +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + if (!poll_state_synchronize_rcu_full(rgosp)) + synchronize_rcu_expedited(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited_full); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a8f574d8850d..0a5f0ef41484 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1111,7 +1111,7 @@ int rcu_nocb_cpu_deoffload(int cpu) if (!ret) cpumask_clear_cpu(cpu, rcu_nocb_mask); } else { - pr_info("NOCB: Can't CB-deoffload an offline CPU\n"); + pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n", rdp->cpu); ret = -EINVAL; } } @@ -1196,7 +1196,7 @@ int rcu_nocb_cpu_offload(int cpu) if (!ret) cpumask_set_cpu(cpu, rcu_nocb_mask); } else { - pr_info("NOCB: Can't CB-offload an offline CPU\n"); + pr_info("NOCB: Cannot CB-offload offline CPU %d\n", rdp->cpu); ret = -EINVAL; } } @@ -1452,8 +1452,8 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp) (long)rdp->nocb_gp_seq, rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', - rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, - show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); + rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, + show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); } /* Dump out nocb kthread state for the specified rcu_data structure. */ @@ -1497,7 +1497,7 @@ static void show_rcu_nocb_state(struct rcu_data *rdp) ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)], rcu_segcblist_n_cbs(&rdp->cblist), rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', - rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, + rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); /* It is OK for GP kthreads to have GP state. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 438ecae6bd7e..e3142ee35fc6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -641,7 +641,8 @@ static void rcu_read_unlock_special(struct task_struct *t) expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) || (rdp->grpmask & READ_ONCE(rnp->expmask)) || - IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) || + (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && + ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node)) || (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node); // Need to defer quiescent state until everything is enabled. @@ -718,9 +719,6 @@ static void rcu_flavor_sched_clock_irq(int user) struct task_struct *t = current; lockdep_assert_irqs_disabled(); - if (user || rcu_is_cpu_rrupt_from_idle()) { - rcu_note_voluntary_context_switch(current); - } if (rcu_preempt_depth() > 0 || (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) { /* No QS, force context switch if deferred. */ @@ -824,6 +822,7 @@ void rcu_read_unlock_strict(void) if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) return; rdp = this_cpu_ptr(&rcu_data); + rdp->cpu_no_qs.b.norm = false; rcu_report_qs_rdp(rdp); udelay(rcu_unlock_delay); } @@ -869,7 +868,7 @@ void rcu_all_qs(void) if (!raw_cpu_read(rcu_data.rcu_urgent_qs)) return; - preempt_disable(); + preempt_disable(); // For CONFIG_PREEMPT_COUNT=y kernels /* Load rcu_urgent_qs before other flags. */ if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) { preempt_enable(); @@ -931,10 +930,13 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t) return false; } -// Except that we do need to respond to a request by an expedited grace -// period for a quiescent state from this CPU. Note that requests from -// tasks are handled when removing the task from the blocked-tasks list -// below. +// Except that we do need to respond to a request by an expedited +// grace period for a quiescent state from this CPU. Note that in +// non-preemptible kernels, there can be no context switches within RCU +// read-side critical sections, which in turn means that the leaf rcu_node +// structure's blocked-tasks list is always empty. is therefore no need to +// actually check it. Instead, a quiescent state from this CPU suffices, +// and this function is only called from such a quiescent state. notrace void rcu_preempt_deferred_qs(struct task_struct *t) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -972,7 +974,6 @@ static void rcu_flavor_sched_clock_irq(int user) * neither access nor modify, at least not while the * corresponding CPU is online. */ - rcu_qs(); } } @@ -1238,8 +1239,11 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); - if (cpumask_empty(cm)) + if (cpumask_empty(cm)) { cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); + if (outgoingcpu >= 0) + cpumask_clear_cpu(outgoingcpu, cm); + } set_cpus_allowed_ptr(t, cm); mutex_unlock(&rnp->boost_kthread_mutex); free_cpumask_var(cm); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index c3fbbcc09327..5653560573e2 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -368,7 +368,7 @@ static void rcu_dump_cpu_stacks(void) if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { if (cpu_is_offline(cpu)) pr_err("Offline CPU %d blocking current GP.\n", cpu); - else if (!trigger_single_cpu_backtrace(cpu)) + else dump_cpu_task(cpu); } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -511,8 +511,7 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); } else { pr_err("Stack dump where RCU GP kthread last ran:\n"); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); + dump_cpu_task(cpu); } } wake_up_process(gpk); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee28253c9ac0..60fdc0faf1c9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -73,6 +73,7 @@ #include <uapi/linux/sched/types.h> +#include <asm/irq_regs.h> #include <asm/switch_to.h> #include <asm/tlb.h> @@ -11183,6 +11184,19 @@ struct cgroup_subsys cpu_cgrp_subsys = { void dump_cpu_task(int cpu) { + if (cpu == smp_processor_id() && in_hardirq()) { + struct pt_regs *regs; + + regs = get_irq_regs(); + if (regs) { + show_regs(regs); + return; + } + } + + if (trigger_single_cpu_backtrace(cpu)) + return; + pr_info("Task dump for CPU %d:\n", cpu); sched_show_task(cpu_curr(cpu)); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 1207c78f85c1..9161d1136d01 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,6 +25,9 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; + /* max CPU capacity, which is equal for all CPUs in freq. domain */ + unsigned long max; + /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -48,7 +51,6 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; - unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = sg_cpu->iowait_boost * sg_policy->max; + boost >>= SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), sg_cpu->max); + map_util_perf(sg_cpu->util), + sg_policy->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0, max = 1; + unsigned long util = 0; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); - unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); - j_util = j_sg_cpu->util; - j_max = j_sg_cpu->max; - if (j_util * max > j_max * util) { - util = j_util; - max = j_max; - } + util = max(j_sg_cpu->util, util); } - return get_next_freq(sg_policy, util, max); + return get_next_freq(sg_policy, util, sg_policy->max); } static void @@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu; + unsigned int cpu = cpumask_first(policy->cpus); sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ -760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; + sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); diff --git a/kernel/smp.c b/kernel/smp.c index 650810a6f29b..e8cdc025a046 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -370,8 +370,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * if (cpu >= 0) { if (static_branch_unlikely(&csdlock_debug_extended)) csd_lock_print_extended(csd, cpu); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); + dump_cpu_task(cpu); if (!cpu_cur_csd) { pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); arch_send_call_function_single_ipi(cpu); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 5481ba44a8d6..3f464bbda0e9 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> +#include <linux/security.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <keys/user-type.h> @@ -113,6 +114,10 @@ int create_user_ns(struct cred *new) !kgid_has_mapping(parent_ns, group)) goto fail_dec; + ret = security_create_user_ns(new); + if (ret < 0) + goto fail_dec; + ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 39060a5d0905..7cd5f5e7e0a1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1651,7 +1651,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn); + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d3e5f36bb01e..f473f7d8a0a2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2511,6 +2511,18 @@ config MEMCPY_KUNIT_TEST If unsure, say N. +config IS_SIGNED_TYPE_KUNIT_TEST + tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Builds unit tests for the is_signed_type() macro. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config OVERFLOW_KUNIT_TEST tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2535,6 +2547,15 @@ config STACKINIT_KUNIT_TEST CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. +config FORTIFY_KUNIT_TEST + tristate "Test fortified str*() and mem*() function internals at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT && FORTIFY_SOURCE + default KUNIT_ALL_TESTS + help + Builds unit tests for checking internals of FORTIFY_SOURCE as used + by the str*() and mem*() family of functions. For testing runtime + traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests. + config TEST_UDELAY tristate "udelay test driver" help @@ -2710,6 +2731,40 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +menu "Rust hacking" + +config RUST_DEBUG_ASSERTIONS + bool "Debug assertions" + depends on RUST + help + Enables rustc's `-Cdebug-assertions` codegen option. + + This flag lets you turn `cfg(debug_assertions)` conditional + compilation on or off. This can be used to enable extra debugging + code in development but not in production. For example, it controls + the behavior of the standard library's `debug_assert!` macro. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say N. + +config RUST_OVERFLOW_CHECKS + bool "Overflow checks" + default y + depends on RUST + help + Enables rustc's `-Coverflow-checks` codegen option. + + This flag allows you to control the behavior of runtime integer + overflow. When overflow-checks are enabled, a Rust panic will occur + on overflow. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say Y. + +endmenu # "Rust" + source "Documentation/Kconfig" endmenu # Kernel hacking diff --git a/lib/Makefile b/lib/Makefile index ffabc30a27d4..7d7c9f67eff6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -377,9 +377,11 @@ obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o +obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o +obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c new file mode 100644 index 000000000000..409af07f340a --- /dev/null +++ b/lib/fortify_kunit.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Runtime test cases for CONFIG_FORTIFY_SOURCE that aren't expected to + * Oops the kernel on success. (For those, see drivers/misc/lkdtm/fortify.c) + * + * For corner cases with UBSAN, try testing with: + * + * ./tools/testing/kunit/kunit.py run --arch=x86_64 \ + * --kconfig_add CONFIG_FORTIFY_SOURCE=y \ + * --kconfig_add CONFIG_UBSAN=y \ + * --kconfig_add CONFIG_UBSAN_TRAP=y \ + * --kconfig_add CONFIG_UBSAN_BOUNDS=y \ + * --kconfig_add CONFIG_UBSAN_LOCAL_BOUNDS=y \ + * --make_options LLVM=1 fortify + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/string.h> + +static const char array_of_10[] = "this is 10"; +static const char *ptr_of_11 = "this is 11!"; +static char array_unknown[] = "compiler thinks I might change"; + +static void known_sizes_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, __compiletime_strlen("88888888"), 8); + KUNIT_EXPECT_EQ(test, __compiletime_strlen(array_of_10), 10); + KUNIT_EXPECT_EQ(test, __compiletime_strlen(ptr_of_11), 11); + + KUNIT_EXPECT_EQ(test, __compiletime_strlen(array_unknown), SIZE_MAX); + /* Externally defined and dynamically sized string pointer: */ + KUNIT_EXPECT_EQ(test, __compiletime_strlen(test->name), SIZE_MAX); +} + +/* This is volatile so the optimizer can't perform DCE below. */ +static volatile int pick; + +/* Not inline to keep optimizer from figuring out which string we want. */ +static noinline size_t want_minus_one(int pick) +{ + const char *str; + + switch (pick) { + case 1: + str = "4444"; + break; + case 2: + str = "333"; + break; + default: + str = "1"; + break; + } + return __compiletime_strlen(str); +} + +static void control_flow_split_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, want_minus_one(pick), SIZE_MAX); +} + +static struct kunit_case fortify_test_cases[] = { + KUNIT_CASE(known_sizes_test), + KUNIT_CASE(control_flow_split_test), + {} +}; + +static struct kunit_suite fortify_test_suite = { + .name = "fortify", + .test_cases = fortify_test_cases, +}; + +kunit_test_suite(fortify_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c new file mode 100644 index 000000000000..207207522925 --- /dev/null +++ b/lib/is_signed_type_kunit.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * ./tools/testing/kunit/kunit.py run is_signed_type [--raw_output] + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/compiler.h> + +enum unsigned_enum { + constant_a = 3, +}; + +enum signed_enum { + constant_b = -1, + constant_c = 2, +}; + +static void is_signed_type_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, is_signed_type(bool), false); + KUNIT_EXPECT_EQ(test, is_signed_type(signed char), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned char), false); +#ifdef __CHAR_UNSIGNED__ + KUNIT_EXPECT_EQ(test, is_signed_type(char), false); +#else + KUNIT_EXPECT_EQ(test, is_signed_type(char), true); +#endif + KUNIT_EXPECT_EQ(test, is_signed_type(int), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned int), false); + KUNIT_EXPECT_EQ(test, is_signed_type(long), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned long), false); + KUNIT_EXPECT_EQ(test, is_signed_type(long long), true); + KUNIT_EXPECT_EQ(test, is_signed_type(unsigned long long), false); + KUNIT_EXPECT_EQ(test, is_signed_type(enum unsigned_enum), false); + KUNIT_EXPECT_EQ(test, is_signed_type(enum signed_enum), true); + KUNIT_EXPECT_EQ(test, is_signed_type(void *), false); + KUNIT_EXPECT_EQ(test, is_signed_type(const char *), false); +} + +static struct kunit_case is_signed_type_test_cases[] = { + KUNIT_CASE(is_signed_type_test), + {} +}; + +static struct kunit_suite is_signed_type_test_suite = { + .name = "is_signed_type", + .test_cases = is_signed_type_test_cases, +}; + +kunit_test_suite(is_signed_type_test_suite); + +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 62f8ffcbbaa3..2b5cc70ac53f 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -29,9 +29,8 @@ struct some_bytes { }; #define check(instance, v) do { \ - int i; \ BUILD_BUG_ON(sizeof(instance.data) != 32); \ - for (i = 0; i < sizeof(instance.data); i++) { \ + for (size_t i = 0; i < sizeof(instance.data); i++) { \ KUNIT_ASSERT_EQ_MSG(test, instance.data[i], v, \ "line %d: '%s' not initialized to 0x%02x @ %d (saw 0x%02x)\n", \ __LINE__, #instance, v, i, instance.data[i]); \ @@ -39,9 +38,8 @@ struct some_bytes { } while (0) #define compare(name, one, two) do { \ - int i; \ BUILD_BUG_ON(sizeof(one) != sizeof(two)); \ - for (i = 0; i < sizeof(one); i++) { \ + for (size_t i = 0; i < sizeof(one); i++) { \ KUNIT_EXPECT_EQ_MSG(test, one.data[i], two.data[i], \ "line %d: %s.data[%d] (0x%02x) != %s.data[%d] (0x%02x)\n", \ __LINE__, #one, i, one.data[i], #two, i, two.data[i]); \ @@ -272,10 +270,63 @@ static void memset_test(struct kunit *test) #undef TEST_OP } +static void strtomem_test(struct kunit *test) +{ + static const char input[sizeof(unsigned long)] = "hi"; + static const char truncate[] = "this is too long"; + struct { + unsigned long canary1; + unsigned char output[sizeof(unsigned long)] __nonstring; + unsigned long canary2; + } wrap; + + memset(&wrap, 0xFF, sizeof(wrap)); + KUNIT_EXPECT_EQ_MSG(test, wrap.canary1, ULONG_MAX, + "bad initial canary value"); + KUNIT_EXPECT_EQ_MSG(test, wrap.canary2, ULONG_MAX, + "bad initial canary value"); + + /* Check unpadded copy leaves surroundings untouched. */ + strtomem(wrap.output, input); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + KUNIT_EXPECT_EQ(test, wrap.output[0], input[0]); + KUNIT_EXPECT_EQ(test, wrap.output[1], input[1]); + for (size_t i = 2; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], 0xFF); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check truncated copy leaves surroundings untouched. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem(wrap.output, truncate); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + for (size_t i = 0; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], truncate[i]); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check padded copy leaves only string padded. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem_pad(wrap.output, input, 0xAA); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + KUNIT_EXPECT_EQ(test, wrap.output[0], input[0]); + KUNIT_EXPECT_EQ(test, wrap.output[1], input[1]); + for (size_t i = 2; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], 0xAA); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); + + /* Check truncated padded copy has no padding. */ + memset(&wrap, 0xFF, sizeof(wrap)); + strtomem(wrap.output, truncate); + KUNIT_EXPECT_EQ(test, wrap.canary1, ULONG_MAX); + for (size_t i = 0; i < sizeof(wrap.output); i++) + KUNIT_EXPECT_EQ(test, wrap.output[i], truncate[i]); + KUNIT_EXPECT_EQ(test, wrap.canary2, ULONG_MAX); +} + static struct kunit_case memcpy_test_cases[] = { KUNIT_CASE(memset_test), KUNIT_CASE(memcpy_test), KUNIT_CASE(memmove_test), + KUNIT_CASE(strtomem_test), {} }; diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 7e3e43679b73..f385ca652b74 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -16,12 +16,15 @@ #include <linux/types.h> #include <linux/vmalloc.h> -#define DEFINE_TEST_ARRAY(t) \ - static const struct test_ ## t { \ - t a, b; \ - t sum, diff, prod; \ - bool s_of, d_of, p_of; \ - } t ## _tests[] +#define DEFINE_TEST_ARRAY_TYPED(t1, t2, t) \ + static const struct test_ ## t1 ## _ ## t2 ## __ ## t { \ + t1 a; \ + t2 b; \ + t sum, diff, prod; \ + bool s_of, d_of, p_of; \ + } t1 ## _ ## t2 ## __ ## t ## _tests[] + +#define DEFINE_TEST_ARRAY(t) DEFINE_TEST_ARRAY_TYPED(t, t, t) DEFINE_TEST_ARRAY(u8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -222,21 +225,27 @@ DEFINE_TEST_ARRAY(s64) = { }; #endif -#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ - t _r; \ - bool _of; \ - \ - _of = check_ ## op ## _overflow(a, b, &_r); \ - KUNIT_EXPECT_EQ_MSG(test, _of, of, \ +#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ + int _a_orig = a, _a_bump = a + 1; \ + int _b_orig = b, _b_bump = b + 1; \ + bool _of; \ + t _r; \ + \ + _of = check_ ## op ## _overflow(a, b, &_r); \ + KUNIT_EXPECT_EQ_MSG(test, _of, of, \ "expected "fmt" "sym" "fmt" to%s overflow (type %s)\n", \ - a, b, of ? "" : " not", #t); \ - KUNIT_EXPECT_EQ_MSG(test, _r, r, \ + a, b, of ? "" : " not", #t); \ + KUNIT_EXPECT_EQ_MSG(test, _r, r, \ "expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \ - a, b, r, _r, #t); \ + a, b, r, _r, #t); \ + /* Check for internal macro side-effects. */ \ + _of = check_ ## op ## _overflow(_a_orig++, _b_orig++, &_r); \ + KUNIT_EXPECT_EQ_MSG(test, _a_orig, _a_bump, "Unexpected " #op " macro side-effect!\n"); \ + KUNIT_EXPECT_EQ_MSG(test, _b_orig, _b_bump, "Unexpected " #op " macro side-effect!\n"); \ } while (0) -#define DEFINE_TEST_FUNC(t, fmt) \ -static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \ +#define DEFINE_TEST_FUNC_TYPED(n, t, fmt) \ +static void do_test_ ## n(struct kunit *test, const struct test_ ## n *p) \ { \ check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \ check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \ @@ -245,15 +254,18 @@ static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \ check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \ } \ \ -static void t ## _overflow_test(struct kunit *test) { \ +static void n ## _overflow_test(struct kunit *test) { \ unsigned i; \ \ - for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ - do_test_ ## t(test, &t ## _tests[i]); \ + for (i = 0; i < ARRAY_SIZE(n ## _tests); ++i) \ + do_test_ ## n(test, &n ## _tests[i]); \ kunit_info(test, "%zu %s arithmetic tests finished\n", \ - ARRAY_SIZE(t ## _tests), #t); \ + ARRAY_SIZE(n ## _tests), #n); \ } +#define DEFINE_TEST_FUNC(t, fmt) \ + DEFINE_TEST_FUNC_TYPED(t ## _ ## t ## __ ## t, t, fmt) + DEFINE_TEST_FUNC(u8, "%d"); DEFINE_TEST_FUNC(s8, "%d"); DEFINE_TEST_FUNC(u16, "%d"); @@ -265,9 +277,32 @@ DEFINE_TEST_FUNC(u64, "%llu"); DEFINE_TEST_FUNC(s64, "%lld"); #endif -static void overflow_shift_test(struct kunit *test) -{ - int count = 0; +DEFINE_TEST_ARRAY_TYPED(u32, u32, u8) = { + {0, 0, 0, 0, 0, false, false, false}, + {U8_MAX, 2, 1, U8_MAX - 2, U8_MAX - 1, true, false, true}, + {U8_MAX + 1, 0, 0, 0, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(u32_u32__u8, u8, "%d"); + +DEFINE_TEST_ARRAY_TYPED(u32, u32, int) = { + {0, 0, 0, 0, 0, false, false, false}, + {U32_MAX, 0, -1, -1, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(u32_u32__int, int, "%d"); + +DEFINE_TEST_ARRAY_TYPED(u8, u8, int) = { + {0, 0, 0, 0, 0, false, false, false}, + {U8_MAX, U8_MAX, 2 * U8_MAX, 0, U8_MAX * U8_MAX, false, false, false}, + {1, 2, 3, -1, 2, false, false, false}, +}; +DEFINE_TEST_FUNC_TYPED(u8_u8__int, int, "%d"); + +DEFINE_TEST_ARRAY_TYPED(int, int, u8) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 2, 3, U8_MAX, 2, false, true, false}, + {-1, 0, U8_MAX, U8_MAX, 0, true, true, false}, +}; +DEFINE_TEST_FUNC_TYPED(int_int__u8, u8, "%d"); /* Args are: value, shift, type, expected result, overflow expected */ #define TEST_ONE_SHIFT(a, s, t, expect, of) do { \ @@ -292,6 +327,10 @@ static void overflow_shift_test(struct kunit *test) count++; \ } while (0) +static void shift_sane_test(struct kunit *test) +{ + int count = 0; + /* Sane shifts. */ TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); @@ -334,6 +373,13 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, 30, s32, 0, false); TEST_ONE_SHIFT(0, 62, s64, 0, false); + kunit_info(test, "%d sane shift tests finished\n", count); +} + +static void shift_overflow_test(struct kunit *test) +{ + int count = 0; + /* Overflow: shifted the bit off the end. */ TEST_ONE_SHIFT(1, 8, u8, 0, true); TEST_ONE_SHIFT(1, 16, u16, 0, true); @@ -381,6 +427,13 @@ static void overflow_shift_test(struct kunit *test) /* 0100000100001000001000000010000001000010000001000100010001001011 */ TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + kunit_info(test, "%d overflow shift tests finished\n", count); +} + +static void shift_truncate_test(struct kunit *test) +{ + int count = 0; + /* Overflow: values larger than destination type. */ TEST_ONE_SHIFT(0x100, 0, u8, 0, true); TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); @@ -392,6 +445,33 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + /* Overflow: shifted at or beyond entire type's bit width. */ + TEST_ONE_SHIFT(0, 8, u8, 0, true); + TEST_ONE_SHIFT(0, 9, u8, 0, true); + TEST_ONE_SHIFT(0, 8, s8, 0, true); + TEST_ONE_SHIFT(0, 9, s8, 0, true); + TEST_ONE_SHIFT(0, 16, u16, 0, true); + TEST_ONE_SHIFT(0, 17, u16, 0, true); + TEST_ONE_SHIFT(0, 16, s16, 0, true); + TEST_ONE_SHIFT(0, 17, s16, 0, true); + TEST_ONE_SHIFT(0, 32, u32, 0, true); + TEST_ONE_SHIFT(0, 33, u32, 0, true); + TEST_ONE_SHIFT(0, 32, int, 0, true); + TEST_ONE_SHIFT(0, 33, int, 0, true); + TEST_ONE_SHIFT(0, 32, s32, 0, true); + TEST_ONE_SHIFT(0, 33, s32, 0, true); + TEST_ONE_SHIFT(0, 64, u64, 0, true); + TEST_ONE_SHIFT(0, 65, u64, 0, true); + TEST_ONE_SHIFT(0, 64, s64, 0, true); + TEST_ONE_SHIFT(0, 65, s64, 0, true); + + kunit_info(test, "%d truncate shift tests finished\n", count); +} + +static void shift_nonsense_test(struct kunit *test) +{ + int count = 0; + /* Nonsense: negative initial value. */ TEST_ONE_SHIFT(-1, 0, s8, 0, true); TEST_ONE_SHIFT(-1, 0, u8, 0, true); @@ -416,26 +496,6 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, -30, s64, 0, true); TEST_ONE_SHIFT(0, -30, u64, 0, true); - /* Overflow: shifted at or beyond entire type's bit width. */ - TEST_ONE_SHIFT(0, 8, u8, 0, true); - TEST_ONE_SHIFT(0, 9, u8, 0, true); - TEST_ONE_SHIFT(0, 8, s8, 0, true); - TEST_ONE_SHIFT(0, 9, s8, 0, true); - TEST_ONE_SHIFT(0, 16, u16, 0, true); - TEST_ONE_SHIFT(0, 17, u16, 0, true); - TEST_ONE_SHIFT(0, 16, s16, 0, true); - TEST_ONE_SHIFT(0, 17, s16, 0, true); - TEST_ONE_SHIFT(0, 32, u32, 0, true); - TEST_ONE_SHIFT(0, 33, u32, 0, true); - TEST_ONE_SHIFT(0, 32, int, 0, true); - TEST_ONE_SHIFT(0, 33, int, 0, true); - TEST_ONE_SHIFT(0, 32, s32, 0, true); - TEST_ONE_SHIFT(0, 33, s32, 0, true); - TEST_ONE_SHIFT(0, 64, u64, 0, true); - TEST_ONE_SHIFT(0, 65, u64, 0, true); - TEST_ONE_SHIFT(0, 64, s64, 0, true); - TEST_ONE_SHIFT(0, 65, s64, 0, true); - /* * Corner case: for unsigned types, we fail when we've shifted * through the entire width of bits. For signed types, we might @@ -451,9 +511,9 @@ static void overflow_shift_test(struct kunit *test) TEST_ONE_SHIFT(0, 31, s32, 0, false); TEST_ONE_SHIFT(0, 63, s64, 0, false); - kunit_info(test, "%d shift tests finished\n", count); -#undef TEST_ONE_SHIFT + kunit_info(test, "%d nonsense shift tests finished\n", count); } +#undef TEST_ONE_SHIFT /* * Deal with the various forms of allocator arguments. See comments above @@ -649,18 +709,25 @@ static void overflow_size_helpers_test(struct kunit *test) } static struct kunit_case overflow_test_cases[] = { - KUNIT_CASE(u8_overflow_test), - KUNIT_CASE(s8_overflow_test), - KUNIT_CASE(u16_overflow_test), - KUNIT_CASE(s16_overflow_test), - KUNIT_CASE(u32_overflow_test), - KUNIT_CASE(s32_overflow_test), + KUNIT_CASE(u8_u8__u8_overflow_test), + KUNIT_CASE(s8_s8__s8_overflow_test), + KUNIT_CASE(u16_u16__u16_overflow_test), + KUNIT_CASE(s16_s16__s16_overflow_test), + KUNIT_CASE(u32_u32__u32_overflow_test), + KUNIT_CASE(s32_s32__s32_overflow_test), /* Clang 13 and earlier generate unwanted libcalls on 32-bit. */ #if BITS_PER_LONG == 64 - KUNIT_CASE(u64_overflow_test), - KUNIT_CASE(s64_overflow_test), + KUNIT_CASE(u64_u64__u64_overflow_test), + KUNIT_CASE(s64_s64__s64_overflow_test), #endif - KUNIT_CASE(overflow_shift_test), + KUNIT_CASE(u32_u32__u8_overflow_test), + KUNIT_CASE(u32_u32__int_overflow_test), + KUNIT_CASE(u8_u8__int_overflow_test), + KUNIT_CASE(int_int__u8_overflow_test), + KUNIT_CASE(shift_sane_test), + KUNIT_CASE(shift_overflow_test), + KUNIT_CASE(shift_truncate_test), + KUNIT_CASE(shift_nonsense_test), KUNIT_CASE(overflow_allocation_test), KUNIT_CASE(overflow_size_helpers_test), {} diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3c1853a9d1c0..c414a8d9f1ea 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2246,6 +2246,9 @@ int __init no_hash_pointers_enable(char *str) } early_param("no_hash_pointers", no_hash_pointers_enable); +/* Used for Rust formatting ('%pA'). */ +char *rust_fmt_argument(char *buf, char *end, void *ptr); + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2372,6 +2375,10 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * * Note: The default behaviour (unadorned %p) is to hash the address, * rendering it useful as a unique identifier. + * + * There is also a '%pA' format specifier, but it is only intended to be used + * from Rust code to format core::fmt::Arguments. Do *not* use it from C. + * See rust/kernel/print.rs for details. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -2444,6 +2451,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); case 'f': return fwnode_string(buf, end, ptr, spec, fmt + 1); + case 'A': + if (!IS_ENABLED(CONFIG_RUST)) { + WARN_ONCE(1, "Please remove %%pA from non-Rust code\n"); + return error_string(buf, end, "(%pA?)", spec); + } + return rust_fmt_argument(buf, end, ptr); case 'x': return pointer_string(buf, end, ptr, spec); case 'e': diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 7488e27c87c3..bdef9682d0a0 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2182,12 +2182,12 @@ static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target, if (!t) return -ENOMEM; + damon_add_target(ctx, t); if (damon_target_has_pid(ctx)) { t->pid = find_get_pid(sys_target->pid); if (!t->pid) goto destroy_targets_out; } - damon_add_target(ctx, t); err = damon_sysfs_set_regions(t, sys_target->regions); if (err) goto destroy_targets_out; diff --git a/mm/util.c b/mm/util.c index c9439c66d8cf..346e40177bc6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -619,6 +619,10 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (ret || size <= PAGE_SIZE) return ret; + /* non-sleeping allocations are not supported by vmalloc */ + if (!gfpflags_allow_blocking(flags)) + return NULL; + /* Don't even allow crazy sizes */ if (unlikely(size > INT_MAX)) { WARN_ON_ONCE(!(flags & __GFP_NOWARN)); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7c9a0d0b1230..149171774bc6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1205,7 +1205,7 @@ svc_generic_init_request(struct svc_rqst *rqstp, goto err_bad_proc; /* Initialize storage for argp and resp */ - memset(rqstp->rq_argp, 0, procp->pc_argsize); + memset(rqstp->rq_argp, 0, procp->pc_argzero); memset(rqstp->rq_resp, 0, procp->pc_ressize); /* Bump per-procedure stats counter */ @@ -1434,8 +1434,7 @@ svc_process(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - struct svc_serv *serv = rqstp->rq_server; - u32 dir; + __be32 dir; #if IS_ENABLED(CONFIG_FAIL_SUNRPC) if (!fail_sunrpc.ignore_server_disconnect && @@ -1450,7 +1449,7 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_next_page = &rqstp->rq_respages[1]; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.pages = rqstp->rq_next_page; rqstp->rq_res.len = 0; rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; @@ -1458,18 +1457,17 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - dir = svc_getnl(argv); - if (dir != 0) { - /* direction != CALL */ - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - serv->sv_stats->rpcbadfmt++; + dir = svc_getu32(argv); + if (dir != rpc_call) + goto out_baddir; + if (!svc_process_common(rqstp, argv, resv)) goto out_drop; - } - - /* Returns 1 for send, 0 for drop */ - if (likely(svc_process_common(rqstp, argv, resv))) - return svc_send(rqstp); + return svc_send(rqstp); +out_baddir: + svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", + be32_to_cpu(dir)); + rqstp->rq_server->sv_stats->rpcbadfmt++; out_drop: svc_drop(rqstp); return 0; @@ -1556,8 +1554,12 @@ out: EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * Return (transport-specific) limit on the rpc payload. +/** + * svc_max_payload - Return transport-specific limit on the RPC payload + * @rqstp: RPC transaction context + * + * Returns the maximum number of payload bytes the current transport + * allows. */ u32 svc_max_payload(const struct svc_rqst *rqstp) { diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 482586c23fdd..336a7c7833e4 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -947,6 +947,28 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, EXPORT_SYMBOL_GPL(xdr_init_encode); /** + * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer into which to encode data + * @pages: list of pages to decode into + * @rqst: pointer to controlling rpc_rqst, for debugging + * + */ +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst) +{ + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->page_ptr = pages; + xdr->iov = NULL; + xdr->p = page_address(*pages); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); + xdr->rqst = rqst; +} +EXPORT_SYMBOL_GPL(xdr_init_encode_pages); + +/** * __xdr_commit_encode - Ensure all data is written to buffer * @xdr: pointer to xdr_stream * @@ -1575,7 +1597,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov); * * @buf and @subbuf may be pointers to the same struct xdr_buf. * - * Returns -1 if base of length are out of bounds. + * Returns -1 if base or length are out of bounds. */ int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, unsigned int base, unsigned int len) diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 000000000000..9bd1af8e05a1 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +target.json +bindings_generated.rs +bindings_helpers_generated.rs +exports_*_generated.h +doc/ +test/ diff --git a/rust/Makefile b/rust/Makefile new file mode 100644 index 000000000000..7700d3853404 --- /dev/null +++ b/rust/Makefile @@ -0,0 +1,381 @@ +# SPDX-License-Identifier: GPL-2.0 + +always-$(CONFIG_RUST) += target.json +no-clean-files += target.json + +obj-$(CONFIG_RUST) += core.o compiler_builtins.o +always-$(CONFIG_RUST) += exports_core_generated.h + +# Missing prototypes are expected in the helpers since these are exported +# for Rust only, thus there is no header nor prototypes. +obj-$(CONFIG_RUST) += helpers.o +CFLAGS_REMOVE_helpers.o = -Wmissing-prototypes -Wmissing-declarations + +always-$(CONFIG_RUST) += libmacros.so +no-clean-files += libmacros.so + +always-$(CONFIG_RUST) += bindings/bindings_generated.rs bindings/bindings_helpers_generated.rs +obj-$(CONFIG_RUST) += alloc.o bindings.o kernel.o +always-$(CONFIG_RUST) += exports_alloc_generated.h exports_bindings_generated.h \ + exports_kernel_generated.h + +obj-$(CONFIG_RUST) += exports.o + +# Avoids running `$(RUSTC)` for the sysroot when it may not be available. +ifdef CONFIG_RUST + +# `$(rust_flags)` is passed in case the user added `--sysroot`. +rustc_sysroot := $(shell $(RUSTC) $(rust_flags) --print sysroot) +rustc_host_target := $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) +RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library + +ifeq ($(quiet),silent_) +cargo_quiet=-q +rust_test_quiet=-q +rustdoc_test_quiet=--test-args -q +else ifeq ($(quiet),quiet_) +rust_test_quiet=-q +rustdoc_test_quiet=--test-args -q +else +cargo_quiet=--verbose +endif + +core-cfgs = \ + --cfg no_fp_fmt_parse + +alloc-cfgs = \ + --cfg no_fmt \ + --cfg no_global_oom_handling \ + --cfg no_macros \ + --cfg no_rc \ + --cfg no_str \ + --cfg no_string \ + --cfg no_sync \ + --cfg no_thin + +quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< + cmd_rustdoc = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTDOC) $(if $(rustdoc_host),$(rust_common_flags),$(rust_flags)) \ + $(rustc_target_flags) -L$(objtree)/$(obj) \ + --output $(objtree)/$(obj)/doc \ + --crate-name $(subst rustdoc-,,$@) \ + @$(objtree)/include/generated/rustc_cfg $< + +# The `html_logo_url` and `html_favicon_url` forms of the `doc` attribute +# can be used to specify a custom logo. However: +# - The given value is used as-is, thus it cannot be relative or a local file +# (unlike the non-custom case) since the generated docs have subfolders. +# - It requires adding it to every crate. +# - It requires changing `core` which comes from the sysroot. +# +# Using `-Zcrate-attr` would solve the last two points, but not the first. +# The https://github.com/rust-lang/rfcs/pull/3226 RFC suggests two new +# command-like flags to solve the issue. Meanwhile, we use the non-custom case +# and then retouch the generated files. +rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \ + rustdoc-alloc rustdoc-kernel + $(Q)cp $(srctree)/Documentation/images/logo.svg $(objtree)/$(obj)/doc + $(Q)cp $(srctree)/Documentation/images/COPYING-logo $(objtree)/$(obj)/doc + $(Q)find $(objtree)/$(obj)/doc -name '*.html' -type f -print0 | xargs -0 sed -Ei \ + -e 's:rust-logo\.svg:logo.svg:g' \ + -e 's:rust-logo\.png:logo.svg:g' \ + -e 's:favicon\.svg:logo.svg:g' \ + -e 's:<link rel="alternate icon" type="image/png" href="[./]*favicon-(16x16|32x32)\.png">::g' + $(Q)echo '.logo-container > img { object-fit: contain; }' \ + >> $(objtree)/$(obj)/doc/rustdoc.css + +rustdoc-macros: private rustdoc_host = yes +rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ + --extern proc_macro +rustdoc-macros: $(src)/macros/lib.rs FORCE + $(call if_changed,rustdoc) + +rustdoc-core: private rustc_target_flags = $(core-cfgs) +rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE + $(call if_changed,rustdoc) + +rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE + $(call if_changed,rustdoc) + +# We need to allow `rustdoc::broken_intra_doc_links` because some +# `no_global_oom_handling` functions refer to non-`no_global_oom_handling` +# functions. Ideally `rustdoc` would have a way to distinguish broken links +# due to things that are "configured out" vs. entirely non-existing ones. +rustdoc-alloc: private rustc_target_flags = $(alloc-cfgs) \ + -Arustdoc::broken_intra_doc_links +rustdoc-alloc: $(src)/alloc/lib.rs rustdoc-core rustdoc-compiler_builtins FORCE + $(call if_changed,rustdoc) + +rustdoc-kernel: private rustc_target_flags = --extern alloc \ + --extern macros=$(objtree)/$(obj)/libmacros.so \ + --extern bindings +rustdoc-kernel: $(src)/kernel/lib.rs rustdoc-core rustdoc-macros \ + rustdoc-compiler_builtins rustdoc-alloc $(obj)/libmacros.so \ + $(obj)/bindings.o FORCE + $(call if_changed,rustdoc) + +quiet_cmd_rustc_test_library = RUSTC TL $< + cmd_rustc_test_library = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTC) $(rust_common_flags) \ + @$(objtree)/include/generated/rustc_cfg $(rustc_target_flags) \ + --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ + --out-dir $(objtree)/$(obj)/test --cfg testlib \ + --sysroot $(objtree)/$(obj)/test/sysroot \ + -L$(objtree)/$(obj)/test \ + --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< + +rusttestlib-macros: private rustc_target_flags = --extern proc_macro +rusttestlib-macros: private rustc_test_library_proc = yes +rusttestlib-macros: $(src)/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +rusttestlib-bindings: $(src)/bindings/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +quiet_cmd_rustdoc_test = RUSTDOC T $< + cmd_rustdoc_test = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTDOC) --test $(rust_common_flags) \ + @$(objtree)/include/generated/rustc_cfg \ + $(rustc_target_flags) $(rustdoc_test_target_flags) \ + --sysroot $(objtree)/$(obj)/test/sysroot $(rustdoc_test_quiet) \ + -L$(objtree)/$(obj)/test --output $(objtree)/$(obj)/doc \ + --crate-name $(subst rusttest-,,$@) $< + +# We cannot use `-Zpanic-abort-tests` because some tests are dynamic, +# so for the moment we skip `-Cpanic=abort`. +quiet_cmd_rustc_test = RUSTC T $< + cmd_rustc_test = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTC) --test $(rust_common_flags) \ + @$(objtree)/include/generated/rustc_cfg \ + $(rustc_target_flags) --out-dir $(objtree)/$(obj)/test \ + --sysroot $(objtree)/$(obj)/test/sysroot \ + -L$(objtree)/$(obj)/test \ + --crate-name $(subst rusttest-,,$@) $<; \ + $(objtree)/$(obj)/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ + $(rustc_test_run_flags) + +rusttest: rusttest-macros rusttest-kernel + +# This prepares a custom sysroot with our custom `alloc` instead of +# the standard one. +# +# This requires several hacks: +# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, +# including third-party crates that need to be downloaded, plus custom +# `build.rs` steps. Thus hardcoding things here is not maintainable. +# - `cargo` knows how to build the standard library, but it is an unstable +# feature so far (`-Zbuild-std`). +# - `cargo` only considers the use case of building the standard library +# to use it in a given package. Thus we need to create a dummy package +# and pick the generated libraries from there. +# - Since we only keep a subset of upstream `alloc` in-tree, we need +# to recreate it on the fly by putting our sources on top. +# - The usual ways of modifying the dependency graph in `cargo` do not seem +# to apply for the `-Zbuild-std` steps, thus we have to mislead it +# by modifying the sources in the sysroot. +# - To avoid messing with the user's Rust installation, we create a clone +# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` +# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. +# +# In the future, we hope to avoid the whole ordeal by either: +# - Making the `test` crate not depend on `std` (either improving upstream +# or having our own custom crate). +# - Making the tests run in kernel space (requires the previous point). +# - Making `std` and friends be more like a "normal" crate, so that +# `-Zbuild-std` and related hacks are not needed. +quiet_cmd_rustsysroot = RUSTSYSROOT + cmd_rustsysroot = \ + rm -rf $(objtree)/$(obj)/test; \ + mkdir -p $(objtree)/$(obj)/test; \ + cp -a $(rustc_sysroot) $(objtree)/$(obj)/test/sysroot; \ + cp -r $(srctree)/$(src)/alloc/* \ + $(objtree)/$(obj)/test/sysroot/lib/rustlib/src/rust/library/alloc/src; \ + echo '\#!/bin/sh' > $(objtree)/$(obj)/test/rustc_sysroot; \ + echo "$(RUSTC) --sysroot=$(abspath $(objtree)/$(obj)/test/sysroot) \"\$$@\"" \ + >> $(objtree)/$(obj)/test/rustc_sysroot; \ + chmod u+x $(objtree)/$(obj)/test/rustc_sysroot; \ + $(CARGO) -q new $(objtree)/$(obj)/test/dummy; \ + RUSTC=$(objtree)/$(obj)/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ + test -Zbuild-std --target $(rustc_host_target) \ + --manifest-path $(objtree)/$(obj)/test/dummy/Cargo.toml; \ + rm $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ + cp $(objtree)/$(obj)/test/dummy/target/$(rustc_host_target)/debug/deps/* \ + $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib + +rusttest-prepare: FORCE + $(call if_changed,rustsysroot) + +rusttest-macros: private rustc_target_flags = --extern proc_macro +rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro +rusttest-macros: $(src)/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test) + $(call if_changed,rustdoc_test) + +rusttest-kernel: private rustc_target_flags = --extern alloc \ + --extern macros --extern bindings +rusttest-kernel: $(src)/kernel/lib.rs rusttest-prepare \ + rusttestlib-macros rusttestlib-bindings FORCE + $(call if_changed,rustc_test) + $(call if_changed,rustc_test_library) + +filechk_rust_target = $(objtree)/scripts/generate_rust_target < $< + +$(obj)/target.json: $(objtree)/include/config/auto.conf FORCE + $(call filechk,rust_target) + +ifdef CONFIG_CC_IS_CLANG +bindgen_c_flags = $(c_flags) +else +# bindgen relies on libclang to parse C. Ideally, bindgen would support a GCC +# plugin backend and/or the Clang driver would be perfectly compatible with GCC. +# +# For the moment, here we are tweaking the flags on the fly. This is a hack, +# and some kernel configurations may not work (e.g. `GCC_PLUGIN_RANDSTRUCT` +# if we end up using one of those structs). +bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ + -mskip-rax-setup -mgeneral-regs-only -msign-return-address=% \ + -mindirect-branch=thunk-extern -mindirect-branch-register \ + -mfunction-return=thunk-extern -mrecord-mcount -mabi=lp64 \ + -mindirect-branch-cs-prefix -mstack-protector-guard% -mtraceback=no \ + -mno-pointers-to-nested-functions -mno-string \ + -mno-strict-align -mstrict-align \ + -fconserve-stack -falign-jumps=% -falign-loops=% \ + -femit-struct-debug-baseonly -fno-ipa-cp-clone -fno-ipa-sra \ + -fno-partial-inlining -fplugin-arg-arm_ssp_per_task_plugin-% \ + -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ + -fzero-call-used-regs=% -fno-stack-clash-protection \ + -fno-inline-functions-called-once \ + --param=% --param asan-% + +# Derived from `scripts/Makefile.clang`. +BINDGEN_TARGET_x86 := x86_64-linux-gnu +BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) + +# All warnings are inhibited since GCC builds are very experimental, +# many GCC warnings are not supported by Clang, they may only appear in +# some configurations, with new GCC versions, etc. +bindgen_extra_c_flags = -w --target=$(BINDGEN_TARGET) + +bindgen_c_flags = $(filter-out $(bindgen_skip_c_flags), $(c_flags)) \ + $(bindgen_extra_c_flags) +endif + +ifdef CONFIG_LTO +bindgen_c_flags_lto = $(filter-out $(CC_FLAGS_LTO), $(bindgen_c_flags)) +else +bindgen_c_flags_lto = $(bindgen_c_flags) +endif + +bindgen_c_flags_final = $(bindgen_c_flags_lto) -D__BINDGEN__ + +quiet_cmd_bindgen = BINDGEN $@ + cmd_bindgen = \ + $(BINDGEN) $< $(bindgen_target_flags) \ + --use-core --with-derive-default --ctypes-prefix core::ffi --no-layout-tests \ + --no-debug '.*' \ + --size_t-is-usize -o $@ -- $(bindgen_c_flags_final) -DMODULE \ + $(bindgen_target_cflags) $(bindgen_target_extra) + +$(obj)/bindings/bindings_generated.rs: private bindgen_target_flags = \ + $(shell grep -v '^\#\|^$$' $(srctree)/$(src)/bindgen_parameters) +$(obj)/bindings/bindings_generated.rs: $(src)/bindings/bindings_helper.h \ + $(src)/bindgen_parameters FORCE + $(call if_changed_dep,bindgen) + +# See `CFLAGS_REMOVE_helpers.o` above. In addition, Clang on C does not warn +# with `-Wmissing-declarations` (unlike GCC), so it is not strictly needed here +# given it is `libclang`; but for consistency, future Clang changes and/or +# a potential future GCC backend for `bindgen`, we disable it too. +$(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_flags = \ + --blacklist-type '.*' --whitelist-var '' \ + --whitelist-function 'rust_helper_.*' +$(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_cflags = \ + -I$(objtree)/$(obj) -Wno-missing-prototypes -Wno-missing-declarations +$(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; \ + sed -Ei 's/pub fn rust_helper_([a-zA-Z0-9_]*)/#[link_name="rust_helper_\1"]\n pub fn \1/g' $@ +$(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers.c FORCE + $(call if_changed_dep,bindgen) + +quiet_cmd_exports = EXPORTS $@ + cmd_exports = \ + $(NM) -p --defined-only $< \ + | grep -E ' (T|R|D) ' | cut -d ' ' -f 3 \ + | xargs -Isymbol \ + echo 'EXPORT_SYMBOL_RUST_GPL(symbol);' > $@ + +$(obj)/exports_core_generated.h: $(obj)/core.o FORCE + $(call if_changed,exports) + +$(obj)/exports_alloc_generated.h: $(obj)/alloc.o FORCE + $(call if_changed,exports) + +$(obj)/exports_bindings_generated.h: $(obj)/bindings.o FORCE + $(call if_changed,exports) + +$(obj)/exports_kernel_generated.h: $(obj)/kernel.o FORCE + $(call if_changed,exports) + +quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ + cmd_rustc_procmacro = \ + $(RUSTC_OR_CLIPPY) $(rust_common_flags) \ + --emit=dep-info,link --extern proc_macro \ + --crate-type proc-macro --out-dir $(objtree)/$(obj) \ + --crate-name $(patsubst lib%.so,%,$(notdir $@)) $<; \ + mv $(objtree)/$(obj)/$(patsubst lib%.so,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +# Procedural macros can only be used with the `rustc` that compiled it. +# Therefore, to get `libmacros.so` automatically recompiled when the compiler +# version changes, we add `core.o` as a dependency (even if it is not needed). +$(obj)/libmacros.so: $(src)/macros/lib.rs $(obj)/core.o FORCE + $(call if_changed_dep,rustc_procmacro) + +quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L $@ + cmd_rustc_library = \ + OBJTREE=$(abspath $(objtree)) \ + $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ + $(filter-out $(skip_flags),$(rust_flags) $(rustc_target_flags)) \ + --emit=dep-info,obj,metadata --crate-type rlib \ + --out-dir $(objtree)/$(obj) -L$(objtree)/$(obj) \ + --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ + mv $(objtree)/$(obj)/$(patsubst %.o,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) \ + $(if $(rustc_objcopy),;$(OBJCOPY) $(rustc_objcopy) $@) + +rust-analyzer: + $(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) \ + $(RUST_LIB_SRC) > $(objtree)/rust-project.json + +$(obj)/core.o: private skip_clippy = 1 +$(obj)/core.o: private skip_flags = -Dunreachable_pub +$(obj)/core.o: private rustc_target_flags = $(core-cfgs) +$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs $(obj)/target.json FORCE + $(call if_changed_dep,rustc_library) + +$(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' +$(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE + $(call if_changed_dep,rustc_library) + +$(obj)/alloc.o: private skip_clippy = 1 +$(obj)/alloc.o: private skip_flags = -Dunreachable_pub +$(obj)/alloc.o: private rustc_target_flags = $(alloc-cfgs) +$(obj)/alloc.o: $(src)/alloc/lib.rs $(obj)/compiler_builtins.o FORCE + $(call if_changed_dep,rustc_library) + +$(obj)/bindings.o: $(src)/bindings/lib.rs \ + $(obj)/compiler_builtins.o \ + $(obj)/bindings/bindings_generated.rs \ + $(obj)/bindings/bindings_helpers_generated.rs FORCE + $(call if_changed_dep,rustc_library) + +$(obj)/kernel.o: private rustc_target_flags = --extern alloc \ + --extern macros --extern bindings +$(obj)/kernel.o: $(src)/kernel/lib.rs $(obj)/alloc.o \ + $(obj)/libmacros.so $(obj)/bindings.o FORCE + $(call if_changed_dep,rustc_library) + +endif # CONFIG_RUST diff --git a/rust/alloc/README.md b/rust/alloc/README.md new file mode 100644 index 000000000000..c89c753720b5 --- /dev/null +++ b/rust/alloc/README.md @@ -0,0 +1,33 @@ +# `alloc` + +These source files come from the Rust standard library, hosted in +the <https://github.com/rust-lang/rust> repository, licensed under +"Apache-2.0 OR MIT" and adapted for kernel use. For copyright details, +see <https://github.com/rust-lang/rust/blob/master/COPYRIGHT>. + +Please note that these files should be kept as close as possible to +upstream. In general, only additions should be performed (e.g. new +methods). Eventually, changes should make it into upstream so that, +at some point, this fork can be dropped from the kernel tree. + + +## Rationale + +On one hand, kernel folks wanted to keep `alloc` in-tree to have more +freedom in both workflow and actual features if actually needed +(e.g. receiver types if we ended up using them), which is reasonable. + +On the other hand, Rust folks wanted to keep `alloc` as close as +upstream as possible and avoid as much divergence as possible, which +is also reasonable. + +We agreed on a middle-ground: we would keep a subset of `alloc` +in-tree that would be as small and as close as possible to upstream. +Then, upstream can start adding the functions that we add to `alloc` +etc., until we reach a point where the kernel already knows exactly +what it needs in `alloc` and all the new methods are merged into +upstream, so that we can drop `alloc` from the kernel tree and go back +to using the upstream one. + +By doing this, the kernel can go a bit faster now, and Rust can +slowly incorporate and discuss the changes as needed. diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs new file mode 100644 index 000000000000..ca224a541770 --- /dev/null +++ b/rust/alloc/alloc.rs @@ -0,0 +1,440 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Memory allocation APIs + +#![stable(feature = "alloc_module", since = "1.28.0")] + +#[cfg(not(test))] +use core::intrinsics; +use core::intrinsics::{min_align_of_val, size_of_val}; + +use core::ptr::Unique; +#[cfg(not(test))] +use core::ptr::{self, NonNull}; + +#[stable(feature = "alloc_module", since = "1.28.0")] +#[doc(inline)] +pub use core::alloc::*; + +use core::marker::Destruct; + +#[cfg(test)] +mod tests; + +extern "Rust" { + // These are the magic symbols to call the global allocator. rustc generates + // them to call `__rg_alloc` etc. if there is a `#[global_allocator]` attribute + // (the code expanding that attribute macro generates those functions), or to call + // the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`) + // otherwise. + // The rustc fork of LLVM also special-cases these function names to be able to optimize them + // like `malloc`, `realloc`, and `free`, respectively. + #[rustc_allocator] + #[rustc_allocator_nounwind] + fn __rust_alloc(size: usize, align: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize); + #[rustc_allocator_nounwind] + fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8; +} + +/// The global memory allocator. +/// +/// This type implements the [`Allocator`] trait by forwarding calls +/// to the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// Note: while this type is unstable, the functionality it provides can be +/// accessed through the [free functions in `alloc`](self#functions). +#[unstable(feature = "allocator_api", issue = "32838")] +#[derive(Copy, Clone, Default, Debug)] +#[cfg(not(test))] +pub struct Global; + +#[cfg(test)] +pub use std::alloc::Global; + +/// Allocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::alloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc`]. +/// +/// # Examples +/// +/// ``` +/// use std::alloc::{alloc, dealloc, Layout}; +/// +/// unsafe { +/// let layout = Layout::new::<u16>(); +/// let ptr = alloc(layout); +/// +/// *(ptr as *mut u16) = 42; +/// assert_eq!(*(ptr as *mut u16), 42); +/// +/// dealloc(ptr, layout); +/// } +/// ``` +#[stable(feature = "global_alloc", since = "1.28.0")] +#[must_use = "losing the pointer will leak memory"] +#[inline] +pub unsafe fn alloc(layout: Layout) -> *mut u8 { + unsafe { __rust_alloc(layout.size(), layout.align()) } +} + +/// Deallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::dealloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `dealloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::dealloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn dealloc(ptr: *mut u8, layout: Layout) { + unsafe { __rust_dealloc(ptr, layout.size(), layout.align()) } +} + +/// Reallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::realloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `realloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::realloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[must_use = "losing the pointer will leak memory"] +#[inline] +pub unsafe fn realloc(ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + unsafe { __rust_realloc(ptr, layout.size(), layout.align(), new_size) } +} + +/// Allocate zero-initialized memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::alloc_zeroed`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc_zeroed` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc_zeroed`]. +/// +/// # Examples +/// +/// ``` +/// use std::alloc::{alloc_zeroed, dealloc, Layout}; +/// +/// unsafe { +/// let layout = Layout::new::<u16>(); +/// let ptr = alloc_zeroed(layout); +/// +/// assert_eq!(*(ptr as *mut u16), 0); +/// +/// dealloc(ptr, layout); +/// } +/// ``` +#[stable(feature = "global_alloc", since = "1.28.0")] +#[must_use = "losing the pointer will leak memory"] +#[inline] +pub unsafe fn alloc_zeroed(layout: Layout) -> *mut u8 { + unsafe { __rust_alloc_zeroed(layout.size(), layout.align()) } +} + +#[cfg(not(test))] +impl Global { + #[inline] + fn alloc_impl(&self, layout: Layout, zeroed: bool) -> Result<NonNull<[u8]>, AllocError> { + match layout.size() { + 0 => Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0)), + // SAFETY: `layout` is non-zero in size, + size => unsafe { + let raw_ptr = if zeroed { alloc_zeroed(layout) } else { alloc(layout) }; + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(ptr, size)) + }, + } + } + + // SAFETY: Same as `Allocator::grow` + #[inline] + unsafe fn grow_impl( + &self, + ptr: NonNull<u8>, + old_layout: Layout, + new_layout: Layout, + zeroed: bool, + ) -> Result<NonNull<[u8]>, AllocError> { + debug_assert!( + new_layout.size() >= old_layout.size(), + "`new_layout.size()` must be greater than or equal to `old_layout.size()`" + ); + + match old_layout.size() { + 0 => self.alloc_impl(new_layout, zeroed), + + // SAFETY: `new_size` is non-zero as `old_size` is greater than or equal to `new_size` + // as required by safety conditions. Other conditions must be upheld by the caller + old_size if old_layout.align() == new_layout.align() => unsafe { + let new_size = new_layout.size(); + + // `realloc` probably checks for `new_size >= old_layout.size()` or something similar. + intrinsics::assume(new_size >= old_layout.size()); + + let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size); + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + if zeroed { + raw_ptr.add(old_size).write_bytes(0, new_size - old_size); + } + Ok(NonNull::slice_from_raw_parts(ptr, new_size)) + }, + + // SAFETY: because `new_layout.size()` must be greater than or equal to `old_size`, + // both the old and new memory allocation are valid for reads and writes for `old_size` + // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap + // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract + // for `dealloc` must be upheld by the caller. + old_size => unsafe { + let new_ptr = self.alloc_impl(new_layout, zeroed)?; + ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), old_size); + self.deallocate(ptr, old_layout); + Ok(new_ptr) + }, + } + } +} + +#[unstable(feature = "allocator_api", issue = "32838")] +#[cfg(not(test))] +unsafe impl Allocator for Global { + #[inline] + fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> { + self.alloc_impl(layout, false) + } + + #[inline] + fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> { + self.alloc_impl(layout, true) + } + + #[inline] + unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) { + if layout.size() != 0 { + // SAFETY: `layout` is non-zero in size, + // other conditions must be upheld by the caller + unsafe { dealloc(ptr.as_ptr(), layout) } + } + } + + #[inline] + unsafe fn grow( + &self, + ptr: NonNull<u8>, + old_layout: Layout, + new_layout: Layout, + ) -> Result<NonNull<[u8]>, AllocError> { + // SAFETY: all conditions must be upheld by the caller + unsafe { self.grow_impl(ptr, old_layout, new_layout, false) } + } + + #[inline] + unsafe fn grow_zeroed( + &self, + ptr: NonNull<u8>, + old_layout: Layout, + new_layout: Layout, + ) -> Result<NonNull<[u8]>, AllocError> { + // SAFETY: all conditions must be upheld by the caller + unsafe { self.grow_impl(ptr, old_layout, new_layout, true) } + } + + #[inline] + unsafe fn shrink( + &self, + ptr: NonNull<u8>, + old_layout: Layout, + new_layout: Layout, + ) -> Result<NonNull<[u8]>, AllocError> { + debug_assert!( + new_layout.size() <= old_layout.size(), + "`new_layout.size()` must be smaller than or equal to `old_layout.size()`" + ); + + match new_layout.size() { + // SAFETY: conditions must be upheld by the caller + 0 => unsafe { + self.deallocate(ptr, old_layout); + Ok(NonNull::slice_from_raw_parts(new_layout.dangling(), 0)) + }, + + // SAFETY: `new_size` is non-zero. Other conditions must be upheld by the caller + new_size if old_layout.align() == new_layout.align() => unsafe { + // `realloc` probably checks for `new_size <= old_layout.size()` or something similar. + intrinsics::assume(new_size <= old_layout.size()); + + let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size); + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(ptr, new_size)) + }, + + // SAFETY: because `new_size` must be smaller than or equal to `old_layout.size()`, + // both the old and new memory allocation are valid for reads and writes for `new_size` + // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap + // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract + // for `dealloc` must be upheld by the caller. + new_size => unsafe { + let new_ptr = self.allocate(new_layout)?; + ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), new_size); + self.deallocate(ptr, old_layout); + Ok(new_ptr) + }, + } + } +} + +/// The allocator for unique pointers. +#[cfg(all(not(no_global_oom_handling), not(test)))] +#[lang = "exchange_malloc"] +#[inline] +unsafe fn exchange_malloc(size: usize, align: usize) -> *mut u8 { + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + match Global.allocate(layout) { + Ok(ptr) => ptr.as_mut_ptr(), + Err(_) => handle_alloc_error(layout), + } +} + +#[cfg_attr(not(test), lang = "box_free")] +#[inline] +#[rustc_const_unstable(feature = "const_box", issue = "92521")] +// This signature has to be the same as `Box`, otherwise an ICE will happen. +// When an additional parameter to `Box` is added (like `A: Allocator`), this has to be added here as +// well. +// For example if `Box` is changed to `struct Box<T: ?Sized, A: Allocator>(Unique<T>, A)`, +// this function has to be changed to `fn box_free<T: ?Sized, A: Allocator>(Unique<T>, A)` as well. +pub(crate) const unsafe fn box_free<T: ?Sized, A: ~const Allocator + ~const Destruct>( + ptr: Unique<T>, + alloc: A, +) { + unsafe { + let size = size_of_val(ptr.as_ref()); + let align = min_align_of_val(ptr.as_ref()); + let layout = Layout::from_size_align_unchecked(size, align); + alloc.deallocate(From::from(ptr.cast()), layout) + } +} + +// # Allocation error handler + +#[cfg(not(no_global_oom_handling))] +extern "Rust" { + // This is the magic symbol to call the global alloc error handler. rustc generates + // it to call `__rg_oom` if there is a `#[alloc_error_handler]`, or to call the + // default implementations below (`__rdl_oom`) otherwise. + fn __rust_alloc_error_handler(size: usize, align: usize) -> !; +} + +/// Abort on memory allocation error or failure. +/// +/// Callers of memory allocation APIs wishing to abort computation +/// in response to an allocation error are encouraged to call this function, +/// rather than directly invoking `panic!` or similar. +/// +/// The default behavior of this function is to print a message to standard error +/// and abort the process. +/// It can be replaced with [`set_alloc_error_hook`] and [`take_alloc_error_hook`]. +/// +/// [`set_alloc_error_hook`]: ../../std/alloc/fn.set_alloc_error_hook.html +/// [`take_alloc_error_hook`]: ../../std/alloc/fn.take_alloc_error_hook.html +#[stable(feature = "global_alloc", since = "1.28.0")] +#[rustc_const_unstable(feature = "const_alloc_error", issue = "92523")] +#[cfg(all(not(no_global_oom_handling), not(test)))] +#[cold] +pub const fn handle_alloc_error(layout: Layout) -> ! { + const fn ct_error(_: Layout) -> ! { + panic!("allocation failed"); + } + + fn rt_error(layout: Layout) -> ! { + unsafe { + __rust_alloc_error_handler(layout.size(), layout.align()); + } + } + + unsafe { core::intrinsics::const_eval_select((layout,), ct_error, rt_error) } +} + +// For alloc test `std::alloc::handle_alloc_error` can be used directly. +#[cfg(all(not(no_global_oom_handling), test))] +pub use std::alloc::handle_alloc_error; + +#[cfg(all(not(no_global_oom_handling), not(test)))] +#[doc(hidden)] +#[allow(unused_attributes)] +#[unstable(feature = "alloc_internals", issue = "none")] +pub mod __alloc_error_handler { + use crate::alloc::Layout; + + // called via generated `__rust_alloc_error_handler` + + // if there is no `#[alloc_error_handler]` + #[rustc_std_internal_symbol] + pub unsafe extern "C-unwind" fn __rdl_oom(size: usize, _align: usize) -> ! { + panic!("memory allocation of {size} bytes failed") + } + + // if there is an `#[alloc_error_handler]` + #[rustc_std_internal_symbol] + pub unsafe extern "C-unwind" fn __rg_oom(size: usize, align: usize) -> ! { + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + extern "Rust" { + #[lang = "oom"] + fn oom_impl(layout: Layout) -> !; + } + unsafe { oom_impl(layout) } + } +} + +/// Specialize clones into pre-allocated, uninitialized memory. +/// Used by `Box::clone` and `Rc`/`Arc::make_mut`. +pub(crate) trait WriteCloneIntoRaw: Sized { + unsafe fn write_clone_into_raw(&self, target: *mut Self); +} + +impl<T: Clone> WriteCloneIntoRaw for T { + #[inline] + default unsafe fn write_clone_into_raw(&self, target: *mut Self) { + // Having allocated *first* may allow the optimizer to create + // the cloned value in-place, skipping the local and move. + unsafe { target.write(self.clone()) }; + } +} + +impl<T: Copy> WriteCloneIntoRaw for T { + #[inline] + unsafe fn write_clone_into_raw(&self, target: *mut Self) { + // We can always copy in-place, without ever involving a local value. + unsafe { target.copy_from_nonoverlapping(self, 1) }; + } +} diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs new file mode 100644 index 000000000000..dde4957200d4 --- /dev/null +++ b/rust/alloc/borrow.rs @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A module for working with borrowed data. + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::cmp::Ordering; +use core::hash::{Hash, Hasher}; +use core::ops::Deref; +#[cfg(not(no_global_oom_handling))] +use core::ops::{Add, AddAssign}; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::borrow::{Borrow, BorrowMut}; + +use core::fmt; +#[cfg(not(no_global_oom_handling))] +use crate::string::String; + +use Cow::*; + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B> +where + B: ToOwned, + <B as ToOwned>::Owned: 'a, +{ + fn borrow(&self) -> &B { + &**self + } +} + +/// A generalization of `Clone` to borrowed data. +/// +/// Some types make it possible to go from borrowed to owned, usually by +/// implementing the `Clone` trait. But `Clone` works only for going from `&T` +/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data +/// from any borrow of a given type. +#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")] +#[stable(feature = "rust1", since = "1.0.0")] +pub trait ToOwned { + /// The resulting type after obtaining ownership. + #[stable(feature = "rust1", since = "1.0.0")] + type Owned: Borrow<Self>; + + /// Creates owned data from borrowed data, usually by cloning. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s: &str = "a"; + /// let ss: String = s.to_owned(); + /// + /// let v: &[i32] = &[1, 2]; + /// let vv: Vec<i32> = v.to_owned(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "cloning is often expensive and is not expected to have side effects"] + fn to_owned(&self) -> Self::Owned; + + /// Uses borrowed data to replace owned data, usually by cloning. + /// + /// This is borrow-generalized version of `Clone::clone_from`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # #![feature(toowned_clone_into)] + /// let mut s: String = String::new(); + /// "hello".clone_into(&mut s); + /// + /// let mut v: Vec<i32> = Vec::new(); + /// [1, 2][..].clone_into(&mut v); + /// ``` + #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")] + fn clone_into(&self, target: &mut Self::Owned) { + *target = self.to_owned(); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T> ToOwned for T +where + T: Clone, +{ + type Owned = T; + fn to_owned(&self) -> T { + self.clone() + } + + fn clone_into(&self, target: &mut T) { + target.clone_from(self); + } +} + +/// A clone-on-write smart pointer. +/// +/// The type `Cow` is a smart pointer providing clone-on-write functionality: it +/// can enclose and provide immutable access to borrowed data, and clone the +/// data lazily when mutation or ownership is required. The type is designed to +/// work with general borrowed data via the `Borrow` trait. +/// +/// `Cow` implements `Deref`, which means that you can call +/// non-mutating methods directly on the data it encloses. If mutation +/// is desired, `to_mut` will obtain a mutable reference to an owned +/// value, cloning if necessary. +/// +/// If you need reference-counting pointers, note that +/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and +/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write +/// functionality as well. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// +/// fn abs_all(input: &mut Cow<[i32]>) { +/// for i in 0..input.len() { +/// let v = input[i]; +/// if v < 0 { +/// // Clones into a vector if not already owned. +/// input.to_mut()[i] = -v; +/// } +/// } +/// } +/// +/// // No clone occurs because `input` doesn't need to be mutated. +/// let slice = [0, 1, 2]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // Clone occurs because `input` needs to be mutated. +/// let slice = [-1, 0, 1]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // No clone occurs because `input` is already owned. +/// let mut input = Cow::from(vec![-1, 0, 1]); +/// abs_all(&mut input); +/// ``` +/// +/// Another example showing how to keep `Cow` in a struct: +/// +/// ``` +/// use std::borrow::Cow; +/// +/// struct Items<'a, X: 'a> where [X]: ToOwned<Owned = Vec<X>> { +/// values: Cow<'a, [X]>, +/// } +/// +/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned<Owned = Vec<X>> { +/// fn new(v: Cow<'a, [X]>) -> Self { +/// Items { values: v } +/// } +/// } +/// +/// // Creates a container from borrowed values of a slice +/// let readonly = [1, 2]; +/// let borrowed = Items::new((&readonly[..]).into()); +/// match borrowed { +/// Items { values: Cow::Borrowed(b) } => println!("borrowed {b:?}"), +/// _ => panic!("expect borrowed value"), +/// } +/// +/// let mut clone_on_write = borrowed; +/// // Mutates the data from slice into owned vec and pushes a new value on top +/// clone_on_write.values.to_mut().push(3); +/// println!("clone_on_write = {:?}", clone_on_write.values); +/// +/// // The data was mutated. Let's check it out. +/// match clone_on_write { +/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"), +/// _ => panic!("expect owned data"), +/// } +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "Cow")] +pub enum Cow<'a, B: ?Sized + 'a> +where + B: ToOwned, +{ + /// Borrowed data. + #[stable(feature = "rust1", since = "1.0.0")] + Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B), + + /// Owned data. + #[stable(feature = "rust1", since = "1.0.0")] + Owned(#[stable(feature = "rust1", since = "1.0.0")] <B as ToOwned>::Owned), +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized + ToOwned> Clone for Cow<'_, B> { + fn clone(&self) -> Self { + match *self { + Borrowed(b) => Borrowed(b), + Owned(ref o) => { + let b: &B = o.borrow(); + Owned(b.to_owned()) + } + } + } + + fn clone_from(&mut self, source: &Self) { + match (self, source) { + (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest), + (t, s) => *t = s.clone(), + } + } +} + +impl<B: ?Sized + ToOwned> Cow<'_, B> { + /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work. + /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow = Cow::Borrowed("moo"); + /// assert!(cow.is_borrowed()); + /// + /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string()); + /// assert!(!bull.is_borrowed()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_borrowed(&self) -> bool { + match *self { + Borrowed(_) => true, + Owned(_) => false, + } + } + + /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op. + /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string()); + /// assert!(cow.is_owned()); + /// + /// let bull = Cow::Borrowed("...moo?"); + /// assert!(!bull.is_owned()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_owned(&self) -> bool { + !self.is_borrowed() + } + + /// Acquires a mutable reference to the owned form of the data. + /// + /// Clones the data if it is not already owned. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let mut cow = Cow::Borrowed("foo"); + /// cow.to_mut().make_ascii_uppercase(); + /// + /// assert_eq!( + /// cow, + /// Cow::Owned(String::from("FOO")) as Cow<str> + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn to_mut(&mut self) -> &mut <B as ToOwned>::Owned { + match *self { + Borrowed(borrowed) => { + *self = Owned(borrowed.to_owned()); + match *self { + Borrowed(..) => unreachable!(), + Owned(ref mut owned) => owned, + } + } + Owned(ref mut owned) => owned, + } + } + + /// Extracts the owned data. + /// + /// Clones the data if it is not already owned. + /// + /// # Examples + /// + /// Calling `into_owned` on a `Cow::Borrowed` returns a clone of the borrowed data: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow = Cow::Borrowed(s); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + /// + /// Calling `into_owned` on a `Cow::Owned` returns the owned data. The data is moved out of the + /// `Cow` without being cloned. + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow: Cow<str> = Cow::Owned(String::from(s)); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_owned(self) -> <B as ToOwned>::Owned { + match self { + Borrowed(borrowed) => borrowed.to_owned(), + Owned(owned) => owned, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_deref", issue = "88955")] +impl<B: ?Sized + ToOwned> const Deref for Cow<'_, B> +where + B::Owned: ~const Borrow<B>, +{ + type Target = B; + + fn deref(&self) -> &B { + match *self { + Borrowed(borrowed) => borrowed, + Owned(ref owned) => owned.borrow(), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized> Eq for Cow<'_, B> where B: Eq + ToOwned {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized> Ord for Cow<'_, B> +where + B: Ord + ToOwned, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq<Cow<'b, C>> for Cow<'a, B> +where + B: PartialEq<C> + ToOwned, + C: ToOwned, +{ + #[inline] + fn eq(&self, other: &Cow<'b, C>) -> bool { + PartialEq::eq(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, B: ?Sized> PartialOrd for Cow<'a, B> +where + B: PartialOrd + ToOwned, +{ + #[inline] + fn partial_cmp(&self, other: &Cow<'a, B>) -> Option<Ordering> { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized> fmt::Debug for Cow<'_, B> +where + B: fmt::Debug + ToOwned<Owned: fmt::Debug>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Debug::fmt(b, f), + Owned(ref o) => fmt::Debug::fmt(o, f), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized> fmt::Display for Cow<'_, B> +where + B: fmt::Display + ToOwned<Owned: fmt::Display>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Display::fmt(b, f), + Owned(ref o) => fmt::Display::fmt(o, f), + } + } +} + +#[stable(feature = "default", since = "1.11.0")] +impl<B: ?Sized> Default for Cow<'_, B> +where + B: ToOwned<Owned: Default>, +{ + /// Creates an owned Cow<'a, B> with the default value for the contained owned value. + fn default() -> Self { + Owned(<B as ToOwned>::Owned::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<B: ?Sized> Hash for Cow<'_, B> +where + B: Hash + ToOwned, +{ + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + ToOwned> AsRef<T> for Cow<'_, T> { + fn as_ref(&self) -> &T { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add<&'a str> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: &'a str) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add<Cow<'a, str>> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: Cow<'a, str>) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign<&'a str> for Cow<'a, str> { + fn add_assign(&mut self, rhs: &'a str) { + if self.is_empty() { + *self = Cow::Borrowed(rhs) + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(rhs); + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign<Cow<'a, str>> for Cow<'a, str> { + fn add_assign(&mut self, rhs: Cow<'a, str>) { + if self.is_empty() { + *self = rhs + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(&rhs); + } + } +} diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs new file mode 100644 index 000000000000..dcfe87b14f3a --- /dev/null +++ b/rust/alloc/boxed.rs @@ -0,0 +1,2028 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A pointer type for heap allocation. +//! +//! [`Box<T>`], casually referred to as a 'box', provides the simplest form of +//! heap allocation in Rust. Boxes provide ownership for this allocation, and +//! drop their contents when they go out of scope. Boxes also ensure that they +//! never allocate more than `isize::MAX` bytes. +//! +//! # Examples +//! +//! Move a value from the stack to the heap by creating a [`Box`]: +//! +//! ``` +//! let val: u8 = 5; +//! let boxed: Box<u8> = Box::new(val); +//! ``` +//! +//! Move a value from a [`Box`] back to the stack by [dereferencing]: +//! +//! ``` +//! let boxed: Box<u8> = Box::new(5); +//! let val: u8 = *boxed; +//! ``` +//! +//! Creating a recursive data structure: +//! +//! ``` +//! #[derive(Debug)] +//! enum List<T> { +//! Cons(T, Box<List<T>>), +//! Nil, +//! } +//! +//! let list: List<i32> = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil)))); +//! println!("{list:?}"); +//! ``` +//! +//! This will print `Cons(1, Cons(2, Nil))`. +//! +//! Recursive structures must be boxed, because if the definition of `Cons` +//! looked like this: +//! +//! ```compile_fail,E0072 +//! # enum List<T> { +//! Cons(T, List<T>), +//! # } +//! ``` +//! +//! It wouldn't work. This is because the size of a `List` depends on how many +//! elements are in the list, and so we don't know how much memory to allocate +//! for a `Cons`. By introducing a [`Box<T>`], which has a defined size, we know how +//! big `Cons` needs to be. +//! +//! # Memory layout +//! +//! For non-zero-sized values, a [`Box`] will use the [`Global`] allocator for +//! its allocation. It is valid to convert both ways between a [`Box`] and a +//! raw pointer allocated with the [`Global`] allocator, given that the +//! [`Layout`] used with the allocator is correct for the type. More precisely, +//! a `value: *mut T` that has been allocated with the [`Global`] allocator +//! with `Layout::for_value(&*value)` may be converted into a box using +//! [`Box::<T>::from_raw(value)`]. Conversely, the memory backing a `value: *mut +//! T` obtained from [`Box::<T>::into_raw`] may be deallocated using the +//! [`Global`] allocator with [`Layout::for_value(&*value)`]. +//! +//! For zero-sized values, the `Box` pointer still has to be [valid] for reads +//! and writes and sufficiently aligned. In particular, casting any aligned +//! non-zero integer literal to a raw pointer produces a valid pointer, but a +//! pointer pointing into previously allocated memory that since got freed is +//! not valid. The recommended way to build a Box to a ZST if `Box::new` cannot +//! be used is to use [`ptr::NonNull::dangling`]. +//! +//! So long as `T: Sized`, a `Box<T>` is guaranteed to be represented +//! as a single pointer and is also ABI-compatible with C pointers +//! (i.e. the C type `T*`). This means that if you have extern "C" +//! Rust functions that will be called from C, you can define those +//! Rust functions using `Box<T>` types, and use `T*` as corresponding +//! type on the C side. As an example, consider this C header which +//! declares functions that create and destroy some kind of `Foo` +//! value: +//! +//! ```c +//! /* C header */ +//! +//! /* Returns ownership to the caller */ +//! struct Foo* foo_new(void); +//! +//! /* Takes ownership from the caller; no-op when invoked with null */ +//! void foo_delete(struct Foo*); +//! ``` +//! +//! These two functions might be implemented in Rust as follows. Here, the +//! `struct Foo*` type from C is translated to `Box<Foo>`, which captures +//! the ownership constraints. Note also that the nullable argument to +//! `foo_delete` is represented in Rust as `Option<Box<Foo>>`, since `Box<Foo>` +//! cannot be null. +//! +//! ``` +//! #[repr(C)] +//! pub struct Foo; +//! +//! #[no_mangle] +//! pub extern "C" fn foo_new() -> Box<Foo> { +//! Box::new(Foo) +//! } +//! +//! #[no_mangle] +//! pub extern "C" fn foo_delete(_: Option<Box<Foo>>) {} +//! ``` +//! +//! Even though `Box<T>` has the same representation and C ABI as a C pointer, +//! this does not mean that you can convert an arbitrary `T*` into a `Box<T>` +//! and expect things to work. `Box<T>` values will always be fully aligned, +//! non-null pointers. Moreover, the destructor for `Box<T>` will attempt to +//! free the value with the global allocator. In general, the best practice +//! is to only use `Box<T>` for pointers that originated from the global +//! allocator. +//! +//! **Important.** At least at present, you should avoid using +//! `Box<T>` types for functions that are defined in C but invoked +//! from Rust. In those cases, you should directly mirror the C types +//! as closely as possible. Using types like `Box<T>` where the C +//! definition is just using `T*` can lead to undefined behavior, as +//! described in [rust-lang/unsafe-code-guidelines#198][ucg#198]. +//! +//! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198 +//! [dereferencing]: core::ops::Deref +//! [`Box::<T>::from_raw(value)`]: Box::from_raw +//! [`Global`]: crate::alloc::Global +//! [`Layout`]: crate::alloc::Layout +//! [`Layout::for_value(&*value)`]: crate::alloc::Layout::for_value +//! [valid]: ptr#safety + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::any::Any; +use core::async_iter::AsyncIterator; +use core::borrow; +use core::cmp::Ordering; +use core::convert::{From, TryFrom}; +use core::fmt; +use core::future::Future; +use core::hash::{Hash, Hasher}; +#[cfg(not(no_global_oom_handling))] +use core::iter::FromIterator; +use core::iter::{FusedIterator, Iterator}; +use core::marker::{Destruct, Unpin, Unsize}; +use core::mem; +use core::ops::{ + CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver, +}; +use core::pin::Pin; +use core::ptr::{self, Unique}; +use core::task::{Context, Poll}; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::{handle_alloc_error, WriteCloneIntoRaw}; +use crate::alloc::{AllocError, Allocator, Global, Layout}; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; +use crate::raw_vec::RawVec; +#[cfg(not(no_global_oom_handling))] +use crate::str::from_boxed_utf8_unchecked; +#[cfg(not(no_global_oom_handling))] +use crate::vec::Vec; + +#[cfg(not(no_thin))] +#[unstable(feature = "thin_box", issue = "92791")] +pub use thin::ThinBox; + +#[cfg(not(no_thin))] +mod thin; + +/// A pointer type for heap allocation. +/// +/// See the [module-level documentation](../../std/boxed/index.html) for more. +#[lang = "owned_box"] +#[fundamental] +#[stable(feature = "rust1", since = "1.0.0")] +// The declaration of the `Box` struct must be kept in sync with the +// `alloc::alloc::box_free` function or ICEs will happen. See the comment +// on `box_free` for more details. +pub struct Box< + T: ?Sized, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +>(Unique<T>, A); + +impl<T> Box<T> { + /// Allocates memory on the heap and then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// let five = Box::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline(always)] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn new(x: T) -> Self { + box x + } + + /// Constructs a new box with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::<u32>::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + #[inline] + pub fn new_uninit() -> Box<mem::MaybeUninit<T>> { + Self::new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let zero = Box::<u32>::new_zeroed(); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub fn new_zeroed() -> Box<mem::MaybeUninit<T>> { + Self::new_zeroed_in(Global) + } + + /// Constructs a new `Pin<Box<T>>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "pin", since = "1.33.0")] + #[must_use] + #[inline(always)] + pub fn pin(x: T) -> Pin<Box<T>> { + (box x).into() + } + + /// Allocates memory on the heap then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// let five = Box::try_new(5)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new(x: T) -> Result<Self, AllocError> { + Self::try_new_in(x, Global) + } + + /// Constructs a new box with uninitialized contents on the heap, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let mut five = Box::<u32>::try_new_uninit()?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_uninit() -> Result<Box<mem::MaybeUninit<T>>, AllocError> { + Box::try_new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes on the heap + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let zero = Box::<u32>::try_new_zeroed()?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_zeroed() -> Result<Box<mem::MaybeUninit<T>>, AllocError> { + Box::try_new_zeroed_in(Global) + } +} + +impl<T, A: Allocator> Box<T, A> { + /// Allocates memory in the given allocator then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::new_in(5, System); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[must_use] + #[inline] + pub const fn new_in(x: T, alloc: A) -> Self + where + A: ~const Allocator + ~const Destruct, + { + let mut boxed = Self::new_uninit_in(alloc); + unsafe { + boxed.as_mut_ptr().write(x); + boxed.assume_init() + } + } + + /// Allocates memory in the given allocator then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::try_new_in(5, System)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn try_new_in(x: T, alloc: A) -> Result<Self, AllocError> + where + T: ~const Destruct, + A: ~const Allocator + ~const Destruct, + { + let mut boxed = Self::try_new_uninit_in(alloc)?; + unsafe { + boxed.as_mut_ptr().write(x); + Ok(boxed.assume_init()) + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::<u32, _>::new_uninit_in(System); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[cfg(not(no_global_oom_handling))] + #[must_use] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub const fn new_uninit_in(alloc: A) -> Box<mem::MaybeUninit<T>, A> + where + A: ~const Allocator + ~const Destruct, + { + let layout = Layout::new::<mem::MaybeUninit<T>>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. + match Box::try_new_uninit_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::<u32, _>::try_new_uninit_in(System)?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + pub const fn try_new_uninit_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError> + where + A: ~const Allocator + ~const Destruct, + { + let layout = Layout::new::<mem::MaybeUninit<T>>(); + let ptr = alloc.allocate(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::<u32, _>::new_zeroed_in(System); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[cfg(not(no_global_oom_handling))] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub const fn new_zeroed_in(alloc: A) -> Box<mem::MaybeUninit<T>, A> + where + A: ~const Allocator + ~const Destruct, + { + let layout = Layout::new::<mem::MaybeUninit<T>>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. + match Box::try_new_zeroed_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator, + /// returning an error if the allocation fails, + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::<u32, _>::try_new_zeroed_in(System)?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + pub const fn try_new_zeroed_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError> + where + A: ~const Allocator + ~const Destruct, + { + let layout = Layout::new::<mem::MaybeUninit<T>>(); + let ptr = alloc.allocate_zeroed(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[must_use] + #[inline(always)] + pub const fn pin_in(x: T, alloc: A) -> Pin<Self> + where + A: 'static + ~const Allocator + ~const Destruct, + { + Self::into_pin(Self::new_in(x, alloc)) + } + + /// Converts a `Box<T>` into a `Box<[T]>` + /// + /// This conversion does not allocate on the heap and happens in place. + #[unstable(feature = "box_into_boxed_slice", issue = "71582")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + pub const fn into_boxed_slice(boxed: Self) -> Box<[T], A> { + let (raw, alloc) = Box::into_raw_with_allocator(boxed); + unsafe { Box::from_raw_in(raw as *mut [T; 1], alloc) } + } + + /// Consumes the `Box`, returning the wrapped value. + /// + /// # Examples + /// + /// ``` + /// #![feature(box_into_inner)] + /// + /// let c = Box::new(5); + /// + /// assert_eq!(Box::into_inner(c), 5); + /// ``` + #[unstable(feature = "box_into_inner", issue = "80437")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn into_inner(boxed: Self) -> T + where + Self: ~const Destruct, + { + *boxed + } +} + +impl<T> Box<[T]> { + /// Constructs a new boxed slice with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut values = Box::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub fn new_uninit_slice(len: usize) -> Box<[mem::MaybeUninit<T>]> { + unsafe { RawVec::with_capacity(len).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let values = Box::<[u32]>::new_zeroed_slice(3); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub fn new_zeroed_slice(len: usize) -> Box<[mem::MaybeUninit<T>]> { + unsafe { RawVec::with_capacity_zeroed(len).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents. Returns an error if + /// the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let mut values = Box::<[u32]>::try_new_uninit_slice(3)?; + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new_uninit_slice(len: usize) -> Result<Box<[mem::MaybeUninit<T>]>, AllocError> { + unsafe { + let layout = match Layout::array::<mem::MaybeUninit<T>>(len) { + Ok(l) => l, + Err(_) => return Err(AllocError), + }; + let ptr = Global.allocate(layout)?; + Ok(RawVec::from_raw_parts_in(ptr.as_mut_ptr() as *mut _, len, Global).into_box(len)) + } + } + + /// Constructs a new boxed slice with uninitialized contents, with the memory + /// being filled with `0` bytes. Returns an error if the allocation fails + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let values = Box::<[u32]>::try_new_zeroed_slice(3)?; + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new_zeroed_slice(len: usize) -> Result<Box<[mem::MaybeUninit<T>]>, AllocError> { + unsafe { + let layout = match Layout::array::<mem::MaybeUninit<T>>(len) { + Ok(l) => l, + Err(_) => return Err(AllocError), + }; + let ptr = Global.allocate_zeroed(layout)?; + Ok(RawVec::from_raw_parts_in(ptr.as_mut_ptr() as *mut _, len, Global).into_box(len)) + } + } +} + +impl<T, A: Allocator> Box<[T], A> { + /// Constructs a new boxed slice with uninitialized contents in the provided allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut values = Box::<[u32], _>::new_uninit_slice_in(3, System); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub fn new_uninit_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit<T>], A> { + unsafe { RawVec::with_capacity_in(len, alloc).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents in the provided allocator, + /// with the memory being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let values = Box::<[u32], _>::new_zeroed_slice_in(3, System); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[must_use] + pub fn new_zeroed_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit<T>], A> { + unsafe { RawVec::with_capacity_zeroed_in(len, alloc).into_box(len) } + } +} + +impl<T, A: Allocator> Box<mem::MaybeUninit<T>, A> { + /// Converts to `Box<T, A>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the value + /// really is in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::<u32>::new_uninit(); + /// + /// let five: Box<u32> = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const unsafe fn assume_init(self) -> Box<T, A> { + let (raw, alloc) = Box::into_raw_with_allocator(self); + unsafe { Box::from_raw_in(raw as *mut T, alloc) } + } + + /// Writes the value and converts to `Box<T, A>`. + /// + /// This method converts the box similarly to [`Box::assume_init`] but + /// writes `value` into it before conversion thus guaranteeing safety. + /// In some scenarios use of this method may improve performance because + /// the compiler may be able to optimize copying from stack. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let big_box = Box::<[usize; 1024]>::new_uninit(); + /// + /// let mut array = [0; 1024]; + /// for (i, place) in array.iter_mut().enumerate() { + /// *place = i; + /// } + /// + /// // The optimizer may be able to elide this copy, so previous code writes + /// // to heap directly. + /// let big_box = Box::write(big_box, array); + /// + /// for (i, x) in big_box.iter().enumerate() { + /// assert_eq!(*x, i); + /// } + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn write(mut boxed: Self, value: T) -> Box<T, A> { + unsafe { + (*boxed).write(value); + boxed.assume_init() + } + } +} + +impl<T, A: Allocator> Box<[mem::MaybeUninit<T>], A> { + /// Converts to `Box<[T], A>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the values + /// really are in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut values = Box::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Box<[T], A> { + let (raw, alloc) = Box::into_raw_with_allocator(self); + unsafe { Box::from_raw_in(raw as *mut [T], alloc) } + } +} + +impl<T: ?Sized> Box<T> { + /// Constructs a box from a raw pointer. + /// + /// After calling this function, the raw pointer is owned by the + /// resulting `Box`. Specifically, the `Box` destructor will call + /// the destructor of `T` and free the allocated memory. For this + /// to be safe, the memory must have been allocated in accordance + /// with the [memory layout] used by `Box` . + /// + /// # Safety + /// + /// This function is unsafe because improper use may lead to + /// memory problems. For example, a double-free may occur if the + /// function is called twice on the same raw pointer. + /// + /// The safety conditions are described in the [memory layout] section. + /// + /// # Examples + /// + /// Recreate a `Box` which was previously converted to a raw pointer + /// using [`Box::into_raw`]: + /// ``` + /// let x = Box::new(5); + /// let ptr = Box::into_raw(x); + /// let x = unsafe { Box::from_raw(ptr) }; + /// ``` + /// Manually create a `Box` from scratch by using the global allocator: + /// ``` + /// use std::alloc::{alloc, Layout}; + /// + /// unsafe { + /// let ptr = alloc(Layout::new::<i32>()) as *mut i32; + /// // In general .write is required to avoid attempting to destruct + /// // the (uninitialized) previous contents of `ptr`, though for this + /// // simple example `*ptr = 5` would have worked as well. + /// ptr.write(5); + /// let x = Box::from_raw(ptr); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + /// [`Layout`]: crate::Layout + #[stable(feature = "box_raw", since = "1.4.0")] + #[inline] + pub unsafe fn from_raw(raw: *mut T) -> Self { + unsafe { Self::from_raw_in(raw, Global) } + } +} + +impl<T: ?Sized, A: Allocator> Box<T, A> { + /// Constructs a box from a raw pointer in the given allocator. + /// + /// After calling this function, the raw pointer is owned by the + /// resulting `Box`. Specifically, the `Box` destructor will call + /// the destructor of `T` and free the allocated memory. For this + /// to be safe, the memory must have been allocated in accordance + /// with the [memory layout] used by `Box` . + /// + /// # Safety + /// + /// This function is unsafe because improper use may lead to + /// memory problems. For example, a double-free may occur if the + /// function is called twice on the same raw pointer. + /// + /// + /// # Examples + /// + /// Recreate a `Box` which was previously converted to a raw pointer + /// using [`Box::into_raw_with_allocator`]: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(5, System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manually create a `Box` from scratch by using the system allocator: + /// ``` + /// #![feature(allocator_api, slice_ptr_get)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// + /// unsafe { + /// let ptr = System.allocate(Layout::new::<i32>())?.as_mut_ptr() as *mut i32; + /// // In general .write is required to avoid attempting to destruct + /// // the (uninitialized) previous contents of `ptr`, though for this + /// // simple example `*ptr = 5` would have worked as well. + /// ptr.write(5); + /// let x = Box::from_raw_in(ptr, System); + /// } + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [memory layout]: self#memory-layout + /// [`Layout`]: crate::Layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const unsafe fn from_raw_in(raw: *mut T, alloc: A) -> Self { + Box(unsafe { Unique::new_unchecked(raw) }, alloc) + } + + /// Consumes the `Box`, returning a wrapped raw pointer. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw`] function, allowing the `Box` destructor to perform + /// the cleanup. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw(b)` instead of `b.into_raw()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw`] + /// for automatic cleanup: + /// ``` + /// let x = Box::new(String::from("Hello")); + /// let ptr = Box::into_raw(x); + /// let x = unsafe { Box::from_raw(ptr) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// use std::alloc::{dealloc, Layout}; + /// use std::ptr; + /// + /// let x = Box::new(String::from("Hello")); + /// let p = Box::into_raw(x); + /// unsafe { + /// ptr::drop_in_place(p); + /// dealloc(p as *mut u8, Layout::new::<String>()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[stable(feature = "box_raw", since = "1.4.0")] + #[inline] + pub fn into_raw(b: Self) -> *mut T { + Self::into_raw_with_allocator(b).0 + } + + /// Consumes the `Box`, returning a wrapped raw pointer and the allocator. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw_in`] function, allowing the `Box` destructor to perform + /// the cleanup. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw_with_allocator(b)` instead of `b.into_raw_with_allocator()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw_in`] + /// for automatic cleanup: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// use std::ptr::{self, NonNull}; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// unsafe { + /// ptr::drop_in_place(ptr); + /// let non_null = NonNull::new_unchecked(ptr); + /// alloc.deallocate(non_null.cast(), Layout::new::<String>()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn into_raw_with_allocator(b: Self) -> (*mut T, A) { + let (leaked, alloc) = Box::into_unique(b); + (leaked.as_ptr(), alloc) + } + + #[unstable( + feature = "ptr_internals", + issue = "none", + reason = "use `Box::leak(b).into()` or `Unique::from(Box::leak(b))` instead" + )] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + #[doc(hidden)] + pub const fn into_unique(b: Self) -> (Unique<T>, A) { + // Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a + // raw pointer for the type system. Turning it directly into a raw pointer would not be + // recognized as "releasing" the unique pointer to permit aliased raw accesses, + // so all raw pointer methods have to go through `Box::leak`. Turning *that* to a raw pointer + // behaves correctly. + let alloc = unsafe { ptr::read(&b.1) }; + (Unique::from(Box::leak(b)), alloc) + } + + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::allocator(&b)` instead of `b.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[unstable(feature = "allocator_api", issue = "32838")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn allocator(b: &Self) -> &A { + &b.1 + } + + /// Consumes and leaks the `Box`, returning a mutable reference, + /// `&'a mut T`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. If this is not acceptable, the reference should first be wrapped + /// with the [`Box::from_raw`] function producing a `Box`. This `Box` can + /// then be dropped which will properly destroy `T` and release the + /// allocated memory. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::leak(b)` instead of `b.leak()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = Box::new(41); + /// let static_ref: &'static mut usize = Box::leak(x); + /// *static_ref += 1; + /// assert_eq!(*static_ref, 42); + /// ``` + /// + /// Unsized data: + /// + /// ``` + /// let x = vec![1, 2, 3].into_boxed_slice(); + /// let static_ref = Box::leak(x); + /// static_ref[0] = 4; + /// assert_eq!(*static_ref, [4, 2, 3]); + /// ``` + #[stable(feature = "box_leak", since = "1.26.0")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + #[inline] + pub const fn leak<'a>(b: Self) -> &'a mut T + where + A: 'a, + { + unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() } + } + + /// Converts a `Box<T>` into a `Pin<Box<T>>` + /// + /// This conversion does not allocate on the heap and happens in place. + /// + /// This is also available via [`From`]. + #[unstable(feature = "box_into_pin", issue = "62370")] + #[rustc_const_unstable(feature = "const_box", issue = "92521")] + pub const fn into_pin(boxed: Self) -> Pin<Self> + where + A: 'static, + { + // It's not possible to move or replace the insides of a `Pin<Box<T>>` + // when `T: !Unpin`, so it's safe to pin it directly without any + // additional requirements. + unsafe { Pin::new_unchecked(boxed) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> { + fn drop(&mut self) { + // FIXME: Do nothing, drop is currently performed by compiler. + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Default> Default for Box<T> { + /// Creates a `Box<T>`, with the `Default` value for T. + fn default() -> Self { + box T::default() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")] +impl<T> const Default for Box<[T]> { + fn default() -> Self { + let ptr: Unique<[T]> = Unique::<[T; 0]>::dangling(); + Box(ptr, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "default_box_extra", since = "1.17.0")] +#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")] +impl const Default for Box<str> { + fn default() -> Self { + // SAFETY: This is the same as `Unique::cast<U>` but with an unsized `U = str`. + let ptr: Unique<str> = unsafe { + let bytes: Unique<[u8]> = Unique::<[u8; 0]>::dangling(); + Unique::new_unchecked(bytes.as_ptr() as *mut str) + }; + Box(ptr, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> { + /// Returns a new box with a `clone()` of this box's contents. + /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let y = x.clone(); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // But they are unique objects + /// assert_ne!(&*x as *const i32, &*y as *const i32); + /// ``` + #[inline] + fn clone(&self) -> Self { + // Pre-allocate memory to allow writing the cloned value directly. + let mut boxed = Self::new_uninit_in(self.1.clone()); + unsafe { + (**self).write_clone_into_raw(boxed.as_mut_ptr()); + boxed.assume_init() + } + } + + /// Copies `source`'s contents into `self` without creating a new allocation. + /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let mut y = Box::new(10); + /// let yp: *const i32 = &*y; + /// + /// y.clone_from(&x); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // And no allocation occurred + /// assert_eq!(yp, &*y); + /// ``` + #[inline] + fn clone_from(&mut self, source: &Self) { + (**self).clone_from(&(**source)); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl Clone for Box<str> { + fn clone(&self) -> Self { + // this makes a copy of the data + let buf: Box<[u8]> = self.as_bytes().into(); + unsafe { from_boxed_utf8_unchecked(buf) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + PartialEq, A: Allocator> PartialEq for Box<T, A> { + #[inline] + fn eq(&self, other: &Self) -> bool { + PartialEq::eq(&**self, &**other) + } + #[inline] + fn ne(&self, other: &Self) -> bool { + PartialEq::ne(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + PartialOrd, A: Allocator> PartialOrd for Box<T, A> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&**self, &**other) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + PartialOrd::lt(&**self, &**other) + } + #[inline] + fn le(&self, other: &Self) -> bool { + PartialOrd::le(&**self, &**other) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + PartialOrd::ge(&**self, &**other) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + PartialOrd::gt(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + Ord, A: Allocator> Ord for Box<T, A> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + Eq, A: Allocator> Eq for Box<T, A> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized + Hash, A: Allocator> Hash for Box<T, A> { + fn hash<H: Hasher>(&self, state: &mut H) { + (**self).hash(state); + } +} + +#[stable(feature = "indirect_hasher_impl", since = "1.22.0")] +impl<T: ?Sized + Hasher, A: Allocator> Hasher for Box<T, A> { + fn finish(&self) -> u64 { + (**self).finish() + } + fn write(&mut self, bytes: &[u8]) { + (**self).write(bytes) + } + fn write_u8(&mut self, i: u8) { + (**self).write_u8(i) + } + fn write_u16(&mut self, i: u16) { + (**self).write_u16(i) + } + fn write_u32(&mut self, i: u32) { + (**self).write_u32(i) + } + fn write_u64(&mut self, i: u64) { + (**self).write_u64(i) + } + fn write_u128(&mut self, i: u128) { + (**self).write_u128(i) + } + fn write_usize(&mut self, i: usize) { + (**self).write_usize(i) + } + fn write_i8(&mut self, i: i8) { + (**self).write_i8(i) + } + fn write_i16(&mut self, i: i16) { + (**self).write_i16(i) + } + fn write_i32(&mut self, i: i32) { + (**self).write_i32(i) + } + fn write_i64(&mut self, i: i64) { + (**self).write_i64(i) + } + fn write_i128(&mut self, i: i128) { + (**self).write_i128(i) + } + fn write_isize(&mut self, i: isize) { + (**self).write_isize(i) + } + fn write_length_prefix(&mut self, len: usize) { + (**self).write_length_prefix(len) + } + fn write_str(&mut self, s: &str) { + (**self).write_str(s) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_for_ptrs", since = "1.6.0")] +impl<T> From<T> for Box<T> { + /// Converts a `T` into a `Box<T>` + /// + /// The conversion allocates on the heap and moves `t` + /// from the stack into it. + /// + /// # Examples + /// + /// ```rust + /// let x = 5; + /// let boxed = Box::new(5); + /// + /// assert_eq!(Box::from(x), boxed); + /// ``` + fn from(t: T) -> Self { + Box::new(t) + } +} + +#[stable(feature = "pin", since = "1.33.0")] +#[rustc_const_unstable(feature = "const_box", issue = "92521")] +impl<T: ?Sized, A: Allocator> const From<Box<T, A>> for Pin<Box<T, A>> +where + A: 'static, +{ + /// Converts a `Box<T>` into a `Pin<Box<T>>` + /// + /// This conversion does not allocate on the heap and happens in place. + fn from(boxed: Box<T, A>) -> Self { + Box::into_pin(boxed) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl<T: Copy> From<&[T]> for Box<[T]> { + /// Converts a `&[T]` into a `Box<[T]>` + /// + /// This conversion allocates on the heap + /// and performs a copy of `slice`. + /// + /// # Examples + /// ```rust + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice: Box<[u8]> = Box::from(slice); + /// + /// println!("{boxed_slice:?}"); + /// ``` + fn from(slice: &[T]) -> Box<[T]> { + let len = slice.len(); + let buf = RawVec::with_capacity(len); + unsafe { + ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len); + buf.into_box(slice.len()).assume_init() + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl<T: Copy> From<Cow<'_, [T]>> for Box<[T]> { + /// Converts a `Cow<'_, [T]>` into a `Box<[T]>` + /// + /// When `cow` is the `Cow::Borrowed` variant, this + /// conversion allocates on the heap and copies the + /// underlying slice. Otherwise, it will try to reuse the owned + /// `Vec`'s allocation. + #[inline] + fn from(cow: Cow<'_, [T]>) -> Box<[T]> { + match cow { + Cow::Borrowed(slice) => Box::from(slice), + Cow::Owned(slice) => Box::from(slice), + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl From<&str> for Box<str> { + /// Converts a `&str` into a `Box<str>` + /// + /// This conversion allocates on the heap + /// and performs a copy of `s`. + /// + /// # Examples + /// + /// ```rust + /// let boxed: Box<str> = Box::from("hello"); + /// println!("{boxed}"); + /// ``` + #[inline] + fn from(s: &str) -> Box<str> { + unsafe { from_boxed_utf8_unchecked(Box::from(s.as_bytes())) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From<Cow<'_, str>> for Box<str> { + /// Converts a `Cow<'_, str>` into a `Box<str>` + /// + /// When `cow` is the `Cow::Borrowed` variant, this + /// conversion allocates on the heap and copies the + /// underlying `str`. Otherwise, it will try to reuse the owned + /// `String`'s allocation. + /// + /// # Examples + /// + /// ```rust + /// use std::borrow::Cow; + /// + /// let unboxed = Cow::Borrowed("hello"); + /// let boxed: Box<str> = Box::from(unboxed); + /// println!("{boxed}"); + /// ``` + /// + /// ```rust + /// # use std::borrow::Cow; + /// let unboxed = Cow::Owned("hello".to_string()); + /// let boxed: Box<str> = Box::from(unboxed); + /// println!("{boxed}"); + /// ``` + #[inline] + fn from(cow: Cow<'_, str>) -> Box<str> { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[stable(feature = "boxed_str_conv", since = "1.19.0")] +impl<A: Allocator> From<Box<str, A>> for Box<[u8], A> { + /// Converts a `Box<str>` into a `Box<[u8]>` + /// + /// This conversion does not allocate on the heap and happens in place. + /// + /// # Examples + /// ```rust + /// // create a Box<str> which will be used to create a Box<[u8]> + /// let boxed: Box<str> = Box::from("hello"); + /// let boxed_str: Box<[u8]> = Box::from(boxed); + /// + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice = Box::from(slice); + /// + /// assert_eq!(boxed_slice, boxed_str); + /// ``` + #[inline] + fn from(s: Box<str, A>) -> Self { + let (raw, alloc) = Box::into_raw_with_allocator(s); + unsafe { Box::from_raw_in(raw as *mut [u8], alloc) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_array", since = "1.45.0")] +impl<T, const N: usize> From<[T; N]> for Box<[T]> { + /// Converts a `[T; N]` into a `Box<[T]>` + /// + /// This conversion moves the array to newly heap-allocated memory. + /// + /// # Examples + /// + /// ```rust + /// let boxed: Box<[u8]> = Box::from([4, 2]); + /// println!("{boxed:?}"); + /// ``` + fn from(array: [T; N]) -> Box<[T]> { + box array + } +} + +#[stable(feature = "boxed_slice_try_from", since = "1.43.0")] +impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> { + type Error = Box<[T]>; + + /// Attempts to convert a `Box<[T]>` into a `Box<[T; N]>`. + /// + /// The conversion occurs in-place and does not require a + /// new memory allocation. + /// + /// # Errors + /// + /// Returns the old `Box<[T]>` in the `Err` variant if + /// `boxed_slice.len()` does not equal `N`. + fn try_from(boxed_slice: Box<[T]>) -> Result<Self, Self::Error> { + if boxed_slice.len() == N { + Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) }) + } else { + Err(boxed_slice) + } + } +} + +impl<A: Allocator> Box<dyn Any, A> { + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box<dyn Any>) { + /// if let Ok(string) = value.downcast::<String>() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn downcast<T: Any>(self) -> Result<Box<T, A>, Self> { + if self.is::<T>() { unsafe { Ok(self.downcast_unchecked::<T>()) } } else { Err(self) } + } + + /// Downcasts the box to a concrete type. + /// + /// For a safe alternative see [`downcast`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(downcast_unchecked)] + /// + /// use std::any::Any; + /// + /// let x: Box<dyn Any> = Box::new(1_usize); + /// + /// unsafe { + /// assert_eq!(*x.downcast_unchecked::<usize>(), 1); + /// } + /// ``` + /// + /// # Safety + /// + /// The contained value must be of type `T`. Calling this method + /// with the incorrect type is *undefined behavior*. + /// + /// [`downcast`]: Self::downcast + #[inline] + #[unstable(feature = "downcast_unchecked", issue = "90850")] + pub unsafe fn downcast_unchecked<T: Any>(self) -> Box<T, A> { + debug_assert!(self.is::<T>()); + unsafe { + let (raw, alloc): (*mut dyn Any, _) = Box::into_raw_with_allocator(self); + Box::from_raw_in(raw as *mut T, alloc) + } + } +} + +impl<A: Allocator> Box<dyn Any + Send, A> { + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box<dyn Any + Send>) { + /// if let Ok(string) = value.downcast::<String>() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn downcast<T: Any>(self) -> Result<Box<T, A>, Self> { + if self.is::<T>() { unsafe { Ok(self.downcast_unchecked::<T>()) } } else { Err(self) } + } + + /// Downcasts the box to a concrete type. + /// + /// For a safe alternative see [`downcast`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(downcast_unchecked)] + /// + /// use std::any::Any; + /// + /// let x: Box<dyn Any + Send> = Box::new(1_usize); + /// + /// unsafe { + /// assert_eq!(*x.downcast_unchecked::<usize>(), 1); + /// } + /// ``` + /// + /// # Safety + /// + /// The contained value must be of type `T`. Calling this method + /// with the incorrect type is *undefined behavior*. + /// + /// [`downcast`]: Self::downcast + #[inline] + #[unstable(feature = "downcast_unchecked", issue = "90850")] + pub unsafe fn downcast_unchecked<T: Any>(self) -> Box<T, A> { + debug_assert!(self.is::<T>()); + unsafe { + let (raw, alloc): (*mut (dyn Any + Send), _) = Box::into_raw_with_allocator(self); + Box::from_raw_in(raw as *mut T, alloc) + } + } +} + +impl<A: Allocator> Box<dyn Any + Send + Sync, A> { + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box<dyn Any + Send + Sync>) { + /// if let Ok(string) = value.downcast::<String>() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + #[inline] + #[stable(feature = "box_send_sync_any_downcast", since = "1.51.0")] + pub fn downcast<T: Any>(self) -> Result<Box<T, A>, Self> { + if self.is::<T>() { unsafe { Ok(self.downcast_unchecked::<T>()) } } else { Err(self) } + } + + /// Downcasts the box to a concrete type. + /// + /// For a safe alternative see [`downcast`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(downcast_unchecked)] + /// + /// use std::any::Any; + /// + /// let x: Box<dyn Any + Send + Sync> = Box::new(1_usize); + /// + /// unsafe { + /// assert_eq!(*x.downcast_unchecked::<usize>(), 1); + /// } + /// ``` + /// + /// # Safety + /// + /// The contained value must be of type `T`. Calling this method + /// with the incorrect type is *undefined behavior*. + /// + /// [`downcast`]: Self::downcast + #[inline] + #[unstable(feature = "downcast_unchecked", issue = "90850")] + pub unsafe fn downcast_unchecked<T: Any>(self) -> Box<T, A> { + debug_assert!(self.is::<T>()); + unsafe { + let (raw, alloc): (*mut (dyn Any + Send + Sync), _) = + Box::into_raw_with_allocator(self); + Box::from_raw_in(raw as *mut T, alloc) + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: fmt::Display + ?Sized, A: Allocator> fmt::Display for Box<T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: fmt::Debug + ?Sized, A: Allocator> fmt::Debug for Box<T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: ?Sized, A: Allocator> fmt::Pointer for Box<T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's not possible to extract the inner Uniq directly from the Box, + // instead we cast it to a *const which aliases the Unique + let ptr: *const T = &**self; + fmt::Pointer::fmt(&ptr, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_box", issue = "92521")] +impl<T: ?Sized, A: Allocator> const Deref for Box<T, A> { + type Target = T; + + fn deref(&self) -> &T { + &**self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_box", issue = "92521")] +impl<T: ?Sized, A: Allocator> const DerefMut for Box<T, A> { + fn deref_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[unstable(feature = "receiver_trait", issue = "none")] +impl<T: ?Sized, A: Allocator> Receiver for Box<T, A> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<I: Iterator + ?Sized, A: Allocator> Iterator for Box<I, A> { + type Item = I::Item; + fn next(&mut self) -> Option<I::Item> { + (**self).next() + } + fn size_hint(&self) -> (usize, Option<usize>) { + (**self).size_hint() + } + fn nth(&mut self, n: usize) -> Option<I::Item> { + (**self).nth(n) + } + fn last(self) -> Option<I::Item> { + BoxIter::last(self) + } +} + +trait BoxIter { + type Item; + fn last(self) -> Option<Self::Item>; +} + +impl<I: Iterator + ?Sized, A: Allocator> BoxIter for Box<I, A> { + type Item = I::Item; + default fn last(self) -> Option<I::Item> { + #[inline] + fn some<T>(_: Option<T>, x: T) -> Option<T> { + Some(x) + } + + self.fold(None, some) + } +} + +/// Specialization for sized `I`s that uses `I`s implementation of `last()` +/// instead of the default. +#[stable(feature = "rust1", since = "1.0.0")] +impl<I: Iterator, A: Allocator> BoxIter for Box<I, A> { + fn last(self) -> Option<I::Item> { + (*self).last() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<I: DoubleEndedIterator + ?Sized, A: Allocator> DoubleEndedIterator for Box<I, A> { + fn next_back(&mut self) -> Option<I::Item> { + (**self).next_back() + } + fn nth_back(&mut self, n: usize) -> Option<I::Item> { + (**self).nth_back(n) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl<I: ExactSizeIterator + ?Sized, A: Allocator> ExactSizeIterator for Box<I, A> { + fn len(&self) -> usize { + (**self).len() + } + fn is_empty(&self) -> bool { + (**self).is_empty() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl<I: FusedIterator + ?Sized, A: Allocator> FusedIterator for Box<I, A> {} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl<Args, F: FnOnce<Args> + ?Sized, A: Allocator> FnOnce<Args> for Box<F, A> { + type Output = <F as FnOnce<Args>>::Output; + + extern "rust-call" fn call_once(self, args: Args) -> Self::Output { + <F as FnOnce<Args>>::call_once(*self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl<Args, F: FnMut<Args> + ?Sized, A: Allocator> FnMut<Args> for Box<F, A> { + extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output { + <F as FnMut<Args>>::call_mut(self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl<Args, F: Fn<Args> + ?Sized, A: Allocator> Fn<Args> for Box<F, A> { + extern "rust-call" fn call(&self, args: Args) -> Self::Output { + <F as Fn<Args>>::call(self, args) + } +} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl<T: ?Sized + Unsize<U>, U: ?Sized, A: Allocator> CoerceUnsized<Box<U, A>> for Box<T, A> {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl<T: ?Sized + Unsize<U>, U: ?Sized> DispatchFromDyn<Box<U>> for Box<T, Global> {} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "boxed_slice_from_iter", since = "1.32.0")] +impl<I> FromIterator<I> for Box<[I]> { + fn from_iter<T: IntoIterator<Item = I>>(iter: T) -> Self { + iter.into_iter().collect::<Vec<_>>().into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl<T: Clone, A: Allocator + Clone> Clone for Box<[T], A> { + fn clone(&self) -> Self { + let alloc = Box::allocator(self).clone(); + self.to_vec_in(alloc).into_boxed_slice() + } + + fn clone_from(&mut self, other: &Self) { + if self.len() == other.len() { + self.clone_from_slice(&other); + } else { + *self = other.clone(); + } + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl<T: ?Sized, A: Allocator> borrow::Borrow<T> for Box<T, A> { + fn borrow(&self) -> &T { + &**self + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl<T: ?Sized, A: Allocator> borrow::BorrowMut<T> for Box<T, A> { + fn borrow_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl<T: ?Sized, A: Allocator> AsRef<T> for Box<T, A> { + fn as_ref(&self) -> &T { + &**self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl<T: ?Sized, A: Allocator> AsMut<T> for Box<T, A> { + fn as_mut(&mut self) -> &mut T { + &mut **self + } +} + +/* Nota bene + * + * We could have chosen not to add this impl, and instead have written a + * function of Pin<Box<T>> to Pin<T>. Such a function would not be sound, + * because Box<T> implements Unpin even when T does not, as a result of + * this impl. + * + * We chose this API instead of the alternative for a few reasons: + * - Logically, it is helpful to understand pinning in regard to the + * memory region being pointed to. For this reason none of the + * standard library pointer types support projecting through a pin + * (Box<T> is the only pointer type in std for which this would be + * safe.) + * - It is in practice very useful to have Box<T> be unconditionally + * Unpin because of trait objects, for which the structural auto + * trait functionality does not apply (e.g., Box<dyn Foo> would + * otherwise not be Unpin). + * + * Another type with the same semantics as Box but only a conditional + * implementation of `Unpin` (where `T: Unpin`) would be valid/safe, and + * could have a method to project a Pin<T> from it. + */ +#[stable(feature = "pin", since = "1.33.0")] +#[rustc_const_unstable(feature = "const_box", issue = "92521")] +impl<T: ?Sized, A: Allocator> const Unpin for Box<T, A> where A: 'static {} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl<G: ?Sized + Generator<R> + Unpin, R, A: Allocator> Generator<R> for Box<G, A> +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState<Self::Yield, Self::Return> { + G::resume(Pin::new(&mut *self), arg) + } +} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl<G: ?Sized + Generator<R>, R, A: Allocator> Generator<R> for Pin<Box<G, A>> +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState<Self::Yield, Self::Return> { + G::resume((*self).as_mut(), arg) + } +} + +#[stable(feature = "futures_api", since = "1.36.0")] +impl<F: ?Sized + Future + Unpin, A: Allocator> Future for Box<F, A> +where + A: 'static, +{ + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + F::poll(Pin::new(&mut *self), cx) + } +} + +#[unstable(feature = "async_iterator", issue = "79024")] +impl<S: ?Sized + AsyncIterator + Unpin> AsyncIterator for Box<S> { + type Item = S::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { + Pin::new(&mut **self).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (**self).size_hint() + } +} diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs new file mode 100644 index 000000000000..1eec265b28f8 --- /dev/null +++ b/rust/alloc/collections/mod.rs @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Collection types. + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +pub mod binary_heap; +#[cfg(not(no_global_oom_handling))] +mod btree; +#[cfg(not(no_global_oom_handling))] +pub mod linked_list; +#[cfg(not(no_global_oom_handling))] +pub mod vec_deque; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_map { + //! An ordered map based on a B-Tree. + #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::map::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_set { + //! An ordered set based on a B-Tree. + #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::set::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use binary_heap::BinaryHeap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_map::BTreeMap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_set::BTreeSet; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use linked_list::LinkedList; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use vec_deque::VecDeque; + +use crate::alloc::{Layout, LayoutError}; +use core::fmt::Display; + +/// The error type for `try_reserve` methods. +#[derive(Clone, PartialEq, Eq, Debug)] +#[stable(feature = "try_reserve", since = "1.57.0")] +pub struct TryReserveError { + kind: TryReserveErrorKind, +} + +impl TryReserveError { + /// Details about the allocation that caused the error + #[inline] + #[must_use] + #[unstable( + feature = "try_reserve_kind", + reason = "Uncertain how much info should be exposed", + issue = "48043" + )] + pub fn kind(&self) -> TryReserveErrorKind { + self.kind.clone() + } +} + +/// Details of the allocation that caused a `TryReserveError` +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable( + feature = "try_reserve_kind", + reason = "Uncertain how much info should be exposed", + issue = "48043" +)] +pub enum TryReserveErrorKind { + /// Error due to the computed capacity exceeding the collection's maximum + /// (usually `isize::MAX` bytes). + CapacityOverflow, + + /// The memory allocator returned an error + AllocError { + /// The layout of allocation request that failed + layout: Layout, + + #[doc(hidden)] + #[unstable( + feature = "container_error_extra", + issue = "none", + reason = "\ + Enable exposing the allocator’s custom error value \ + if an associated type is added in the future: \ + https://github.com/rust-lang/wg-allocators/issues/23" + )] + non_exhaustive: (), + }, +} + +#[unstable( + feature = "try_reserve_kind", + reason = "Uncertain how much info should be exposed", + issue = "48043" +)] +impl From<TryReserveErrorKind> for TryReserveError { + #[inline] + fn from(kind: TryReserveErrorKind) -> Self { + Self { kind } + } +} + +#[unstable(feature = "try_reserve_kind", reason = "new API", issue = "48043")] +impl From<LayoutError> for TryReserveErrorKind { + /// Always evaluates to [`TryReserveErrorKind::CapacityOverflow`]. + #[inline] + fn from(_: LayoutError) -> Self { + TryReserveErrorKind::CapacityOverflow + } +} + +#[stable(feature = "try_reserve", since = "1.57.0")] +impl Display for TryReserveError { + fn fmt( + &self, + fmt: &mut core::fmt::Formatter<'_>, + ) -> core::result::Result<(), core::fmt::Error> { + fmt.write_str("memory allocation failed")?; + let reason = match self.kind { + TryReserveErrorKind::CapacityOverflow => { + " because the computed capacity exceeded the collection's maximum" + } + TryReserveErrorKind::AllocError { .. } => { + " because the memory allocator returned a error" + } + }; + fmt.write_str(reason) + } +} + +/// An intermediate trait for specialization of `Extend`. +#[doc(hidden)] +trait SpecExtend<I: IntoIterator> { + /// Extends `self` with the contents of the given iterator. + fn spec_extend(&mut self, iter: I); +} diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs new file mode 100644 index 000000000000..233bcd5e4654 --- /dev/null +++ b/rust/alloc/lib.rs @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! # The Rust core allocation and collections library +//! +//! This library provides smart pointers and collections for managing +//! heap-allocated values. +//! +//! This library, like libcore, normally doesn’t need to be used directly +//! since its contents are re-exported in the [`std` crate](../std/index.html). +//! Crates that use the `#![no_std]` attribute however will typically +//! not depend on `std`, so they’d use this crate instead. +//! +//! ## Boxed values +//! +//! The [`Box`] type is a smart pointer type. There can only be one owner of a +//! [`Box`], and the owner can decide to mutate the contents, which live on the +//! heap. +//! +//! This type can be sent among threads efficiently as the size of a `Box` value +//! is the same as that of a pointer. Tree-like data structures are often built +//! with boxes because each node often has only one owner, the parent. +//! +//! ## Reference counted pointers +//! +//! The [`Rc`] type is a non-threadsafe reference-counted pointer type intended +//! for sharing memory within a thread. An [`Rc`] pointer wraps a type, `T`, and +//! only allows access to `&T`, a shared reference. +//! +//! This type is useful when inherited mutability (such as using [`Box`]) is too +//! constraining for an application, and is often paired with the [`Cell`] or +//! [`RefCell`] types in order to allow mutation. +//! +//! ## Atomically reference counted pointers +//! +//! The [`Arc`] type is the threadsafe equivalent of the [`Rc`] type. It +//! provides all the same functionality of [`Rc`], except it requires that the +//! contained type `T` is shareable. Additionally, [`Arc<T>`][`Arc`] is itself +//! sendable while [`Rc<T>`][`Rc`] is not. +//! +//! This type allows for shared access to the contained data, and is often +//! paired with synchronization primitives such as mutexes to allow mutation of +//! shared resources. +//! +//! ## Collections +//! +//! Implementations of the most common general purpose data structures are +//! defined in this library. They are re-exported through the +//! [standard collections library](../std/collections/index.html). +//! +//! ## Heap interfaces +//! +//! The [`alloc`](alloc/index.html) module defines the low-level interface to the +//! default global allocator. It is not compatible with the libc allocator API. +//! +//! [`Arc`]: sync +//! [`Box`]: boxed +//! [`Cell`]: core::cell +//! [`Rc`]: rc +//! [`RefCell`]: core::cell + +// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be +// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>. +// rustc itself never sets the feature, so this line has no affect there. +#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))] +#![allow(unused_attributes)] +#![stable(feature = "alloc", since = "1.36.0")] +#![doc( + html_playground_url = "https://play.rust-lang.org/", + issue_tracker_base_url = "https://github.com/rust-lang/rust/issues/", + test(no_crate_inject, attr(allow(unused_variables), deny(warnings))) +)] +#![doc(cfg_hide( + not(test), + not(any(test, bootstrap)), + any(not(feature = "miri-test-libstd"), test, doctest), + no_global_oom_handling, + not(no_global_oom_handling), + target_has_atomic = "ptr" +))] +#![no_std] +#![needs_allocator] +// +// Lints: +#![deny(unsafe_op_in_unsafe_fn)] +#![warn(deprecated_in_future)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![allow(explicit_outlives_requirements)] +// +// Library features: +#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))] +#![feature(alloc_layout_extra)] +#![feature(allocator_api)] +#![feature(array_chunks)] +#![feature(array_methods)] +#![feature(array_windows)] +#![feature(assert_matches)] +#![feature(async_iterator)] +#![feature(coerce_unsized)] +#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))] +#![feature(const_box)] +#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))] +#![feature(const_cow_is_borrowed)] +#![feature(const_convert)] +#![feature(const_size_of_val)] +#![feature(const_align_of_val)] +#![feature(const_ptr_read)] +#![feature(const_maybe_uninit_write)] +#![feature(const_maybe_uninit_as_mut_ptr)] +#![feature(const_refs_to_cell)] +#![feature(core_c_str)] +#![feature(core_intrinsics)] +#![feature(core_ffi_c)] +#![feature(const_eval_select)] +#![feature(const_pin)] +#![feature(cstr_from_bytes_until_nul)] +#![feature(dispatch_from_dyn)] +#![feature(exact_size_is_empty)] +#![feature(extend_one)] +#![feature(fmt_internals)] +#![feature(fn_traits)] +#![feature(hasher_prefixfree_extras)] +#![feature(inplace_iteration)] +#![feature(iter_advance_by)] +#![feature(layout_for_ptr)] +#![feature(maybe_uninit_slice)] +#![cfg_attr(test, feature(new_uninit))] +#![feature(nonnull_slice_from_raw_parts)] +#![feature(pattern)] +#![feature(ptr_internals)] +#![feature(ptr_metadata)] +#![feature(ptr_sub_ptr)] +#![feature(receiver_trait)] +#![feature(set_ptr_value)] +#![feature(slice_group_by)] +#![feature(slice_ptr_get)] +#![feature(slice_ptr_len)] +#![feature(slice_range)] +#![feature(str_internals)] +#![feature(strict_provenance)] +#![feature(trusted_len)] +#![feature(trusted_random_access)] +#![feature(try_trait_v2)] +#![feature(unchecked_math)] +#![feature(unicode_internals)] +#![feature(unsize)] +// +// Language features: +#![feature(allocator_internals)] +#![feature(allow_internal_unstable)] +#![feature(associated_type_bounds)] +#![feature(box_syntax)] +#![feature(cfg_sanitize)] +#![feature(const_deref)] +#![feature(const_mut_refs)] +#![feature(const_ptr_write)] +#![feature(const_precise_live_drops)] +#![feature(const_trait_impl)] +#![feature(const_try)] +#![feature(dropck_eyepatch)] +#![feature(exclusive_range_pattern)] +#![feature(fundamental)] +#![cfg_attr(not(test), feature(generator_trait))] +#![feature(hashmap_internals)] +#![feature(lang_items)] +#![feature(let_else)] +#![feature(min_specialization)] +#![feature(negative_impls)] +#![feature(never_type)] +#![feature(nll)] // Not necessary, but here to test the `nll` feature. +#![feature(rustc_allow_const_fn_unstable)] +#![feature(rustc_attrs)] +#![feature(slice_internals)] +#![feature(staged_api)] +#![cfg_attr(test, feature(test))] +#![feature(unboxed_closures)] +#![feature(unsized_fn_params)] +#![feature(c_unwind)] +// +// Rustdoc features: +#![feature(doc_cfg)] +#![feature(doc_cfg_hide)] +// Technically, this is a bug in rustdoc: rustdoc sees the documentation on `#[lang = slice_alloc]` +// blocks is for `&[T]`, which also has documentation using this feature in `core`, and gets mad +// that the feature-gate isn't enabled. Ideally, it wouldn't check for the feature gate for docs +// from other crates, but since this can only appear for lang items, it doesn't seem worth fixing. +#![feature(intra_doc_pointers)] + +// Allow testing this library +#[cfg(test)] +#[macro_use] +extern crate std; +#[cfg(test)] +extern crate test; + +// Module with internal macros used by other modules (needs to be included before other modules). +#[cfg(not(no_macros))] +#[macro_use] +mod macros; + +mod raw_vec; + +// Heaps provided for low-level allocation strategies + +pub mod alloc; + +// Primitive types using the heaps above + +// Need to conditionally define the mod from `boxed.rs` to avoid +// duplicating the lang-items when building in test cfg; but also need +// to allow code to have `use boxed::Box;` declarations. +#[cfg(not(test))] +pub mod boxed; +#[cfg(test)] +mod boxed { + pub use std::boxed::Box; +} +pub mod borrow; +pub mod collections; +#[cfg(not(no_global_oom_handling))] +pub mod ffi; +#[cfg(not(no_fmt))] +pub mod fmt; +#[cfg(not(no_rc))] +pub mod rc; +pub mod slice; +#[cfg(not(no_str))] +pub mod str; +#[cfg(not(no_string))] +pub mod string; +#[cfg(not(no_sync))] +#[cfg(target_has_atomic = "ptr")] +pub mod sync; +#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))] +pub mod task; +#[cfg(test)] +mod tests; +pub mod vec; + +#[doc(hidden)] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +pub mod __export { + pub use core::format_args; +} diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs new file mode 100644 index 000000000000..daf5f2da7168 --- /dev/null +++ b/rust/alloc/raw_vec.rs @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#![unstable(feature = "raw_vec_internals", reason = "unstable const warnings", issue = "none")] + +use core::alloc::LayoutError; +use core::cmp; +use core::intrinsics; +use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::ops::Drop; +use core::ptr::{self, NonNull, Unique}; +use core::slice; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::handle_alloc_error; +use crate::alloc::{Allocator, Global, Layout}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::collections::TryReserveErrorKind::*; + +#[cfg(test)] +mod tests; + +#[cfg(not(no_global_oom_handling))] +enum AllocInit { + /// The contents of the new memory are uninitialized. + Uninitialized, + /// The new memory is guaranteed to be zeroed. + Zeroed, +} + +/// A low-level utility for more ergonomically allocating, reallocating, and deallocating +/// a buffer of memory on the heap without having to worry about all the corner cases +/// involved. This type is excellent for building your own data structures like Vec and VecDeque. +/// In particular: +/// +/// * Produces `Unique::dangling()` on zero-sized types. +/// * Produces `Unique::dangling()` on zero-length allocations. +/// * Avoids freeing `Unique::dangling()`. +/// * Catches all overflows in capacity computations (promotes them to "capacity overflow" panics). +/// * Guards against 32-bit systems allocating more than isize::MAX bytes. +/// * Guards against overflowing your length. +/// * Calls `handle_alloc_error` for fallible allocations. +/// * Contains a `ptr::Unique` and thus endows the user with all related benefits. +/// * Uses the excess returned from the allocator to use the largest available capacity. +/// +/// This type does not in anyway inspect the memory that it manages. When dropped it *will* +/// free its memory, but it *won't* try to drop its contents. It is up to the user of `RawVec` +/// to handle the actual things *stored* inside of a `RawVec`. +/// +/// Note that the excess of a zero-sized types is always infinite, so `capacity()` always returns +/// `usize::MAX`. This means that you need to be careful when round-tripping this type with a +/// `Box<[T]>`, since `capacity()` won't yield the length. +#[allow(missing_debug_implementations)] +pub(crate) struct RawVec<T, A: Allocator = Global> { + ptr: Unique<T>, + cap: usize, + alloc: A, +} + +impl<T> RawVec<T, Global> { + /// HACK(Centril): This exists because stable `const fn` can only call stable `const fn`, so + /// they cannot call `Self::new()`. + /// + /// If you change `RawVec<T>::new` or dependencies, please take care to not introduce anything + /// that would truly const-call something unstable. + pub const NEW: Self = Self::new(); + + /// Creates the biggest possible `RawVec` (on the system heap) + /// without allocating. If `T` has positive size, then this makes a + /// `RawVec` with capacity `0`. If `T` is zero-sized, then it makes a + /// `RawVec` with capacity `usize::MAX`. Useful for implementing + /// delayed allocation. + #[must_use] + pub const fn new() -> Self { + Self::new_in(Global) + } + + /// Creates a `RawVec` (on the system heap) with exactly the + /// capacity and alignment requirements for a `[T; capacity]`. This is + /// equivalent to calling `RawVec::new` when `capacity` is `0` or `T` is + /// zero-sized. Note that if `T` is zero-sized this means you will + /// *not* get a `RawVec` with the requested capacity. + /// + /// # Panics + /// + /// Panics if the requested capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(any(no_global_oom_handling, test)))] + #[must_use] + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } + + /// Like `with_capacity`, but guarantees the buffer is zeroed. + #[cfg(not(any(no_global_oom_handling, test)))] + #[must_use] + #[inline] + pub fn with_capacity_zeroed(capacity: usize) -> Self { + Self::with_capacity_zeroed_in(capacity, Global) + } +} + +impl<T, A: Allocator> RawVec<T, A> { + // Tiny Vecs are dumb. Skip to: + // - 8 if the element size is 1, because any heap allocators is likely + // to round up a request of less than 8 bytes to at least 8 bytes. + // - 4 if elements are moderate-sized (<= 1 KiB). + // - 1 otherwise, to avoid wasting too much space for very short Vecs. + pub(crate) const MIN_NON_ZERO_CAP: usize = if mem::size_of::<T>() == 1 { + 8 + } else if mem::size_of::<T>() <= 1024 { + 4 + } else { + 1 + }; + + /// Like `new`, but parameterized over the choice of allocator for + /// the returned `RawVec`. + pub const fn new_in(alloc: A) -> Self { + // `cap: 0` means "unallocated". zero-sized types are ignored. + Self { ptr: Unique::dangling(), cap: 0, alloc } + } + + /// Like `with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + + /// Like `with_capacity_zeroed`, but parameterized over the choice + /// of allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_zeroed_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Zeroed, alloc) + } + + /// Converts the entire buffer into `Box<[MaybeUninit<T>]>` with the specified `len`. + /// + /// Note that this will correctly reconstitute any `cap` changes + /// that may have been performed. (See description of type for details.) + /// + /// # Safety + /// + /// * `len` must be greater than or equal to the most recently requested capacity, and + /// * `len` must be less than or equal to `self.capacity()`. + /// + /// Note, that the requested capacity and `self.capacity()` could differ, as + /// an allocator could overallocate and return a greater memory block than requested. + pub unsafe fn into_box(self, len: usize) -> Box<[MaybeUninit<T>], A> { + // Sanity-check one half of the safety requirement (we cannot check the other half). + debug_assert!( + len <= self.capacity(), + "`len` must be smaller than or equal to `self.capacity()`" + ); + + let me = ManuallyDrop::new(self); + unsafe { + let slice = slice::from_raw_parts_mut(me.ptr() as *mut MaybeUninit<T>, len); + Box::from_raw_in(slice, ptr::read(&me.alloc)) + } + } + + #[cfg(not(no_global_oom_handling))] + fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self { + // Don't allocate here because `Drop` will not deallocate when `capacity` is 0. + if mem::size_of::<T>() == 0 || capacity == 0 { + Self::new_in(alloc) + } else { + // We avoid `unwrap_or_else` here because it bloats the amount of + // LLVM IR generated. + let layout = match Layout::array::<T>(capacity) { + Ok(layout) => layout, + Err(_) => capacity_overflow(), + }; + match alloc_guard(layout.size()) { + Ok(_) => {} + Err(_) => capacity_overflow(), + } + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = match result { + Ok(ptr) => ptr, + Err(_) => handle_alloc_error(layout), + }; + + // Allocators currently return a `NonNull<[u8]>` whose length + // matches the size requested. If that ever changes, the capacity + // here should change to `ptr.len() / mem::size_of::<T>()`. + Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: capacity, + alloc, + } + } + } + + /// Reconstitutes a `RawVec` from a pointer, capacity, and allocator. + /// + /// # Safety + /// + /// The `ptr` must be allocated (via the given allocator `alloc`), and with the given + /// `capacity`. + /// The `capacity` cannot exceed `isize::MAX` for sized types. (only a concern on 32-bit + /// systems). ZST vectors may have a capacity up to `usize::MAX`. + /// If the `ptr` and `capacity` come from a `RawVec` created via `alloc`, then this is + /// guaranteed. + #[inline] + pub unsafe fn from_raw_parts_in(ptr: *mut T, capacity: usize, alloc: A) -> Self { + Self { ptr: unsafe { Unique::new_unchecked(ptr) }, cap: capacity, alloc } + } + + /// Gets a raw pointer to the start of the allocation. Note that this is + /// `Unique::dangling()` if `capacity == 0` or `T` is zero-sized. In the former case, you must + /// be careful. + #[inline] + pub fn ptr(&self) -> *mut T { + self.ptr.as_ptr() + } + + /// Gets the capacity of the allocation. + /// + /// This will always be `usize::MAX` if `T` is zero-sized. + #[inline(always)] + pub fn capacity(&self) -> usize { + if mem::size_of::<T>() == 0 { usize::MAX } else { self.cap } + } + + /// Returns a shared reference to the allocator backing this `RawVec`. + pub fn allocator(&self) -> &A { + &self.alloc + } + + fn current_memory(&self) -> Option<(NonNull<u8>, Layout)> { + if mem::size_of::<T>() == 0 || self.cap == 0 { + None + } else { + // We have an allocated chunk of memory, so we can bypass runtime + // checks to get our current layout. + unsafe { + let layout = Layout::array::<T>(self.cap).unwrap_unchecked(); + Some((self.ptr.cast().into(), layout)) + } + } + } + + /// Ensures that the buffer contains at least enough space to hold `len + + /// additional` elements. If it doesn't already have enough capacity, will + /// reallocate enough space plus comfortable slack space to get amortized + /// *O*(1) behavior. Will limit this behavior if it would needlessly cause + /// itself to panic. + /// + /// If `len` exceeds `self.capacity()`, this may fail to actually allocate + /// the requested space. This is not really unsafe, but the unsafe + /// code *you* write that relies on the behavior of this function may break. + /// + /// This is ideal for implementing a bulk-push operation like `extend`. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn reserve(&mut self, len: usize, additional: usize) { + // Callers expect this function to be very cheap when there is already sufficient capacity. + // Therefore, we move all the resizing and error-handling logic from grow_amortized and + // handle_reserve behind a call, while making sure that this function is likely to be + // inlined as just a comparison and a call if the comparison fails. + #[cold] + fn do_reserve_and_handle<T, A: Allocator>( + slf: &mut RawVec<T, A>, + len: usize, + additional: usize, + ) { + handle_reserve(slf.grow_amortized(len, additional)); + } + + if self.needs_to_grow(len, additional) { + do_reserve_and_handle(self, len, additional); + } + } + + /// A specialized version of `reserve()` used only by the hot and + /// oft-instantiated `Vec::push()`, which does its own capacity check. + #[cfg(not(no_global_oom_handling))] + #[inline(never)] + pub fn reserve_for_push(&mut self, len: usize) { + handle_reserve(self.grow_amortized(len, 1)); + } + + /// The same as `reserve`, but returns on errors instead of panicking or aborting. + pub fn try_reserve(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + if self.needs_to_grow(len, additional) { + self.grow_amortized(len, additional) + } else { + Ok(()) + } + } + + /// The same as `reserve_for_push`, but returns on errors instead of panicking or aborting. + #[inline(never)] + pub fn try_reserve_for_push(&mut self, len: usize) -> Result<(), TryReserveError> { + self.grow_amortized(len, 1) + } + + /// Ensures that the buffer contains at least enough space to hold `len + + /// additional` elements. If it doesn't already, will reallocate the + /// minimum possible amount of memory necessary. Generally this will be + /// exactly the amount of memory necessary, but in principle the allocator + /// is free to give back more than we asked for. + /// + /// If `len` exceeds `self.capacity()`, this may fail to actually allocate + /// the requested space. This is not really unsafe, but the unsafe code + /// *you* write that relies on the behavior of this function may break. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + pub fn reserve_exact(&mut self, len: usize, additional: usize) { + handle_reserve(self.try_reserve_exact(len, additional)); + } + + /// The same as `reserve_exact`, but returns on errors instead of panicking or aborting. + pub fn try_reserve_exact( + &mut self, + len: usize, + additional: usize, + ) -> Result<(), TryReserveError> { + if self.needs_to_grow(len, additional) { self.grow_exact(len, additional) } else { Ok(()) } + } + + /// Shrinks the buffer down to the specified capacity. If the given amount + /// is 0, actually completely deallocates. + /// + /// # Panics + /// + /// Panics if the given amount is *larger* than the current capacity. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + pub fn shrink_to_fit(&mut self, cap: usize) { + handle_reserve(self.shrink(cap)); + } +} + +impl<T, A: Allocator> RawVec<T, A> { + /// Returns if the buffer needs to grow to fulfill the needed extra capacity. + /// Mainly used to make inlining reserve-calls possible without inlining `grow`. + fn needs_to_grow(&self, len: usize, additional: usize) -> bool { + additional > self.capacity().wrapping_sub(len) + } + + fn set_ptr_and_cap(&mut self, ptr: NonNull<[u8]>, cap: usize) { + // Allocators currently return a `NonNull<[u8]>` whose length matches + // the size requested. If that ever changes, the capacity here should + // change to `ptr.len() / mem::size_of::<T>()`. + self.ptr = unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }; + self.cap = cap; + } + + // This method is usually instantiated many times. So we want it to be as + // small as possible, to improve compile times. But we also want as much of + // its contents to be statically computable as possible, to make the + // generated code run faster. Therefore, this method is carefully written + // so that all of the code that depends on `T` is within it, while as much + // of the code that doesn't depend on `T` as possible is in functions that + // are non-generic over `T`. + fn grow_amortized(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + // This is ensured by the calling contexts. + debug_assert!(additional > 0); + + if mem::size_of::<T>() == 0 { + // Since we return a capacity of `usize::MAX` when `elem_size` is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow.into()); + } + + // Nothing we can really do about these checks, sadly. + let required_cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + + // This guarantees exponential growth. The doubling cannot overflow + // because `cap <= isize::MAX` and the type of `cap` is `usize`. + let cap = cmp::max(self.cap * 2, required_cap); + let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap); + + let new_layout = Layout::array::<T>(cap); + + // `finish_grow` is non-generic over `T`. + let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr_and_cap(ptr, cap); + Ok(()) + } + + // The constraints on this method are much the same as those on + // `grow_amortized`, but this method is usually instantiated less often so + // it's less critical. + fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + if mem::size_of::<T>() == 0 { + // Since we return a capacity of `usize::MAX` when the type size is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow.into()); + } + + let cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + let new_layout = Layout::array::<T>(cap); + + // `finish_grow` is non-generic over `T`. + let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr_and_cap(ptr, cap); + Ok(()) + } + + #[allow(dead_code)] + fn shrink(&mut self, cap: usize) -> Result<(), TryReserveError> { + assert!(cap <= self.capacity(), "Tried to shrink to a larger capacity"); + + let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) }; + + let ptr = unsafe { + // `Layout::array` cannot overflow here because it would have + // overflowed earlier when capacity was larger. + let new_layout = Layout::array::<T>(cap).unwrap_unchecked(); + self.alloc + .shrink(ptr, layout, new_layout) + .map_err(|_| AllocError { layout: new_layout, non_exhaustive: () })? + }; + self.set_ptr_and_cap(ptr, cap); + Ok(()) + } +} + +// This function is outside `RawVec` to minimize compile times. See the comment +// above `RawVec::grow_amortized` for details. (The `A` parameter isn't +// significant, because the number of different `A` types seen in practice is +// much smaller than the number of `T` types.) +#[inline(never)] +fn finish_grow<A>( + new_layout: Result<Layout, LayoutError>, + current_memory: Option<(NonNull<u8>, Layout)>, + alloc: &mut A, +) -> Result<NonNull<[u8]>, TryReserveError> +where + A: Allocator, +{ + // Check for the error here to minimize the size of `RawVec::grow_*`. + let new_layout = new_layout.map_err(|_| CapacityOverflow)?; + + alloc_guard(new_layout.size())?; + + let memory = if let Some((ptr, old_layout)) = current_memory { + debug_assert_eq!(old_layout.align(), new_layout.align()); + unsafe { + // The allocator checks for alignment equality + intrinsics::assume(old_layout.align() == new_layout.align()); + alloc.grow(ptr, old_layout, new_layout) + } + } else { + alloc.allocate(new_layout) + }; + + memory.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () }.into()) +} + +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawVec<T, A> { + /// Frees the memory owned by the `RawVec` *without* trying to drop its contents. + fn drop(&mut self) { + if let Some((ptr, layout)) = self.current_memory() { + unsafe { self.alloc.deallocate(ptr, layout) } + } + } +} + +// Central function for reserve error handling. +#[cfg(not(no_global_oom_handling))] +#[inline] +fn handle_reserve(result: Result<(), TryReserveError>) { + match result.map_err(|e| e.kind()) { + Err(CapacityOverflow) => capacity_overflow(), + Err(AllocError { layout, .. }) => handle_alloc_error(layout), + Ok(()) => { /* yay */ } + } +} + +// We need to guarantee the following: +// * We don't ever allocate `> isize::MAX` byte-size objects. +// * We don't overflow `usize::MAX` and actually allocate too little. +// +// On 64-bit we just need to check for overflow since trying to allocate +// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add +// an extra guard for this in case we're running on a platform which can use +// all 4GB in user-space, e.g., PAE or x32. + +#[inline] +fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { + if usize::BITS < 64 && alloc_size > isize::MAX as usize { + Err(CapacityOverflow.into()) + } else { + Ok(()) + } +} + +// One central function responsible for reporting capacity overflows. This'll +// ensure that the code generation related to these panics is minimal as there's +// only one location which panics rather than a bunch throughout the module. +#[cfg(not(no_global_oom_handling))] +fn capacity_overflow() -> ! { + panic!("capacity overflow"); +} diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs new file mode 100644 index 000000000000..e444e97fa145 --- /dev/null +++ b/rust/alloc/slice.rs @@ -0,0 +1,1204 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A dynamically-sized view into a contiguous sequence, `[T]`. +//! +//! *[See also the slice primitive type](slice).* +//! +//! Slices are a view into a block of memory represented as a pointer and a +//! length. +//! +//! ``` +//! // slicing a Vec +//! let vec = vec![1, 2, 3]; +//! let int_slice = &vec[..]; +//! // coercing an array to a slice +//! let str_slice: &[&str] = &["one", "two", "three"]; +//! ``` +//! +//! Slices are either mutable or shared. The shared slice type is `&[T]`, +//! while the mutable slice type is `&mut [T]`, where `T` represents the element +//! type. For example, you can mutate the block of memory that a mutable slice +//! points to: +//! +//! ``` +//! let x = &mut [1, 2, 3]; +//! x[1] = 7; +//! assert_eq!(x, &[1, 7, 3]); +//! ``` +//! +//! Here are some of the things this module contains: +//! +//! ## Structs +//! +//! There are several structs that are useful for slices, such as [`Iter`], which +//! represents iteration over a slice. +//! +//! ## Trait Implementations +//! +//! There are several implementations of common traits for slices. Some examples +//! include: +//! +//! * [`Clone`] +//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`]. +//! * [`Hash`] - for slices whose element type is [`Hash`]. +//! +//! ## Iteration +//! +//! The slices implement `IntoIterator`. The iterator yields references to the +//! slice elements. +//! +//! ``` +//! let numbers = &[0, 1, 2]; +//! for n in numbers { +//! println!("{n} is a number!"); +//! } +//! ``` +//! +//! The mutable slice yields mutable references to the elements: +//! +//! ``` +//! let mut scores = [7, 8, 9]; +//! for score in &mut scores[..] { +//! *score += 1; +//! } +//! ``` +//! +//! This iterator yields mutable references to the slice's elements, so while +//! the element type of the slice is `i32`, the element type of the iterator is +//! `&mut i32`. +//! +//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default +//! iterators. +//! * Further methods that return iterators are [`.split`], [`.splitn`], +//! [`.chunks`], [`.windows`] and more. +//! +//! [`Hash`]: core::hash::Hash +//! [`.iter`]: slice::iter +//! [`.iter_mut`]: slice::iter_mut +//! [`.split`]: slice::split +//! [`.splitn`]: slice::splitn +//! [`.chunks`]: slice::chunks +//! [`.windows`]: slice::windows +#![stable(feature = "rust1", since = "1.0.0")] +// Many of the usings in this module are only used in the test configuration. +// It's cleaner to just turn off the unused_imports warning than to fix them. +#![cfg_attr(test, allow(unused_imports, dead_code))] + +use core::borrow::{Borrow, BorrowMut}; +#[cfg(not(no_global_oom_handling))] +use core::cmp::Ordering::{self, Less}; +#[cfg(not(no_global_oom_handling))] +use core::mem; +#[cfg(not(no_global_oom_handling))] +use core::mem::size_of; +#[cfg(not(no_global_oom_handling))] +use core::ptr; + +use crate::alloc::Allocator; +#[cfg(not(no_global_oom_handling))] +use crate::alloc::Global; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::ToOwned; +use crate::boxed::Box; +use crate::vec::Vec; + +#[unstable(feature = "slice_range", issue = "76393")] +pub use core::slice::range; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunks; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunksMut; +#[unstable(feature = "array_windows", issue = "75027")] +pub use core::slice::ArrayWindows; +#[stable(feature = "inherent_ascii_escape", since = "1.60.0")] +pub use core::slice::EscapeAscii; +#[stable(feature = "slice_get_slice", since = "1.28.0")] +pub use core::slice::SliceIndex; +#[stable(feature = "from_ref", since = "1.28.0")] +pub use core::slice::{from_mut, from_ref}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{from_raw_parts, from_raw_parts_mut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Chunks, Windows}; +#[stable(feature = "chunks_exact", since = "1.31.0")] +pub use core::slice::{ChunksExact, ChunksExactMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{ChunksMut, Split, SplitMut}; +#[unstable(feature = "slice_group_by", issue = "80552")] +pub use core::slice::{GroupBy, GroupByMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Iter, IterMut}; +#[stable(feature = "rchunks", since = "1.31.0")] +pub use core::slice::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; +#[stable(feature = "slice_rsplit", since = "1.27.0")] +pub use core::slice::{RSplit, RSplitMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{RSplitN, RSplitNMut, SplitN, SplitNMut}; +#[stable(feature = "split_inclusive", since = "1.51.0")] +pub use core::slice::{SplitInclusive, SplitInclusiveMut}; + +//////////////////////////////////////////////////////////////////////////////// +// Basic slice extension methods +//////////////////////////////////////////////////////////////////////////////// + +// HACK(japaric) needed for the implementation of `vec!` macro during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::into_vec; + +// HACK(japaric) needed for the implementation of `Vec::clone` during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::to_vec; + +// HACK(japaric): With cfg(test) `impl [T]` is not available, these three +// functions are actually methods that are in `impl [T]` but not in +// `core::slice::SliceExt` - we need to supply these functions for the +// `test_permutations` test +pub(crate) mod hack { + use core::alloc::Allocator; + + use crate::boxed::Box; + use crate::vec::Vec; + + // We shouldn't add inline attribute to this since this is used in + // `vec!` macro mostly and causes perf regression. See #71204 for + // discussion and perf results. + pub fn into_vec<T, A: Allocator>(b: Box<[T], A>) -> Vec<T, A> { + unsafe { + let len = b.len(); + let (b, alloc) = Box::into_raw_with_allocator(b); + Vec::from_raw_parts_in(b as *mut T, len, len, alloc) + } + } + + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn to_vec<T: ConvertVec, A: Allocator>(s: &[T], alloc: A) -> Vec<T, A> { + T::to_vec(s, alloc) + } + + #[cfg(not(no_global_oom_handling))] + pub trait ConvertVec { + fn to_vec<A: Allocator>(s: &[Self], alloc: A) -> Vec<Self, A> + where + Self: Sized; + } + + #[cfg(not(no_global_oom_handling))] + impl<T: Clone> ConvertVec for T { + #[inline] + default fn to_vec<A: Allocator>(s: &[Self], alloc: A) -> Vec<Self, A> { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec<T, A>, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::with_capacity_in(s.len(), alloc); + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + vec + } + } + + #[cfg(not(no_global_oom_handling))] + impl<T: Copy> ConvertVec for T { + #[inline] + fn to_vec<A: Allocator>(s: &[Self], alloc: A) -> Vec<Self, A> { + let mut v = Vec::with_capacity_in(s.len(), alloc); + // SAFETY: + // allocated above with the capacity of `s`, and initialize to `s.len()` in + // ptr::copy_to_non_overlapping below. + unsafe { + s.as_ptr().copy_to_nonoverlapping(v.as_mut_ptr(), s.len()); + v.set_len(s.len()); + } + v + } + } +} + +#[cfg(not(test))] +impl<T> [T] { + /// Sorts the slice. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable`](slice::sort_unstable). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5, 4, 1, -3, 2]; + /// + /// v.sort(); + /// assert!(v == [-5, -3, 1, 2, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort(&mut self) + where + T: Ord, + { + merge_sort(self, |a, b| a.lt(b)); + } + + /// Sorts the slice with a comparator function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// The comparator function must define a total ordering for the elements in the slice. If + /// the ordering is not total, the order of the elements is unspecified. An order is a + /// total order if it is (for all `a`, `b` and `c`): + /// + /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and + /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`. + /// + /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use + /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`. + /// + /// ``` + /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0]; + /// floats.sort_by(|a, b| a.partial_cmp(b).unwrap()); + /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]); + /// ``` + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by`](slice::sort_unstable_by). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [5, 4, 1, 3, 2]; + /// v.sort_by(|a, b| a.cmp(b)); + /// assert!(v == [1, 2, 3, 4, 5]); + /// + /// // reverse sorting + /// v.sort_by(|a, b| b.cmp(a)); + /// assert!(v == [5, 4, 3, 2, 1]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort_by<F>(&mut self, mut compare: F) + where + F: FnMut(&T, &T) -> Ordering, + { + merge_sort(self, |a, b| compare(a, b) == Less); + } + + /// Sorts the slice with a key extraction function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For expensive key functions (e.g. functions that are not simple property accesses or + /// basic operations), [`sort_by_cached_key`](slice::sort_by_cached_key) is likely to be + /// significantly faster, as it does not recompute element keys. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by_key`](slice::sort_unstable_by_key). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 1, -3, 2]; + /// + /// v.sort_by_key(|k| k.abs()); + /// assert!(v == [1, 2, -3, 4, -5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[stable(feature = "slice_sort_by_key", since = "1.7.0")] + #[inline] + pub fn sort_by_key<K, F>(&mut self, mut f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + merge_sort(self, |a, b| f(a).lt(&f(b))); + } + + /// Sorts the slice with a key extraction function. + /// + /// During sorting, the key function is called at most once per element, by using + /// temporary storage to remember the results of key evaluation. + /// The order of calls to the key function is unspecified and may change in future versions + /// of the standard library. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For simple key functions (e.g., functions that are property accesses or + /// basic operations), [`sort_by_key`](slice::sort_by_key) is likely to be + /// faster. + /// + /// # Current implementation + /// + /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, + /// which combines the fast average case of randomized quicksort with the fast worst case of + /// heapsort, while achieving linear time on slices with certain patterns. It uses some + /// randomization to avoid degenerate cases, but with a fixed seed to always provide + /// deterministic behavior. + /// + /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the + /// length of the slice. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 32, -3, 2]; + /// + /// v.sort_by_cached_key(|k| k.to_string()); + /// assert!(v == [-3, -5, 2, 32, 4]); + /// ``` + /// + /// [pdqsort]: https://github.com/orlp/pdqsort + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[stable(feature = "slice_sort_by_cached_key", since = "1.34.0")] + #[inline] + pub fn sort_by_cached_key<K, F>(&mut self, f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + // Helper macro for indexing our vector by the smallest possible type, to reduce allocation. + macro_rules! sort_by_key { + ($t:ty, $slice:ident, $f:ident) => {{ + let mut indices: Vec<_> = + $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect(); + // The elements of `indices` are unique, as they are indexed, so any sort will be + // stable with respect to the original slice. We use `sort_unstable` here because + // it requires less memory allocation. + indices.sort_unstable(); + for i in 0..$slice.len() { + let mut index = indices[i].1; + while (index as usize) < i { + index = indices[index as usize].1; + } + indices[i].1 = index; + $slice.swap(i, index as usize); + } + }}; + } + + let sz_u8 = mem::size_of::<(K, u8)>(); + let sz_u16 = mem::size_of::<(K, u16)>(); + let sz_u32 = mem::size_of::<(K, u32)>(); + let sz_usize = mem::size_of::<(K, usize)>(); + + let len = self.len(); + if len < 2 { + return; + } + if sz_u8 < sz_u16 && len <= (u8::MAX as usize) { + return sort_by_key!(u8, self, f); + } + if sz_u16 < sz_u32 && len <= (u16::MAX as usize) { + return sort_by_key!(u16, self, f); + } + if sz_u32 < sz_usize && len <= (u32::MAX as usize) { + return sort_by_key!(u32, self, f); + } + sort_by_key!(usize, self, f) + } + + /// Copies `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.to_vec(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[rustc_conversion_suggestion] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn to_vec(&self) -> Vec<T> + where + T: Clone, + { + self.to_vec_in(Global) + } + + /// Copies `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.to_vec_in(System); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn to_vec_in<A: Allocator>(&self, alloc: A) -> Vec<T, A> + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::to_vec(self, alloc) + } + + /// Converts `self` into a vector without clones or allocation. + /// + /// The resulting vector can be converted back into a box via + /// `Vec<T>`'s `into_boxed_slice` method. + /// + /// # Examples + /// + /// ``` + /// let s: Box<[i32]> = Box::new([10, 40, 30]); + /// let x = s.into_vec(); + /// // `s` cannot be used anymore because it has been converted into `x`. + /// + /// assert_eq!(x, vec![10, 40, 30]); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn into_vec<A: Allocator>(self: Box<Self, A>) -> Vec<T, A> { + // N.B., see the `hack` module in this file for more details. + hack::into_vec(self) + } + + /// Creates a vector by repeating a slice `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!([1, 2].repeat(3), vec![1, 2, 1, 2, 1, 2]); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// b"0123456789abcdef".repeat(usize::MAX); + /// ``` + #[rustc_allow_incoherent_impl] + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "repeat_generic_slice", since = "1.40.0")] + pub fn repeat(&self, n: usize) -> Vec<T> + where + T: Copy, + { + if n == 0 { + return Vec::new(); + } + + // If `n` is larger than zero, it can be split as + // `n = 2^expn + rem (2^expn > rem, expn >= 0, rem >= 0)`. + // `2^expn` is the number represented by the leftmost '1' bit of `n`, + // and `rem` is the remaining part of `n`. + + // Using `Vec` to access `set_len()`. + let capacity = self.len().checked_mul(n).expect("capacity overflow"); + let mut buf = Vec::with_capacity(capacity); + + // `2^expn` repetition is done by doubling `buf` `expn`-times. + buf.extend(self); + { + let mut m = n >> 1; + // If `m > 0`, there are remaining bits up to the leftmost '1'. + while m > 0 { + // `buf.extend(buf)`: + unsafe { + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + buf.len(), + ); + // `buf` has capacity of `self.len() * n`. + let buf_len = buf.len(); + buf.set_len(buf_len * 2); + } + + m >>= 1; + } + } + + // `rem` (`= n - 2^expn`) repetition is done by copying + // first `rem` repetitions from `buf` itself. + let rem_len = capacity - buf.len(); // `self.len() * rem` + if rem_len > 0 { + // `buf.extend(buf[0 .. rem_len])`: + unsafe { + // This is non-overlapping since `2^expn > rem`. + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + rem_len, + ); + // `buf.len() + rem_len` equals to `buf.capacity()` (`= self.len() * n`). + buf.set_len(capacity); + } + } + buf + } + + /// Flattens a slice of `T` into a single value `Self::Output`. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].concat(), "helloworld"); + /// assert_eq!([[1, 2], [3, 4]].concat(), [1, 2, 3, 4]); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn concat<Item: ?Sized>(&self) -> <Self as Concat<Item>>::Output + where + Self: Concat<Item>, + { + Concat::concat(self) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].join(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].join(&0), [1, 2, 0, 3, 4]); + /// assert_eq!([[1, 2], [3, 4]].join(&[0, 0][..]), [1, 2, 0, 0, 3, 4]); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rename_connect_to_join", since = "1.3.0")] + pub fn join<Separator>(&self, sep: Separator) -> <Self as Join<Separator>>::Output + where + Self: Join<Separator>, + { + Join::join(self, sep) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// # #![allow(deprecated)] + /// assert_eq!(["hello", "world"].connect(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].connect(&0), [1, 2, 0, 3, 4]); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[deprecated(since = "1.3.0", note = "renamed to join")] + pub fn connect<Separator>(&self, sep: Separator) -> <Self as Join<Separator>>::Output + where + Self: Join<Separator>, + { + Join::join(self, sep) + } +} + +#[cfg(not(test))] +impl [u8] { + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// [`make_ascii_uppercase`]: slice::make_ascii_uppercase + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the uppercase bytes as a new Vec, \ + without modifying the original"] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> Vec<u8> { + let mut me = self.to_vec(); + me.make_ascii_uppercase(); + me + } + + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: slice::make_ascii_lowercase + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the lowercase bytes as a new Vec, \ + without modifying the original"] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> Vec<u8> { + let mut me = self.to_vec(); + me.make_ascii_lowercase(); + me + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Extension traits for slices over specific kinds of data +//////////////////////////////////////////////////////////////////////////////// + +/// Helper trait for [`[T]::concat`](slice::concat). +/// +/// Note: the `Item` type parameter is not used in this trait, +/// but it allows impls to be more generic. +/// Without it, we get this error: +/// +/// ```error +/// error[E0207]: the type parameter `T` is not constrained by the impl trait, self type, or predica +/// --> src/liballoc/slice.rs:608:6 +/// | +/// 608 | impl<T: Clone, V: Borrow<[T]>> Concat for [V] { +/// | ^ unconstrained type parameter +/// ``` +/// +/// This is because there could exist `V` types with multiple `Borrow<[_]>` impls, +/// such that multiple `T` types would apply: +/// +/// ``` +/// # #[allow(dead_code)] +/// pub struct Foo(Vec<u32>, Vec<String>); +/// +/// impl std::borrow::Borrow<[u32]> for Foo { +/// fn borrow(&self) -> &[u32] { &self.0 } +/// } +/// +/// impl std::borrow::Borrow<[String]> for Foo { +/// fn borrow(&self) -> &[String] { &self.1 } +/// } +/// ``` +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Concat<Item: ?Sized> { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::concat`](slice::concat) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn concat(slice: &Self) -> Self::Output; +} + +/// Helper trait for [`[T]::join`](slice::join) +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Join<Separator> { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::join`](slice::join) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn join(slice: &Self, sep: Separator) -> Self::Output; +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl<T: Clone, V: Borrow<[T]>> Concat<T> for [V] { + type Output = Vec<T>; + + fn concat(slice: &Self) -> Vec<T> { + let size = slice.iter().map(|slice| slice.borrow().len()).sum(); + let mut result = Vec::with_capacity(size); + for v in slice { + result.extend_from_slice(v.borrow()) + } + result + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl<T: Clone, V: Borrow<[T]>> Join<&T> for [V] { + type Output = Vec<T>; + + fn join(slice: &Self, sep: &T) -> Vec<T> { + let mut iter = slice.iter(); + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + let size = slice.iter().map(|v| v.borrow().len()).sum::<usize>() + slice.len() - 1; + let mut result = Vec::with_capacity(size); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.push(sep.clone()); + result.extend_from_slice(v.borrow()) + } + result + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl<T: Clone, V: Borrow<[T]>> Join<&[T]> for [V] { + type Output = Vec<T>; + + fn join(slice: &Self, sep: &[T]) -> Vec<T> { + let mut iter = slice.iter(); + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + let size = + slice.iter().map(|v| v.borrow().len()).sum::<usize>() + sep.len() * (slice.len() - 1); + let mut result = Vec::with_capacity(size); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.extend_from_slice(sep); + result.extend_from_slice(v.borrow()) + } + result + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Standard trait implementations for slices +//////////////////////////////////////////////////////////////////////////////// + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T> Borrow<[T]> for Vec<T> { + fn borrow(&self) -> &[T] { + &self[..] + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T> BorrowMut<[T]> for Vec<T> { + fn borrow_mut(&mut self) -> &mut [T] { + &mut self[..] + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Clone> ToOwned for [T] { + type Owned = Vec<T>; + #[cfg(not(test))] + fn to_owned(&self) -> Vec<T> { + self.to_vec() + } + + #[cfg(test)] + fn to_owned(&self) -> Vec<T> { + hack::to_vec(self, Global) + } + + fn clone_into(&self, target: &mut Vec<T>) { + // drop anything in target that will not be overwritten + target.truncate(self.len()); + + // target.len <= self.len due to the truncate above, so the + // slices here are always in-bounds. + let (init, tail) = self.split_at(target.len()); + + // reuse the contained values' allocations/resources. + target.clone_from_slice(init); + target.extend_from_slice(tail); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Sorting +//////////////////////////////////////////////////////////////////////////////// + +/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. +/// +/// This is the integral subroutine of insertion sort. +#[cfg(not(no_global_oom_handling))] +fn insert_head<T, F>(v: &mut [T], is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + if v.len() >= 2 && is_less(&v[1], &v[0]) { + unsafe { + // There are three ways to implement insertion here: + // + // 1. Swap adjacent elements until the first one gets to its final destination. + // However, this way we copy data around more than is necessary. If elements are big + // structures (costly to copy), this method will be slow. + // + // 2. Iterate until the right place for the first element is found. Then shift the + // elements succeeding it to make room for it and finally place it into the + // remaining hole. This is a good method. + // + // 3. Copy the first element into a temporary variable. Iterate until the right place + // for it is found. As we go along, copy every traversed element into the slot + // preceding it. Finally, copy data from the temporary variable into the remaining + // hole. This method is very good. Benchmarks demonstrated slightly better + // performance than with the 2nd method. + // + // All methods were benchmarked, and the 3rd showed best results. So we chose that one. + let tmp = mem::ManuallyDrop::new(ptr::read(&v[0])); + + // Intermediate state of the insertion process is always tracked by `hole`, which + // serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. Fills the remaining hole in `v` in the end. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and + // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it + // initially held exactly once. + let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] }; + ptr::copy_nonoverlapping(&v[1], &mut v[0], 1); + + for i in 2..v.len() { + if !is_less(&v[i], &*tmp) { + break; + } + ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1); + hole.dest = &mut v[i]; + } + // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. + } + } + + // When dropped, copies from `src` into `dest`. + struct InsertionHole<T> { + src: *const T, + dest: *mut T, + } + + impl<T> Drop for InsertionHole<T> { + fn drop(&mut self) { + unsafe { + ptr::copy_nonoverlapping(self.src, self.dest, 1); + } + } + } +} + +/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and +/// stores the result into `v[..]`. +/// +/// # Safety +/// +/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough +/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type. +#[cfg(not(no_global_oom_handling))] +unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + let len = v.len(); + let v = v.as_mut_ptr(); + let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) }; + + // The merge process first copies the shorter run into `buf`. Then it traces the newly copied + // run and the longer run forwards (or backwards), comparing their next unconsumed elements and + // copying the lesser (or greater) one into `v`. + // + // As soon as the shorter run is fully consumed, the process is done. If the longer run gets + // consumed first, then we must copy whatever is left of the shorter run into the remaining + // hole in `v`. + // + // Intermediate state of the process is always tracked by `hole`, which serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. Fills the remaining hole in `v` if the longer run gets consumed first. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and fill the + // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every + // object it initially held exactly once. + let mut hole; + + if mid <= len - mid { + // The left run is shorter. + unsafe { + ptr::copy_nonoverlapping(v, buf, mid); + hole = MergeHole { start: buf, end: buf.add(mid), dest: v }; + } + + // Initially, these pointers point to the beginnings of their arrays. + let left = &mut hole.start; + let mut right = v_mid; + let out = &mut hole.dest; + + while *left < hole.end && right < v_end { + // Consume the lesser side. + // If equal, prefer the left run to maintain stability. + unsafe { + let to_copy = if is_less(&*right, &**left) { + get_and_increment(&mut right) + } else { + get_and_increment(left) + }; + ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1); + } + } + } else { + // The right run is shorter. + unsafe { + ptr::copy_nonoverlapping(v_mid, buf, len - mid); + hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid }; + } + + // Initially, these pointers point past the ends of their arrays. + let left = &mut hole.dest; + let right = &mut hole.end; + let mut out = v_end; + + while v < *left && buf < *right { + // Consume the greater side. + // If equal, prefer the right run to maintain stability. + unsafe { + let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) { + decrement_and_get(left) + } else { + decrement_and_get(right) + }; + ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1); + } + } + } + // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of + // it will now be copied into the hole in `v`. + + unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T { + let old = *ptr; + *ptr = unsafe { ptr.offset(1) }; + old + } + + unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T { + *ptr = unsafe { ptr.offset(-1) }; + *ptr + } + + // When dropped, copies the range `start..end` into `dest..`. + struct MergeHole<T> { + start: *mut T, + end: *mut T, + dest: *mut T, + } + + impl<T> Drop for MergeHole<T> { + fn drop(&mut self) { + // `T` is not a zero-sized type, and these are pointers into a slice's elements. + unsafe { + let len = self.end.sub_ptr(self.start); + ptr::copy_nonoverlapping(self.start, self.dest, len); + } + } + } +} + +/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail +/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt). +/// +/// The algorithm identifies strictly descending and non-descending subsequences, which are called +/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed +/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are +/// satisfied: +/// +/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len` +/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len` +/// +/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case. +#[cfg(not(no_global_oom_handling))] +fn merge_sort<T, F>(v: &mut [T], mut is_less: F) +where + F: FnMut(&T, &T) -> bool, +{ + // Slices of up to this length get sorted using insertion sort. + const MAX_INSERTION: usize = 20; + // Very short runs are extended using insertion sort to span at least this many elements. + const MIN_RUN: usize = 10; + + // Sorting has no meaningful behavior on zero-sized types. + if size_of::<T>() == 0 { + return; + } + + let len = v.len(); + + // Short arrays get sorted in-place via insertion sort to avoid allocations. + if len <= MAX_INSERTION { + if len >= 2 { + for i in (0..len - 1).rev() { + insert_head(&mut v[i..], &mut is_less); + } + } + return; + } + + // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it + // shallow copies of the contents of `v` without risking the dtors running on copies if + // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run, + // which will always have length at most `len / 2`. + let mut buf = Vec::with_capacity(len / 2); + + // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a + // strange decision, but consider the fact that merges more often go in the opposite direction + // (forwards). According to benchmarks, merging forwards is slightly faster than merging + // backwards. To conclude, identifying runs by traversing backwards improves performance. + let mut runs = vec![]; + let mut end = len; + while end > 0 { + // Find the next natural run, and reverse it if it's strictly descending. + let mut start = end - 1; + if start > 0 { + start -= 1; + unsafe { + if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) { + while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) { + start -= 1; + } + v[start..end].reverse(); + } else { + while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) + { + start -= 1; + } + } + } + } + + // Insert some more elements into the run if it's too short. Insertion sort is faster than + // merge sort on short sequences, so this significantly improves performance. + while start > 0 && end - start < MIN_RUN { + start -= 1; + insert_head(&mut v[start..end], &mut is_less); + } + + // Push this run onto the stack. + runs.push(Run { start, len: end - start }); + end = start; + + // Merge some pairs of adjacent runs to satisfy the invariants. + while let Some(r) = collapse(&runs) { + let left = runs[r + 1]; + let right = runs[r]; + unsafe { + merge( + &mut v[left.start..right.start + right.len], + left.len, + buf.as_mut_ptr(), + &mut is_less, + ); + } + runs[r] = Run { start: left.start, len: left.len + right.len }; + runs.remove(r + 1); + } + } + + // Finally, exactly one run must remain in the stack. + debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len); + + // Examines the stack of runs and identifies the next pair of runs to merge. More specifically, + // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the + // algorithm should continue building a new run instead, `None` is returned. + // + // TimSort is infamous for its buggy implementations, as described here: + // http://envisage-project.eu/timsort-specification-and-verification/ + // + // The gist of the story is: we must enforce the invariants on the top four runs on the stack. + // Enforcing them on just top three is not sufficient to ensure that the invariants will still + // hold for *all* runs in the stack. + // + // This function correctly checks invariants for the top four runs. Additionally, if the top + // run starts at index 0, it will always demand a merge operation until the stack is fully + // collapsed, in order to complete the sort. + #[inline] + fn collapse(runs: &[Run]) -> Option<usize> { + let n = runs.len(); + if n >= 2 + && (runs[n - 1].start == 0 + || runs[n - 2].len <= runs[n - 1].len + || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len) + || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len)) + { + if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) } + } else { + None + } + } + + #[derive(Clone, Copy)] + struct Run { + start: usize, + len: usize, + } +} diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs new file mode 100644 index 000000000000..b6a5f98e4fcd --- /dev/null +++ b/rust/alloc/vec/drain.rs @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::{Allocator, Global}; +use core::fmt; +use core::iter::{FusedIterator, TrustedLen}; +use core::mem; +use core::ptr::{self, NonNull}; +use core::slice::{self}; + +use super::Vec; + +/// A draining iterator for `Vec<T>`. +/// +/// This `struct` is created by [`Vec::drain`]. +/// See its documentation for more. +/// +/// # Example +/// +/// ``` +/// let mut v = vec![0, 1, 2]; +/// let iter: std::vec::Drain<_> = v.drain(..); +/// ``` +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain< + 'a, + T: 'a, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator + 'a = Global, +> { + /// Index of tail to preserve + pub(super) tail_start: usize, + /// Length of tail + pub(super) tail_len: usize, + /// Current remaining range to remove + pub(super) iter: slice::Iter<'a, T>, + pub(super) vec: NonNull<Vec<T, A>>, +} + +#[stable(feature = "collection_debug", since = "1.17.0")] +impl<T: fmt::Debug, A: Allocator> fmt::Debug for Drain<'_, T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.iter.as_slice()).finish() + } +} + +impl<'a, T, A: Allocator> Drain<'a, T, A> { + /// Returns the remaining items of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!['a', 'b', 'c']; + /// let mut drain = vec.drain(..); + /// assert_eq!(drain.as_slice(), &['a', 'b', 'c']); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_slice(), &['b', 'c']); + /// ``` + #[must_use] + #[stable(feature = "vec_drain_as_slice", since = "1.46.0")] + pub fn as_slice(&self) -> &[T] { + self.iter.as_slice() + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[must_use] + #[inline] + pub fn allocator(&self) -> &A { + unsafe { self.vec.as_ref().allocator() } + } +} + +#[stable(feature = "vec_drain_as_slice", since = "1.46.0")] +impl<'a, T, A: Allocator> AsRef<[T]> for Drain<'a, T, A> { + fn as_ref(&self) -> &[T] { + self.as_slice() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl<T: Sync, A: Sync + Allocator> Sync for Drain<'_, T, A> {} +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl<T: Send, A: Send + Allocator> Send for Drain<'_, T, A> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl<T, A: Allocator> Iterator for Drain<'_, T, A> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option<T> { + self.iter.next().map(|elt| unsafe { ptr::read(elt as *const _) }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl<T, A: Allocator> DoubleEndedIterator for Drain<'_, T, A> { + #[inline] + fn next_back(&mut self) -> Option<T> { + self.iter.next_back().map(|elt| unsafe { ptr::read(elt as *const _) }) + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl<T, A: Allocator> Drop for Drain<'_, T, A> { + fn drop(&mut self) { + /// Moves back the un-`Drain`ed elements to restore the original `Vec`. + struct DropGuard<'r, 'a, T, A: Allocator>(&'r mut Drain<'a, T, A>); + + impl<'r, 'a, T, A: Allocator> Drop for DropGuard<'r, 'a, T, A> { + fn drop(&mut self) { + if self.0.tail_len > 0 { + unsafe { + let source_vec = self.0.vec.as_mut(); + // memmove back untouched tail, update to new length + let start = source_vec.len(); + let tail = self.0.tail_start; + if tail != start { + let src = source_vec.as_ptr().add(tail); + let dst = source_vec.as_mut_ptr().add(start); + ptr::copy(src, dst, self.0.tail_len); + } + source_vec.set_len(start + self.0.tail_len); + } + } + } + } + + let iter = mem::replace(&mut self.iter, (&mut []).iter()); + let drop_len = iter.len(); + + let mut vec = self.vec; + + if mem::size_of::<T>() == 0 { + // ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount. + // this can be achieved by manipulating the Vec length instead of moving values out from `iter`. + unsafe { + let vec = vec.as_mut(); + let old_len = vec.len(); + vec.set_len(old_len + drop_len + self.tail_len); + vec.truncate(old_len + self.tail_len); + } + + return; + } + + // ensure elements are moved back into their appropriate places, even when drop_in_place panics + let _guard = DropGuard(self); + + if drop_len == 0 { + return; + } + + // as_slice() must only be called when iter.len() is > 0 because + // vec::Splice modifies vec::Drain fields and may grow the vec which would invalidate + // the iterator's internal pointers. Creating a reference to deallocated memory + // is invalid even when it is zero-length + let drop_ptr = iter.as_slice().as_ptr(); + + unsafe { + // drop_ptr comes from a slice::Iter which only gives us a &[T] but for drop_in_place + // a pointer with mutable provenance is necessary. Therefore we must reconstruct + // it from the original vec but also avoid creating a &mut to the front since that could + // invalidate raw pointers to it which some unsafe code might rely on. + let vec_ptr = vec.as_mut().as_mut_ptr(); + let drop_offset = drop_ptr.sub_ptr(vec_ptr); + let to_drop = ptr::slice_from_raw_parts_mut(vec_ptr.add(drop_offset), drop_len); + ptr::drop_in_place(to_drop); + } + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl<T, A: Allocator> ExactSizeIterator for Drain<'_, T, A> { + fn is_empty(&self) -> bool { + self.iter.is_empty() + } +} + +#[unstable(feature = "trusted_len", issue = "37572")] +unsafe impl<T, A: Allocator> TrustedLen for Drain<'_, T, A> {} + +#[stable(feature = "fused", since = "1.26.0")] +impl<T, A: Allocator> FusedIterator for Drain<'_, T, A> {} diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs new file mode 100644 index 000000000000..b04fce041622 --- /dev/null +++ b/rust/alloc/vec/drain_filter.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::{Allocator, Global}; +use core::ptr::{self}; +use core::slice::{self}; + +use super::Vec; + +/// An iterator which uses a closure to determine if an element should be removed. +/// +/// This struct is created by [`Vec::drain_filter`]. +/// See its documentation for more. +/// +/// # Example +/// +/// ``` +/// #![feature(drain_filter)] +/// +/// let mut v = vec![0, 1, 2]; +/// let iter: std::vec::DrainFilter<_, _> = v.drain_filter(|x| *x % 2 == 0); +/// ``` +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +#[derive(Debug)] +pub struct DrainFilter< + 'a, + T, + F, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> where + F: FnMut(&mut T) -> bool, +{ + pub(super) vec: &'a mut Vec<T, A>, + /// The index of the item that will be inspected by the next call to `next`. + pub(super) idx: usize, + /// The number of items that have been drained (removed) thus far. + pub(super) del: usize, + /// The original length of `vec` prior to draining. + pub(super) old_len: usize, + /// The filter test predicate. + pub(super) pred: F, + /// A flag that indicates a panic has occurred in the filter test predicate. + /// This is used as a hint in the drop implementation to prevent consumption + /// of the remainder of the `DrainFilter`. Any unprocessed items will be + /// backshifted in the `vec`, but no further items will be dropped or + /// tested by the filter predicate. + pub(super) panic_flag: bool, +} + +impl<T, F, A: Allocator> DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.vec.allocator() + } +} + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +impl<T, F, A: Allocator> Iterator for DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + type Item = T; + + fn next(&mut self) -> Option<T> { + unsafe { + while self.idx < self.old_len { + let i = self.idx; + let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); + self.panic_flag = true; + let drained = (self.pred)(&mut v[i]); + self.panic_flag = false; + // Update the index *after* the predicate is called. If the index + // is updated prior and the predicate panics, the element at this + // index would be leaked. + self.idx += 1; + if drained { + self.del += 1; + return Some(ptr::read(&v[i])); + } else if self.del > 0 { + let del = self.del; + let src: *const T = &v[i]; + let dst: *mut T = &mut v[i - del]; + ptr::copy_nonoverlapping(src, dst, 1); + } + } + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (0, Some(self.old_len - self.idx)) + } +} + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +impl<T, F, A: Allocator> Drop for DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + fn drop(&mut self) { + struct BackshiftOnDrop<'a, 'b, T, F, A: Allocator> + where + F: FnMut(&mut T) -> bool, + { + drain: &'b mut DrainFilter<'a, T, F, A>, + } + + impl<'a, 'b, T, F, A: Allocator> Drop for BackshiftOnDrop<'a, 'b, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + fn drop(&mut self) { + unsafe { + if self.drain.idx < self.drain.old_len && self.drain.del > 0 { + // This is a pretty messed up state, and there isn't really an + // obviously right thing to do. We don't want to keep trying + // to execute `pred`, so we just backshift all the unprocessed + // elements and tell the vec that they still exist. The backshift + // is required to prevent a double-drop of the last successfully + // drained item prior to a panic in the predicate. + let ptr = self.drain.vec.as_mut_ptr(); + let src = ptr.add(self.drain.idx); + let dst = src.sub(self.drain.del); + let tail_len = self.drain.old_len - self.drain.idx; + src.copy_to(dst, tail_len); + } + self.drain.vec.set_len(self.drain.old_len - self.drain.del); + } + } + } + + let backshift = BackshiftOnDrop { drain: self }; + + // Attempt to consume any remaining elements if the filter predicate + // has not yet panicked. We'll backshift any remaining elements + // whether we've already panicked or if the consumption here panics. + if !backshift.drain.panic_flag { + backshift.drain.for_each(drop); + } + } +} diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs new file mode 100644 index 000000000000..f7a50e76691e --- /dev/null +++ b/rust/alloc/vec/into_iter.rs @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#[cfg(not(no_global_oom_handling))] +use super::AsVecIntoIter; +use crate::alloc::{Allocator, Global}; +use crate::raw_vec::RawVec; +use core::fmt; +use core::intrinsics::arith_offset; +use core::iter::{ + FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce, +}; +use core::marker::PhantomData; +use core::mem::{self, ManuallyDrop}; +#[cfg(not(no_global_oom_handling))] +use core::ops::Deref; +use core::ptr::{self, NonNull}; +use core::slice::{self}; + +/// An iterator that moves out of a vector. +/// +/// This `struct` is created by the `into_iter` method on [`Vec`](super::Vec) +/// (provided by the [`IntoIterator`] trait). +/// +/// # Example +/// +/// ``` +/// let v = vec![0, 1, 2]; +/// let iter: std::vec::IntoIter<_> = v.into_iter(); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_insignificant_dtor] +pub struct IntoIter< + T, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> { + pub(super) buf: NonNull<T>, + pub(super) phantom: PhantomData<T>, + pub(super) cap: usize, + // the drop impl reconstructs a RawVec from buf, cap and alloc + // to avoid dropping the allocator twice we need to wrap it into ManuallyDrop + pub(super) alloc: ManuallyDrop<A>, + pub(super) ptr: *const T, + pub(super) end: *const T, +} + +#[stable(feature = "vec_intoiter_debug", since = "1.13.0")] +impl<T: fmt::Debug, A: Allocator> fmt::Debug for IntoIter<T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("IntoIter").field(&self.as_slice()).finish() + } +} + +impl<T, A: Allocator> IntoIter<T, A> { + /// Returns the remaining items of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// let vec = vec!['a', 'b', 'c']; + /// let mut into_iter = vec.into_iter(); + /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']); + /// let _ = into_iter.next().unwrap(); + /// assert_eq!(into_iter.as_slice(), &['b', 'c']); + /// ``` + #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")] + pub fn as_slice(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.ptr, self.len()) } + } + + /// Returns the remaining items of this iterator as a mutable slice. + /// + /// # Examples + /// + /// ``` + /// let vec = vec!['a', 'b', 'c']; + /// let mut into_iter = vec.into_iter(); + /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']); + /// into_iter.as_mut_slice()[2] = 'z'; + /// assert_eq!(into_iter.next().unwrap(), 'a'); + /// assert_eq!(into_iter.next().unwrap(), 'b'); + /// assert_eq!(into_iter.next().unwrap(), 'z'); + /// ``` + #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")] + pub fn as_mut_slice(&mut self) -> &mut [T] { + unsafe { &mut *self.as_raw_mut_slice() } + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + &self.alloc + } + + fn as_raw_mut_slice(&mut self) -> *mut [T] { + ptr::slice_from_raw_parts_mut(self.ptr as *mut T, self.len()) + } + + /// Drops remaining elements and relinquishes the backing allocation. + /// + /// This is roughly equivalent to the following, but more efficient + /// + /// ``` + /// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter(); + /// (&mut into_iter).for_each(core::mem::drop); + /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); } + /// ``` + /// + /// This method is used by in-place iteration, refer to the vec::in_place_collect + /// documentation for an overview. + #[cfg(not(no_global_oom_handling))] + pub(super) fn forget_allocation_drop_remaining(&mut self) { + let remaining = self.as_raw_mut_slice(); + + // overwrite the individual fields instead of creating a new + // struct and then overwriting &mut self. + // this creates less assembly + self.cap = 0; + self.buf = unsafe { NonNull::new_unchecked(RawVec::NEW.ptr()) }; + self.ptr = self.buf.as_ptr(); + self.end = self.buf.as_ptr(); + + unsafe { + ptr::drop_in_place(remaining); + } + } + + /// Forgets to Drop the remaining elements while still allowing the backing allocation to be freed. + #[allow(dead_code)] + pub(crate) fn forget_remaining_elements(&mut self) { + self.ptr = self.end; + } +} + +#[stable(feature = "vec_intoiter_as_ref", since = "1.46.0")] +impl<T, A: Allocator> AsRef<[T]> for IntoIter<T, A> { + fn as_ref(&self) -> &[T] { + self.as_slice() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<T: Send, A: Allocator + Send> Send for IntoIter<T, A> {} +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<T: Sync, A: Allocator + Sync> Sync for IntoIter<T, A> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> Iterator for IntoIter<T, A> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option<T> { + if self.ptr as *const _ == self.end { + None + } else if mem::size_of::<T>() == 0 { + // purposefully don't use 'ptr.offset' because for + // vectors with 0-size elements this would return the + // same pointer. + self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T }; + + // Make up a value of this ZST. + Some(unsafe { mem::zeroed() }) + } else { + let old = self.ptr; + self.ptr = unsafe { self.ptr.offset(1) }; + + Some(unsafe { ptr::read(old) }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let exact = if mem::size_of::<T>() == 0 { + self.end.addr().wrapping_sub(self.ptr.addr()) + } else { + unsafe { self.end.sub_ptr(self.ptr) } + }; + (exact, Some(exact)) + } + + #[inline] + fn advance_by(&mut self, n: usize) -> Result<(), usize> { + let step_size = self.len().min(n); + let to_drop = ptr::slice_from_raw_parts_mut(self.ptr as *mut T, step_size); + if mem::size_of::<T>() == 0 { + // SAFETY: due to unchecked casts of unsigned amounts to signed offsets the wraparound + // effectively results in unsigned pointers representing positions 0..usize::MAX, + // which is valid for ZSTs. + self.ptr = unsafe { arith_offset(self.ptr as *const i8, step_size as isize) as *mut T } + } else { + // SAFETY: the min() above ensures that step_size is in bounds + self.ptr = unsafe { self.ptr.add(step_size) }; + } + // SAFETY: the min() above ensures that step_size is in bounds + unsafe { + ptr::drop_in_place(to_drop); + } + if step_size < n { + return Err(step_size); + } + Ok(()) + } + + #[inline] + fn count(self) -> usize { + self.len() + } + + unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item + where + Self: TrustedRandomAccessNoCoerce, + { + // SAFETY: the caller must guarantee that `i` is in bounds of the + // `Vec<T>`, so `i` cannot overflow an `isize`, and the `self.ptr.add(i)` + // is guaranteed to pointer to an element of the `Vec<T>` and + // thus guaranteed to be valid to dereference. + // + // Also note the implementation of `Self: TrustedRandomAccess` requires + // that `T: Copy` so reading elements from the buffer doesn't invalidate + // them for `Drop`. + unsafe { + if mem::size_of::<T>() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> { + #[inline] + fn next_back(&mut self) -> Option<T> { + if self.end == self.ptr { + None + } else if mem::size_of::<T>() == 0 { + // See above for why 'ptr.offset' isn't used + self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T }; + + // Make up a value of this ZST. + Some(unsafe { mem::zeroed() }) + } else { + self.end = unsafe { self.end.offset(-1) }; + + Some(unsafe { ptr::read(self.end) }) + } + } + + #[inline] + fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { + let step_size = self.len().min(n); + if mem::size_of::<T>() == 0 { + // SAFETY: same as for advance_by() + self.end = unsafe { + arith_offset(self.end as *const i8, step_size.wrapping_neg() as isize) as *mut T + } + } else { + // SAFETY: same as for advance_by() + self.end = unsafe { self.end.offset(step_size.wrapping_neg() as isize) }; + } + let to_drop = ptr::slice_from_raw_parts_mut(self.end as *mut T, step_size); + // SAFETY: same as for advance_by() + unsafe { + ptr::drop_in_place(to_drop); + } + if step_size < n { + return Err(step_size); + } + Ok(()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> ExactSizeIterator for IntoIter<T, A> { + fn is_empty(&self) -> bool { + self.ptr == self.end + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl<T, A: Allocator> FusedIterator for IntoIter<T, A> {} + +#[unstable(feature = "trusted_len", issue = "37572")] +unsafe impl<T, A: Allocator> TrustedLen for IntoIter<T, A> {} + +#[doc(hidden)] +#[unstable(issue = "none", feature = "std_internals")] +#[rustc_unsafe_specialization_marker] +pub trait NonDrop {} + +// T: Copy as approximation for !Drop since get_unchecked does not advance self.ptr +// and thus we can't implement drop-handling +#[unstable(issue = "none", feature = "std_internals")] +impl<T: Copy> NonDrop for T {} + +#[doc(hidden)] +#[unstable(issue = "none", feature = "std_internals")] +// TrustedRandomAccess (without NoCoerce) must not be implemented because +// subtypes/supertypes of `T` might not be `NonDrop` +unsafe impl<T, A: Allocator> TrustedRandomAccessNoCoerce for IntoIter<T, A> +where + T: NonDrop, +{ + const MAY_HAVE_SIDE_EFFECT: bool = false; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_into_iter_clone", since = "1.8.0")] +impl<T: Clone, A: Allocator + Clone> Clone for IntoIter<T, A> { + #[cfg(not(test))] + fn clone(&self) -> Self { + self.as_slice().to_vec_in(self.alloc.deref().clone()).into_iter() + } + #[cfg(test)] + fn clone(&self) -> Self { + crate::slice::to_vec(self.as_slice(), self.alloc.deref().clone()).into_iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T, A: Allocator> Drop for IntoIter<T, A> { + fn drop(&mut self) { + struct DropGuard<'a, T, A: Allocator>(&'a mut IntoIter<T, A>); + + impl<T, A: Allocator> Drop for DropGuard<'_, T, A> { + fn drop(&mut self) { + unsafe { + // `IntoIter::alloc` is not used anymore after this and will be dropped by RawVec + let alloc = ManuallyDrop::take(&mut self.0.alloc); + // RawVec handles deallocation + let _ = RawVec::from_raw_parts_in(self.0.buf.as_ptr(), self.0.cap, alloc); + } + } + } + + let guard = DropGuard(self); + // destroy the remaining elements + unsafe { + ptr::drop_in_place(guard.0.as_raw_mut_slice()); + } + // now `guard` will be dropped and do the rest + } +} + +// In addition to the SAFETY invariants of the following three unsafe traits +// also refer to the vec::in_place_collect module documentation to get an overview +#[unstable(issue = "none", feature = "inplace_iteration")] +#[doc(hidden)] +unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {} + +#[unstable(issue = "none", feature = "inplace_iteration")] +#[doc(hidden)] +unsafe impl<T, A: Allocator> SourceIter for IntoIter<T, A> { + type Source = Self; + + #[inline] + unsafe fn as_inner(&mut self) -> &mut Self::Source { + self + } +} + +#[cfg(not(no_global_oom_handling))] +unsafe impl<T> AsVecIntoIter for IntoIter<T> { + type Item = T; + + fn as_into_iter(&mut self) -> &mut IntoIter<Self::Item> { + self + } +} diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs new file mode 100644 index 000000000000..377f3d172777 --- /dev/null +++ b/rust/alloc/vec/is_zero.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::boxed::Box; + +#[rustc_specialization_trait] +pub(super) unsafe trait IsZero { + /// Whether this value's representation is all zeros + fn is_zero(&self) -> bool; +} + +macro_rules! impl_is_zero { + ($t:ty, $is_zero:expr) => { + unsafe impl IsZero for $t { + #[inline] + fn is_zero(&self) -> bool { + $is_zero(*self) + } + } + }; +} + +impl_is_zero!(i16, |x| x == 0); +impl_is_zero!(i32, |x| x == 0); +impl_is_zero!(i64, |x| x == 0); +impl_is_zero!(i128, |x| x == 0); +impl_is_zero!(isize, |x| x == 0); + +impl_is_zero!(u16, |x| x == 0); +impl_is_zero!(u32, |x| x == 0); +impl_is_zero!(u64, |x| x == 0); +impl_is_zero!(u128, |x| x == 0); +impl_is_zero!(usize, |x| x == 0); + +impl_is_zero!(bool, |x| x == false); +impl_is_zero!(char, |x| x == '\0'); + +impl_is_zero!(f32, |x: f32| x.to_bits() == 0); +impl_is_zero!(f64, |x: f64| x.to_bits() == 0); + +unsafe impl<T> IsZero for *const T { + #[inline] + fn is_zero(&self) -> bool { + (*self).is_null() + } +} + +unsafe impl<T> IsZero for *mut T { + #[inline] + fn is_zero(&self) -> bool { + (*self).is_null() + } +} + +unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] { + #[inline] + fn is_zero(&self) -> bool { + // Because this is generated as a runtime check, it's not obvious that + // it's worth doing if the array is really long. The threshold here + // is largely arbitrary, but was picked because as of 2022-05-01 LLVM + // can const-fold the check in `vec![[0; 32]; n]` but not in + // `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b + // Feel free to tweak if you have better evidence. + + N <= 32 && self.iter().all(IsZero::is_zero) + } +} + +// `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null. +// For fat pointers, the bytes that would be the pointer metadata in the `Some` +// variant are padding in the `None` variant, so ignoring them and +// zero-initializing instead is ok. +// `Option<&mut T>` never implements `Clone`, so there's no need for an impl of +// `SpecFromElem`. + +unsafe impl<T: ?Sized> IsZero for Option<&T> { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } +} + +unsafe impl<T: ?Sized> IsZero for Option<Box<T>> { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } +} + +// `Option<num::NonZeroU32>` and similar have a representation guarantee that +// they're the same size as the corresponding `u32` type, as well as a guarantee +// that transmuting between `NonZeroU32` and `Option<num::NonZeroU32>` works. +// While the documentation officially makes it UB to transmute from `None`, +// we're the standard library so we can make extra inferences, and we know that +// the only niche available to represent `None` is the one that's all zeros. + +macro_rules! impl_is_zero_option_of_nonzero { + ($($t:ident,)+) => {$( + unsafe impl IsZero for Option<core::num::$t> { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } + } + )+}; +} + +impl_is_zero_option_of_nonzero!( + NonZeroU8, + NonZeroU16, + NonZeroU32, + NonZeroU64, + NonZeroU128, + NonZeroI8, + NonZeroI16, + NonZeroI32, + NonZeroI64, + NonZeroI128, + NonZeroUsize, + NonZeroIsize, +); diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs new file mode 100644 index 000000000000..540787804cc2 --- /dev/null +++ b/rust/alloc/vec/mod.rs @@ -0,0 +1,3140 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A contiguous growable array type with heap-allocated contents, written +//! `Vec<T>`. +//! +//! Vectors have *O*(1) indexing, amortized *O*(1) push (to the end) and +//! *O*(1) pop (from the end). +//! +//! Vectors ensure they never allocate more than `isize::MAX` bytes. +//! +//! # Examples +//! +//! You can explicitly create a [`Vec`] with [`Vec::new`]: +//! +//! ``` +//! let v: Vec<i32> = Vec::new(); +//! ``` +//! +//! ...or by using the [`vec!`] macro: +//! +//! ``` +//! let v: Vec<i32> = vec![]; +//! +//! let v = vec![1, 2, 3, 4, 5]; +//! +//! let v = vec![0; 10]; // ten zeroes +//! ``` +//! +//! You can [`push`] values onto the end of a vector (which will grow the vector +//! as needed): +//! +//! ``` +//! let mut v = vec![1, 2]; +//! +//! v.push(3); +//! ``` +//! +//! Popping values works in much the same way: +//! +//! ``` +//! let mut v = vec![1, 2]; +//! +//! let two = v.pop(); +//! ``` +//! +//! Vectors also support indexing (through the [`Index`] and [`IndexMut`] traits): +//! +//! ``` +//! let mut v = vec![1, 2, 3]; +//! let three = v[2]; +//! v[1] = v[1] + 5; +//! ``` +//! +//! [`push`]: Vec::push + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +use core::cmp; +use core::cmp::Ordering; +use core::convert::TryFrom; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::intrinsics::{arith_offset, assume}; +use core::iter; +#[cfg(not(no_global_oom_handling))] +use core::iter::FromIterator; +use core::marker::PhantomData; +use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::ops::{self, Index, IndexMut, Range, RangeBounds}; +use core::ptr::{self, NonNull}; +use core::slice::{self, SliceIndex}; + +use crate::alloc::{Allocator, Global}; +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::raw_vec::RawVec; + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +pub use self::drain_filter::DrainFilter; + +mod drain_filter; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_splice", since = "1.21.0")] +pub use self::splice::Splice; + +#[cfg(not(no_global_oom_handling))] +mod splice; + +#[stable(feature = "drain", since = "1.6.0")] +pub use self::drain::Drain; + +mod drain; + +#[cfg(not(no_global_oom_handling))] +mod cow; + +#[cfg(not(no_global_oom_handling))] +pub(crate) use self::in_place_collect::AsVecIntoIter; +#[stable(feature = "rust1", since = "1.0.0")] +pub use self::into_iter::IntoIter; + +mod into_iter; + +#[cfg(not(no_global_oom_handling))] +use self::is_zero::IsZero; + +mod is_zero; + +#[cfg(not(no_global_oom_handling))] +mod in_place_collect; + +mod partial_eq; + +#[cfg(not(no_global_oom_handling))] +use self::spec_from_elem::SpecFromElem; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_elem; + +#[cfg(not(no_global_oom_handling))] +use self::set_len_on_drop::SetLenOnDrop; + +#[cfg(not(no_global_oom_handling))] +mod set_len_on_drop; + +#[cfg(not(no_global_oom_handling))] +use self::in_place_drop::InPlaceDrop; + +#[cfg(not(no_global_oom_handling))] +mod in_place_drop; + +#[cfg(not(no_global_oom_handling))] +use self::spec_from_iter_nested::SpecFromIterNested; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_iter_nested; + +#[cfg(not(no_global_oom_handling))] +use self::spec_from_iter::SpecFromIter; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_iter; + +#[cfg(not(no_global_oom_handling))] +use self::spec_extend::SpecExtend; + +#[cfg(not(no_global_oom_handling))] +mod spec_extend; + +/// A contiguous growable array type, written as `Vec<T>`, short for 'vector'. +/// +/// # Examples +/// +/// ``` +/// let mut vec = Vec::new(); +/// vec.push(1); +/// vec.push(2); +/// +/// assert_eq!(vec.len(), 2); +/// assert_eq!(vec[0], 1); +/// +/// assert_eq!(vec.pop(), Some(2)); +/// assert_eq!(vec.len(), 1); +/// +/// vec[0] = 7; +/// assert_eq!(vec[0], 7); +/// +/// vec.extend([1, 2, 3].iter().copied()); +/// +/// for x in &vec { +/// println!("{x}"); +/// } +/// assert_eq!(vec, [7, 1, 2, 3]); +/// ``` +/// +/// The [`vec!`] macro is provided for convenient initialization: +/// +/// ``` +/// let mut vec1 = vec![1, 2, 3]; +/// vec1.push(4); +/// let vec2 = Vec::from([1, 2, 3, 4]); +/// assert_eq!(vec1, vec2); +/// ``` +/// +/// It can also initialize each element of a `Vec<T>` with a given value. +/// This may be more efficient than performing allocation and initialization +/// in separate steps, especially when initializing a vector of zeros: +/// +/// ``` +/// let vec = vec![0; 5]; +/// assert_eq!(vec, [0, 0, 0, 0, 0]); +/// +/// // The following is equivalent, but potentially slower: +/// let mut vec = Vec::with_capacity(5); +/// vec.resize(5, 0); +/// assert_eq!(vec, [0, 0, 0, 0, 0]); +/// ``` +/// +/// For more information, see +/// [Capacity and Reallocation](#capacity-and-reallocation). +/// +/// Use a `Vec<T>` as an efficient stack: +/// +/// ``` +/// let mut stack = Vec::new(); +/// +/// stack.push(1); +/// stack.push(2); +/// stack.push(3); +/// +/// while let Some(top) = stack.pop() { +/// // Prints 3, 2, 1 +/// println!("{top}"); +/// } +/// ``` +/// +/// # Indexing +/// +/// The `Vec` type allows to access values by index, because it implements the +/// [`Index`] trait. An example will be more explicit: +/// +/// ``` +/// let v = vec![0, 2, 4, 6]; +/// println!("{}", v[1]); // it will display '2' +/// ``` +/// +/// However be careful: if you try to access an index which isn't in the `Vec`, +/// your software will panic! You cannot do this: +/// +/// ```should_panic +/// let v = vec![0, 2, 4, 6]; +/// println!("{}", v[6]); // it will panic! +/// ``` +/// +/// Use [`get`] and [`get_mut`] if you want to check whether the index is in +/// the `Vec`. +/// +/// # Slicing +/// +/// A `Vec` can be mutable. On the other hand, slices are read-only objects. +/// To get a [slice][prim@slice], use [`&`]. Example: +/// +/// ``` +/// fn read_slice(slice: &[usize]) { +/// // ... +/// } +/// +/// let v = vec![0, 1]; +/// read_slice(&v); +/// +/// // ... and that's all! +/// // you can also do it like this: +/// let u: &[usize] = &v; +/// // or like this: +/// let u: &[_] = &v; +/// ``` +/// +/// In Rust, it's more common to pass slices as arguments rather than vectors +/// when you just want to provide read access. The same goes for [`String`] and +/// [`&str`]. +/// +/// # Capacity and reallocation +/// +/// The capacity of a vector is the amount of space allocated for any future +/// elements that will be added onto the vector. This is not to be confused with +/// the *length* of a vector, which specifies the number of actual elements +/// within the vector. If a vector's length exceeds its capacity, its capacity +/// will automatically be increased, but its elements will have to be +/// reallocated. +/// +/// For example, a vector with capacity 10 and length 0 would be an empty vector +/// with space for 10 more elements. Pushing 10 or fewer elements onto the +/// vector will not change its capacity or cause reallocation to occur. However, +/// if the vector's length is increased to 11, it will have to reallocate, which +/// can be slow. For this reason, it is recommended to use [`Vec::with_capacity`] +/// whenever possible to specify how big the vector is expected to get. +/// +/// # Guarantees +/// +/// Due to its incredibly fundamental nature, `Vec` makes a lot of guarantees +/// about its design. This ensures that it's as low-overhead as possible in +/// the general case, and can be correctly manipulated in primitive ways +/// by unsafe code. Note that these guarantees refer to an unqualified `Vec<T>`. +/// If additional type parameters are added (e.g., to support custom allocators), +/// overriding their defaults may change the behavior. +/// +/// Most fundamentally, `Vec` is and always will be a (pointer, capacity, length) +/// triplet. No more, no less. The order of these fields is completely +/// unspecified, and you should use the appropriate methods to modify these. +/// The pointer will never be null, so this type is null-pointer-optimized. +/// +/// However, the pointer might not actually point to allocated memory. In particular, +/// if you construct a `Vec` with capacity 0 via [`Vec::new`], [`vec![]`][`vec!`], +/// [`Vec::with_capacity(0)`][`Vec::with_capacity`], or by calling [`shrink_to_fit`] +/// on an empty Vec, it will not allocate memory. Similarly, if you store zero-sized +/// types inside a `Vec`, it will not allocate space for them. *Note that in this case +/// the `Vec` might not report a [`capacity`] of 0*. `Vec` will allocate if and only +/// if <code>[mem::size_of::\<T>]\() * [capacity]\() > 0</code>. In general, `Vec`'s allocation +/// details are very subtle --- if you intend to allocate memory using a `Vec` +/// and use it for something else (either to pass to unsafe code, or to build your +/// own memory-backed collection), be sure to deallocate this memory by using +/// `from_raw_parts` to recover the `Vec` and then dropping it. +/// +/// If a `Vec` *has* allocated memory, then the memory it points to is on the heap +/// (as defined by the allocator Rust is configured to use by default), and its +/// pointer points to [`len`] initialized, contiguous elements in order (what +/// you would see if you coerced it to a slice), followed by <code>[capacity] - [len]</code> +/// logically uninitialized, contiguous elements. +/// +/// A vector containing the elements `'a'` and `'b'` with capacity 4 can be +/// visualized as below. The top part is the `Vec` struct, it contains a +/// pointer to the head of the allocation in the heap, length and capacity. +/// The bottom part is the allocation on the heap, a contiguous memory block. +/// +/// ```text +/// ptr len capacity +/// +--------+--------+--------+ +/// | 0x0123 | 2 | 4 | +/// +--------+--------+--------+ +/// | +/// v +/// Heap +--------+--------+--------+--------+ +/// | 'a' | 'b' | uninit | uninit | +/// +--------+--------+--------+--------+ +/// ``` +/// +/// - **uninit** represents memory that is not initialized, see [`MaybeUninit`]. +/// - Note: the ABI is not stable and `Vec` makes no guarantees about its memory +/// layout (including the order of fields). +/// +/// `Vec` will never perform a "small optimization" where elements are actually +/// stored on the stack for two reasons: +/// +/// * It would make it more difficult for unsafe code to correctly manipulate +/// a `Vec`. The contents of a `Vec` wouldn't have a stable address if it were +/// only moved, and it would be more difficult to determine if a `Vec` had +/// actually allocated memory. +/// +/// * It would penalize the general case, incurring an additional branch +/// on every access. +/// +/// `Vec` will never automatically shrink itself, even if completely empty. This +/// ensures no unnecessary allocations or deallocations occur. Emptying a `Vec` +/// and then filling it back up to the same [`len`] should incur no calls to +/// the allocator. If you wish to free up unused memory, use +/// [`shrink_to_fit`] or [`shrink_to`]. +/// +/// [`push`] and [`insert`] will never (re)allocate if the reported capacity is +/// sufficient. [`push`] and [`insert`] *will* (re)allocate if +/// <code>[len] == [capacity]</code>. That is, the reported capacity is completely +/// accurate, and can be relied on. It can even be used to manually free the memory +/// allocated by a `Vec` if desired. Bulk insertion methods *may* reallocate, even +/// when not necessary. +/// +/// `Vec` does not guarantee any particular growth strategy when reallocating +/// when full, nor when [`reserve`] is called. The current strategy is basic +/// and it may prove desirable to use a non-constant growth factor. Whatever +/// strategy is used will of course guarantee *O*(1) amortized [`push`]. +/// +/// `vec![x; n]`, `vec![a, b, c, d]`, and +/// [`Vec::with_capacity(n)`][`Vec::with_capacity`], will all produce a `Vec` +/// with exactly the requested capacity. If <code>[len] == [capacity]</code>, +/// (as is the case for the [`vec!`] macro), then a `Vec<T>` can be converted to +/// and from a [`Box<[T]>`][owned slice] without reallocating or moving the elements. +/// +/// `Vec` will not specifically overwrite any data that is removed from it, +/// but also won't specifically preserve it. Its uninitialized memory is +/// scratch space that it may use however it wants. It will generally just do +/// whatever is most efficient or otherwise easy to implement. Do not rely on +/// removed data to be erased for security purposes. Even if you drop a `Vec`, its +/// buffer may simply be reused by another allocation. Even if you zero a `Vec`'s memory +/// first, that might not actually happen because the optimizer does not consider +/// this a side-effect that must be preserved. There is one case which we will +/// not break, however: using `unsafe` code to write to the excess capacity, +/// and then increasing the length to match, is always valid. +/// +/// Currently, `Vec` does not guarantee the order in which elements are dropped. +/// The order has changed in the past and may change again. +/// +/// [`get`]: ../../std/vec/struct.Vec.html#method.get +/// [`get_mut`]: ../../std/vec/struct.Vec.html#method.get_mut +/// [`String`]: crate::string::String +/// [`&str`]: type@str +/// [`shrink_to_fit`]: Vec::shrink_to_fit +/// [`shrink_to`]: Vec::shrink_to +/// [capacity]: Vec::capacity +/// [`capacity`]: Vec::capacity +/// [mem::size_of::\<T>]: core::mem::size_of +/// [len]: Vec::len +/// [`len`]: Vec::len +/// [`push`]: Vec::push +/// [`insert`]: Vec::insert +/// [`reserve`]: Vec::reserve +/// [`MaybeUninit`]: core::mem::MaybeUninit +/// [owned slice]: Box +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "Vec")] +#[rustc_insignificant_dtor] +pub struct Vec<T, #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global> { + buf: RawVec<T, A>, + len: usize, +} + +//////////////////////////////////////////////////////////////////////////////// +// Inherent methods +//////////////////////////////////////////////////////////////////////////////// + +impl<T> Vec<T> { + /// Constructs a new, empty `Vec<T>`. + /// + /// The vector will not allocate until elements are pushed onto it. + /// + /// # Examples + /// + /// ``` + /// # #![allow(unused_mut)] + /// let mut vec: Vec<i32> = Vec::new(); + /// ``` + #[inline] + #[rustc_const_stable(feature = "const_vec_new", since = "1.39.0")] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub const fn new() -> Self { + Vec { buf: RawVec::NEW, len: 0 } + } + + /// Constructs a new, empty `Vec<T>` with the specified capacity. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } + + /// Creates a `Vec<T>` directly from the raw components of another vector. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>` + /// (at least, it's highly likely to be incorrect if it wasn't). + /// * `T` needs to have the same alignment as what `ptr` was allocated with. + /// (`T` having a less strict alignment is not sufficient, the alignment really + /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be + /// allocated and deallocated with the same layout.) + /// * The size of `T` times the `capacity` (ie. the allocated size in bytes) needs + /// to be the same size as the pointer was allocated with. (Because similar to + /// alignment, [`dealloc`] must be called with the same layout `size`.) + /// * `length` needs to be less than or equal to `capacity`. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. For example it is normally **not** safe + /// to build a `Vec<u8>` from a pointer to a C `char` array with length + /// `size_t`, doing so is only safe if the array was initially allocated by + /// a `Vec` or `String`. + /// It's also not safe to build one from a `Vec<u16>` and its length, because + /// the allocator cares about the alignment, and these two types have different + /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after + /// turning it into a `Vec<u8>` it'll be deallocated with alignment 1. To avoid + /// these issues, it is often preferable to do casting/transmuting using + /// [`slice::from_raw_parts`] instead. + /// + /// The ownership of `ptr` is effectively transferred to the + /// `Vec<T>` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// [`String`]: crate::string::String + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// + /// # Examples + /// + /// ``` + /// use std::ptr; + /// use std::mem; + /// + /// let v = vec![1, 2, 3]; + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent running `v`'s destructor so we are in complete control + /// // of the allocation. + /// let mut v = mem::ManuallyDrop::new(v); + /// + /// // Pull out the various important pieces of information about `v` + /// let p = v.as_mut_ptr(); + /// let len = v.len(); + /// let cap = v.capacity(); + /// + /// unsafe { + /// // Overwrite memory with 4, 5, 6 + /// for i in 0..len as isize { + /// ptr::write(p.offset(i), 4 + i); + /// } + /// + /// // Put everything back together into a Vec + /// let rebuilt = Vec::from_raw_parts(p, len, cap); + /// assert_eq!(rebuilt, [4, 5, 6]); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self { + unsafe { Self::from_raw_parts_in(ptr, length, capacity, Global) } + } +} + +impl<T, A: Allocator> Vec<T, A> { + /// Constructs a new, empty `Vec<T, A>`. + /// + /// The vector will not allocate until elements are pushed onto it. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// # #[allow(unused_mut)] + /// let mut vec: Vec<i32, _> = Vec::new_in(System); + /// ``` + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub const fn new_in(alloc: A) -> Self { + Vec { buf: RawVec::new_in(alloc), len: 0 } + } + + /// Constructs a new, empty `Vec<T, A>` with the specified capacity with the provided + /// allocator. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let mut vec = Vec::with_capacity_in(10, System); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 } + } + + /// Creates a `Vec<T, A>` directly from the raw components of another vector. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>` + /// (at least, it's highly likely to be incorrect if it wasn't). + /// * `T` needs to have the same size and alignment as what `ptr` was allocated with. + /// (`T` having a less strict alignment is not sufficient, the alignment really + /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be + /// allocated and deallocated with the same layout.) + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. For example it is **not** safe + /// to build a `Vec<u8>` from a pointer to a C `char` array with length `size_t`. + /// It's also not safe to build one from a `Vec<u16>` and its length, because + /// the allocator cares about the alignment, and these two types have different + /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after + /// turning it into a `Vec<u8>` it'll be deallocated with alignment 1. + /// + /// The ownership of `ptr` is effectively transferred to the + /// `Vec<T>` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// [`String`]: crate::string::String + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// use std::ptr; + /// use std::mem; + /// + /// let mut v = Vec::with_capacity_in(3, System); + /// v.push(1); + /// v.push(2); + /// v.push(3); + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent running `v`'s destructor so we are in complete control + /// // of the allocation. + /// let mut v = mem::ManuallyDrop::new(v); + /// + /// // Pull out the various important pieces of information about `v` + /// let p = v.as_mut_ptr(); + /// let len = v.len(); + /// let cap = v.capacity(); + /// let alloc = v.allocator(); + /// + /// unsafe { + /// // Overwrite memory with 4, 5, 6 + /// for i in 0..len as isize { + /// ptr::write(p.offset(i), 4 + i); + /// } + /// + /// // Put everything back together into a Vec + /// let rebuilt = Vec::from_raw_parts_in(p, len, cap, alloc.clone()); + /// assert_eq!(rebuilt, [4, 5, 6]); + /// } + /// ``` + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self { + unsafe { Vec { buf: RawVec::from_raw_parts_in(ptr, capacity, alloc), len: length } } + } + + /// Decomposes a `Vec<T>` into its raw components. + /// + /// Returns the raw pointer to the underlying data, the length of + /// the vector (in elements), and the allocated capacity of the + /// data (in elements). These are the same arguments in the same + /// order as the arguments to [`from_raw_parts`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Vec`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `Vec` with the [`from_raw_parts`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts`]: Vec::from_raw_parts + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_into_raw_parts)] + /// let v: Vec<i32> = vec![-1, 0, 1]; + /// + /// let (ptr, len, cap) = v.into_raw_parts(); + /// + /// let rebuilt = unsafe { + /// // We can now make changes to the components, such as + /// // transmuting the raw pointer to a compatible type. + /// let ptr = ptr as *mut u32; + /// + /// Vec::from_raw_parts(ptr, len, cap) + /// }; + /// assert_eq!(rebuilt, [4294967295, 0, 1]); + /// ``` + #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts(self) -> (*mut T, usize, usize) { + let mut me = ManuallyDrop::new(self); + (me.as_mut_ptr(), me.len(), me.capacity()) + } + + /// Decomposes a `Vec<T>` into its raw components. + /// + /// Returns the raw pointer to the underlying data, the length of the vector (in elements), + /// the allocated capacity of the data (in elements), and the allocator. These are the same + /// arguments in the same order as the arguments to [`from_raw_parts_in`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Vec`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `Vec` with the [`from_raw_parts_in`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts_in`]: Vec::from_raw_parts_in + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, vec_into_raw_parts)] + /// + /// use std::alloc::System; + /// + /// let mut v: Vec<i32, System> = Vec::new_in(System); + /// v.push(-1); + /// v.push(0); + /// v.push(1); + /// + /// let (ptr, len, cap, alloc) = v.into_raw_parts_with_alloc(); + /// + /// let rebuilt = unsafe { + /// // We can now make changes to the components, such as + /// // transmuting the raw pointer to a compatible type. + /// let ptr = ptr as *mut u32; + /// + /// Vec::from_raw_parts_in(ptr, len, cap, alloc) + /// }; + /// assert_eq!(rebuilt, [4294967295, 0, 1]); + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts_with_alloc(self) -> (*mut T, usize, usize, A) { + let mut me = ManuallyDrop::new(self); + let len = me.len(); + let capacity = me.capacity(); + let ptr = me.as_mut_ptr(); + let alloc = unsafe { ptr::read(me.allocator()) }; + (ptr, len, capacity, alloc) + } + + /// Returns the number of elements the vector can hold without + /// reallocating. + /// + /// # Examples + /// + /// ``` + /// let vec: Vec<i32> = Vec::with_capacity(10); + /// assert_eq!(vec.capacity(), 10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the given `Vec<T>`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.reserve(10); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.buf.reserve(self.len, additional); + } + + /// Reserves the minimum capacity for exactly `additional` more elements to + /// be inserted in the given `Vec<T>`. After calling `reserve_exact`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: Vec::reserve + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.reserve_exact(10); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.buf.reserve_exact(self.len, additional); + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `Vec<T>`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `try_reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &[u32]) -> Result<Vec<u32>, TryReserveError> { + /// let mut output = Vec::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.extend(data.iter().map(|&val| { + /// val * 2 + 5 // very complicated + /// })); + /// + /// Ok(output) + /// } + /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?"); + /// ``` + #[stable(feature = "try_reserve", since = "1.57.0")] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve(self.len, additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` + /// elements to be inserted in the given `Vec<T>`. After calling + /// `try_reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`try_reserve`] if future insertions are expected. + /// + /// [`try_reserve`]: Vec::try_reserve + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &[u32]) -> Result<Vec<u32>, TryReserveError> { + /// let mut output = Vec::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve_exact(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.extend(data.iter().map(|&val| { + /// val * 2 + 5 // very complicated + /// })); + /// + /// Ok(output) + /// } + /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?"); + /// ``` + #[stable(feature = "try_reserve", since = "1.57.0")] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve_exact(self.len, additional) + } + + /// Shrinks the capacity of the vector as much as possible. + /// + /// It will drop down as close as possible to the length but the allocator + /// may still inform the vector that there is space for a few more elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to_fit(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() > self.len { + self.buf.shrink_to_fit(self.len); + } + } + + /// Shrinks the capacity of the vector with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to(4); + /// assert!(vec.capacity() >= 4); + /// vec.shrink_to(0); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "shrink_to", since = "1.56.0")] + pub fn shrink_to(&mut self, min_capacity: usize) { + if self.capacity() > min_capacity { + self.buf.shrink_to_fit(cmp::max(self.len, min_capacity)); + } + } + + /// Converts the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. + /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.into_boxed_slice(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.into_boxed_slice(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_boxed_slice(mut self) -> Box<[T], A> { + unsafe { + self.shrink_to_fit(); + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + buf.into_box(len).assume_init() + } + } + + /// Shortens the vector, keeping the first `len` elements and dropping + /// the rest. + /// + /// If `len` is greater than the vector's current length, this has no + /// effect. + /// + /// The [`drain`] method can emulate `truncate`, but causes the excess + /// elements to be returned instead of dropped. + /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. + /// + /// # Examples + /// + /// Truncating a five element vector to two elements: + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// vec.truncate(2); + /// assert_eq!(vec, [1, 2]); + /// ``` + /// + /// No truncation occurs when `len` is greater than the vector's current + /// length: + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(8); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + /// + /// Truncating when `len == 0` is equivalent to calling the [`clear`] + /// method. + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(0); + /// assert_eq!(vec, []); + /// ``` + /// + /// [`clear`]: Vec::clear + /// [`drain`]: Vec::drain + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, len: usize) { + // This is safe because: + // + // * the slice passed to `drop_in_place` is valid; the `len > self.len` + // case avoids creating an invalid slice, and + // * the `len` of the vector is shrunk before calling `drop_in_place`, + // such that no value will be dropped twice in case `drop_in_place` + // were to panic once (if it panics twice, the program aborts). + unsafe { + // Note: It's intentional that this is `>` and not `>=`. + // Changing it to `>=` has negative performance + // implications in some cases. See #78884 for more. + if len > self.len { + return; + } + let remaining_len = self.len - len; + let s = ptr::slice_from_raw_parts_mut(self.as_mut_ptr().add(len), remaining_len); + self.len = len; + ptr::drop_in_place(s); + } + } + + /// Extracts a slice containing the entire vector. + /// + /// Equivalent to `&s[..]`. + /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Write}; + /// let buffer = vec![1, 2, 3, 5, 8]; + /// io::sink().write(buffer.as_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_slice(&self) -> &[T] { + self + } + + /// Extracts a mutable slice of the entire vector. + /// + /// Equivalent to `&mut s[..]`. + /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Read}; + /// let mut buffer = vec![0; 3]; + /// io::repeat(0b101).read_exact(buffer.as_mut_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_mut_slice(&mut self) -> &mut [T] { + self + } + + /// Returns a raw pointer to the vector's buffer. + /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// The caller must also ensure that the memory the pointer (non-transitively) points to + /// is never written to (except inside an `UnsafeCell`) using this pointer or any pointer + /// derived from it. If you need to mutate the contents of the slice, use [`as_mut_ptr`]. + /// + /// # Examples + /// + /// ``` + /// let x = vec![1, 2, 4]; + /// let x_ptr = x.as_ptr(); + /// + /// unsafe { + /// for i in 0..x.len() { + /// assert_eq!(*x_ptr.add(i), 1 << i); + /// } + /// } + /// ``` + /// + /// [`as_mut_ptr`]: Vec::as_mut_ptr + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_ptr(&self) -> *const T { + // We shadow the slice method of the same name to avoid going through + // `deref`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns an unsafe mutable pointer to the vector's buffer. + /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// # Examples + /// + /// ``` + /// // Allocate vector big enough for 4 elements. + /// let size = 4; + /// let mut x: Vec<i32> = Vec::with_capacity(size); + /// let x_ptr = x.as_mut_ptr(); + /// + /// // Initialize elements via raw pointer writes, then set length. + /// unsafe { + /// for i in 0..size { + /// *x_ptr.add(i) = i as i32; + /// } + /// x.set_len(size); + /// } + /// assert_eq!(&*x, &[0, 1, 2, 3]); + /// ``` + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + // We shadow the slice method of the same name to avoid going through + // `deref_mut`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.buf.allocator() + } + + /// Forces the length of the vector to `new_len`. + /// + /// This is a low-level operation that maintains none of the normal + /// invariants of the type. Normally changing the length of a vector + /// is done using one of the safe operations instead, such as + /// [`truncate`], [`resize`], [`extend`], or [`clear`]. + /// + /// [`truncate`]: Vec::truncate + /// [`resize`]: Vec::resize + /// [`extend`]: Extend::extend + /// [`clear`]: Vec::clear + /// + /// # Safety + /// + /// - `new_len` must be less than or equal to [`capacity()`]. + /// - The elements at `old_len..new_len` must be initialized. + /// + /// [`capacity()`]: Vec::capacity + /// + /// # Examples + /// + /// This method can be useful for situations in which the vector + /// is serving as a buffer for other code, particularly over FFI: + /// + /// ```no_run + /// # #![allow(dead_code)] + /// # // This is just a minimal skeleton for the doc example; + /// # // don't use this as a starting point for a real library. + /// # pub struct StreamWrapper { strm: *mut std::ffi::c_void } + /// # const Z_OK: i32 = 0; + /// # extern "C" { + /// # fn deflateGetDictionary( + /// # strm: *mut std::ffi::c_void, + /// # dictionary: *mut u8, + /// # dictLength: *mut usize, + /// # ) -> i32; + /// # } + /// # impl StreamWrapper { + /// pub fn get_dictionary(&self) -> Option<Vec<u8>> { + /// // Per the FFI method's docs, "32768 bytes is always enough". + /// let mut dict = Vec::with_capacity(32_768); + /// let mut dict_length = 0; + /// // SAFETY: When `deflateGetDictionary` returns `Z_OK`, it holds that: + /// // 1. `dict_length` elements were initialized. + /// // 2. `dict_length` <= the capacity (32_768) + /// // which makes `set_len` safe to call. + /// unsafe { + /// // Make the FFI call... + /// let r = deflateGetDictionary(self.strm, dict.as_mut_ptr(), &mut dict_length); + /// if r == Z_OK { + /// // ...and update the length to what was initialized. + /// dict.set_len(dict_length); + /// Some(dict) + /// } else { + /// None + /// } + /// } + /// } + /// # } + /// ``` + /// + /// While the following example is sound, there is a memory leak since + /// the inner vectors were not freed prior to the `set_len` call: + /// + /// ``` + /// let mut vec = vec![vec![1, 0, 0], + /// vec![0, 1, 0], + /// vec![0, 0, 1]]; + /// // SAFETY: + /// // 1. `old_len..0` is empty so no elements need to be initialized. + /// // 2. `0 <= capacity` always holds whatever `capacity` is. + /// unsafe { + /// vec.set_len(0); + /// } + /// ``` + /// + /// Normally, here, one would use [`clear`] instead to correctly drop + /// the contents and thus not leak memory. + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len <= self.capacity()); + + self.len = new_len; + } + + /// Removes an element from the vector and returns it. + /// + /// The removed element is replaced by the last element of the vector. + /// + /// This does not preserve ordering, but is *O*(1). + /// If you need to preserve the element order, use [`remove`] instead. + /// + /// [`remove`]: Vec::remove + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec!["foo", "bar", "baz", "qux"]; + /// + /// assert_eq!(v.swap_remove(1), "bar"); + /// assert_eq!(v, ["foo", "qux", "baz"]); + /// + /// assert_eq!(v.swap_remove(0), "foo"); + /// assert_eq!(v, ["baz", "qux"]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn swap_remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("swap_remove index (is {index}) should be < len (is {len})"); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // We replace self[index] with the last element. Note that if the + // bounds check above succeeds there must be a last element (which + // can be self[index] itself). + let value = ptr::read(self.as_ptr().add(index)); + let base_ptr = self.as_mut_ptr(); + ptr::copy(base_ptr.add(len - 1), base_ptr.add(index), 1); + self.set_len(len - 1); + value + } + } + + /// Inserts an element at position `index` within the vector, shifting all + /// elements after it to the right. + /// + /// # Panics + /// + /// Panics if `index > len`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.insert(1, 4); + /// assert_eq!(vec, [1, 4, 2, 3]); + /// vec.insert(4, 5); + /// assert_eq!(vec, [1, 4, 2, 3, 5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, index: usize, element: T) { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("insertion index (is {index}) should be <= len (is {len})"); + } + + let len = self.len(); + if index > len { + assert_failed(index, len); + } + + // space for the new element + if len == self.buf.capacity() { + self.reserve(1); + } + + unsafe { + // infallible + // The spot to put the new value + { + let p = self.as_mut_ptr().add(index); + // Shift everything over to make space. (Duplicating the + // `index`th element into two consecutive places.) + ptr::copy(p, p.offset(1), len - index); + // Write it in, overwriting the first copy of the `index`th + // element. + ptr::write(p, element); + } + self.set_len(len + 1); + } + } + + /// Removes and returns the element at position `index` within the vector, + /// shifting all elements after it to the left. + /// + /// Note: Because this shifts over the remaining elements, it has a + /// worst-case performance of *O*(*n*). If you don't need the order of elements + /// to be preserved, use [`swap_remove`] instead. If you'd like to remove + /// elements from the beginning of the `Vec`, consider using + /// [`VecDeque::pop_front`] instead. + /// + /// [`swap_remove`]: Vec::swap_remove + /// [`VecDeque::pop_front`]: crate::collections::VecDeque::pop_front + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// assert_eq!(v.remove(1), 2); + /// assert_eq!(v, [1, 3]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[track_caller] + pub fn remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + #[track_caller] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("removal index (is {index}) should be < len (is {len})"); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // infallible + let ret; + { + // the place we are taking from. + let ptr = self.as_mut_ptr().add(index); + // copy it out, unsafely having a copy of the value on + // the stack and in the vector at the same time. + ret = ptr::read(ptr); + + // Shift everything down to fill in that spot. + ptr::copy(ptr.offset(1), ptr, len - index - 1); + } + self.set_len(len - 1); + ret + } + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all elements `e` for which `f(&e)` returns `false`. + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.retain(|&x| x % 2 == 0); + /// assert_eq!(vec, [2, 4]); + /// ``` + /// + /// Because the elements are visited exactly once in the original order, + /// external state may be used to decide which elements to keep. + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// let keep = [false, true, true, false, true]; + /// let mut iter = keep.iter(); + /// vec.retain(|_| *iter.next().unwrap()); + /// assert_eq!(vec, [2, 3, 5]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn retain<F>(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + self.retain_mut(|elem| f(elem)); + } + + /// Retains only the elements specified by the predicate, passing a mutable reference to it. + /// + /// In other words, remove all elements `e` such that `f(&mut e)` returns `false`. + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.retain_mut(|x| if *x > 3 { + /// false + /// } else { + /// *x += 1; + /// true + /// }); + /// assert_eq!(vec, [2, 3, 4]); + /// ``` + #[stable(feature = "vec_retain_mut", since = "1.61.0")] + pub fn retain_mut<F>(&mut self, mut f: F) + where + F: FnMut(&mut T) -> bool, + { + let original_len = self.len(); + // Avoid double drop if the drop guard is not executed, + // since we may make some holes during the process. + unsafe { self.set_len(0) }; + + // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] + // |<- processed len ->| ^- next to check + // |<- deleted cnt ->| + // |<- original_len ->| + // Kept: Elements which predicate returns true on. + // Hole: Moved or dropped element slot. + // Unchecked: Unchecked valid elements. + // + // This drop guard will be invoked when predicate or `drop` of element panicked. + // It shifts unchecked elements to cover holes and `set_len` to the correct length. + // In cases when predicate and `drop` never panick, it will be optimized out. + struct BackshiftOnDrop<'a, T, A: Allocator> { + v: &'a mut Vec<T, A>, + processed_len: usize, + deleted_cnt: usize, + original_len: usize, + } + + impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> { + fn drop(&mut self) { + if self.deleted_cnt > 0 { + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.processed_len), + self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt), + self.original_len - self.processed_len, + ); + } + } + // SAFETY: After filling holes, all items are in contiguous memory. + unsafe { + self.v.set_len(self.original_len - self.deleted_cnt); + } + } + } + + let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; + + fn process_loop<F, T, A: Allocator, const DELETED: bool>( + original_len: usize, + f: &mut F, + g: &mut BackshiftOnDrop<'_, T, A>, + ) where + F: FnMut(&mut T) -> bool, + { + while g.processed_len != original_len { + // SAFETY: Unchecked element must be valid. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + if !f(cur) { + // Advance early to avoid double drop if `drop_in_place` panicked. + g.processed_len += 1; + g.deleted_cnt += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + // We already advanced the counter. + if DELETED { + continue; + } else { + break; + } + } + if DELETED { + // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } + } + g.processed_len += 1; + } + } + + // Stage 1: Nothing was deleted. + process_loop::<F, T, A, false>(original_len, &mut f, &mut g); + + // Stage 2: Some elements were deleted. + process_loop::<F, T, A, true>(original_len, &mut f, &mut g); + + // All item are processed. This can be optimized to `set_len` by LLVM. + drop(g); + } + + /// Removes all but the first of consecutive elements in the vector that resolve to the same + /// key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + #[inline] + pub fn dedup_by_key<F, K>(&mut self, mut key: F) + where + F: FnMut(&mut T) -> K, + K: PartialEq, + { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes all but the first of consecutive elements in the vector satisfying a given equality + /// relation. + /// + /// The `same_bucket` function is passed references to two elements from the vector and + /// must determine if the elements compare equal. The elements are passed in opposite order + /// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is removed. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + pub fn dedup_by<F>(&mut self, mut same_bucket: F) + where + F: FnMut(&mut T, &mut T) -> bool, + { + let len = self.len(); + if len <= 1 { + return; + } + + /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */ + struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> { + /* Offset of the element we want to check if it is duplicate */ + read: usize, + + /* Offset of the place where we want to place the non-duplicate + * when we find it. */ + write: usize, + + /* The Vec that would need correction if `same_bucket` panicked */ + vec: &'a mut Vec<T, A>, + } + + impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> { + fn drop(&mut self) { + /* This code gets executed when `same_bucket` panics */ + + /* SAFETY: invariant guarantees that `read - write` + * and `len - read` never overflow and that the copy is always + * in-bounds. */ + unsafe { + let ptr = self.vec.as_mut_ptr(); + let len = self.vec.len(); + + /* How many items were left when `same_bucket` panicked. + * Basically vec[read..].len() */ + let items_left = len.wrapping_sub(self.read); + + /* Pointer to first item in vec[write..write+items_left] slice */ + let dropped_ptr = ptr.add(self.write); + /* Pointer to first item in vec[read..] slice */ + let valid_ptr = ptr.add(self.read); + + /* Copy `vec[read..]` to `vec[write..write+items_left]`. + * The slices can overlap, so `copy_nonoverlapping` cannot be used */ + ptr::copy(valid_ptr, dropped_ptr, items_left); + + /* How many items have been already dropped + * Basically vec[read..write].len() */ + let dropped = self.read.wrapping_sub(self.write); + + self.vec.set_len(len - dropped); + } + } + } + + let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; + let ptr = gap.vec.as_mut_ptr(); + + /* Drop items while going through Vec, it should be more efficient than + * doing slice partition_dedup + truncate */ + + /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr + * are always in-bounds and read_ptr never aliases prev_ptr */ + unsafe { + while gap.read < len { + let read_ptr = ptr.add(gap.read); + let prev_ptr = ptr.add(gap.write.wrapping_sub(1)); + + if same_bucket(&mut *read_ptr, &mut *prev_ptr) { + // Increase `gap.read` now since the drop may panic. + gap.read += 1; + /* We have found duplicate, drop it in-place */ + ptr::drop_in_place(read_ptr); + } else { + let write_ptr = ptr.add(gap.write); + + /* Because `read_ptr` can be equal to `write_ptr`, we either + * have to use `copy` or conditional `copy_nonoverlapping`. + * Looks like the first option is faster. */ + ptr::copy(read_ptr, write_ptr, 1); + + /* We have filled that place, so go further */ + gap.write += 1; + gap.read += 1; + } + } + + /* Technically we could let `gap` clean up with its Drop, but + * when `same_bucket` is guaranteed to not panic, this bloats a little + * the codegen, so we just do it manually */ + gap.vec.set_len(gap.write); + mem::forget(gap); + } + } + + /// Appends an element to the back of a collection. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.push(3); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, value: T) { + // This will panic or abort if we would allocate > isize::MAX bytes + // or if the length increment would overflow for zero-sized types. + if self.len == self.buf.capacity() { + self.buf.reserve_for_push(self.len); + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + } + + /// Tries to append an element to the back of a collection. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.try_push(3).unwrap(); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_push(&mut self, value: T) -> Result<(), TryReserveError> { + if self.len == self.buf.capacity() { + self.buf.try_reserve_for_push(self.len)?; + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + Ok(()) + } + + /// Removes the last element from a vector and returns it, or [`None`] if it + /// is empty. + /// + /// If you'd like to pop the first element, consider using + /// [`VecDeque::pop_front`] instead. + /// + /// [`VecDeque::pop_front`]: crate::collections::VecDeque::pop_front + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// assert_eq!(vec.pop(), Some(3)); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option<T> { + if self.len == 0 { + None + } else { + unsafe { + self.len -= 1; + Some(ptr::read(self.as_ptr().add(self.len()))) + } + } + } + + /// Moves all the elements of `other` into `self`, leaving `other` empty. + /// + /// # Panics + /// + /// Panics if the number of elements in the vector overflows a `usize`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let mut vec2 = vec![4, 5, 6]; + /// vec.append(&mut vec2); + /// assert_eq!(vec, [1, 2, 3, 4, 5, 6]); + /// assert_eq!(vec2, []); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "append", since = "1.4.0")] + pub fn append(&mut self, other: &mut Self) { + unsafe { + self.append_elements(other.as_slice() as _); + other.set_len(0); + } + } + + /// Appends elements to `self` from other buffer. + #[cfg(not(no_global_oom_handling))] + #[inline] + unsafe fn append_elements(&mut self, other: *const [T]) { + let count = unsafe { (*other).len() }; + self.reserve(count); + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + } + + /// Removes the specified range from the vector in bulk, returning all + /// removed elements as an iterator. If the iterator is dropped before + /// being fully consumed, it drops the remaining removed elements. + /// + /// The returned iterator keeps a mutable borrow on the vector to optimize + /// its implementation. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Leaking + /// + /// If the returned iterator goes out of scope without being dropped (due to + /// [`mem::forget`], for example), the vector may have lost and leaked + /// elements arbitrarily, including elements outside the range. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// let u: Vec<_> = v.drain(1..).collect(); + /// assert_eq!(v, &[1]); + /// assert_eq!(u, &[2, 3]); + /// + /// // A full range clears the vector, like `clear()` does + /// v.drain(..); + /// assert_eq!(v, &[]); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain<R>(&mut self, range: R) -> Drain<'_, T, A> + where + R: RangeBounds<usize>, + { + // Memory safety + // + // When the Drain is first created, it shortens the length of + // the source vector to make sure no uninitialized or moved-from elements + // are accessible at all if the Drain's destructor never gets to run. + // + // Drain will ptr::read out the values to remove. + // When finished, remaining tail of the vec is copied back to cover + // the hole, and the vector length is restored to the new length. + // + let len = self.len(); + let Range { start, end } = slice::range(range, ..len); + + unsafe { + // set self.vec length's to start, to be safe in case Drain is leaked + self.set_len(start); + // Use the borrow in the IterMut to indicate borrowing behavior of the + // whole Drain iterator (like &mut T). + let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start); + Drain { + tail_start: end, + tail_len: len - end, + iter: range_slice.iter(), + vec: NonNull::from(self), + } + } + } + + /// Clears the vector, removing all values. + /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// + /// v.clear(); + /// + /// assert!(v.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + let elems: *mut [T] = self.as_mut_slice(); + + // SAFETY: + // - `elems` comes directly from `as_mut_slice` and is therefore valid. + // - Setting `self.len` before calling `drop_in_place` means that, + // if an element's `Drop` impl panics, the vector's `Drop` impl will + // do nothing (leaking the rest of the elements) instead of dropping + // some twice. + unsafe { + self.len = 0; + ptr::drop_in_place(elems); + } + } + + /// Returns the number of elements in the vector, also referred to + /// as its 'length'. + /// + /// # Examples + /// + /// ``` + /// let a = vec![1, 2, 3]; + /// assert_eq!(a.len(), 3); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the vector contains no elements. + /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// assert!(v.is_empty()); + /// + /// v.push(1); + /// assert!(!v.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the collection into two at the given index. + /// + /// Returns a newly allocated vector containing the elements in the range + /// `[at, len)`. After the call, the original vector will be left containing + /// the elements `[0, at)` with its previous capacity unchanged. + /// + /// # Panics + /// + /// Panics if `at > len`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let vec2 = vec.split_off(1); + /// assert_eq!(vec, [1]); + /// assert_eq!(vec2, [2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[must_use = "use `.truncate()` if you don't need the other half"] + #[stable(feature = "split_off", since = "1.4.0")] + pub fn split_off(&mut self, at: usize) -> Self + where + A: Clone, + { + #[cold] + #[inline(never)] + fn assert_failed(at: usize, len: usize) -> ! { + panic!("`at` split index (is {at}) should be <= len (is {len})"); + } + + if at > self.len() { + assert_failed(at, self.len()); + } + + if at == 0 { + // the new vector can take over the original buffer and avoid the copy + return mem::replace( + self, + Vec::with_capacity_in(self.capacity(), self.allocator().clone()), + ); + } + + let other_len = self.len - at; + let mut other = Vec::with_capacity_in(other_len, self.allocator().clone()); + + // Unsafely `set_len` and copy items to `other`. + unsafe { + self.set_len(at); + other.set_len(other_len); + + ptr::copy_nonoverlapping(self.as_ptr().add(at), other.as_mut_ptr(), other.len()); + } + other + } + + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with the result of + /// calling the closure `f`. The return values from `f` will end up + /// in the `Vec` in the order they have been generated. + /// + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method uses a closure to create new values on every push. If + /// you'd rather [`Clone`] a given value, use [`Vec::resize`]. If you + /// want to use the [`Default`] trait to generate values, you can + /// pass [`Default::default`] as the second argument. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.resize_with(5, Default::default); + /// assert_eq!(vec, [1, 2, 3, 0, 0]); + /// + /// let mut vec = vec![]; + /// let mut p = 1; + /// vec.resize_with(4, || { p *= 2; p }); + /// assert_eq!(vec, [2, 4, 8, 16]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize_with", since = "1.33.0")] + pub fn resize_with<F>(&mut self, new_len: usize, f: F) + where + F: FnMut() -> T, + { + let len = self.len(); + if new_len > len { + self.extend_with(new_len - len, ExtendFunc(f)); + } else { + self.truncate(new_len); + } + } + + /// Consumes and leaks the `Vec`, returning a mutable reference to the contents, + /// `&'a mut [T]`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// As of Rust 1.57, this method does not reallocate or shrink the `Vec`, + /// so the leaked allocation may include unused capacity that is not part + /// of the returned slice. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. + /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = vec![1, 2, 3]; + /// let static_ref: &'static mut [usize] = x.leak(); + /// static_ref[0] += 1; + /// assert_eq!(static_ref, &[2, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_leak", since = "1.47.0")] + #[inline] + pub fn leak<'a>(self) -> &'a mut [T] + where + A: 'a, + { + let mut me = ManuallyDrop::new(self); + unsafe { slice::from_raw_parts_mut(me.as_mut_ptr(), me.len) } + } + + /// Returns the remaining spare capacity of the vector as a slice of + /// `MaybeUninit<T>`. + /// + /// The returned slice can be used to fill the vector with data (e.g. by + /// reading from a file) before marking the data as initialized using the + /// [`set_len`] method. + /// + /// [`set_len`]: Vec::set_len + /// + /// # Examples + /// + /// ``` + /// // Allocate vector big enough for 10 elements. + /// let mut v = Vec::with_capacity(10); + /// + /// // Fill in the first 3 elements. + /// let uninit = v.spare_capacity_mut(); + /// uninit[0].write(0); + /// uninit[1].write(1); + /// uninit[2].write(2); + /// + /// // Mark the first 3 elements of the vector as being initialized. + /// unsafe { + /// v.set_len(3); + /// } + /// + /// assert_eq!(&v, &[0, 1, 2]); + /// ``` + #[stable(feature = "vec_spare_capacity", since = "1.60.0")] + #[inline] + pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] { + // Note: + // This method is not implemented in terms of `split_at_spare_mut`, + // to prevent invalidation of pointers to the buffer. + unsafe { + slice::from_raw_parts_mut( + self.as_mut_ptr().add(self.len) as *mut MaybeUninit<T>, + self.buf.capacity() - self.len, + ) + } + } + + /// Returns vector content as a slice of `T`, along with the remaining spare + /// capacity of the vector as a slice of `MaybeUninit<T>`. + /// + /// The returned spare capacity slice can be used to fill the vector with data + /// (e.g. by reading from a file) before marking the data as initialized using + /// the [`set_len`] method. + /// + /// [`set_len`]: Vec::set_len + /// + /// Note that this is a low-level API, which should be used with care for + /// optimization purposes. If you need to append data to a `Vec` + /// you can use [`push`], [`extend`], [`extend_from_slice`], + /// [`extend_from_within`], [`insert`], [`append`], [`resize`] or + /// [`resize_with`], depending on your exact needs. + /// + /// [`push`]: Vec::push + /// [`extend`]: Vec::extend + /// [`extend_from_slice`]: Vec::extend_from_slice + /// [`extend_from_within`]: Vec::extend_from_within + /// [`insert`]: Vec::insert + /// [`append`]: Vec::append + /// [`resize`]: Vec::resize + /// [`resize_with`]: Vec::resize_with + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_split_at_spare)] + /// + /// let mut v = vec![1, 1, 2]; + /// + /// // Reserve additional space big enough for 10 elements. + /// v.reserve(10); + /// + /// let (init, uninit) = v.split_at_spare_mut(); + /// let sum = init.iter().copied().sum::<u32>(); + /// + /// // Fill in the next 4 elements. + /// uninit[0].write(sum); + /// uninit[1].write(sum * 2); + /// uninit[2].write(sum * 3); + /// uninit[3].write(sum * 4); + /// + /// // Mark the 4 elements of the vector as being initialized. + /// unsafe { + /// let len = v.len(); + /// v.set_len(len + 4); + /// } + /// + /// assert_eq!(&v, &[1, 1, 2, 4, 8, 12, 16]); + /// ``` + #[unstable(feature = "vec_split_at_spare", issue = "81944")] + #[inline] + pub fn split_at_spare_mut(&mut self) -> (&mut [T], &mut [MaybeUninit<T>]) { + // SAFETY: + // - len is ignored and so never changed + let (init, spare, _) = unsafe { self.split_at_spare_mut_with_len() }; + (init, spare) + } + + /// Safety: changing returned .2 (&mut usize) is considered the same as calling `.set_len(_)`. + /// + /// This method provides unique access to all vec parts at once in `extend_from_within`. + unsafe fn split_at_spare_mut_with_len( + &mut self, + ) -> (&mut [T], &mut [MaybeUninit<T>], &mut usize) { + let ptr = self.as_mut_ptr(); + // SAFETY: + // - `ptr` is guaranteed to be valid for `self.len` elements + // - but the allocation extends out to `self.buf.capacity()` elements, possibly + // uninitialized + let spare_ptr = unsafe { ptr.add(self.len) }; + let spare_ptr = spare_ptr.cast::<MaybeUninit<T>>(); + let spare_len = self.buf.capacity() - self.len; + + // SAFETY: + // - `ptr` is guaranteed to be valid for `self.len` elements + // - `spare_ptr` is pointing one element past the buffer, so it doesn't overlap with `initialized` + unsafe { + let initialized = slice::from_raw_parts_mut(ptr, self.len); + let spare = slice::from_raw_parts_mut(spare_ptr, spare_len); + + (initialized, spare, &mut self.len) + } + } +} + +impl<T: Clone, A: Allocator> Vec<T, A> { + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// If you only need to resize to a smaller size, use [`Vec::truncate`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.resize(3, "world"); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.resize(2, 0); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize", since = "1.5.0")] + pub fn resize(&mut self, new_len: usize, value: T) { + let len = self.len(); + + if new_len > len { + self.extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + } + } + + /// Clones and appends all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` slice is traversed in-order. + /// + /// Note that this function is same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.extend_from_slice(&[2, 3, 4]); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_slice", since = "1.6.0")] + pub fn extend_from_slice(&mut self, other: &[T]) { + self.spec_extend(other.iter()) + } + + /// Copies elements from `src` range to the end of the vector. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![0, 1, 2, 3, 4]; + /// + /// vec.extend_from_within(2..); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4]); + /// + /// vec.extend_from_within(..2); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1]); + /// + /// vec.extend_from_within(4..8); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1, 4, 2, 3, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_within", since = "1.53.0")] + pub fn extend_from_within<R>(&mut self, src: R) + where + R: RangeBounds<usize>, + { + let range = slice::range(src, ..self.len()); + self.reserve(range.len()); + + // SAFETY: + // - `slice::range` guarantees that the given range is valid for indexing self + unsafe { + self.spec_extend_from_within(range); + } + } +} + +impl<T, A: Allocator, const N: usize> Vec<[T; N], A> { + /// Takes a `Vec<[T; N]>` and flattens it into a `Vec<T>`. + /// + /// # Panics + /// + /// Panics if the length of the resulting vector would overflow a `usize`. + /// + /// This is only possible when flattening a vector of arrays of zero-sized + /// types, and thus tends to be irrelevant in practice. If + /// `size_of::<T>() > 0`, this will never panic. + /// + /// # Examples + /// + /// ``` + /// #![feature(slice_flatten)] + /// + /// let mut vec = vec![[1, 2, 3], [4, 5, 6], [7, 8, 9]]; + /// assert_eq!(vec.pop(), Some([7, 8, 9])); + /// + /// let mut flattened = vec.into_flattened(); + /// assert_eq!(flattened.pop(), Some(6)); + /// ``` + #[unstable(feature = "slice_flatten", issue = "95629")] + pub fn into_flattened(self) -> Vec<T, A> { + let (ptr, len, cap, alloc) = self.into_raw_parts_with_alloc(); + let (new_len, new_cap) = if mem::size_of::<T>() == 0 { + (len.checked_mul(N).expect("vec len overflow"), usize::MAX) + } else { + // SAFETY: + // - `cap * N` cannot overflow because the allocation is already in + // the address space. + // - Each `[T; N]` has `N` valid elements, so there are `len * N` + // valid elements in the allocation. + unsafe { (len.unchecked_mul(N), cap.unchecked_mul(N)) } + }; + // SAFETY: + // - `ptr` was allocated by `self` + // - `ptr` is well-aligned because `[T; N]` has the same alignment as `T`. + // - `new_cap` refers to the same sized allocation as `cap` because + // `new_cap * size_of::<T>()` == `cap * size_of::<[T; N]>()` + // - `len` <= `cap`, so `len * N` <= `cap * N`. + unsafe { Vec::<T, A>::from_raw_parts_in(ptr.cast(), new_len, new_cap, alloc) } + } +} + +// This code generalizes `extend_with_{element,default}`. +trait ExtendWith<T> { + fn next(&mut self) -> T; + fn last(self) -> T; +} + +struct ExtendElement<T>(T); +impl<T: Clone> ExtendWith<T> for ExtendElement<T> { + fn next(&mut self) -> T { + self.0.clone() + } + fn last(self) -> T { + self.0 + } +} + +struct ExtendFunc<F>(F); +impl<T, F: FnMut() -> T> ExtendWith<T> for ExtendFunc<F> { + fn next(&mut self) -> T { + (self.0)() + } + fn last(mut self) -> T { + (self.0)() + } +} + +impl<T, A: Allocator> Vec<T, A> { + #[cfg(not(no_global_oom_handling))] + /// Extend the vector by `n` values, using the given generator. + fn extend_with<E: ExtendWith<T>>(&mut self, n: usize, mut value: E) { + self.reserve(n); + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around bug where compiler + // might not realize the store through `ptr` through self.set_len() + // don't alias. + let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + } + } +} + +impl<T: PartialEq, A: Allocator> Vec<T, A> { + /// Removes consecutive repeated elements in the vector according to the + /// [`PartialEq`] trait implementation. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 2, 3, 2]; + /// + /// vec.dedup(); + /// + /// assert_eq!(vec, [1, 2, 3, 2]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn dedup(&mut self) { + self.dedup_by(|a, b| a == b) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Internal methods and functions +//////////////////////////////////////////////////////////////////////////////// + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn from_elem<T: Clone>(elem: T, n: usize) -> Vec<T> { + <T as SpecFromElem>::from_elem(elem, n, Global) +} + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "allocator_api", issue = "32838")] +pub fn from_elem_in<T: Clone, A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<T, A> { + <T as SpecFromElem>::from_elem(elem, n, alloc) +} + +trait ExtendFromWithinSpec { + /// # Safety + /// + /// - `src` needs to be valid index + /// - `self.capacity() - self.len()` must be `>= src.len()` + unsafe fn spec_extend_from_within(&mut self, src: Range<usize>); +} + +impl<T: Clone, A: Allocator> ExtendFromWithinSpec for Vec<T, A> { + default unsafe fn spec_extend_from_within(&mut self, src: Range<usize>) { + // SAFETY: + // - len is increased only after initializing elements + let (this, spare, len) = unsafe { self.split_at_spare_mut_with_len() }; + + // SAFETY: + // - caller guaratees that src is a valid index + let to_clone = unsafe { this.get_unchecked(src) }; + + iter::zip(to_clone, spare) + .map(|(src, dst)| dst.write(src.clone())) + // Note: + // - Element was just initialized with `MaybeUninit::write`, so it's ok to increase len + // - len is increased after each element to prevent leaks (see issue #82533) + .for_each(|_| *len += 1); + } +} + +impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> { + unsafe fn spec_extend_from_within(&mut self, src: Range<usize>) { + let count = src.len(); + { + let (init, spare) = self.split_at_spare_mut(); + + // SAFETY: + // - caller guaratees that `src` is a valid index + let source = unsafe { init.get_unchecked(src) }; + + // SAFETY: + // - Both pointers are created from unique slice references (`&mut [_]`) + // so they are valid and do not overlap. + // - Elements are :Copy so it's OK to to copy them, without doing + // anything with the original values + // - `count` is equal to the len of `source`, so source is valid for + // `count` reads + // - `.reserve(count)` guarantees that `spare.len() >= count` so spare + // is valid for `count` writes + unsafe { ptr::copy_nonoverlapping(source.as_ptr(), spare.as_mut_ptr() as _, count) }; + } + + // SAFETY: + // - The elements were just initialized by `copy_nonoverlapping` + self.len += count; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Common trait implementations for Vec +//////////////////////////////////////////////////////////////////////////////// + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> ops::Deref for Vec<T, A> { + type Target = [T]; + + fn deref(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.as_ptr(), self.len) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> ops::DerefMut for Vec<T, A> { + fn deref_mut(&mut self) -> &mut [T] { + unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } + } +} + +#[cfg(not(no_global_oom_handling))] +trait SpecCloneFrom { + fn clone_from(this: &mut Self, other: &Self); +} + +#[cfg(not(no_global_oom_handling))] +impl<T: Clone, A: Allocator> SpecCloneFrom for Vec<T, A> { + default fn clone_from(this: &mut Self, other: &Self) { + // drop anything that will not be overwritten + this.truncate(other.len()); + + // self.len <= other.len due to the truncate above, so the + // slices here are always in-bounds. + let (init, tail) = other.split_at(this.len()); + + // reuse the contained values' allocations/resources. + this.clone_from_slice(init); + this.extend_from_slice(tail); + } +} + +#[cfg(not(no_global_oom_handling))] +impl<T: Copy, A: Allocator> SpecCloneFrom for Vec<T, A> { + fn clone_from(this: &mut Self, other: &Self) { + this.clear(); + this.extend_from_slice(other); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> { + #[cfg(not(test))] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + <[T]>::to_vec_in(&**self, alloc) + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Instead use the + // `slice::to_vec` function which is only available with cfg(test) + // NB see the slice::hack module in slice.rs for more information + #[cfg(test)] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + crate::slice::to_vec(&**self, alloc) + } + + fn clone_from(&mut self, other: &Self) { + SpecCloneFrom::clone_from(self, other) + } +} + +/// The hash of a vector is the same as that of the corresponding slice, +/// as required by the `core::borrow::Borrow` implementation. +/// +/// ``` +/// #![feature(build_hasher_simple_hash_one)] +/// use std::hash::BuildHasher; +/// +/// let b = std::collections::hash_map::RandomState::new(); +/// let v: Vec<u8> = vec![0xa8, 0x3c, 0x09]; +/// let s: &[u8] = &[0xa8, 0x3c, 0x09]; +/// assert_eq!(b.hash_one(v), b.hash_one(s)); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Hash, A: Allocator> Hash for Vec<T, A> { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl<T, I: SliceIndex<[T]>, A: Allocator> Index<I> for Vec<T, A> { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&**self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl<T, I: SliceIndex<[T]>, A: Allocator> IndexMut<I> for Vec<T, A> { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut **self, index) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T> FromIterator<T> for Vec<T> { + #[inline] + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Vec<T> { + <Self as SpecFromIter<T, I::IntoIter>>::from_iter(iter.into_iter()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> IntoIterator for Vec<T, A> { + type Item = T; + type IntoIter = IntoIter<T, A>; + + /// Creates a consuming iterator, that is, one that moves each value out of + /// the vector (from start to end). The vector cannot be used after calling + /// this. + /// + /// # Examples + /// + /// ``` + /// let v = vec!["a".to_string(), "b".to_string()]; + /// for s in v.into_iter() { + /// // s has type String, not &String + /// println!("{s}"); + /// } + /// ``` + #[inline] + fn into_iter(self) -> IntoIter<T, A> { + unsafe { + let mut me = ManuallyDrop::new(self); + let alloc = ManuallyDrop::new(ptr::read(me.allocator())); + let begin = me.as_mut_ptr(); + let end = if mem::size_of::<T>() == 0 { + arith_offset(begin as *const i8, me.len() as isize) as *const T + } else { + begin.add(me.len()) as *const T + }; + let cap = me.buf.capacity(); + IntoIter { + buf: NonNull::new_unchecked(begin), + phantom: PhantomData, + cap, + alloc, + ptr: begin, + end, + } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a Vec<T, A> { + type Item = &'a T; + type IntoIter = slice::Iter<'a, T>; + + fn into_iter(self) -> slice::Iter<'a, T> { + self.iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec<T, A> { + type Item = &'a mut T; + type IntoIter = slice::IterMut<'a, T>; + + fn into_iter(self) -> slice::IterMut<'a, T> { + self.iter_mut() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> Extend<T> for Vec<T, A> { + #[inline] + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + <Self as SpecExtend<T, I::IntoIter>>::spec_extend(self, iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, item: T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +impl<T, A: Allocator> Vec<T, A> { + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + #[cfg(not(no_global_oom_handling))] + fn extend_desugared<I: Iterator<Item = T>>(&mut self, mut iterator: I) { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // Since next() executes user code which can panic we have to bump the length + // after each step. + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + } + + /// Creates a splicing iterator that replaces the specified range in the vector + /// with the given `replace_with` iterator and yields the removed items. + /// `replace_with` does not need to be the same length as `range`. + /// + /// `range` is removed even if the iterator is not consumed until the end. + /// + /// It is unspecified how many elements are removed from the vector + /// if the `Splice` value is leaked. + /// + /// The input iterator `replace_with` is only consumed when the `Splice` value is dropped. + /// + /// This is optimal if: + /// + /// * The tail (elements in the vector after `range`) is empty, + /// * or `replace_with` yields fewer or equal elements than `range`’s length + /// * or the lower bound of its `size_hint()` is exact. + /// + /// Otherwise, a temporary vector is allocated and the tail is moved twice. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3, 4]; + /// let new = [7, 8, 9]; + /// let u: Vec<_> = v.splice(1..3, new).collect(); + /// assert_eq!(v, &[1, 7, 8, 9, 4]); + /// assert_eq!(u, &[2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "vec_splice", since = "1.21.0")] + pub fn splice<R, I>(&mut self, range: R, replace_with: I) -> Splice<'_, I::IntoIter, A> + where + R: RangeBounds<usize>, + I: IntoIterator<Item = T>, + { + Splice { drain: self.drain(range), replace_with: replace_with.into_iter() } + } + + /// Creates an iterator which uses a closure to determine if an element should be removed. + /// + /// If the closure returns true, then the element is removed and yielded. + /// If the closure returns false, the element will remain in the vector and will not be yielded + /// by the iterator. + /// + /// Using this method is equivalent to the following code: + /// + /// ``` + /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; + /// # let mut vec = vec![1, 2, 3, 4, 5, 6]; + /// let mut i = 0; + /// while i < vec.len() { + /// if some_predicate(&mut vec[i]) { + /// let val = vec.remove(i); + /// // your code here + /// } else { + /// i += 1; + /// } + /// } + /// + /// # assert_eq!(vec, vec![1, 4, 5]); + /// ``` + /// + /// But `drain_filter` is easier to use. `drain_filter` is also more efficient, + /// because it can backshift the elements of the array in bulk. + /// + /// Note that `drain_filter` also lets you mutate every element in the filter closure, + /// regardless of whether you choose to keep or remove it. + /// + /// # Examples + /// + /// Splitting an array into evens and odds, reusing the original allocation: + /// + /// ``` + /// #![feature(drain_filter)] + /// let mut numbers = vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; + /// + /// let evens = numbers.drain_filter(|x| *x % 2 == 0).collect::<Vec<_>>(); + /// let odds = numbers; + /// + /// assert_eq!(evens, vec![2, 4, 6, 8, 14]); + /// assert_eq!(odds, vec![1, 3, 5, 9, 11, 13, 15]); + /// ``` + #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] + pub fn drain_filter<F>(&mut self, filter: F) -> DrainFilter<'_, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + let old_len = self.len(); + + // Guard against us getting leaked (leak amplification) + unsafe { + self.set_len(0); + } + + DrainFilter { vec: self, idx: 0, del: 0, old_len, pred: filter, panic_flag: false } + } +} + +/// Extend implementation that copies elements out of references before pushing them onto the Vec. +/// +/// This implementation is specialized for slice iterators, where it uses [`copy_from_slice`] to +/// append the entire slice at once. +/// +/// [`copy_from_slice`]: slice::copy_from_slice +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec<T, A> { + fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) { + self.spec_extend(iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, &item: &'a T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +/// Implements comparison of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: PartialOrd, A: Allocator> PartialOrd for Vec<T, A> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Eq, A: Allocator> Eq for Vec<T, A> {} + +/// Implements ordering of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Ord, A: Allocator> Ord for Vec<T, A> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec<T, A> { + fn drop(&mut self) { + unsafe { + // use drop for [T] + // use a raw slice to refer to the elements of the vector as weakest necessary type; + // could avoid questions of validity in certain cases + ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.as_mut_ptr(), self.len)) + } + // RawVec handles deallocation + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")] +impl<T> const Default for Vec<T> { + /// Creates an empty `Vec<T>`. + fn default() -> Vec<T> { + Vec::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: fmt::Debug, A: Allocator> fmt::Debug for Vec<T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> AsRef<Vec<T, A>> for Vec<T, A> { + fn as_ref(&self) -> &Vec<T, A> { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl<T, A: Allocator> AsMut<Vec<T, A>> for Vec<T, A> { + fn as_mut(&mut self) -> &mut Vec<T, A> { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<T, A: Allocator> AsRef<[T]> for Vec<T, A> { + fn as_ref(&self) -> &[T] { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl<T, A: Allocator> AsMut<[T]> for Vec<T, A> { + fn as_mut(&mut self) -> &mut [T] { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: Clone> From<&[T]> for Vec<T> { + /// Allocate a `Vec<T>` and fill it by cloning `s`'s items. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&[1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &[T]) -> Vec<T> { + s.to_vec() + } + #[cfg(test)] + fn from(s: &[T]) -> Vec<T> { + crate::slice::to_vec(s, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_from_mut", since = "1.19.0")] +impl<T: Clone> From<&mut [T]> for Vec<T> { + /// Allocate a `Vec<T>` and fill it by cloning `s`'s items. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&mut [1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &mut [T]) -> Vec<T> { + s.to_vec() + } + #[cfg(test)] + fn from(s: &mut [T]) -> Vec<T> { + crate::slice::to_vec(s, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_from_array", since = "1.44.0")] +impl<T, const N: usize> From<[T; N]> for Vec<T> { + /// Allocate a `Vec<T>` and move `s`'s items into it. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from([1, 2, 3]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: [T; N]) -> Vec<T> { + <[T]>::into_vec(box s) + } + + #[cfg(test)] + fn from(s: [T; N]) -> Vec<T> { + crate::slice::into_vec(box s) + } +} + +#[stable(feature = "vec_from_cow_slice", since = "1.14.0")] +impl<'a, T> From<Cow<'a, [T]>> for Vec<T> +where + [T]: ToOwned<Owned = Vec<T>>, +{ + /// Convert a clone-on-write slice into a vector. + /// + /// If `s` already owns a `Vec<T>`, it will be returned directly. + /// If `s` is borrowing a slice, a new `Vec<T>` will be allocated and + /// filled by cloning `s`'s items into it. + /// + /// # Examples + /// + /// ``` + /// # use std::borrow::Cow; + /// let o: Cow<[i32]> = Cow::Owned(vec![1, 2, 3]); + /// let b: Cow<[i32]> = Cow::Borrowed(&[1, 2, 3]); + /// assert_eq!(Vec::from(o), Vec::from(b)); + /// ``` + fn from(s: Cow<'a, [T]>) -> Vec<T> { + s.into_owned() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "vec_from_box", since = "1.18.0")] +impl<T, A: Allocator> From<Box<[T], A>> for Vec<T, A> { + /// Convert a boxed slice into a vector by transferring ownership of + /// the existing heap allocation. + /// + /// # Examples + /// + /// ``` + /// let b: Box<[i32]> = vec![1, 2, 3].into_boxed_slice(); + /// assert_eq!(Vec::from(b), vec![1, 2, 3]); + /// ``` + fn from(s: Box<[T], A>) -> Self { + s.into_vec() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(no_global_oom_handling))] +#[cfg(not(test))] +#[stable(feature = "box_from_vec", since = "1.20.0")] +impl<T, A: Allocator> From<Vec<T, A>> for Box<[T], A> { + /// Convert a vector into a boxed slice. + /// + /// If `v` has excess capacity, its items will be moved into a + /// newly-allocated buffer with exactly the right capacity. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Box::from(vec![1, 2, 3]), vec![1, 2, 3].into_boxed_slice()); + /// ``` + fn from(v: Vec<T, A>) -> Self { + v.into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for Vec<u8> { + /// Allocate a `Vec<u8>` and fill it with a UTF-8 string. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from("123"), vec![b'1', b'2', b'3']); + /// ``` + fn from(s: &str) -> Vec<u8> { + From::from(s.as_bytes()) + } +} + +#[stable(feature = "array_try_from_vec", since = "1.48.0")] +impl<T, A: Allocator, const N: usize> TryFrom<Vec<T, A>> for [T; N] { + type Error = Vec<T, A>; + + /// Gets the entire contents of the `Vec<T>` as an array, + /// if its size exactly matches that of the requested array. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(vec![1, 2, 3].try_into(), Ok([1, 2, 3])); + /// assert_eq!(<Vec<i32>>::new().try_into(), Ok([])); + /// ``` + /// + /// If the length doesn't match, the input comes back in `Err`: + /// ``` + /// let r: Result<[i32; 4], _> = (0..10).collect::<Vec<_>>().try_into(); + /// assert_eq!(r, Err(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9])); + /// ``` + /// + /// If you're fine with just getting a prefix of the `Vec<T>`, + /// you can call [`.truncate(N)`](Vec::truncate) first. + /// ``` + /// let mut v = String::from("hello world").into_bytes(); + /// v.sort(); + /// v.truncate(2); + /// let [a, b]: [_; 2] = v.try_into().unwrap(); + /// assert_eq!(a, b' '); + /// assert_eq!(b, b'd'); + /// ``` + fn try_from(mut vec: Vec<T, A>) -> Result<[T; N], Vec<T, A>> { + if vec.len() != N { + return Err(vec); + } + + // SAFETY: `.set_len(0)` is always sound. + unsafe { vec.set_len(0) }; + + // SAFETY: A `Vec`'s pointer is always aligned properly, and + // the alignment the array needs is the same as the items. + // We checked earlier that we have sufficient items. + // The items will not double-drop as the `set_len` + // tells the `Vec` not to also drop them. + let array = unsafe { ptr::read(vec.as_ptr() as *const [T; N]) }; + Ok(array) + } +} diff --git a/rust/alloc/vec/partial_eq.rs b/rust/alloc/vec/partial_eq.rs new file mode 100644 index 000000000000..10ad4e492287 --- /dev/null +++ b/rust/alloc/vec/partial_eq.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::Allocator; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; + +use super::Vec; + +macro_rules! __impl_slice_eq1 { + ([$($vars:tt)*] $lhs:ty, $rhs:ty $(where $ty:ty: $bound:ident)?, #[$stability:meta]) => { + #[$stability] + impl<T, U, $($vars)*> PartialEq<$rhs> for $lhs + where + T: PartialEq<U>, + $($ty: $bound)? + { + #[inline] + fn eq(&self, other: &$rhs) -> bool { self[..] == other[..] } + #[inline] + fn ne(&self, other: &$rhs) -> bool { self[..] != other[..] } + } + } +} + +__impl_slice_eq1! { [A1: Allocator, A2: Allocator] Vec<T, A1>, Vec<U, A2>, #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec<T, A>, &[U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec<T, A>, &mut [U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] &[T], Vec<U, A>, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] &mut [T], Vec<U, A>, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] Vec<T, A>, [U], #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] } +__impl_slice_eq1! { [A: Allocator] [T], Vec<U, A>, #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [A: Allocator] Cow<'_, [T]>, Vec<U, A> where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [] Cow<'_, [T]>, &[U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [] Cow<'_, [T]>, &mut [U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator, const N: usize] Vec<T, A>, [U; N], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator, const N: usize] Vec<T, A>, &[U; N], #[stable(feature = "rust1", since = "1.0.0")] } + +// NOTE: some less important impls are omitted to reduce code bloat +// FIXME(Centril): Reconsider this? +//__impl_slice_eq1! { [const N: usize] Vec<A>, &mut [B; N], } +//__impl_slice_eq1! { [const N: usize] [A; N], Vec<B>, } +//__impl_slice_eq1! { [const N: usize] &[A; N], Vec<B>, } +//__impl_slice_eq1! { [const N: usize] &mut [A; N], Vec<B>, } +//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, [B; N], } +//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, &[B; N], } +//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, &mut [B; N], } diff --git a/rust/bindgen_parameters b/rust/bindgen_parameters new file mode 100644 index 000000000000..be4963bf7203 --- /dev/null +++ b/rust/bindgen_parameters @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +--opaque-type xregs_state +--opaque-type desc_struct +--opaque-type arch_lbr_state +--opaque-type local_apic + +# Packed type cannot transitively contain a `#[repr(align)]` type. +--opaque-type x86_msi_data +--opaque-type x86_msi_addr_lo + +# `try` is a reserved keyword since Rust 2018; solved in `bindgen` v0.59.2, +# commit 2aed6b021680 ("context: Escape the try keyword properly"). +--opaque-type kunit_try_catch + +# If SMP is disabled, `arch_spinlock_t` is defined as a ZST which triggers a Rust +# warning. We don't need to peek into it anyway. +--opaque-type spinlock + +# `seccomp`'s comment gets understood as a doctest +--no-doc-comments diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h new file mode 100644 index 000000000000..c48bc284214a --- /dev/null +++ b/rust/bindings/bindings_helper.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Header that contains the code (mostly headers) for which Rust bindings + * will be automatically generated by `bindgen`. + * + * Sorted alphabetically. + */ + +#include <linux/slab.h> + +/* `bindgen` gets confused at certain things. */ +const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; +const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs new file mode 100644 index 000000000000..6c50ee62c56b --- /dev/null +++ b/rust/bindings/lib.rs @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This crate may not be directly used. If you need a kernel C API that is +//! not ported or wrapped in the `kernel` crate, then do so first instead of +//! using this crate. + +#![no_std] +#![feature(core_ffi_c)] +// See <https://github.com/rust-lang/rust-bindgen/issues/1651>. +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + clippy::all, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] + +mod bindings_raw { + // Use glob import here to expose all helpers. + // Symbols defined within the module will take precedence to the glob import. + pub use super::bindings_helper::*; + include!(concat!( + env!("OBJTREE"), + "/rust/bindings/bindings_generated.rs" + )); +} + +// When both a directly exposed symbol and a helper exists for the same function, +// the directly exposed symbol is preferred and the helper becomes dead code, so +// ignore the warning here. +#[allow(dead_code)] +mod bindings_helper { + // Import the generated bindings for types. + include!(concat!( + env!("OBJTREE"), + "/rust/bindings/bindings_helpers_generated.rs" + )); +} + +pub use bindings_raw::*; + +pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL; +pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO; diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs new file mode 100644 index 000000000000..f8f39a3e6855 --- /dev/null +++ b/rust/compiler_builtins.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Our own `compiler_builtins`. +//! +//! Rust provides [`compiler_builtins`] as a port of LLVM's [`compiler-rt`]. +//! Since we do not need the vast majority of them, we avoid the dependency +//! by providing this file. +//! +//! At the moment, some builtins are required that should not be. For instance, +//! [`core`] has 128-bit integers functionality which we should not be compiling +//! in. We will work with upstream [`core`] to provide feature flags to disable +//! the parts we do not need. For the moment, we define them to [`panic!`] at +//! runtime for simplicity to catch mistakes, instead of performing surgery +//! on `core.o`. +//! +//! In any case, all these symbols are weakened to ensure we do not override +//! those that may be provided by the rest of the kernel. +//! +//! [`compiler_builtins`]: https://github.com/rust-lang/compiler-builtins +//! [`compiler-rt`]: https://compiler-rt.llvm.org/ + +#![feature(compiler_builtins)] +#![compiler_builtins] +#![no_builtins] +#![no_std] + +macro_rules! define_panicking_intrinsics( + ($reason: tt, { $($ident: ident, )* }) => { + $( + #[doc(hidden)] + #[no_mangle] + pub extern "C" fn $ident() { + panic!($reason); + } + )* + } +); + +define_panicking_intrinsics!("`f32` should not be used", { + __eqsf2, + __gesf2, + __lesf2, + __nesf2, + __unordsf2, +}); + +define_panicking_intrinsics!("`f64` should not be used", { + __unorddf2, +}); + +define_panicking_intrinsics!("`i128` should not be used", { + __ashrti3, + __muloti4, + __multi3, +}); + +define_panicking_intrinsics!("`u128` should not be used", { + __ashlti3, + __lshrti3, + __udivmodti4, + __udivti3, + __umodti3, +}); diff --git a/rust/exports.c b/rust/exports.c new file mode 100644 index 000000000000..bb7cc64cecd0 --- /dev/null +++ b/rust/exports.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A hack to export Rust symbols for loadable modules without having to redo + * the entire `include/linux/export.h` logic in Rust. + * + * This requires the Rust's new/future `v0` mangling scheme because the default + * one ("legacy") uses invalid characters for C identifiers (thus we cannot use + * the `EXPORT_SYMBOL_*` macros). + * + * All symbols are exported as GPL-only to guarantee no GPL-only feature is + * accidentally exposed. + */ + +#include <linux/module.h> + +#define EXPORT_SYMBOL_RUST_GPL(sym) extern int sym; EXPORT_SYMBOL_GPL(sym) + +#include "exports_core_generated.h" +#include "exports_alloc_generated.h" +#include "exports_bindings_generated.h" +#include "exports_kernel_generated.h" diff --git a/rust/helpers.c b/rust/helpers.c new file mode 100644 index 000000000000..b4f15eee2ffd --- /dev/null +++ b/rust/helpers.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Non-trivial C macros cannot be used in Rust. Similarly, inlined C functions + * cannot be called either. This file explicitly creates functions ("helpers") + * that wrap those so that they can be called from Rust. + * + * Even though Rust kernel modules should never use directly the bindings, some + * of these helpers need to be exported because Rust generics and inlined + * functions may not get their code generated in the crate where they are + * defined. Other helpers, called from non-inline functions, may not be + * exported, in principle. However, in general, the Rust compiler does not + * guarantee codegen will be performed for a non-inline function either. + * Therefore, this file exports all the helpers. In the future, this may be + * revisited to reduce the number of exports after the compiler is informed + * about the places codegen is required. + * + * All symbols are exported as GPL-only to guarantee no GPL-only feature is + * accidentally exposed. + */ + +#include <linux/bug.h> +#include <linux/build_bug.h> + +__noreturn void rust_helper_BUG(void) +{ + BUG(); +} +EXPORT_SYMBOL_GPL(rust_helper_BUG); + +/* + * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type + * as the Rust `usize` type, so we can use it in contexts where Rust + * expects a `usize` like slice (array) indices. `usize` is defined to be + * the same as C's `uintptr_t` type (can hold any pointer) but not + * necessarily the same as `size_t` (can hold the size of any single + * object). Most modern platforms use the same concrete integer type for + * both of them, but in case we find ourselves on a platform where + * that's not true, fail early instead of risking ABI or + * integer-overflow issues. + * + * If your platform fails this assertion, it means that you are in + * danger of integer-overflow bugs (even if you attempt to remove + * `--size_t-is-usize`). It may be easiest to change the kernel ABI on + * your platform such that `size_t` matches `uintptr_t` (i.e., to increase + * `size_t`, because `uintptr_t` has to be at least as big as `size_t`). + */ +static_assert( + sizeof(size_t) == sizeof(uintptr_t) && + __alignof__(size_t) == __alignof__(uintptr_t), + "Rust code expects C `size_t` to match Rust `usize`" +); diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs new file mode 100644 index 000000000000..397a3dd57a9b --- /dev/null +++ b/rust/kernel/allocator.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Allocator support. + +use core::alloc::{GlobalAlloc, Layout}; +use core::ptr; + +use crate::bindings; + +struct KernelAllocator; + +unsafe impl GlobalAlloc for KernelAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // `krealloc()` is used instead of `kmalloc()` because the latter is + // an inline function and cannot be bound to as a result. + unsafe { bindings::krealloc(ptr::null(), layout.size(), bindings::GFP_KERNEL) as *mut u8 } + } + + unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { + unsafe { + bindings::kfree(ptr as *const core::ffi::c_void); + } + } +} + +#[global_allocator] +static ALLOCATOR: KernelAllocator = KernelAllocator; + +// `rustc` only generates these for some crate types. Even then, we would need +// to extract the object file that has them from the archive. For the moment, +// let's generate them ourselves instead. +// +// Note that `#[no_mangle]` implies exported too, nowadays. +#[no_mangle] +fn __rust_alloc(size: usize, _align: usize) -> *mut u8 { + unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 } +} + +#[no_mangle] +fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) { + unsafe { bindings::kfree(ptr as *const core::ffi::c_void) }; +} + +#[no_mangle] +fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + ptr as *const core::ffi::c_void, + new_size, + bindings::GFP_KERNEL, + ) as *mut u8 + } +} + +#[no_mangle] +fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + core::ptr::null(), + size, + bindings::GFP_KERNEL | bindings::__GFP_ZERO, + ) as *mut u8 + } +} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs new file mode 100644 index 000000000000..466b2a8fe569 --- /dev/null +++ b/rust/kernel/error.rs @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel errors. +//! +//! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h) + +use alloc::collections::TryReserveError; + +/// Contains the C-compatible error codes. +pub mod code { + /// Out of memory. + pub const ENOMEM: super::Error = super::Error(-(crate::bindings::ENOMEM as i32)); +} + +/// Generic integer kernel error. +/// +/// The kernel defines a set of integer generic error codes based on C and +/// POSIX ones. These codes may have a more specific meaning in some contexts. +/// +/// # Invariants +/// +/// The value is a valid `errno` (i.e. `>= -MAX_ERRNO && < 0`). +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Error(core::ffi::c_int); + +impl Error { + /// Returns the kernel error code. + pub fn to_kernel_errno(self) -> core::ffi::c_int { + self.0 + } +} + +impl From<TryReserveError> for Error { + fn from(_: TryReserveError) -> Error { + code::ENOMEM + } +} + +/// A [`Result`] with an [`Error`] error type. +/// +/// To be used as the return type for functions that may fail. +/// +/// # Error codes in C and Rust +/// +/// In C, it is common that functions indicate success or failure through +/// their return value; modifying or returning extra data through non-`const` +/// pointer parameters. In particular, in the kernel, functions that may fail +/// typically return an `int` that represents a generic error code. We model +/// those as [`Error`]. +/// +/// In Rust, it is idiomatic to model functions that may fail as returning +/// a [`Result`]. Since in the kernel many functions return an error code, +/// [`Result`] is a type alias for a [`core::result::Result`] that uses +/// [`Error`] as its error type. +/// +/// Note that even if a function does not return anything when it succeeds, +/// it should still be modeled as returning a `Result` rather than +/// just an [`Error`]. +pub type Result<T = ()> = core::result::Result<T, Error>; diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs new file mode 100644 index 000000000000..abd46261d385 --- /dev/null +++ b/rust/kernel/lib.rs @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The `kernel` crate. +//! +//! This crate contains the kernel APIs that have been ported or wrapped for +//! usage by Rust code in the kernel and is shared by all of them. +//! +//! In other words, all the rest of the Rust code in the kernel (e.g. kernel +//! modules written in Rust) depends on [`core`], [`alloc`] and this crate. +//! +//! If you need a kernel C API that is not ported or wrapped yet here, then +//! do so first instead of bypassing this crate. + +#![no_std] +#![feature(core_ffi_c)] + +// Ensure conditional compilation based on the kernel configuration works; +// otherwise we may silently break things like initcall handling. +#[cfg(not(CONFIG_RUST))] +compile_error!("Missing kernel configuration for conditional compilation"); + +#[cfg(not(test))] +#[cfg(not(testlib))] +mod allocator; +pub mod error; +pub mod prelude; +pub mod print; +pub mod str; + +#[doc(hidden)] +pub use bindings; +pub use macros; + +/// Prefix to appear before log messages printed from within the `kernel` crate. +const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; + +/// The top level entrypoint to implementing a kernel module. +/// +/// For any teardown or cleanup operations, your type may implement [`Drop`]. +pub trait Module: Sized + Sync { + /// Called at module initialization time. + /// + /// Use this method to perform whatever setup or registration your module + /// should do. + /// + /// Equivalent to the `module_init` macro in the C API. + fn init(module: &'static ThisModule) -> error::Result<Self>; +} + +/// Equivalent to `THIS_MODULE` in the C API. +/// +/// C header: `include/linux/export.h` +pub struct ThisModule(*mut bindings::module); + +// SAFETY: `THIS_MODULE` may be used from all threads within a module. +unsafe impl Sync for ThisModule {} + +impl ThisModule { + /// Creates a [`ThisModule`] given the `THIS_MODULE` pointer. + /// + /// # Safety + /// + /// The pointer must be equal to the right `THIS_MODULE`. + pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule { + ThisModule(ptr) + } +} + +#[cfg(not(any(testlib, test)))] +#[panic_handler] +fn panic(info: &core::panic::PanicInfo<'_>) -> ! { + pr_emerg!("{}\n", info); + // SAFETY: FFI call. + unsafe { bindings::BUG() }; + // Bindgen currently does not recognize `__noreturn` so `BUG` returns `()` + // instead of `!`. See <https://github.com/rust-lang/rust-bindgen/issues/2094>. + loop {} +} diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs new file mode 100644 index 000000000000..495e22250726 --- /dev/null +++ b/rust/kernel/prelude.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The `kernel` prelude. +//! +//! These are the most common items used by Rust code in the kernel, +//! intended to be imported by all Rust code, for convenience. +//! +//! # Examples +//! +//! ``` +//! use kernel::prelude::*; +//! ``` + +pub use super::{ + error::{Error, Result}, + pr_emerg, pr_info, ThisModule, +}; +pub use alloc::{boxed::Box, vec::Vec}; +pub use core::pin::Pin; +pub use macros::module; diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs new file mode 100644 index 000000000000..55db5a1ba752 --- /dev/null +++ b/rust/kernel/print.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Printing facilities. +//! +//! C header: [`include/linux/printk.h`](../../../../include/linux/printk.h) +//! +//! Reference: <https://www.kernel.org/doc/html/latest/core-api/printk-basics.html> + +use core::{ + ffi::{c_char, c_void}, + fmt, +}; + +use crate::str::RawFormatter; + +#[cfg(CONFIG_PRINTK)] +use crate::bindings; + +// Called from `vsprintf` with format specifier `%pA`. +#[no_mangle] +unsafe fn rust_fmt_argument(buf: *mut c_char, end: *mut c_char, ptr: *const c_void) -> *mut c_char { + use fmt::Write; + // SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`. + let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) }; + let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + w.pos().cast() +} + +/// Format strings. +/// +/// Public but hidden since it should only be used from public macros. +#[doc(hidden)] +pub mod format_strings { + use crate::bindings; + + /// The length we copy from the `KERN_*` kernel prefixes. + const LENGTH_PREFIX: usize = 2; + + /// The length of the fixed format strings. + pub const LENGTH: usize = 10; + + /// Generates a fixed format string for the kernel's [`_printk`]. + /// + /// The format string is always the same for a given level, i.e. for a + /// given `prefix`, which are the kernel's `KERN_*` constants. + /// + /// [`_printk`]: ../../../../include/linux/printk.h + const fn generate(is_cont: bool, prefix: &[u8; 3]) -> [u8; LENGTH] { + // Ensure the `KERN_*` macros are what we expect. + assert!(prefix[0] == b'\x01'); + if is_cont { + assert!(prefix[1] == b'c'); + } else { + assert!(prefix[1] >= b'0' && prefix[1] <= b'7'); + } + assert!(prefix[2] == b'\x00'); + + let suffix: &[u8; LENGTH - LENGTH_PREFIX] = if is_cont { + b"%pA\0\0\0\0\0" + } else { + b"%s: %pA\0" + }; + + [ + prefix[0], prefix[1], suffix[0], suffix[1], suffix[2], suffix[3], suffix[4], suffix[5], + suffix[6], suffix[7], + ] + } + + // Generate the format strings at compile-time. + // + // This avoids the compiler generating the contents on the fly in the stack. + // + // Furthermore, `static` instead of `const` is used to share the strings + // for all the kernel. + pub static EMERG: [u8; LENGTH] = generate(false, bindings::KERN_EMERG); + pub static INFO: [u8; LENGTH] = generate(false, bindings::KERN_INFO); +} + +/// Prints a message via the kernel's [`_printk`]. +/// +/// Public but hidden since it should only be used from public macros. +/// +/// # Safety +/// +/// The format string must be one of the ones in [`format_strings`], and +/// the module name must be null-terminated. +/// +/// [`_printk`]: ../../../../include/linux/_printk.h +#[doc(hidden)] +#[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))] +pub unsafe fn call_printk( + format_string: &[u8; format_strings::LENGTH], + module_name: &[u8], + args: fmt::Arguments<'_>, +) { + // `_printk` does not seem to fail in any path. + #[cfg(CONFIG_PRINTK)] + unsafe { + bindings::_printk( + format_string.as_ptr() as _, + module_name.as_ptr(), + &args as *const _ as *const c_void, + ); + } +} + +/// Performs formatting and forwards the string to [`call_printk`]. +/// +/// Public but hidden since it should only be used from public macros. +#[doc(hidden)] +#[cfg(not(testlib))] +#[macro_export] +#[allow(clippy::crate_in_macro_def)] +macro_rules! print_macro ( + // The non-continuation cases (most of them, e.g. `INFO`). + ($format_string:path, $($arg:tt)+) => ( + // SAFETY: This hidden macro should only be called by the documented + // printing macros which ensure the format string is one of the fixed + // ones. All `__LOG_PREFIX`s are null-terminated as they are generated + // by the `module!` proc macro or fixed values defined in a kernel + // crate. + unsafe { + $crate::print::call_printk( + &$format_string, + crate::__LOG_PREFIX, + format_args!($($arg)+), + ); + } + ); +); + +/// Stub for doctests +#[cfg(testlib)] +#[macro_export] +macro_rules! print_macro ( + ($format_string:path, $e:expr, $($arg:tt)+) => ( + () + ); +); + +// We could use a macro to generate these macros. However, doing so ends +// up being a bit ugly: it requires the dollar token trick to escape `$` as +// well as playing with the `doc` attribute. Furthermore, they cannot be easily +// imported in the prelude due to [1]. So, for the moment, we just write them +// manually, like in the C side; while keeping most of the logic in another +// macro, i.e. [`print_macro`]. +// +// [1]: https://github.com/rust-lang/rust/issues/52234 + +/// Prints an emergency-level message (level 0). +/// +/// Use this level if the system is unusable. +/// +/// Equivalent to the kernel's [`pr_emerg`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// `alloc::format!` for information about the formatting syntax. +/// +/// [`pr_emerg`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_emerg +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// pr_emerg!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_emerg ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::EMERG, $($arg)*) + ) +); + +/// Prints an info-level message (level 6). +/// +/// Use this level for informational messages. +/// +/// Equivalent to the kernel's [`pr_info`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// `alloc::format!` for information about the formatting syntax. +/// +/// [`pr_info`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_info +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// pr_info!("hello {}\n", "there"); +/// ``` +#[macro_export] +#[doc(alias = "print")] +macro_rules! pr_info ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::INFO, $($arg)*) + ) +); diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs new file mode 100644 index 000000000000..e45ff220ae50 --- /dev/null +++ b/rust/kernel/str.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! String representations. + +use core::fmt; + +/// Allows formatting of [`fmt::Arguments`] into a raw buffer. +/// +/// It does not fail if callers write past the end of the buffer so that they can calculate the +/// size required to fit everything. +/// +/// # Invariants +/// +/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos` +/// is less than `end`. +pub(crate) struct RawFormatter { + // Use `usize` to use `saturating_*` functions. + #[allow(dead_code)] + beg: usize, + pos: usize, + end: usize, +} + +impl RawFormatter { + /// Creates a new instance of [`RawFormatter`] with the given buffer pointers. + /// + /// # Safety + /// + /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end` + /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`]. + pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { + // INVARIANT: The safety requierments guarantee the type invariants. + Self { + beg: pos as _, + pos: pos as _, + end: end as _, + } + } + + /// Returns the current insert position. + /// + /// N.B. It may point to invalid memory. + pub(crate) fn pos(&self) -> *mut u8 { + self.pos as _ + } +} + +impl fmt::Write for RawFormatter { + fn write_str(&mut self, s: &str) -> fmt::Result { + // `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we + // don't want it to wrap around to 0. + let pos_new = self.pos.saturating_add(s.len()); + + // Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`. + let len_to_copy = core::cmp::min(pos_new, self.end).saturating_sub(self.pos); + + if len_to_copy > 0 { + // SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end` + // yet, so it is valid for write per the type invariants. + unsafe { + core::ptr::copy_nonoverlapping( + s.as_bytes().as_ptr(), + self.pos as *mut u8, + len_to_copy, + ) + }; + } + + self.pos = pos_new; + Ok(()) + } +} diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs new file mode 100644 index 000000000000..cdc7dc6135d2 --- /dev/null +++ b/rust/macros/helpers.rs @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +use proc_macro::{token_stream, TokenTree}; + +pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> { + if let Some(TokenTree::Ident(ident)) = it.next() { + Some(ident.to_string()) + } else { + None + } +} + +pub(crate) fn try_literal(it: &mut token_stream::IntoIter) -> Option<String> { + if let Some(TokenTree::Literal(literal)) = it.next() { + Some(literal.to_string()) + } else { + None + } +} + +pub(crate) fn try_byte_string(it: &mut token_stream::IntoIter) -> Option<String> { + try_literal(it).and_then(|byte_string| { + if byte_string.starts_with("b\"") && byte_string.ends_with('\"') { + Some(byte_string[2..byte_string.len() - 1].to_string()) + } else { + None + } + }) +} + +pub(crate) fn expect_ident(it: &mut token_stream::IntoIter) -> String { + try_ident(it).expect("Expected Ident") +} + +pub(crate) fn expect_punct(it: &mut token_stream::IntoIter) -> char { + if let TokenTree::Punct(punct) = it.next().expect("Reached end of token stream for Punct") { + punct.as_char() + } else { + panic!("Expected Punct"); + } +} + +pub(crate) fn expect_byte_string(it: &mut token_stream::IntoIter) -> String { + try_byte_string(it).expect("Expected byte string") +} + +pub(crate) fn expect_end(it: &mut token_stream::IntoIter) { + if it.next().is_some() { + panic!("Expected end"); + } +} diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs new file mode 100644 index 000000000000..91764bfb1f89 --- /dev/null +++ b/rust/macros/lib.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Crate for all kernel procedural macros. + +mod helpers; +mod module; + +use proc_macro::TokenStream; + +/// Declares a kernel module. +/// +/// The `type` argument should be a type which implements the [`Module`] +/// trait. Also accepts various forms of kernel metadata. +/// +/// C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) +/// +/// [`Module`]: ../kernel/trait.Module.html +/// +/// # Examples +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyModule, +/// name: b"my_kernel_module", +/// author: b"Rust for Linux Contributors", +/// description: b"My very own kernel module!", +/// license: b"GPL", +/// params: { +/// my_i32: i32 { +/// default: 42, +/// permissions: 0o000, +/// description: b"Example of i32", +/// }, +/// writeable_i32: i32 { +/// default: 42, +/// permissions: 0o644, +/// description: b"Example of i32", +/// }, +/// }, +/// } +/// +/// struct MyModule; +/// +/// impl kernel::Module for MyModule { +/// fn init() -> Result<Self> { +/// // If the parameter is writeable, then the kparam lock must be +/// // taken to read the parameter: +/// { +/// let lock = THIS_MODULE.kernel_param_lock(); +/// pr_info!("i32 param is: {}\n", writeable_i32.read(&lock)); +/// } +/// // If the parameter is read only, it can be read without locking +/// // the kernel parameters: +/// pr_info!("i32 param is: {}\n", my_i32.read()); +/// Ok(Self) +/// } +/// } +/// ``` +/// +/// # Supported argument types +/// - `type`: type which implements the [`Module`] trait (required). +/// - `name`: byte array of the name of the kernel module (required). +/// - `author`: byte array of the author of the kernel module. +/// - `description`: byte array of the description of the kernel module. +/// - `license`: byte array of the license of the kernel module (required). +/// - `alias`: byte array of alias name of the kernel module. +#[proc_macro] +pub fn module(ts: TokenStream) -> TokenStream { + module::module(ts) +} diff --git a/rust/macros/module.rs b/rust/macros/module.rs new file mode 100644 index 000000000000..186a5b8be23c --- /dev/null +++ b/rust/macros/module.rs @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::helpers::*; +use proc_macro::{token_stream, Literal, TokenStream, TokenTree}; +use std::fmt::Write; + +struct ModInfoBuilder<'a> { + module: &'a str, + counter: usize, + buffer: String, +} + +impl<'a> ModInfoBuilder<'a> { + fn new(module: &'a str) -> Self { + ModInfoBuilder { + module, + counter: 0, + buffer: String::new(), + } + } + + fn emit_base(&mut self, field: &str, content: &str, builtin: bool) { + let string = if builtin { + // Built-in modules prefix their modinfo strings by `module.`. + format!( + "{module}.{field}={content}\0", + module = self.module, + field = field, + content = content + ) + } else { + // Loadable modules' modinfo strings go as-is. + format!("{field}={content}\0", field = field, content = content) + }; + + write!( + &mut self.buffer, + " + {cfg} + #[doc(hidden)] + #[link_section = \".modinfo\"] + #[used] + pub static __{module}_{counter}: [u8; {length}] = *{string}; + ", + cfg = if builtin { + "#[cfg(not(MODULE))]" + } else { + "#[cfg(MODULE)]" + }, + module = self.module.to_uppercase(), + counter = self.counter, + length = string.len(), + string = Literal::byte_string(string.as_bytes()), + ) + .unwrap(); + + self.counter += 1; + } + + fn emit_only_builtin(&mut self, field: &str, content: &str) { + self.emit_base(field, content, true) + } + + fn emit_only_loadable(&mut self, field: &str, content: &str) { + self.emit_base(field, content, false) + } + + fn emit(&mut self, field: &str, content: &str) { + self.emit_only_builtin(field, content); + self.emit_only_loadable(field, content); + } +} + +#[derive(Debug, Default)] +struct ModuleInfo { + type_: String, + license: String, + name: String, + author: Option<String>, + description: Option<String>, + alias: Option<String>, +} + +impl ModuleInfo { + fn parse(it: &mut token_stream::IntoIter) -> Self { + let mut info = ModuleInfo::default(); + + const EXPECTED_KEYS: &[&str] = + &["type", "name", "author", "description", "license", "alias"]; + const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; + let mut seen_keys = Vec::new(); + + loop { + let key = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + if seen_keys.contains(&key) { + panic!( + "Duplicated key \"{}\". Keys can only be specified once.", + key + ); + } + + assert_eq!(expect_punct(it), ':'); + + match key.as_str() { + "type" => info.type_ = expect_ident(it), + "name" => info.name = expect_byte_string(it), + "author" => info.author = Some(expect_byte_string(it)), + "description" => info.description = Some(expect_byte_string(it)), + "license" => info.license = expect_byte_string(it), + "alias" => info.alias = Some(expect_byte_string(it)), + _ => panic!( + "Unknown key \"{}\". Valid keys are: {:?}.", + key, EXPECTED_KEYS + ), + } + + assert_eq!(expect_punct(it), ','); + + seen_keys.push(key); + } + + expect_end(it); + + for key in REQUIRED_KEYS { + if !seen_keys.iter().any(|e| e == key) { + panic!("Missing required key \"{}\".", key); + } + } + + let mut ordered_keys: Vec<&str> = Vec::new(); + for key in EXPECTED_KEYS { + if seen_keys.iter().any(|e| e == key) { + ordered_keys.push(key); + } + } + + if seen_keys != ordered_keys { + panic!( + "Keys are not ordered as expected. Order them like: {:?}.", + ordered_keys + ); + } + + info + } +} + +pub(crate) fn module(ts: TokenStream) -> TokenStream { + let mut it = ts.into_iter(); + + let info = ModuleInfo::parse(&mut it); + + let mut modinfo = ModInfoBuilder::new(info.name.as_ref()); + if let Some(author) = info.author { + modinfo.emit("author", &author); + } + if let Some(description) = info.description { + modinfo.emit("description", &description); + } + modinfo.emit("license", &info.license); + if let Some(alias) = info.alias { + modinfo.emit("alias", &alias); + } + + // Built-in modules also export the `file` modinfo string. + let file = + std::env::var("RUST_MODFILE").expect("Unable to fetch RUST_MODFILE environmental variable"); + modinfo.emit_only_builtin("file", &file); + + format!( + " + /// The module name. + /// + /// Used by the printing macros, e.g. [`info!`]. + const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; + + /// The \"Rust loadable module\" mark, for `scripts/is_rust_module.sh`. + // + // This may be best done another way later on, e.g. as a new modinfo + // key or a new section. For the moment, keep it simple. + #[cfg(MODULE)] + #[doc(hidden)] + #[used] + static __IS_RUST_MODULE: () = (); + + static mut __MOD: Option<{type_}> = None; + + // SAFETY: `__this_module` is constructed by the kernel at load time and will not be + // freed until the module is unloaded. + #[cfg(MODULE)] + static THIS_MODULE: kernel::ThisModule = unsafe {{ + kernel::ThisModule::from_ptr(&kernel::bindings::__this_module as *const _ as *mut _) + }}; + #[cfg(not(MODULE))] + static THIS_MODULE: kernel::ThisModule = unsafe {{ + kernel::ThisModule::from_ptr(core::ptr::null_mut()) + }}; + + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn init_module() -> core::ffi::c_int {{ + __init() + }} + + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + __exit() + }} + + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique. + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + core::arch::global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ + __init() + }} + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + __exit() + }} + + fn __init() -> core::ffi::c_int {{ + match <{type_} as kernel::Module>::init(&THIS_MODULE) {{ + Ok(m) => {{ + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_kernel_errno(); + }} + }} + }} + + fn __exit() {{ + unsafe {{ + // Invokes `drop()` on `__MOD`, which should be used for cleanup. + __MOD = None; + }} + }} + + {modinfo} + ", + type_ = info.type_, + name = info.name, + modinfo = modinfo.buffer, + initcall_section = ".initcall6.init" + ) + .parse() + .expect("Error parsing formatted string into token stream.") +} diff --git a/samples/Kconfig b/samples/Kconfig index 470ee3baf2e1..0d81c00289ee 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -263,6 +263,8 @@ config SAMPLE_CORESIGHT_SYSCFG This demonstrates how a user may create their own CoreSight configurations and easily load them into the system at runtime. +source "samples/rust/Kconfig" + endif # SAMPLES config HAVE_SAMPLE_FTRACE_DIRECT diff --git a/samples/Makefile b/samples/Makefile index 701e912ab5af..9832ef3f8fcb 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -35,3 +35,4 @@ subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/ obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/ +obj-$(CONFIG_SAMPLES_RUST) += rust/ diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index 3e404e51ec64..f29bb3c72230 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -162,11 +162,10 @@ out_free_name: LANDLOCK_ACCESS_FS_MAKE_SYM | \ LANDLOCK_ACCESS_FS_REFER) -#define ACCESS_ABI_2 ( \ - LANDLOCK_ACCESS_FS_REFER) - /* clang-format on */ +#define LANDLOCK_ABI_LAST 2 + int main(const int argc, char *const argv[], char *const *const envp) { const char *cmd_path; @@ -196,8 +195,12 @@ int main(const int argc, char *const argv[], char *const *const envp) "\nexample:\n" "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" " "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" " - "%s bash -i\n", + "%s bash -i\n\n", ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]); + fprintf(stderr, + "This sandboxer can use Landlock features " + "up to ABI version %d.\n", + LANDLOCK_ABI_LAST); return 1; } @@ -225,12 +228,30 @@ int main(const int argc, char *const argv[], char *const *const envp) } return 1; } + /* Best-effort security. */ - if (abi < 2) { - ruleset_attr.handled_access_fs &= ~ACCESS_ABI_2; - access_fs_ro &= ~ACCESS_ABI_2; - access_fs_rw &= ~ACCESS_ABI_2; + switch (abi) { + case 1: + /* Removes LANDLOCK_ACCESS_FS_REFER for ABI < 2 */ + ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER; + + fprintf(stderr, + "Hint: You should update the running kernel " + "to leverage Landlock features " + "provided by ABI version %d (instead of %d).\n", + LANDLOCK_ABI_LAST, abi); + __attribute__((fallthrough)); + case LANDLOCK_ABI_LAST: + break; + default: + fprintf(stderr, + "Hint: You should update this sandboxer " + "to leverage Landlock features " + "provided by ABI version %d (instead of %d).\n", + abi, LANDLOCK_ABI_LAST); } + access_fs_ro &= ruleset_attr.handled_access_fs; + access_fs_rw &= ruleset_attr.handled_access_fs; ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig new file mode 100644 index 000000000000..841e0906e943 --- /dev/null +++ b/samples/rust/Kconfig @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 + +menuconfig SAMPLES_RUST + bool "Rust samples" + depends on RUST + help + You can build sample Rust kernel code here. + + If unsure, say N. + +if SAMPLES_RUST + +config SAMPLE_RUST_MINIMAL + tristate "Minimal" + help + This option builds the Rust minimal module sample. + + To compile this as a module, choose M here: + the module will be called rust_minimal. + + If unsure, say N. + +config SAMPLE_RUST_HOSTPROGS + bool "Host programs" + help + This option builds the Rust host program samples. + + If unsure, say N. + +endif # SAMPLES_RUST diff --git a/samples/rust/Makefile b/samples/rust/Makefile new file mode 100644 index 000000000000..1daba5f8658a --- /dev/null +++ b/samples/rust/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o + +subdir-$(CONFIG_SAMPLE_RUST_HOSTPROGS) += hostprogs diff --git a/samples/rust/hostprogs/.gitignore b/samples/rust/hostprogs/.gitignore new file mode 100644 index 000000000000..a6c173da5048 --- /dev/null +++ b/samples/rust/hostprogs/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +single diff --git a/samples/rust/hostprogs/Makefile b/samples/rust/hostprogs/Makefile new file mode 100644 index 000000000000..8ddcbd7416db --- /dev/null +++ b/samples/rust/hostprogs/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +hostprogs-always-y := single + +single-rust := y diff --git a/samples/rust/hostprogs/a.rs b/samples/rust/hostprogs/a.rs new file mode 100644 index 000000000000..f7a4a3d0f4e0 --- /dev/null +++ b/samples/rust/hostprogs/a.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust single host program sample: module `a`. + +pub(crate) fn f(x: i32) { + println!("The number is {}.", x); +} diff --git a/samples/rust/hostprogs/b.rs b/samples/rust/hostprogs/b.rs new file mode 100644 index 000000000000..c1675890648f --- /dev/null +++ b/samples/rust/hostprogs/b.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust single host program sample: module `b`. + +pub(crate) const CONSTANT: i32 = 42; diff --git a/samples/rust/hostprogs/single.rs b/samples/rust/hostprogs/single.rs new file mode 100644 index 000000000000..8c48a119339a --- /dev/null +++ b/samples/rust/hostprogs/single.rs @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust single host program sample. + +mod a; +mod b; + +fn main() { + println!("Hello world!"); + + a::f(b::CONSTANT); +} diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs new file mode 100644 index 000000000000..54ad17685742 --- /dev/null +++ b/samples/rust/rust_minimal.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust minimal sample. + +use kernel::prelude::*; + +module! { + type: RustMinimal, + name: b"rust_minimal", + author: b"Rust for Linux Contributors", + description: b"Rust minimal sample", + license: b"GPL", +} + +struct RustMinimal { + numbers: Vec<i32>, +} + +impl kernel::Module for RustMinimal { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Rust minimal sample (init)\n"); + pr_info!("Am I built-in? {}\n", !cfg!(MODULE)); + + let mut numbers = Vec::new(); + numbers.try_push(72)?; + numbers.try_push(108)?; + numbers.try_push(200)?; + + Ok(RustMinimal { numbers }) + } +} + +impl Drop for RustMinimal { + fn drop(&mut self) { + pr_info!("My numbers are {:?}\n", self.numbers); + pr_info!("Rust minimal sample (exit)\n"); + } +} diff --git a/scripts/.gitignore b/scripts/.gitignore index eed308bef604..b7aec8eb1bd4 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /asn1_compiler /bin2c +/generate_rust_target /insert-sys-cert /kallsyms /module.lds diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index a0ccceb22cf8..274125307ebd 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -36,12 +36,12 @@ ld-option = $(success,$(LD) -v $(1)) as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler -o /dev/null -) # check if $(CC) and $(LD) exist -$(error-if,$(failure,command -v $(CC)),compiler '$(CC)' not found) +$(error-if,$(failure,command -v $(CC)),C compiler '$(CC)' not found) $(error-if,$(failure,command -v $(LD)),linker '$(LD)' not found) -# Get the compiler name, version, and error out if it is not supported. +# Get the C compiler name, version, and error out if it is not supported. cc-info := $(shell,$(srctree)/scripts/cc-version.sh $(CC)) -$(error-if,$(success,test -z "$(cc-info)"),Sorry$(comma) this compiler is not supported.) +$(error-if,$(success,test -z "$(cc-info)"),Sorry$(comma) this C compiler is not supported.) cc-name := $(shell,set -- $(cc-info) && echo $1) cc-version := $(shell,set -- $(cc-info) && echo $2) diff --git a/scripts/Makefile b/scripts/Makefile index f084f08ed176..1575af84d557 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -10,6 +10,9 @@ hostprogs-always-$(CONFIG_BUILDTIME_TABLE_SORT) += sorttable hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_RUST) += generate_rust_target + +generate_rust_target-rust := y HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTLDLIBS_sorttable = -lpthread diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 784f46d41959..27be77c0d6d8 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -26,6 +26,7 @@ EXTRA_CPPFLAGS := EXTRA_LDFLAGS := asflags-y := ccflags-y := +rustflags-y := cppflags-y := ldflags-y := @@ -271,6 +272,65 @@ quiet_cmd_cc_lst_c = MKLST $@ $(obj)/%.lst: $(src)/%.c FORCE $(call if_changed_dep,cc_lst_c) +# Compile Rust sources (.rs) +# --------------------------------------------------------------------------- + +rust_allowed_features := core_ffi_c + +rust_common_cmd = \ + RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \ + -Zallow-features=$(rust_allowed_features) \ + -Zcrate-attr=no_std \ + -Zcrate-attr='feature($(rust_allowed_features))' \ + --extern alloc --extern kernel \ + --crate-type rlib --out-dir $(obj) -L $(objtree)/rust/ \ + --crate-name $(basename $(notdir $@)) + +rust_handle_depfile = \ + mv $(obj)/$(basename $(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +# `--emit=obj`, `--emit=asm` and `--emit=llvm-ir` imply a single codegen unit +# will be used. We explicitly request `-Ccodegen-units=1` in any case, and +# the compiler shows a warning if it is not 1. However, if we ever stop +# requesting it explicitly and we start using some other `--emit` that does not +# imply it (and for which codegen is performed), then we would be out of sync, +# i.e. the outputs we would get for the different single targets (e.g. `.ll`) +# would not match each other. + +quiet_cmd_rustc_o_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_o_rs = \ + $(rust_common_cmd) --emit=dep-info,obj $<; \ + $(rust_handle_depfile) + +$(obj)/%.o: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_o_rs) + +quiet_cmd_rustc_rsi_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_rsi_rs = \ + $(rust_common_cmd) --emit=dep-info -Zunpretty=expanded $< >$@; \ + command -v $(RUSTFMT) >/dev/null && $(RUSTFMT) $@; \ + $(rust_handle_depfile) + +$(obj)/%.rsi: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_rsi_rs) + +quiet_cmd_rustc_s_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_s_rs = \ + $(rust_common_cmd) --emit=dep-info,asm $<; \ + $(rust_handle_depfile) + +$(obj)/%.s: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_s_rs) + +quiet_cmd_rustc_ll_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_ll_rs = \ + $(rust_common_cmd) --emit=dep-info,llvm-ir $<; \ + $(rust_handle_depfile) + +$(obj)/%.ll: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_ll_rs) + # Compile assembler sources (.S) # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.debug b/scripts/Makefile.debug index 8cf1cb22dd93..332c486f705f 100644 --- a/scripts/Makefile.debug +++ b/scripts/Makefile.debug @@ -1,4 +1,6 @@ DEBUG_CFLAGS := +DEBUG_RUSTFLAGS := + debug-flags-y := -g ifdef CONFIG_DEBUG_INFO_SPLIT @@ -17,9 +19,12 @@ KBUILD_AFLAGS += $(debug-flags-y) ifdef CONFIG_DEBUG_INFO_REDUCED DEBUG_CFLAGS += -fno-var-tracking +DEBUG_RUSTFLAGS += -Cdebuginfo=1 ifdef CONFIG_CC_IS_GCC DEBUG_CFLAGS += -femit-struct-debug-baseonly endif +else +DEBUG_RUSTFLAGS += -Cdebuginfo=2 endif ifdef CONFIG_DEBUG_INFO_COMPRESSED @@ -30,3 +35,6 @@ endif KBUILD_CFLAGS += $(DEBUG_CFLAGS) export DEBUG_CFLAGS + +KBUILD_RUSTFLAGS += $(DEBUG_RUSTFLAGS) +export DEBUG_RUSTFLAGS diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6ae482158bc4..52bd7df84fd6 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -64,6 +64,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) +KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) endif endif diff --git a/scripts/Makefile.host b/scripts/Makefile.host index 278b4d6ac945..da133780b751 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -22,6 +22,8 @@ $(obj)/%.tab.c $(obj)/%.tab.h: $(src)/%.y FORCE # to preprocess a data file. # # Both C and C++ are supported, but preferred language is C for such utilities. +# Rust is also supported, but it may only be used in scenarios where a Rust +# toolchain is required to be available (e.g. when `CONFIG_RUST` is enabled). # # Sample syntax (see Documentation/kbuild/makefiles.rst for reference) # hostprogs := bin2hex @@ -37,15 +39,20 @@ $(obj)/%.tab.c $(obj)/%.tab.h: $(src)/%.y FORCE # qconf-objs := menu.o # Will compile qconf as a C++ program, and menu as a C program. # They are linked as C++ code to the executable qconf +# +# hostprogs := target +# target-rust := y +# Will compile `target` as a Rust program, using `target.rs` as the crate root. +# The crate may consist of several source files. # C code # Executables compiled from a single .c file host-csingle := $(foreach m,$(hostprogs), \ - $(if $($(m)-objs)$($(m)-cxxobjs),,$(m))) + $(if $($(m)-objs)$($(m)-cxxobjs)$($(m)-rust),,$(m))) # C executables linked based on several .o files host-cmulti := $(foreach m,$(hostprogs),\ - $(if $($(m)-cxxobjs),,$(if $($(m)-objs),$(m)))) + $(if $($(m)-cxxobjs)$($(m)-rust),,$(if $($(m)-objs),$(m)))) # Object (.o) files compiled from .c files host-cobjs := $(sort $(foreach m,$(hostprogs),$($(m)-objs))) @@ -58,11 +65,17 @@ host-cxxmulti := $(foreach m,$(hostprogs),$(if $($(m)-cxxobjs),$(m))) # C++ Object (.o) files compiled from .cc files host-cxxobjs := $(sort $(foreach m,$(host-cxxmulti),$($(m)-cxxobjs))) +# Rust code +# Executables compiled from a single Rust crate (which may consist of +# one or more .rs files) +host-rust := $(foreach m,$(hostprogs),$(if $($(m)-rust),$(m))) + host-csingle := $(addprefix $(obj)/,$(host-csingle)) host-cmulti := $(addprefix $(obj)/,$(host-cmulti)) host-cobjs := $(addprefix $(obj)/,$(host-cobjs)) host-cxxmulti := $(addprefix $(obj)/,$(host-cxxmulti)) host-cxxobjs := $(addprefix $(obj)/,$(host-cxxobjs)) +host-rust := $(addprefix $(obj)/,$(host-rust)) ##### # Handle options to gcc. Support building with separate output directory @@ -71,6 +84,8 @@ _hostc_flags = $(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS) \ $(HOSTCFLAGS_$(target-stem).o) _hostcxx_flags = $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \ $(HOSTCXXFLAGS_$(target-stem).o) +_hostrust_flags = $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \ + $(HOSTRUSTFLAGS_$(target-stem)) # $(objtree)/$(obj) for including generated headers from checkin source files ifeq ($(KBUILD_EXTMOD),) @@ -82,6 +97,7 @@ endif hostc_flags = -Wp,-MMD,$(depfile) $(_hostc_flags) hostcxx_flags = -Wp,-MMD,$(depfile) $(_hostcxx_flags) +hostrust_flags = $(_hostrust_flags) ##### # Compile programs on the host @@ -128,5 +144,17 @@ quiet_cmd_host-cxxobjs = HOSTCXX $@ $(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE $(call if_changed_dep,host-cxxobjs) +# Create executable from a single Rust crate (which may consist of +# one or more `.rs` files) +# host-rust -> Executable +quiet_cmd_host-rust = HOSTRUSTC $@ + cmd_host-rust = \ + $(HOSTRUSTC) $(hostrust_flags) --emit=dep-info,link \ + --out-dir=$(obj)/ $<; \ + mv $(obj)/$(target-stem).d $(depfile); \ + sed -i '/^\#/d' $(depfile) +$(host-rust): $(obj)/%: $(src)/%.rs FORCE + $(call if_changed_dep,host-rust) + targets += $(host-csingle) $(host-cmulti) $(host-cobjs) \ - $(host-cxxmulti) $(host-cxxobjs) + $(host-cxxmulti) $(host-cxxobjs) $(host-rust) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3fb6a99e78c4..c88b98b5dc44 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -8,6 +8,7 @@ ldflags-y += $(EXTRA_LDFLAGS) # flags that take effect in current and sub directories KBUILD_AFLAGS += $(subdir-asflags-y) KBUILD_CFLAGS += $(subdir-ccflags-y) +KBUILD_RUSTFLAGS += $(subdir-rustflags-y) # Figure out what we need to build from the various variables # =========================================================================== @@ -128,6 +129,10 @@ _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(ccflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(ccflags-y)) \ $(CFLAGS_$(target-stem).o)) +_rust_flags = $(filter-out $(RUSTFLAGS_REMOVE_$(target-stem).o), \ + $(filter-out $(rustflags-remove-y), \ + $(KBUILD_RUSTFLAGS) $(rustflags-y)) \ + $(RUSTFLAGS_$(target-stem).o)) _a_flags = $(filter-out $(AFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(asflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(asflags-y)) \ @@ -202,6 +207,11 @@ modkern_cflags = \ $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE), \ $(KBUILD_CFLAGS_KERNEL) $(CFLAGS_KERNEL) $(modfile_flags)) +modkern_rustflags = \ + $(if $(part-of-module), \ + $(KBUILD_RUSTFLAGS_MODULE) $(RUSTFLAGS_MODULE), \ + $(KBUILD_RUSTFLAGS_KERNEL) $(RUSTFLAGS_KERNEL)) + modkern_aflags = $(if $(part-of-module), \ $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE), \ $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL)) @@ -211,6 +221,8 @@ c_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_c_flags) $(modkern_cflags) \ $(basename_flags) $(modname_flags) +rust_flags = $(_rust_flags) $(modkern_rustflags) @$(objtree)/include/generated/rustc_cfg + a_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_a_flags) $(modkern_aflags) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 35100e981f4a..9a1fa6aa30fe 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -39,11 +39,13 @@ quiet_cmd_ld_ko_o = LD [M] $@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ - if [ -f vmlinux ]; then \ + if [ ! -f vmlinux ]; then \ + printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ + elif [ -n "$(CONFIG_RUST)" ] && $(srctree)/scripts/is_rust_module.sh $@; then \ + printf "Skipping BTF generation for %s because it's a Rust module\n" $@ 1>&2; \ + else \ LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \ $(RESOLVE_BTFIDS) -b vmlinux $@; \ - else \ - printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ fi; # Same as newer-prereqs, but allows to exclude specified extra dependencies diff --git a/scripts/cc-version.sh b/scripts/cc-version.sh index f1952c522466..2401c86fcf53 100755 --- a/scripts/cc-version.sh +++ b/scripts/cc-version.sh @@ -1,13 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # -# Print the compiler name and its version in a 5 or 6-digit form. +# Print the C compiler name and its version in a 5 or 6-digit form. # Also, perform the minimum version check. set -e -# Print the compiler name and some version components. -get_compiler_info() +# Print the C compiler name and some version components. +get_c_compiler_info() { cat <<- EOF | "$@" -E -P -x c - 2>/dev/null #if defined(__clang__) @@ -32,7 +32,7 @@ get_canonical_version() # $@ instead of $1 because multiple words might be given, e.g. CC="ccache gcc". orig_args="$@" -set -- $(get_compiler_info "$@") +set -- $(get_c_compiler_info "$@") name=$1 @@ -52,7 +52,7 @@ ICC) min_version=$($min_tool_version icc) ;; *) - echo "$orig_args: unknown compiler" >&2 + echo "$orig_args: unknown C compiler" >&2 exit 1 ;; esac @@ -62,7 +62,7 @@ min_cversion=$(get_canonical_version $min_version) if [ "$cversion" -lt "$min_cversion" ]; then echo >&2 "***" - echo >&2 "*** Compiler is too old." + echo >&2 "*** C compiler is too old." echo >&2 "*** Your $name version: $version" echo >&2 "*** Minimum $name version: $min_version" echo >&2 "***" diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 79e759aac543..c8a616a9d034 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3616,7 +3616,7 @@ sub process { my $comment = ""; if ($realfile =~ /\.(h|s|S)$/) { $comment = '/*'; - } elsif ($realfile =~ /\.(c|dts|dtsi)$/) { + } elsif ($realfile =~ /\.(c|rs|dts|dtsi)$/) { $comment = '//'; } elsif (($checklicenseline == 2) || $realfile =~ /\.(sh|pl|py|awk|tc|yaml)$/) { $comment = '#'; @@ -3664,7 +3664,7 @@ sub process { } # check we are in a valid source file if not then ignore this hunk - next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/); + next if ($realfile !~ /\.(h|c|rs|s|S|sh|dtsi|dts)$/); # check for using SPDX-License-Identifier on the wrong line number if ($realline != $checklicenseline && @@ -3751,7 +3751,7 @@ sub process { if ($realfile =~ /\.S$/ && $line =~ /^\+\s*(?:[A-Z]+_)?SYM_[A-Z]+_(?:START|END)(?:_[A-Z_]+)?\s*\(\s*\.L/) { WARN("AVOID_L_PREFIX", - "Avoid using '.L' prefixed local symbol names for denoting a range of code via 'SYM_*_START/END' annotations; see Documentation/asm-annotations.rst\n" . $herecurr); + "Avoid using '.L' prefixed local symbol names for denoting a range of code via 'SYM_*_START/END' annotations; see Documentation/core-api/asm-annotations.rst\n" . $herecurr); } # check we are in a valid source file C or perl if not then ignore this hunk @@ -4695,12 +4695,12 @@ sub process { } } -# avoid BUG() or BUG_ON() - if ($line =~ /\b(?:BUG|BUG_ON)\b/) { +# do not use BUG() or variants + if ($line =~ /\b(?!AA_|BUILD_|DCCP_|IDA_|KVM_|RWLOCK_|snd_|SPIN_)(?:[a-zA-Z_]*_)?BUG(?:_ON)?(?:_[A-Z_]+)?\s*\(/) { my $msg_level = \&WARN; $msg_level = \&CHK if ($file); &{$msg_level}("AVOID_BUG", - "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr); + "Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants\n" . $herecurr); } # avoid LINUX_VERSION_CODE @@ -6783,15 +6783,19 @@ sub process { } if ($bad_specifier ne "") { my $stat_real = get_stat_real($linenr, $lc); + my $msg_level = \&WARN; my $ext_type = "Invalid"; my $use = ""; if ($bad_specifier =~ /p[Ff]/) { $use = " - use %pS instead"; $use =~ s/pS/ps/ if ($bad_specifier =~ /pf/); + } elsif ($bad_specifier =~ /pA/) { + $use = " - '%pA' is only intended to be used from Rust code"; + $msg_level = \&ERROR; } - WARN("VSPRINTF_POINTER_EXTENSION", - "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n"); + &{$msg_level}("VSPRINTF_POINTER_EXTENSION", + "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n"); } } } diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 7075e26ab2c4..564c5632e1a2 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -8,6 +8,14 @@ usage() { echo " $0 -r <release> | <vmlinux> [<base path>|auto] [<modules path>]" } +# Try to find a Rust demangler +if type llvm-cxxfilt >/dev/null 2>&1 ; then + cppfilt=llvm-cxxfilt +elif type c++filt >/dev/null 2>&1 ; then + cppfilt=c++filt + cppfilt_opts=-i +fi + if [[ $1 == "-r" ]] ; then vmlinux="" basepath="auto" @@ -180,6 +188,12 @@ parse_symbol() { # In the case of inlines, move everything to same line code=${code//$'\n'/' '} + # Demangle if the name looks like a Rust symbol and if + # we got a Rust demangler + if [[ $name =~ ^_R && $cppfilt != "" ]] ; then + name=$("$cppfilt" "$cppfilt_opts" "$name") + fi + # Replace old address with pretty line numbers symbol="$segment$name ($code)" } diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py new file mode 100755 index 000000000000..75bb611bd751 --- /dev/null +++ b/scripts/generate_rust_analyzer.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""generate_rust_analyzer - Generates the `rust-project.json` file for `rust-analyzer`. +""" + +import argparse +import json +import logging +import pathlib +import sys + +def generate_crates(srctree, objtree, sysroot_src): + # Generate the configuration list. + cfg = [] + with open(objtree / "include" / "generated" / "rustc_cfg") as fd: + for line in fd: + line = line.replace("--cfg=", "") + line = line.replace("\n", "") + cfg.append(line) + + # Now fill the crates list -- dependencies need to come first. + # + # Avoid O(n^2) iterations by keeping a map of indexes. + crates = [] + crates_indexes = {} + + def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False): + crates_indexes[display_name] = len(crates) + crates.append({ + "display_name": display_name, + "root_module": str(root_module), + "is_workspace_member": is_workspace_member, + "is_proc_macro": is_proc_macro, + "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps], + "cfg": cfg, + "edition": "2021", + "env": { + "RUST_MODFILE": "This is only for rust-analyzer" + } + }) + + # First, the ones in `rust/` since they are a bit special. + append_crate( + "core", + sysroot_src / "core" / "src" / "lib.rs", + [], + is_workspace_member=False, + ) + + append_crate( + "compiler_builtins", + srctree / "rust" / "compiler_builtins.rs", + [], + ) + + append_crate( + "alloc", + srctree / "rust" / "alloc" / "lib.rs", + ["core", "compiler_builtins"], + ) + + append_crate( + "macros", + srctree / "rust" / "macros" / "lib.rs", + [], + is_proc_macro=True, + ) + crates[-1]["proc_macro_dylib_path"] = "rust/libmacros.so" + + append_crate( + "bindings", + srctree / "rust"/ "bindings" / "lib.rs", + ["core"], + cfg=cfg, + ) + crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + + append_crate( + "kernel", + srctree / "rust" / "kernel" / "lib.rs", + ["core", "alloc", "macros", "bindings"], + cfg=cfg, + ) + crates[-1]["source"] = { + "include_dirs": [ + str(srctree / "rust" / "kernel"), + str(objtree / "rust") + ], + "exclude_dirs": [], + } + + # Then, the rest outside of `rust/`. + # + # We explicitly mention the top-level folders we want to cover. + for folder in ("samples", "drivers"): + for path in (srctree / folder).rglob("*.rs"): + logging.info("Checking %s", path) + name = path.name.replace(".rs", "") + + # Skip those that are not crate roots. + if f"{name}.o" not in open(path.parent / "Makefile").read(): + continue + + logging.info("Adding %s", name) + append_crate( + name, + path, + ["core", "alloc", "kernel"], + cfg=cfg, + ) + + return crates + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', '-v', action='store_true') + parser.add_argument("srctree", type=pathlib.Path) + parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot_src", type=pathlib.Path) + args = parser.parse_args() + + logging.basicConfig( + format="[%(asctime)s] [%(levelname)s] %(message)s", + level=logging.INFO if args.verbose else logging.WARNING + ) + + rust_project = { + "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src), + "sysroot_src": str(args.sysroot_src), + } + + json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) + +if __name__ == "__main__": + main() diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs new file mode 100644 index 000000000000..3c6cbe2b278d --- /dev/null +++ b/scripts/generate_rust_target.rs @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The custom target specification file generator for `rustc`. +//! +//! To configure a target from scratch, a JSON-encoded file has to be passed +//! to `rustc` (introduced in [RFC 131]). These options and the file itself are +//! unstable. Eventually, `rustc` should provide a way to do this in a stable +//! manner. For instance, via command-line arguments. Therefore, this file +//! should avoid using keys which can be set via `-C` or `-Z` options. +//! +//! [RFC 131]: https://rust-lang.github.io/rfcs/0131-target-specification.html + +use std::{ + collections::HashMap, + fmt::{Display, Formatter, Result}, + io::BufRead, +}; + +enum Value { + Boolean(bool), + Number(i32), + String(String), + Object(Object), +} + +type Object = Vec<(String, Value)>; + +/// Minimal "almost JSON" generator (e.g. no `null`s, no arrays, no escaping), +/// enough for this purpose. +impl Display for Value { + fn fmt(&self, formatter: &mut Formatter<'_>) -> Result { + match self { + Value::Boolean(boolean) => write!(formatter, "{}", boolean), + Value::Number(number) => write!(formatter, "{}", number), + Value::String(string) => write!(formatter, "\"{}\"", string), + Value::Object(object) => { + formatter.write_str("{")?; + if let [ref rest @ .., ref last] = object[..] { + for (key, value) in rest { + write!(formatter, "\"{}\": {},", key, value)?; + } + write!(formatter, "\"{}\": {}", last.0, last.1)?; + } + formatter.write_str("}") + } + } + } +} + +struct TargetSpec(Object); + +impl TargetSpec { + fn new() -> TargetSpec { + TargetSpec(Vec::new()) + } +} + +trait Push<T> { + fn push(&mut self, key: &str, value: T); +} + +impl Push<bool> for TargetSpec { + fn push(&mut self, key: &str, value: bool) { + self.0.push((key.to_string(), Value::Boolean(value))); + } +} + +impl Push<i32> for TargetSpec { + fn push(&mut self, key: &str, value: i32) { + self.0.push((key.to_string(), Value::Number(value))); + } +} + +impl Push<String> for TargetSpec { + fn push(&mut self, key: &str, value: String) { + self.0.push((key.to_string(), Value::String(value))); + } +} + +impl Push<&str> for TargetSpec { + fn push(&mut self, key: &str, value: &str) { + self.push(key, value.to_string()); + } +} + +impl Push<Object> for TargetSpec { + fn push(&mut self, key: &str, value: Object) { + self.0.push((key.to_string(), Value::Object(value))); + } +} + +impl Display for TargetSpec { + fn fmt(&self, formatter: &mut Formatter<'_>) -> Result { + // We add some newlines for clarity. + formatter.write_str("{\n")?; + if let [ref rest @ .., ref last] = self.0[..] { + for (key, value) in rest { + write!(formatter, " \"{}\": {},\n", key, value)?; + } + write!(formatter, " \"{}\": {}\n", last.0, last.1)?; + } + formatter.write_str("}") + } +} + +struct KernelConfig(HashMap<String, String>); + +impl KernelConfig { + /// Parses `include/config/auto.conf` from `stdin`. + fn from_stdin() -> KernelConfig { + let mut result = HashMap::new(); + + let stdin = std::io::stdin(); + let mut handle = stdin.lock(); + let mut line = String::new(); + + loop { + line.clear(); + + if handle.read_line(&mut line).unwrap() == 0 { + break; + } + + if line.starts_with('#') { + continue; + } + + let (key, value) = line.split_once('=').expect("Missing `=` in line."); + result.insert(key.to_string(), value.trim_end_matches('\n').to_string()); + } + + KernelConfig(result) + } + + /// Does the option exist in the configuration (any value)? + /// + /// The argument must be passed without the `CONFIG_` prefix. + /// This avoids repetition and it also avoids `fixdep` making us + /// depend on it. + fn has(&self, option: &str) -> bool { + let option = "CONFIG_".to_owned() + option; + self.0.contains_key(&option) + } +} + +fn main() { + let cfg = KernelConfig::from_stdin(); + let mut ts = TargetSpec::new(); + + // `llvm-target`s are taken from `scripts/Makefile.clang`. + if cfg.has("X86_64") { + ts.push("arch", "x86_64"); + ts.push( + "data-layout", + "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + ); + let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + if cfg.has("RETPOLINE") { + features += ",+retpoline-external-thunk"; + } + ts.push("features", features); + ts.push("llvm-target", "x86_64-linux-gnu"); + ts.push("target-pointer-width", "64"); + } else { + panic!("Unsupported architecture"); + } + + ts.push("emit-debug-gdb-scripts", false); + ts.push("frame-pointer", "may-omit"); + ts.push( + "stack-probes", + vec![("kind".to_string(), Value::String("none".to_string()))], + ); + + // Everything else is LE, whether `CPU_LITTLE_ENDIAN` is declared or not + // (e.g. x86). It is also `rustc`'s default. + if cfg.has("CPU_BIG_ENDIAN") { + ts.push("target-endian", "big"); + } + + println!("{}", ts); +} diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh new file mode 100755 index 000000000000..28b3831a7593 --- /dev/null +++ b/scripts/is_rust_module.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# is_rust_module.sh module.ko +# +# Returns `0` if `module.ko` is a Rust module, `1` otherwise. + +set -e + +# Using the `16_` prefix ensures other symbols with the same substring +# are not picked up (even if it would be unlikely). The last part is +# used just in case LLVM decides to use the `.` suffix. +# +# In the future, checking for the `.comment` section may be another +# option, see https://github.com/rust-lang/rust/pull/97550. +${NM} "$*" | grep -qE '^[0-9a-fA-F]+ r _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$' diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index f18e6dfc68c5..ff5e7810e437 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -27,7 +27,23 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -#define KSYM_NAME_LEN 128 +#define _stringify_1(x) #x +#define _stringify(x) _stringify_1(x) + +#define KSYM_NAME_LEN 512 + +/* + * A substantially bigger size than the current maximum. + * + * It cannot be defined as an expression because it gets stringified + * for the fscanf() format string. Therefore, a _Static_assert() is + * used instead to maintain the relationship with KSYM_NAME_LEN. + */ +#define KSYM_NAME_LEN_BUFFER 2048 +_Static_assert( + KSYM_NAME_LEN_BUFFER == KSYM_NAME_LEN * 4, + "Please keep KSYM_NAME_LEN_BUFFER in sync with KSYM_NAME_LEN" +); struct sym_entry { unsigned long long addr; @@ -119,6 +135,7 @@ static bool is_ignored_symbol(const char *name, char type) "__ThumbV7PILongThunk_", "__LA25Thunk_", /* mips lld */ "__microLA25Thunk_", + "__kcfi_typeid_", /* CFI type identifiers */ NULL }; @@ -198,15 +215,15 @@ static void check_symbol_range(const char *sym, unsigned long long addr, static struct sym_entry *read_symbol(FILE *in) { - char name[500], type; + char name[KSYM_NAME_LEN_BUFFER+1], type; unsigned long long addr; unsigned int len; struct sym_entry *sym; int rc; - rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name); + rc = fscanf(in, "%llx %c %" _stringify(KSYM_NAME_LEN_BUFFER) "s\n", &addr, &type, name); if (rc != 3) { - if (rc != EOF && fgets(name, 500, in) == NULL) + if (rc != EOF && fgets(name, ARRAY_SIZE(name), in) == NULL) fprintf(stderr, "Read error or end of file.\n"); return NULL; } @@ -471,12 +488,35 @@ static void write_src(void) if ((i & 0xFF) == 0) markers[i >> 8] = off; - printf("\t.byte 0x%02x", table[i]->len); + /* There cannot be any symbol of length zero. */ + if (table[i]->len == 0) { + fprintf(stderr, "kallsyms failure: " + "unexpected zero symbol length\n"); + exit(EXIT_FAILURE); + } + + /* Only lengths that fit in up-to-two-byte ULEB128 are supported. */ + if (table[i]->len > 0x3FFF) { + fprintf(stderr, "kallsyms failure: " + "unexpected huge symbol length\n"); + exit(EXIT_FAILURE); + } + + /* Encode length with ULEB128. */ + if (table[i]->len <= 0x7F) { + /* Most symbols use a single byte for the length. */ + printf("\t.byte 0x%02x", table[i]->len); + off += table[i]->len + 1; + } else { + /* "Big" symbols use two bytes. */ + printf("\t.byte 0x%02x, 0x%02x", + (table[i]->len & 0x7F) | 0x80, + (table[i]->len >> 7) & 0x7F); + off += table[i]->len + 2; + } for (k = 0; k < table[i]->len; k++) printf(", 0x%02x", table[i]->sym[k]); printf("\n"); - - off += table[i]->len + 1; } printf("\n"); diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index c4340c90e172..b7c9f1dd5e42 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -216,6 +216,13 @@ static const char *conf_get_autoheader_name(void) return name ? name : "include/generated/autoconf.h"; } +static const char *conf_get_rustccfg_name(void) +{ + char *name = getenv("KCONFIG_RUSTCCFG"); + + return name ? name : "include/generated/rustc_cfg"; +} + static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) { char *p2; @@ -605,6 +612,9 @@ static const struct comment_style comment_style_c = { static void conf_write_heading(FILE *fp, const struct comment_style *cs) { + if (!cs) + return; + fprintf(fp, "%s\n", cs->prefix); fprintf(fp, "%s Automatically generated file; DO NOT EDIT.\n", @@ -745,6 +755,65 @@ static void print_symbol_for_c(FILE *fp, struct symbol *sym) free(escaped); } +static void print_symbol_for_rustccfg(FILE *fp, struct symbol *sym) +{ + const char *val; + const char *val_prefix = ""; + char *val_prefixed = NULL; + size_t val_prefixed_len; + char *escaped = NULL; + + if (sym->type == S_UNKNOWN) + return; + + val = sym_get_string_value(sym); + + switch (sym->type) { + case S_BOOLEAN: + case S_TRISTATE: + /* + * We do not care about disabled ones, i.e. no need for + * what otherwise are "comments" in other printers. + */ + if (*val == 'n') + return; + + /* + * To have similar functionality to the C macro `IS_ENABLED()` + * we provide an empty `--cfg CONFIG_X` here in both `y` + * and `m` cases. + * + * Then, the common `fprintf()` below will also give us + * a `--cfg CONFIG_X="y"` or `--cfg CONFIG_X="m"`, which can + * be used as the equivalent of `IS_BUILTIN()`/`IS_MODULE()`. + */ + fprintf(fp, "--cfg=%s%s\n", CONFIG_, sym->name); + break; + case S_HEX: + if (val[0] != '0' || (val[1] != 'x' && val[1] != 'X')) + val_prefix = "0x"; + break; + default: + break; + } + + if (strlen(val_prefix) > 0) { + val_prefixed_len = strlen(val) + strlen(val_prefix) + 1; + val_prefixed = xmalloc(val_prefixed_len); + snprintf(val_prefixed, val_prefixed_len, "%s%s", val_prefix, val); + val = val_prefixed; + } + + /* All values get escaped: the `--cfg` option only takes strings */ + escaped = escape_string_value(val); + val = escaped; + + fprintf(fp, "--cfg=%s%s=%s\n", CONFIG_, sym->name, val); + + free(escaped); + free(val_prefixed); +} + /* * Write out a minimal config. * All values that has default values are skipped as this is redundant. @@ -1132,6 +1201,12 @@ int conf_write_autoconf(int overwrite) if (ret) return ret; + ret = __conf_write_autoconf(conf_get_rustccfg_name(), + print_symbol_for_rustccfg, + NULL); + if (ret) + return ret; + /* * Create include/config/auto.conf. This must be the last step because * Kbuild has a dependency on auto.conf and this marks the successful diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 250925aab101..b6593eac5003 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -30,6 +30,12 @@ llvm) echo 11.0.0 fi ;; +rustc) + echo 1.62.0 + ;; +bindgen) + echo 0.56.0 + ;; *) echo "$1: unknown tool" >&2 exit 1 diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 3a3aa2354ed8..da4bddd26171 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -3,20 +3,10 @@ * Archs are free to supply their own linker scripts. ld will * combine them automatically. */ -#ifdef CONFIG_CFI_CLANG -# include <asm/page.h> -# define ALIGN_CFI ALIGN(PAGE_SIZE) -# define SANITIZER_DISCARDS *(.eh_frame) -#else -# define ALIGN_CFI -# define SANITIZER_DISCARDS -#endif - SECTIONS { /DISCARD/ : { *(.discard) *(.discard.*) - SANITIZER_DISCARDS } __ksymtab 0 : { *(SORT(___ksymtab+*)) } @@ -33,6 +23,10 @@ SECTIONS { __patchable_function_entries : { *(__patchable_function_entries) } +#ifdef CONFIG_ARCH_USES_CFI_TRAPS + __kcfi_traps : { KEEP(*(.kcfi_traps)) } +#endif + #ifdef CONFIG_LTO_CLANG /* * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and @@ -53,15 +47,6 @@ SECTIONS { *(.rodata .rodata.[0-9a-zA-Z_]*) *(.rodata..L*) } - - /* - * With CONFIG_CFI_CLANG, we assume __cfi_check is at the beginning - * of the .text section, and is aligned to PAGE_SIZE. - */ - .text : ALIGN_CFI { - *(.text.__cfi_check) - *(.text .text.[0-9a-zA-Z_]* .text..L.cfi*) - } #endif } diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh new file mode 100755 index 000000000000..aebbf1913970 --- /dev/null +++ b/scripts/rust_is_available.sh @@ -0,0 +1,160 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Tests whether a suitable Rust toolchain is available. +# +# Pass `-v` for human output and more checks (as warnings). + +set -e + +min_tool_version=$(dirname $0)/min-tool-version.sh + +# Convert the version string x.y.z to a canonical up-to-7-digits form. +# +# Note that this function uses one more digit (compared to other +# instances in other version scripts) to give a bit more space to +# `rustc` since it will reach 1.100.0 in late 2026. +get_canonical_version() +{ + IFS=. + set -- $1 + echo $((100000 * $1 + 100 * $2 + $3)) +} + +# Check that the Rust compiler exists. +if ! command -v "$RUSTC" >/dev/null; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** Rust compiler '$RUSTC' could not be found." + echo >&2 "***" + fi + exit 1 +fi + +# Check that the Rust bindings generator exists. +if ! command -v "$BINDGEN" >/dev/null; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** Rust bindings generator '$BINDGEN' could not be found." + echo >&2 "***" + fi + exit 1 +fi + +# Check that the Rust compiler version is suitable. +# +# Non-stable and distributions' versions may have a version suffix, e.g. `-dev`. +rust_compiler_version=$( \ + LC_ALL=C "$RUSTC" --version 2>/dev/null \ + | head -n 1 \ + | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \ +) +rust_compiler_min_version=$($min_tool_version rustc) +rust_compiler_cversion=$(get_canonical_version $rust_compiler_version) +rust_compiler_min_cversion=$(get_canonical_version $rust_compiler_min_version) +if [ "$rust_compiler_cversion" -lt "$rust_compiler_min_cversion" ]; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** Rust compiler '$RUSTC' is too old." + echo >&2 "*** Your version: $rust_compiler_version" + echo >&2 "*** Minimum version: $rust_compiler_min_version" + echo >&2 "***" + fi + exit 1 +fi +if [ "$1" = -v ] && [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then + echo >&2 "***" + echo >&2 "*** Rust compiler '$RUSTC' is too new. This may or may not work." + echo >&2 "*** Your version: $rust_compiler_version" + echo >&2 "*** Expected version: $rust_compiler_min_version" + echo >&2 "***" +fi + +# Check that the Rust bindings generator is suitable. +# +# Non-stable and distributions' versions may have a version suffix, e.g. `-dev`. +rust_bindings_generator_version=$( \ + LC_ALL=C "$BINDGEN" --version 2>/dev/null \ + | head -n 1 \ + | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \ +) +rust_bindings_generator_min_version=$($min_tool_version bindgen) +rust_bindings_generator_cversion=$(get_canonical_version $rust_bindings_generator_version) +rust_bindings_generator_min_cversion=$(get_canonical_version $rust_bindings_generator_min_version) +if [ "$rust_bindings_generator_cversion" -lt "$rust_bindings_generator_min_cversion" ]; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** Rust bindings generator '$BINDGEN' is too old." + echo >&2 "*** Your version: $rust_bindings_generator_version" + echo >&2 "*** Minimum version: $rust_bindings_generator_min_version" + echo >&2 "***" + fi + exit 1 +fi +if [ "$1" = -v ] && [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cversion" ]; then + echo >&2 "***" + echo >&2 "*** Rust bindings generator '$BINDGEN' is too new. This may or may not work." + echo >&2 "*** Your version: $rust_bindings_generator_version" + echo >&2 "*** Expected version: $rust_bindings_generator_min_version" + echo >&2 "***" +fi + +# Check that the `libclang` used by the Rust bindings generator is suitable. +bindgen_libclang_version=$( \ + LC_ALL=C "$BINDGEN" $(dirname $0)/rust_is_available_bindgen_libclang.h 2>&1 >/dev/null \ + | grep -F 'clang version ' \ + | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \ + | head -n 1 \ +) +bindgen_libclang_min_version=$($min_tool_version llvm) +bindgen_libclang_cversion=$(get_canonical_version $bindgen_libclang_version) +bindgen_libclang_min_cversion=$(get_canonical_version $bindgen_libclang_min_version) +if [ "$bindgen_libclang_cversion" -lt "$bindgen_libclang_min_cversion" ]; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN') is too old." + echo >&2 "*** Your version: $bindgen_libclang_version" + echo >&2 "*** Minimum version: $bindgen_libclang_min_version" + echo >&2 "***" + fi + exit 1 +fi + +# If the C compiler is Clang, then we can also check whether its version +# matches the `libclang` version used by the Rust bindings generator. +# +# In the future, we might be able to perform a full version check, see +# https://github.com/rust-lang/rust-bindgen/issues/2138. +if [ "$1" = -v ]; then + cc_name=$($(dirname $0)/cc-version.sh "$CC" | cut -f1 -d' ') + if [ "$cc_name" = Clang ]; then + clang_version=$( \ + LC_ALL=C "$CC" --version 2>/dev/null \ + | sed -nE '1s:.*version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p' + ) + if [ "$clang_version" != "$bindgen_libclang_version" ]; then + echo >&2 "***" + echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN')" + echo >&2 "*** version does not match Clang's. This may be a problem." + echo >&2 "*** libclang version: $bindgen_libclang_version" + echo >&2 "*** Clang version: $clang_version" + echo >&2 "***" + fi + fi +fi + +# Check that the source code for the `core` standard library exists. +# +# `$KRUSTFLAGS` is passed in case the user added `--sysroot`. +rustc_sysroot=$("$RUSTC" $KRUSTFLAGS --print sysroot) +rustc_src=${RUST_LIB_SRC:-"$rustc_sysroot/lib/rustlib/src/rust/library"} +rustc_src_core="$rustc_src/core/src/lib.rs" +if [ ! -e "$rustc_src_core" ]; then + if [ "$1" = -v ]; then + echo >&2 "***" + echo >&2 "*** Source code for the 'core' standard library could not be found" + echo >&2 "*** at '$rustc_src_core'." + echo >&2 "***" + fi + exit 1 +fi diff --git a/scripts/rust_is_available_bindgen_libclang.h b/scripts/rust_is_available_bindgen_libclang.h new file mode 100644 index 000000000000..0ef6db10d674 --- /dev/null +++ b/scripts/rust_is_available_bindgen_libclang.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma message("clang version " __clang_version__) diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh index 2dccf141241d..24086793b0d8 100755 --- a/scripts/selinux/install_policy.sh +++ b/scripts/selinux/install_policy.sh @@ -31,8 +31,7 @@ fi if selinuxenabled; then echo "SELinux is already enabled" echo "This prevents safely relabeling all files." - echo "Boot with selinux=0 on the kernel command-line or" - echo "SELINUX=disabled in /etc/selinux/config." + echo "Boot with selinux=0 on the kernel command-line." exit 1 fi @@ -78,7 +77,7 @@ cd /etc/selinux/dummy/contexts/files $SF -F file_contexts / mounts=`cat /proc/$$/mounts | \ - egrep "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \ + grep -E "ext[234]|jfs|xfs|reiserfs|jffs2|gfs2|btrfs|f2fs|ocfs2" | \ awk '{ print $2 '}` $SF -F file_contexts $mounts diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index bd2aabb2c60f..995bc42003e6 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -22,11 +22,17 @@ menu "Memory initialization" config CC_HAS_AUTO_VAR_INIT_PATTERN def_bool $(cc-option,-ftrivial-auto-var-init=pattern) -config CC_HAS_AUTO_VAR_INIT_ZERO - # GCC ignores the -enable flag, so we can test for the feature with - # a single invocation using the flag, but drop it as appropriate in - # the Makefile, depending on the presence of Clang. +config CC_HAS_AUTO_VAR_INIT_ZERO_BARE + def_bool $(cc-option,-ftrivial-auto-var-init=zero) + +config CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER + # Clang 16 and later warn about using the -enable flag, but it + # is required before then. def_bool $(cc-option,-ftrivial-auto-var-init=zero -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang) + depends on !CC_HAS_AUTO_VAR_INIT_ZERO_BARE + +config CC_HAS_AUTO_VAR_INIT_ZERO + def_bool CC_HAS_AUTO_VAR_INIT_ZERO_BARE || CC_HAS_AUTO_VAR_INIT_ZERO_ENABLER choice prompt "Initialize kernel stack variables at function entry" diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 2e6fb6e2ffd2..23d484e05e6f 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -457,10 +457,21 @@ static int evm_xattr_acl_change(struct user_namespace *mnt_userns, int rc; /* - * user_ns is not relevant here, ACL_USER/ACL_GROUP don't have impact - * on the inode mode (see posix_acl_equiv_mode()). + * An earlier comment here mentioned that the idmappings for + * ACL_{GROUP,USER} don't matter since EVM is only interested in the + * mode stored as part of POSIX ACLs. Nonetheless, if it must translate + * from the uapi POSIX ACL representation to the VFS internal POSIX ACL + * representation it should do so correctly. There's no guarantee that + * we won't change POSIX ACLs in a way that ACL_{GROUP,USER} matters + * for the mode at some point and it's difficult to keep track of all + * the LSM and integrity modules and what they do to POSIX ACLs. + * + * Frankly, EVM shouldn't try to interpret the uapi struct for POSIX + * ACLs it received. It requires knowledge that only the VFS is + * guaranteed to have. */ - acl = posix_acl_from_xattr(&init_user_ns, xattr_value, xattr_value_len); + acl = vfs_set_acl_prepare(mnt_userns, i_user_ns(inode), + xattr_value, xattr_value_len); if (IS_ERR_OR_NULL(acl)) return 1; diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index bde74fcecee3..3e0fbbd99534 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -750,22 +750,26 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const struct evm_ima_xattr_data *xvalue = xattr_value; int digsig = 0; int result; + int err; result = ima_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); if (result == 1) { if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST)) return -EINVAL; + + err = validate_hash_algo(dentry, xvalue, xattr_value_len); + if (err) + return err; + digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG); } else if (!strcmp(xattr_name, XATTR_NAME_EVM) && xattr_value_len > 0) { digsig = (xvalue->type == EVM_XATTR_PORTABLE_DIGSIG); } if (result == 1 || evm_revalidate_status(xattr_name)) { - result = validate_hash_algo(dentry, xvalue, xattr_value_len); - if (result) - return result; - ima_reset_appraise_flags(d_backing_inode(dentry), digsig); + if (result == 1) + result = 0; } return result; } diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 093894a640dc..b78753d27d8e 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -31,7 +31,7 @@ static const struct dmi_system_id uefi_skip_cert[] = { { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") }, - { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacMini8,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "Macmini8,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") }, { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") }, diff --git a/security/landlock/fs.c b/security/landlock/fs.c index a9dbd99d9ee7..64ed7665455f 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -712,7 +712,7 @@ static inline access_mask_t maybe_remove(const struct dentry *const dentry) * allowed accesses in @layer_masks_dom. * * This is similar to check_access_path_dual() but much simpler because it only - * handles walking on the same mount point and only check one set of accesses. + * handles walking on the same mount point and only checks one set of accesses. * * Returns: * - true if all the domain access rights are allowed for @dir; diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 735a0865ea11..2ca0ccbd905a 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -149,10 +149,10 @@ static const struct file_operations ruleset_fops = { * * Possible returned errors are: * - * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; - * - EINVAL: unknown @flags, or unknown access, or too small @size; - * - E2BIG or EFAULT: @attr or @size inconsistencies; - * - ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. + * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; + * - %EINVAL: unknown @flags, or unknown access, or too small @size; + * - %E2BIG or %EFAULT: @attr or @size inconsistencies; + * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, @@ -280,7 +280,7 @@ out_fdput: * @ruleset_fd: File descriptor tied to the ruleset that should be extended * with the new rule. * @rule_type: Identify the structure type pointed to by @rule_attr (only - * LANDLOCK_RULE_PATH_BENEATH for now). + * %LANDLOCK_RULE_PATH_BENEATH for now). * @rule_attr: Pointer to a rule (only of type &struct * landlock_path_beneath_attr for now). * @flags: Must be 0. @@ -290,17 +290,17 @@ out_fdput: * * Possible returned errors are: * - * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; - * - EINVAL: @flags is not 0, or inconsistent access in the rule (i.e. + * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; + * - %EINVAL: @flags is not 0, or inconsistent access in the rule (i.e. * &landlock_path_beneath_attr.allowed_access is not a subset of the * ruleset handled accesses); - * - ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access); - * - EBADF: @ruleset_fd is not a file descriptor for the current thread, or a + * - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access); + * - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a * member of @rule_attr is not a file descriptor as expected; - * - EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of + * - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of * @rule_attr is not the expected file descriptor type; - * - EPERM: @ruleset_fd has no write access to the underlying ruleset; - * - EFAULT: @rule_attr inconsistency. + * - %EPERM: @ruleset_fd has no write access to the underlying ruleset; + * - %EFAULT: @rule_attr inconsistency. */ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, const enum landlock_rule_type, rule_type, @@ -378,20 +378,20 @@ out_put_ruleset: * @flags: Must be 0. * * This system call enables to enforce a Landlock ruleset on the current - * thread. Enforcing a ruleset requires that the task has CAP_SYS_ADMIN in its + * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * * Possible returned errors are: * - * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; - * - EINVAL: @flags is not 0. - * - EBADF: @ruleset_fd is not a file descriptor for the current thread; - * - EBADFD: @ruleset_fd is not a ruleset file descriptor; - * - EPERM: @ruleset_fd has no read access to the underlying ruleset, or the + * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; + * - %EINVAL: @flags is not 0. + * - %EBADF: @ruleset_fd is not a file descriptor for the current thread; + * - %EBADFD: @ruleset_fd is not a ruleset file descriptor; + * - %EPERM: @ruleset_fd has no read access to the underlying ruleset, or the * current thread is not running with no_new_privs, or it doesn't have - * CAP_SYS_ADMIN in its namespace. - * - E2BIG: The maximum number of stacked rulesets is reached for the current + * %CAP_SYS_ADMIN in its namespace. + * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, diff --git a/security/loadpin/Kconfig b/security/loadpin/Kconfig index 70e7985b2561..6724eaba3d36 100644 --- a/security/loadpin/Kconfig +++ b/security/loadpin/Kconfig @@ -33,4 +33,9 @@ config SECURITY_LOADPIN_VERITY on the LoadPin securityfs entry 'dm-verity'. The ioctl expects a file descriptor of a file with verity digests as parameter. The file must be located on the pinned root and - contain a comma separated list of digests. + start with the line: + + # LOADPIN_TRUSTED_VERITY_ROOT_DIGESTS + + This is followed by the verity digests, with one digest per + line. diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 44521582dcba..de41621f4998 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -21,6 +21,8 @@ #include <linux/dm-verity-loadpin.h> #include <uapi/linux/loadpin.h> +#define VERITY_DIGEST_FILE_HEADER "# LOADPIN_TRUSTED_VERITY_ROOT_DIGESTS" + static void report_load(const char *origin, struct file *file, char *operation) { char *cmdline, *pathname; @@ -292,9 +294,21 @@ static int read_trusted_verity_root_digests(unsigned int fd) p = strim(data); while ((d = strsep(&p, "\n")) != NULL) { - int len = strlen(d); + int len; struct dm_verity_loadpin_trusted_root_digest *trd; + if (d == data) { + /* first line, validate header */ + if (strcmp(d, VERITY_DIGEST_FILE_HEADER)) { + rc = -EPROTO; + goto err; + } + + continue; + } + + len = strlen(d); + if (len % 2) { rc = -EPROTO; goto err; diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 87cbdc64d272..a79b985e917e 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -63,7 +63,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) if (kernel_locked_down >= what) { if (lockdown_reasons[what]) - pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", + pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", current->comm, lockdown_reasons[what]); return -EPERM; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 78a278f28e49..75cc3f8d2a42 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -44,9 +44,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, struct iphdr *ih; ih = ip_hdr(skb); - if (ih == NULL) - return -EINVAL; - ad->u.net->v4info.saddr = ih->saddr; ad->u.net->v4info.daddr = ih->daddr; @@ -59,8 +56,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, switch (ih->protocol) { case IPPROTO_TCP: { struct tcphdr *th = tcp_hdr(skb); - if (th == NULL) - break; ad->u.net->sport = th->source; ad->u.net->dport = th->dest; @@ -68,8 +63,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, } case IPPROTO_UDP: { struct udphdr *uh = udp_hdr(skb); - if (uh == NULL) - break; ad->u.net->sport = uh->source; ad->u.net->dport = uh->dest; @@ -77,8 +70,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, } case IPPROTO_DCCP: { struct dccp_hdr *dh = dccp_hdr(skb); - if (dh == NULL) - break; ad->u.net->sport = dh->dccph_sport; ad->u.net->dport = dh->dccph_dport; @@ -86,8 +77,7 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, } case IPPROTO_SCTP: { struct sctphdr *sh = sctp_hdr(skb); - if (sh == NULL) - break; + ad->u.net->sport = sh->source; ad->u.net->dport = sh->dest; break; @@ -115,8 +105,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb, __be16 frag_off; ip6 = ipv6_hdr(skb); - if (ip6 == NULL) - return -EINVAL; ad->u.net->v6info.saddr = ip6->saddr; ad->u.net->v6info.daddr = ip6->daddr; /* IPv6 can have several extension header before the Transport header diff --git a/security/security.c b/security/security.c index 4b95de24bc8d..9696dd64095e 100644 --- a/security/security.c +++ b/security/security.c @@ -1909,6 +1909,11 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode) call_void_hook(task_to_inode, p, inode); } +int security_create_user_ns(const struct cred *cred) +{ + return call_int_hook(userns_create, 0, cred); +} + int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { return call_int_hook(ipc_permission, 0, ipcp, flag); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 03bca97c8b29..f8b05f14284b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4222,6 +4222,14 @@ static void selinux_task_to_inode(struct task_struct *p, spin_unlock(&isec->lock); } +static int selinux_userns_create(const struct cred *cred) +{ + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, sid, SECCLASS_USER_NAMESPACE, + USER_NAMESPACE__CREATE, NULL); +} + /* Returns error only if unable to parse addresses */ static int selinux_parse_skb_ipv4(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto) @@ -5987,7 +5995,6 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) struct ipc_security_struct *isec; struct common_audit_data ad; u32 sid = current_sid(); - int rc; isec = selinux_ipc(msq); ipc_init_security(isec, SECCLASS_MSGQ); @@ -5995,10 +6002,9 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->key; - rc = avc_has_perm(&selinux_state, - sid, isec->sid, SECCLASS_MSGQ, - MSGQ__CREATE, &ad); - return rc; + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, + MSGQ__CREATE, &ad); } static int selinux_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) @@ -6126,7 +6132,6 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) struct ipc_security_struct *isec; struct common_audit_data ad; u32 sid = current_sid(); - int rc; isec = selinux_ipc(shp); ipc_init_security(isec, SECCLASS_SHM); @@ -6134,10 +6139,9 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->key; - rc = avc_has_perm(&selinux_state, - sid, isec->sid, SECCLASS_SHM, - SHM__CREATE, &ad); - return rc; + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SHM, + SHM__CREATE, &ad); } static int selinux_shm_associate(struct kern_ipc_perm *shp, int shmflg) @@ -6211,7 +6215,6 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) struct ipc_security_struct *isec; struct common_audit_data ad; u32 sid = current_sid(); - int rc; isec = selinux_ipc(sma); ipc_init_security(isec, SECCLASS_SEM); @@ -6219,10 +6222,9 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->key; - rc = avc_has_perm(&selinux_state, - sid, isec->sid, SECCLASS_SEM, - SEM__CREATE, &ad); - return rc; + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SEM, + SEM__CREATE, &ad); } static int selinux_sem_associate(struct kern_ipc_perm *sma, int semflg) @@ -7134,6 +7136,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(task_movememory, selinux_task_movememory), LSM_HOOK_INIT(task_kill, selinux_task_kill), LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode), + LSM_HOOK_INIT(userns_create, selinux_userns_create), LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission), LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 1c2f41ff4e55..a3c380775d41 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -254,6 +254,8 @@ const struct security_class_mapping secclass_map[] = { { COMMON_FILE_PERMS, NULL } }, { "io_uring", { "override_creds", "sqpoll", "cmd", NULL } }, + { "user_namespace", + { "create", NULL } }, { NULL } }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 8fcdd494af27..a00d19139436 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -71,7 +71,7 @@ struct selinux_fs_info { struct dentry *bool_dir; unsigned int bool_num; char **bool_pending_names; - unsigned int *bool_pending_values; + int *bool_pending_values; struct dentry *class_dir; unsigned long last_class_ino; bool policy_opened; @@ -356,7 +356,7 @@ static const struct file_operations sel_policyvers_ops = { /* declaration for sel_write_load */ static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_dir, unsigned int *bool_num, char ***bool_pending_names, - unsigned int **bool_pending_values); + int **bool_pending_values); static int sel_make_classes(struct selinux_policy *newpolicy, struct dentry *class_dir, unsigned long *last_class_ino); @@ -527,7 +527,7 @@ static const struct file_operations sel_policy_ops = { }; static void sel_remove_old_bool_data(unsigned int bool_num, char **bool_names, - unsigned int *bool_values) + int *bool_values) { u32 i; @@ -545,7 +545,7 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi, struct dentry *tmp_parent, *tmp_bool_dir, *tmp_class_dir, *old_dentry; unsigned int tmp_bool_num, old_bool_num; char **tmp_bool_names, **old_bool_names; - unsigned int *tmp_bool_values, *old_bool_values; + int *tmp_bool_values, *old_bool_values; unsigned long tmp_ino = fsi->last_ino; /* Don't increment last_ino in this function */ tmp_parent = sel_make_disconnected_dir(fsi->sb, &tmp_ino); @@ -1423,7 +1423,7 @@ static void sel_remove_entries(struct dentry *de) static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_dir, unsigned int *bool_num, char ***bool_pending_names, - unsigned int **bool_pending_values) + int **bool_pending_values) { int ret; ssize_t len; @@ -1917,7 +1917,6 @@ static int sel_make_class_dir_entries(struct selinux_policy *newpolicy, struct selinux_fs_info *fsi = sb->s_fs_info; struct dentry *dentry = NULL; struct inode *inode = NULL; - int rc; dentry = d_alloc_name(dir, "index"); if (!dentry) @@ -1937,9 +1936,7 @@ static int sel_make_class_dir_entries(struct selinux_policy *newpolicy, if (IS_ERR(dentry)) return PTR_ERR(dentry); - rc = sel_make_perm_files(newpolicy, classname, index, dentry); - - return rc; + return sel_make_perm_files(newpolicy, classname, index, dentry); } static int sel_make_classes(struct selinux_policy *newpolicy, diff --git a/security/selinux/ss/context.h b/security/selinux/ss/context.h index 62990aa1ec9e..eda32c3d4c0a 100644 --- a/security/selinux/ss/context.h +++ b/security/selinux/ss/context.h @@ -38,7 +38,7 @@ static inline void mls_context_init(struct context *c) memset(&c->range, 0, sizeof(c->range)); } -static inline int mls_context_cpy(struct context *dst, struct context *src) +static inline int mls_context_cpy(struct context *dst, const struct context *src) { int rc; @@ -58,7 +58,7 @@ out: /* * Sets both levels in the MLS range of 'dst' to the low level of 'src'. */ -static inline int mls_context_cpy_low(struct context *dst, struct context *src) +static inline int mls_context_cpy_low(struct context *dst, const struct context *src) { int rc; @@ -78,7 +78,7 @@ out: /* * Sets both levels in the MLS range of 'dst' to the high level of 'src'. */ -static inline int mls_context_cpy_high(struct context *dst, struct context *src) +static inline int mls_context_cpy_high(struct context *dst, const struct context *src) { int rc; @@ -97,9 +97,10 @@ out: static inline int mls_context_glblub(struct context *dst, - struct context *c1, struct context *c2) + const struct context *c1, const struct context *c2) { - struct mls_range *dr = &dst->range, *r1 = &c1->range, *r2 = &c2->range; + struct mls_range *dr = &dst->range; + const struct mls_range *r1 = &c1->range, *r2 = &c2->range; int rc = 0; if (r1->level[1].sens < r2->level[0].sens || @@ -127,7 +128,7 @@ out: return rc; } -static inline int mls_context_cmp(struct context *c1, struct context *c2) +static inline int mls_context_cmp(const struct context *c1, const struct context *c2) { return ((c1->range.level[0].sens == c2->range.level[0].sens) && ebitmap_cmp(&c1->range.level[0].cat, &c2->range.level[0].cat) && @@ -147,7 +148,7 @@ static inline void context_init(struct context *c) memset(c, 0, sizeof(*c)); } -static inline int context_cpy(struct context *dst, struct context *src) +static inline int context_cpy(struct context *dst, const struct context *src) { int rc; @@ -180,7 +181,7 @@ static inline void context_destroy(struct context *c) mls_context_destroy(c); } -static inline int context_cmp(struct context *c1, struct context *c2) +static inline int context_cmp(const struct context *c1, const struct context *c2) { if (c1->len && c2->len) return (c1->len == c2->len && !strcmp(c1->str, c2->str)); diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index abde349c8321..d31b87be9a1e 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -28,9 +28,9 @@ static struct kmem_cache *ebitmap_node_cachep __ro_after_init; -int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) +int ebitmap_cmp(const struct ebitmap *e1, const struct ebitmap *e2) { - struct ebitmap_node *n1, *n2; + const struct ebitmap_node *n1, *n2; if (e1->highbit != e2->highbit) return 0; @@ -50,9 +50,10 @@ int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) return 1; } -int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src) +int ebitmap_cpy(struct ebitmap *dst, const struct ebitmap *src) { - struct ebitmap_node *n, *new, *prev; + struct ebitmap_node *new, *prev; + const struct ebitmap_node *n; ebitmap_init(dst); n = src->node; @@ -78,7 +79,7 @@ int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src) return 0; } -int ebitmap_and(struct ebitmap *dst, struct ebitmap *e1, struct ebitmap *e2) +int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1, const struct ebitmap *e2) { struct ebitmap_node *n; int bit, rc; @@ -217,9 +218,9 @@ netlbl_import_failure: * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed * last_e2bit. */ -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit) +int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2, u32 last_e2bit) { - struct ebitmap_node *n1, *n2; + const struct ebitmap_node *n1, *n2; int i; if (e1->highbit < e2->highbit) @@ -258,9 +259,9 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit) return 1; } -int ebitmap_get_bit(struct ebitmap *e, unsigned long bit) +int ebitmap_get_bit(const struct ebitmap *e, unsigned long bit) { - struct ebitmap_node *n; + const struct ebitmap_node *n; if (e->highbit < bit) return 0; @@ -467,7 +468,7 @@ bad: goto out; } -int ebitmap_write(struct ebitmap *e, void *fp) +int ebitmap_write(const struct ebitmap *e, void *fp) { struct ebitmap_node *n; u32 count; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 58eb822f11ee..e5b57dc3fc53 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -44,7 +44,7 @@ struct ebitmap { #define ebitmap_length(e) ((e)->highbit) -static inline unsigned int ebitmap_start_positive(struct ebitmap *e, +static inline unsigned int ebitmap_start_positive(const struct ebitmap *e, struct ebitmap_node **n) { unsigned int ofs; @@ -62,7 +62,7 @@ static inline void ebitmap_init(struct ebitmap *e) memset(e, 0, sizeof(*e)); } -static inline unsigned int ebitmap_next_positive(struct ebitmap *e, +static inline unsigned int ebitmap_next_positive(const struct ebitmap *e, struct ebitmap_node **n, unsigned int bit) { @@ -85,7 +85,7 @@ static inline unsigned int ebitmap_next_positive(struct ebitmap *e, #define EBITMAP_NODE_OFFSET(node, bit) \ (((bit) - (node)->startbit) % EBITMAP_UNIT_SIZE) -static inline int ebitmap_node_get_bit(struct ebitmap_node *n, +static inline int ebitmap_node_get_bit(const struct ebitmap_node *n, unsigned int bit) { unsigned int index = EBITMAP_NODE_INDEX(n, bit); @@ -122,15 +122,15 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n, (bit) < ebitmap_length(e); \ (bit) = ebitmap_next_positive(e, &(n), bit)) \ -int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); -int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); -int ebitmap_and(struct ebitmap *dst, struct ebitmap *e1, struct ebitmap *e2); -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit); -int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); +int ebitmap_cmp(const struct ebitmap *e1, const struct ebitmap *e2); +int ebitmap_cpy(struct ebitmap *dst, const struct ebitmap *src); +int ebitmap_and(struct ebitmap *dst, const struct ebitmap *e1, const struct ebitmap *e2); +int ebitmap_contains(const struct ebitmap *e1, const struct ebitmap *e2, u32 last_e2bit); +int ebitmap_get_bit(const struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); void ebitmap_destroy(struct ebitmap *e); int ebitmap_read(struct ebitmap *e, void *fp); -int ebitmap_write(struct ebitmap *e, void *fp); +int ebitmap_write(const struct ebitmap *e, void *fp); u32 ebitmap_hash(const struct ebitmap *e, u32 hash); #ifdef CONFIG_NETLABEL diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h index 068e0d7809db..7d48d5e52233 100644 --- a/security/selinux/ss/mls_types.h +++ b/security/selinux/ss/mls_types.h @@ -27,13 +27,13 @@ struct mls_range { struct mls_level level[2]; /* low == level[0], high == level[1] */ }; -static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2) +static inline int mls_level_eq(const struct mls_level *l1, const struct mls_level *l2) { return ((l1->sens == l2->sens) && ebitmap_cmp(&l1->cat, &l2->cat)); } -static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2) +static inline int mls_level_dom(const struct mls_level *l1, const struct mls_level *l2) { return ((l1->sens >= l2->sens) && ebitmap_contains(&l1->cat, &l2->cat, 0)); diff --git a/security/smack/smack.h b/security/smack/smack.h index fc837dcebf96..e2239be7bd60 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -180,15 +180,6 @@ struct smack_known_list_elem { struct smack_known *smk_label; }; -/* Super block security struct flags for mount options */ -#define FSDEFAULT_MNT 0x01 -#define FSFLOOR_MNT 0x02 -#define FSHAT_MNT 0x04 -#define FSROOT_MNT 0x08 -#define FSTRANS_MNT 0x10 - -#define NUM_SMK_MNT_OPTS 5 - enum { Opt_error = -1, Opt_fsdefault = 0, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index bffccdc494cb..dadcb9941da5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -497,13 +497,11 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) */ static int smack_ptrace_traceme(struct task_struct *ptp) { - int rc; struct smack_known *skp; skp = smk_of_task(smack_cred(current_cred())); - rc = smk_ptrace_rule_check(ptp, skp, PTRACE_MODE_ATTACH, __func__); - return rc; + return smk_ptrace_rule_check(ptp, skp, PTRACE_MODE_ATTACH, __func__); } /** @@ -2280,6 +2278,21 @@ static void smack_sk_free_security(struct sock *sk) } /** + * smack_sk_clone_security - Copy security context + * @sk: the old socket + * @newsk: the new socket + * + * Copy the security context of the old socket pointer to the cloned + */ +static void smack_sk_clone_security(const struct sock *sk, struct sock *newsk) +{ + struct socket_smack *ssp_old = sk->sk_security; + struct socket_smack *ssp_new = newsk->sk_security; + + *ssp_new = *ssp_old; +} + +/** * smack_ipv4host_label - check host based restrictions * @sip: the object end * @@ -4882,6 +4895,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(socket_getpeersec_dgram, smack_socket_getpeersec_dgram), LSM_HOOK_INIT(sk_alloc_security, smack_sk_alloc_security), LSM_HOOK_INIT(sk_free_security, smack_sk_free_security), + LSM_HOOK_INIT(sk_clone_security, smack_sk_clone_security), LSM_HOOK_INIT(sock_graft, smack_sock_graft), LSM_HOOK_INIT(inet_conn_request, smack_inet_conn_request), LSM_HOOK_INIT(inet_csk_clone, smack_inet_csk_clone), diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index efb6c3f5f2a9..5a37ccbec54f 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -6,7 +6,7 @@ #include <stdio.h> #include <unistd.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct module; diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 95e2b7924925..ba04771cb3a3 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -190,7 +190,7 @@ __asm__ (".section .text\n" ".option norelax\n" "lla gp, __global_pointer$\n" ".option pop\n" - "ld a0, 0(sp)\n" // argc (a0) was in the stack + "lw a0, 0(sp)\n" // argc (a0) was in the stack "add a1, sp, "SZREG"\n" // argv (a1) = sp "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... "add a2, a2, "SZREG"\n" // + SZREG (skip null) diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 08491070387b..ce3ee03aa679 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -692,12 +692,12 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, { #ifndef my_syscall6 /* Function not implemented. */ - return -ENOSYS; + return (void *)-ENOSYS; #else int n; -#if defined(__i386__) +#if defined(__NR_mmap2) n = __NR_mmap2; offset >>= 12; #else diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 93bf93a59c99..d8ae4e944467 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -97,7 +97,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 72ab9870454b..542f9b059c3b 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -7,7 +7,7 @@ #include <linux/types.h> #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif static inline u8 kallsyms2elf_binding(char type) diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt index 8a9d5d2787f9..26554b1c5575 100644 --- a/tools/memory-model/Documentation/litmus-tests.txt +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -946,22 +946,39 @@ Limitations of the Linux-kernel memory model (LKMM) include: carrying a dependency, then the compiler can break that dependency by substituting a constant of that value. - Conversely, LKMM sometimes doesn't recognize that a particular - optimization is not allowed, and as a result, thinks that a - dependency is not present (because the optimization would break it). - The memory model misses some pretty obvious control dependencies - because of this limitation. A simple example is: + Conversely, LKMM will sometimes overestimate the amount of + reordering compilers and CPUs can carry out, leading it to miss + some pretty obvious cases of ordering. A simple example is: r1 = READ_ONCE(x); if (r1 == 0) smp_mb(); WRITE_ONCE(y, 1); - There is a control dependency from the READ_ONCE to the WRITE_ONCE, - even when r1 is nonzero, but LKMM doesn't realize this and thinks - that the write may execute before the read if r1 != 0. (Yes, that - doesn't make sense if you think about it, but the memory model's - intelligence is limited.) + The WRITE_ONCE() does not depend on the READ_ONCE(), and as a + result, LKMM does not claim ordering. However, even though no + dependency is present, the WRITE_ONCE() will not be executed before + the READ_ONCE(). There are two reasons for this: + + The presence of the smp_mb() in one of the branches + prevents the compiler from moving the WRITE_ONCE() + up before the "if" statement, since the compiler has + to assume that r1 will sometimes be 0 (but see the + comment below); + + CPUs do not execute stores before po-earlier conditional + branches, even in cases where the store occurs after the + two arms of the branch have recombined. + + It is clear that it is not dangerous in the slightest for LKMM to + make weaker guarantees than architectures. In fact, it is + desirable, as it gives compilers room for making optimizations. + For instance, suppose that a 0 value in r1 would trigger undefined + behavior elsewhere. Then a clever compiler might deduce that r1 + can never be 0 in the if condition. As a result, said clever + compiler might deem it safe to optimize away the smp_mb(), + eliminating the branch and any ordering an architecture would + guarantee otherwise. 2. Multiple access sizes for a single variable are not supported, and neither are misaligned or partially overlapping accesses. diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9216060c3408..a8cf38639fe8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1073,6 +1073,9 @@ static const char *uaccess_safe_builtin[] = { "copy_mc_fragile_handle_tail", "copy_mc_enhanced_fast_string", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + "clear_user_erms", + "clear_user_rep_good", + "clear_user_original", NULL }; @@ -3316,6 +3319,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, next_insn = next_insn_to_validate(file, insn); if (func && insn->func && func != insn->func->pfunc) { + /* Ignore KCFI type preambles, which always fall through */ + if (!strncmp(func->name, "__cfi_", 6)) + return 0; + WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; @@ -4114,6 +4121,7 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__ex_table") || !strcmp(sec->name, "__jump_table") || !strcmp(sec->name, "__mcount_loc") || + !strcmp(sec->name, ".kcfi_traps") || strstr(sec->name, "__patchable_function_entries")) continue; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c25e957c1e52..7e24b09b1163 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -619,6 +619,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, Elf64_Xword entsize = symtab->sh.sh_entsize; int max_idx, idx = sym->idx; Elf_Scn *s, *t = NULL; + bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE && + sym->sym.st_shndx != SHN_XINDEX; + + if (is_special_shndx) + shndx = sym->sym.st_shndx; s = elf_getscn(elf->elf, symtab->idx); if (!s) { @@ -704,7 +709,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } /* setup extended section index magic and write the symbol */ - if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) { sym->sym.st_shndx = shndx; if (!shndx_data) shndx = 0; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index dfb6173b2a82..9e9a2b67de19 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -114,8 +114,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest for (i = 0; i < nsyscalls; ++i) for (j = 0; j < expected_nr_events[i]; ++j) { - int foo = syscalls[i](); - ++foo; + syscalls[i](); } md = &evlist->mmap[0]; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 6a001fcfed68..4952abe716f3 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -332,7 +332,7 @@ out_delete_evlist: out: if (err == -EACCES) return TEST_SKIP; - if (err < 0) + if (err < 0 || errs != 0) return TEST_FAIL; return TEST_OK; } diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 00c7285ce1ac..301f95427159 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -61,7 +61,7 @@ test_register_capture() { echo "Register capture test [Skipped missing registers]" return fi - if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \ + if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ -c 1000 --per-thread true 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | egrep -q "DI:" diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index c644f94a6500..ec801cffae6b 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -12,7 +12,8 @@ if ! [ -x "$(command -v cc)" ]; then fi # skip the test if the hardware doesn't support branch stack sampling -perf record -b -o- -B true > /dev/null 2>&1 || exit 2 +# and if the architecture doesn't support filter types: any,save_type,u +perf record -b -o- -B --branch-filter any,save_type,u true > /dev/null 2>&1 || exit 2 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 4fd8d703ff19..8ab035b55875 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type) /* Symbol names that begin with the following are ignored.*/ static const char * const ignored_prefixes[] = { "$", /* local symbols for ARM, MIPS, etc. */ - ".LASANPC", /* s390 kasan local symbols */ + ".L", /* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ - "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__kvm_nvhe_$", /* arm64 local symbols in non-VHE KVM namespace */ + "__kvm_nvhe_.L", /* arm64 local symbols in non-VHE KVM namespace */ "__AArch64ADRPThunk_", /* arm64 lld */ "__ARMV5PILongThunk_", /* arm lld */ "__ARMV7PILongThunk_", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9dfae1bda9cc..485e1a343165 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -269,7 +269,7 @@ CFLAGS_expr-flex.o += $(flex_flags) bison_flags := -DYYENABLE_NLS=0 BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) ifeq ($(BISON_GE_35),1) - bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum + bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option else bison_flags += -w endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 22dcfe07e886..906476a839e1 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -498,7 +498,7 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { 0 }; - bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); if (record->op == ARM_SPE_LD) data_src.mem_op = PERF_MEM_OP_LOAD; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index c72f8ad96f75..9aa8cdd93de4 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -48,6 +48,7 @@ const volatile __u32 num_cpus = 1; int enabled = 0; int use_cgroup_v2 = 0; +int perf_subsys_id = -1; static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) { @@ -58,7 +59,15 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) int level; int cnt; - cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); for (cnt = 0; i < MAX_LEVELS; i++) { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index c4ba2bcf179f..38e3b287dbb2 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -94,6 +94,8 @@ const volatile bool has_prev_state = false; const volatile bool needs_cgroup = false; const volatile bool uses_cgroup_v1 = false; +int perf_subsys_id = -1; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -119,11 +121,19 @@ static inline __u64 get_cgroup_id(struct task_struct *t) { struct cgroup *cgrp; - if (uses_cgroup_v1) - cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); - else - cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); + if (!uses_cgroup_v1) + return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup); return BPF_CORE_READ(cgrp, kn, id); } diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 284f8eabd3b9..7c9f9150bad5 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -33,7 +33,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr, * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. */ attr->type = type; - attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); + attr->config = (attr->config & PERF_HW_EVENT_MASK) | + ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); } static int create_event_hybrid(__u32 config_type, int *idx, @@ -48,13 +49,25 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); + + /* + * Some hybrid hardware cache events are only available on one CPU + * PMU. For example, the 'L1-dcache-load-misses' is only available + * on cpu_core, while the 'L1-icache-loads' is only available on + * cpu_atom. We need to remove "not supported" hybrid cache events. + */ + if (attr->type == PERF_TYPE_HW_CACHE + && !is_event_supported(attr->type, attr->config)) + return 0; + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); - if (evsel) + if (evsel) { evsel->pmu_name = strdup(pmu->name); - else + if (!evsel->pmu_name) + return -ENOMEM; + } else return -ENOMEM; - attr->type = type; attr->config = config; return 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f05e15acd33f..f3b2c2a87456 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -28,6 +28,7 @@ #include "util/parse-events-hybrid.h" #include "util/pmu-hybrid.h" #include "tracepoint.h" +#include "thread_map.h" #define MAX_NAME_LEN 100 @@ -157,6 +158,44 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) +bool is_event_supported(u8 type, u64 config) +{ + bool ret = true; + int open_return; + struct evsel *evsel; + struct perf_event_attr attr = { + .type = type, + .config = config, + .disabled = 1, + }; + struct perf_thread_map *tmap = thread_map__new_by_tid(0); + + if (tmap == NULL) + return false; + + evsel = evsel__new(&attr); + if (evsel) { + open_return = evsel__open(evsel, NULL, tmap); + ret = open_return >= 0; + + if (open_return == -EACCES) { + /* + * This happens if the paranoid value + * /proc/sys/kernel/perf_event_paranoid is set to 2 + * Re-run with exclude_kernel set; we don't do that + * by default as some ARM machines do not support it. + * + */ + evsel->core.attr.exclude_kernel = 1; + ret = evsel__open(evsel, NULL, tmap) >= 0; + } + evsel__delete(evsel); + } + + perf_thread_map__put(tmap); + return ret; +} + const char *event_type(int type) { switch (type) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 7e6a601d9cd0..07df7bb7b042 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -19,6 +19,7 @@ struct option; struct perf_pmu; bool have_tracepoints(struct list_head *evlist); +bool is_event_supported(u8 type, u64 config); const char *event_type(int type); diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ba1ab5134685..c4d5d87fae2f 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -22,7 +22,6 @@ #include "probe-file.h" #include "string2.h" #include "strlist.h" -#include "thread_map.h" #include "tracepoint.h" #include "pfm.h" #include "pmu-hybrid.h" @@ -239,44 +238,6 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, strlist__delete(sdtlist); } -static bool is_event_supported(u8 type, unsigned int config) -{ - bool ret = true; - int open_return; - struct evsel *evsel; - struct perf_event_attr attr = { - .type = type, - .config = config, - .disabled = 1, - }; - struct perf_thread_map *tmap = thread_map__new_by_tid(0); - - if (tmap == NULL) - return false; - - evsel = evsel__new(&attr); - if (evsel) { - open_return = evsel__open(evsel, NULL, tmap); - ret = open_return >= 0; - - if (open_return == -EACCES) { - /* - * This happens if the paranoid value - * /proc/sys/kernel/perf_event_paranoid is set to 2 - * Re-run with exclude_kernel set; we don't do that - * by default as some ARM machines do not support it. - * - */ - evsel->core.attr.exclude_kernel = 1; - ret = evsel__open(evsel, NULL, tmap) >= 0; - } - evsel__delete(evsel); - } - - perf_thread_map__put(tmap); - return ret; -} - int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index c92326c2233a..0f5ba28339cf 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -3,4 +3,4 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-error=deprecated-declarations +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c index d79c335594b2..52aa0351533c 100644 --- a/tools/power/acpi/tools/pfrut/pfrut.c +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -190,7 +190,7 @@ int main(int argc, char *argv[]) void *addr_map_capsule; struct stat st; char *log_buf; - int ret = 0; + int ret; if (getuid() != 0) { printf("Please run the tool as root - Exiting.\n"); diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c new file mode 100644 index 000000000000..1bc6241b755b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <test_progs.h> +#include "test_deny_namespace.skel.h" +#include <sched.h> +#include "cap_helpers.h" +#include <stdio.h> + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +/* negative return value -> some internal error + * positive return value -> userns creation failed + * 0 -> userns creation succeeded + */ +static int create_user_ns(void) +{ + pid_t pid; + + pid = fork(); + if (pid < 0) + return -1; + + if (pid == 0) { + if (unshare(CLONE_NEWUSER)) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + return wait_for_pid(pid); +} + +static void test_userns_create_bpf(void) +{ + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; + __u64 old_caps = 0; + + cap_enable_effective(cap_mask, &old_caps); + + ASSERT_OK(create_user_ns(), "priv new user ns"); + + cap_disable_effective(cap_mask, &old_caps); + + ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns"); + + if (cap_mask & old_caps) + cap_enable_effective(cap_mask, NULL); +} + +static void test_unpriv_userns_create_no_bpf(void) +{ + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; + __u64 old_caps = 0; + + cap_disable_effective(cap_mask, &old_caps); + + ASSERT_OK(create_user_ns(), "no-bpf unpriv new user ns"); + + if (cap_mask & old_caps) + cap_enable_effective(cap_mask, NULL); +} + +void test_deny_namespace(void) +{ + struct test_deny_namespace *skel = NULL; + int err; + + if (test__start_subtest("unpriv_userns_create_no_bpf")) + test_unpriv_userns_create_no_bpf(); + + skel = test_deny_namespace__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel load")) + goto close_prog; + + err = test_deny_namespace__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto close_prog; + + if (test__start_subtest("userns_create_bpf")) + test_userns_create_bpf(); + + test_deny_namespace__detach(skel); + +close_prog: + test_deny_namespace__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c new file mode 100644 index 000000000000..09ad5a4ebd1f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> +#include <linux/capability.h> + +struct kernel_cap_struct { + __u32 cap[_LINUX_CAPABILITY_U32S_3]; +} __attribute__((preserve_access_index)); + +struct cred { + struct kernel_cap_struct cap_effective; +} __attribute__((preserve_access_index)); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm.s/userns_create") +int BPF_PROG(test_userns_create, const struct cred *cred, int ret) +{ + struct kernel_cap_struct caps = cred->cap_effective; + int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN); + __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN); + + if (ret) + return 0; + + ret = -EPERM; + if (caps.cap[cap_index] & cap_mask) + return 0; + + return -EPERM; +} diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4c122f1b1737..6448cb9f710f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -48,6 +48,8 @@ LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM_STRING += lib/string_override.c + LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S LIBKVM_x86_64 += lib/x86_64/perf_test_util.c @@ -220,7 +222,8 @@ LIBKVM_C := $(filter %.c,$(LIBKVM)) LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.* @@ -231,6 +234,12 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) $(TEST_GEN_PROGS): $(LIBKVM_OBJS) $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 1c2749b1481a..76c583a07ea2 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -31,8 +31,9 @@ * These limitations are worked around in this test by using a large enough * region of memory for each vCPU such that the number of translations cached in * the TLB and the number of pages held in pagevecs are a small fraction of the - * overall workload. And if either of those conditions are not true this test - * will fail rather than silently passing. + * overall workload. And if either of those conditions are not true (for example + * in nesting, where TLB size is unlimited) this test will print a warning + * rather than silently passing. */ #include <inttypes.h> #include <limits.h> @@ -172,17 +173,23 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, vcpu_idx, no_pfn, pages); /* - * Test that at least 90% of memory has been marked idle (the rest might - * not be marked idle because the pages have not yet made it to an LRU - * list or the translations are still cached in the TLB). 90% is + * Check that at least 90% of memory has been marked idle (the rest + * might not be marked idle because the pages have not yet made it to an + * LRU list or the translations are still cached in the TLB). 90% is * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. + * + * Note that when run in nested virtualization, this check will trigger + * much more frequently because TLB size is unlimited and since no flush + * happens, much more pages are cached there and guest won't see the + * "idle" bit cleared. */ - TEST_ASSERT(still_idle < pages / 10, - "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" - PRIu64 ").\n", - vcpu_idx, still_idle, pages); + if (still_idle < pages / 10) + printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64 + "out of %" PRIu64 "), this will affect performance results" + ".\n", + vcpu_idx, still_idle, pages); close(page_idle_fd); close(pagemap_fd); diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 99fa1410964c..790c6d1ecb34 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -617,6 +617,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); +bool kvm_vm_has_ept(struct kvm_vm *vm); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 71ade6100fd3..2bd25b191d15 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -22,7 +22,7 @@ static void test_dump_stack(void) * Build and run this command: * * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \ - * grep -v test_dump_stack | cat -n 1>&2 + * cat -n 1>&2 * * Note that the spacing is different and there's no newline. */ @@ -36,18 +36,24 @@ static void test_dump_stack(void) n * (((sizeof(void *)) * 2) + 1) + /* Null terminator: */ 1]; - char *c; + char *c = cmd; n = backtrace(stack, n); - c = &cmd[0]; - c += sprintf(c, "%s", addr2line); /* - * Skip the first 3 frames: backtrace, test_dump_stack, and - * test_assert. We hope that backtrace isn't inlined and the other two - * we've declared noinline. + * Skip the first 2 frames, which should be test_dump_stack() and + * test_assert(); both of which are declared noinline. Bail if the + * resulting stack trace would be empty. Otherwise, addr2line will block + * waiting for addresses to be passed in via stdin. */ + if (n <= 2) { + fputs(" (stack trace empty)\n", stderr); + return; + } + + c += sprintf(c, "%s", addr2line); for (i = 2; i < n; i++) c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); + c += sprintf(c, "%s", pipeline); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c new file mode 100644 index 000000000000..632398adc229 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/string_override.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> + +/* + * Override the "basic" built-in string helpers so that they can be used in + * guest code. KVM selftests don't support dynamic loading in guest code and + * will jump into the weeds if the compiler decides to insert an out-of-line + * call via the PLT. + */ +int memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) { + if ((res = *su1 - *su2) != 0) + break; + } + return res; +} + +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + +void *memset(void *s, int c, size_t count) +{ + char *xs = s; + + while (count--) + *xs++ = c; + return s; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 80a568c439b8..d21049c38fc5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -5,6 +5,8 @@ * Copyright (C) 2018, Google LLC. */ +#include <asm/msr-index.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" @@ -542,9 +544,27 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); } +bool kvm_vm_has_ept(struct kvm_vm *vm) +{ + struct kvm_vcpu *vcpu; + uint64_t ctrl; + + vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); + TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); + + ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) + return false; + + ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + return ctrl & SECONDARY_EXEC_ENABLE_EPT; +} + void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { + TEST_REQUIRE(kvm_vm_has_ept(vm)); + vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index b1905d280ef5..e0004bd26536 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -14,6 +14,9 @@ #include "kvm_util.h" #include "processor.h" +/* VMCALL and VMMCALL are both 3-byte opcodes. */ +#define HYPERCALL_INSN_SIZE 3 + static bool ud_expected; static void guest_ud_handler(struct ex_regs *regs) @@ -22,7 +25,7 @@ static void guest_ud_handler(struct ex_regs *regs) GUEST_DONE(); } -extern unsigned char svm_hypercall_insn; +extern uint8_t svm_hypercall_insn[HYPERCALL_INSN_SIZE]; static uint64_t svm_do_sched_yield(uint8_t apic_id) { uint64_t ret; @@ -39,7 +42,7 @@ static uint64_t svm_do_sched_yield(uint8_t apic_id) return ret; } -extern unsigned char vmx_hypercall_insn; +extern uint8_t vmx_hypercall_insn[HYPERCALL_INSN_SIZE]; static uint64_t vmx_do_sched_yield(uint8_t apic_id) { uint64_t ret; @@ -56,30 +59,20 @@ static uint64_t vmx_do_sched_yield(uint8_t apic_id) return ret; } -static void assert_hypercall_insn(unsigned char *exp_insn, unsigned char *obs_insn) -{ - uint32_t exp = 0, obs = 0; - - memcpy(&exp, exp_insn, sizeof(exp)); - memcpy(&obs, obs_insn, sizeof(obs)); - - GUEST_ASSERT_EQ(exp, obs); -} - static void guest_main(void) { - unsigned char *native_hypercall_insn, *hypercall_insn; + uint8_t *native_hypercall_insn, *hypercall_insn; uint8_t apic_id; apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); if (is_intel_cpu()) { - native_hypercall_insn = &vmx_hypercall_insn; - hypercall_insn = &svm_hypercall_insn; + native_hypercall_insn = vmx_hypercall_insn; + hypercall_insn = svm_hypercall_insn; svm_do_sched_yield(apic_id); } else if (is_amd_cpu()) { - native_hypercall_insn = &svm_hypercall_insn; - hypercall_insn = &vmx_hypercall_insn; + native_hypercall_insn = svm_hypercall_insn; + hypercall_insn = vmx_hypercall_insn; vmx_do_sched_yield(apic_id); } else { GUEST_ASSERT(0); @@ -87,8 +80,13 @@ static void guest_main(void) return; } + /* + * The hypercall didn't #UD (guest_ud_handler() signals "done" if a #UD + * occurs). Verify that a #UD is NOT expected and that KVM patched in + * the native hypercall. + */ GUEST_ASSERT(!ud_expected); - assert_hypercall_insn(native_hypercall_insn, hypercall_insn); + GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); GUEST_DONE(); } diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 65e53eb0840b..607b8d7e3ea3 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -75,7 +75,9 @@ USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO CFI_BACKWARD call trace:|ok: control flow unchanged -FORTIFIED_STRSCPY -FORTIFIED_OBJECT -FORTIFIED_SUBOBJECT +FORTIFY_STRSCPY detected buffer overflow +FORTIFY_STR_OBJECT detected buffer overflow +FORTIFY_STR_MEMBER detected buffer overflow +FORTIFY_MEM_OBJECT detected buffer overflow +FORTIFY_MEM_MEMBER detected field-spanning write PPC_SLB_MULTIHIT Recovered diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore new file mode 100644 index 000000000000..4696df589d68 --- /dev/null +++ b/tools/testing/selftests/nolibc/.gitignore @@ -0,0 +1,4 @@ +/initramfs/ +/nolibc-test +/run.out +/sysroot/ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile new file mode 100644 index 000000000000..69ea659caca9 --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc tests +include ../../../scripts/Makefile.include + +# we're in ".../tools/testing/selftests/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +endif + +ifeq ($(ARCH),) +include $(srctree)/scripts/subarch.include +ARCH = $(SUBARCH) +endif + +# kernel image names by architecture +IMAGE_i386 = arch/x86/boot/bzImage +IMAGE_x86 = arch/x86/boot/bzImage +IMAGE_arm64 = arch/arm64/boot/Image +IMAGE_arm = arch/arm/boot/zImage +IMAGE_mips = vmlinuz +IMAGE_riscv = arch/riscv/boot/Image +IMAGE = $(IMAGE_$(ARCH)) +IMAGE_NAME = $(notdir $(IMAGE)) + +# default kernel configurations that appear to be usable +DEFCONFIG_i386 = defconfig +DEFCONFIG_x86 = defconfig +DEFCONFIG_arm64 = defconfig +DEFCONFIG_arm = multi_v7_defconfig +DEFCONFIG_mips = malta_defconfig +DEFCONFIG_riscv = defconfig +DEFCONFIG = $(DEFCONFIG_$(ARCH)) + +# optional tests to run (default = all) +TEST = + +# QEMU_ARCH: arch names used by qemu +QEMU_ARCH_i386 = i386 +QEMU_ARCH_x86 = x86_64 +QEMU_ARCH_arm64 = aarch64 +QEMU_ARCH_arm = arm +QEMU_ARCH_mips = mipsel # works with malta_defconfig +QEMU_ARCH_riscv = riscv64 +QEMU_ARCH = $(QEMU_ARCH_$(ARCH)) + +# QEMU_ARGS : some arch-specific args to pass to qemu +QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS = $(QEMU_ARGS_$(ARCH)) + +# OUTPUT is only set when run from the main makefile, otherwise +# it defaults to this nolibc directory. +OUTPUT ?= $(CURDIR)/ + +ifeq ($(V),1) +Q= +else +Q=@ +endif + +CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables +LDFLAGS := -s + +help: + @echo "Supported targets under selftests/nolibc:" + @echo " all call the \"run\" target below" + @echo " help this help" + @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" + @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " initramfs prepare the initramfs with nolibc-test" + @echo " defconfig create a fresh new default config (uses \$$ARCH)" + @echo " kernel (re)build the kernel with the initramfs (uses \$$ARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$ARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$ARCH, \$$TEST)" + @echo " clean clean the sysroot, initramfs, build and output files" + @echo "" + @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." + @echo "" + @echo "Currently using the following variables:" + @echo " ARCH = $(ARCH)" + @echo " CROSS_COMPILE = $(CROSS_COMPILE)" + @echo " CC = $(CC)" + @echo " OUTPUT = $(OUTPUT)" + @echo " TEST = $(TEST)" + @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$ARCH]" + @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$ARCH]" + @echo "" + +all: run + +sysroot: sysroot/$(ARCH)/include + +sysroot/$(ARCH)/include: + $(QUIET_MKDIR)mkdir -p sysroot + $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone + $(Q)mv sysroot/sysroot sysroot/$(ARCH) + +nolibc-test: nolibc-test.c sysroot/$(ARCH)/include + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc + +initramfs: nolibc-test + $(QUIET_MKDIR)mkdir -p initramfs + $(call QUIET_INSTALL, initramfs/init) + $(Q)cp nolibc-test initramfs/init + +defconfig: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare + +kernel: initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs + +# run the tests after building the kernel +run: kernel + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + +# re-run the tests from an existing kernel +rerun: + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + +clean: + $(call QUIET_CLEAN, sysroot) + $(Q)rm -rf sysroot + $(call QUIET_CLEAN, nolibc-test) + $(Q)rm -f nolibc-test + $(call QUIET_CLEAN, initramfs) + $(Q)rm -rf initramfs + $(call QUIET_CLEAN, run.out) + $(Q)rm -rf run.out diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c new file mode 100644 index 000000000000..78bced95ac63 --- /dev/null +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -0,0 +1,757 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +/* platform-specific include files coming from the compiler */ +#include <limits.h> + +/* libc-specific include files + * The program may be built in 3 ways: + * $(CC) -nostdlib -include /path/to/nolibc.h => NOLIBC already defined + * $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present + * $(CC) with default libc => NOLIBC* never defined + */ +#ifndef NOLIBC +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _NOLIBC_STDIO_H +/* standard libcs need more includes */ +#include <linux/reboot.h> +#include <sys/io.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/reboot.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdarg.h> +#include <unistd.h> +#endif +#endif + +/* will be used by nolibc by getenv() */ +char **environ; + +/* definition of a series of tests */ +struct test { + const char *name; // test name + int (*func)(int min, int max); // handler +}; + +#ifndef _NOLIBC_STDLIB_H +char *itoa(int i) +{ + static char buf[12]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", i); + return (ret >= 0 && ret < sizeof(buf)) ? buf : "#err"; +} +#endif + +#define CASE_ERR(err) \ + case err: return #err + +/* returns the error name (e.g. "ENOENT") for common errors, "SUCCESS" for 0, + * or the decimal value for less common ones. + */ +const char *errorname(int err) +{ + switch (err) { + case 0: return "SUCCESS"; + CASE_ERR(EPERM); + CASE_ERR(ENOENT); + CASE_ERR(ESRCH); + CASE_ERR(EINTR); + CASE_ERR(EIO); + CASE_ERR(ENXIO); + CASE_ERR(E2BIG); + CASE_ERR(ENOEXEC); + CASE_ERR(EBADF); + CASE_ERR(ECHILD); + CASE_ERR(EAGAIN); + CASE_ERR(ENOMEM); + CASE_ERR(EACCES); + CASE_ERR(EFAULT); + CASE_ERR(ENOTBLK); + CASE_ERR(EBUSY); + CASE_ERR(EEXIST); + CASE_ERR(EXDEV); + CASE_ERR(ENODEV); + CASE_ERR(ENOTDIR); + CASE_ERR(EISDIR); + CASE_ERR(EINVAL); + CASE_ERR(ENFILE); + CASE_ERR(EMFILE); + CASE_ERR(ENOTTY); + CASE_ERR(ETXTBSY); + CASE_ERR(EFBIG); + CASE_ERR(ENOSPC); + CASE_ERR(ESPIPE); + CASE_ERR(EROFS); + CASE_ERR(EMLINK); + CASE_ERR(EPIPE); + CASE_ERR(EDOM); + CASE_ERR(ERANGE); + CASE_ERR(ENOSYS); + default: + return itoa(err); + } +} + +static int pad_spc(int llen, int cnt, const char *fmt, ...) +{ + va_list args; + int len; + int ret; + + for (len = 0; len < cnt - llen; len++) + putchar(' '); + + va_start(args, fmt); + ret = vfprintf(stdout, fmt, args); + va_end(args); + return ret < 0 ? ret : ret + len; +} + +/* The tests below are intended to be used by the macroes, which evaluate + * expression <expr>, print the status to stdout, and update the "ret" + * variable to count failures. The functions themselves return the number + * of failures, thus either 0 or 1. + */ + +#define EXPECT_ZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0) + +static int expect_zr(int expr, int llen) +{ + int ret = !(expr == 0); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_NZ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0) + +static int expect_nz(int expr, int llen) +{ + int ret = !(expr != 0); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_EQ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0) + +static int expect_eq(int expr, int llen, int val) +{ + int ret = !(expr == val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_NE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0) + +static int expect_ne(int expr, int llen, int val) +{ + int ret = !(expr != val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_GE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0) + +static int expect_ge(int expr, int llen, int val) +{ + int ret = !(expr >= val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_GT(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0) + +static int expect_gt(int expr, int llen, int val) +{ + int ret = !(expr > val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_LE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0) + +static int expect_le(int expr, int llen, int val) +{ + int ret = !(expr <= val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_LT(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0) + +static int expect_lt(int expr, int llen, int val) +{ + int ret = !(expr < val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_SYSZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0) + +static int expect_syszr(int expr, int llen) +{ + int ret = 0; + + if (expr) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSEQ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0) + +static int expect_syseq(int expr, int llen, int val) +{ + int ret = 0; + + if (expr != val) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSNE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0) + +static int expect_sysne(int expr, int llen, int val) +{ + int ret = 0; + + if (expr == val) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSER(cond, expr, expret, experr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0) + +static int expect_syserr(int expr, int expret, int experr, int llen) +{ + int ret = 0; + int _errno = errno; + + llen += printf(" = %d %s ", expr, errorname(_errno)); + if (expr != expret || _errno != experr) { + ret = 1; + llen += printf(" != (%d %s) ", expret, errorname(experr)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_PTRZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0) + +static int expect_ptrzr(const void *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_PTRNZ(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0) + +static int expect_ptrnz(const void *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (!expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0) + +static int expect_strzr(const char *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRNZ(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0) + +static int expect_strnz(const char *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (!expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STREQ(cond, expr, cmp) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0) + +static int expect_streq(const char *expr, int llen, const char *cmp) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (strcmp(expr, cmp) != 0) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRNE(cond, expr, cmp) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0) + +static int expect_strne(const char *expr, int llen, const char *cmp) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (strcmp(expr, cmp) == 0) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +/* declare tests based on line numbers. There must be exactly one test per line. */ +#define CASE_TEST(name) \ + case __LINE__: llen += printf("%d %s", test, #name); + + +/* used by some syscall tests below */ +int test_getdents64(const char *dir) +{ + char buffer[4096]; + int fd, ret; + int err; + + ret = fd = open(dir, O_RDONLY | O_DIRECTORY, 0); + if (ret < 0) + return ret; + + ret = getdents64(fd, (void *)buffer, sizeof(buffer)); + err = errno; + close(fd); + + errno = err; + return ret; +} + +/* Run syscall tests between IDs <min> and <max>. + * Return 0 on success, non-zero on failure. + */ +int run_syscall(int min, int max) +{ + struct stat stat_buf; + int proc; + int test; + int tmp; + int ret = 0; + void *p1, *p2; + + /* <proc> indicates whether or not /proc is mounted */ + proc = stat("/proc", &stat_buf) == 0; + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; // line length + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; + CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; +#ifdef NOLIBC + CASE_TEST(gettid); EXPECT_SYSNE(1, gettid(), -1); break; +#endif + CASE_TEST(getpgid_self); EXPECT_SYSNE(1, getpgid(0), -1); break; + CASE_TEST(getpgid_bad); EXPECT_SYSER(1, getpgid(-1), -1, ESRCH); break; + CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break; + CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break; + CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break; + CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break; + CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break; + CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); break; + CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break; + CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break; + CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; + CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; + CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; + CASE_TEST(chroot_root); EXPECT_SYSZR(1, chroot("/")); break; + CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break; + CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; + CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; + CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup_m1); tmp = dup(-1); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(dup2_0); tmp = dup2(0, 100); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup2_m1); tmp = dup2(-1, 100); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; + CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; + CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; + CASE_TEST(gettimeofday_null); EXPECT_SYSZR(1, gettimeofday(NULL, NULL)); break; +#ifdef NOLIBC + CASE_TEST(gettimeofday_bad1); EXPECT_SYSER(1, gettimeofday((void *)1, NULL), -1, EFAULT); break; + CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; + CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; +#endif + CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; + CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; + CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; + CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break; + CASE_TEST(link_dir); EXPECT_SYSER(1, link("/", "/blah"), -1, EPERM); break; + CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/net", "/blah"), -1, EXDEV); break; + CASE_TEST(lseek_m1); EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break; + CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break; + CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break; + CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; + CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; + CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; + CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break; + CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break; + CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break; + CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break; + CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break; + CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break; + CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; + CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; + CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; + CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break; + CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break; + CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break; + CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break; + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + +int run_stdlib(int min, int max) +{ + int test; + int tmp; + int ret = 0; + void *p1, *p2; + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; // line length + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(getenv_TERM); EXPECT_STRNZ(1, getenv("TERM")); break; + CASE_TEST(getenv_blah); EXPECT_STRZR(1, getenv("blah")); break; + CASE_TEST(setcmp_blah_blah); EXPECT_EQ(1, strcmp("blah", "blah"), 0); break; + CASE_TEST(setcmp_blah_blah2); EXPECT_NE(1, strcmp("blah", "blah2"), 0); break; + CASE_TEST(setncmp_blah_blah); EXPECT_EQ(1, strncmp("blah", "blah", 10), 0); break; + CASE_TEST(setncmp_blah_blah4); EXPECT_EQ(1, strncmp("blah", "blah4", 4), 0); break; + CASE_TEST(setncmp_blah_blah5); EXPECT_NE(1, strncmp("blah", "blah5", 5), 0); break; + CASE_TEST(setncmp_blah_blah6); EXPECT_NE(1, strncmp("blah", "blah6", 6), 0); break; + CASE_TEST(strchr_foobar_o); EXPECT_STREQ(1, strchr("foobar", 'o'), "oobar"); break; + CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; + CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; + CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + +/* prepare what needs to be prepared for pid 1 (stdio, /dev, /proc, etc) */ +int prepare(void) +{ + struct stat stat_buf; + + /* It's possible that /dev doesn't even exist or was not mounted, so + * we'll try to create it, mount it, or create minimal entries into it. + * We want at least /dev/null and /dev/console. + */ + if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) { + if (stat("/dev/console", &stat_buf) != 0 || + stat("/dev/null", &stat_buf) != 0) { + /* try devtmpfs first, otherwise fall back to manual creation */ + if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) { + mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1)); + mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3)); + } + } + } + + /* If no /dev/console was found before calling init, stdio is closed so + * we need to reopen it from /dev/console. If it failed above, it will + * still fail here and we cannot emit a message anyway. + */ + if (close(dup(1)) == -1) { + int fd = open("/dev/console", O_RDWR); + + if (fd >= 0) { + if (fd != 0) + dup2(fd, 0); + if (fd != 1) + dup2(fd, 1); + if (fd != 2) + dup2(fd, 2); + if (fd > 2) + close(fd); + puts("\nSuccessfully reopened /dev/console."); + } + } + + /* try to mount /proc if not mounted. Silently fail otherwise */ + if (stat("/proc/.", &stat_buf) == 0 || mkdir("/proc", 0755) == 0) { + if (stat("/proc/self", &stat_buf) != 0) + mount("/proc", "/proc", "proc", 0, 0); + } + + return 0; +} + +/* This is the definition of known test names, with their functions */ +static struct test test_names[] = { + /* add new tests here */ + { .name = "syscall", .func = run_syscall }, + { .name = "stdlib", .func = run_stdlib }, + { 0 } +}; + +int main(int argc, char **argv, char **envp) +{ + int min = 0; + int max = __INT_MAX__; + int ret = 0; + int err; + int idx; + char *test; + + environ = envp; + + /* when called as init, it's possible that no console was opened, for + * example if no /dev file system was provided. We'll check that fd#1 + * was opened, and if not we'll attempt to create and open /dev/console + * and /dev/null that we'll use for later tests. + */ + if (getpid() == 1) + prepare(); + + /* the definition of a series of tests comes from either argv[1] or the + * "NOLIBC_TEST" environment variable. It's made of a comma-delimited + * series of test names and optional ranges: + * syscall:5-15[:.*],stdlib:8-10 + */ + test = argv[1]; + if (!test) + test = getenv("NOLIBC_TEST"); + + if (test) { + char *comma, *colon, *dash, *value; + + do { + comma = strchr(test, ','); + if (comma) + *(comma++) = '\0'; + + colon = strchr(test, ':'); + if (colon) + *(colon++) = '\0'; + + for (idx = 0; test_names[idx].name; idx++) { + if (strcmp(test, test_names[idx].name) == 0) + break; + } + + if (test_names[idx].name) { + /* The test was named, it will be called at least + * once. We may have an optional range at <colon> + * here, which defaults to the full range. + */ + do { + min = 0; max = __INT_MAX__; + value = colon; + if (value && *value) { + colon = strchr(value, ':'); + if (colon) + *(colon++) = '\0'; + + dash = strchr(value, '-'); + if (dash) + *(dash++) = '\0'; + + /* support :val: :min-max: :min-: :-max: */ + if (*value) + min = atoi(value); + if (!dash) + max = min; + else if (*dash) + max = atoi(dash); + + value = colon; + } + + /* now's time to call the test */ + printf("Running test '%s'\n", test_names[idx].name); + err = test_names[idx].func(min, max); + ret += err; + printf("Errors during this test: %d\n\n", err); + } while (colon && *colon); + } else + printf("Ignoring unknown test name '%s'\n", test); + + test = comma; + } while (test && *test); + } else { + /* no test mentioned, run everything */ + for (idx = 0; test_names[idx].name; idx++) { + printf("Running test '%s'\n", test_names[idx].name); + err = test_names[idx].func(min, max); + ret += err; + printf("Errors during this test: %d\n\n", err); + } + } + + printf("Total number of errors: %d\n", ret); + + if (getpid() == 1) { + /* we're running as init, there's no other process on the + * system, thus likely started from a VM for a quick check. + * Exiting will provoke a kernel panic that may be reported + * as an error by Qemu or the hypervisor, while stopping + * cleanly will often be reported as a success. This allows + * to use the output of this program for bisecting kernels. + */ + printf("Leaving init with final status: %d\n", !!ret); + if (ret == 0) + reboot(LINUX_REBOOT_CMD_POWER_OFF); +#if defined(__x86_64__) + /* QEMU started with "-device isa-debug-exit -no-reboot" will + * exit with status code 2N+1 when N is written to 0x501. We + * hard-code the syscall here as it's arch-dependent. + */ +#if defined(_NOLIBC_SYS_H) + else if (my_syscall3(__NR_ioperm, 0x501, 1, 1) == 0) +#else + else if (ioperm(0x501, 1, 1) == 0) +#endif + asm volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0)); + /* if it does nothing, fall back to the regular panic */ +#endif + } + + printf("Exiting with status %d\n", !!ret); + return !!ret; +} diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 363b98228301..5d3440f474dd 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -14,6 +14,7 @@ struct virtio_device { u64 features; struct list_head vqs; spinlock_t vqs_list_lock; + const struct virtio_config_ops *config; }; struct virtqueue { @@ -23,7 +24,9 @@ struct virtqueue { struct virtio_device *vdev; unsigned int index; unsigned int num_free; + unsigned int num_max; void *priv; + bool reset; }; /* Interfaces exported by virtio_ring. */ diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index f2640e505c4e..2a8a70e2a950 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -3,6 +3,11 @@ #include <linux/virtio.h> #include <uapi/linux/virtio_config.h> +struct virtio_config_ops { + int (*disable_vq_and_reset)(struct virtqueue *vq); + int (*enable_vq_after_reset)(struct virtqueue *vq); +}; + /* * __virtio_test_bit - helper to test feature bits. For use by transports. * Devices should normally use virtio_has_feature, |