mirror of
https://github.com/armbian/linux-cix.git
synced 2026-01-06 12:30:45 -08:00
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM64:
- Eager page splitting optimization for dirty logging, optionally
allowing for a VM to avoid the cost of hugepage splitting in the
stage-2 fault path.
- Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
with services that live in the Secure world. pKVM intervenes on
FF-A calls to guarantee the host doesn't misuse memory donated to
the hyp or a pKVM guest.
- Support for running the split hypervisor with VHE enabled, known as
'hVHE' mode. This is extremely useful for testing the split
hypervisor on VHE-only systems, and paves the way for new use cases
that depend on having two TTBRs available at EL2.
- Generalized framework for configurable ID registers from userspace.
KVM/arm64 currently prevents arbitrary CPU feature set
configuration from userspace, but the intent is to relax this
limitation and allow userspace to select a feature set consistent
with the CPU.
- Enable the use of Branch Target Identification (FEAT_BTI) in the
hypervisor.
- Use a separate set of pointer authentication keys for the
hypervisor when running in protected mode, as the host is untrusted
at runtime.
- Ensure timer IRQs are consistently released in the init failure
paths.
- Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
Traps (FEAT_EVT), as it is a register commonly read from userspace.
- Erratum workaround for the upcoming AmpereOne part, which has
broken hardware A/D state management.
RISC-V:
- Redirect AMO load/store misaligned traps to KVM guest
- Trap-n-emulate AIA in-kernel irqchip for KVM guest
- Svnapot support for KVM Guest
s390:
- New uvdevice secret API
- CMM selftest and fixes
- fix racy access to target CPU for diag 9c
x86:
- Fix missing/incorrect #GP checks on ENCLS
- Use standard mmu_notifier hooks for handling APIC access page
- Drop now unnecessary TR/TSS load after VM-Exit on AMD
- Print more descriptive information about the status of SEV and
SEV-ES during module load
- Add a test for splitting and reconstituting hugepages during and
after dirty logging
- Add support for CPU pinning in demand paging test
- Add support for AMD PerfMonV2, with a variety of cleanups and minor
fixes included along the way
- Add a "nx_huge_pages=never" option to effectively avoid creating NX
hugepage recovery threads (because nx_huge_pages=off can be toggled
at runtime)
- Move handling of PAT out of MTRR code and dedup SVM+VMX code
- Fix output of PIC poll command emulation when there's an interrupt
- Add a maintainer's handbook to document KVM x86 processes,
preferred coding style, testing expectations, etc.
- Misc cleanups, fixes and comments
Generic:
- Miscellaneous bugfixes and cleanups
Selftests:
- Generate dependency files so that partial rebuilds work as
expected"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
Documentation/process: Add a maintainer handbook for KVM x86
Documentation/process: Add a label for the tip tree handbook's coding style
KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
RISC-V: KVM: Remove unneeded semicolon
RISC-V: KVM: Allow Svnapot extension for Guest/VM
riscv: kvm: define vcpu_sbi_ext_pmu in header
RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel emulation of AIA APLIC
RISC-V: KVM: Implement device interface for AIA irqchip
RISC-V: KVM: Skeletal in-kernel AIA irqchip support
RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
RISC-V: KVM: Add APLIC related defines
RISC-V: KVM: Add IMSIC related defines
RISC-V: KVM: Implement guest external interrupt line management
KVM: x86: Remove PRIx* definitions as they are solely for user space
s390/uv: Update query for secret-UVCs
s390/uv: replace scnprintf with sysfs_emit
s390/uvdevice: Add 'Lock Secret Store' UVC
...
This commit is contained in:
@@ -52,6 +52,9 @@ stable kernels.
|
||||
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
|
||||
|
||||
@@ -18,3 +18,4 @@ Contents:
|
||||
maintainer-netdev
|
||||
maintainer-soc
|
||||
maintainer-tip
|
||||
maintainer-kvm-x86
|
||||
|
||||
390
Documentation/process/maintainer-kvm-x86.rst
Normal file
390
Documentation/process/maintainer-kvm-x86.rst
Normal file
@@ -0,0 +1,390 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
KVM x86
|
||||
=======
|
||||
|
||||
Foreword
|
||||
--------
|
||||
KVM strives to be a welcoming community; contributions from newcomers are
|
||||
valued and encouraged. Please do not be discouraged or intimidated by the
|
||||
length of this document and the many rules/guidelines it contains. Everyone
|
||||
makes mistakes, and everyone was a newbie at some point. So long as you make
|
||||
an honest effort to follow KVM x86's guidelines, are receptive to feedback,
|
||||
and learn from any mistakes you make, you will be welcomed with open arms, not
|
||||
torches and pitchforks.
|
||||
|
||||
TL;DR
|
||||
-----
|
||||
Testing is mandatory. Be consistent with established styles and patterns.
|
||||
|
||||
Trees
|
||||
-----
|
||||
KVM x86 is currently in a transition period from being part of the main KVM
|
||||
tree, to being "just another KVM arch". As such, KVM x86 is split across the
|
||||
main KVM tree, ``git.kernel.org/pub/scm/virt/kvm/kvm.git``, and a KVM x86
|
||||
specific tree, ``github.com/kvm-x86/linux.git``.
|
||||
|
||||
Generally speaking, fixes for the current cycle are applied directly to the
|
||||
main KVM tree, while all development for the next cycle is routed through the
|
||||
KVM x86 tree. In the unlikely event that a fix for the current cycle is routed
|
||||
through the KVM x86 tree, it will be applied to the ``fixes`` branch before
|
||||
making its way to the main KVM tree.
|
||||
|
||||
Note, this transition period is expected to last quite some time, i.e. will be
|
||||
the status quo for the foreseeable future.
|
||||
|
||||
Branches
|
||||
~~~~~~~~
|
||||
The KVM x86 tree is organized into multiple topic branches. The purpose of
|
||||
using finer-grained topic branches is to make it easier to keep tabs on an area
|
||||
of development, and to limit the collateral damage of human errors and/or buggy
|
||||
commits, e.g. dropping the HEAD commit of a topic branch has no impact on other
|
||||
in-flight commits' SHA1 hashes, and having to reject a pull request due to bugs
|
||||
delays only that topic branch.
|
||||
|
||||
All topic branches, except for ``next`` and ``fixes``, are rolled into ``next``
|
||||
via a Cthulhu merge on an as-needed basis, i.e. when a topic branch is updated.
|
||||
As a result, force pushes to ``next`` are common.
|
||||
|
||||
Lifecycle
|
||||
~~~~~~~~~
|
||||
Fixes that target the current release, a.k.a. mainline, are typically applied
|
||||
directly to the main KVM tree, i.e. do not route through the KVM x86 tree.
|
||||
|
||||
Changes that target the next release are routed through the KVM x86 tree. Pull
|
||||
requests (from KVM x86 to main KVM) are sent for each KVM x86 topic branch,
|
||||
typically the week before Linus' opening of the merge window, e.g. the week
|
||||
following rc7 for "normal" releases. If all goes well, the topic branches are
|
||||
rolled into the main KVM pull request sent during Linus' merge window.
|
||||
|
||||
The KVM x86 tree doesn't have its own official merge window, but there's a soft
|
||||
close around rc5 for new features, and a soft close around rc6 for fixes (for
|
||||
the next release; see above for fixes that target the current release).
|
||||
|
||||
Timeline
|
||||
~~~~~~~~
|
||||
Submissions are typically reviewed and applied in FIFO order, with some wiggle
|
||||
room for the size of a series, patches that are "cache hot", etc. Fixes,
|
||||
especially for the current release and/or stable trees, get to jump the queue.
|
||||
Patches that will be taken through a non-KVM tree (most often through the tip
|
||||
tree) and/or have other acks/reviews also jump the queue to some extent.
|
||||
|
||||
Note, the vast majority of review is done between rc1 and rc6, give or take.
|
||||
The period between rc6 and the next rc1 is used to catch up on other tasks,
|
||||
i.e. radio silence during this period isn't unusual.
|
||||
|
||||
Pings to get a status update are welcome, but keep in mind the timing of the
|
||||
current release cycle and have realistic expectations. If you are pinging for
|
||||
acceptance, i.e. not just for feedback or an update, please do everything you
|
||||
can, within reason, to ensure that your patches are ready to be merged! Pings
|
||||
on series that break the build or fail tests lead to unhappy maintainers!
|
||||
|
||||
Development
|
||||
-----------
|
||||
|
||||
Base Tree/Branch
|
||||
~~~~~~~~~~~~~~~~
|
||||
Fixes that target the current release, a.k.a. mainline, should be based on
|
||||
``git://git.kernel.org/pub/scm/virt/kvm/kvm.git master``. Note, fixes do not
|
||||
automatically warrant inclusion in the current release. There is no singular
|
||||
rule, but typically only fixes for bugs that are urgent, critical, and/or were
|
||||
introduced in the current release should target the current release.
|
||||
|
||||
Everything else should be based on ``kvm-x86/next``, i.e. there is no need to
|
||||
select a specific topic branch as the base. If there are conflicts and/or
|
||||
dependencies across topic branches, it is the maintainer's job to sort them
|
||||
out.
|
||||
|
||||
The only exception to using ``kvm-x86/next`` as the base is if a patch/series
|
||||
is a multi-arch series, i.e. has non-trivial modifications to common KVM code
|
||||
and/or has more than superficial changes to other architectures' code. Multi-
|
||||
arch patch/series should instead be based on a common, stable point in KVM's
|
||||
history, e.g. the release candidate upon which ``kvm-x86/next`` is based. If
|
||||
you're unsure whether a patch/series is truly multi-arch, err on the side of
|
||||
caution and treat it as multi-arch, i.e. use a common base.
|
||||
|
||||
Coding Style
|
||||
~~~~~~~~~~~~
|
||||
When it comes to style, naming, patterns, etc., consistency is the number one
|
||||
priority in KVM x86. If all else fails, match what already exists.
|
||||
|
||||
With a few caveats listed below, follow the tip tree maintainers' preferred
|
||||
:ref:`maintainer-tip-coding-style`, as patches/series often touch both KVM and
|
||||
non-KVM x86 files, i.e. draw the attention of KVM *and* tip tree maintainers.
|
||||
|
||||
Using reverse fir tree, a.k.a. reverse Christmas tree or reverse XMAS tree, for
|
||||
variable declarations isn't strictly required, though it is still preferred.
|
||||
|
||||
Except for a handful of special snowflakes, do not use kernel-doc comments for
|
||||
functions. The vast majority of "public" KVM functions aren't truly public as
|
||||
they are intended only for KVM-internal consumption (there are plans to
|
||||
privatize KVM's headers and exports to enforce this).
|
||||
|
||||
Comments
|
||||
~~~~~~~~
|
||||
Write comments using imperative mood and avoid pronouns. Use comments to
|
||||
provide a high level overview of the code, and/or to explain why the code does
|
||||
what it does. Do not reiterate what the code literally does; let the code
|
||||
speak for itself. If the code itself is inscrutable, comments will not help.
|
||||
|
||||
SDM and APM References
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
Much of KVM's code base is directly tied to architectural behavior defined in
|
||||
Intel's Software Development Manual (SDM) and AMD's Architecture Programmer’s
|
||||
Manual (APM). Use of "Intel's SDM" and "AMD's APM", or even just "SDM" or
|
||||
"APM", without additional context is a-ok.
|
||||
|
||||
Do not reference specific sections, tables, figures, etc. by number, especially
|
||||
not in comments. Instead, if necessary (see below), copy-paste the relevant
|
||||
snippet and reference sections/tables/figures by name. The layouts of the SDM
|
||||
and APM are constantly changing, and so the numbers/labels aren't stable.
|
||||
|
||||
Generally speaking, do not explicitly reference or copy-paste from the SDM or
|
||||
APM in comments. With few exceptions, KVM *must* honor architectural behavior,
|
||||
therefore it's implied that KVM behavior is emulating SDM and/or APM behavior.
|
||||
Note, referencing the SDM/APM in changelogs to justify the change and provide
|
||||
context is perfectly ok and encouraged.
|
||||
|
||||
Shortlog
|
||||
~~~~~~~~
|
||||
The preferred prefix format is ``KVM: <topic>:``, where ``<topic>`` is one of::
|
||||
|
||||
- x86
|
||||
- x86/mmu
|
||||
- x86/pmu
|
||||
- x86/xen
|
||||
- selftests
|
||||
- SVM
|
||||
- nSVM
|
||||
- VMX
|
||||
- nVMX
|
||||
|
||||
**DO NOT use x86/kvm!** ``x86/kvm`` is used exclusively for Linux-as-a-KVM-guest
|
||||
changes, i.e. for arch/x86/kernel/kvm.c. Do not use file names or complete file
|
||||
paths as the subject/shortlog prefix.
|
||||
|
||||
Note, these don't align with the topic branches (the topic branches care much
|
||||
more about code conflicts).
|
||||
|
||||
All names are case sensitive! ``KVM: x86:`` is good, ``kvm: vmx:`` is not.
|
||||
|
||||
Capitalize the first word of the condensed patch description, but omit ending
|
||||
punctuation. E.g.::
|
||||
|
||||
KVM: x86: Fix a null pointer dereference in function_xyz()
|
||||
|
||||
not::
|
||||
|
||||
kvm: x86: fix a null pointer dereference in function_xyz.
|
||||
|
||||
If a patch touches multiple topics, traverse up the conceptual tree to find the
|
||||
first common parent (which is often simply ``x86``). When in doubt,
|
||||
``git log path/to/file`` should provide a reasonable hint.
|
||||
|
||||
New topics do occasionally pop up, but please start an on-list discussion if
|
||||
you want to propose introducing a new topic, i.e. don't go rogue.
|
||||
|
||||
See :ref:`the_canonical_patch_format` for more information, with one amendment:
|
||||
do not treat the 70-75 character limit as an absolute, hard limit. Instead,
|
||||
use 75 characters as a firm-but-not-hard limit, and use 80 characters as a hard
|
||||
limit. I.e. let the shortlog run a few characters over the standard limit if
|
||||
you have good reason to do so.
|
||||
|
||||
Changelog
|
||||
~~~~~~~~~
|
||||
Most importantly, write changelogs using imperative mood and avoid pronouns.
|
||||
|
||||
See :ref:`describe_changes` for more information, with one amendment: lead with
|
||||
a short blurb on the actual changes, and then follow up with the context and
|
||||
background. Note! This order directly conflicts with the tip tree's preferred
|
||||
approach! Please follow the tip tree's preferred style when sending patches
|
||||
that primarily target arch/x86 code that is _NOT_ KVM code.
|
||||
|
||||
Stating what a patch does before diving into details is preferred by KVM x86
|
||||
for several reasons. First and foremost, what code is actually being changed
|
||||
is arguably the most important information, and so that info should be easy to
|
||||
find. Changelogs that bury the "what's actually changing" in a one-liner after
|
||||
3+ paragraphs of background make it very hard to find that information.
|
||||
|
||||
For initial review, one could argue the "what's broken" is more important, but
|
||||
for skimming logs and git archaeology, the gory details matter less and less.
|
||||
E.g. when doing a series of "git blame", the details of each change along the
|
||||
way are useless, the details only matter for the culprit. Providing the "what
|
||||
changed" makes it easy to quickly determine whether or not a commit might be of
|
||||
interest.
|
||||
|
||||
Another benefit of stating "what's changing" first is that it's almost always
|
||||
possible to state "what's changing" in a single sentence. Conversely, all but
|
||||
the most simple bugs require multiple sentences or paragraphs to fully describe
|
||||
the problem. If both the "what's changing" and "what's the bug" are super
|
||||
short then the order doesn't matter. But if one is shorter (almost always the
|
||||
"what's changing"), then covering the shorter one first is advantageous because
|
||||
it's less of an inconvenience for readers/reviewers that have a strict ordering
|
||||
preference. E.g. having to skip one sentence to get to the context is less
|
||||
painful than having to skip three paragraphs to get to "what's changing".
|
||||
|
||||
Fixes
|
||||
~~~~~
|
||||
If a change fixes a KVM/kernel bug, add a Fixes: tag even if the change doesn't
|
||||
need to be backported to stable kernels, and even if the change fixes a bug in
|
||||
an older release.
|
||||
|
||||
Conversely, if a fix does need to be backported, explicitly tag the patch with
|
||||
"Cc: stable@vger.kernel.org" (though the email itself doesn't need to Cc: stable);
|
||||
KVM x86 opts out of backporting Fixes: by default. Some auto-selected patches
|
||||
do get backported, but require explicit maintainer approval (search MANUALSEL).
|
||||
|
||||
Function References
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
When a function is mentioned in a comment, changelog, or shortlog (or anywhere
|
||||
for that matter), use the format ``function_name()``. The parentheses provide
|
||||
context and disambiguate the reference.
|
||||
|
||||
Testing
|
||||
-------
|
||||
At a bare minimum, *all* patches in a series must build cleanly for KVM_INTEL=m,
|
||||
KVM_AMD=m, and KVM_WERROR=y. Building every possible combination of Kconfigs
|
||||
isn't feasible, but the more the merrier. KVM_SMM, KVM_XEN, PROVE_LOCKING, and
|
||||
X86_64 are particularly interesting knobs to turn.
|
||||
|
||||
Running KVM selftests and KVM-unit-tests is also mandatory (and stating the
|
||||
obvious, the tests need to pass). The only exception is for changes that have
|
||||
negligible probability of affecting runtime behavior, e.g. patches that only
|
||||
modify comments. When possible and relevant, testing on both Intel and AMD is
|
||||
strongly preferred. Booting an actual VM is encouraged, but not mandatory.
|
||||
|
||||
For changes that touch KVM's shadow paging code, running with TDP (EPT/NPT)
|
||||
disabled is mandatory. For changes that affect common KVM MMU code, running
|
||||
with TDP disabled is strongly encouraged. For all other changes, if the code
|
||||
being modified depends on and/or interacts with a module param, testing with
|
||||
the relevant settings is mandatory.
|
||||
|
||||
Note, KVM selftests and KVM-unit-tests do have known failures. If you suspect
|
||||
a failure is not due to your changes, verify that the *exact same* failure
|
||||
occurs with and without your changes.
|
||||
|
||||
Changes that touch reStructured Text documentation, i.e. .rst files, must build
|
||||
htmldocs cleanly, i.e. with no new warnings or errors.
|
||||
|
||||
If you can't fully test a change, e.g. due to lack of hardware, clearly state
|
||||
what level of testing you were able to do, e.g. in the cover letter.
|
||||
|
||||
New Features
|
||||
~~~~~~~~~~~~
|
||||
With one exception, new features *must* come with test coverage. KVM specific
|
||||
tests aren't strictly required, e.g. if coverage is provided by running a
|
||||
sufficiently enabled guest VM, or by running a related kernel selftest in a VM,
|
||||
but dedicated KVM tests are preferred in all cases. Negative testcases in
|
||||
particular are mandatory for enabling of new hardware features as error and
|
||||
exception flows are rarely exercised simply by running a VM.
|
||||
|
||||
The only exception to this rule is if KVM is simply advertising support for a
|
||||
feature via KVM_GET_SUPPORTED_CPUID, i.e. for instructions/features that KVM
|
||||
can't prevent a guest from using and for which there is no true enabling.
|
||||
|
||||
Note, "new features" does not just mean "new hardware features"! New features
|
||||
that can't be well validated using existing KVM selftests and/or KVM-unit-tests
|
||||
must come with tests.
|
||||
|
||||
Posting new feature development without tests to get early feedback is more
|
||||
than welcome, but such submissions should be tagged RFC, and the cover letter
|
||||
should clearly state what type of feedback is requested/expected. Do not abuse
|
||||
the RFC process; RFCs will typically not receive in-depth review.
|
||||
|
||||
Bug Fixes
|
||||
~~~~~~~~~
|
||||
Except for "obvious" found-by-inspection bugs, fixes must be accompanied by a
|
||||
reproducer for the bug being fixed. In many cases the reproducer is implicit,
|
||||
e.g. for build errors and test failures, but it should still be clear to
|
||||
readers what is broken and how to verify the fix. Some leeway is given for
|
||||
bugs that are found via non-public workloads/tests, but providing regression
|
||||
tests for such bugs is strongly preferred.
|
||||
|
||||
In general, regression tests are preferred for any bug that is not trivial to
|
||||
hit. E.g. even if the bug was originally found by a fuzzer such as syzkaller,
|
||||
a targeted regression test may be warranted if the bug requires hitting a
|
||||
one-in-a-million type race condition.
|
||||
|
||||
Note, KVM bugs are rarely urgent *and* non-trivial to reproduce. Ask yourself
|
||||
if a bug is really truly the end of the world before posting a fix without a
|
||||
reproducer.
|
||||
|
||||
Posting
|
||||
-------
|
||||
|
||||
Links
|
||||
~~~~~
|
||||
Do not explicitly reference bug reports, prior versions of a patch/series, etc.
|
||||
via ``In-Reply-To:`` headers. Using ``In-Reply-To:`` becomes an unholy mess
|
||||
for large series and/or when the version count gets high, and ``In-Reply-To:``
|
||||
is useless for anyone that doesn't have the original message, e.g. if someone
|
||||
wasn't Cc'd on the bug report or if the list of recipients changes between
|
||||
versions.
|
||||
|
||||
To link to a bug report, previous version, or anything of interest, use lore
|
||||
links. For referencing previous version(s), generally speaking do not include
|
||||
a Link: in the changelog as there is no need to record the history in git, i.e.
|
||||
put the link in the cover letter or in the section git ignores. Do provide a
|
||||
formal Link: for bug reports and/or discussions that led to the patch. The
|
||||
context of why a change was made is highly valuable for future readers.
|
||||
|
||||
Git Base
|
||||
~~~~~~~~
|
||||
If you are using git version 2.9.0 or later (Googlers, this is all of you!),
|
||||
use ``git format-patch`` with the ``--base`` flag to automatically include the
|
||||
base tree information in the generated patches.
|
||||
|
||||
Note, ``--base=auto`` works as expected if and only if a branch's upstream is
|
||||
set to the base topic branch, e.g. it will do the wrong thing if your upstream
|
||||
is set to your personal repository for backup purposes. An alternative "auto"
|
||||
solution is to derive the names of your development branches based on their
|
||||
KVM x86 topic, and feed that into ``--base``. E.g. ``x86/pmu/my_branch_name``,
|
||||
and then write a small wrapper to extract ``pmu`` from the current branch name
|
||||
to yield ``--base=x/pmu``, where ``x`` is whatever name your repository uses to
|
||||
track the KVM x86 remote.
|
||||
|
||||
Co-Posting Tests
|
||||
~~~~~~~~~~~~~~~~
|
||||
KVM selftests that are associated with KVM changes, e.g. regression tests for
|
||||
bug fixes, should be posted along with the KVM changes as a single series. The
|
||||
standard kernel rules for bisection apply, i.e. KVM changes that result in test
|
||||
failures should be ordered after the selftests updates, and vice versa, new
|
||||
tests that fail due to KVM bugs should be ordered after the KVM fixes.
|
||||
|
||||
KVM-unit-tests should *always* be posted separately. Tools, e.g. b4 am, don't
|
||||
know that KVM-unit-tests is a separate repository and get confused when patches
|
||||
in a series apply on different trees. To tie KVM-unit-tests patches back to
|
||||
KVM patches, first post the KVM changes and then provide a lore Link: to the
|
||||
KVM patch/series in the KVM-unit-tests patch(es).
|
||||
|
||||
Notifications
|
||||
-------------
|
||||
When a patch/series is officially accepted, a notification email will be sent
|
||||
in reply to the original posting (cover letter for multi-patch series). The
|
||||
notification will include the tree and topic branch, along with the SHA1s of
|
||||
the commits of applied patches.
|
||||
|
||||
If a subset of patches is applied, this will be clearly stated in the
|
||||
notification. Unless stated otherwise, it's implied that any patches in the
|
||||
series that were not accepted need more work and should be submitted in a new
|
||||
version.
|
||||
|
||||
If for some reason a patch is dropped after officially being accepted, a reply
|
||||
will be sent to the notification email explaining why the patch was dropped, as
|
||||
well as the next steps.
|
||||
|
||||
SHA1 Stability
|
||||
~~~~~~~~~~~~~~
|
||||
SHA1s are not 100% guaranteed to be stable until they land in Linus' tree! A
|
||||
SHA1 is *usually* stable once a notification has been sent, but things happen.
|
||||
In most cases, an update to the notification email will be provided if an applied
|
||||
patch's SHA1 changes. However, in some scenarios, e.g. if all KVM x86 branches
|
||||
need to be rebased, individual notifications will not be given.
|
||||
|
||||
Vulnerabilities
|
||||
---------------
|
||||
Bugs that can be exploited by the guest to attack the host (kernel or
|
||||
userspace), or that can be exploited by a nested VM to attack *its* host (L2 attacking
|
||||
L1), are of particular interest to KVM. Please follow the protocol for
|
||||
:ref:`securitybugs` if you suspect a bug can lead to an escape, data leak, etc.
|
||||
|
||||
@@ -455,6 +455,8 @@ and can be added to an existing kernel config by running:
|
||||
Some of these options are x86-specific and can be left out when testing
|
||||
on other architectures.
|
||||
|
||||
.. _maintainer-tip-coding-style:
|
||||
|
||||
Coding style notes
|
||||
------------------
|
||||
|
||||
|
||||
@@ -8445,6 +8445,33 @@ structure.
|
||||
When getting the Modified Change Topology Report value, the attr->addr
|
||||
must point to a byte where the value will be stored or retrieved from.
|
||||
|
||||
8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
---------------------------------------
|
||||
|
||||
:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
:Architectures: arm64
|
||||
:Type: vm
|
||||
:Parameters: arg[0] is the new split chunk size.
|
||||
:Returns: 0 on success, -EINVAL if any memslot was already created.
|
||||
|
||||
This capability sets the chunk size used in Eager Page Splitting.
|
||||
|
||||
Eager Page Splitting improves the performance of dirty-logging (used
|
||||
in live migrations) when guest memory is backed by huge-pages. It
|
||||
avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
|
||||
it eagerly when enabling dirty logging (with the
|
||||
KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
|
||||
KVM_CLEAR_DIRTY_LOG.
|
||||
|
||||
The chunk size specifies how many pages to break at a time, using a
|
||||
single allocation for each chunk. The bigger the chunk size, the more pages
|
||||
need to be allocated ahead of time.
|
||||
|
||||
The chunk size needs to be a valid block size. The list of acceptable
|
||||
block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
|
||||
64-bit bitmap (each bit describing a block size). The default value is
|
||||
0, to disable the eager page splitting.
|
||||
|
||||
9. Known KVM API problems
|
||||
=========================
|
||||
|
||||
|
||||
@@ -205,7 +205,7 @@ Shadow pages contain the following information:
|
||||
role.passthrough:
|
||||
The page is not backed by a guest page table, but its first entry
|
||||
points to one. This is set if NPT uses 5-level page tables (host
|
||||
CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=1).
|
||||
CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=0).
|
||||
gfn:
|
||||
Either the guest page table containing the translations shadowed by this
|
||||
page, or the base page frame for linear translations. See role.direct.
|
||||
|
||||
@@ -11546,6 +11546,7 @@ M: Sean Christopherson <seanjc@google.com>
|
||||
M: Paolo Bonzini <pbonzini@redhat.com>
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
P: Documentation/process/maintainer-kvm-x86.rst
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
F: arch/x86/include/asm/kvm*
|
||||
F: arch/x86/include/asm/svm.h
|
||||
|
||||
@@ -414,6 +414,25 @@ menu "Kernel Features"
|
||||
|
||||
menu "ARM errata workarounds via the alternatives framework"
|
||||
|
||||
config AMPERE_ERRATUM_AC03_CPU_38
|
||||
bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
|
||||
default y
|
||||
help
|
||||
This option adds an alternative code sequence to work around Ampere
|
||||
erratum AC03_CPU_38 on AmpereOne.
|
||||
|
||||
The affected design reports FEAT_HAFDBS as not implemented in
|
||||
ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
|
||||
as required by the architecture. The unadvertised HAFDBS
|
||||
implementation suffers from an additional erratum where hardware
|
||||
A/D updates can occur after a PTE has been marked invalid.
|
||||
|
||||
The workaround forces KVM to explicitly set VTCR_EL2.HA to 0,
|
||||
which avoids enabling unadvertised hardware Access Flag management
|
||||
at stage-2.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config ARM64_WORKAROUND_CLEAN_CACHE
|
||||
bool
|
||||
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
#define MAX_CPU_FEATURES 128
|
||||
#define cpu_feature(x) KERNEL_HWCAP_ ## x
|
||||
|
||||
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
|
||||
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bug.h>
|
||||
@@ -905,6 +908,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
|
||||
return 8;
|
||||
}
|
||||
|
||||
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
|
||||
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
|
||||
|
||||
extern struct arm64_ftr_override id_aa64mmfr1_override;
|
||||
@@ -915,6 +919,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
|
||||
extern struct arm64_ftr_override id_aa64isar1_override;
|
||||
extern struct arm64_ftr_override id_aa64isar2_override;
|
||||
|
||||
extern struct arm64_ftr_override arm64_sw_feature_override;
|
||||
|
||||
u32 get_kvm_ipa_limit(void);
|
||||
void dump_cpu_features(void);
|
||||
|
||||
|
||||
@@ -43,6 +43,11 @@
|
||||
*/
|
||||
.macro __init_el2_timers
|
||||
mov x0, #3 // Enable EL1 physical timers
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .LnVHE_\@
|
||||
lsl x0, x0, #10
|
||||
.LnVHE_\@:
|
||||
msr cnthctl_el2, x0
|
||||
msr cntvoff_el2, xzr // Clear virtual offset
|
||||
.endm
|
||||
@@ -133,8 +138,15 @@
|
||||
.endm
|
||||
|
||||
/* Coprocessor traps */
|
||||
.macro __init_el2_nvhe_cptr
|
||||
.macro __init_el2_cptr
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .LnVHE_\@
|
||||
mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
|
||||
b .Lset_cptr_\@
|
||||
.LnVHE_\@:
|
||||
mov x0, #0x33ff
|
||||
.Lset_cptr_\@:
|
||||
msr cptr_el2, x0 // Disable copro. traps to EL2
|
||||
.endm
|
||||
|
||||
@@ -210,9 +222,8 @@
|
||||
__init_el2_gicv3
|
||||
__init_el2_hstr
|
||||
__init_el2_nvhe_idregs
|
||||
__init_el2_nvhe_cptr
|
||||
__init_el2_cptr
|
||||
__init_el2_fgt
|
||||
__init_el2_nvhe_prepare_eret
|
||||
.endm
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
@@ -258,7 +269,17 @@
|
||||
|
||||
.Linit_sve_\@: /* SVE register access */
|
||||
mrs x0, cptr_el2 // Disable SVE traps
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .Lcptr_nvhe_\@
|
||||
|
||||
// VHE case
|
||||
orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
|
||||
b .Lset_cptr_\@
|
||||
|
||||
.Lcptr_nvhe_\@: // nVHE case
|
||||
bic x0, x0, #CPTR_EL2_TZ
|
||||
.Lset_cptr_\@:
|
||||
msr cptr_el2, x0
|
||||
isb
|
||||
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#define HCR_ATA_SHIFT 56
|
||||
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
|
||||
#define HCR_AMVOFFEN (UL(1) << 51)
|
||||
#define HCR_TID4 (UL(1) << 49)
|
||||
#define HCR_FIEN (UL(1) << 47)
|
||||
#define HCR_FWB (UL(1) << 46)
|
||||
#define HCR_API (UL(1) << 41)
|
||||
@@ -87,7 +88,7 @@
|
||||
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
|
||||
HCR_BSU_IS | HCR_FB | HCR_TACR | \
|
||||
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
|
||||
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
|
||||
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
|
||||
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
|
||||
@@ -289,7 +290,6 @@
|
||||
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
|
||||
#define CPTR_EL2_TZ (1 << 8)
|
||||
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
|
||||
#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
|
||||
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
|
||||
GENMASK(29, 21) | \
|
||||
GENMASK(19, 14) | \
|
||||
@@ -351,8 +351,7 @@
|
||||
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
|
||||
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
|
||||
|
||||
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
|
||||
CPACR_EL1_ZEN_EL1EN)
|
||||
#define CPACR_EL1_TTA (1 << 28)
|
||||
|
||||
#define kvm_mode_names \
|
||||
{ PSR_MODE_EL0t, "EL0t" }, \
|
||||
|
||||
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
|
||||
@@ -62,19 +62,14 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
#else
|
||||
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
|
||||
&kvm->arch.flags));
|
||||
|
||||
return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
|
||||
return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
|
||||
if (is_kernel_in_hyp_mode())
|
||||
if (has_vhe() || has_hvhe())
|
||||
vcpu->arch.hcr_el2 |= HCR_E2H;
|
||||
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
|
||||
/* route synchronous external abort exceptions to EL2 */
|
||||
@@ -95,6 +90,12 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.hcr_el2 |= HCR_TVM;
|
||||
}
|
||||
|
||||
if (cpus_have_final_cap(ARM64_HAS_EVT) &&
|
||||
!cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
|
||||
vcpu->arch.hcr_el2 |= HCR_TID4;
|
||||
else
|
||||
vcpu->arch.hcr_el2 |= HCR_TID2;
|
||||
|
||||
if (vcpu_el1_is_32bit(vcpu))
|
||||
vcpu->arch.hcr_el2 &= ~HCR_RW;
|
||||
|
||||
@@ -570,4 +571,35 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
|
||||
return test_bit(feature, vcpu->arch.features);
|
||||
}
|
||||
|
||||
/*
 * Compute the reset value of the FP/SVE/SME trap configuration for @vcpu.
 *
 * The value depends on the hypervisor mode:
 *  - has_vhe():  the EL0/EL1 FP enable bits plus the EL1 SVE enable bit,
 *                expressed with the CPACR_EL1_* field definitions;
 *  - has_hvhe(): the EL0/EL1 FP enable bits only (CPACR_EL1_* definitions);
 *  - otherwise:  CPTR_NVHE_EL2_RES1, with CPTR_EL2_TZ additionally set when
 *                the vCPU has SVE and its fp_state is FP_STATE_GUEST_OWNED,
 *                and CPTR_EL2_TSM cleared when the ARM64_SME capability is
 *                present.
 *
 * NOTE(review): setting the SVE trap bit (CPTR_EL2_TZ) precisely when the
 * guest owns the FP state looks surprising -- confirm against the callers.
 *
 * Return: the computed value; no register is written here.
 */
static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (has_vhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
|
||||
CPACR_EL1_ZEN_EL1EN);
|
||||
} else if (has_hvhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
|
||||
} else {
|
||||
val = CPTR_NVHE_EL2_RES1;
|
||||
|
||||
if (vcpu_has_sve(vcpu) &&
|
||||
(vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
|
||||
val |= CPTR_EL2_TZ;
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
val &= ~CPTR_EL2_TSM;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Write the value computed by kvm_get_reset_cptr_el2() for @vcpu to the
 * trap control register: cpacr_el1 when has_vhe() or has_hvhe() is true,
 * cptr_el2 otherwise.
 */
static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val = kvm_get_reset_cptr_el2(vcpu);
|
||||
|
||||
if (has_vhe() || has_hvhe())
|
||||
write_sysreg(val, cpacr_el1);
|
||||
else
|
||||
write_sysreg(val, cptr_el2);
|
||||
}
|
||||
#endif /* __ARM64_KVM_EMULATE_H__ */
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
|
||||
|
||||
#define KVM_VCPU_MAX_FEATURES 7
|
||||
#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
|
||||
|
||||
#define KVM_REQ_SLEEP \
|
||||
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
@@ -159,6 +160,21 @@ struct kvm_s2_mmu {
|
||||
/* The last vcpu id that ran on each physical CPU */
|
||||
int __percpu *last_vcpu_ran;
|
||||
|
||||
#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
|
||||
/*
|
||||
* Memory cache used to split
|
||||
* KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
|
||||
* is used to allocate stage2 page tables while splitting huge
|
||||
* pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
* influences both the capacity of the split page cache, and
|
||||
* how often KVM reschedules. Be wary of raising CHUNK_SIZE
|
||||
* too high.
|
||||
*
|
||||
* Protected by kvm->slots_lock.
|
||||
*/
|
||||
struct kvm_mmu_memory_cache split_page_cache;
|
||||
uint64_t split_page_chunk_size;
|
||||
|
||||
struct kvm_arch *arch;
|
||||
};
|
||||
|
||||
@@ -214,25 +230,23 @@ struct kvm_arch {
|
||||
#define KVM_ARCH_FLAG_MTE_ENABLED 1
|
||||
/* At least one vCPU has run in the VM */
|
||||
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
|
||||
/*
|
||||
* The following two bits are used to indicate the guest's EL1
|
||||
* register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
|
||||
* bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
|
||||
* Otherwise, the guest's EL1 register width has not yet been
|
||||
* determined.
|
||||
*/
|
||||
#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
|
||||
#define KVM_ARCH_FLAG_EL1_32BIT 4
|
||||
/* The vCPU feature set for the VM is configured */
|
||||
#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
|
||||
/* PSCI SYSTEM_SUSPEND enabled for the guest */
|
||||
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
|
||||
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
|
||||
/* VM counter offset */
|
||||
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
|
||||
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
|
||||
/* Timer PPIs made immutable */
|
||||
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
|
||||
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
|
||||
/* SMCCC filter initialized for the VM */
|
||||
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
|
||||
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
|
||||
/* Initial ID reg values loaded */
|
||||
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
|
||||
unsigned long flags;
|
||||
|
||||
/* VM-wide vCPU feature set */
|
||||
DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
/*
|
||||
* VM-wide PMU filter, implemented as a bitmap and big enough for
|
||||
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
|
||||
@@ -242,17 +256,23 @@ struct kvm_arch {
|
||||
|
||||
cpumask_var_t supported_cpus;
|
||||
|
||||
u8 pfr0_csv2;
|
||||
u8 pfr0_csv3;
|
||||
struct {
|
||||
u8 imp:4;
|
||||
u8 unimp:4;
|
||||
} dfr0_pmuver;
|
||||
|
||||
/* Hypercall features firmware registers' descriptor */
|
||||
struct kvm_smccc_features smccc_feat;
|
||||
struct maple_tree smccc_filter;
|
||||
|
||||
/*
|
||||
* Emulated CPU ID registers per VM
|
||||
* (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
|
||||
* is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
|
||||
*
|
||||
* These emulated idregs are VM-wide, but accessed from the context of a vCPU.
|
||||
* Atomic access to multiple idregs is guarded by kvm_arch.config_lock.
|
||||
*/
|
||||
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
|
||||
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
|
||||
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
|
||||
u64 id_regs[KVM_ARM_ID_REG_NUM];
|
||||
|
||||
/*
|
||||
* For an untrusted host VM, 'pkvm.handle' is used to lookup
|
||||
* the associated pKVM instance in the hypervisor.
|
||||
@@ -410,6 +430,7 @@ struct kvm_host_data {
|
||||
struct kvm_host_psci_config {
|
||||
/* PSCI version used by host. */
|
||||
u32 version;
|
||||
u32 smccc_version;
|
||||
|
||||
/* Function IDs used by host if version is v0.1. */
|
||||
struct psci_0_1_function_ids function_ids_0_1;
|
||||
|
||||
@@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
|
||||
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
|
||||
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
/*
|
||||
* Unified accessors for registers that have a different encoding
|
||||
* between VHE and non-VHE. They must be specified without their "ELx"
|
||||
* encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
|
||||
*/
|
||||
|
||||
#if defined(__KVM_VHE_HYPERVISOR__)
|
||||
|
||||
#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
|
||||
#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
|
||||
#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
|
||||
#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
|
||||
#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
|
||||
#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
|
||||
|
||||
#else // !__KVM_VHE_HYPERVISOR__
|
||||
|
||||
#if defined(__KVM_NVHE_HYPERVISOR__)
|
||||
#define VHE_ALT_KEY ARM64_KVM_HVHE
|
||||
#else
|
||||
#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
|
||||
#endif
|
||||
|
||||
#define read_sysreg_elx(r,nvh,vh) \
|
||||
({ \
|
||||
u64 reg; \
|
||||
asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
|
||||
asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
|
||||
__mrs_s("%0", r##vh), \
|
||||
ARM64_HAS_VIRT_HOST_EXTN) \
|
||||
VHE_ALT_KEY) \
|
||||
: "=r" (reg)); \
|
||||
reg; \
|
||||
})
|
||||
@@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
u64 __val = (u64)(v); \
|
||||
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
|
||||
__msr_s(r##vh, "%x0"), \
|
||||
ARM64_HAS_VIRT_HOST_EXTN) \
|
||||
VHE_ALT_KEY) \
|
||||
: : "rZ" (__val)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Unified accessors for registers that have a different encoding
|
||||
* between VHE and non-VHE. They must be specified without their "ELx"
|
||||
* encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
|
||||
*/
|
||||
|
||||
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
|
||||
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
|
||||
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
|
||||
@@ -48,6 +65,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
|
||||
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
|
||||
|
||||
#endif // __KVM_VHE_HYPERVISOR__
|
||||
|
||||
/*
|
||||
* Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
|
||||
* static inline can allow the compiler to out-of-line this. KVM always wants
|
||||
|
||||
@@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
|
||||
|
||||
void stage2_unmap_vm(struct kvm *kvm);
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
|
||||
void kvm_uninit_stage2_mmu(struct kvm *kvm);
|
||||
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
|
||||
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
phys_addr_t pa, unsigned long size, bool writable);
|
||||
@@ -227,7 +228,8 @@ static inline void __invalidate_icache_guest_page(void *va, size_t size)
|
||||
if (icache_is_aliasing()) {
|
||||
/* any kind of VIPT cache */
|
||||
icache_inval_all_pou();
|
||||
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
|
||||
} else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
|
||||
!icache_is_vpipt()) {
|
||||
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
|
||||
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
|
||||
}
|
||||
|
||||
@@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
}
|
||||
|
||||
/*
 * Build a bitmask of the block sizes covered by the page-table levels
 * KVM_PGTABLE_MIN_BLOCK_LEVEL .. KVM_PGTABLE_MAX_LEVELS - 1: for each such
 * level, bit number kvm_granule_shift(level) is set in the result.
 *
 * Return: the accumulated bitmask.
 */
static inline u32 kvm_supported_block_sizes(void)
|
||||
{
|
||||
u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
u32 r = 0;
|
||||
|
||||
for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
|
||||
r |= BIT(kvm_granule_shift(level));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Check whether @size is one of the sizes reported by
 * kvm_supported_block_sizes().
 *
 * @size must pass the IS_ALIGNED(size, size) test (used here as the
 * power-of-two check) and must share a set bit with the supported-sizes
 * bitmask.
 *
 * Return: true if both conditions hold, false otherwise.
 */
static inline bool kvm_is_block_size_supported(u64 size)
|
||||
{
|
||||
bool is_power_of_two = IS_ALIGNED(size, size);
|
||||
|
||||
return is_power_of_two && (size & kvm_supported_block_sizes());
|
||||
}
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||
* @zalloc_page: Allocate a single zeroed memory page.
|
||||
@@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
* allocation is physically contiguous.
|
||||
* @free_pages_exact: Free an exact number of memory pages previously
|
||||
* allocated by zalloc_pages_exact.
|
||||
* @free_removed_table: Free a removed paging structure by unlinking and
|
||||
* @free_unlinked_table: Free an unlinked paging structure by unlinking and
|
||||
* dropping references.
|
||||
* @get_page: Increment the refcount on a page.
|
||||
* @put_page: Decrement the refcount on a page. When the
|
||||
@@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_removed_table)(void *addr, u32 level);
|
||||
void (*free_unlinked_table)(void *addr, u32 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
* with other software walkers.
|
||||
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
|
||||
* invoked from a fault handler.
|
||||
* @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
|
||||
* without Break-before-make's
|
||||
* TLB invalidation.
|
||||
* @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
|
||||
* without Cache maintenance
|
||||
* operations required.
|
||||
*/
|
||||
enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_LEAF = BIT(0),
|
||||
@@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
|
||||
KVM_PGTABLE_WALK_SHARED = BIT(3),
|
||||
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
|
||||
KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
|
||||
KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
|
||||
};
|
||||
|
||||
struct kvm_pgtable_visit_ctx {
|
||||
@@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
|
||||
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @pgtable: Unlinked stage-2 paging structure to be freed.
|
||||
* @level: Level of the stage-2 paging structure to be freed.
|
||||
@@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @phys: Physical address of the memory to map.
|
||||
* @level: Starting level of the stage-2 paging structure to be created.
|
||||
* @prot: Permissions and attributes for the mapping.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @force_pte: Force mappings to PAGE_SIZE granularity.
|
||||
*
|
||||
* Returns an unlinked page-table tree. This new page-table tree is
|
||||
* not reachable (i.e., it is unlinked) from the root pgd and it's
|
||||
* therefore unreachable by the hardware page-table walker. No TLB
|
||||
* invalidation or CMOs are performed.
|
||||
*
|
||||
* If device attributes are not explicitly requested in @prot, then the
|
||||
* mapping will be normal, cacheable.
|
||||
*
|
||||
* Return: The fully populated (unlinked) stage-2 paging structure, or
|
||||
* an ERR_PTR(error) on failure.
|
||||
*/
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
|
||||
@@ -620,6 +672,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
|
||||
*/
|
||||
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
|
||||
* to PAGE_SIZE guest pages.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @addr: Intermediate physical address from which to split.
|
||||
* @size: Size of the range.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
*
|
||||
* The function tries to split any level 1 or 2 entry that overlaps
|
||||
* with the input range (given by @addr and @size).
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure. Note that
|
||||
* kvm_pgtable_stage2_split() is best effort: it tries to break as many
|
||||
* blocks in the input range as allowed by the capacity of @mc.
|
||||
*/
|
||||
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_mmu_memory_cache *mc);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_walk() - Walk a page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
#ifndef __ARM64_KVM_PKVM_H__
|
||||
#define __ARM64_KVM_PKVM_H__
|
||||
|
||||
#include <linux/arm_ffa.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
/* Maximum number of VMs that can co-exist under pKVM. */
|
||||
@@ -106,4 +108,23 @@ static inline unsigned long host_s2_pgtable_pages(void)
|
||||
return res;
|
||||
}
|
||||
|
||||
#define KVM_FFA_MBOX_NR_PAGES 1
|
||||
|
||||
/*
 * Number of pages reserved for the hypervisor FF-A proxy: two mailboxes of
 * KVM_FFA_MBOX_NR_PAGES pages each, plus enough pages (rounded up) to hold
 * one memory-region descriptor with SG_MAX_SEGMENTS address ranges.
 */
static inline unsigned long hyp_ffa_proxy_pages(void)
|
||||
{
|
||||
size_t desc_max;
|
||||
|
||||
/*
|
||||
* The hypervisor FFA proxy needs enough memory to buffer a fragmented
|
||||
* descriptor returned from EL3 in response to a RETRIEVE_REQ call.
|
||||
*/
|
||||
desc_max = sizeof(struct ffa_mem_region) +
|
||||
sizeof(struct ffa_mem_region_attributes) +
|
||||
sizeof(struct ffa_composite_mem_region) +
|
||||
SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
|
||||
|
||||
/* Plus a page each for the hypervisor's RX and TX mailboxes. */
|
||||
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_PKVM_H__ */
|
||||
|
||||
@@ -510,6 +510,7 @@
|
||||
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
|
||||
(BIT(29)))
|
||||
|
||||
#define SCTLR_EL2_BT (BIT(36))
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
|
||||
#else
|
||||
|
||||
@@ -110,8 +110,10 @@ static inline bool is_hyp_mode_mismatched(void)
|
||||
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
|
||||
}
|
||||
|
||||
static inline bool is_kernel_in_hyp_mode(void)
|
||||
static __always_inline bool is_kernel_in_hyp_mode(void)
|
||||
{
|
||||
BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
|
||||
__is_defined(__KVM_VHE_HYPERVISOR__));
|
||||
return read_sysreg(CurrentEL) == CurrentEL_EL2;
|
||||
}
|
||||
|
||||
@@ -140,6 +142,14 @@ static __always_inline bool is_protected_kvm_enabled(void)
|
||||
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
|
||||
}
|
||||
|
||||
/*
 * Report whether the ARM64_KVM_HVHE capability is in effect.
 *
 * Always false when is_vhe_hyp_code() is true; otherwise the result of the
 * finalised capability check for ARM64_KVM_HVHE.
 */
static __always_inline bool has_hvhe(void)
|
||||
{
|
||||
if (is_vhe_hyp_code())
|
||||
return false;
|
||||
|
||||
return cpus_have_final_cap(ARM64_KVM_HVHE);
|
||||
}
|
||||
|
||||
static inline bool is_hyp_nvhe(void)
|
||||
{
|
||||
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user