aboutsummaryrefslogtreecommitdiffstats
diff options
authorMark Brown <broonie@kernel.org>2026-05-29 22:43:23 +0100
committerMark Brown <broonie@kernel.org>2026-05-29 22:43:23 +0100
commit4bc0e6d39b7c764e4345562c4ee398986939d1e0 (patch)
tree10e3fc67cf175b52a7b1e5d10d1d1dd66c1a1ee9
parentd4fbe67d77c3d934bbae2c1008293d3f1f1377f5 (diff)
parentac5fb67f6006bf80128340af67f9512fdaf75ae6 (diff)
downloadlinux-next-history-4bc0e6d39b7c764e4345562c4ee398986939d1e0.tar.gz
Merge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml40
-rw-r--r--Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml71
-rw-r--r--MAINTAINERS13
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-base.dtsi11
-rw-r--r--drivers/iommu/Kconfig11
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd/amd_iommu.h2
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h27
-rw-r--r--drivers/iommu/amd/init.c63
-rw-r--r--drivers/iommu/amd/iommu.c6
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c24
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c10
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c558
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h249
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c16
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c13
-rw-r--r--drivers/iommu/generic_pt/fmt/amdv1.h1
-rw-r--r--drivers/iommu/generic_pt/fmt/iommu_riscv64.c4
-rw-r--r--drivers/iommu/generic_pt/fmt/riscv.h11
-rw-r--r--drivers/iommu/generic_pt/iommu_pt.h110
-rw-r--r--drivers/iommu/generic_pt/kunit_generic_pt.h10
-rw-r--r--drivers/iommu/generic_pt/kunit_iommu_pt.h5
-rw-r--r--drivers/iommu/intel/cache.c49
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c18
-rw-r--r--drivers/iommu/io-pgtable-arm.c88
-rw-r--r--drivers/iommu/riscv/iommu-bits.h123
-rw-r--r--drivers/iommu/riscv/iommu.c162
-rw-r--r--drivers/iommu/vsi-iommu.c791
-rw-r--r--drivers/pci/ats.c47
-rw-r--r--drivers/pci/pci.h9
-rw-r--r--drivers/pci/quirks.c42
-rw-r--r--include/linux/compiler_attributes.h11
-rw-r--r--include/linux/generic_pt/common.h9
-rw-r--r--include/linux/iommu.h42
-rw-r--r--include/linux/pci-ats.h3
-rw-r--r--include/uapi/linux/pci_regs.h1
-rw-r--r--lib/debugobjects.c2
37 files changed, 1964 insertions, 689 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 06fb5c8e7547c..25fd3efa24200 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -37,6 +37,7 @@ properties:
- enum:
- qcom,eliza-smmu-500
- qcom,glymur-smmu-500
+ - qcom,hawi-smmu-500
- qcom,kaanapali-smmu-500
- qcom,milos-smmu-500
- qcom,qcm2290-smmu-500
@@ -55,6 +56,7 @@ properties:
- qcom,sdx55-smmu-500
- qcom,sdx65-smmu-500
- qcom,sdx75-smmu-500
+ - qcom,shikra-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- qcom,sm6350-smmu-500
@@ -566,7 +568,9 @@ allOf:
properties:
compatible:
items:
- - const: qcom,sm8750-smmu-500
+ - enum:
+ - qcom,glymur-smmu-500
+ - qcom,sm8750-smmu-500
- const: qcom,adreno-smmu
- const: qcom,smmu-500
- const: arm,mmu-500
@@ -595,6 +599,7 @@ allOf:
- qcom,sdm845-smmu-500
- qcom,sdx55-smmu-500
- qcom,sdx65-smmu-500
+ - qcom,sdx75-smmu-500
- qcom,sm6350-smmu-500
- qcom,sm6375-smmu-500
then:
@@ -602,6 +607,39 @@ allOf:
clock-names: false
clocks: false
+ # Disallow clocks for all other platforms where specific compatible is used
+ # with different fallbacks and only one combination has no clocks
+ - if:
+ properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,eliza-smmu-500
+ - qcom,glymur-smmu-500
+ - qcom,kaanapali-smmu-500
+ - qcom,milos-smmu-500
+ - qcom,qcs615-smmu-500
+ - qcom,qcs8300-smmu-500
+ - qcom,sa8775p-smmu-500
+ - qcom,sm6115-smmu-500
+ - qcom,sm6125-smmu-500
+ - qcom,sm6350-smmu-500
+ - qcom,sm6375-smmu-500
+ - qcom,sm8150-smmu-500
+ - qcom,sm8250-smmu-500
+ - qcom,sm8350-smmu-500
+ - qcom,sm8450-smmu-500
+ - qcom,sm8550-smmu-500
+ - qcom,sm8650-smmu-500
+ - qcom,sm8750-smmu-500
+ - qcom,x1e80100-smmu-500
+ - const: qcom,smmu-500
+ - const: arm,mmu-500
+ then:
+ properties:
+ clock-names: false
+ clocks: false
+
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml b/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
new file mode 100644
index 0000000000000..d3ce9e603b61d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/verisilicon,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Verisilicon IOMMU
+
+maintainers:
+ - Benjamin Gaignard <benjamin.gaignard@collabora.com>
+
+description: |+
+ A Verisilicon IOMMU translates I/O virtual addresses to physical addresses for
+ its associated video decoder.
+
+properties:
+ compatible:
+ items:
+ - const: rockchip,rk3588-av1-iommu
+ - const: verisilicon,iommu-1.2
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Core clock
+ - description: Interface clock
+
+ clock-names:
+ items:
+ - const: core
+ - const: iface
+
+ "#iommu-cells":
+ const: 0
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ iommu@fdca0000 {
+ compatible = "rockchip,rk3588-av1-iommu","verisilicon,iommu-1.2";
+ reg = <0x0 0xfdca0000 0x0 0x600>;
+ interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&cru ACLK_AV1>, <&cru PCLK_AV1>;
+ clock-names = "core", "iface";
+ #iommu-cells = <0>;
+ };
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index 143b4e97d47ae..ce36378c01192 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1158,8 +1158,9 @@ F: Documentation/arch/x86/amd-hfi.rst
F: drivers/platform/x86/amd/hfi/
AMD IOMMU (AMD-VI)
-M: Joerg Roedel <joro@8bytes.org>
+M: Joerg Roedel (AMD) <joro@8bytes.org>
R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+R: Vasant Hegde <vasant.hegde@amd.com>
L: iommu@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
@@ -13584,7 +13585,7 @@ F: include/linux/iommu-dma.h
F: include/linux/iova.h
IOMMU SUBSYSTEM
-M: Joerg Roedel <joro@8bytes.org>
+M: Joerg Roedel (AMD) <joro@8bytes.org>
M: Will Deacon <will@kernel.org>
R: Robin Murphy <robin.murphy@arm.com>
L: iommu@lists.linux.dev
@@ -28138,6 +28139,14 @@ F: drivers/media/v4l2-core/v4l2-isp.c
F: include/media/v4l2-isp.h
F: include/uapi/linux/media/v4l2-isp.h
+VERISILICON IOMMU DRIVER
+M: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+L: iommu@lists.linux.dev
+S: Maintained
+F: Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
+F: drivers/iommu/vsi-iommu.c
+F: include/linux/vsi-iommu.h
+
VF610 NAND DRIVER
M: Stefan Agner <stefan@agner.ch>
L: linux-mtd@lists.infradead.org
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
index a22da6671da31..fc1fdbfd31622 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
@@ -1472,6 +1472,17 @@
clock-names = "aclk", "hclk";
power-domains = <&power RK3588_PD_AV1>;
resets = <&cru SRST_A_AV1>, <&cru SRST_P_AV1>, <&cru SRST_A_AV1_BIU>, <&cru SRST_P_AV1_BIU>;
+ iommus = <&av1d_mmu>;
+ };
+
+ av1d_mmu: iommu@fdca0000 {
+ compatible = "rockchip,rk3588-av1-iommu", "verisilicon,iommu-1.2";
+ reg = <0x0 0xfdca0000 0x0 0x600>;
+ interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&cru ACLK_AV1>, <&cru PCLK_AV1>;
+ clock-names = "core", "iface";
+ #iommu-cells = <0>;
+ power-domains = <&power RK3588_PD_AV1>;
};
vicap: video-capture@fdce0000 {
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f86262b11416d..18d3d68af7cdf 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -384,6 +384,17 @@ config SPRD_IOMMU
Say Y here if you want to use the multimedia devices listed above.
+config VSI_IOMMU
+ tristate "Verisilicon IOMMU Support"
+ depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST
+ select IOMMU_API
+ help
+ Support for IOMMUs used by Verisilicon sub-systems such as video
+ decoder or encoder hardware blocks.
+
+ Say Y here if you want to use this IOMMU in front of these
+ hardware blocks.
+
config IOMMU_DEBUG_PAGEALLOC
bool "Debug IOMMU mappings against page allocations"
depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 0275821f4ef98..887af357a7c96 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -36,4 +36,5 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
+obj-$(CONFIG_VSI_IOMMU) += vsi-iommu.o
obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 834d8fabfba38..af720bf14914d 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -44,7 +44,7 @@ int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum protection_domain_mode amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
-extern u8 amd_iommu_hpt_level;
+extern u8 amd_iommu_hpt_vasize;
extern unsigned long amd_iommu_pgsize_bitmap;
extern bool amd_iommu_hatdis;
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index f9f7180878930..d2c64e2e9f056 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -51,10 +51,6 @@
#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
#define MMIO_MSI_NUM(x) ((x) & 0x1f)
-/* Flag masks for the AMD IOMMU exclusion range */
-#define MMIO_EXCL_ENABLE_MASK 0x01ULL
-#define MMIO_EXCL_ALLOW_MASK 0x02ULL
-
/* Used offsets into the MMIO space */
#define MMIO_DEV_TABLE_OFFSET 0x0000
#define MMIO_CMD_BUF_OFFSET 0x0008
@@ -231,7 +227,6 @@
#define DEV_ENTRY_IR 0x3d
#define DEV_ENTRY_IW 0x3e
#define DEV_ENTRY_NO_PAGE_FAULT 0x62
-#define DEV_ENTRY_EX 0x67
#define DEV_ENTRY_SYSMGT1 0x68
#define DEV_ENTRY_SYSMGT2 0x69
#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40)
@@ -305,9 +300,6 @@
#define GA_GUEST_NR 0x1
-#define IOMMU_IN_ADDR_BIT_SIZE 52
-#define IOMMU_OUT_ADDR_BIT_SIZE 52
-
/*
* This bitmap is used to advertise the page sizes our hardware support
* to the IOMMU core, which will then use this information to split
@@ -389,8 +381,6 @@
#define IOMMU_PROT_IR 0x01
#define IOMMU_PROT_IW 0x02
-#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE (1 << 2)
-
/* IOMMU capabilities */
#define IOMMU_CAP_IOTLB 24
#define IOMMU_CAP_NPCACHE 26
@@ -400,6 +390,7 @@
#define IOMMU_IVINFO_OFFSET 36
#define IOMMU_IVINFO_EFRSUP BIT(0)
#define IOMMU_IVINFO_DMA_REMAP BIT(1)
+#define IOMMU_IVINFO_VASIZE GENMASK_ULL(21, 15)
/* IOMMU Feature Reporting Field (for IVHD type 10h) */
#define IOMMU_FEAT_GASUP_SHIFT 6
@@ -685,11 +676,6 @@ struct amd_iommu {
/* pci domain of this IOMMU */
struct amd_iommu_pci_seg *pci_seg;
- /* start of exclusion range of that IOMMU */
- u64 exclusion_start;
- /* length of exclusion range of that IOMMU */
- u64 exclusion_length;
-
/* command buffer virtual address */
u8 *cmd_buf;
u32 cmd_buf_head;
@@ -948,12 +934,13 @@ static inline int get_hpet_devid(int id)
}
enum amd_iommu_intr_mode_type {
- AMD_IOMMU_GUEST_IR_LEGACY,
-
- /* This mode is not visible to users. It is used when
- * we cannot fully enable vAPIC and fallback to only support
- * legacy interrupt remapping via 128-bit IRTE.
+ /*
+ * The legacy format mode is not visible to users to prevent the user
+ * from crashing x2APIC systems, which for all intents and purposes
+ * require 128-bit IRTEs. The legacy format will be forced as needed
+ * when hardware doesn't support 128-bit IRTEs.
*/
+ AMD_IOMMU_GUEST_IR_LEGACY,
AMD_IOMMU_GUEST_IR_LEGACY_GA,
AMD_IOMMU_GUEST_IR_VAPIC,
};
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 3bdb380d23e9a..e93bcb5eef702 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -155,8 +155,8 @@ bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
-/* Host page table level */
-u8 amd_iommu_hpt_level;
+/* Host page table virtual address size, in bits */
+u8 amd_iommu_hpt_vasize;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
@@ -355,28 +355,6 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
*
****************************************************************************/
-/*
- * This function set the exclusion range in the IOMMU. DMA accesses to the
- * exclusion range are passed through untranslated
- */
-static void iommu_set_exclusion_range(struct amd_iommu *iommu)
-{
- u64 start = iommu->exclusion_start & PAGE_MASK;
- u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
- u64 entry;
-
- if (!iommu->exclusion_start)
- return;
-
- entry = start | MMIO_EXCL_ENABLE_MASK;
- memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
- &entry, sizeof(entry));
-
- entry = limit;
- memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
- &entry, sizeof(entry));
-}
-
static void iommu_set_cwwb_range(struct amd_iommu *iommu)
{
u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
@@ -972,8 +950,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
{
int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
- return 0;
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
iommu->ga_log = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, GA_LOG_SIZE);
if (!iommu->ga_log)
@@ -1939,12 +1917,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
/* XT and GAM require GA mode. */
if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
- break;
+ } else {
+ if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
+ amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
}
- if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
- amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
-
if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
pr_warn_once("Host Address Translation is not supported.\n");
amd_iommu_hatdis = true;
@@ -2905,7 +2882,6 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_init_flags(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
- iommu_set_exclusion_range(iommu);
iommu_enable_gt(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
@@ -3022,8 +2998,10 @@ static void enable_iommus_vapic(void)
return;
}
- if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !check_feature(FEATURE_GAM_VAPIC)) {
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
+ return;
+
+ if (!check_feature(FEATURE_GAM_VAPIC)) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
return;
}
@@ -3110,6 +3088,9 @@ static void __init free_iommu_resources(void)
/* SB IOAPIC is always on this device in AMD systems */
#define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0))
+/* SB IOAPIC for Hygon family 18h models 4h-fh is on device 0xb */
+#define IOAPIC_SB_DEVID_FAM18H_M4H ((0x00 << 8) | PCI_DEVFN(0xb, 0))
+
static bool __init check_ioapic_information(void)
{
const char *fw_bug = FW_BUG;
@@ -3135,7 +3116,12 @@ static bool __init check_ioapic_information(void)
pr_err("%s: IOAPIC[%d] not in IVRS table\n",
fw_bug, id);
ret = false;
- } else if (devid == IOAPIC_SB_DEVID) {
+ } else if (devid == IOAPIC_SB_DEVID ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON &&
+ boot_cpu_data.x86 == 0x18 &&
+ boot_cpu_data.x86_model >= 0x4 &&
+ boot_cpu_data.x86_model <= 0xf &&
+ devid == IOAPIC_SB_DEVID_FAM18H_M4H)) {
has_sb_ioapic = true;
ret = true;
}
@@ -3202,7 +3188,7 @@ static int __init early_amd_iommu_init(void)
struct acpi_table_header *ivrs_base;
int ret;
acpi_status status;
- u8 efr_hats;
+ u8 efr_hats, max_vasize;
if (!amd_iommu_detected)
return -ENODEV;
@@ -3232,6 +3218,10 @@ static int __init early_amd_iommu_init(void)
ivinfo_init(ivrs_base);
+ max_vasize = FIELD_GET(IOMMU_IVINFO_VASIZE, amd_iommu_ivinfo);
+ if (!max_vasize)
+ max_vasize = 64;
+
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
@@ -3254,7 +3244,8 @@ static int __init early_amd_iommu_init(void)
* efr[HATS] bits specify the maximum host translation level
* supported, with LEVEL 4 being initial max level.
*/
- amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
+ amd_iommu_hpt_vasize = min_t(unsigned int, max_vasize,
+ (efr_hats + PAGE_MODE_4_LEVEL - 1) * 9 + 21);
} else {
pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
efr_hats);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 57dc8fabc7d9b..84cad43dc188c 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2718,8 +2718,7 @@ static struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev,
else
cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
- cfg.common.hw_max_vasz_lg2 =
- min(64, (amd_iommu_hpt_level - 1) * 9 + 21);
+ cfg.common.hw_max_vasz_lg2 = amd_iommu_hpt_vasize;
cfg.common.hw_max_oasz_lg2 = 52;
cfg.starting_level = 2;
domain->domain.ops = &amdv1_ops;
@@ -3086,9 +3085,6 @@ static void amd_iommu_get_resv_regions(struct device *dev,
prot |= IOMMU_READ;
if (entry->prot & IOMMU_PROT_IW)
prot |= IOMMU_WRITE;
- if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
- /* Exclusion range */
- type = IOMMU_RESV_RESERVED;
region = iommu_alloc_resv_region(entry->address_start,
length, prot, type,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index ddae0b07c76b5..1e9f7d2de3441 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -300,7 +300,7 @@ unlock:
/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
union {
- u64 cmd[2];
+ struct arm_smmu_cmd cmd;
struct iommu_viommu_arm_smmuv3_invalidate ucmd;
};
};
@@ -316,32 +316,32 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
struct arm_vsmmu_invalidation_cmd *cmd)
{
/* Commands are le64 stored in u64 */
- cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
- cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
+ cmd->cmd.data[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
+ cmd->cmd.data[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
- switch (cmd->cmd[0] & CMDQ_0_OP) {
+ switch (cmd->cmd.data[0] & CMDQ_0_OP) {
case CMDQ_OP_TLBI_NSNH_ALL:
/* Convert to NH_ALL */
- cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
+ cmd->cmd.data[0] = CMDQ_OP_TLBI_NH_ALL |
FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
- cmd->cmd[1] = 0;
+ cmd->cmd.data[1] = 0;
break;
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_TLBI_NH_VAA:
case CMDQ_OP_TLBI_NH_ALL:
case CMDQ_OP_TLBI_NH_ASID:
- cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
- cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
+ cmd->cmd.data[0] &= ~CMDQ_TLBI_0_VMID;
+ cmd->cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
break;
case CMDQ_OP_ATC_INV:
case CMDQ_OP_CFGI_CD:
case CMDQ_OP_CFGI_CD_ALL: {
- u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
+ u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd.data[0]);
if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
return -EIO;
- cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
- cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
+ cmd->cmd.data[0] &= ~CMDQ_CFGI_0_SID;
+ cmd->cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
break;
}
default:
@@ -386,7 +386,7 @@ int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
continue;
/* FIXME always uses the main cmdq rather than trying to group by type */
- ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
+ ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, &last->cmd,
cur - last, true);
if (ret) {
cur--;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index f1f8e01a7e914..1ed8a6f29dc44 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -92,6 +92,16 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
target->data[1] = cpu_to_le64(virt_to_phys(mm->pgd) &
CTXDESC_CD_1_TTB0_MASK);
+
+ /*
+ * Enable Hardware Access and Dirty updates (DBM) if supported.
+ * This is safe to enable by default, as PTE_WRITE and PTE_DBM
+ * share the same bit.
+ */
+ if (master->smmu->features & ARM_SMMU_FEAT_HA)
+ target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA);
+ if (master->smmu->features & ARM_SMMU_FEAT_HD)
+ target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HD);
} else {
target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_EPD0);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e8d7dbe495f03..a10affb483a4f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -268,115 +268,13 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
}
/* High-level queue accessors */
-static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
-{
- memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
- cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
-
- switch (ent->opcode) {
- case CMDQ_OP_TLBI_EL2_ALL:
- case CMDQ_OP_TLBI_NSNH_ALL:
- break;
- case CMDQ_OP_PREFETCH_CFG:
- cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
- break;
- case CMDQ_OP_CFGI_CD:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
- fallthrough;
- case CMDQ_OP_CFGI_STE:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
- cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
- break;
- case CMDQ_OP_CFGI_CD_ALL:
- cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
- break;
- case CMDQ_OP_CFGI_ALL:
- /* Cover the entire SID range */
- cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
- break;
- case CMDQ_OP_TLBI_NH_VA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- fallthrough;
- case CMDQ_OP_TLBI_EL2_VA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
- cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
- break;
- case CMDQ_OP_TLBI_S2_IPA:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
- cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
- cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
- break;
- case CMDQ_OP_TLBI_NH_ASID:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- fallthrough;
- case CMDQ_OP_TLBI_NH_ALL:
- case CMDQ_OP_TLBI_S12_VMALL:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
- break;
- case CMDQ_OP_TLBI_EL2_ASID:
- cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
- break;
- case CMDQ_OP_ATC_INV:
- cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
- cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
- cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
- cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
- break;
- case CMDQ_OP_PRI_RESP:
- cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
- cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
- cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
- cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
- switch (ent->pri.resp) {
- case PRI_RESP_DENY:
- case PRI_RESP_FAIL:
- case PRI_RESP_SUCC:
- break;
- default:
- return -EINVAL;
- }
- cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
- break;
- case CMDQ_OP_RESUME:
- cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
- cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
- cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
- break;
- case CMDQ_OP_CMD_SYNC:
- if (ent->sync.msiaddr) {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
- cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
- } else {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
- }
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
- break;
- default:
- return -ENOENT;
- }
-
- return 0;
-}
-
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
struct arm_smmu_cmdq *cmdq = NULL;
if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
- cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+ cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, cmd);
return cmdq ?: &smmu->cmdq;
}
@@ -390,26 +288,29 @@ static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
-static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+static void arm_smmu_cmdq_build_sync_cmd(struct arm_smmu_cmd *cmd,
+ struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq, u32 prod)
{
struct arm_smmu_queue *q = &cmdq->q;
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
+ u64 msiaddr = 0;
+ unsigned int cs;
/*
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
- ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
- q->ent_dwords * 8;
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) {
+ cs = CMDQ_SYNC_0_CS_NONE;
+ } else if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
+ cs = CMDQ_SYNC_0_CS_IRQ;
+ msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+ q->ent_dwords * 8;
+ } else {
+ cs = CMDQ_SYNC_0_CS_SEV;
}
- arm_smmu_cmdq_build_cmd(cmd, &ent);
- if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+ *cmd = arm_smmu_make_cmd_sync(cs, msiaddr);
}
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
@@ -422,14 +323,10 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
struct arm_smmu_queue *q = &cmdq->q;
-
int i;
- u64 cmd[CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmd;
u32 cons = readl_relaxed(q->cons_reg);
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
- struct arm_smmu_cmdq_ent cmd_sync = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
@@ -457,17 +354,18 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
* We may have concurrent producers, so we need to be careful
* not to touch any of the shadow cmdq state.
*/
- queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+ queue_read(cmd.data, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n");
- for (i = 0; i < ARRAY_SIZE(cmd); ++i)
- dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
+ for (i = 0; i < ARRAY_SIZE(cmd.data); ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd.data[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
- if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
- u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
+ cmd = arm_smmu_make_cmd_sync(
+ arm_smmu_cmdq_needs_busy_polling(smmu, cmdq) ?
+ CMDQ_SYNC_0_CS_NONE : CMDQ_SYNC_0_CS_SEV,
+ 0);
- queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+ queue_write(Q_ENT(q, cons), cmd.data, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
@@ -767,7 +665,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
-static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds,
u32 prod, int n)
{
int i;
@@ -777,10 +676,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
};
for (i = 0; i < n; ++i) {
- u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
-
prod = queue_inc_prod_n(&llq, i);
- queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+ queue_write(Q_ENT(&cmdq->q, prod), cmds[i].data,
+ ARRAY_SIZE(cmds[i].data));
}
}
@@ -801,10 +699,11 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU.
*/
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
bool sync)
{
- u64 cmd_sync[CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmd cmd_sync;
u32 prod;
unsigned long flags;
bool owner;
@@ -847,8 +746,9 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
- arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
- queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+ arm_smmu_cmdq_build_sync_cmd(&cmd_sync, smmu, cmdq, prod);
+ queue_write(Q_ENT(&cmdq->q, prod), cmd_sync.data,
+ ARRAY_SIZE(cmd_sync.data));
/*
* In order to determine completion of our CMD_SYNC, we must
@@ -920,73 +820,63 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
return ret;
}
-static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent,
- bool sync)
+static int arm_smmu_cmdq_issue_cmd_p(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmd *cmd, bool sync)
{
- u64 cmd[CMDQ_ENT_DWORDS];
-
- if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- ent->opcode);
- return -EINVAL;
- }
-
return arm_smmu_cmdq_issue_cmdlist(
- smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
+ smmu, arm_smmu_get_cmdq(smmu, cmd), cmd, 1, sync);
}
-static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
-{
- return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
-}
+#define arm_smmu_cmdq_issue_cmd(smmu, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_issue_cmd_p(smmu, &__cmd, false); \
+ })
-static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
-{
- return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
-}
+#define arm_smmu_cmdq_issue_cmd_with_sync(smmu, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_issue_cmd_p(smmu, &__cmd, true); \
+ })
-static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *ent)
+static void arm_smmu_cmdq_batch_init_cmd(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmd *cmd)
{
cmds->num = 0;
- cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, cmd);
}
-static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd)
+/*
+ * Append a copy of *@cmd to the batch.
+ *
+ * The pending commands are issued first, with a sync, when the batch's queue
+ * does not support @cmd, or when ARM_SMMU_OPT_CMDQ_FORCE_SYNC is set and the
+ * batch already holds CMDQ_BATCH_ENTRIES - 1 commands. They are issued
+ * without a sync when the batch is full. After either flush the batch is
+ * re-initialised for @cmd.
+ */
+static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmd *cmd)
{
- bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
- int index;
+ bool unsupported_cmd;
+ unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
if (force_sync || unsupported_cmd) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
- arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, false);
- arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
}
- index = cmds->num * CMDQ_ENT_DWORDS;
- if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
- dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
- cmd->opcode);
- return;
- }
-
- cmds->num++;
+ /* Commands arrive fully encoded, so a plain struct copy is enough */
+ cmds->cmds[cmds->num++] = *cmd;
}
+/*
+ * By-value wrapper around arm_smmu_cmdq_batch_add_cmd_p(): copies @cmd into
+ * a local so the result of an arm_smmu_make_cmd_*() helper can be passed
+ * directly.
+ */
+#define arm_smmu_cmdq_batch_add_cmd(smmu, cmds, cmd) \
+ ({ \
+ struct arm_smmu_cmd __cmd = cmd; \
+ arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, &__cmd); \
+ })
+
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
@@ -997,29 +887,29 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
struct iommu_page_response *resp)
{
- struct arm_smmu_cmdq_ent cmd = {0};
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- int sid = master->streams[0].id;
+ u8 resume_resp;
if (WARN_ON(!master->stall_enabled))
return;
- cmd.opcode = CMDQ_OP_RESUME;
- cmd.resume.sid = sid;
- cmd.resume.stag = resp->grpid;
switch (resp->code) {
case IOMMU_PAGE_RESP_INVALID:
case IOMMU_PAGE_RESP_FAILURE:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+ resume_resp = CMDQ_RESUME_0_RESP_ABORT;
break;
case IOMMU_PAGE_RESP_SUCCESS:
- cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+ resume_resp = CMDQ_RESUME_0_RESP_RETRY;
break;
default:
+ resume_resp = CMDQ_RESUME_0_RESP_TERM;
break;
}
- arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd(master->smmu,
+ arm_smmu_make_cmd_resume(master->streams[0].id,
+ resp->grpid,
+ resume_resp));
/*
* Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
* RESUME consumption guarantees that the stalled transaction will be
@@ -1543,19 +1433,14 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
size_t i;
struct arm_smmu_cmdq_batch cmds;
struct arm_smmu_device *smmu = master->smmu;
- struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_CFGI_CD,
- .cfgi = {
- .ssid = ssid,
- .leaf = leaf,
- },
- };
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_cfgi_cd(0, ssid, leaf);
- arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
- for (i = 0; i < master->num_streams; i++) {
- cmd.cfgi.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
- }
+ arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++)
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_cfgi_cd(master->streams[i].id, ssid,
+ leaf));
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -1742,8 +1627,11 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
- if (WARN_ON(!cdptr))
+ if (!cdptr) {
+ /* Only ats_always_on allows a NULL CD on default substream */
+ WARN_ON(!master->ats_always_on || ssid);
return;
+ }
arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
@@ -1756,6 +1644,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
cd_table->s1cdmax = master->ssid_bits;
+
+ /*
+ * When a device doesn't support PASID (non default SSID), ssid_bits is
+ * set to 0. This also sets S1CDMAX to 0, which disables the substreams
+ * and ignores the S1DSS field.
+ *
+ * On the other hand, if a device demands ATS to be always on even when
+ * its default substream is IOMMU bypassed, it has to use EATS that is
+ * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
+ * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
+ *
+ * Set S1CDMAX no lower than 1. This would add a dummy substream in the
+ * CD table but it should never be used by an actual CD.
+ */
+ if (master->ats_always_on)
+ cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
@@ -1848,15 +1752,10 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
struct arm_smmu_ste_writer *ste_writer =
container_of(writer, struct arm_smmu_ste_writer, writer);
- struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_CFGI_STE,
- .cfgi = {
- .sid = ste_writer->sid,
- .leaf = true,
- },
- };
- arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ writer->master->smmu,
+ arm_smmu_make_cmd_cfgi_ste(ste_writer->sid, true));
}
static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
@@ -1881,15 +1780,9 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
/* It's likely that we'll want to use the new STE soon */
- if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
- struct arm_smmu_cmdq_ent
- prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
- .prefetch = {
- .sid = sid,
- } };
-
- arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
- }
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
+ arm_smmu_cmdq_issue_cmd(smmu,
+ arm_smmu_make_cmd_prefetch_cfg(sid));
}
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
@@ -2314,20 +2207,10 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
evt[1] & PRIQ_1_ADDR_MASK);
- if (last) {
- struct arm_smmu_cmdq_ent cmd = {
- .opcode = CMDQ_OP_PRI_RESP,
- .substream_valid = ssv,
- .pri = {
- .sid = sid,
- .ssid = ssid,
- .grpid = grpid,
- .resp = PRI_RESP_DENY,
- },
- };
-
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- }
+ if (last)
+ arm_smmu_cmdq_issue_cmd(
+ smmu, arm_smmu_make_cmd_pri_resp(sid, ssid, ssv, grpid,
+ PRI_RESP_DENY));
}
static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
@@ -2415,9 +2298,8 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
return IRQ_WAKE_THREAD;
}
-static void
-arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
- struct arm_smmu_cmdq_ent *cmd)
+static struct arm_smmu_cmd
+arm_smmu_atc_inv_to_cmd(u32 sid, int ssid, unsigned long iova, size_t size)
{
size_t log2_span;
size_t span_mask;
@@ -2439,17 +2321,6 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
* This has the unpleasant side-effect of invalidating all PASID-tagged
* ATC entries within the address range.
*/
- *cmd = (struct arm_smmu_cmdq_ent) {
- .opcode = CMDQ_OP_ATC_INV,
- .substream_valid = (ssid != IOMMU_NO_PASID),
- .atc.ssid = ssid,
- };
-
- if (!size) {
- cmd->atc.size = ATC_INV_SIZE_ALL;
- return;
- }
-
page_start = iova >> inval_grain_shift;
page_end = (iova + size - 1) >> inval_grain_shift;
@@ -2478,24 +2349,25 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
page_start &= ~span_mask;
- cmd->atc.addr = page_start << inval_grain_shift;
- cmd->atc.size = log2_span;
+ return arm_smmu_make_cmd_atc_inv(sid, ssid,
+ page_start << inval_grain_shift,
+ log2_span);
}
+/*
+ * Queue one ATC_INV with ATC_INV_SIZE_ALL per stream ID of @master for
+ * substream @ssid, then submit the batch and return its result.
+ */
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
ioasid_t ssid)
{
int i;
- struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_cmd cmd;
struct arm_smmu_cmdq_batch cmds;
- arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
-
- arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
- for (i = 0; i < master->num_streams; i++) {
- cmd.atc.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
- }
+ /* Placeholder ATC_INV, used only to select the batch's command queue */
+ cmd = arm_smmu_make_cmd_atc_inv_all(0, IOMMU_NO_PASID);
+ arm_smmu_cmdq_batch_init_cmd(master->smmu, &cmds, &cmd);
+ for (i = 0; i < master->num_streams; i++)
+ arm_smmu_cmdq_batch_add_cmd(
+ master->smmu, &cmds,
+ arm_smmu_make_cmd_atc_inv_all(master->streams[i].id,
+ ssid));
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
}
@@ -2525,12 +2397,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd,
+ struct arm_smmu_cmd *cmd, bool leaf,
unsigned long iova, size_t size,
size_t granule, size_t pgsize)
{
unsigned long end = iova + size, num_pages = 0, tg = pgsize;
+ u64 orig_data0 = cmd->data[0];
size_t inv_range = granule;
+ u8 ttl = 0, tg_enc = 0;
if (WARN_ON_ONCE(!size))
return;
@@ -2539,7 +2413,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
- cmd->tlbi.tg = (tg - 10) / 2;
+ tg_enc = (tg - 10) / 2;
/*
* Determine what level the granule is at. For non-leaf, both
@@ -2549,8 +2423,8 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
* want to use a range command, so avoid the SVA corner case
* where both scale and num could be 0 as well.
*/
- if (cmd->tlbi.leaf)
- cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ if (leaf)
+ ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
num_pages++;
}
@@ -2568,11 +2442,13 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
/* Determine the power of 2 multiple number of pages */
scale = __ffs(num_pages);
- cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
- cmd->tlbi.num = num - 1;
+
+ cmd->data[0] = orig_data0 |
+ FIELD_PREP(CMDQ_TLBI_0_NUM, num - 1) |
+ FIELD_PREP(CMDQ_TLBI_0_SCALE, scale);
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
@@ -2581,8 +2457,17 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
num_pages -= num << scale;
}
- cmd->tlbi.addr = iova;
- arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
+ /*
+ * IPA has fewer bits than VA, but they are reserved in the
+ * command and something would be very broken if iova had them
+ * set.
+ */
+ cmd->data[1] = FIELD_PREP(CMDQ_TLBI_1_LEAF, leaf) |
+ FIELD_PREP(CMDQ_TLBI_1_TTL, ttl) |
+ FIELD_PREP(CMDQ_TLBI_1_TG, tg_enc) |
+ (iova & ~GENMASK_U64(11, 0));
+
+ arm_smmu_cmdq_batch_add_cmd_p(smmu, cmds, cmd);
iova += inv_range;
}
}
@@ -2613,19 +2498,22 @@ static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
/* Used by non INV_TYPE_ATS* invalidations */
static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
struct arm_smmu_cmdq_batch *cmds,
- struct arm_smmu_cmdq_ent *cmd,
+ struct arm_smmu_cmd *cmd,
+ bool leaf,
unsigned long iova, size_t size,
unsigned int granule)
{
if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
- cmd->opcode = inv->nsize_opcode;
- arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd);
+ struct arm_smmu_cmd nsize_cmd = *cmd;
+
+ /*
+ * Work on a copy and swap only the opcode field to
+ * inv->nsize_opcode; the caller's @cmd is left unmodified.
+ */
+ u64p_replace_bits(&nsize_cmd.data[0], inv->nsize_opcode,
+ CMDQ_0_OP);
+ arm_smmu_cmdq_batch_add_cmd_p(inv->smmu, cmds, &nsize_cmd);
return;
}
- cmd->opcode = inv->size_opcode;
- arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
- inv->pgsize);
+ arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, leaf,
+ iova, size, granule, inv->pgsize);
}
static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
@@ -2660,48 +2548,51 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
break;
while (cur != end) {
struct arm_smmu_device *smmu = cur->smmu;
- struct arm_smmu_cmdq_ent cmd = {
- /*
- * Pick size_opcode to run arm_smmu_get_cmdq(). This can
- * be changed to nsize_opcode, which would result in the
- * same CMDQ pointer.
- */
- .opcode = cur->size_opcode,
- };
+ /*
+ * Pick size_opcode to run arm_smmu_get_cmdq(). This can
+ * be changed to nsize_opcode, which would result in the
+ * same CMDQ pointer.
+ */
+ struct arm_smmu_cmd cmd =
+ arm_smmu_make_cmd_op(cur->size_opcode);
struct arm_smmu_inv *next;
if (!cmds.num)
- arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_init_cmd(smmu, &cmds, &cmd);
switch (cur->type) {
case INV_TYPE_S1_ASID:
- cmd.tlbi.asid = cur->id;
- cmd.tlbi.leaf = leaf;
- arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
- granule);
+ cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
+ cur->id, 0);
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
+ iova, size, granule);
break;
case INV_TYPE_S2_VMID:
- cmd.tlbi.vmid = cur->id;
- cmd.tlbi.leaf = leaf;
- arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
- granule);
+ cmd = arm_smmu_make_cmd_tlbi(cur->size_opcode,
+ 0, cur->id);
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, leaf,
+ iova, size, granule);
break;
case INV_TYPE_S2_VMID_S1_CLEAR:
/* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */
if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))
break;
- cmd.tlbi.vmid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_tlbi(cur->size_opcode, 0,
+ cur->id));
break;
case INV_TYPE_ATS:
- arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd);
- cmd.atc.sid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_atc_inv_to_cmd(cur->id, cur->ssid,
+ iova, size));
break;
case INV_TYPE_ATS_FULL:
- arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
- cmd.atc.sid = cur->id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add_cmd(
+ smmu, &cmds,
+ arm_smmu_make_cmd_atc_inv_all(cur->id,
+ IOMMU_NO_PASID));
break;
default:
WARN_ON_ONCE(1);
@@ -3432,22 +3323,21 @@ arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
static void arm_smmu_inv_flush_iotlb_tag(struct arm_smmu_inv *inv)
{
- struct arm_smmu_cmdq_ent cmd = {};
-
switch (inv->type) {
case INV_TYPE_S1_ASID:
- cmd.tlbi.asid = inv->id;
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ inv->smmu,
+ arm_smmu_make_cmd_tlbi(inv->nsize_opcode, inv->id, 0));
break;
case INV_TYPE_S2_VMID:
/* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
- cmd.tlbi.vmid = inv->id;
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ inv->smmu,
+ arm_smmu_make_cmd_tlbi(inv->nsize_opcode, 0, inv->id));
break;
default:
return;
}
-
- cmd.opcode = inv->nsize_opcode;
- arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
}
/* Should be installed after arm_smmu_install_ste_for_dev() */
@@ -3854,9 +3744,12 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
if (!arm_smmu_ssids_in_use(&master->cd_table)) {
struct iommu_domain *sid_domain =
iommu_driver_get_domain_for_dev(master->dev);
+ bool ats_always_on = master->ats_always_on &&
+ sid_domain->type != IOMMU_DOMAIN_BLOCKED;
+ bool downgrade = sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
+ sid_domain->type == IOMMU_DOMAIN_BLOCKED;
- if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
- sid_domain->type == IOMMU_DOMAIN_BLOCKED)
+ if (!ats_always_on && downgrade)
sid_domain->ops->attach_dev(sid_domain, dev,
sid_domain);
}
@@ -3875,6 +3768,8 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
.old_domain = old_domain,
.ssid = IOMMU_NO_PASID,
};
+ bool ats_always_on = master->ats_always_on &&
+ s1dss != STRTAB_STE_1_S1DSS_TERMINATE;
/*
* Do not allow any ASID to be changed while are working on the STE,
@@ -3886,7 +3781,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
* If the CD table is not in use we can use the provided STE, otherwise
* we use a cdtable STE with the provided S1DSS.
*/
- if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
* on because we have to assume a PASID is using ATS. For
@@ -4215,6 +4110,44 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
kfree(master->build_invs);
}
+/*
+ * Prepare ATS for @master. Non-PCI devices are left alone and 0 is returned.
+ *
+ * Fails with -EOPNOTSUPP when pci_ats_required() reports that the device
+ * needs ATS but arm_smmu_ats_supported() is false, or when the SMMU lacks
+ * ARM_SMMU_FEAT_TRANS_S1 or has no SSID bits. For a device that requires
+ * ATS and passes those checks, ats_always_on is set and the CD tables are
+ * allocated.
+ *
+ * Returns 0 on success, -EOPNOTSUPP as above, or the error returned by
+ * pci_prepare_ats() / arm_smmu_alloc_cd_tables().
+ */
+static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
+{
+ bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
+ unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
+ struct pci_dev *pdev;
+ int ret;
+
+ if (!dev_is_pci(master->dev))
+ return 0;
+ pdev = to_pci_dev(master->dev);
+
+ if (!arm_smmu_ats_supported(master)) {
+ if (pci_ats_required(pdev)) {
+ dev_err_once(master->dev, "SMMU doesn't support ATS\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+ }
+
+ /* Nothing more to do on failure or if the device does not require ATS */
+ ret = pci_prepare_ats(pdev, stu);
+ if (ret || !pci_ats_required(pdev))
+ return ret;
+
+ /*
+ * S1DSS is required for ATS to be always on for identity domain cases.
+ * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
+ */
+ if (!s1p || !master->smmu->ssid_bits) {
+ dev_err_once(master->dev,
+ "SMMU doesn't support ATS to be always on\n");
+ return -EOPNOTSUPP;
+ }
+
+ master->ats_always_on = true;
+
+ return arm_smmu_alloc_cd_tables(master);
+}
+
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
@@ -4263,14 +4196,15 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
- if (dev_is_pci(dev)) {
- unsigned int stu = __ffs(smmu->pgsize_bitmap);
-
- pci_prepare_ats(to_pci_dev(dev), stu);
- }
+ ret = arm_smmu_master_prepare_ats(master);
+ if (ret)
+ goto err_disable_pasid;
return &smmu->iommu;
+err_disable_pasid:
+ arm_smmu_disable_pasid(master);
+ arm_smmu_remove_master(master);
err_free_master:
kfree(master);
return ERR_PTR(ret);
@@ -4418,7 +4352,7 @@ int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
GFP_KERNEL);
- if (q->base || qsz < PAGE_SIZE)
+ if (q->base || qsz <= PAGE_SIZE)
break;
q->llq.max_n_shift--;
@@ -4810,7 +4744,6 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
int ret;
u32 reg, enables;
- struct arm_smmu_cmdq_ent cmd;
/* Clear CR0 and sync (disables SMMU and queue processing) */
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
@@ -4857,17 +4790,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
}
/* Invalidate any cached configuration */
- cmd.opcode = CMDQ_OP_CFGI_ALL;
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, arm_smmu_make_cmd_cfgi_all());
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
- cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ smmu, arm_smmu_make_cmd_op(CMDQ_OP_TLBI_EL2_ALL));
}
- cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_cmdq_issue_cmd_with_sync(
+ smmu, arm_smmu_make_cmd_op(CMDQ_OP_TLBI_NSNH_ALL));
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ef42df4753ec4..c909c9a88538b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -390,6 +390,10 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
+/* One encoded command queue entry: CMDQ_ENT_DWORDS 64-bit words. */
+struct arm_smmu_cmd {
+ u64 data[CMDQ_ENT_DWORDS];
+};
+
/*
* This is used to size the command queue and therefore must be at least
* BITS_PER_LONG so that the valid_map works correctly (it relies on the
@@ -426,11 +430,19 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
+#define ATC_INV_SIZE_ALL 52
+
#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
+/* Response codes encoded into the CMDQ_PRI_1_RESP field of a PRI_RESP command. */
+enum pri_resp {
+ PRI_RESP_DENY = 0,
+ PRI_RESP_FAIL = 1,
+ PRI_RESP_SUCC = 2,
+};
+
#define CMDQ_RESUME_0_RESP_TERM 0UL
#define CMDQ_RESUME_0_RESP_RETRY 1UL
#define CMDQ_RESUME_0_RESP_ABORT 2UL
@@ -447,6 +459,145 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
+/* Command opcodes, encoded into the CMDQ_0_OP field of data[0]. */
+enum arm_smmu_cmdq_opcode {
+ CMDQ_OP_PREFETCH_CFG = 0x1,
+ CMDQ_OP_CFGI_STE = 0x3,
+ CMDQ_OP_CFGI_ALL = 0x4,
+ CMDQ_OP_CFGI_CD = 0x5,
+ CMDQ_OP_CFGI_CD_ALL = 0x6,
+ CMDQ_OP_TLBI_NH_ALL = 0x10,
+ CMDQ_OP_TLBI_NH_ASID = 0x11,
+ CMDQ_OP_TLBI_NH_VA = 0x12,
+ CMDQ_OP_TLBI_NH_VAA = 0x13,
+ CMDQ_OP_TLBI_EL2_ALL = 0x20,
+ CMDQ_OP_TLBI_EL2_ASID = 0x21,
+ CMDQ_OP_TLBI_EL2_VA = 0x22,
+ CMDQ_OP_TLBI_S12_VMALL = 0x28,
+ CMDQ_OP_TLBI_S2_IPA = 0x2a,
+ CMDQ_OP_TLBI_NSNH_ALL = 0x30,
+ CMDQ_OP_ATC_INV = 0x40,
+ CMDQ_OP_PRI_RESP = 0x41,
+ CMDQ_OP_RESUME = 0x44,
+ CMDQ_OP_CMD_SYNC = 0x46,
+};
+
+/* Build a command with only the opcode field set; every other bit is zero. */
+static inline struct arm_smmu_cmd
+arm_smmu_make_cmd_op(enum arm_smmu_cmdq_opcode op)
+{
+ struct arm_smmu_cmd cmd = {};
+
+ cmd.data[0] = FIELD_PREP(CMDQ_0_OP, op);
+ return cmd;
+}
+
+/* Build a CFGI_ALL command with CMDQ_CFGI_1_RANGE set to 31. */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_all(void)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_ALL);
+
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
+ return cmd;
+}
+
+/* Build a PREFETCH_CFG command for stream ID @sid. */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_prefetch_cfg(u32 sid)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_PREFETCH_CFG);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, sid);
+ return cmd;
+}
+
+/* Build a CFGI_STE command for stream ID @sid; @leaf fills CMDQ_CFGI_1_LEAF. */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_ste(u32 sid, bool leaf)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_STE);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, leaf);
+ return cmd;
+}
+
+/*
+ * Build a CFGI_CD command for stream ID @sid and substream ID @ssid;
+ * @leaf fills CMDQ_CFGI_1_LEAF.
+ */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_cfgi_cd(u32 sid, u32 ssid,
+ bool leaf)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_CD);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid) |
+ FIELD_PREP(CMDQ_CFGI_0_SSID, ssid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, leaf);
+ return cmd;
+}
+
+/*
+ * Build a RESUME command for stream ID @sid and tag @stag. @resp is one of
+ * the CMDQ_RESUME_0_RESP_* values.
+ */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_resume(u32 sid, u16 stag,
+ u8 resp)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_RESUME);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, sid) |
+ FIELD_PREP(CMDQ_RESUME_0_RESP, resp);
+ cmd.data[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, stag);
+ return cmd;
+}
+
+/*
+ * Build a PRI_RESP command for stream ID @sid, substream ID @ssid and page
+ * request group @grpid. @ssv fills CMDQ_0_SSV and @resp fills
+ * CMDQ_PRI_1_RESP.
+ */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_pri_resp(u32 sid, u32 ssid,
+ bool ssv,
+ u16 grpid,
+ enum pri_resp resp)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_PRI_RESP);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_0_SSV, ssv) |
+ FIELD_PREP(CMDQ_PRI_0_SID, sid) |
+ FIELD_PREP(CMDQ_PRI_0_SSID, ssid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, grpid) |
+ FIELD_PREP(CMDQ_PRI_1_RESP, resp);
+ return cmd;
+}
+
+/*
+ * Build an ATC_INV command for stream ID @sid and substream ID @ssid.
+ * CMDQ_0_SSV is set whenever @ssid differs from IOMMU_NO_PASID. @size fills
+ * CMDQ_ATC_1_SIZE and only the bits of @addr covered by
+ * CMDQ_ATC_1_ADDR_MASK are kept.
+ */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_atc_inv(u32 sid, u32 ssid,
+ u64 addr, u8 size)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_ATC_INV);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_0_SSV, ssid != IOMMU_NO_PASID) |
+ FIELD_PREP(CMDQ_ATC_0_SSID, ssid) |
+ FIELD_PREP(CMDQ_ATC_0_SID, sid);
+ cmd.data[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, size) |
+ (addr & CMDQ_ATC_1_ADDR_MASK);
+ return cmd;
+}
+
+/* Build an ATC_INV command with address 0 and size ATC_INV_SIZE_ALL. */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_atc_inv_all(u32 sid,
+ u32 ssid)
+{
+ return arm_smmu_make_cmd_atc_inv(sid, ssid, 0, ATC_INV_SIZE_ALL);
+}
+
+/*
+ * Build a CMD_SYNC command. @cs fills CMDQ_SYNC_0_CS (a CMDQ_SYNC_0_CS_*
+ * value); MSH and MSIATTR are fixed to ARM_SMMU_SH_ISH and
+ * ARM_SMMU_MEMATTR_OIWB. Only the bits of @msiaddr covered by
+ * CMDQ_SYNC_1_MSIADDR_MASK are kept.
+ */
+static inline struct arm_smmu_cmd arm_smmu_make_cmd_sync(unsigned int cs,
+ u64 msiaddr)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(CMDQ_OP_CMD_SYNC);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, cs) |
+ FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ cmd.data[1] |= msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+ return cmd;
+}
+
+/*
+ * TLBI commands - the non-sized variants just need opcode + asid/vmid.
+ * For sized variants the caller sets up data[0] with the immutable fields
+ * (opcode + asid/vmid) and the range loop fills in per-iteration fields.
+ *
+ * Both @asid and @vmid are always encoded into data[0]; data[1] is left
+ * zero.
+ */
+static inline struct arm_smmu_cmd
+arm_smmu_make_cmd_tlbi(enum arm_smmu_cmdq_opcode op, u16 asid, u16 vmid)
+{
+ struct arm_smmu_cmd cmd = arm_smmu_make_cmd_op(op);
+
+ cmd.data[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, asid) |
+ FIELD_PREP(CMDQ_TLBI_0_VMID, vmid);
+ return cmd;
+}
+
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
@@ -507,90 +658,6 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
-enum pri_resp {
- PRI_RESP_DENY = 0,
- PRI_RESP_FAIL = 1,
- PRI_RESP_SUCC = 2,
-};
-
-struct arm_smmu_cmdq_ent {
- /* Common fields */
- u8 opcode;
- bool substream_valid;
-
- /* Command-specific fields */
- union {
- #define CMDQ_OP_PREFETCH_CFG 0x1
- struct {
- u32 sid;
- } prefetch;
-
- #define CMDQ_OP_CFGI_STE 0x3
- #define CMDQ_OP_CFGI_ALL 0x4
- #define CMDQ_OP_CFGI_CD 0x5
- #define CMDQ_OP_CFGI_CD_ALL 0x6
- struct {
- u32 sid;
- u32 ssid;
- union {
- bool leaf;
- u8 span;
- };
- } cfgi;
-
- #define CMDQ_OP_TLBI_NH_ALL 0x10
- #define CMDQ_OP_TLBI_NH_ASID 0x11
- #define CMDQ_OP_TLBI_NH_VA 0x12
- #define CMDQ_OP_TLBI_NH_VAA 0x13
- #define CMDQ_OP_TLBI_EL2_ALL 0x20
- #define CMDQ_OP_TLBI_EL2_ASID 0x21
- #define CMDQ_OP_TLBI_EL2_VA 0x22
- #define CMDQ_OP_TLBI_S12_VMALL 0x28
- #define CMDQ_OP_TLBI_S2_IPA 0x2a
- #define CMDQ_OP_TLBI_NSNH_ALL 0x30
- struct {
- u8 num;
- u8 scale;
- u16 asid;
- u16 vmid;
- bool leaf;
- u8 ttl;
- u8 tg;
- u64 addr;
- } tlbi;
-
- #define CMDQ_OP_ATC_INV 0x40
- #define ATC_INV_SIZE_ALL 52
- struct {
- u32 sid;
- u32 ssid;
- u64 addr;
- u8 size;
- bool global;
- } atc;
-
- #define CMDQ_OP_PRI_RESP 0x41
- struct {
- u32 sid;
- u32 ssid;
- u16 grpid;
- enum pri_resp resp;
- } pri;
-
- #define CMDQ_OP_RESUME 0x44
- struct {
- u32 sid;
- u16 stag;
- u8 resp;
- } resume;
-
- #define CMDQ_OP_CMD_SYNC 0x46
- struct {
- u64 msiaddr;
- } sync;
- };
-};
-
struct arm_smmu_ll_queue {
union {
u64 val;
@@ -633,17 +700,17 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
- bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent);
+ bool (*supports_cmd)(struct arm_smmu_cmd *cmd);
};
+/* A queue without a supports_cmd callback is treated as supporting every command. */
static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
- return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true;
+ return cmdq->supports_cmd ? cmdq->supports_cmd(cmd) : true;
}
struct arm_smmu_cmdq_batch {
- u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+ /* Encoded commands waiting to be issued; the first @num are valid */
+ struct arm_smmu_cmd cmds[CMDQ_BATCH_ENTRIES];
struct arm_smmu_cmdq *cmdq;
int num;
};
@@ -807,7 +874,7 @@ struct arm_smmu_impl_ops {
void (*device_remove)(struct arm_smmu_device *smmu);
int (*init_structures)(struct arm_smmu_device *smmu);
struct arm_smmu_cmdq *(*get_secondary_cmdq)(
- struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
+ struct arm_smmu_device *smmu, struct arm_smmu_cmd *cmd);
/*
* An implementation should define its own type other than the default
* IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type
@@ -943,6 +1010,7 @@ struct arm_smmu_master {
bool ats_enabled : 1;
bool ste_ats_enabled : 1;
bool stall_enabled;
+ bool ats_always_on;
unsigned int ssid_bits;
unsigned int iopf_refcount;
};
@@ -1140,7 +1208,8 @@ void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
const struct arm_smmu_ste *target);
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
bool sync);
#ifdef CONFIG_ARM_SMMU_V3_SVA
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 83f6e9f6c51d6..67be62a6e7640 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -367,9 +367,9 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
/* Command Queue Function */
-static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
+static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmd *cmd)
{
- switch (ent->opcode) {
+ switch (FIELD_GET(CMDQ_0_OP, cmd->data[0])) {
case CMDQ_OP_TLBI_NH_ASID:
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_ATC_INV:
@@ -381,7 +381,7 @@ static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *
tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_ent *ent)
+ struct arm_smmu_cmd *cmd)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
@@ -409,7 +409,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
return NULL;
/* Unsupported CMD goes for smmu->cmdq pathway */
- if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent))
+ if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, cmd))
return NULL;
return &vcmdq->cmdq;
}
@@ -427,16 +427,16 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
static void tegra241_vcmdq_hw_flush_timeout(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
- u64 cmd_sync[CMDQ_ENT_DWORDS] = {};
+ struct arm_smmu_cmd cmd_sync = {};
- cmd_sync[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
- FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+ cmd_sync.data[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
+ FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
/*
* It does not hurt to insert another CMD_SYNC, taking advantage of the
* arm_smmu_cmdq_issue_cmdlist() that waits for the CMD_SYNC completion.
*/
- arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, cmd_sync, 1, true);
+ arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, &cmd_sync, 1, true);
}
/* This function is for LVCMDQ, so @vcmdq must not be unmapped yet */
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index edd41b5a3b6ac..e2c914fccd6fc 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -39,8 +39,10 @@ static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
{ .compatible = "qcom,adreno-smmu",
.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
- { .compatible = "qcom,fastrpc",
+ { .compatible = "qcom,fastrpc-compute-cb",
.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,glymur-mdss",
+ .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
{ .compatible = "qcom,qcm2290-mdss",
.data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
{ .compatible = "qcom,sa8775p-mdss",
@@ -259,6 +261,7 @@ static int qcom_adreno_smmu_set_ttbr0_cfg(const void *cookie,
struct io_pgtable *pgtable = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+ int ret;
/* The domain must have split pagetables already enabled */
if (cb->tcr[0] & ARM_SMMU_TCR_EPD1)
@@ -288,8 +291,16 @@ static int qcom_adreno_smmu_set_ttbr0_cfg(const void *cookie,
cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, cb->cfg->asid);
}
+ ret = pm_runtime_resume_and_get(smmu_domain->smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu_domain->smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return -ENODEV;
+ }
+
arm_smmu_write_context_bank(smmu_domain->smmu, cb->cfg->cbndx);
+ pm_runtime_put_autosuspend(smmu_domain->smmu->dev);
+
return 0;
}
diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h
index 8d11b08291d73..1426a6758ff25 100644
--- a/drivers/iommu/generic_pt/fmt/amdv1.h
+++ b/drivers/iommu/generic_pt/fmt/amdv1.h
@@ -404,6 +404,7 @@ amdv1pt_iommu_fmt_hw_info(struct pt_iommu_amdv1 *table,
static const struct pt_iommu_amdv1_cfg amdv1_kunit_fmt_cfgs[] = {
/* Matches what io_pgtable does */
[0] = { .starting_level = 2 },
+ [1] = { .starting_level = 2, .common.hw_max_vasz_lg2 = 32 },
};
#define kunit_fmt_cfgs amdv1_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = 0 };
diff --git a/drivers/iommu/generic_pt/fmt/iommu_riscv64.c b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
index cbf60fffa9bf7..b18fc4d109f53 100644
--- a/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
+++ b/drivers/iommu/generic_pt/fmt/iommu_riscv64.c
@@ -6,6 +6,8 @@
#define PT_FMT_VARIANT 64
#define PT_SUPPORTED_FEATURES \
(BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
- BIT(PT_FEAT_RISCV_SVNAPOT_64K))
+ BIT(PT_FEAT_RISCV_SVNAPOT_64K) | \
+ BIT(PT_FEAT_DETAILED_GATHER))
+#define PT_FORCE_ENABLED_FEATURES BIT(PT_FEAT_DETAILED_GATHER)
#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/riscv.h b/drivers/iommu/generic_pt/fmt/riscv.h
index a7fef6266a36a..ae9a76514416c 100644
--- a/drivers/iommu/generic_pt/fmt/riscv.h
+++ b/drivers/iommu/generic_pt/fmt/riscv.h
@@ -64,6 +64,8 @@ enum {
RISCVPT_PPN64 = GENMASK_ULL(53, 10),
RISCVPT_PPN64_64K = GENMASK_ULL(53, 14),
RISCVPT_PBMT = GENMASK_ULL(62, 61),
+ RISCVPT_NC = BIT_ULL(61),
+ RISCVPT_IO = BIT_ULL(62),
RISCVPT_N = BIT_ULL(63),
/* Svnapot encodings for ppn[0] */
@@ -201,7 +203,8 @@ static inline void riscvpt_attr_from_entry(const struct pt_state *pts,
{
attrs->descriptor_bits =
pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U |
- RISCVPT_G | RISCVPT_A | RISCVPT_D);
+ RISCVPT_G | RISCVPT_A | RISCVPT_D | RISCVPT_NC |
+ RISCVPT_IO);
}
#define pt_attr_from_entry riscvpt_attr_from_entry
@@ -237,6 +240,12 @@ static inline int riscvpt_iommu_set_prot(struct pt_common *common,
pte |= RISCVPT_R;
if (!(iommu_prot & IOMMU_NOEXEC))
pte |= RISCVPT_X;
+ if (common->features & BIT(PT_FEAT_RISCV_SVPBMT)) {
+ if (iommu_prot & IOMMU_MMIO)
+ pte |= RISCVPT_IO;
+ else if (!(iommu_prot & IOMMU_CACHE))
+ pte |= RISCVPT_NC;
+ }
/* Caller must specify a supported combination of flags */
if (unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0))
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index dc91fb4e2f61c..c2752151c80af 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -40,15 +40,40 @@ static void flush_writes_item(const struct pt_state *pts)
PT_ITEM_WORD_SIZE);
}
-static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
- struct pt_iommu *iommu_table, pt_vaddr_t iova,
- pt_vaddr_t len,
- struct iommu_pages_list *free_list)
+struct iommupt_pending_gather {
+ struct iommu_iotlb_gather *iotlb_gather; /* gather that gather_range_pending() merges this state into */
+ struct iommu_pages_list free_list; /* table pages queued by gather_add_table() */
+ u8 leaf_levels_bitmap; /* BIT(level) per cleared leaf; only set with PT_FEAT_DETAILED_GATHER */
+ u8 table_levels_bitmap; /* BIT(level) per queued table; only set with PT_FEAT_DETAILED_GATHER */
+};
+
+/* Queue @table on the pending free list; with detailed gather also note its level. */
+static void gather_add_table(struct iommupt_pending_gather *pending,
+ const struct pt_state *pts,
+ struct pt_table_p *table)
+{
+ iommu_pages_list_add(&pending->free_list, table);
+
+ if (pts_feature(pts, PT_FEAT_DETAILED_GATHER))
+ pending->table_levels_bitmap |= BIT(pts->level);
+}
+
+/* With detailed gather, note the level of @pts in the pending leaf bitmap. */
+static void gather_add_leaf(struct iommupt_pending_gather *pending,
+ const struct pt_state *pts)
+{
+ if (pts_feature(pts, PT_FEAT_DETAILED_GATHER))
+ pending->leaf_levels_bitmap |= BIT(pts->level);
+}
+
+static void gather_range_pending(struct iommupt_pending_gather *pending,
+ struct pt_iommu *iommu_table, pt_vaddr_t iova,
+ pt_vaddr_t len)
{
+ struct iommu_iotlb_gather *iotlb_gather = pending->iotlb_gather;
struct pt_common *common = common_from_iommu(iommu_table);
if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
- iommu_pages_stop_incoherent_list(free_list,
+ iommu_pages_stop_incoherent_list(&pending->free_list,
iommu_table->iommu_device);
/*
@@ -72,7 +97,17 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
}
- iommu_pages_list_splice(free_list, &iotlb_gather->freelist);
+ iommu_pages_list_splice(&pending->free_list, &iotlb_gather->freelist);
+ INIT_LIST_HEAD(&pending->free_list.pages);
+
+ if (pt_feature(common, PT_FEAT_DETAILED_GATHER)) {
+ iotlb_gather->pt.leaf_levels_bitmap |=
+ pending->leaf_levels_bitmap;
+ iotlb_gather->pt.table_levels_bitmap |=
+ pending->table_levels_bitmap;
+ pending->leaf_levels_bitmap = 0;
+ pending->table_levels_bitmap = 0;
+ }
}
#define DOMAIN_NS(op) CONCATENATE(CONCATENATE(pt_iommu_, PTPFX), op)
@@ -341,7 +376,7 @@ static int __maybe_unused NS(set_dirty)(struct pt_iommu *iommu_table,
}
struct pt_iommu_collect_args {
- struct iommu_pages_list free_list;
+ struct iommupt_pending_gather pending;
/* Fail if any OAs are within the range */
u8 check_mapped : 1;
};
@@ -358,7 +393,8 @@ static int __collect_tables(struct pt_range *range, void *arg,
for_each_pt_level_entry(&pts) {
if (pts.type == PT_ENTRY_TABLE) {
- iommu_pages_list_add(&collect->free_list, pts.table_lower);
+ gather_add_table(&collect->pending, &pts,
+ pts.table_lower);
ret = pt_descend(&pts, arg, __collect_tables);
if (ret)
return ret;
@@ -493,15 +529,18 @@ static int clear_contig(const struct pt_state *start_pts,
struct pt_range range = *start_pts->range;
struct pt_state pts =
pt_init(&range, start_pts->level, start_pts->table);
- struct pt_iommu_collect_args collect = { .check_mapped = true };
+ struct pt_iommu_collect_args collect = {
+ .check_mapped = true,
+ .pending.iotlb_gather = iotlb_gather,
+ .pending.free_list = IOMMU_PAGES_LIST_INIT(
+ collect.pending.free_list),
+ };
int ret;
pts.index = start_pts->index;
pts.end_index = start_pts->index + step;
for (; _pt_iter_load(&pts); pt_next_entry(&pts)) {
if (pts.type == PT_ENTRY_TABLE) {
- collect.free_list =
- IOMMU_PAGES_LIST_INIT(collect.free_list);
ret = pt_walk_descend_all(&pts, __collect_tables,
&collect);
if (ret)
@@ -514,12 +553,11 @@ static int clear_contig(const struct pt_state *start_pts,
pt_clear_entries(&pts, ilog2(1));
flush_writes_item(&pts);
- iommu_pages_list_add(&collect.free_list,
- pt_table_ptr(&pts));
- gather_range_pages(
- iotlb_gather, iommu_table, range.va,
- log2_to_int(pt_table_item_lg2sz(&pts)),
- &collect.free_list);
+ gather_add_table(&collect.pending, &pts,
+ pts.table_lower);
+ gather_range_pending(
+ &collect.pending, iommu_table, range.va,
+ log2_to_int(pt_table_item_lg2sz(&pts)));
} else if (pts.type != PT_ENTRY_EMPTY) {
return -EADDRINUSE;
}
@@ -968,7 +1006,7 @@ static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
}
struct pt_unmap_args {
- struct iommu_pages_list free_list;
+ struct iommupt_pending_gather pending;
pt_vaddr_t unmapped;
};
@@ -1031,8 +1069,8 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
* succeed in clearing the lower table levels.
*/
if (fully_covered) {
- iommu_pages_list_add(&unmap->free_list,
- pts.table_lower);
+ gather_add_table(&unmap->pending, &pts,
+ pts.table_lower);
pt_clear_entries(&pts, ilog2(1));
if (pts.index < flush_start_index)
flush_start_index = pts.index;
@@ -1049,6 +1087,7 @@ start_oa:
*/
num_contig_lg2 = pt_entry_num_contig_lg2(&pts);
pt_clear_entries(&pts, num_contig_lg2);
+ gather_add_leaf(&unmap->pending, &pts);
num_oas += log2_to_int(num_contig_lg2);
if (pts.index < flush_start_index)
flush_start_index = pts.index;
@@ -1071,8 +1110,11 @@ static size_t NS(unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
dma_addr_t len,
struct iommu_iotlb_gather *iotlb_gather)
{
- struct pt_unmap_args unmap = { .free_list = IOMMU_PAGES_LIST_INIT(
- unmap.free_list) };
+ struct pt_unmap_args unmap = {
+ .pending.iotlb_gather = iotlb_gather,
+ .pending.free_list = IOMMU_PAGES_LIST_INIT(
+ unmap.pending.free_list),
+ };
struct pt_range range;
int ret;
@@ -1082,8 +1124,7 @@ static size_t NS(unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
pt_walk_range(&range, __unmap_range, &unmap);
- gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped,
- &unmap.free_list);
+ gather_range_pending(&unmap.pending, iommu_table, iova, unmap.unmapped);
return unmap.unmapped;
}
@@ -1108,8 +1149,12 @@ static void NS(get_info)(struct pt_iommu *iommu_table,
pgsize_bitmap |= pt_possible_sizes(&pts);
}
- /* Hide page sizes larger than the maximum OA */
- info->pgsize_bitmap = oalog2_mod(pgsize_bitmap, common->max_oasz_lg2);
+ /*
+ * Hide page sizes larger than the maximum. -1 because a whole table
+ * pgsize is not allowed
+ */
+ info->pgsize_bitmap = log2_mod(pgsize_bitmap, common->max_vasz_lg2 - 1);
+ info->pgsize_bitmap = oalog2_mod(info->pgsize_bitmap, common->max_oasz_lg2);
}
static void NS(deinit)(struct pt_iommu *iommu_table)
@@ -1117,10 +1162,11 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
struct pt_common *common = common_from_iommu(iommu_table);
struct pt_range range = pt_all_range(common);
struct pt_iommu_collect_args collect = {
- .free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
+ .pending.free_list = IOMMU_PAGES_LIST_INIT(
+ collect.pending.free_list),
};
- iommu_pages_list_add(&collect.free_list, range.top_table);
+ iommu_pages_list_add(&collect.pending.free_list, range.top_table);
pt_walk_range(&range, __collect_tables, &collect);
/*
@@ -1128,9 +1174,9 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
* and invalidated any caching referring to this memory.
*/
if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
- iommu_pages_stop_incoherent_list(&collect.free_list,
+ iommu_pages_stop_incoherent_list(&collect.pending.free_list,
iommu_table->iommu_device);
- iommu_put_pages_list(&collect.free_list);
+ iommu_put_pages_list(&collect.pending.free_list);
}
static const struct pt_iommu_ops NS(ops) = {
@@ -1151,10 +1197,6 @@ static int pt_init_common(struct pt_common *common)
if (PT_WARN_ON(top_range.top_level > PT_MAX_TOP_LEVEL))
return -EINVAL;
- if (top_range.top_level == PT_MAX_TOP_LEVEL ||
- common->max_vasz_lg2 == top_range.max_vasz_lg2)
- common->features &= ~BIT(PT_FEAT_DYNAMIC_TOP);
-
if (top_range.max_vasz_lg2 == PT_VADDR_MAX_LG2)
common->features |= BIT(PT_FEAT_FULL_VA);
diff --git a/drivers/iommu/generic_pt/kunit_generic_pt.h b/drivers/iommu/generic_pt/kunit_generic_pt.h
index 374e475f591e1..ef2c90b6d6af3 100644
--- a/drivers/iommu/generic_pt/kunit_generic_pt.h
+++ b/drivers/iommu/generic_pt/kunit_generic_pt.h
@@ -438,6 +438,9 @@ static void test_lvl_possible_sizes(struct kunit *test, struct pt_state *pts,
{
unsigned int num_items_lg2 = safe_pt_num_items_lg2(pts);
pt_vaddr_t pgsize_bitmap = pt_possible_sizes(pts);
+ /* Matches get_info() */
+ pt_vaddr_t limited_pgsize_bitmap =
+ log2_mod(pgsize_bitmap, pts->range->common->max_vasz_lg2 - 1);
unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
if (!pt_can_have_leaf(pts)) {
@@ -448,7 +451,8 @@ static void test_lvl_possible_sizes(struct kunit *test, struct pt_state *pts,
/* No bits for sizes that would be outside this table */
KUNIT_ASSERT_EQ(test, log2_mod(pgsize_bitmap, isz_lg2), 0);
KUNIT_ASSERT_EQ(
- test, fvalog2_div(pgsize_bitmap, num_items_lg2 + isz_lg2), 0);
+ test,
+ fvalog2_div(limited_pgsize_bitmap, num_items_lg2 + isz_lg2), 0);
/*
* Non contiguous must be supported. AMDv1 has a HW bug where it does
@@ -463,8 +467,8 @@ static void test_lvl_possible_sizes(struct kunit *test, struct pt_state *pts,
/* A contiguous entry should not span the whole table */
if (num_items_lg2 + isz_lg2 != PT_VADDR_MAX_LG2)
KUNIT_ASSERT_FALSE(
- test,
- pgsize_bitmap & log2_to_int(num_items_lg2 + isz_lg2));
+ test, limited_pgsize_bitmap &
+ log2_to_int(num_items_lg2 + isz_lg2));
}
static void test_entry_possible_sizes(struct kunit *test)
diff --git a/drivers/iommu/generic_pt/kunit_iommu_pt.h b/drivers/iommu/generic_pt/kunit_iommu_pt.h
index e8a63c8ea850e..ece1c9b8c55d3 100644
--- a/drivers/iommu/generic_pt/kunit_iommu_pt.h
+++ b/drivers/iommu/generic_pt/kunit_iommu_pt.h
@@ -112,8 +112,9 @@ static void test_increase_level(struct kunit *test)
if (IS_32BIT)
kunit_skip(test, "Unable to test on 32bit");
- KUNIT_ASSERT_GT(test, common->max_vasz_lg2,
- pt_top_range(common).max_vasz_lg2);
+ if (common->max_vasz_lg2 <= pt_top_range(common).max_vasz_lg2)
+ kunit_skip(test,
+ "max_vasz_lg2 fits in starting level, no growth possible");
/* Add every possible level to the max */
while (common->max_vasz_lg2 != pt_top_range(common).max_vasz_lg2) {
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index be8410f0e8414..fdc88817709f2 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -254,37 +254,29 @@ void cache_tag_unassign_domain(struct dmar_domain *domain,
}
+/*
+ * Compute the naturally aligned power-of-two region that covers [start, last].
+ * Returns the aligned base address and stores log2 of the region size, in
+ * VT-d pages, in *size_order. MAX_AGAW_PFN_WIDTH with a zero address is
+ * returned when no such region fits in an unsigned long.
+ */
static unsigned long calculate_psi_aligned_address(unsigned long start,
- unsigned long end,
- unsigned long *_mask)
+ unsigned long last,
+ unsigned long *size_order)
{
- unsigned long pages = aligned_nrpages(start, end - start + 1);
- unsigned long aligned_pages = __roundup_pow_of_two(pages);
- unsigned long bitmask = aligned_pages - 1;
- unsigned long mask = ilog2(aligned_pages);
- unsigned long pfn = IOVA_PFN(start);
-
- /*
- * PSI masks the low order bits of the base address. If the
- * address isn't aligned to the mask, then compute a mask value
- * needed to ensure the target range is flushed.
- */
- if (unlikely(bitmask & pfn)) {
- unsigned long end_pfn = pfn + pages - 1, shared_bits;
+ unsigned int sz_lg2;
+ /* Compute a sz_lg2 that spans start and last */
+ start &= GENMASK(BITS_PER_LONG - 1, VTD_PAGE_SHIFT);
+ sz_lg2 = fls_long(start ^ last);
+ /* Both ends fall in the same VT-d page: a single-page invalidation */
+ if (sz_lg2 <= VTD_PAGE_SHIFT) {
+ *size_order = 0;
+ return start;
+ }
+ if (unlikely(sz_lg2 >= BITS_PER_LONG)) {
/*
- * Since end_pfn <= pfn + bitmask, the only way bits
- * higher than bitmask can differ in pfn and end_pfn is
- * by carrying. This means after masking out bitmask,
- * high bits starting with the first set bit in
- * shared_bits are all equal in both pfn and end_pfn.
+ * MAX_AGAW_PFN_WIDTH triggers full invalidation in all
+ * downstream users.
*/
- shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
- mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
+ *size_order = MAX_AGAW_PFN_WIDTH;
+ return 0;
}
- *_mask = mask;
-
- return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
+ *size_order = sz_lg2 - VTD_PAGE_SHIFT;
+ return start & GENMASK(BITS_PER_LONG - 1, sz_lg2);
}
static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
@@ -441,12 +433,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
struct cache_tag *tag;
unsigned long flags;
- if (start == 0 && end == ULONG_MAX) {
- addr = 0;
- mask = MAX_AGAW_PFN_WIDTH;
- } else {
- addr = calculate_psi_aligned_address(start, end, &mask);
- }
+ addr = calculate_psi_aligned_address(start, end, &mask);
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 40e33257d3c2c..1dbef8c550073 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -777,21 +777,27 @@ struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
static struct io_pgtable_cfg *cfg_cookie __initdata;
-static void __init dummy_tlb_flush_all(void *cookie)
+/*
+ * __noipa prevents gcc from turning indirect iommu_flush_ops calls
+ * into direct calls from a specialized __arm_v7s_unmap() that triggers
+ * a build time section mismatch assertion.
+ */
+static __noipa void __init dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
-static void __init dummy_tlb_flush(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static __noipa void __init dummy_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
+static __noipa void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova,
+ size_t granule,
+ void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0208e5897c299..476c0e25631af 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -143,7 +143,7 @@
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
/* IOPTE accessors */
-#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
+#define iopte_deref(pte, d) phys_to_virt(iopte_to_paddr(pte, d))
#define iopte_type(pte) \
(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
@@ -248,26 +248,15 @@ static dma_addr_t __arm_lpae_dma_addr(void *pages)
return (dma_addr_t)virt_to_phys(pages);
}
-static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
- struct io_pgtable_cfg *cfg,
- void *cookie)
+static void *__arm_lpae_cfg_alloc(size_t size, gfp_t gfp,
+ struct io_pgtable_cfg *cfg,
+ void *cookie)
{
struct device *dev = cfg->iommu_dev;
- size_t alloc_size;
dma_addr_t dma;
void *pages;
- /*
- * For very small starting-level translation tables the HW requires a
- * minimum alignment of at least 64 to cover all cases.
- */
- alloc_size = max(size, 64);
- if (cfg->alloc)
- pages = cfg->alloc(cookie, alloc_size, gfp);
- else
- pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp,
- alloc_size);
-
+ pages = cfg->alloc(cookie, size, gfp);
if (!pages)
return NULL;
@@ -291,24 +280,69 @@ out_unmap:
dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
- if (cfg->free)
- cfg->free(cookie, pages, size);
- else
- iommu_free_pages(pages);
-
+ cfg->free(cookie, pages, size);
return NULL;
}
-static void __arm_lpae_free_pages(void *pages, size_t size,
- struct io_pgtable_cfg *cfg,
- void *cookie)
+static void __arm_lpae_cfg_free(void *pages, size_t size,
+ struct io_pgtable_cfg *cfg,
+ void *cookie)
{
if (!cfg->coherent_walk)
dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
size, DMA_TO_DEVICE);
- if (cfg->free)
- cfg->free(cookie, pages, size);
+ cfg->free(cookie, pages, size);
+}
+
+/*
+ * Allocate one translation table of at least @size bytes. A driver-supplied
+ * cfg->alloc hook takes precedence; otherwise the table comes from the iommu
+ * page allocator and, for non-coherent walks, is set up for incoherent use.
+ * Returns NULL on any failure.
+ */
+static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
+ struct io_pgtable_cfg *cfg,
+ void *cookie)
+{
+ struct device *dev = cfg->iommu_dev;
+ void *table;
+ int err;
+
+ /*
+ * For very small starting-level translation tables the HW requires a
+ * minimum alignment of at least 64 to cover all cases.
+ */
+ size = max(size, 64);
+
+ if (cfg->alloc)
+ return __arm_lpae_cfg_alloc(size, gfp, cfg, cookie);
+
+ table = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp, size);
+ if (!table || cfg->coherent_walk)
+ return table;
+
+ err = iommu_pages_start_incoherent(table, dev);
+ if (!err)
+ return table;
+
+ if (err == -EOPNOTSUPP)
+ dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
+ iommu_free_pages(table);
+ return NULL;
+}
+
+static void __arm_lpae_free_pages(void *pages, size_t size,
+ struct io_pgtable_cfg *cfg,
+ void *cookie)
+{
+ /*
+ * See __arm_lpae_alloc_pages(): apply the same 64-byte minimum so the
+ * size handed to the cfg free path matches the size used at allocation.
+ */
+ size = max(size, 64);
+
+ if (cfg->free) {
+ __arm_lpae_cfg_free(pages, size, cfg, cookie);
+ return;
+ }
+
+ if (!cfg->coherent_walk)
+ iommu_pages_free_incoherent(pages, cfg->iommu_dev);
else
iommu_free_pages(pages);
}
@@ -395,7 +429,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
arm_lpae_iopte old, new;
struct io_pgtable_cfg *cfg = &data->iop.cfg;
- new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
+ new = paddr_to_iopte(virt_to_phys(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
new |= ARM_LPAE_PTE_NSTABLE;
diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 29a0040b1c32e..f2ef9bd3cde96 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -63,6 +63,8 @@
#define RISCV_IOMMU_CAPABILITIES_PD8 BIT_ULL(38)
#define RISCV_IOMMU_CAPABILITIES_PD17 BIT_ULL(39)
#define RISCV_IOMMU_CAPABILITIES_PD20 BIT_ULL(40)
+#define RISCV_IOMMU_CAPABILITIES_NL BIT_ULL(42)
+#define RISCV_IOMMU_CAPABILITIES_S BIT_ULL(43)
/**
* enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -460,31 +462,33 @@ struct riscv_iommu_command {
};
/* Fields on dword0, common for all commands */
-#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0)
-#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7)
+#define RISCV_IOMMU_CMD0_OPCODE GENMASK_ULL(6, 0)
+#define RISCV_IOMMU_CMD0_FUNC GENMASK_ULL(9, 7)
/* 3.1.1 IOMMU Page-table cache invalidation */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1
#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0
#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1
-#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10)
-#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12)
-#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32)
-#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33)
-#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44)
+#define RISCV_IOMMU_CMD0_IOTINVAL_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD0_IOTINVAL_PSCID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD0_IOTINVAL_PSCV BIT_ULL(32)
+#define RISCV_IOMMU_CMD0_IOTINVAL_GV BIT_ULL(33)
+#define RISCV_IOMMU_CMD0_IOTINVAL_GSCID GENMASK_ULL(59, 44)
+#define RISCV_IOMMU_CMD0_IOTINVAL_NL BIT_ULL(34)
+#define RISCV_IOMMU_CMD1_IOTINVAL_S BIT_ULL(9)
/* dword1[61:10] is the 4K-aligned page address */
-#define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10)
+#define RISCV_IOMMU_CMD1_IOTINVAL_ADDR GENMASK_ULL(61, 10)
/* 3.1.2 IOMMU Command Queue Fences */
/* Fields on dword0 */
#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2
#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0
-#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10)
-#define RISCV_IOMMU_CMD_IOFENCE_WSI BIT_ULL(11)
-#define RISCV_IOMMU_CMD_IOFENCE_PR BIT_ULL(12)
-#define RISCV_IOMMU_CMD_IOFENCE_PW BIT_ULL(13)
-#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32)
+#define RISCV_IOMMU_CMD0_IOFENCE_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD0_IOFENCE_WSI BIT_ULL(11)
+#define RISCV_IOMMU_CMD0_IOFENCE_PR BIT_ULL(12)
+#define RISCV_IOMMU_CMD0_IOFENCE_PW BIT_ULL(13)
+#define RISCV_IOMMU_CMD0_IOFENCE_DATA GENMASK_ULL(63, 32)
/* dword1 is the address, word-size aligned and shifted to the right by two bits. */
/* 3.1.3 IOMMU Directory cache invalidation */
@@ -492,9 +496,9 @@ struct riscv_iommu_command {
#define RISCV_IOMMU_CMD_IODIR_OPCODE 3
#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0
#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1
-#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12)
-#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33)
-#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40)
+#define RISCV_IOMMU_CMD0_IODIR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD0_IODIR_DV BIT_ULL(33)
+#define RISCV_IOMMU_CMD0_IODIR_DID GENMASK_ULL(63, 40)
/* dword1 is reserved for standard use */
/* 3.1.4 IOMMU PCIe ATS */
@@ -502,25 +506,25 @@ struct riscv_iommu_command {
#define RISCV_IOMMU_CMD_ATS_OPCODE 4
#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0
#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1
-#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12)
-#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32)
-#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33)
-#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40)
-#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56)
+#define RISCV_IOMMU_CMD0_ATS_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD0_ATS_PV BIT_ULL(32)
+#define RISCV_IOMMU_CMD0_ATS_DSV BIT_ULL(33)
+#define RISCV_IOMMU_CMD0_ATS_RID GENMASK_ULL(55, 40)
+#define RISCV_IOMMU_CMD0_ATS_DSEG GENMASK_ULL(63, 56)
/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */
/* ATS.INVAL payload*/
-#define RISCV_IOMMU_CMD_ATS_INVAL_G BIT_ULL(0)
+#define RISCV_IOMMU_CMD1_ATS_INVAL_G BIT_ULL(0)
/* Bits 1 - 10 are zeroed */
-#define RISCV_IOMMU_CMD_ATS_INVAL_S BIT_ULL(11)
-#define RISCV_IOMMU_CMD_ATS_INVAL_UADDR GENMASK_ULL(63, 12)
+#define RISCV_IOMMU_CMD1_ATS_INVAL_S BIT_ULL(11)
+#define RISCV_IOMMU_CMD1_ATS_INVAL_UADDR GENMASK_ULL(63, 12)
/* ATS.PRGR payload */
/* Bits 0 - 31 are zeroed */
-#define RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32)
+#define RISCV_IOMMU_CMD1_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32)
/* Bits 41 - 43 are zeroed */
-#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44)
-#define RISCV_IOMMU_CMD_ATS_PRGR_DST_ID GENMASK_ULL(63, 48)
+#define RISCV_IOMMU_CMD1_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44)
+#define RISCV_IOMMU_CMD1_ATS_PRGR_DST_ID GENMASK_ULL(63, 48)
/**
* struct riscv_iommu_fq_record - Fault/Event Queue Record
@@ -711,8 +715,8 @@ struct riscv_iommu_msipte {
static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
{
- cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
- FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
+ cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD0_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
cmd->dword1 = 0;
}
@@ -720,67 +724,88 @@ static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cm
u64 addr)
{
cmd->dword1 =
- FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, PHYS_PFN(addr));
- cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
+ FIELD_PREP(RISCV_IOMMU_CMD1_IOTINVAL_ADDR, PHYS_PFN(addr));
+ cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_AV;
+}
+
+/* Set the NL flag (dword0 bit 34) on an already initialised IOTINVAL command. */
+static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
+{
+ cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_NL;
+}
+
+/*
+ * Set NAPOT-encoded address for range invalidation (S=1).
+ * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages);
+ * a smaller value would underflow the shift count computed below.
+ * addr must be naturally aligned to 2^sz_lg2.
+ *
+ * The low (sz_lg2 - 13) bits of the page number are filled with ones, e.g.
+ * none for 8KiB (sz_lg2 == 13) and eight for 2MiB (sz_lg2 == 21). dword1 is
+ * overwritten and AV is set in dword0.
+ */
+static inline void riscv_iommu_cmd_inval_set_napot(
+ struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
+{
+ u64 pfn = addr >> 12;
+
+ pfn |= BIT_U64(sz_lg2 - 13) - 1;
+ cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD1_IOTINVAL_ADDR, pfn) |
+ RISCV_IOMMU_CMD1_IOTINVAL_S;
+ cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_AV;
+}
static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
int pscid)
{
- cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
- RISCV_IOMMU_CMD_IOTINVAL_PSCV;
+ cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD0_IOTINVAL_PSCID, pscid) |
+ RISCV_IOMMU_CMD0_IOTINVAL_PSCV;
}
static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
int gscid)
{
- cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
- RISCV_IOMMU_CMD_IOTINVAL_GV;
+ cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD0_IOTINVAL_GSCID, gscid) |
+ RISCV_IOMMU_CMD0_IOTINVAL_GV;
}
static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
{
- cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
- FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
- RISCV_IOMMU_CMD_IOFENCE_PR | RISCV_IOMMU_CMD_IOFENCE_PW;
+ cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD0_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
+ RISCV_IOMMU_CMD0_IOFENCE_PR | RISCV_IOMMU_CMD0_IOFENCE_PW;
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
u64 addr, u32 data)
{
- cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
- FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
- FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) |
- RISCV_IOMMU_CMD_IOFENCE_AV;
+ cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD0_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_IOFENCE_DATA, data) |
+ RISCV_IOMMU_CMD0_IOFENCE_AV;
cmd->dword1 = addr >> 2;
}
static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
{
- cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
- FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
+ cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD0_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
{
- cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
- FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
+ cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD0_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
+ FIELD_PREP(RISCV_IOMMU_CMD0_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
cmd->dword1 = 0;
}
static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
unsigned int devid)
{
- cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) |
- RISCV_IOMMU_CMD_IODIR_DV;
+ cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD0_IODIR_DID, devid) |
+ RISCV_IOMMU_CMD0_IODIR_DV;
}
static inline void riscv_iommu_cmd_iodir_set_pid(struct riscv_iommu_command *cmd,
unsigned int pasid)
{
- cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_PID, pasid);
+ cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD0_IODIR_PID, pasid);
}
#endif /* _RISCV_IOMMU_BITS_H_ */
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index a31f50bbad353..cec3ddd7ab103 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -920,20 +920,120 @@ static void riscv_iommu_bond_unlink(struct riscv_iommu_domain *domain,
}
}
-/*
- * Send IOTLB.INVAL for whole address space for ranges larger than 2MB.
- * This limit will be replaced with range invalidations, if supported by
- * the hardware, when RISC-V IOMMU architecture specification update for
- * range invalidations update will be available.
- */
-#define RISCV_IOMMU_IOTLB_INVAL_LIMIT (2 << 20)
+struct riscv_iommu_tlbi {
+ u64 start; /* copied from gather->start */
+ u64 last; /* copied from gather->end */
+ bool non_leaf; /* true when gather->pt.table_levels_bitmap is non-zero */
+ struct {
+ bool use_global; /* per-address commands not usable; send one without an address */
+ u8 stride_lg2; /* log2 step between per-address commands: 9 * lowest level + 12 */
+ unsigned int num; /* number of per-address commands, valid when !use_global */
+ } single;
+ struct {
+ u8 sz_lg2; /* log2 size of the covering NAPOT range, 0 when none was computed */
+ u64 addr; /* start rounded down to a 2^sz_lg2 boundary */
+ } range;
+};
+
+/*
+ * Translate a gather into the invalidation plan stored in @tlbi: the covering
+ * NAPOT range, the per-address stride/count, or a request to invalidate
+ * without an address when neither can be derived.
+ */
+static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
+ struct iommu_iotlb_gather *gather)
+{
+ u8 levels = gather->pt.leaf_levels_bitmap |
+ gather->pt.table_levels_bitmap;
+ unsigned int span_lg2;
+ u64 count;
+
+ tlbi->start = gather->start;
+ tlbi->last = gather->end;
+ tlbi->non_leaf = gather->pt.table_levels_bitmap != 0;
+
+ /* Start from the fallback; refined below when the gather allows it */
+ tlbi->single.use_global = true;
+ tlbi->range.sz_lg2 = 0;
+
+ /* No level information available */
+ if (!levels)
+ return;
+
+ /*
+ * Calculate the smallest NAPOT range containing [start, last].
+ * NAPOT encoding requires a power-of-two sized, naturally aligned
+ * range. Over-invalidation is always safe.
+ */
+ span_lg2 = fls64(tlbi->start ^ tlbi->last);
+ if (unlikely(span_lg2 >= 64))
+ return;
+ tlbi->range.sz_lg2 = span_lg2;
+ tlbi->range.addr = tlbi->start & ~(BIT_U64(span_lg2) - 1);
+
+ /*
+ * Calculate stride from the lowest changed level. RISC-V uses 4KiB
+ * granule with 9 bits per level.
+ */
+ tlbi->single.stride_lg2 = 9 * __ffs(levels) + 12;
+ count = (tlbi->last - tlbi->start + 1) >> tlbi->single.stride_lg2;
+ if (count && count <= 512) {
+ tlbi->single.num = count;
+ tlbi->single.use_global = false;
+ }
+}
+
+/*
+ * Issue the IOTINVAL.VMA command(s) described by @tlbi to one IOMMU for
+ * @pscid. Preference order: one NAPOT range command (needs the S capability
+ * and a range of at least 8KiB), then one command per stride, and finally a
+ * single command without an address.
+ */
+static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
+ int pscid,
+ struct riscv_iommu_tlbi *tlbi)
+{
+ bool use_nl = tlbi->non_leaf &&
+ (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
+ struct riscv_iommu_command cmd;
+ unsigned int i;
+ u64 iova;
+
+ riscv_iommu_cmd_inval_vma(&cmd);
+ riscv_iommu_cmd_inval_set_pscid(&cmd, pscid);
+
+ /*
+ * If non-leaf entries were changed and the IOMMU doesn't
+ * support NL, we must fall back to global invalidation (AV=0).
+ */
+ if (tlbi->non_leaf && !use_nl)
+ goto global;
+
+ if ((iommu->caps & RISCV_IOMMU_CAPABILITIES_S) &&
+ tlbi->range.sz_lg2 >= 13) {
+ riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
+ tlbi->range.sz_lg2);
+ if (use_nl)
+ riscv_iommu_cmd_inval_set_nl(&cmd);
+ riscv_iommu_cmd_send(iommu, &cmd);
+ return;
+ }
+
+ if (tlbi->single.use_global)
+ goto global;
+
+ /* iova is u64 to match tlbi->start and the setter's address parameter */
+ iova = tlbi->start;
+ for (i = 0; i < tlbi->single.num; i++) {
+ riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+ if (use_nl)
+ riscv_iommu_cmd_inval_set_nl(&cmd);
+ riscv_iommu_cmd_send(iommu, &cmd);
+ iova += 1ULL << tlbi->single.stride_lg2;
+ }
+ return;
+global:
+ riscv_iommu_cmd_send(iommu, &cmd);
+}
static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
- unsigned long start, unsigned long end)
+ struct iommu_iotlb_gather *gather)
{
- struct riscv_iommu_bond *bond;
struct riscv_iommu_device *iommu, *prev;
- struct riscv_iommu_command cmd;
+ struct riscv_iommu_bond *bond;
+ struct riscv_iommu_tlbi tlbi;
+
+ riscv_iommu_tlbi_calc(&tlbi, gather);
/*
* For each IOMMU linked with this protection domain (via bonds->dev),
@@ -974,19 +1074,7 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
if (iommu == prev)
continue;
- riscv_iommu_cmd_inval_vma(&cmd);
- riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
- if (end - start < RISCV_IOMMU_IOTLB_INVAL_LIMIT - 1) {
- unsigned long iova = start;
-
- do {
- riscv_iommu_cmd_inval_set_addr(&cmd, iova);
- riscv_iommu_cmd_send(iommu, &cmd);
- } while (!check_add_overflow(iova, PAGE_SIZE, &iova) &&
- iova < end);
- } else {
- riscv_iommu_cmd_send(iommu, &cmd);
- }
+ riscv_iommu_iotlb_inval_iommu(iommu, domain->pscid, &tlbi);
prev = iommu;
}
@@ -1145,8 +1233,14 @@ static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
static void riscv_iommu_iotlb_flush_all(struct iommu_domain *iommu_domain)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
+ struct iommu_iotlb_gather gather = {
+ .start = 0,
+ .end = ULONG_MAX,
+ .pt.leaf_levels_bitmap = 0xFF,
+ .pt.table_levels_bitmap = 0xFE,
+ };
- riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
+ riscv_iommu_iotlb_inval(domain, &gather);
}
static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
@@ -1154,19 +1248,8 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- if (iommu_pages_list_empty(&gather->freelist)) {
- riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
- } else {
- /*
- * In 1.0 spec version, the smallest scope we can use to
- * invalidate all levels of page table (i.e. leaf and non-leaf)
- * is an invalidate-all-PSCID IOTINVAL.VMA with AV=0.
- * This will be updated with hardware support for
- * capability.NL (non-leaf) IOTINVAL command.
- */
- riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
- iommu_put_pages_list(&gather->freelist);
- }
+ riscv_iommu_iotlb_inval(domain, gather);
+ iommu_put_pages_list(&gather->freelist);
}
static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
@@ -1267,7 +1350,10 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
*/
cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
BIT(PT_FEAT_FLUSH_RANGE) |
- BIT(PT_FEAT_RISCV_SVNAPOT_64K);
+ BIT(PT_FEAT_RISCV_SVNAPOT_64K) |
+ BIT(PT_FEAT_DETAILED_GATHER);
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SVPBMT)
+ cfg.common.features |= BIT(PT_FEAT_RISCV_SVPBMT);
domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
domain->domain.ops = &riscv_iommu_paging_domain_ops;
diff --git a/drivers/iommu/vsi-iommu.c b/drivers/iommu/vsi-iommu.c
new file mode 100644
index 0000000000000..42c424496d073
--- /dev/null
+++ b/drivers/iommu/vsi-iommu.c
@@ -0,0 +1,791 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2025 Collabora Ltd.
+ *
+ * IOMMU API for Verisilicon
+ *
+ * Module Authors: Yandong Lin <yandong.lin@rock-chips.com>
+ * Simon Xue <xxm@rock-chips.com>
+ * Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ *
+ * This hardware block uses a two-level page table structure.
+ * That makes it very similar to the Rockchip IOMMU hardware blocks, but it
+ * has its own driver because the register offsets and configuration bits
+ * are completely different. An additional reason is that this hardware
+ * was developed by Verisilicon for use by their hardware video
+ * decoders and not for general-purpose use like the Rockchip IOMMUs.
+ */
+
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "iommu-pages.h"
+
+/* Per-instance state of one Verisilicon IOMMU platform device. */
+struct vsi_iommu {
+ struct device *dev;
+ void __iomem *regs;
+ /* clocks obtained with devm_clk_bulk_get_all() at probe time */
+ struct clk_bulk_data *clocks;
+ int num_clocks;
+ struct iommu_device iommu;
+ struct list_head node; /* entry in vsi_iommu_domain.iommus */
+ struct iommu_domain *domain; /* domain to which iommu is attached */
+ spinlock_t lock; /* lock to protect vsi_iommu fields */
+ int irq;
+ /* set by vsi_iommu_enable(), cleared by vsi_iommu_disable() */
+ bool enable;
+};
+
+/* A paging domain: one directory table plus the tables it points to. */
+struct vsi_iommu_domain {
+ /* list of struct vsi_iommu attached to this domain (via ->node) */
+ struct list_head iommus;
+ /* device used for the DMA mapping/sync of the tables */
+ struct device *dev;
+ /* level 1 directory table and its DMA address */
+ u32 *dt;
+ dma_addr_t dt_dma;
+ struct iommu_domain domain;
+ /* array whose entry 0 holds the directory table address; its DMA
+ * address is what gets programmed into the hardware
+ */
+ u64 *pta;
+ dma_addr_t pta_dma;
+ spinlock_t lock; /* lock to protect vsi_iommu_domain fields */
+};
+
+static struct iommu_domain vsi_identity_domain;
+
+#define NUM_DT_ENTRIES 1024
+#define NUM_PT_ENTRIES 1024
+
+#define SPAGE_SIZE BIT(12)
+
+/* vsi iommu regs address */
+#define VSI_MMU_CONFIG1_BASE 0x1ac
+#define VSI_MMU_AHB_EXCEPTION_BASE 0x380
+#define VSI_MMU_AHB_CONTROL_BASE 0x388
+#define VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE 0x38C
+
+/* MMU register offsets */
+#define VSI_MMU_FLUSH_BASE 0x184
+#define VSI_MMU_BIT_FLUSH BIT(4)
+
+#define VSI_MMU_PAGE_FAULT_ADDR 0x380
+#define VSI_MMU_STATUS_BASE 0x384 /* IRQ status */
+
+#define VSI_MMU_BIT_ENABLE BIT(0)
+
+#define VSI_MMU_OUT_OF_BOUND BIT(28)
+/* Irq mask */
+#define VSI_MMU_IRQ_MASK 0x7
+
+#define VSI_DTE_PT_ADDRESS_MASK 0xffffffc0
+#define VSI_DTE_PT_VALID BIT(0)
+
+#define VSI_PAGE_DESC_LO_MASK 0xfffff000
+#define VSI_PAGE_DESC_HI_MASK GENMASK_ULL(39, 32)
+#define VSI_PAGE_DESC_HI_SHIFT (32 - 4)
+
+/* Return the page table address encoded in directory table entry @dte. */
+static inline phys_addr_t vsi_dte_pt_address(u32 dte)
+{
+	phys_addr_t pt_addr = dte;
+
+	pt_addr &= VSI_DTE_PT_ADDRESS_MASK;
+
+	return pt_addr;
+}
+
+/* Build a directory table entry: the given address with the valid bit set. */
+static inline u32 vsi_mk_dte(u32 dte)
+{
+	phys_addr_t entry = dte;
+
+	entry |= VSI_DTE_PT_VALID;
+
+	return entry;
+}
+
+#define VSI_PTE_PAGE_WRITABLE BIT(2)
+#define VSI_PTE_PAGE_VALID BIT(0)
+
+/*
+ * Decode the physical page address stored in @pte: the low address bits are
+ * kept in place, the high address bits are shifted back up by
+ * VSI_PAGE_DESC_HI_SHIFT.
+ */
+static inline phys_addr_t vsi_pte_page_address(u64 pte)
+{
+	u64 addr_lo = pte & VSI_PAGE_DESC_LO_MASK;
+	u64 addr_hi = (pte << VSI_PAGE_DESC_HI_SHIFT) & VSI_PAGE_DESC_HI_MASK;
+
+	return addr_hi | addr_lo;
+}
+
+/*
+ * Encode a valid page table entry for physical page @page.
+ *
+ * The high address bits selected by VSI_PAGE_DESC_HI_MASK are folded down by
+ * VSI_PAGE_DESC_HI_SHIFT; the writable bit is set when @prot has IOMMU_WRITE.
+ */
+static u32 vsi_mk_pte(phys_addr_t page, int prot)
+{
+	u32 attrs = VSI_PTE_PAGE_VALID;
+	phys_addr_t encoded;
+
+	if (prot & IOMMU_WRITE)
+		attrs |= VSI_PTE_PAGE_WRITABLE;
+
+	encoded = page & VSI_PAGE_DESC_LO_MASK;
+	encoded |= (page & VSI_PAGE_DESC_HI_MASK) >> VSI_PAGE_DESC_HI_SHIFT;
+
+	return encoded | attrs;
+}
+
+#define VSI_DTE_PT_VALID BIT(0)
+
+/* True when directory table entry @dte has its valid bit set. */
+static inline bool vsi_dte_is_pt_valid(u32 dte)
+{
+	return (dte & VSI_DTE_PT_VALID) != 0;
+}
+
+/* True when page table entry @pte has its valid bit set. */
+static inline bool vsi_pte_is_page_valid(u32 pte)
+{
+	return (pte & VSI_PTE_PAGE_VALID) != 0;
+}
+
+/* Return @pte with only its valid bit cleared; all other bits are kept. */
+static u32 vsi_mk_pte_invalid(u32 pte)
+{
+	pte &= ~VSI_PTE_PAGE_VALID;
+
+	return pte;
+}
+
+#define VSI_MASTER_TLB_MASK GENMASK_ULL(31, 10)
+/* mode 0 : 4k */
+#define VSI_PTA_4K_MODE 0
+
+/* Build the PTA entry for a directory table at @dt_dma, in 4k mode. */
+static u64 vsi_mk_pta(dma_addr_t dt_dma)
+{
+	return (dt_dma & VSI_MASTER_TLB_MASK) | VSI_PTA_4K_MODE;
+}
+
+/*
+ * Convert an embedded struct iommu_domain back to its vsi_iommu_domain.
+ * Only meaningful for domains that really are embedded in one.
+ */
+static struct vsi_iommu_domain *to_vsi_domain(struct iommu_domain *dom)
+{
+	struct vsi_iommu_domain *vsi_domain;
+
+	vsi_domain = container_of(dom, struct vsi_iommu_domain, domain);
+
+	return vsi_domain;
+}
+
+/*
+ * Sync @count u32 table entries starting at DMA address @dma so that the
+ * device sees the CPU's updates.
+ */
+static inline void vsi_table_flush(struct vsi_iommu_domain *vsi_domain, dma_addr_t dma,
+				   unsigned int count)
+{
+	dma_sync_single_for_device(vsi_domain->dev, dma, count * sizeof(u32),
+				   DMA_TO_DEVICE);
+}
+
+#define VSI_IOVA_DTE_MASK 0xffc00000
+#define VSI_IOVA_DTE_SHIFT 22
+#define VSI_IOVA_PTE_MASK 0x003ff000
+#define VSI_IOVA_PTE_SHIFT 12
+#define VSI_IOVA_PAGE_MASK 0x00000fff
+#define VSI_IOVA_PAGE_SHIFT 0
+
+/* Index into the directory table for @iova. */
+static u32 vsi_iova_dte_index(u32 iova)
+{
+	u32 dte_bits = iova & VSI_IOVA_DTE_MASK;
+
+	return dte_bits >> VSI_IOVA_DTE_SHIFT;
+}
+
+/* Index into the page table for @iova. */
+static u32 vsi_iova_pte_index(u32 iova)
+{
+	u32 pte_bits = iova & VSI_IOVA_PTE_MASK;
+
+	return pte_bits >> VSI_IOVA_PTE_SHIFT;
+}
+
+/* Byte offset of @iova inside its 4 KiB page. */
+static u32 vsi_iova_page_offset(u32 iova)
+{
+	u32 offset_bits = iova & VSI_IOVA_PAGE_MASK;
+
+	return offset_bits >> VSI_IOVA_PAGE_SHIFT;
+}
+
+/*
+ * Interrupt handler: report a fault if any bit of VSI_MMU_IRQ_MASK is set in
+ * the status register, then clear the status register.
+ *
+ * Returns IRQ_NONE only when the device could not be runtime-resumed;
+ * otherwise IRQ_HANDLED, including when no status bit was set.
+ *
+ * NOTE(review): pm_runtime_resume_and_get() is called from the interrupt
+ * handler; confirm this cannot sleep for this device.
+ */
+static irqreturn_t vsi_iommu_irq(int irq, void *dev_id)
+{
+ struct vsi_iommu *iommu = dev_id;
+ unsigned long flags;
+ dma_addr_t iova;
+ u32 status;
+
+ if (pm_runtime_resume_and_get(iommu->dev) < 0)
+ return IRQ_NONE;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ status = readl(iommu->regs + VSI_MMU_STATUS_BASE);
+ if (status & VSI_MMU_IRQ_MASK) {
+ dev_err(iommu->dev, "unexpected int_status=%08x\n", status);
+ /* faulting address as latched by the hardware */
+ iova = readl(iommu->regs + VSI_MMU_PAGE_FAULT_ADDR);
+ report_iommu_fault(iommu->domain, iommu->dev, iova, status);
+ }
+ /* written unconditionally, even when no masked bit was set */
+ writel(0, iommu->regs + VSI_MMU_STATUS_BASE);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ pm_runtime_put_autosuspend(iommu->dev);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Look up the vsi_iommu serving @dev through the fwnode recorded in its
+ * iommu_fwspec. Returns NULL when no matching platform device is found.
+ */
+static struct vsi_iommu *vsi_iommu_get_from_dev(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct device *iommu_dev;
+
+	iommu_dev = bus_find_device_by_fwnode(&platform_bus_type,
+					      fwspec->iommu_fwnode);
+
+	/* Drop the reference taken by the lookup right away */
+	put_device(iommu_dev);
+
+	if (!iommu_dev)
+		return NULL;
+
+	return dev_get_drvdata(iommu_dev);
+}
+
+/*
+ * Allocate a paging domain for @dev: the domain structure, a directory table
+ * (dt) and a PTA page whose first entry points at the directory table. Both
+ * tables are DMA-mapped for the IOMMU device.
+ *
+ * Returns the embedded iommu_domain, or NULL on any allocation or mapping
+ * failure (everything acquired so far is released).
+ */
+static struct iommu_domain *vsi_iommu_domain_alloc_paging(struct device *dev)
+{
+ struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+ struct vsi_iommu_domain *vsi_domain;
+
+ vsi_domain = kzalloc(sizeof(*vsi_domain), GFP_KERNEL);
+ if (!vsi_domain)
+ return NULL;
+
+ /* tables are mapped against the IOMMU device, not the client @dev */
+ vsi_domain->dev = iommu->dev;
+ spin_lock_init(&vsi_domain->lock);
+
+ /*
+ * iommu use a 2 level pagetable.
+ * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
+ * Allocate one 4 KiB page for each table.
+ */
+ vsi_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+ SPAGE_SIZE);
+ if (!vsi_domain->dt)
+ goto err_free_domain;
+
+ vsi_domain->dt_dma = dma_map_single(vsi_domain->dev, vsi_domain->dt,
+ SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(vsi_domain->dev, vsi_domain->dt_dma)) {
+ dev_err(dev, "DMA map error for DT\n");
+ goto err_free_dt;
+ }
+
+ vsi_domain->pta = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+ SPAGE_SIZE);
+ if (!vsi_domain->pta)
+ goto err_unmap_dt;
+
+ /* entry 0 is filled in before the page is mapped for the device */
+ vsi_domain->pta[0] = vsi_mk_pta(vsi_domain->dt_dma);
+ vsi_domain->pta_dma = dma_map_single(vsi_domain->dev, vsi_domain->pta,
+ SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(vsi_domain->dev, vsi_domain->pta_dma)) {
+ dev_err(dev, "DMA map error for PTA\n");
+ goto err_free_pta;
+ }
+
+ INIT_LIST_HEAD(&vsi_domain->iommus);
+
+ /* 32-bit IOVA space, 4 KiB pages only */
+ vsi_domain->domain.geometry.aperture_start = 0;
+ vsi_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+ vsi_domain->domain.geometry.force_aperture = true;
+ vsi_domain->domain.pgsize_bitmap = SZ_4K;
+
+ return &vsi_domain->domain;
+
+ /* unwind in reverse order of acquisition */
+err_free_pta:
+ iommu_free_pages(vsi_domain->pta);
+err_unmap_dt:
+ dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma,
+ SPAGE_SIZE, DMA_TO_DEVICE);
+err_free_dt:
+ iommu_free_pages(vsi_domain->dt);
+err_free_domain:
+ kfree(vsi_domain);
+
+ return NULL;
+}
+
+/*
+ * Walk the two-level table under the domain lock and translate @iova.
+ * Returns 0 when either the directory entry or the page entry is not valid.
+ */
+static phys_addr_t vsi_iommu_iova_to_phys(struct iommu_domain *domain,
+					  dma_addr_t iova)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	phys_addr_t result = 0;
+	unsigned long irqflags;
+	u32 dte;
+
+	spin_lock_irqsave(&vsi_domain->lock, irqflags);
+
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	if (vsi_dte_is_pt_valid(dte)) {
+		u32 *pt = (u32 *)phys_to_virt(vsi_dte_pt_address(dte));
+		u32 pte = pt[vsi_iova_pte_index(iova)];
+
+		if (vsi_pte_is_page_valid(pte))
+			result = vsi_pte_page_address(pte) +
+				 vsi_iova_page_offset(iova);
+	}
+
+	spin_unlock_irqrestore(&vsi_domain->lock, irqflags);
+
+	return result;
+}
+
+/*
+ * Invalidate consecutive PTEs starting at @pte_addr, stopping at the first
+ * entry that is already invalid, after @size / SPAGE_SIZE entries, or after
+ * NUM_PT_ENTRIES entries. The table range is then synced for the device.
+ *
+ * Returns the number of bytes actually unmapped.
+ */
+static size_t vsi_iommu_unmap_iova(struct vsi_iommu_domain *vsi_domain,
+				   u32 *pte_addr, dma_addr_t pte_dma,
+				   size_t size)
+{
+	unsigned int requested = size / SPAGE_SIZE;
+	unsigned int cleared = 0;
+
+	while (cleared < requested && cleared < NUM_PT_ENTRIES &&
+	       vsi_pte_is_page_valid(pte_addr[cleared])) {
+		pte_addr[cleared] = vsi_mk_pte_invalid(pte_addr[cleared]);
+		cleared++;
+	}
+
+	/* The sync covers the requested count, not just the cleared entries */
+	vsi_table_flush(vsi_domain, pte_dma, requested);
+
+	return cleared * SPAGE_SIZE;
+}
+
+/*
+ * Fill consecutive PTEs starting at @pte_addr so that @size bytes of
+ * physical memory starting at @paddr are mapped with protection @prot, then
+ * sync the table range for the device.
+ *
+ * Returns 0 on success. If one of the target PTEs is already valid, the
+ * entries written by this call are invalidated again and -EADDRINUSE is
+ * returned, so the caller never sees a partially populated range.
+ */
+static int vsi_iommu_map_iova(struct vsi_iommu_domain *vsi_domain, u32 *pte_addr,
+			      dma_addr_t pte_dma, dma_addr_t iova,
+			      phys_addr_t paddr, size_t size, int prot)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+
+	for (pte_count = 0;
+	     pte_count < pte_total && pte_count < NUM_PT_ENTRIES; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+
+		if (vsi_pte_is_page_valid(pte))
+			goto unwind;
+
+		pte_addr[pte_count] = vsi_mk_pte(paddr, prot);
+
+		paddr += SPAGE_SIZE;
+	}
+
+	vsi_table_flush(vsi_domain, pte_dma, pte_total);
+
+	return 0;
+
+unwind:
+	/* pte_count is the number of entries written before the collision */
+	if (pte_count) {
+		unsigned int written = pte_count;
+
+		while (pte_count--)
+			pte_addr[pte_count] =
+				vsi_mk_pte_invalid(pte_addr[pte_count]);
+
+		vsi_table_flush(vsi_domain, pte_dma, written);
+	}
+
+	return -EADDRINUSE;
+}
+
+/*
+ * Pulse the flush bit on every enabled IOMMU attached to @domain.
+ *
+ * Walks the domain's iommus list without taking the domain lock itself; the
+ * callers in this file hold it.
+ */
+static void vsi_iommu_flush_tlb(struct iommu_domain *domain)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	struct vsi_iommu *iommu;
+
+	list_for_each_entry(iommu, &vsi_domain->iommus, node) {
+		if (pm_runtime_get(iommu->dev) < 0) {
+			/*
+			 * pm_runtime_get() bumps the usage counter even when
+			 * it fails; drop it again so it is not leaked.
+			 */
+			pm_runtime_put_noidle(iommu->dev);
+			continue;
+		}
+
+		spin_lock(&iommu->lock);
+
+		if (iommu->enable) {
+			/* set then clear the flush bit */
+			writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE);
+			writel(0, iommu->regs + VSI_MMU_FLUSH_BASE);
+		}
+
+		spin_unlock(&iommu->lock);
+
+		pm_runtime_put_autosuspend(iommu->dev);
+	}
+}
+
+/*
+ * unmap_pages callback: invalidate the PTEs covering @size bytes at @_iova
+ * and flush the TLB if anything was unmapped.
+ *
+ * Returns the number of bytes unmapped, 0 if the directory entry for the
+ * address is not valid. @count and @gather are not used.
+ */
+static size_t vsi_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
+			      size_t size, size_t count, struct iommu_iotlb_gather *gather)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	dma_addr_t iova = (dma_addr_t)_iova;
+	unsigned long irqflags;
+	size_t unmapped = 0;
+	u32 dte;
+
+	spin_lock_irqsave(&vsi_domain->lock, irqflags);
+
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	if (vsi_dte_is_pt_valid(dte)) {
+		phys_addr_t pt_phys = vsi_dte_pt_address(dte);
+		u32 index = vsi_iova_pte_index(iova);
+		u32 *first_pte = (u32 *)phys_to_virt(pt_phys) + index;
+		dma_addr_t first_pte_dma = pt_phys + index * sizeof(u32);
+
+		unmapped = vsi_iommu_unmap_iova(vsi_domain, first_pte,
+						first_pte_dma, size);
+		if (unmapped)
+			vsi_iommu_flush_tlb(domain);
+	}
+
+	spin_unlock_irqrestore(&vsi_domain->lock, irqflags);
+
+	return unmapped;
+}
+
+/*
+ * Return the page table covering @iova, allocating and installing a new one
+ * in the directory table when the directory entry is not yet valid.
+ *
+ * Returns the table's kernel virtual address or ERR_PTR(-ENOMEM) when the
+ * allocation or its DMA mapping fails.
+ */
+static u32 *vsi_dte_get_page_table(struct vsi_iommu_domain *vsi_domain,
+				   dma_addr_t iova, gfp_t gfp)
+{
+	u32 index = vsi_iova_dte_index(iova);
+	u32 *slot = &vsi_domain->dt[index];
+	u32 dte = *slot;
+
+	if (!vsi_dte_is_pt_valid(dte)) {
+		dma_addr_t new_pt_dma;
+		u32 *new_pt;
+		gfp_t flags;
+
+		/* Do not allow to sleep while allocating the buffer */
+		flags = (gfp & ~GFP_KERNEL) | GFP_ATOMIC | GFP_DMA32;
+		new_pt = iommu_alloc_pages_sz(flags, PAGE_SIZE);
+		if (!new_pt)
+			return ERR_PTR(-ENOMEM);
+
+		new_pt_dma = dma_map_single(vsi_domain->dev, new_pt, PAGE_SIZE,
+					    DMA_TO_DEVICE);
+		if (dma_mapping_error(vsi_domain->dev, new_pt_dma)) {
+			dev_err(vsi_domain->dev, "DMA mapping error while allocating page table\n");
+			iommu_free_pages(new_pt);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		dte = vsi_mk_dte(new_pt_dma);
+		*slot = dte;
+
+		/* Make the single updated directory entry visible to the device */
+		vsi_table_flush(vsi_domain,
+				vsi_domain->dt_dma + index * sizeof(u32), 1);
+	}
+
+	return (u32 *)phys_to_virt(vsi_dte_pt_address(dte));
+}
+
+/*
+ * map_pages callback: map @size bytes at @_iova to @paddr.
+ *
+ * On success *@mapped is set to @size and 0 is returned. @count is not
+ * used. The TLB is flushed whenever a page table could be obtained,
+ * whether or not writing the PTEs succeeded.
+ */
+static int vsi_iommu_map(struct iommu_domain *domain, unsigned long _iova,
+			 phys_addr_t paddr, size_t size, size_t count,
+			 int prot, gfp_t gfp, size_t *mapped)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	dma_addr_t iova = (dma_addr_t)_iova;
+	dma_addr_t first_pte_dma;
+	unsigned long irqflags;
+	u32 *pt;
+	u32 dte, index;
+	int err;
+
+	spin_lock_irqsave(&vsi_domain->lock, irqflags);
+
+	pt = vsi_dte_get_page_table(vsi_domain, iova, gfp);
+	if (IS_ERR(pt)) {
+		err = PTR_ERR(pt);
+		goto out_unlock;
+	}
+
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	index = vsi_iova_pte_index(iova);
+	first_pte_dma = vsi_dte_pt_address(dte) + index * sizeof(u32);
+
+	err = vsi_iommu_map_iova(vsi_domain, &pt[index], first_pte_dma, iova,
+				 paddr, size, prot);
+	if (!err)
+		*mapped = size;
+
+	vsi_iommu_flush_tlb(domain);
+
+out_unlock:
+	spin_unlock_irqrestore(&vsi_domain->lock, irqflags);
+
+	return err;
+}
+
+/* Clear the control register and record that the IOMMU is no longer enabled. */
+static void vsi_iommu_disable(struct vsi_iommu *iommu)
+{
+ writel(0, iommu->regs + VSI_MMU_AHB_CONTROL_BASE);
+ iommu->enable = false;
+}
+
+/*
+ * Attach @dev to the identity domain: disable translation and remove the
+ * IOMMU from the list of the paging domain it was attached to.
+ *
+ * @domain is the static identity domain, which is NOT embedded in a
+ * struct vsi_iommu_domain, so to_vsi_domain() must never be applied to it.
+ * The list that iommu->node sits on belongs to the previous paging domain,
+ * so that domain's lock (taken from @old) is the one to hold. When there is
+ * no previous paging domain only the iommu lock is needed.
+ *
+ * Returns 0 on success or a negative errno if the IOMMU cannot be resumed.
+ */
+static int vsi_iommu_identity_attach(struct iommu_domain *domain,
+				     struct device *dev, struct iommu_domain *old)
+{
+	struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+	struct vsi_iommu_domain *old_domain = NULL;
+	unsigned long flags;
+	int ret;
+
+	if (old && old != &vsi_identity_domain)
+		old_domain = to_vsi_domain(old);
+
+	ret = pm_runtime_resume_and_get(iommu->dev);
+	if (ret < 0)
+		return ret;
+
+	/* Same lock order as the rest of the driver: domain, then iommu */
+	if (old_domain) {
+		spin_lock_irqsave(&old_domain->lock, flags);
+		spin_lock(&iommu->lock);
+	} else {
+		spin_lock_irqsave(&iommu->lock, flags);
+	}
+
+	if (iommu->domain == domain)
+		goto unlock;
+
+	vsi_iommu_disable(iommu);
+	list_del_init(&iommu->node);
+
+	iommu->domain = domain;
+
+unlock:
+	if (old_domain) {
+		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&old_domain->lock, flags);
+	} else {
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
+	pm_runtime_put_autosuspend(iommu->dev);
+	return 0;
+}
+
+static const struct iommu_domain_ops vsi_identity_ops = {
+ .attach_dev = vsi_iommu_identity_attach,
+};
+
+static struct iommu_domain vsi_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &vsi_identity_ops,
+};
+
+/*
+ * Program the IOMMU for paging domain @domain and switch translation on.
+ * Does nothing when @domain is the identity domain.
+ */
+static void vsi_iommu_enable(struct vsi_iommu *iommu, struct iommu_domain *domain)
+{
+	void __iomem *base = iommu->regs;
+	struct vsi_iommu_domain *vsi_domain;
+
+	if (domain == &vsi_identity_domain)
+		return;
+
+	vsi_domain = to_vsi_domain(domain);
+
+	/* The register write order below is kept exactly as is */
+	writel(vsi_domain->pta_dma, base + VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE);
+	writel(VSI_MMU_OUT_OF_BOUND, base + VSI_MMU_CONFIG1_BASE);
+	writel(VSI_MMU_BIT_ENABLE, base + VSI_MMU_AHB_EXCEPTION_BASE);
+	writel(VSI_MMU_BIT_ENABLE, base + VSI_MMU_AHB_CONTROL_BASE);
+
+	iommu->enable = true;
+}
+
+/*
+ * attach_dev callback for paging domains: enable the IOMMU with @domain's
+ * tables, pulse the flush bit, and move the IOMMU onto @domain's list.
+ *
+ * Returns 0 on success or a negative errno if the IOMMU cannot be resumed.
+ * @old is not used.
+ */
+static int vsi_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev, struct iommu_domain *old)
+{
+ struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+ unsigned long flags;
+ int ret = 0;
+
+ ret = pm_runtime_resume_and_get(iommu->dev);
+ if (ret < 0)
+ return ret;
+
+ /* lock order: domain lock first, then iommu lock */
+ spin_lock_irqsave(&vsi_domain->lock, flags);
+ spin_lock(&iommu->lock);
+
+ vsi_iommu_enable(iommu, domain);
+ /* set then clear the flush bit */
+ writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE);
+ writel(0, iommu->regs + VSI_MMU_FLUSH_BASE);
+
+ /*
+ * NOTE(review): if the iommu is still on another paging domain's list,
+ * that list is modified here without holding that domain's lock.
+ */
+ list_del_init(&iommu->node);
+ list_add_tail(&iommu->node, &vsi_domain->iommus);
+
+ iommu->domain = domain;
+
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&vsi_domain->lock, flags);
+ pm_runtime_put_autosuspend(iommu->dev);
+ return ret;
+}
+
+/*
+ * Free a paging domain: every page table referenced by a valid directory
+ * entry, then the directory table, the PTA page and the domain itself.
+ * Warns if an IOMMU is still attached.
+ */
+static void vsi_iommu_domain_free(struct iommu_domain *domain)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+
+	WARN_ON(!list_empty(&vsi_domain->iommus));
+
+	for (i = 0; i < NUM_DT_ENTRIES; i++) {
+		u32 dte = vsi_domain->dt[i];
+
+		if (vsi_dte_is_pt_valid(dte)) {
+			phys_addr_t pt_phys = vsi_dte_pt_address(dte);
+			u32 *page_table = phys_to_virt(pt_phys);
+
+			/*
+			 * Page tables are mapped with PAGE_SIZE in
+			 * vsi_dte_get_page_table(); the unmap size must
+			 * match the size used at map time.
+			 */
+			dma_unmap_single(vsi_domain->dev, pt_phys,
+					 PAGE_SIZE, DMA_TO_DEVICE);
+			iommu_free_pages(page_table);
+		}
+	}
+
+	dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
+	iommu_free_pages(vsi_domain->dt);
+
+	dma_unmap_single(vsi_domain->dev, vsi_domain->pta_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
+	iommu_free_pages(vsi_domain->pta);
+
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+
+	kfree(vsi_domain);
+}
+
+/*
+ * probe_device callback: bind client device @dev to its IOMMU.
+ *
+ * Adds a runtime-PM device link from @dev to the IOMMU (a failure to link is
+ * only logged) and stores the IOMMU as the device's private data.
+ *
+ * Returns the iommu_device, or ERR_PTR(-ENODEV) when no IOMMU instance is
+ * found for @dev.
+ */
+static struct iommu_device *vsi_iommu_probe_device(struct device *dev)
+{
+	struct vsi_iommu *iommu = vsi_iommu_get_from_dev(dev);
+	struct device_link *link;
+
+	/* The lookup returns NULL when no matching device is found */
+	if (!iommu)
+		return ERR_PTR(-ENODEV);
+
+	link = device_link_add(dev, iommu->dev,
+			       DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
+	if (!link)
+		dev_err(dev, "Unable to link %s\n", dev_name(iommu->dev));
+
+	dev_iommu_priv_set(dev, iommu);
+	return &iommu->iommu;
+}
+
+/* release_device callback: drop the device link added at probe_device time. */
+static void vsi_iommu_release_device(struct device *dev)
+{
+ struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+
+ device_link_remove(dev, iommu->dev);
+}
+
+/* of_xlate callback: record the first phandle argument as the device's ID. */
+static int vsi_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args)
+{
+ return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
+/*
+ * IOMMU core callbacks. The same static identity domain is used both as the
+ * identity domain and as the release domain; paging domains use the
+ * default_domain_ops below.
+ */
+static const struct iommu_ops vsi_iommu_ops = {
+ .identity_domain = &vsi_identity_domain,
+ .release_domain = &vsi_identity_domain,
+ .domain_alloc_paging = vsi_iommu_domain_alloc_paging,
+ .of_xlate = vsi_iommu_of_xlate,
+ .probe_device = vsi_iommu_probe_device,
+ .release_device = vsi_iommu_release_device,
+ .device_group = generic_single_device_group,
+ .owner = THIS_MODULE,
+ .default_domain_ops = &(const struct iommu_domain_ops) {
+ .attach_dev = vsi_iommu_attach_device,
+ .map_pages = vsi_iommu_map,
+ .unmap_pages = vsi_iommu_unmap,
+ .iova_to_phys = vsi_iommu_iova_to_phys,
+ .free = vsi_iommu_domain_free,
+ }
+};
+
+static const struct of_device_id vsi_iommu_dt_ids[] = {
+ {
+ .compatible = "verisilicon,iommu-1.2",
+ },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, vsi_iommu_dt_ids);
+
+/*
+ * Platform probe: map the registers, get and prepare the clocks, request the
+ * interrupt, enable runtime PM with autosuspend, and register the IOMMU with
+ * sysfs and the IOMMU core.
+ *
+ * Returns 0 on success or a negative errno; on failure after the clocks
+ * were prepared they are unprepared again.
+ */
+static int vsi_iommu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct vsi_iommu *iommu;
+	int err;
+
+	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	iommu->dev = dev;
+	spin_lock_init(&iommu->lock);
+	INIT_LIST_HEAD(&iommu->node);
+
+	iommu->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(iommu->regs))
+		return PTR_ERR(iommu->regs);	/* propagate the real error */
+
+	iommu->num_clocks = devm_clk_bulk_get_all(dev, &iommu->clocks);
+	if (iommu->num_clocks < 0)
+		return iommu->num_clocks;
+
+	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
+	if (err)
+		return err;
+
+	iommu->irq = platform_get_irq(pdev, 0);
+	if (iommu->irq < 0) {
+		/* clocks are already prepared: go through the unwind path */
+		err = iommu->irq;
+		goto err_unprepare_clocks;
+	}
+
+	err = devm_request_irq(iommu->dev, iommu->irq, vsi_iommu_irq,
+			       IRQF_SHARED, dev_name(dev), iommu);
+	if (err)
+		goto err_unprepare_clocks;
+
+	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	platform_set_drvdata(pdev, iommu);
+
+	pm_runtime_set_autosuspend_delay(dev, 100);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_enable(dev);
+
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, "%s",
+				     dev_name(dev));
+	if (err)
+		goto err_runtime_disable;
+
+	err = iommu_device_register(&iommu->iommu, &vsi_iommu_ops, dev);
+	if (err)
+		goto err_remove_sysfs;
+
+	return 0;
+
+err_remove_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
+err_runtime_disable:
+	pm_runtime_disable(dev);
+err_unprepare_clocks:
+	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
+	return err;
+}
+
+/* Platform shutdown: mask the interrupt, then force the device to suspend. */
+static void vsi_iommu_shutdown(struct platform_device *pdev)
+{
+ struct vsi_iommu *iommu = platform_get_drvdata(pdev);
+
+ disable_irq(iommu->irq);
+ pm_runtime_force_suspend(&pdev->dev);
+}
+
+/*
+ * Suspend callback: disable translation, then gate the clocks.
+ * Called without taking iommu->lock. Always returns 0.
+ */
+static int __maybe_unused vsi_iommu_suspend(struct device *dev)
+{
+ struct vsi_iommu *iommu = dev_get_drvdata(dev);
+
+ vsi_iommu_disable(iommu);
+
+ clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
+ return 0;
+}
+
+/*
+ * Resume callback: ungate the clocks and, when the IOMMU is attached to a
+ * paging domain, reprogram and re-enable it.
+ *
+ * The identity domain is a bare struct iommu_domain that is not embedded in
+ * a struct vsi_iommu_domain, so it must be skipped before to_vsi_domain()
+ * and the domain lock are used; there is nothing to program for it anyway.
+ *
+ * Returns 0 on success or the error from clk_bulk_enable().
+ */
+static int __maybe_unused vsi_iommu_resume(struct device *dev)
+{
+	struct vsi_iommu *iommu = dev_get_drvdata(dev);
+	unsigned long flags;
+	int ret;
+
+	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
+	if (ret)
+		return ret;
+
+	if (iommu->domain && iommu->domain != &vsi_identity_domain) {
+		struct vsi_iommu_domain *vsi_domain = to_vsi_domain(iommu->domain);
+
+		spin_lock_irqsave(&vsi_domain->lock, flags);
+		spin_lock(&iommu->lock);
+		vsi_iommu_enable(iommu, iommu->domain);
+		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&vsi_domain->lock, flags);
+	}
+
+	return 0;
+}
+
+static DEFINE_RUNTIME_DEV_PM_OPS(vsi_iommu_pm_ops,
+ vsi_iommu_suspend, vsi_iommu_resume,
+ NULL);
+
+/*
+ * Platform driver description.
+ *
+ * vsi_iommu_pm_ops carries runtime-PM callbacks, so it is wrapped in
+ * pm_ptr() rather than pm_sleep_ptr(): the latter would drop the ops, and
+ * with them the clock enable in the resume callback, on kernels built with
+ * CONFIG_PM but without CONFIG_PM_SLEEP.
+ */
+static struct platform_driver rockchip_vsi_iommu_driver = {
+	.probe = vsi_iommu_probe,
+	.shutdown = vsi_iommu_shutdown,
+	.driver = {
+		.name = "vsi_iommu",
+		.of_match_table = vsi_iommu_dt_ids,
+		.pm = pm_ptr(&vsi_iommu_pm_ops),
+		.suppress_bind_attrs = true,
+	},
+};
+module_platform_driver(rockchip_vsi_iommu_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Benjamin Gaignard <benjamin.gaignard@collabora.com>");
+MODULE_DESCRIPTION("Verisilicon IOMMU driver");
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index ec6c8dbdc5e9c..96efa00d97433 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -205,6 +205,53 @@ int pci_ats_page_aligned(struct pci_dev *pdev)
return 0;
}
+/*
+ * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on
+ * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host
+ * by means of an ATS request on CXL.io.
+ *
+ * In other words, CXL.cache devices cannot access host physical memory without
+ * ATS.
+ *
+ * Check Cache_Capable instead of Cache_Enable because CXL.cache may be enabled
+ * after the caller uses this to make its ATS decision.
+ */
+static bool pci_cxl_ats_required(struct pci_dev *pdev)
+{
+	u16 cxl_cap;
+	int dvsec;
+
+	/* No CXL device DVSEC means no CXL.cache requirement */
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+
+	/* A failed config read is treated as "not required" */
+	if (dvsec &&
+	    !pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cxl_cap))
+		return cxl_cap & PCI_DVSEC_CXL_CACHE_CAPABLE;
+
+	return false;
+}
+
+/**
+ * pci_ats_required - Whether the PCI device requires ATS
+ * @pdev: the PCI device
+ *
+ * Returns true, if the PCI device requires ATS for basic functional operation.
+ */
+bool pci_ats_required(struct pci_dev *pdev)
+{
+	struct pci_dev *target;
+
+	if (!pci_ats_supported(pdev))
+		return false;
+
+	/* A VF inherits its PF's requirement for ATS function */
+	target = pdev->is_virtfn ? pci_physfn(pdev) : pdev;
+
+	if (pci_cxl_ats_required(target))
+		return true;
+
+	return pci_dev_specific_ats_required(target);
+}
+EXPORT_SYMBOL_GPL(pci_ats_required);
+
#ifdef CONFIG_PCI_PRI
void pci_pri_init(struct pci_dev *pdev)
{
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b36667969ad5e..80ffd94970b32 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1145,6 +1145,15 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
}
#endif
+#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_PCI_ATS)
+bool pci_dev_specific_ats_required(struct pci_dev *dev);
+#else
+static inline bool pci_dev_specific_ats_required(struct pci_dev *dev)
+{
+ return false;
+}
+#endif
+
#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
struct resource *res);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1b4ae046dd69b..5d7df95d20106 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5702,6 +5702,48 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats);
+
+/* True for NVIDIA device IDs in the GB20B range 0x2e00..0x2e3f. */
+static bool quirk_nvidia_gpu_ats_required(struct pci_dev *pdev)
+{
+	unsigned int id = pdev->device;
+
+	/* GB20B */
+	if (id >= 0x2e00 && id <= 0x2e3f)
+		return true;
+
+	return false;
+}
+
+/*
+ * Table of devices that require ATS. An entry matches on @vendor; if
+ * @ats_required is set it decides for the device, otherwise @device must
+ * equal the device ID. The table ends with a zero vendor.
+ */
+static const struct pci_dev_ats_required {
+ u16 vendor;
+ u16 device;
+ bool (*ats_required)(struct pci_dev *dev);
+} pci_dev_ats_required[] = {
+ /* NVIDIA GPUs */
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, quirk_nvidia_gpu_ats_required },
+ /* NVIDIA CX10 Family NVlink-C2C */
+ { PCI_VENDOR_ID_MELLANOX, 0x2101, NULL },
+ { 0 }
+};
+
+/*
+ * Some NVIDIA devices do not implement CXL config space, but present as PCIe
+ * devices that can issue CXL-like cache operations like CXL.cache. Thus, they
+ * require ATS to obtain host physical addresses, like pci_cxl_ats_required().
+ */
+bool pci_dev_specific_ats_required(struct pci_dev *pdev)
+{
+	const struct pci_dev_ats_required *entry;
+
+	for (entry = pci_dev_ats_required; entry->vendor; entry++) {
+		bool required;
+
+		if (pdev->vendor != entry->vendor)
+			continue;
+
+		/* A callback decides when present, else match the device ID */
+		if (entry->ats_required)
+			required = entry->ats_required(pdev);
+		else
+			required = entry->device == pdev->device;
+
+		if (required)
+			return true;
+	}
+
+	return false;
+}
#endif /* CONFIG_PCI_ATS */
/* Freescale PCIe doesn't support MSI in RC mode */
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index c16d4199bf923..836a50f5917a2 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -397,6 +397,17 @@
#endif
/*
+ * Optional: not supported by clang
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Attributes.html#index-noipa
+ */
+#if __has_attribute(noipa)
+# define __noipa __attribute__((noipa))
+#else
+# define __noipa
+#endif
+
+/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
*/
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index fc5d0b5edadc0..07ef1c8341a4f 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -134,6 +134,11 @@ enum pt_features {
* significant amount of page table.
*/
PT_FEAT_FLUSH_RANGE_NO_GAPS,
+ /**
+ * @PT_FEAT_DETAILED_GATHER: Fill in the struct iommu_iotlb_gather pt
+ * sub structure with information about which levels were changed.
+ */
+ PT_FEAT_DETAILED_GATHER,
/* private: */
PT_FEAT_FMT_START,
};
@@ -188,6 +193,10 @@ enum {
* Support the 64k contiguous page size following the Svnapot extension.
*/
PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START,
+ /*
+ * Support Svpbmt extension: encode page-based memory type (PBMT) in PTEs.
+ */
+ PT_FEAT_RISCV_SVPBMT,
};
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e587d4ac4d331..bf8a77a164e4d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -345,12 +345,6 @@ struct iommu_pages_list {
/**
* struct iommu_iotlb_gather - Range information for a pending IOTLB flush
*
- * @start: IOVA representing the start of the range to be flushed
- * @end: IOVA representing the end of the range to be flushed (inclusive)
- * @pgsize: The interval at which to perform the flush
- * @freelist: Removed pages to free after sync
- * @queued: Indicates that the flush will be queued
- *
* This structure is intended to be updated by multiple calls to the
* ->unmap() function in struct iommu_ops before eventually being passed
* into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after
@@ -359,10 +353,44 @@ struct iommu_pages_list {
* later instead of ->iotlb_sync(), so drivers may optimise accordingly.
*/
struct iommu_iotlb_gather {
+ /** @start: IOVA representing the start of the range to be flushed */
unsigned long start;
+ /**
+ * @end: IOVA representing the end of the range to be
+ * flushed (inclusive)
+ */
unsigned long end;
- size_t pgsize;
+
+ union {
+ /**
+ * @pgsize: The interval at which to perform the flush, only
+ * used by arm-smmu-v3
+ */
+ size_t pgsize;
+ struct {
+ /**
+ * @pt.leaf_levels_bitmap: Bitmap of generic_pt
+ * levels where leaf entries were unmapped. Bit 0
+ * means the leaf only level. If 0 no leafs
+ * were unmapped.
+ */
+ u8 leaf_levels_bitmap;
+ /**
+ * @pt.table_levels_bitmap: Bitmap of generic_pt levels
+ * of table entries that were removed. Bit 0 is never
+ * set, bit 1 means a table of all leafs was removed.
+ * When freelist is empty this must be 0.
+ */
+ u8 table_levels_bitmap;
+ } pt;
+ };
+
+ /**
+ * @freelist: Removed pages to free after sync, only used by
+ * iommupt
+ */
struct iommu_pages_list freelist;
+ /** @queued: True if the gather will be completed with a flush all */
bool queued;
};
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 75c6c86cf09dc..f3723b6861294 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
+bool pci_ats_required(struct pci_dev *dev);
#else /* CONFIG_PCI_ATS */
static inline bool pci_ats_supported(struct pci_dev *d)
{ return false; }
@@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d)
{ return -ENODEV; }
static inline int pci_ats_page_aligned(struct pci_dev *dev)
{ return 0; }
+static inline bool pci_ats_required(struct pci_dev *dev)
+{ return false; }
#endif /* CONFIG_PCI_ATS */
#ifdef CONFIG_PCI_PRI
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 14f634ab9350d..6ac45be1008b8 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1349,6 +1349,7 @@
/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
#define PCI_DVSEC_CXL_DEVICE 0
#define PCI_DVSEC_CXL_CAP 0xA
+#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0)
#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
#define PCI_DVSEC_CXL_CTRL 0xC
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 772ddabcbe7d3..b18a682fe3da2 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -1222,7 +1222,7 @@ struct self_test {
static __initconst const struct debug_obj_descr descr_type_test;
-static bool __init is_static_object(void *addr)
+static __noipa bool __init is_static_object(void *addr)
{
struct self_test *obj = addr;