VisionFive2 Linux kernel

StarFive Tech Linux Kernel for VisionFive (JH7110) boards (mirror)

More than 9999 Commits   32 Branches   54 Tags
author: Arnd Bergmann <arnd@arndb.de> 2023-03-27 14:13:17 +0200 committer: Emil Renner Berthing <emil.renner.berthing@canonical.com> 2023-07-20 20:53:35 +0200 commit: 34ff3956cc572a2bc5a3f26cacc501d1fb59a25c parent: c377a2f7c49ae5e6bd49408e00cd56ebee4ac5f7
Commit Summary:
dma-mapping: replace custom code with generic implementation
Diffstat:
19 files changed, 453 insertions, 380 deletions
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index ddb96786f765..61cd01646222 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -30,63 +30,33 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	dma_cache_wback_inv(page_to_phys(page), size);
 }
 
-/*
- * Cache operations depending on function and direction argument, inspired by
- * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk
- * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
- * dma-mapping: provide a generic dma-noncoherent implementation)"
- *
- *          |   map          ==  for_device     |   unmap     ==  for_cpu
- *          |----------------------------------------------------------------
- * TO_DEV   |   writeback        writeback      |   none          none
- * FROM_DEV |   invalidate       invalidate     |   invalidate*   invalidate*
- * BIDIR    |   writeback        writeback      |   invalidate    invalidate
- *
- *     [*] needed for CPU speculative prefetches
- *
- * NOTE: we don't check the validity of direction argument as it is done in
- * upper layer functions (in include/linux/dma-mapping.h)
- */
-
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		dma_cache_wback(paddr, size);
-		break;
-
-	case DMA_FROM_DEVICE:
-		dma_cache_inv(paddr, size);
-		break;
-
-	case DMA_BIDIRECTIONAL:
-		dma_cache_wback(paddr, size);
-		break;
+	dma_cache_wback(paddr, size);
+}
 
-	default:
-		break;
-	}
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	dma_cache_inv(paddr, size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		break;
+	dma_cache_wback_inv(paddr, size);
+}
 
-	/* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		dma_cache_inv(paddr, size);
-		break;
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
 
-	default:
-		break;
-	}
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
 }
 
+#include <linux/dma-sync.h>
+
 /*
  * Plug in direct dma map ops.
  */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f5f27ae80aea..e8764237cbbe 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -213,6 +213,9 @@ config LOCKDEP_SUPPORT
 	bool
 	default y
 
+config ARCH_DMA_MARK_DCACHE_CLEAN
+	def_bool y
+
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 12b5c6ae93fc..0817274aed15 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -13,27 +13,36 @@
 
 #include "dma.h"
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	if (dir == DMA_FROM_DEVICE) {
-		dmac_inv_range(__va(paddr), __va(paddr + size));
-		outer_inv_range(paddr, paddr + size);
-	} else {
-		dmac_clean_range(__va(paddr), __va(paddr + size));
-		outer_clean_range(paddr, paddr + size);
-	}
+	dmac_clean_range(__va(paddr), __va(paddr + size));
+	outer_clean_range(paddr, paddr + size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	if (dir != DMA_TO_DEVICE) {
-		outer_inv_range(paddr, paddr + size);
-		dmac_inv_range(__va(paddr), __va(paddr));
-	}
+	dmac_inv_range(__va(paddr), __va(paddr + size));
+	outer_inv_range(paddr, paddr + size);
 }
 
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	dmac_flush_range(__va(paddr), __va(paddr + size));
+	outer_flush_range(paddr, paddr + size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
+
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 9ed1b16fefdd..a0a8d74d1fe1 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -687,6 +687,30 @@ void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
 	}
 }
 
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
+{
+	dma_cache_maint(paddr, size, dmac_clean_range);
+	outer_clean_range(paddr, paddr + size);
+}
+
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	dma_cache_maint(paddr, size, dmac_inv_range);
+	outer_inv_range(paddr, paddr + size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	dma_cache_maint(paddr, size, dmac_flush_range);
+	outer_flush_range(paddr, paddr + size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
 static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
 {
 	if (IS_ENABLED(CONFIG_CPU_V6) ||
@@ -699,45 +723,7 @@ static bool arch_sync_dma_cpu_needs_post_dma_flush(void)
 	return false;
 }
 
-/*
- * Make an area consistent for devices.
- * Note: Drivers should NOT use this function directly.
- * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
- */
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		dma_cache_maint(paddr, size, dmac_clean_range);
-		outer_clean_range(paddr, paddr + size);
-		break;
-	case DMA_FROM_DEVICE:
-		dma_cache_maint(paddr, size, dmac_inv_range);
-		outer_inv_range(paddr, paddr + size);
-		break;
-	case DMA_BIDIRECTIONAL:
-		if (arch_sync_dma_cpu_needs_post_dma_flush()) {
-			dma_cache_maint(paddr, size, dmac_clean_range);
-			outer_clean_range(paddr, paddr + size);
-		} else {
-			dma_cache_maint(paddr, size, dmac_flush_range);
-			outer_flush_range(paddr, paddr + size);
-		}
-		break;
-	default:
-		break;
-	}
-}
-
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	if (dir != DMA_TO_DEVICE && arch_sync_dma_cpu_needs_post_dma_flush()) {
-		outer_inv_range(paddr, paddr + size);
-		dma_cache_maint(paddr, size, dmac_inv_range);
-	}
-}
+#include <linux/dma-sync.h>
 
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3cb101e8cb29..97b7cea5eb23 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -13,25 +13,43 @@
 #include <asm/cacheflush.h>
 #include <asm/xen/xen-ops.h>
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-			      enum dma_data_direction dir)
+/*
+ * The dcache_*_poc() routines operate on kernel virtual addresses, so
+ * translate the physical address with phys_to_virt() before calling them.
+ */
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
 	unsigned long start = (unsigned long)phys_to_virt(paddr);
 
 	dcache_clean_poc(start, start + size);
+}
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+	dcache_inval_poc(start, start + size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-			   enum dma_data_direction dir)
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
 {
 	unsigned long start = (unsigned long)phys_to_virt(paddr);
 
-	if (dir == DMA_TO_DEVICE)
-		return;
+	dcache_clean_inval_poc(start, start + size);
+}
 
-	dcache_inval_poc(start, start + size);
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
 }
 
+#include <linux/dma-sync.h>
+
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	unsigned long start = (unsigned long)page_address(page);
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index c90f912e2822..9402e101b363 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -55,31 +55,29 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	cache_op(page_to_phys(page), size, dma_wbinv_set_zero_range);
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		cache_op(paddr, size, dma_wb_range);
-		break;
-	default:
-		BUG();
-	}
+	cache_op(paddr, size, dma_wb_range);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		return;
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		cache_op(paddr, size, dma_inv_range);
-		break;
-	default:
-		BUG();
-	}
+	cache_op(paddr, size, dma_inv_range);
 }
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	cache_op(paddr, size, dma_wbinv_range);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index 882680e81a30..033d06074618 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -9,29 +9,43 @@
 #include <linux/memblock.h>
 #include <asm/page.h>
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+/*
+ * The hexagon cache maintenance routines take kernel virtual addresses,
+ * so convert the physical address with phys_to_virt() before using it.
+ */
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	void *addr = phys_to_virt(paddr);
-
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		hexagon_clean_dcache_range((unsigned long) addr,
-		(unsigned long) addr + size);
-		break;
-	case DMA_FROM_DEVICE:
-		hexagon_inv_dcache_range((unsigned long) addr,
-		(unsigned long) addr + size);
-		break;
-	case DMA_BIDIRECTIONAL:
-		flush_dcache_range((unsigned long) addr,
-		(unsigned long) addr + size);
-		break;
-	default:
-		BUG();
-	}
+	unsigned long addr = (unsigned long)phys_to_virt(paddr);
+
+	hexagon_clean_dcache_range(addr, addr + size);
 }
 
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long addr = (unsigned long)phys_to_virt(paddr);
+
+	hexagon_inv_dcache_range(addr, addr + size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long addr = (unsigned long)phys_to_virt(paddr);
+
+	flush_dcache_range(addr, addr + size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
+
 /*
  * Our max_low_pfn should have been backed off by 16MB in mm/init.c to create
  * DMA coherent space.  Use that for the pool.
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 2e192a5df949..aa9b434e6df8 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -58,20 +58,33 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 
 #endif /* CONFIG_MMU && !CONFIG_COLDFIRE */
 
-void arch_sync_dma_for_device(phys_addr_t handle, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_BIDIRECTIONAL:
-	case DMA_TO_DEVICE:
-		cache_push(handle, size);
-		break;
-	case DMA_FROM_DEVICE:
-		cache_clear(handle, size);
-		break;
-	default:
-		pr_err_ratelimited("dma_sync_single_for_device: unsupported dir %u\n",
-				   dir);
-		break;
-	}
+	/*
+	 * cache_push() always invalidates in addition to cleaning
+	 * write-back caches.
+	 */
+	cache_push(paddr, size);
+}
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	cache_clear(paddr, size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	cache_push(paddr, size);
 }
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index b4c4e45fd45e..01110d4aa5b0 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -14,32 +14,30 @@
 #include <linux/bug.h>
 #include <asm/cacheflush.h>
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (direction) {
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		flush_dcache_range(paddr, paddr + size);
-		break;
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range(paddr, paddr + size);
-		break;
-	default:
-		BUG();
-	}
+	/* writeback plus invalidate, could be a nop on WT caches */
+	flush_dcache_range(paddr, paddr + size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		break;
-	case DMA_BIDIRECTIONAL:
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range(paddr, paddr + size);
-		break;
-	default:
-		BUG();
-	}}
+	invalidate_dcache_range(paddr, paddr + size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	flush_dcache_range(paddr, paddr + size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index b9d68bcc5d53..902d4b7c1f85 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -85,50 +85,38 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
 	} while (left);
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		dma_sync_phys(paddr, size, _dma_cache_wback);
-		break;
-	case DMA_FROM_DEVICE:
-		dma_sync_phys(paddr, size, _dma_cache_inv);
-		break;
-	case DMA_BIDIRECTIONAL:
-		if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
-		    cpu_needs_post_dma_flush())
-			dma_sync_phys(paddr, size, _dma_cache_wback);
-		else
-			dma_sync_phys(paddr, size, _dma_cache_wback_inv);
-		break;
-	default:
-		break;
-	}
+	dma_sync_phys(paddr, size, _dma_cache_wback);
 }
 
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		break;
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		if (cpu_needs_post_dma_flush())
-			dma_sync_phys(paddr, size, _dma_cache_inv);
-		break;
-	default:
-		break;
-	}
+	dma_sync_phys(paddr, size, _dma_cache_inv);
 }
-#endif
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	dma_sync_phys(paddr, size, _dma_cache_wback_inv);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
+	       cpu_needs_post_dma_flush();
+}
+
+#include <linux/dma-sync.h>
 
 #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		const struct iommu_ops *iommu, bool coherent)
 {
 	dev->dma_coherent = coherent;
 }
 #endif
diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c
index fd887d5f3f9a..29978970955e 100644
--- a/arch/nios2/mm/dma-mapping.c
+++ b/arch/nios2/mm/dma-mapping.c
@@ -13,53 +13,46 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/string.h>
+#include <linux/dma-map-ops.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/cache.h>
 #include <asm/cacheflush.h>
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
+	/*
+	 * We just need to write back the caches here, but Nios2 flush
+	 * instruction will do both writeback and invalidate.
+	 */
 	void *vaddr = phys_to_virt(paddr);
+	flush_dcache_range((unsigned long)vaddr, (unsigned long)(vaddr + size));
+}
 
-	switch (dir) {
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range((unsigned long)vaddr,
-			(unsigned long)(vaddr + size));
-		break;
-	case DMA_TO_DEVICE:
-		/*
-		 * We just need to flush the caches here , but Nios2 flush
-		 * instruction will do both writeback and invalidate.
-		 */
-	case DMA_BIDIRECTIONAL: /* flush and invalidate */
-		flush_dcache_range((unsigned long)vaddr,
-			(unsigned long)(vaddr + size));
-		break;
-	default:
-		BUG();
-	}
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long vaddr = (unsigned long)phys_to_virt(paddr);
+	invalidate_dcache_range(vaddr, (unsigned long)(vaddr + size));
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
 {
 	void *vaddr = phys_to_virt(paddr);
+	flush_dcache_range((unsigned long)vaddr, (unsigned long)(vaddr + size));
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
 
-	switch (dir) {
-	case DMA_BIDIRECTIONAL:
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range((unsigned long)vaddr,
-			(unsigned long)(vaddr + size));
-		break;
-	case DMA_TO_DEVICE:
-		break;
-	default:
-		BUG();
-	}
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
 }
 
+#include <linux/dma-sync.h>
+
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	unsigned long start = (unsigned long)page_address(page);
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 91a00d09ffad..aba2258e62eb 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -95,32 +95,47 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
 	mmap_write_unlock(&init_mm);
 }
 
-void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
 	unsigned long cl;
 	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		/* Write back the dcache for the requested range */
-		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo->dcache_block_size)
-			mtspr(SPR_DCBWR, cl);
-		break;
-	case DMA_FROM_DEVICE:
-		/* Invalidate the dcache for the requested range */
-		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo->dcache_block_size)
-			mtspr(SPR_DCBIR, cl);
-		break;
-	case DMA_BIDIRECTIONAL:
-		/* Flush the dcache for the requested range */
-		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo->dcache_block_size)
-			mtspr(SPR_DCBFR, cl);
-		break;
-	default:
-		break;
-	}
+	/* Write back the dcache for the requested range */
+	for (cl = paddr; cl < paddr + size;
+	     cl += cpuinfo->dcache_block_size)
+		mtspr(SPR_DCBWR, cl);
 }
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long cl;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
+
+	/* Invalidate the dcache for the requested range */
+	for (cl = paddr; cl < paddr + size;
+	     cl += cpuinfo->dcache_block_size)
+		mtspr(SPR_DCBIR, cl);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long cl;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
+
+	/* Flush the dcache for the requested range */
+	for (cl = paddr; cl < paddr + size;
+	     cl += cpuinfo->dcache_block_size)
+		mtspr(SPR_DCBFR, cl);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 71ed5391f29d..54e9e805c502 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -443,8 +443,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	free_pages((unsigned long)__va(dma_handle), order);
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
 	/*
 	 * fdc: The data cache line is written back to memory, if and only if
@@ -453,20 +452,28 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 	flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	unsigned long addr = (unsigned long) phys_to_virt(paddr);
+	unsigned long virt = (unsigned long)phys_to_virt(paddr);
+
+	purge_kernel_dcache_range(virt, size);
+}
 
-	switch (dir) {
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		flush_kernel_dcache_range(addr, size);
-		return;
-	case DMA_FROM_DEVICE:
-		purge_kernel_dcache_range_asm(addr, addr + size);
-		return;
-	default:
-		BUG();
-	}
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	unsigned long virt = (unsigned long)phys_to_virt(paddr);
+
+	flush_kernel_dcache_range(virt, size);
 }
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 00e59a4faa2b..668d8ec58f1c 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -101,27 +101,33 @@ static void __dma_phys_op(phys_addr_t paddr, size_t size, enum dma_cache_op op)
 #endif
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	__dma_phys_op(start, end, DMA_CACHE_CLEAN);
+	__dma_phys_op(paddr, size, DMA_CACHE_CLEAN);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
-	switch (direction) {
-	case DMA_NONE:
-		BUG();
-	case DMA_TO_DEVICE:
-		break;
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		__dma_phys_op(start, end, DMA_CACHE_INVAL);
-		break;
-	}
+	__dma_phys_op(paddr, size, DMA_CACHE_INVAL);
 }
 
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	__dma_phys_op(paddr, size, DMA_CACHE_FLUSH);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
+
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	unsigned long kaddr = (unsigned long)page_address(page);
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index 69c80b2155a1..b9a9f57e02be 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -12,43 +12,40 @@
 
 static bool noncoherent_supported;
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-			      enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
 	void *vaddr = phys_to_virt(paddr);
 
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
-		break;
-	case DMA_FROM_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
-		break;
-	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
-		break;
-	default:
-		break;
-	}
+	ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
 }
 
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-			   enum dma_data_direction dir)
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
 {
 	void *vaddr = phys_to_virt(paddr);
 
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		break;
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
-		break;
-	default:
-		break;
-	}
+	ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
 }
 
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	void *vaddr = phys_to_virt(paddr);
+
+	ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+#include <linux/dma-sync.h>
+
+
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	void *flush_addr = page_address(page);
diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
index 6a44c0e7ba40..41f031ae7609 100644
--- a/arch/sh/kernel/dma-coherent.c
+++ b/arch/sh/kernel/dma-coherent.c
@@ -12,22 +12,35 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	__flush_purge_region(page_address(page), size);
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
 	void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
 
-	switch (dir) {
-	case DMA_FROM_DEVICE:		/* invalidate only */
-		__flush_invalidate_region(addr, size);
-		break;
-	case DMA_TO_DEVICE:		/* writeback only */
-		__flush_wback_region(addr, size);
-		break;
-	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
-		__flush_purge_region(addr, size);
-		break;
-	default:
-		BUG();
-	}
+	__flush_wback_region(addr, size);
 }
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
+
+	__flush_invalidate_region(addr, size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
+
+	__flush_purge_region(addr, size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 4f3d26066ec2..6926ead2f208 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -300,21 +300,39 @@ arch_initcall(sparc_register_ioport);
 
 #endif /* CONFIG_SBUS */
 
-/*
- * IIep is write-through, not flushing on cpu to device transfer.
- *
- * On LEON systems without cache snooping, the entire D-CACHE must be flushed to
- * make DMA to cacheable memory coherent.
- */
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	if (dir != DMA_TO_DEVICE &&
-	    sparc_cpu_model == sparc_leon &&
+	/* IIep is write-through, not flushing on cpu to device transfer. */
+}
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	/*
+	 * On LEON systems without cache snooping, the entire D-CACHE must be
+	 * flushed to make DMA to cacheable memory coherent.
+	 */
+	if (sparc_cpu_model == sparc_leon &&
 	    !sparc_leon3_snooping_enabled())
 		leon_flush_dcache_all();
 }
 
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	arch_dma_cache_inv(paddr, size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
+
 #ifdef CONFIG_PROC_FS
 
 static int sparc_io_proc_show(struct seq_file *m, void *v)
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index ff3bf015eca4..d4ff96585545 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -43,24 +43,34 @@ static void do_cache_op(phys_addr_t paddr, size_t size,
 		}
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		do_cache_op(paddr, size, __flush_dcache_range);
-		break;
-	case DMA_FROM_DEVICE:
-		do_cache_op(paddr, size, __invalidate_dcache_range);
-		break;
-	case DMA_BIDIRECTIONAL:
-		do_cache_op(paddr, size, __flush_invalidate_dcache_range);
-		break;
-	default:
-		break;
-	}
+	do_cache_op(paddr, size, __flush_dcache_range);
 }
 
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	do_cache_op(paddr, size, __invalidate_dcache_range);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	do_cache_op(paddr, size, __flush_invalidate_dcache_range);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return false;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return false;
+}
+
+#include <linux/dma-sync.h>
+
+
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	__invalidate_dcache_range((unsigned long)page_address(page), size);
diff --git a/include/linux/dma-sync.h b/include/linux/dma-sync.h
new file mode 100644
index 000000000000..18e33d5e8eaf
--- /dev/null
+++ b/include/linux/dma-sync.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Cache operations depending on function and direction argument, inspired by
+ * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk
+ * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
+ * dma-mapping: provide a generic dma-noncoherent implementation)"
+ *
+ *          |   map          ==  for_device     |   unmap     ==  for_cpu
+ *          |----------------------------------------------------------------
+ * TO_DEV   |   writeback        writeback      |   none          none
+ * FROM_DEV |   invalidate       invalidate     |   invalidate*   invalidate*
+ * BIDIR    |   writeback        writeback      |   invalidate    invalidate
+ *
+ *     [*] needed for CPU speculative prefetches
+ *
+ * NOTE: we don't check the validity of direction argument as it is done in
+ * upper layer functions (in include/linux/dma-mapping.h)
+ *
+ * This file can be included by arch/.../kernel/dma-noncoherent.c to provide
+ * the respective high-level operations without having to expose the
+ * cache management ops to drivers.
+ */
+
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		/*
+		 * This may be an empty function on write-through caches,
+		 * and it might invalidate the cache if an architecture has
+		 * a write-back cache but no way to write it back without
+		 * invalidating
+		 */
+		arch_dma_cache_wback(paddr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		/*
+		 * FIXME: this should be handled the same across all
+		 * architectures, see
+		 * https://lore.kernel.org/all/20220606152150.GA31568@willie-the-truck/
+		 */
+		if (!arch_sync_dma_clean_before_fromdevice()) {
+			arch_dma_cache_inv(paddr, size);
+			break;
+		}
+		fallthrough;
+
+	case DMA_BIDIRECTIONAL:
+		/* Skip the invalidate here if it's done later */
+		if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
+		    arch_sync_dma_cpu_needs_post_dma_flush())
+			arch_dma_cache_wback(paddr, size);
+		else
+			arch_dma_cache_wback_inv(paddr, size);
+		break;
+
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+/*
+ * Mark the D-cache clean for these pages to avoid extra flushing.
+ */
+static void arch_dma_mark_dcache_clean(phys_addr_t paddr, size_t size)
+{
+#ifdef CONFIG_ARCH_DMA_MARK_DCACHE_CLEAN
+	unsigned long pfn = PFN_UP(paddr);
+	unsigned long off = paddr & (PAGE_SIZE - 1);
+	size_t left = size;
+
+	if (off)
+		left -= PAGE_SIZE - off;
+
+	while (left >= PAGE_SIZE) {
+		struct page *page = pfn_to_page(pfn++);
+		set_bit(PG_dcache_clean, &page->flags);
+		left -= PAGE_SIZE;
+	}
+#endif
+}
+
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		break;
+
+	case DMA_FROM_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		/* FROM_DEVICE invalidate only needed for speculative CPU prefetch */
+		if (arch_sync_dma_cpu_needs_post_dma_flush())
+			arch_dma_cache_inv(paddr, size);
+
+		if (size > PAGE_SIZE)
+			arch_dma_mark_dcache_clean(paddr, size);
+		break;
+
+	default:
+		break;
+	}
+}
+#endif