@@ -72,8 +72,8 @@ static inline void arch_wmb_pmem(void)
  * @size: number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
 {
@@ -87,15 +87,6 @@ static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
 		clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-	return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr: PMEM destination address
@@ -114,8 +105,36 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
 	/* TODO: skip the write-back by always using non-temporal stores */
 	len = copy_from_iter_nocache(vaddr, bytes, i);
 
-	if (__iter_needs_pmem_wb(i))
-		__arch_wb_cache_pmem(vaddr, bytes);
+	/*
+	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
+	 * non-temporal stores for the bulk of the transfer, but we need
+	 * to manually flush if the transfer is unaligned. A cached
+	 * memory copy is used when destination or size is not naturally
+	 * aligned. That is:
+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
+	 *   - Require 4-byte alignment when size is 4 bytes.
+	 *
+	 * In the non-iovec case the entire destination needs to be
+	 * flushed.
+	 */
+	if (iter_is_iovec(i)) {
+		unsigned long flushed, dest = (unsigned long) addr;
+
+		if (bytes < 8) {
+			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+				__arch_wb_cache_pmem(vaddr, 1);
+		} else {
+			if (!IS_ALIGNED(dest, 8)) {
+				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+				__arch_wb_cache_pmem(vaddr, 1);
+			}
+
+			flushed = dest - (unsigned long) addr;
+			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+				__arch_wb_cache_pmem(vaddr + bytes - 1, 1);
+		}
+	} else
+		__arch_wb_cache_pmem(vaddr, bytes);
 
 	return len;
 }
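
For readers outside the kernel tree, the rounding behavior described by the
updated __arch_wb_cache_pmem() kernel-doc can be modeled in a standalone
userspace sketch. This is illustrative only: flush_line() is a hypothetical
stand-in for the clwb instruction, and the fixed 64-byte line size is an
assumption in place of boot_cpu_data.x86_clflush_size.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#define CACHE_LINE 64	/* assumed stand-in for boot_cpu_data.x86_clflush_size */

/* hypothetical stand-in for clwb(): report which line would be written back */
static void flush_line(uintptr_t line)
{
	printf("clwb line at 0x%lx\n", (unsigned long)line);
}

/* mirrors __arch_wb_cache_pmem(): round @vaddr down to a line boundary,
 * then walk line by line until @vaddr + @size is covered */
static void wb_cache_range(uintptr_t vaddr, size_t size)
{
	uintptr_t p, end = vaddr + size;

	for (p = vaddr & ~((uintptr_t)CACHE_LINE - 1); p < end; p += CACHE_LINE)
		flush_line(p);
}

int main(void)
{
	/* a 1-byte write-back still flushes the whole containing line */
	wb_cache_range(0x1003, 1);
	/* a range that straddles a line boundary flushes two lines */
	wb_cache_range(0x103c, 8);
	return 0;
}

The takeaway is that even a 1-byte write-back covers the whole containing
cache line, which is why the patched code can pass a length of 1 and still
flush an entire unaligned head or tail.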
0 commit comments
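
Similarly, the new flush heuristic in arch_copy_from_iter_pmem() can be
exercised with a small harness that mirrors its iovec branch. Again a sketch
under assumptions: IS_ALIGNED()/ALIGN() are re-derived locally rather than
taken from kernel headers, CLFLUSH_SIZE is an assumed 64 bytes, and wb() is a
hypothetical stub that only records the ranges the kernel code would hand to
__arch_wb_cache_pmem().

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#define CLFLUSH_SIZE 64UL	/* assumed cache line size */

/* local equivalents of the kernel's IS_ALIGNED()/ALIGN() macros */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

/* hypothetical stub: record a range the kernel code would write back */
static void wb(uintptr_t addr, size_t len)
{
	printf("  flush %zu byte(s) at 0x%lx\n", len, (unsigned long)addr);
}

/* mirrors the iovec branch of the patched arch_copy_from_iter_pmem() */
static void flush_unaligned(uintptr_t dest, size_t bytes)
{
	uintptr_t start = dest;
	size_t flushed;

	printf("dest=0x%lx bytes=%zu:\n", (unsigned long)dest, bytes);
	if (bytes < 8) {
		/* small copies use NT stores only for an aligned 4-byte write */
		if (!IS_ALIGNED(dest, 4) || bytes != 4)
			wb(start, 1);
	} else {
		/* an unaligned head is copied with cached stores: flush it */
		if (!IS_ALIGNED(dest, 8)) {
			dest = ALIGN(dest, CLFLUSH_SIZE);
			wb(start, 1);
		}

		/* a tail that is not a multiple of 8 is also cached: flush it */
		flushed = dest - start;
		if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
			wb(start + bytes - 1, 1);
	}
}

int main(void)
{
	flush_unaligned(0x1000, 64);	/* fully aligned: nothing flushed */
	flush_unaligned(0x1003, 64);	/* unaligned head and tail flushed */
	flush_unaligned(0x1000, 4);	/* aligned 4-byte write: NT store */
	flush_unaligned(0x1001, 2);	/* tiny unaligned write: flushed */
	return 0;
}

Running it shows that a fully aligned 64-byte copy triggers no flush, while a
copy starting at 0x1003 flushes both the cached head and the cached tail,
matching the __copy_user_nocache alignment rules quoted in the new comment.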