@@ -208,18 +208,18 @@ EXC( STORE t2, UNIT(6)(dst), s_exc_p10u)
208208 ADD src, src, 16*NBYTES
209209EXC( STORE t3, UNIT(7 )(dst), s_exc_p9u)
210210 ADD dst, dst, 16*NBYTES
211- EXC( LOAD t0, UNIT(-8 )(src), l_exc_copy )
212- EXC( LOAD t1, UNIT(-7 )(src), l_exc_copy )
213- EXC( LOAD t2, UNIT(-6 )(src), l_exc_copy )
214- EXC( LOAD t3, UNIT(-5 )(src), l_exc_copy )
211+ EXC( LOAD t0, UNIT(-8 )(src), l_exc_copy_rewind16 )
212+ EXC( LOAD t1, UNIT(-7 )(src), l_exc_copy_rewind16 )
213+ EXC( LOAD t2, UNIT(-6 )(src), l_exc_copy_rewind16 )
214+ EXC( LOAD t3, UNIT(-5 )(src), l_exc_copy_rewind16 )
215215EXC( STORE t0, UNIT(-8 )(dst), s_exc_p8u)
216216EXC( STORE t1, UNIT(-7 )(dst), s_exc_p7u)
217217EXC( STORE t2, UNIT(-6 )(dst), s_exc_p6u)
218218EXC( STORE t3, UNIT(-5 )(dst), s_exc_p5u)
219- EXC( LOAD t0, UNIT(-4 )(src), l_exc_copy )
220- EXC( LOAD t1, UNIT(-3 )(src), l_exc_copy )
221- EXC( LOAD t2, UNIT(-2 )(src), l_exc_copy )
222- EXC( LOAD t3, UNIT(-1 )(src), l_exc_copy )
219+ EXC( LOAD t0, UNIT(-4 )(src), l_exc_copy_rewind16 )
220+ EXC( LOAD t1, UNIT(-3 )(src), l_exc_copy_rewind16 )
221+ EXC( LOAD t2, UNIT(-2 )(src), l_exc_copy_rewind16 )
222+ EXC( LOAD t3, UNIT(-1 )(src), l_exc_copy_rewind16 )
223223EXC( STORE t0, UNIT(-4 )(dst), s_exc_p4u)
224224EXC( STORE t1, UNIT(-3 )(dst), s_exc_p3u)
225225EXC( STORE t2, UNIT(-2 )(dst), s_exc_p2u)
@@ -383,6 +383,10 @@ done:
383383 nop
384384 END(memcpy)
385385
386+ l_exc_copy_rewind16:
387+ /* Fault fixup for the UNIT(-8)..UNIT(-1) loads, which run after src and dst were already advanced by 16*NBYTES: rewind both pointers, then fall through to l_exc_copy (presumably it expects the un-advanced pointers -- confirm against its body) */
388+ SUB src, src, 16*NBYTES
389+ SUB dst, dst, 16*NBYTES
386390l_exc_copy:
387391 /*
388392 * Copy bytes from src until faulting load address (or until a
0 commit comments