aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMax Filippov2017-12-09 21:18:47 -0800
committerMax Filippov2017-12-10 14:48:53 -0800
commit0013aceb307482ba83a5b6a29f6ba1791be0d32b (patch)
tree63f0312a68e62d0f52212ce7bdab9d75e4ee6ead /arch
parent2da03d4114b2587f0e8e45f4862074e34daee64e (diff)
xtensa: clean up fixups in assembly code
Remove duplicate definitions of EX() and similar TRY/CATCH and SRC/DST macros from assembly sources and put single definition into asm/asmmacro.h Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/xtensa/include/asm/asmmacro.h7
-rw-r--r--arch/xtensa/kernel/entry.S33
-rw-r--r--arch/xtensa/lib/checksum.S74
-rw-r--r--arch/xtensa/lib/memset.S36
-rw-r--r--arch/xtensa/lib/strncpy_user.S52
-rw-r--r--arch/xtensa/lib/strnlen_user.S19
-rw-r--r--arch/xtensa/lib/usercopy.S106
7 files changed, 133 insertions, 194 deletions
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index 746dcc8b5abc..d2a4415a4c08 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -150,5 +150,12 @@
__endl \ar \as
.endm
+/* Load or store instructions that may cause exceptions use the EX macro. */
+
+#define EX(handler) \
+ .section __ex_table, "a"; \
+ .word 97f, handler; \
+ .previous \
+97:
#endif /* _XTENSA_ASMMACRO_H */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 5a2110bb5902..a27a9a65635b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -14,6 +14,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
#include <asm/processor.h>
#include <asm/coprocessor.h>
#include <asm/thread_info.h>
@@ -1094,35 +1095,12 @@ ENDPROC(fast_syscall_unrecoverable)
* < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
*
* Note: we don't have to save a2; a2 holds the return value
- *
- * We use the two macros TRY and CATCH:
- *
- * TRY adds an entry to the __ex_table fixup table for the immediately
- * following instruction.
- *
- * CATCH catches any exception that occurred at one of the preceding TRY
- * statements and continues from there
- *
- * Usage TRY l32i a0, a1, 0
- * <other code>
- * done: rfe
- * CATCH <set return code>
- * j done
*/
.literal_position
#ifdef CONFIG_FAST_SYSCALL_XTENSA
-#define TRY \
- .section __ex_table, "a"; \
- .word 66f, 67f; \
- .text; \
-66:
-
-#define CATCH \
-67:
-
ENTRY(fast_syscall_xtensa)
s32i a7, a2, PT_AREG7 # we need an additional register
@@ -1136,9 +1114,9 @@ ENTRY(fast_syscall_xtensa)
.Lswp: /* Atomic compare and swap */
-TRY l32i a0, a3, 0 # read old value
+EX(.Leac) l32i a0, a3, 0 # read old value
bne a0, a4, 1f # same as old value? jump
-TRY s32i a5, a3, 0 # different, modify value
+EX(.Leac) s32i a5, a3, 0 # different, modify value
l32i a7, a2, PT_AREG7 # restore a7
l32i a0, a2, PT_AREG0 # restore a0
movi a2, 1 # and return 1
@@ -1151,12 +1129,12 @@ TRY s32i a5, a3, 0 # different, modify value
.Lnswp: /* Atomic set, add, and exg_add. */
-TRY l32i a7, a3, 0 # orig
+EX(.Leac) l32i a7, a3, 0 # orig
addi a6, a6, -SYS_XTENSA_ATOMIC_SET
add a0, a4, a7 # + arg
moveqz a0, a4, a6 # set
addi a6, a6, SYS_XTENSA_ATOMIC_SET
-TRY s32i a0, a3, 0 # write new value
+EX(.Leac) s32i a0, a3, 0 # write new value
mov a0, a2
mov a2, a7
@@ -1164,7 +1142,6 @@ TRY s32i a0, a3, 0 # write new value
l32i a0, a0, PT_AREG0 # restore a0
rfe
-CATCH
.Leac: l32i a7, a2, PT_AREG7 # restore a7
l32i a0, a2, PT_AREG0 # restore a0
movi a2, -EFAULT
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index 4eb573d2720e..528fe0dd9339 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -14,9 +14,10 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/errno.h>
+#include <linux/errno.h>
#include <linux/linkage.h>
#include <variant/core.h>
+#include <asm/asmmacro.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
@@ -175,23 +176,8 @@ ENDPROC(csum_partial)
/*
* Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for each access type.
*/
-#define SRC(y...) \
- 9999: y; \
- .section __ex_table, "a"; \
- .long 9999b, 6001f ; \
- .previous
-
-#define DST(y...) \
- 9999: y; \
- .section __ex_table, "a"; \
- .long 9999b, 6002f ; \
- .previous
-
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
int sum, int *src_err_ptr, int *dst_err_ptr)
@@ -244,28 +230,28 @@ ENTRY(csum_partial_copy_generic)
add a10, a10, a2 /* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
-SRC( l32i a9, a2, 0 )
-SRC( l32i a8, a2, 4 )
-DST( s32i a9, a3, 0 )
-DST( s32i a8, a3, 4 )
+EX(10f) l32i a9, a2, 0
+EX(10f) l32i a8, a2, 4
+EX(11f) s32i a9, a3, 0
+EX(11f) s32i a8, a3, 4
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
-SRC( l32i a9, a2, 8 )
-SRC( l32i a8, a2, 12 )
-DST( s32i a9, a3, 8 )
-DST( s32i a8, a3, 12 )
+EX(10f) l32i a9, a2, 8
+EX(10f) l32i a8, a2, 12
+EX(11f) s32i a9, a3, 8
+EX(11f) s32i a8, a3, 12
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
-SRC( l32i a9, a2, 16 )
-SRC( l32i a8, a2, 20 )
-DST( s32i a9, a3, 16 )
-DST( s32i a8, a3, 20 )
+EX(10f) l32i a9, a2, 16
+EX(10f) l32i a8, a2, 20
+EX(11f) s32i a9, a3, 16
+EX(11f) s32i a8, a3, 20
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
-SRC( l32i a9, a2, 24 )
-SRC( l32i a8, a2, 28 )
-DST( s32i a9, a3, 24 )
-DST( s32i a8, a3, 28 )
+EX(10f) l32i a9, a2, 24
+EX(10f) l32i a8, a2, 28
+EX(11f) s32i a9, a3, 24
+EX(11f) s32i a8, a3, 28
ONES_ADD(a5, a9)
ONES_ADD(a5, a8)
addi a2, a2, 32
@@ -284,8 +270,8 @@ DST( s32i a8, a3, 28 )
add a10, a10, a2 /* a10 = end of last 4-byte src chunk */
.Loop6:
#endif
-SRC( l32i a9, a2, 0 )
-DST( s32i a9, a3, 0 )
+EX(10f) l32i a9, a2, 0
+EX(11f) s32i a9, a3, 0
ONES_ADD(a5, a9)
addi a2, a2, 4
addi a3, a3, 4
@@ -315,8 +301,8 @@ DST( s32i a9, a3, 0 )
add a10, a10, a2 /* a10 = end of last 2-byte src chunk */
.Loop7:
#endif
-SRC( l16ui a9, a2, 0 )
-DST( s16i a9, a3, 0 )
+EX(10f) l16ui a9, a2, 0
+EX(11f) s16i a9, a3, 0
ONES_ADD(a5, a9)
addi a2, a2, 2
addi a3, a3, 2
@@ -326,8 +312,8 @@ DST( s16i a9, a3, 0 )
4:
/* This section processes a possible trailing odd byte. */
_bbci.l a4, 0, 8f /* 1-byte chunk */
-SRC( l8ui a9, a2, 0 )
-DST( s8i a9, a3, 0 )
+EX(10f) l8ui a9, a2, 0
+EX(11f) s8i a9, a3, 0
#ifdef __XTENSA_EB__
slli a9, a9, 8 /* shift byte to bits 8..15 */
#endif
@@ -350,10 +336,10 @@ DST( s8i a9, a3, 0 )
add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
-SRC( l8ui a9, a2, 0 )
-SRC( l8ui a8, a2, 1 )
-DST( s8i a9, a3, 0 )
-DST( s8i a8, a3, 1 )
+EX(10f) l8ui a9, a2, 0
+EX(10f) l8ui a8, a2, 1
+EX(11f) s8i a9, a3, 0
+EX(11f) s8i a8, a3, 1
#ifdef __XTENSA_EB__
slli a9, a9, 8 /* combine into a single 16-bit value */
#else /* for checksum computation */
@@ -381,7 +367,7 @@ ENDPROC(csum_partial_copy_generic)
a12 = original dst for exception handling
*/
-6001:
+10:
_movi a2, -EFAULT
s32i a2, a6, 0 /* src_err_ptr */
@@ -403,7 +389,7 @@ ENDPROC(csum_partial_copy_generic)
2:
retw
-6002:
+11:
movi a2, -EFAULT
s32i a2, a7, 0 /* dst_err_ptr */
movi a2, 0
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 10b8c400f175..7a724edaf4f1 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -12,6 +12,7 @@
*/
#include <variant/core.h>
+#include <asm/asmmacro.h>
/*
* void *memset(void *dst, int c, size_t length)
@@ -28,15 +29,6 @@
* the alignment labels).
*/
-/* Load or store instructions that may cause exceptions use the EX macro. */
-
-#define EX(insn,reg1,reg2,offset,handler) \
-9: insn reg1, reg2, offset; \
- .section __ex_table, "a"; \
- .word 9b, handler; \
- .previous
-
-
.text
.align 4
.global memset
@@ -73,10 +65,10 @@ memset:
add a6, a6, a5 # a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
- EX(s32i, a3, a5, 0, memset_fixup)
- EX(s32i, a3, a5, 4, memset_fixup)
- EX(s32i, a3, a5, 8, memset_fixup)
- EX(s32i, a3, a5, 12, memset_fixup)
+EX(10f) s32i a3, a5, 0
+EX(10f) s32i a3, a5, 4
+EX(10f) s32i a3, a5, 8
+EX(10f) s32i a3, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a5, a6, .Loop1
@@ -84,23 +76,23 @@ memset:
.Loop1done:
bbci.l a4, 3, .L2
# set 8 bytes
- EX(s32i, a3, a5, 0, memset_fixup)
- EX(s32i, a3, a5, 4, memset_fixup)
+EX(10f) s32i a3, a5, 0
+EX(10f) s32i a3, a5, 4
addi a5, a5, 8
.L2:
bbci.l a4, 2, .L3
# set 4 bytes
- EX(s32i, a3, a5, 0, memset_fixup)
+EX(10f) s32i a3, a5, 0
addi a5, a5, 4
.L3:
bbci.l a4, 1, .L4
# set 2 bytes
- EX(s16i, a3, a5, 0, memset_fixup)
+EX(10f) s16i a3, a5, 0
addi a5, a5, 2
.L4:
bbci.l a4, 0, .L5
# set 1 byte
- EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i a3, a5, 0
.L5:
.Lret1:
retw
@@ -114,7 +106,7 @@ memset:
bbci.l a5, 0, .L20 # branch if dst alignment half-aligned
# dst is only byte aligned
# set 1 byte
- EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i a3, a5, 0
addi a5, a5, 1
addi a4, a4, -1
# now retest if dst aligned
@@ -122,7 +114,7 @@ memset:
.L20:
# dst half-aligned
# set 2 bytes
- EX(s16i, a3, a5, 0, memset_fixup)
+EX(10f) s16i a3, a5, 0
addi a5, a5, 2
addi a4, a4, -2
j .L0 # dst is now aligned, return to main algorithm
@@ -141,7 +133,7 @@ memset:
add a6, a5, a4 # a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
- EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i a3, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
blt a5, a6, .Lbyteloop
@@ -155,6 +147,6 @@ memset:
/* We return zero if a failure occurred. */
-memset_fixup:
+10:
movi a2, 0
retw
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index 1ad0ecf45368..827e1b393f3f 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -11,16 +11,9 @@
* Copyright (C) 2002 Tensilica Inc.
*/
-#include <variant/core.h>
#include <linux/errno.h>
-
-/* Load or store instructions that may cause exceptions use the EX macro. */
-
-#define EX(insn,reg1,reg2,offset,handler) \
-9: insn reg1, reg2, offset; \
- .section __ex_table, "a"; \
- .word 9b, handler; \
- .previous
+#include <variant/core.h>
+#include <asm/asmmacro.h>
/*
* char *__strncpy_user(char *dst, const char *src, size_t len)
@@ -75,9 +68,9 @@ __strncpy_user:
j .Ldstunaligned
.Lsrc1mod2: # src address is odd
- EX(l8ui, a9, a3, 0, fixup_l) # get byte 0
+EX(11f) l8ui a9, a3, 0 # get byte 0
addi a3, a3, 1 # advance src pointer
- EX(s8i, a9, a11, 0, fixup_s) # store byte 0
+EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
@@ -85,16 +78,16 @@ __strncpy_user:
bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned
.Lsrc2mod4: # src address is 2 mod 4
- EX(l8ui, a9, a3, 0, fixup_l) # get byte 0
+EX(11f) l8ui a9, a3, 0 # get byte 0
/* 1-cycle interlock */
- EX(s8i, a9, a11, 0, fixup_s) # store byte 0
+EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
beqz a4, .Lret # if len is zero
- EX(l8ui, a9, a3, 1, fixup_l) # get byte 0
+EX(11f) l8ui a9, a3, 1 # get byte 0
addi a3, a3, 2 # advance src pointer
- EX(s8i, a9, a11, 0, fixup_s) # store byte 0
+EX(10f) s8i a9, a11, 0 # store byte 0
beqz a9, .Lret # if byte 0 is zero
addi a11, a11, 1 # advance dst pointer
addi a4, a4, -1 # decrement len
@@ -117,12 +110,12 @@ __strncpy_user:
add a12, a12, a11 # a12 = end of last 4B chunck
#endif
.Loop1:
- EX(l32i, a9, a3, 0, fixup_l) # get word from src
+EX(11f) l32i a9, a3, 0 # get word from src
addi a3, a3, 4 # advance src pointer
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
bnone a9, a7, .Lz2 # if byte 2 is zero
- EX(s32i, a9, a11, 0, fixup_s) # store word to dst
+EX(10f) s32i a9, a11, 0 # store word to dst
bnone a9, a8, .Lz3 # if byte 3 is zero
addi a11, a11, 4 # advance dst pointer
#if !XCHAL_HAVE_LOOPS
@@ -132,7 +125,7 @@ __strncpy_user:
.Loop1done:
bbci.l a4, 1, .L100
# copy 2 bytes
- EX(l16ui, a9, a3, 0, fixup_l)
+EX(11f) l16ui a9, a3, 0
addi a3, a3, 2 # advance src pointer
#ifdef __XTENSA_EB__
bnone a9, a7, .Lz0 # if byte 2 is zero
@@ -141,13 +134,13 @@ __strncpy_user:
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
#endif
- EX(s16i, a9, a11, 0, fixup_s)
+EX(10f) s16i a9, a11, 0
addi a11, a11, 2 # advance dst pointer
.L100:
bbci.l a4, 0, .Lret
- EX(l8ui, a9, a3, 0, fixup_l)
+EX(11f) l8ui a9, a3, 0
/* slot */
- EX(s8i, a9, a11, 0, fixup_s)
+EX(10f) s8i a9, a11, 0
beqz a9, .Lret # if byte is zero
addi a11, a11, 1-3 # advance dst ptr 1, but also cancel
# the effect of adding 3 in .Lz3 code
@@ -161,14 +154,14 @@ __strncpy_user:
#ifdef __XTENSA_EB__
movi a9, 0
#endif /* __XTENSA_EB__ */
- EX(s8i, a9, a11, 0, fixup_s)
+EX(10f) s8i a9, a11, 0
sub a2, a11, a2 # compute strlen
retw
.Lz1: # byte 1 is zero
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
- EX(s16i, a9, a11, 0, fixup_s)
+EX(10f) s16i a9, a11, 0
addi a11, a11, 1 # advance dst pointer
sub a2, a11, a2 # compute strlen
retw
@@ -176,9 +169,9 @@ __strncpy_user:
#ifdef __XTENSA_EB__
extui a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
- EX(s16i, a9, a11, 0, fixup_s)
+EX(10f) s16i a9, a11, 0
movi a9, 0
- EX(s8i, a9, a11, 2, fixup_s)
+EX(10f) s8i a9, a11, 2
addi a11, a11, 2 # advance dst pointer
sub a2, a11, a2 # compute strlen
retw
@@ -196,9 +189,9 @@ __strncpy_user:
add a12, a11, a4 # a12 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
- EX(l8ui, a9, a3, 0, fixup_l)
+EX(11f) l8ui a9, a3, 0
addi a3, a3, 1
- EX(s8i, a9, a11, 0, fixup_s)
+EX(10f) s8i a9, a11, 0
beqz a9, .Lunalignedend
addi a11, a11, 1
#if !XCHAL_HAVE_LOOPS
@@ -218,8 +211,7 @@ __strncpy_user:
* implementation in memset(). Thus, we differentiate between
* load/store fixups. */
-fixup_s:
-fixup_l:
+10:
+11:
movi a2, -EFAULT
retw
-
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 4c03b1e581e9..9404ac46ce4c 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -12,14 +12,7 @@
*/
#include <variant/core.h>
-
-/* Load or store instructions that may cause exceptions use the EX macro. */
-
-#define EX(insn,reg1,reg2,offset,handler) \
-9: insn reg1, reg2, offset; \
- .section __ex_table, "a"; \
- .word 9b, handler; \
- .previous
+#include <asm/asmmacro.h>
/*
* size_t __strnlen_user(const char *s, size_t len)
@@ -77,7 +70,7 @@ __strnlen_user:
add a10, a10, a4 # a10 = end of last 4B chunk
#endif /* XCHAL_HAVE_LOOPS */
.Loop:
- EX(l32i, a9, a4, 4, lenfixup) # get next word of string
+EX(10f) l32i a9, a4, 4 # get next word of string
addi a4, a4, 4 # advance string pointer
bnone a9, a5, .Lz0 # if byte 0 is zero
bnone a9, a6, .Lz1 # if byte 1 is zero
@@ -88,7 +81,7 @@ __strnlen_user:
#endif
.Ldone:
- EX(l32i, a9, a4, 4, lenfixup) # load 4 bytes for remaining checks
+EX(10f) l32i a9, a4, 4 # load 4 bytes for remaining checks
bbci.l a3, 1, .L100
# check two more bytes (bytes 0, 1 of word)
@@ -125,14 +118,14 @@ __strnlen_user:
retw
.L1mod2: # address is odd
- EX(l8ui, a9, a4, 4, lenfixup) # get byte 0
+EX(10f) l8ui a9, a4, 4 # get byte 0
addi a4, a4, 1 # advance string pointer
beqz a9, .Lz3 # if byte 0 is zero
bbci.l a4, 1, .Laligned # if string pointer is now word-aligned
.L2mod4: # address is 2 mod 4
addi a4, a4, 2 # advance ptr for aligned access
- EX(l32i, a9, a4, 0, lenfixup) # get word with first two bytes of string
+EX(10f) l32i a9, a4, 0 # get word with first two bytes of string
bnone a9, a7, .Lz2 # if byte 2 (of word, not string) is zero
bany a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero
# byte 3 is zero
@@ -142,6 +135,6 @@ __strnlen_user:
.section .fixup, "ax"
.align 4
-lenfixup:
+10:
movi a2, 0
retw
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index d9cd766bde3e..4172b73b0364 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -54,6 +54,7 @@
*/
#include <variant/core.h>
+#include <asm/asmmacro.h>
#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src R, W0, W1
@@ -63,15 +64,6 @@
#define SSA8(R) ssa8l R
#endif
-/* Load or store instructions that may cause exceptions use the EX macro. */
-
-#define EX(insn,reg1,reg2,offset,handler) \
-9: insn reg1, reg2, offset; \
- .section __ex_table, "a"; \
- .word 9b, handler; \
- .previous
-
-
.text
.align 4
.global __xtensa_copy_user
@@ -102,9 +94,9 @@ __xtensa_copy_user:
bltui a4, 7, .Lbytecopy # do short copies byte by byte
# copy 1 byte
- EX(l8ui, a6, a3, 0, fixup)
+EX(10f) l8ui a6, a3, 0
addi a3, a3, 1
- EX(s8i, a6, a5, 0, fixup)
+EX(10f) s8i a6, a5, 0
addi a5, a5, 1
addi a4, a4, -1
bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
@@ -112,11 +104,11 @@ __xtensa_copy_user:
.Ldst2mod4: # dst 16-bit aligned
# copy 2 bytes
bltui a4, 6, .Lbytecopy # do short copies byte by byte
- EX(l8ui, a6, a3, 0, fixup)
- EX(l8ui, a7, a3, 1, fixup)
+EX(10f) l8ui a6, a3, 0
+EX(10f) l8ui a7, a3, 1
addi a3, a3, 2
- EX(s8i, a6, a5, 0, fixup)
- EX(s8i, a7, a5, 1, fixup)
+EX(10f) s8i a6, a5, 0
+EX(10f) s8i a7, a5, 1
addi a5, a5, 2
addi a4, a4, -2
j .Ldstaligned # dst is now aligned, return to main algorithm
@@ -135,9 +127,9 @@ __xtensa_copy_user:
add a7, a3, a4 # a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
- EX(l8ui, a6, a3, 0, fixup)
+EX(10f) l8ui a6, a3, 0
addi a3, a3, 1
- EX(s8i, a6, a5, 0, fixup)
+EX(10f) s8i a6, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
blt a3, a7, .Lnextbyte
@@ -161,15 +153,15 @@ __xtensa_copy_user:
add a8, a8, a3 # a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
- EX(l32i, a6, a3, 0, fixup)
- EX(l32i, a7, a3, 4, fixup)
- EX(s32i, a6, a5, 0, fixup)
- EX(l32i, a6, a3, 8, fixup)
- EX(s32i, a7, a5, 4, fixup)
- EX(l32i, a7, a3, 12, fixup)
- EX(s32i, a6, a5, 8, fixup)
+EX(10f) l32i a6, a3, 0
+EX(10f) l32i a7, a3, 4
+EX(10f) s32i a6, a5, 0
+EX(10f) l32i a6, a3, 8
+EX(10f) s32i a7, a5, 4
+EX(10f) l32i a7, a3, 12
+EX(10f) s32i a6, a5, 8
addi a3, a3, 16
- EX(s32i, a7, a5, 12, fixup)
+EX(10f) s32i a7, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a3, a8, .Loop1
@@ -177,31 +169,31 @@ __xtensa_copy_user:
.Loop1done:
bbci.l a4, 3, .L2
# copy 8 bytes
- EX(l32i, a6, a3, 0, fixup)
- EX(l32i, a7, a3, 4, fixup)
+EX(10f) l32i a6, a3, 0
+EX(10f) l32i a7, a3, 4
addi a3, a3, 8
- EX(s32i, a6, a5, 0, fixup)
- EX(s32i, a7, a5, 4, fixup)
+EX(10f) s32i a6, a5, 0
+EX(10f) s32i a7, a5, 4
addi a5, a5, 8
.L2:
bbci.l a4, 2, .L3
# copy 4 bytes
- EX(l32i, a6, a3, 0, fixup)
+EX(10f) l32i a6, a3, 0
addi a3, a3, 4
- EX(s32i, a6, a5, 0, fixup)
+EX(10f) s32i a6, a5, 0
addi a5, a5, 4
.L3:
bbci.l a4, 1, .L4
# copy 2 bytes
- EX(l16ui, a6, a3, 0, fixup)
+EX(10f) l16ui a6, a3, 0
addi a3, a3, 2
- EX(s16i, a6, a5, 0, fixup)
+EX(10f) s16i a6, a5, 0
addi a5, a5, 2
.L4:
bbci.l a4, 0, .L5
# copy 1 byte
- EX(l8ui, a6, a3, 0, fixup)
- EX(s8i, a6, a5, 0, fixup)
+EX(10f) l8ui a6, a3, 0
+EX(10f) s8i a6, a5, 0
.L5:
movi a2, 0 # return success for len bytes copied
retw
@@ -217,7 +209,7 @@ __xtensa_copy_user:
# copy 16 bytes per iteration for word-aligned dst and unaligned src
and a10, a3, a8 # save unalignment offset for below
sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
- EX(l32i, a6, a3, 0, fixup) # load first word
+EX(10f) l32i a6, a3, 0 # load first word
#if XCHAL_HAVE_LOOPS
loopnez a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
@@ -226,19 +218,19 @@ __xtensa_copy_user:
add a12, a12, a3 # a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
- EX(l32i, a7, a3, 4, fixup)
- EX(l32i, a8, a3, 8, fixup)
+EX(10f) l32i a7, a3, 4
+EX(10f) l32i a8, a3, 8
ALIGN( a6, a6, a7)
- EX(s32i, a6, a5, 0, fixup)
- EX(l32i, a9, a3, 12, fixup)
+EX(10f) s32i a6, a5, 0
+EX(10f) l32i a9, a3, 12
ALIGN( a7, a7, a8)
- EX(s32i, a7, a5, 4, fixup)
- EX(l32i, a6, a3, 16, fixup)
+EX(10f) s32i a7, a5, 4
+EX(10f) l32i a6, a3, 16
ALIGN( a8, a8, a9)
- EX(s32i, a8, a5, 8, fixup)
+EX(10f) s32i a8, a5, 8
addi a3, a3, 16
ALIGN( a9, a9, a6)
- EX(s32i, a9, a5, 12, fixup)
+EX(10f) s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
blt a3, a12, .Loop2
@@ -246,39 +238,39 @@ __xtensa_copy_user:
.Loop2done:
bbci.l a4, 3, .L12
# copy 8 bytes
- EX(l32i, a7, a3, 4, fixup)
- EX(l32i, a8, a3, 8, fixup)
+EX(10f) l32i a7, a3, 4
+EX(10f) l32i a8, a3, 8
ALIGN( a6, a6, a7)
- EX(s32i, a6, a5, 0, fixup)
+EX(10f) s32i a6, a5, 0
addi a3, a3, 8
ALIGN( a7, a7, a8)
- EX(s32i, a7, a5, 4, fixup)
+EX(10f) s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
.L12:
bbci.l a4, 2, .L13
# copy 4 bytes
- EX(l32i, a7, a3, 4, fixup)
+EX(10f) l32i a7, a3, 4
addi a3, a3, 4
ALIGN( a6, a6, a7)
- EX(s32i, a6, a5, 0, fixup)
+EX(10f) s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
.L13:
add a3, a3, a10 # readjust a3 with correct misalignment
bbci.l a4, 1, .L14
# copy 2 bytes
- EX(l8ui, a6, a3, 0, fixup)
- EX(l8ui, a7, a3, 1, fixup)
+EX(10f) l8ui a6, a3, 0
+EX(10f) l8ui a7, a3, 1
addi a3, a3, 2
- EX(s8i, a6, a5, 0, fixup)
- EX(s8i, a7, a5, 1, fixup)
+EX(10f) s8i a6, a5, 0
+EX(10f) s8i a7, a5, 1
addi a5, a5, 2
.L14:
bbci.l a4, 0, .L15
# copy 1 byte
- EX(l8ui, a6, a3, 0, fixup)
- EX(s8i, a6, a5, 0, fixup)
+EX(10f) l8ui a6, a3, 0
+EX(10f) s8i a6, a5, 0
.L15:
movi a2, 0 # return success for len bytes copied
retw
@@ -294,7 +286,7 @@ __xtensa_copy_user:
*/
-fixup:
+10:
sub a2, a5, a2 /* a2 <-- bytes copied */
sub a2, a11, a2 /* a2 <-- bytes not copied */
retw