1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
|
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2010, Google Inc.
*
* Brought in from coreboot uldivmod.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* A, Q = r0 + (r1 << 32)
* B, R = r2 + (r3 << 32)
* A / B = Q ... R
*/
A_0 .req r0
A_1 .req r1
B_0 .req r2
B_1 .req r3
C_0 .req r4
C_1 .req r5
D_0 .req r6
D_1 .req r7
Q_0 .req r0
Q_1 .req r1
R_0 .req r2
R_1 .req r3
THUMB(
TMP .req r8
)
.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)
stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
@ Test if B == 0
orrs ip, B_0, B_1 @ Z set -> B == 0
beq L_div_by_0
@ Test if B is power of 2: (B & (B - 1)) == 0
subs C_0, B_0, #1
sbc C_1, B_1, #0
tst C_0, B_0
tsteq B_1, C_1
beq L_pow2
@ Test if A_1 == B_1 == 0
orrs ip, A_1, B_1
beq L_div_32_32
L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
mov C_0, #1
mov C_1, #0
@ D_0 = clz A
teq A_1, #0
clz D_0, A_1
clzeq ip, A_0
addeq D_0, D_0, ip
@ D_1 = clz B
teq B_1, #0
clz D_1, B_1
clzeq ip, B_0
addeq D_1, D_1, ip
@ if clz B - clz A > 0
subs D_0, D_1, D_0
bls L_done_shift
@ B <<= (clz B - clz A)
subs D_1, D_0, #32
rsb ip, D_0, #32
movmi B_1, B_1, lsl D_0
ARM( orrmi B_1, B_1, B_0, lsr ip )
THUMB( lsrmi TMP, B_0, ip )
THUMB( orrmi B_1, B_1, TMP )
movpl B_1, B_0, lsl D_1
mov B_0, B_0, lsl D_0
@ C = 1 << (clz B - clz A)
movmi C_1, C_1, lsl D_0
ARM( orrmi C_1, C_1, C_0, lsr ip )
THUMB( lsrmi TMP, C_0, ip )
THUMB( orrmi C_1, C_1, TMP )
movpl C_1, C_0, lsl D_1
mov C_0, C_0, lsl D_0
L_done_shift:
mov D_0, #0
mov D_1, #0
@ C: current bit; D: result
#else
@ C: current bit; D: result
mov C_0, #1
mov C_1, #0
mov D_0, #0
mov D_1, #0
L_lsl_4:
cmp B_1, #0x10000000
cmpcc B_1, A_1
cmpeq B_0, A_0
bcs L_lsl_1
@ B <<= 4
mov B_1, B_1, lsl #4
orr B_1, B_1, B_0, lsr #28
mov B_0, B_0, lsl #4
@ C <<= 4
mov C_1, C_1, lsl #4
orr C_1, C_1, C_0, lsr #28
mov C_0, C_0, lsl #4
b L_lsl_4
L_lsl_1:
cmp B_1, #0x80000000
cmpcc B_1, A_1
cmpeq B_0, A_0
bcs L_subtract
@ B <<= 1
mov B_1, B_1, lsl #1
orr B_1, B_1, B_0, lsr #31
mov B_0, B_0, lsl #1
@ C <<= 1
mov C_1, C_1, lsl #1
orr C_1, C_1, C_0, lsr #31
mov C_0, C_0, lsl #1
b L_lsl_1
#endif
L_subtract:
@ if A >= B
cmp A_1, B_1
cmpeq A_0, B_0
bcc L_update
@ A -= B
subs A_0, A_0, B_0
sbc A_1, A_1, B_1
@ D |= C
orr D_0, D_0, C_0
orr D_1, D_1, C_1
L_update:
@ if A == 0: break
orrs ip, A_1, A_0
beq L_exit
@ C >>= 1
movs C_1, C_1, lsr #1
movs C_0, C_0, rrx
@ if C == 0: break
orrs ip, C_1, C_0
beq L_exit
@ B >>= 1
movs B_1, B_1, lsr #1
mov B_0, B_0, rrx
b L_subtract
L_exit:
@ Note: A, B & Q, R are aliases
mov R_0, A_0
mov R_1, A_1
mov Q_0, D_0
mov Q_1, D_1
ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
L_div_32_32:
@ Note: A_0 & r0 are aliases
@ Q_1 r1
mov r1, B_0
bl __aeabi_uidivmod
mov R_0, r1
mov R_1, #0
mov Q_1, #0
ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
L_pow2:
#ifdef HAVE_CLZ
@ Note: A, B and Q, R are aliases
@ R = A & (B - 1)
and C_0, A_0, C_0
and C_1, A_1, C_1
@ Q = A >> log2(B)
@ Note: B must not be 0 here!
clz D_0, B_0
add D_1, D_0, #1
rsbs D_0, D_0, #31
bpl L_1
clz D_0, B_1
rsb D_0, D_0, #31
mov A_0, A_1, lsr D_0
add D_0, D_0, #32
L_1:
movpl A_0, A_0, lsr D_0
ARM( orrpl A_0, A_0, A_1, lsl D_1 )
THUMB( lslpl TMP, A_1, D_1 )
THUMB( orrpl A_0, A_0, TMP )
mov A_1, A_1, lsr D_0
@ Mov back C to R
mov R_0, C_0
mov R_1, C_1
ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
@ Note: A, B and Q, R are aliases
@ R = A & (B - 1)
and C_0, A_0, C_0
and C_1, A_1, C_1
@ Q = A >> log2(B)
@ Note: B must not be 0 here!
@ Count the leading zeroes in B.
mov D_0, #0
orrs B_0, B_0, B_0
@ If B is greater than 1 << 31, divide A and B by 1 << 32.
moveq A_0, A_1
moveq A_1, #0
moveq B_0, B_1
@ Count the remaining leading zeroes in B.
movs B_1, B_0, lsl #16
addeq D_0, #16
moveq B_0, B_0, lsr #16
tst B_0, #0xff
addeq D_0, #8
moveq B_0, B_0, lsr #8
tst B_0, #0xf
addeq D_0, #4
moveq B_0, B_0, lsr #4
tst B_0, #0x3
addeq D_0, #2
moveq B_0, B_0, lsr #2
tst B_0, #0x1
addeq D_0, #1
@ Shift A to the right by the appropriate amount.
rsb D_1, D_0, #32
mov Q_0, A_0, lsr D_0
ARM( orr Q_0, Q_0, A_1, lsl D_1 )
THUMB( lsl A_1, D_1 )
THUMB( orr Q_0, A_1 )
mov Q_1, A_1, lsr D_0
@ Move C to R
mov R_0, C_0
mov R_1, C_1
ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif
L_div_by_0:
bl __div0
@ As wrong as it could be
mov Q_0, #0
mov Q_1, #0
mov R_0, #0
mov R_1, #0
ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection
|