Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / mesa / sparc / clip.S
1 /*
2 * Clip testing in SPARC assembly
3 */
4
/*
 * Byte offsets of the vector-header fields this file accesses, together
 * with the load instruction that matches the pointer width.  The 64-bit
 * variant uses ldx and puts START 8 bytes after DATA; the 32-bit variant
 * uses ld and puts it 4 bytes after.  In both variants COUNT, STRIDE, SIZE
 * and FLAGS are spaced 4 bytes apart.
 * NOTE(review): the offsets are hard-coded and presumably must mirror the
 * C declaration of GLvector4f -- confirm against the header.
 */
5 #if __arch64__
6 #define LDPTR ldx
7 #define V4F_DATA 0x00
8 #define V4F_START 0x08
9 #define V4F_COUNT 0x10
10 #define V4F_STRIDE 0x14
11 #define V4F_SIZE 0x18
12 #define V4F_FLAGS 0x1c
13 #else
14 #define LDPTR ld
15 #define V4F_DATA 0x00
16 #define V4F_START 0x04
17 #define V4F_COUNT 0x08
18 #define V4F_STRIDE 0x0c
19 #define V4F_SIZE 0x10
20 #define V4F_FLAGS 0x14
21 #endif
22
/*
 * Bit masks for the word at V4F_FLAGS.  In the code visible here only
 * VEC_SIZE_4 is used (OR'ed into proj_vec's flags by
 * _mesa_sparc_cliptest_points4).
 */
23 #define VEC_SIZE_1 1
24 #define VEC_SIZE_2 3
25 #define VEC_SIZE_3 7
26 #define VEC_SIZE_4 15
27
/*
 * Declare %g2 and %g3 as scratch to the assembler; both are written by the
 * cliptest routines in this file.
 */
28 .register %g2, #scratch
29 .register %g3, #scratch
30
31 .text
32 .align 64
33
/*
 * Single-precision constant 1.0f.  It lives in .text directly in front of
 * clip_table: the cliptest routines compute this label's address relative
 * to the program counter and then add 4 to reach clip_table, so the two
 * must stay adjacent with nothing inserted between them.
 */
34 one_dot_zero:
35 .word 0x3f800000 /* 1.0f */
36
37 /* This trick is shamelessly stolen from the x86
38 * Mesa asm. Very clever, and we can do it too
39 * since we have the necessary add with carry
40 * instructions on Sparc.
41 */
/*
 * 128-entry byte table (16 rows of 8 bytes).  The cliptest loops build a
 * 7-bit index in %g3 by shifting carry flags in with addx, then fetch the
 * byte at clip_table + index and store it into clipMask[].  From the most
 * significant index bit to the least, the bits are:
 *
 *   bit 6: top bit of the raw w word          (carry of w + w)
 *   bit 5: top bit of the raw z word          (carry of z + z)
 *   bit 4: borrow of (w << 1) - (z << 1), i.e. unsigned (w << 1) < (z << 1)
 *   bit 3: top bit of the raw y word
 *   bit 2: borrow of (w << 1) - (y << 1)
 *   bit 1: top bit of the raw x word
 *   bit 0: borrow of (w << 1) - (x << 1)
 *
 * The coordinates are loaded as 32-bit integers, so the "top bit" is the
 * sign bit of the float and the shifted compare is a magnitude compare on
 * the remaining bits.  A table value of 0 is treated by the callers as
 * "not clipped".
 */
42 clip_table:
43 .byte 0, 1, 0, 2, 4, 5, 4, 6
44 .byte 0, 1, 0, 2, 8, 9, 8, 10
45 .byte 32, 33, 32, 34, 36, 37, 36, 38
46 .byte 32, 33, 32, 34, 40, 41, 40, 42
47 .byte 0, 1, 0, 2, 4, 5, 4, 6
48 .byte 0, 1, 0, 2, 8, 9, 8, 10
49 .byte 16, 17, 16, 18, 20, 21, 20, 22
50 .byte 16, 17, 16, 18, 24, 25, 24, 26
51 .byte 63, 61, 63, 62, 55, 53, 55, 54
52 .byte 63, 61, 63, 62, 59, 57, 59, 58
53 .byte 47, 45, 47, 46, 39, 37, 39, 38
54 .byte 47, 45, 47, 46, 43, 41, 43, 42
55 .byte 63, 61, 63, 62, 55, 53, 55, 54
56 .byte 63, 61, 63, 62, 59, 57, 59, 58
57 .byte 31, 29, 31, 30, 23, 21, 23, 22
58 .byte 31, 29, 31, 30, 27, 25, 27, 26
59
60 /* GLvector4f *clip_vec, GLvector4f *proj_vec,
61 GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */
62
63 .align 64
/*
 * Trampoline used only to obtain the program counter.  A `call` to this
 * label leaves the address of the call instruction in %o7; the routine
 * itself does nothing but return (retl, with a nop in its delay slot).
 * Callers place a `sub %o7, <distance>, %g1` in the call's delay slot to
 * turn that address into the address of one_dot_zero without needing an
 * absolute relocation.
 */
64 __pc_tramp:
65 retl
66 nop
67
/*
 * _mesa_sparc_cliptest_points4
 *
 * Clip-tests every 4-component vertex of clip_vec and writes the
 * perspective-divided result to proj_vec.
 *
 * Arguments (caller's %o0-%o4, seen as %i0-%i4 after the save):
 *   %i0  clip_vec  - input vector header
 *   %i1  proj_vec  - output vector header; also the return value
 *   %i2  clipMask  - byte array, one mask byte written per vertex
 *   %i3  orMask    - byte, read on entry and rewritten on exit
 *   %i4  andMask   - byte, read on entry and rewritten on exit
 *
 * Per vertex:
 *   - a 7-bit index is built from the sign bits of w, z, y, x and from
 *     unsigned compares of (w << 1) against (z << 1), (y << 1), (x << 1);
 *     clip_table[index] is the vertex's clip mask and is stored to clipMask.
 *   - mask == 0: x, y, z are multiplied by 1/w and stored together with 1/w.
 *   - mask != 0: (0, 0, 0, 1.0f) is stored, the clipped counter c is
 *     incremented, and the running or/and masks are updated.
 *
 * On exit *orMask receives the accumulated OR; *andMask receives the
 * accumulated AND only if every vertex was clipped (c == count), else 0.
 *
 * NOTE(review): the loop is bottom-tested (cmp/bne after the body), so
 * count == 0 would still process one vertex and then fail the equality
 * test -- confirm callers never pass an empty vector.
 */
68 .globl _mesa_sparc_cliptest_points4
69 _mesa_sparc_cliptest_points4:
/*
 * NOTE(review): the same 64-byte frame is used for 32-bit and __arch64__
 * builds -- confirm it satisfies the 64-bit ABI's register-window save
 * area requirement.
 */
70 save %sp, -64, %sp
/*
 * PC-relative addressing: the call writes its own address to %o7 and the
 * delay-slot sub removes the assemble-time distance from that call back
 * to one_dot_zero, leaving %g1 = &one_dot_zero.
 */
71 call __pc_tramp
72 sub %o7, (. - one_dot_zero - 4), %g1
/* %f4 = 1.0f, then step over the 4-byte constant so %g1 = clip_table. */
73 ld [%g1 + 0x0], %f4
74 add %g1, 0x4, %g1
75
/*
 * Fetch stride and count from clip_vec before %i0 is overwritten with its
 * start pointer; %i5 becomes proj_vec's start pointer.
 */
76 ld [%i0 + V4F_STRIDE], %l1
77 ld [%i0 + V4F_COUNT], %l3
78 LDPTR [%i0 + V4F_START], %i0
79 LDPTR [%i1 + V4F_START], %i5
/* Pack the incoming masks: %g2 = (*andMask << 8) | *orMask. */
80 ldub [%i3], %g2
81 ldub [%i4], %g3
82 sll %g3, 8, %g3
83 or %g2, %g3, %g2
84
/*
 * Update proj_vec's header: flags |= VEC_SIZE_4, size = 3, count = the
 * input count.
 * NOTE(review): flags is given VEC_SIZE_4 while the size field is set to
 * 3 -- confirm this pairing is intended.
 */
85 ld [%i1 + V4F_FLAGS], %g3
86 or %g3, VEC_SIZE_4, %g3
87 st %g3, [%i1 + V4F_FLAGS]
88 mov 3, %g3
89 st %g3, [%i1 + V4F_SIZE]
90 st %l3, [%i1 + V4F_COUNT]
/* c = 0, i = 0 */
91 clr %l2
92 clr %l0
93
94 /* l0: i
95 * l3: count
96 * l1: stride
97 * l2: c
98 * g2: (tmpAndMask << 8) | tmpOrMask
99 * g1: clip_table
100 * i0: from[stride][i]
101 * i2: clipMask
102 * i5: vProj[4][i]
103 */
104
/*
 * Loop top.  w is loaded twice: as a float into %f3 for the divide and as
 * a raw integer into %g5 for the bit tests.  The divide %f8 = 1.0f / w is
 * started here, before the clip test; its result is only consumed on the
 * unclipped path at label 2.
 */
105 1: ld [%i0 + 0x0c], %f3 ! LSU Group
106 ld [%i0 + 0x0c], %g5 ! LSU Group
107 ld [%i0 + 0x08], %g4 ! LSU Group
108 fdivs %f4, %f3, %f8 ! FGM
/*
 * Build the table index in %g3.  Each addcc r,r,r moves the top bit of r
 * into the carry flag and leaves r shifted left by one; each subcc sets
 * carry when (w << 1) is below the other shifted value as unsigned; each
 * addx %g3,%g3,%g3 shifts %g3 left and appends the carry.  The first addx
 * (from %g0) starts %g3 with w's top bit.  Loads in between do not touch
 * the condition codes.
 */
109 addcc %g5, %g5, %g5 ! IEU1 Group
110 addx %g0, 0x0, %g3 ! IEU1 Group
/* z: top bit, then compare against w */
111 addcc %g4, %g4, %g4 ! IEU1 Group
112 addx %g3, %g3, %g3 ! IEU1 Group
113 subcc %g5, %g4, %g0 ! IEU1 Group
114 ld [%i0 + 0x04], %g4 ! LSU Group
115 addx %g3, %g3, %g3 ! IEU1 Group
/* y: top bit, then compare against w */
116 addcc %g4, %g4, %g4 ! IEU1 Group
117 addx %g3, %g3, %g3 ! IEU1 Group
118 subcc %g5, %g4, %g0 ! IEU1 Group
119 ld [%i0 + 0x00], %g4 ! LSU Group
120 addx %g3, %g3, %g3 ! IEU1 Group
/* x: top bit, then compare against w */
121 addcc %g4, %g4, %g4 ! IEU1 Group
122 addx %g3, %g3, %g3 ! IEU1 Group
123 subcc %g5, %g4, %g0 ! IEU1 Group
124 addx %g3, %g3, %g3 ! IEU1 Group
/*
 * %g3 = clip_table[index].  The stb sits in the delay slot of a
 * non-annulled branch, so clipMask[i] is written on both paths.
 */
125 ldub [%g1 + %g3], %g3 ! LSU Group
126 cmp %g3, 0 ! IEU1 Group, stall
127 be 2f ! CTI
128 stb %g3, [%i2] ! LSU
/*
 * Clipped vertex: c++, low byte of %g2 |= mask, high byte of %g2 &= mask
 * (the 0xff in %g4 keeps the low byte intact through the and), and the
 * output vertex becomes (0, 0, 0, 1.0f).  The final store is in the delay
 * slot of the branch to label 3.
 */
129 sll %g3, 8, %g4 ! IEU1 Group
130 add %l2, 1, %l2 ! IEU0
131 st %g0, [%i5 + 0x00] ! LSU
132 or %g4, 0xff, %g4 ! IEU0 Group
133 or %g2, %g3, %g2 ! IEU1
134 st %g0, [%i5 + 0x04] ! LSU
135 and %g2, %g4, %g2 ! IEU0 Group
136 st %g0, [%i5 + 0x08] ! LSU
137 b 3f ! CTI
138 st %f4, [%i5 + 0x0c] ! LSU Group
/* Unclipped vertex: store (x/w, y/w, z/w, 1/w) using %f8 = 1/w. */
139 2: ld [%i0 + 0x00], %f0 ! LSU Group
140 ld [%i0 + 0x04], %f1 ! LSU Group
141 ld [%i0 + 0x08], %f2 ! LSU Group
142 fmuls %f0, %f8, %f0 ! FGM
143 st %f0, [%i5 + 0x00] ! LSU Group
144 fmuls %f1, %f8, %f1 ! FGM
145 st %f1, [%i5 + 0x04] ! LSU Group
146 fmuls %f2, %f8, %f2 ! FGM
147 st %f2, [%i5 + 0x08] ! LSU Group
148 st %f8, [%i5 + 0x0c] ! LSU Group
/*
 * Advance: output pointer by 16 bytes, i and clipMask by one.  The input
 * pointer is advanced by stride in the branch delay slot, so it also
 * advances after the final iteration.
 */
149 3: add %i5, 0x10, %i5 ! IEU1
150 add %l0, 1, %l0 ! IEU0 Group
151 add %i2, 1, %i2 ! IEU0 Group
152 cmp %l0, %l3 ! IEU1 Group
153 bne 1b ! CTI
154 add %i0, %l1, %i0 ! IEU0 Group
/*
 * Write results.  *orMask = low byte of %g2.  %g3 = accumulated and-mask;
 * the annulled branch executes its delay-slot clr only when taken
 * (c < count), so *andMask is 0 unless every vertex was clipped.
 * NOTE(review): bl is a signed comparison applied to the count -- confirm
 * counts never reach the sign bit.
 */
155 stb %g2, [%i3] ! LSU
156 srl %g2, 8, %g3 ! IEU0 Group
157 cmp %l2, %l3 ! IEU1 Group
158 bl,a 1f ! CTI
159 clr %g3 ! IEU0
160 1: stb %g3, [%i4] ! LSU Group
/* Return proj_vec: restore copies %i1 into the caller's %o0. */
161 ret ! CTI Group
162 restore %i1, 0x0, %o0
163
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Variant of the clip test that computes only the masks: it does not load
 * 1.0f, performs no divide, and stores nothing through the second
 * argument.
 *
 * Arguments (caller's %o0-%o4, seen as %i0-%i4 after the save):
 *   %i0  clip_vec  - input vector header
 *   %i1  second argument; not dereferenced here, returned unchanged
 *   %i2  clipMask  - byte array, one mask byte written per vertex
 *   %i3  orMask    - byte, read on entry and rewritten on exit
 *   %i4  andMask   - byte, read on entry and rewritten on exit
 *
 * On exit *orMask receives the accumulated OR; *andMask receives the
 * accumulated AND only if every vertex was clipped (c == count), else 0.
 *
 * NOTE(review): the loop is bottom-tested (cmp/bne after the body), so
 * count == 0 would still process one vertex and then fail the equality
 * test -- confirm callers never pass an empty vector.
 * NOTE(review): the routine returns %i1 although it never uses it --
 * confirm this matches the return value the C prototype expects.
 */
164 .globl _mesa_sparc_cliptest_points4_np
165 _mesa_sparc_cliptest_points4_np:
/*
 * NOTE(review): the same 64-byte frame is used for 32-bit and __arch64__
 * builds -- confirm it satisfies the 64-bit ABI's register-window save
 * area requirement.
 */
166 save %sp, -64, %sp
167
/*
 * PC-relative addressing: the call writes its own address to %o7 and the
 * delay-slot sub yields %g1 = &one_dot_zero; adding 4 skips that word so
 * %g1 = clip_table.
 */
168 call __pc_tramp
169 sub %o7, (. - one_dot_zero - 4), %g1
170 add %g1, 0x4, %g1
171
/*
 * Fetch stride and count from clip_vec before %i0 is overwritten with its
 * start pointer, then pack the masks: %g2 = (*andMask << 8) | *orMask.
 */
172 ld [%i0 + V4F_STRIDE], %l1
173 ld [%i0 + V4F_COUNT], %l3
174 LDPTR [%i0 + V4F_START], %i0
175 ldub [%i3], %g2
176 ldub [%i4], %g3
177 sll %g3, 8, %g3
178 or %g2, %g3, %g2
179
/* c = 0, i = 0 */
180 clr %l2
181 clr %l0
182
183 /* l0: i
184 * l3: count
185 * l1: stride
186 * l2: c
187 * g2: (tmpAndMask << 8) | tmpOrMask
188 * g1: clip_table
189 * i0: from[stride][i]
190 * i2: clipMask
191 */
192
/*
 * Build the table index in %g3 from the raw integer words of w, z, y, x.
 * Each addcc r,r,r moves the top bit of r into the carry flag and leaves
 * r shifted left by one; each subcc sets carry when (w << 1) is below the
 * other shifted value as unsigned; each addx %g3,%g3,%g3 shifts %g3 left
 * and appends the carry.  Loads in between leave the condition codes alone.
 */
193 1: ld [%i0 + 0x0c], %g5 ! LSU Group
194 ld [%i0 + 0x08], %g4 ! LSU Group
195 addcc %g5, %g5, %g5 ! IEU1 Group
196 addx %g0, 0x0, %g3 ! IEU1 Group
/* z: top bit, then compare against w */
197 addcc %g4, %g4, %g4 ! IEU1 Group
198 addx %g3, %g3, %g3 ! IEU1 Group
199 subcc %g5, %g4, %g0 ! IEU1 Group
200 ld [%i0 + 0x04], %g4 ! LSU Group
201 addx %g3, %g3, %g3 ! IEU1 Group
/* y: top bit, then compare against w */
202 addcc %g4, %g4, %g4 ! IEU1 Group
203 addx %g3, %g3, %g3 ! IEU1 Group
204 subcc %g5, %g4, %g0 ! IEU1 Group
205 ld [%i0 + 0x00], %g4 ! LSU Group
206 addx %g3, %g3, %g3 ! IEU1 Group
/* x: top bit, then compare against w */
207 addcc %g4, %g4, %g4 ! IEU1 Group
208 addx %g3, %g3, %g3 ! IEU1 Group
209 subcc %g5, %g4, %g0 ! IEU1 Group
210 addx %g3, %g3, %g3 ! IEU1 Group
/*
 * %g3 = clip_table[index].  The stb sits in the delay slot of a
 * non-annulled branch, so clipMask[i] is written on both paths.
 */
211 ldub [%g1 + %g3], %g3 ! LSU Group
212 cmp %g3, 0 ! IEU1 Group, stall
213 be 2f ! CTI
214 stb %g3, [%i2] ! LSU
/*
 * Clipped vertex: c++, low byte of %g2 |= mask, high byte of %g2 &= mask
 * (the 0xff in %g4 keeps the low byte intact through the and).
 */
215 sll %g3, 8, %g4 ! IEU1 Group
216 add %l2, 1, %l2 ! IEU0
217 or %g4, 0xff, %g4 ! IEU0 Group
218 or %g2, %g3, %g2 ! IEU1
219 and %g2, %g4, %g2 ! IEU0 Group
/*
 * Advance i and clipMask by one; the input pointer is advanced by stride
 * in the branch delay slot, so it also advances after the final iteration.
 */
220 2: add %l0, 1, %l0 ! IEU0 Group
221 add %i2, 1, %i2 ! IEU0 Group
222 cmp %l0, %l3 ! IEU1 Group
223 bne 1b ! CTI
224 add %i0, %l1, %i0 ! IEU0 Group
/*
 * Write results.  *orMask = low byte of %g2.  %g3 = accumulated and-mask;
 * the annulled branch executes its delay-slot clr only when taken
 * (c < count), so *andMask is 0 unless every vertex was clipped.
 * NOTE(review): bl is a signed comparison applied to the count -- confirm
 * counts never reach the sign bit.
 */
225 stb %g2, [%i3] ! LSU
226 srl %g2, 8, %g3 ! IEU0 Group
227 cmp %l2, %l3 ! IEU1 Group
228 bl,a 1f ! CTI
229 clr %g3 ! IEU0
230 1: stb %g3, [%i4] ! LSU Group
/* restore copies %i1 into the caller's %o0 as the return value. */
231 ret ! CTI Group
232 restore %i1, 0x0, %o0