mesa: Prefix main includes with dir to avoid conflicts.
[mesa.git] / src / mesa / sparc / clip.S
1 /*
2 * Clip testing in SPARC assembly
3 */
4
/*
 * Pointer-load mnemonic and byte offsets of the vector fields used by the
 * clip routines in this file.  The first two fields are pointer sized, so
 * everything after V4F_DATA moves when pointers grow to 8 bytes; the
 * remaining fields are 32-bit words.  These offsets are maintained by hand
 * and must agree with the C-side structure (presumably GLvector4f, whose
 * declaration is not visible from here).
 */
#define V4F_DATA	0x00		/* same offset in both ABIs */

#if __arch64__
#define LDPTR		ldx		/* 64-bit pointer load */
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld		/* 32-bit pointer load */
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif
22
/*
 * Bit masks for the V4F_FLAGS word: VEC_SIZE_n has the low n bits set.
 * Only VEC_SIZE_4 is referenced by the code visible in this file.
 */
#define VEC_SIZE_1	0x1
#define VEC_SIZE_2	0x3
#define VEC_SIZE_3	0x7
#define VEC_SIZE_4	0xf
27
/*
 * On SVR4-style toolchains (Solaris) the 64-bit assembler wants an explicit
 * declaration before these global registers may be used as scratch.
 */
#if defined(__svr4__) || defined(__SVR4) || defined(SVR4)
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g7, #scratch
#endif
34
/*
 * Constant data used by the clip-test routines.  It lives in .text on
 * purpose: the routines derive the address of one_dot_zero from their own
 * PC and then step 4 bytes forward to reach clip_table, so these two
 * objects must stay adjacent, in this order, in the same section as the
 * code.
 */
	.text
	.align	64

one_dot_zero:
	.word	0x3f800000		/* bit pattern of 1.0f */

/*
 * clip_table: 128 one-byte clip masks, indexed by a 7-bit code that the
 * routines build with add-with-carry (the idea is borrowed from the x86
 * Mesa assembly).  Index bits, most significant first:
 *
 *	bit 6	sign bit of w
 *	bit 5	sign bit of z
 *	bit 4	|w| < |z|
 *	bit 3	sign bit of y
 *	bit 2	|w| < |y|
 *	bit 1	sign bit of x
 *	bit 0	|w| < |x|
 *
 * Each row below covers one value of index bits 6..3 (shown in the row
 * comment) and the eight columns enumerate index bits 2..0.  In the rows
 * with w non-negative the mask bits are 1/2 for x, 4/8 for y and 32/16 for
 * z (positive/negative side); the rows with w negative hold their own
 * tabulated values.
 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* 0000 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* 0001 */
	.byte	32, 33, 32, 34, 36, 37, 36, 38	/* 0010 */
	.byte	32, 33, 32, 34, 40, 41, 40, 42	/* 0011 */
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* 0100 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* 0101 */
	.byte	16, 17, 16, 18, 20, 21, 20, 22	/* 0110 */
	.byte	16, 17, 16, 18, 24, 25, 24, 26	/* 0111 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* 1000 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* 1001 */
	.byte	47, 45, 47, 46, 39, 37, 39, 38	/* 1010 */
	.byte	47, 45, 47, 46, 43, 41, 43, 42	/* 1011 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* 1100 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* 1101 */
	.byte	31, 29, 31, 30, 23, 21, 23, 22	/* 1110 */
	.byte	31, 29, 31, 30, 27, 25, 27, 26	/* 1111 */
63
/* Arguments taken by each cliptest entry point below, in order:
 *   GLvector4f *clip_vec, GLvector4f *proj_vec,
 *   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask
 */
66
/*
 * __pc_tramp: empty leaf routine that returns immediately.
 * The entry points in this file `call` it only for the side effect of the
 * call instruction: %o7 receives the address of the call, from which the
 * caller computes the address of one_dot_zero in the call delay slot.
 */
	.align	64
__pc_tramp:
	retl				/* back to caller at %o7 + 8 */
	 nop				/* delay slot */
71
/*
 * _mesa_sparc_cliptest_points4
 *
 * In:	%i0 = GLvector4f *clip_vec	(source vertices x, y, z, w)
 *	%i1 = GLvector4f *proj_vec	(destination, 16 bytes per vertex)
 *	%i2 = GLubyte clipMask[]	(one mask byte written per vertex)
 *	%i3 = GLubyte *orMask		(read, then updated)
 *	%i4 = GLubyte *andMask		(read, then updated)
 * Out:	%o0 = proj_vec
 *
 * For every vertex a 7-bit code is built from the sign bits of w, z, y, x
 * and the comparisons |w| < |z|, |w| < |y|, |w| < |x| (done on the raw
 * IEEE-754 bit patterns), and clip_table maps it to the clip mask.
 * A vertex with a zero mask is written to proj_vec as
 * (x/w, y/w, z/w, 1/w); any other vertex is written as (0, 0, 0, 1).
 * The mask is OR-ed into *orMask.  *andMask becomes 0 if at least one
 * vertex had a zero mask, otherwise the AND of its old value with all masks.
 * proj_vec also gets VEC_SIZE_4 OR-ed into its flags and its count set to
 * the source count.
 *
 * NOTE(review): the size field is set to 3 although four components are
 * written and VEC_SIZE_4 is flagged -- confirm against the C reference
 * before changing it.
 *
 * Clobbers: %g1-%g5, %f0-%f4, %f8, condition codes.  %g7 is deliberately
 * not used: the SPARC ABIs reserve it for the system (thread pointer) and
 * nothing here would restore it, so the count lives in the local %l3.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/* The frame must at least hold the register-window save area that
	 * a window spill writes at %sp: 16 x 8 bytes on V9 (176 is the ABI
	 * minimum frame there), 16 x 4 bytes on V8.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif
	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1 ! delay slot: g1 = one_dot_zero
	ld	[%g1 + 0x0], %f4		! f4 = 1.0f
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = source stride in bytes
	ld	[%i0 + V4F_COUNT], %l3		! l3 = vertex count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = first source vertex
	LDPTR	[%i1 + V4F_START], %i5		! i5 = first projected vertex
	ldub	[%i3], %g2			! g2 = *orMask
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (*andMask << 8) | *orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]		! see NOTE(review) above
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = source count
	clr	%l2				! c = 0

	/* The loop below tests its exit condition at the bottom with an
	 * equality compare, so it must never be entered with count == 0.
	 */
	cmp	%l3, 0
	be	4f
	 clr	%l0				! delay slot: i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c (number of vertices with a non-zero mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: current source vertex
	 * i2: current clipMask entry
	 * i5: current projected vertex
	 */

1:	ld	[%i0 + 0x0c], %f3		! f3 = w as a float
	ld	[%i0 + 0x0c], %g5		! g5 = raw bits of w
	ld	[%i0 + 0x08], %g4		! g4 = raw bits of z
	fdivs	%f4, %f3, %f8			! f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = |w| << 1
	addx	%g0, 0x0, %g3			! code = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z), g4 = |z| << 1
	addx	%g3, %g3, %g3			! code = code * 2 + sign(z)
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! g4 = raw bits of y
	addx	%g3, %g3, %g3			! shift that comparison in
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! g4 = raw bits of x
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3			! g3 = 7-bit table index
	ldub	[%g1 + %g3], %g3		! g3 = clip mask
	cmp	%g3, 0
	be	2f				! mask == 0: project the vertex
	 stb	%g3, [%i2]			! delay slot: clipMask[i] = mask

	/* Non-zero mask: fold it into the running masks and emit (0,0,0,1). */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	st	%g0, [%i5 + 0x00]
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! tmpOrMask |= mask
	st	%g0, [%i5 + 0x04]
	and	%g2, %g4, %g2			! tmpAndMask &= mask
	st	%g0, [%i5 + 0x08]
	b	3f
	 st	%f4, [%i5 + 0x0c]		! delay slot: projected w = 1.0

	/* Zero mask: perspective divide. */
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0			! x / w
	st	%f0, [%i5 + 0x00]
	fmuls	%f1, %f8, %f1			! y / w
	st	%f1, [%i5 + 0x04]
	fmuls	%f2, %f8, %f2			! z / w
	st	%f2, [%i5 + 0x08]
	st	%f8, [%i5 + 0x0c]		! 1 / w

3:	add	%i5, 0x10, %i5			! next projected vertex
	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask entry
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next source vertex

4:	stb	%g2, [%i3]			! *orMask = tmpOrMask
	srl	%g2, 8, %g3			! g3 = tmpAndMask
	cmp	%l2, %l3
	bl,a	1f				! c < count: some mask was zero
	 clr	%g3				! annulled unless the branch is taken
1:	stb	%g3, [%i4]			! *andMask = g3
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
167
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * In:	%i0 = GLvector4f *clip_vec	(source vertices x, y, z, w)
 *	%i1 = GLvector4f *proj_vec	(only its flags/size/count are written)
 *	%i2 = GLubyte clipMask[]	(one mask byte written per vertex)
 *	%i3 = GLubyte *orMask		(read, then updated)
 *	%i4 = GLubyte *andMask		(read, then updated)
 * Out:	%o0 = proj_vec
 *
 * Computes the same per-vertex clip mask as _mesa_sparc_cliptest_points4
 * (7-bit code from the sign bits of w, z, y, x and the comparisons
 * |w| < |z|, |w| < |y|, |w| < |x| on the raw IEEE-754 bit patterns, looked
 * up in clip_table) but performs no divide and stores no vertex data.
 * The mask is OR-ed into *orMask.  *andMask becomes 0 if at least one
 * vertex had a zero mask, otherwise the AND of its old value with all masks.
 *
 * NOTE(review): this routine still sets proj_vec flags/size/count (size is
 * set to 3 while VEC_SIZE_4 is flagged) and returns proj_vec even though it
 * writes no projected coordinates -- confirm against the C reference which
 * vector callers expect back.
 *
 * Clobbers: %g1-%g5, condition codes.  %g7 is deliberately not used: the
 * SPARC ABIs reserve it for the system (thread pointer) and nothing here
 * would restore it, so the count lives in the local %l3.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/* The frame must at least hold the register-window save area that
	 * a window spill writes at %sp: 16 x 8 bytes on V9 (176 is the ABI
	 * minimum frame there), 16 x 4 bytes on V8.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif

	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1 ! delay slot: g1 = one_dot_zero
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = source stride in bytes
	ld	[%i0 + V4F_COUNT], %l3		! l3 = vertex count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = first source vertex
	ldub	[%i3], %g2			! g2 = *orMask
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (*andMask << 8) | *orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]		! see NOTE(review) above
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = source count
	clr	%l2				! c = 0

	/* The loop below tests its exit condition at the bottom with an
	 * equality compare, so it must never be entered with count == 0.
	 */
	cmp	%l3, 0
	be	3f
	 clr	%l0				! delay slot: i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c (number of vertices with a non-zero mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: current source vertex
	 * i2: current clipMask entry
	 */

1:	ld	[%i0 + 0x0c], %g5		! g5 = raw bits of w
	ld	[%i0 + 0x08], %g4		! g4 = raw bits of z
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = |w| << 1
	addx	%g0, 0x0, %g3			! code = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z), g4 = |z| << 1
	addx	%g3, %g3, %g3			! code = code * 2 + sign(z)
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! g4 = raw bits of y
	addx	%g3, %g3, %g3			! shift that comparison in
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! g4 = raw bits of x
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3			! g3 = 7-bit table index
	ldub	[%g1 + %g3], %g3		! g3 = clip mask
	cmp	%g3, 0
	be	2f				! mask == 0: nothing to accumulate
	 stb	%g3, [%i2]			! delay slot: clipMask[i] = mask

	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! tmpOrMask |= mask
	and	%g2, %g4, %g2			! tmpAndMask &= mask

2:	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask entry
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next source vertex

3:	stb	%g2, [%i3]			! *orMask = tmpOrMask
	srl	%g2, 8, %g3			! g3 = tmpAndMask
	cmp	%l2, %l3
	bl,a	1f				! c < count: some mask was zero
	 clr	%g3				! annulled unless the branch is taken
1:	stb	%g3, [%i4]			! *andMask = g3
	ret
	 restore %i1, 0x0, %o0			! return proj_vec