Merge branch 'mesa_7_6_branch'
[mesa.git] / src / mesa / sparc / clip.S
1 /*
2 * Clip testing in SPARC assembly
3 */
4
/*
 * Layout helpers for the GLvector4f arguments.
 *
 * LDPTR is the load instruction for a pointer-sized field and the
 * V4F_* values are byte offsets added to a GLvector4f pointer.  Only
 * the fields after the first pointer move between the 32-bit and the
 * 64-bit layout.
 */
#define V4F_DATA	0x00
#if __arch64__
#define LDPTR		ldx
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif
22
/* Flag masks: VEC_SIZE_n has the n lowest bits set. */
#define VEC_SIZE_1	0x1
#define VEC_SIZE_2	0x3
#define VEC_SIZE_3	0x7
#define VEC_SIZE_4	0xf
27
/* The routines in this file use %g2 and %g3 as scratch registers. */
	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align	64

/*
 * Single-precision constant 1.0.  The clip-test routines locate this
 * word PC-relatively and then add 4 to reach clip_table, so the table
 * must stay directly behind it.
 */
one_dot_zero:
	.word	0x3f800000			/* 1.0f */
36
/*
 * Clip mask lookup table (128 one-byte entries).
 *
 * The idea is borrowed from the x86 Mesa assembly; it carries over to
 * SPARC because SPARC also has add-with-carry instructions.  The
 * clip-test loops shift seven carry bits into a register, most
 * significant first:
 *
 *	sign(w), sign(z), |w| < |z|, sign(y), |w| < |y|, sign(x), |w| < |x|
 *
 * and use the resulting 0..127 value as a byte index into this table
 * to fetch the clip mask of the vertex.
 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26
59
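/*
 * Argument list shared by the two clip-test entry points below
 * (seen in %i0..%i5 once the function has executed its save):
 *
 *	GLvector4f *clip_vec, GLvector4f *proj_vec,
 *	GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *	GLboolean viewport_z_enable
 */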
63
64 .align 64
65 __pc_tramp:
66 retl
67 nop
68
/*
 * _mesa_sparc_cliptest_points4
 *
 * In (after save):
 *	%i0	GLvector4f *clip_vec	source vertices, 4 floats each
 *	%i1	GLvector4f *proj_vec	destination, written 16 bytes apart
 *	%i2	GLubyte clipMask[]	receives one mask byte per vertex
 *	%i3	GLubyte *orMask		read, OR-accumulated, written back
 *	%i4	GLubyte *andMask	read, AND-accumulated, written back
 *	%i5	sixth argument		not read; the register is reused
 * Out:
 *	%o0	the proj_vec pointer that was passed in
 *
 * For every vertex the clip mask is fetched from clip_table and stored
 * in clipMask[i].  A vertex with a non-zero mask is counted and its
 * output is set to (0, 0, 0, 1.0); otherwise the output is
 * (x * 1/w, y * 1/w, z * 1/w, 1/w).  On exit *orMask holds the OR of
 * its old value and all non-zero masks, and *andMask holds 0 when fewer
 * than count vertices had a non-zero mask, else the AND of its old
 * value with all masks.
 *
 * proj_vec is updated as: flags |= VEC_SIZE_4, size = 3, count = count
 * of clip_vec.
 * NOTE(review): size is set to 3 although four components are written
 * and VEC_SIZE_4 is flagged; confirm against the C implementation
 * before changing it.
 *
 * Clobbers %g1-%g5, %f0-%f4, %f8 and the condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/* The frame must at least hold the 16-register window save area:
	 * 64 bytes on 32-bit SPARC, but 128 bytes on 64-bit SPARC, where
	 * the ABI minimum frame is 176 bytes.  A 64-byte frame there lets
	 * a window spill overwrite the frame of the caller.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif
	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1 ! delay slot: %g1 = one_dot_zero
	ld	[%g1 + 0x0], %f4		! %f4 = 1.0f
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	LDPTR	[%i1 + V4F_START], %i5
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2
	clr	%l0

	/* The loop below tests its exit condition at the bottom, so an
	 * empty vector has to bypass it; otherwise one vertex would be
	 * processed and the counter would have to wrap before matching.
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c  (number of vertices with a non-zero clip mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 * f4: 1.0f
	 */

	/* Build the table index in %g3.  Doubling a raw float word moves
	 * its sign bit into the carry flag and leaves the magnitude bits,
	 * so each "subcc w2, c2" sets carry when |w| < |c| (unsigned
	 * compare of the doubled words).  Every addx shifts the current
	 * carry into the low end of %g3.
	 */
1:	ld	[%i0 + 0x0c], %f3		! LSU		Group
	ld	[%i0 + 0x0c], %g5		! LSU		Group
	ld	[%i0 + 0x08], %g4		! LSU		Group
	fdivs	%f4, %f3, %f8			! FGM		  %f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! IEU1		Group
	addx	%g0, 0x0, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	ld	[%i0 + 0x04], %g4		! LSU		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	ld	[%i0 + 0x00], %g4		! LSU		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	ldub	[%g1 + %g3], %g3		! LSU		Group
	cmp	%g3, 0				! IEU1		Group, stall
	be	2f				! CTI
	 stb	%g3, [%i2]			! LSU		  (delay slot, always runs)

	/* Non-zero mask: count the vertex, fold the mask into both
	 * accumulators and emit (0, 0, 0, 1.0).  ANDing %g2 with
	 * (mask << 8) | 0xff narrows the high (and) byte while leaving
	 * the low (or) byte intact.
	 */
	sll	%g3, 8, %g4			! IEU1		Group
	add	%l2, 1, %l2			! IEU0
	st	%g0, [%i5 + 0x00]		! LSU
	or	%g4, 0xff, %g4			! IEU0		Group
	or	%g2, %g3, %g2			! IEU1
	st	%g0, [%i5 + 0x04]		! LSU
	and	%g2, %g4, %g2			! IEU0		Group
	st	%g0, [%i5 + 0x08]		! LSU
	b	3f				! CTI
	 st	%f4, [%i5 + 0x0c]		! LSU		Group

	/* Zero mask: scale x, y, z by 1/w and store 1/w as the fourth
	 * component.
	 */
2:	ld	[%i0 + 0x00], %f0		! LSU		Group
	ld	[%i0 + 0x04], %f1		! LSU		Group
	ld	[%i0 + 0x08], %f2		! LSU		Group
	fmuls	%f0, %f8, %f0			! FGM
	st	%f0, [%i5 + 0x00]		! LSU		Group
	fmuls	%f1, %f8, %f1			! FGM
	st	%f1, [%i5 + 0x04]		! LSU		Group
	fmuls	%f2, %f8, %f2			! FGM
	st	%f2, [%i5 + 0x08]		! LSU		Group
	st	%f8, [%i5 + 0x0c]		! LSU		Group

3:	add	%i5, 0x10, %i5			! IEU1
	add	%l0, 1, %l0			! IEU0		Group
	add	%i2, 1, %i2			! IEU0		Group
	cmp	%l0, %l3			! IEU1		Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0		Group

	/* Write the accumulators back.  The annulled delay slot clears
	 * the and-mask only when the branch is taken, i.e. when c < count.
	 */
4:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0		Group
	cmp	%l2, %l3			! IEU1		Group
	bl,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU		Group
	ret					! CTI		Group
	 restore %i1, 0x0, %o0			! return proj_vec
164
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Clip test without writing projected vertices.
 *
 * In (after save):
 *	%i0	GLvector4f *clip_vec	source vertices, 4 floats each
 *	%i1	GLvector4f *proj_vec	never dereferenced here
 *	%i2	GLubyte clipMask[]	receives one mask byte per vertex
 *	%i3	GLubyte *orMask		read, OR-accumulated, written back
 *	%i4	GLubyte *andMask	read, AND-accumulated, written back
 *	%i5	sixth argument		not read
 * Out:
 *	%o0	the proj_vec pointer that was passed in, unchanged
 *
 * For every vertex the clip mask is fetched from clip_table and stored
 * in clipMask[i].  On exit *orMask holds the OR of its old value and
 * all non-zero masks, and *andMask holds 0 when fewer than count
 * vertices had a non-zero mask, else the AND of its old value with all
 * masks.
 *
 * Clobbers %g1-%g5 and the condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/* The frame must at least hold the 16-register window save area:
	 * 64 bytes on 32-bit SPARC, but 128 bytes on 64-bit SPARC, where
	 * the ABI minimum frame is 176 bytes.  A 64-byte frame there lets
	 * a window spill overwrite the frame of the caller.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif

	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1 ! delay slot: %g1 = one_dot_zero
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1
	ld	[%i0 + V4F_COUNT], %l3
	LDPTR	[%i0 + V4F_START], %i0
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (andMask << 8) | orMask

	clr	%l2
	clr	%l0

	/* The loop below tests its exit condition at the bottom, so an
	 * empty vector has to bypass it; otherwise one vertex would be
	 * processed and the counter would have to wrap before matching.
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c  (number of vertices with a non-zero clip mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

	/* Build the table index in %g3.  Doubling a raw float word moves
	 * its sign bit into the carry flag and leaves the magnitude bits,
	 * so each "subcc w2, c2" sets carry when |w| < |c| (unsigned
	 * compare of the doubled words).  Every addx shifts the current
	 * carry into the low end of %g3.
	 */
1:	ld	[%i0 + 0x0c], %g5		! LSU		Group
	ld	[%i0 + 0x08], %g4		! LSU		Group
	addcc	%g5, %g5, %g5			! IEU1		Group
	addx	%g0, 0x0, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	ld	[%i0 + 0x04], %g4		! LSU		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	ld	[%i0 + 0x00], %g4		! LSU		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	addcc	%g4, %g4, %g4			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	subcc	%g5, %g4, %g0			! IEU1		Group
	addx	%g3, %g3, %g3			! IEU1		Group
	ldub	[%g1 + %g3], %g3		! LSU		Group
	cmp	%g3, 0				! IEU1		Group, stall
	be	2f				! CTI
	 stb	%g3, [%i2]			! LSU		  (delay slot, always runs)

	/* Non-zero mask: count the vertex and fold the mask into both
	 * accumulators.  ANDing %g2 with (mask << 8) | 0xff narrows the
	 * high (and) byte while leaving the low (or) byte intact.
	 */
	sll	%g3, 8, %g4			! IEU1		Group
	add	%l2, 1, %l2			! IEU0
	or	%g4, 0xff, %g4			! IEU0		Group
	or	%g2, %g3, %g2			! IEU1
	and	%g2, %g4, %g2			! IEU0		Group

2:	add	%l0, 1, %l0			! IEU0		Group
	add	%i2, 1, %i2			! IEU0		Group
	cmp	%l0, %l3			! IEU1		Group
	bne	1b				! CTI
	 add	%i0, %l1, %i0			! IEU0		Group

	/* Write the accumulators back.  The annulled delay slot clears
	 * the and-mask only when the branch is taken, i.e. when c < count.
	 */
4:	stb	%g2, [%i3]			! LSU
	srl	%g2, 8, %g3			! IEU0		Group
	cmp	%l2, %l3			! IEU1		Group
	bl,a	1f				! CTI
	 clr	%g3				! IEU0
1:	stb	%g3, [%i4]			! LSU		Group
	ret					! CTI		Group
	 restore %i1, 0x0, %o0			! return the proj_vec argument