glsl,driconf: add allow_glsl_120_subset_in_110 for SPECviewperf13
[mesa.git] / src / mesa / sparc / sparc_clip.S
1 /*
2 * Clip testing in SPARC assembly
3 */
4
/*
 * Pointer-width configuration.
 *
 * LDPTR is the load mnemonic used for pointer-sized structure fields:
 * ldx when __arch64__ is set, ld otherwise.  MATH_ASM_PTR_SIZE is
 * defined ahead of the include in both configurations (presumably
 * m_vector_asm.h derives the V4F_* offsets from it -- the header is
 * not part of this file).
 */
#if __arch64__
#define LDPTR			ldx
#define MATH_ASM_PTR_SIZE	8
#else
#define LDPTR			ld
#define MATH_ASM_PTR_SIZE	4
#endif
#include "math/m_vector_asm.h"

/*
 * Masks with the low 1, 2, 3 and 4 bits set.  VEC_SIZE_4 is OR-ed
 * into the V4F_FLAGS word of the projected vector by the clip tester.
 */
#define VEC_SIZE_1		1
#define VEC_SIZE_2		3
#define VEC_SIZE_3		7
#define VEC_SIZE_4		15

	/* %g2 and %g3 are used as plain scratch registers in this file. */
	.register	%g2, #scratch
	.register	%g3, #scratch
22
23 .text
24 .align 64
25
26 one_dot_zero:
27 .word 0x3f800000 /* 1.0f */
28
29 /* This trick is shamelessly stolen from the x86
30 * Mesa asm. Very clever, and we can do it too
31 * since we have the necessary add with carry
32 * instructions on Sparc.
33 */
34 clip_table:
35 .byte 0, 1, 0, 2, 4, 5, 4, 6
36 .byte 0, 1, 0, 2, 8, 9, 8, 10
37 .byte 32, 33, 32, 34, 36, 37, 36, 38
38 .byte 32, 33, 32, 34, 40, 41, 40, 42
39 .byte 0, 1, 0, 2, 4, 5, 4, 6
40 .byte 0, 1, 0, 2, 8, 9, 8, 10
41 .byte 16, 17, 16, 18, 20, 21, 20, 22
42 .byte 16, 17, 16, 18, 24, 25, 24, 26
43 .byte 63, 61, 63, 62, 55, 53, 55, 54
44 .byte 63, 61, 63, 62, 59, 57, 59, 58
45 .byte 47, 45, 47, 46, 39, 37, 39, 38
46 .byte 47, 45, 47, 46, 43, 41, 43, 42
47 .byte 63, 61, 63, 62, 55, 53, 55, 54
48 .byte 63, 61, 63, 62, 59, 57, 59, 58
49 .byte 31, 29, 31, 30, 23, 21, 23, 22
50 .byte 31, 29, 31, 30, 27, 25, 27, 26
51
/* Arguments taken, in this order, by the clip-test entry points
 * that follow:
 *   GLvector4f *clip_vec, GLvector4f *proj_vec,
 *   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *   GLboolean viewport_z_enable
 */
55
56 .align 64
57 __pc_tramp:
58 retl
59 nop
60
/*
 * _mesa_sparc_cliptest_points4
 *
 * Clip-tests every 4-component vertex of clip_vec and writes the
 * perspective-divided result to proj_vec.
 *
 * In (after the register window is opened):
 *   %i0  GLvector4f *clip_vec   source vector (V4F_START/COUNT/STRIDE read)
 *   %i1  GLvector4f *proj_vec   destination vector
 *   %i2  GLubyte clipMask[]     one mask byte is written per vertex
 *   %i3  GLubyte *orMask        read on entry, updated on exit
 *   %i4  GLubyte *andMask       read on entry, updated on exit
 *   %i5  viewport_z_enable      never read; %i5 is reused as the
 *                               output pointer
 * Out:
 *   %o0  proj_vec (the value that arrived in %i1)
 *
 * Per vertex (x, y, z, w):
 *   mask = clip_table[index]; clipMask[i] = mask
 *   mask != 0: c++, orMask |= mask, andMask &= mask,
 *              output = (0, 0, 0, 1.0f)
 *   mask == 0: output = (x/w, y/w, z/w, 1/w), computed as a multiply
 *              by 1.0f / w
 * At the end *orMask gets the accumulated OR and *andMask gets the
 * accumulated AND, or 0 when c < count.
 *
 * Index construction: the raw IEEE bit patterns are handled as
 * integers.  "addcc r, r, r" moves the sign bit into the carry and
 * leaves the remaining bits shifted left by one; "subcc w2, v2" sets
 * the carry when the doubled w pattern is below the doubled coordinate
 * pattern as unsigned integers.  Every carry is shifted into %g3 with
 * addx, giving, from most to least significant bit:
 *   sign(w), sign(z), w<z, sign(y), w<y, sign(x), w<x
 *
 * Clobbers: %g1-%g5, %f0-%f4, %f8, integer condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/*
	 * A 64-bit register window needs a 128-byte save area at the
	 * stack pointer; a 64-byte frame would make a spill of this
	 * window land on top of the caller's saved locals.  Use the
	 * 176-byte V9 minimum frame there.  The 32-bit frame is kept
	 * at its original size.
	 */
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif
	call		__pc_tramp			! %o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1	! %g1 = &one_dot_zero
	ld		[%g1 + 0x0], %f4		! %f4 = 1.0f
	add		%g1, 0x4, %g1			! %g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5
	ldub		[%i3], %g2			! incoming *orMask
	ldub		[%i4], %g3			! incoming *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2			! %g2 = (and << 8) | or

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	/* NOTE(review): V4F_SIZE is set to 3 although VEC_SIZE_4 is
	 * flagged and four components are stored per vertex -- confirm
	 * that 3 is the intended value. */
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%l3, [%i1 + V4F_COUNT]
	clr		%l2

	/*
	 * The loop below is bottom-tested with "bne": entered with
	 * count == 0 it would process a vertex anyway and then keep
	 * going until the 32-bit counter wraps.  Skip it and go
	 * straight to the mask write-back.
	 */
	cmp		%l3, 0
	be		4f
	 clr		%l0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

1:	ld		[%i0 + 0x0c], %f3	! LSU	Group	w as float
	ld		[%i0 + 0x0c], %g5	! LSU	Group	w as raw bits
	ld		[%i0 + 0x08], %g4	! LSU	Group	z
	fdivs		%f4, %f3, %f8		! FGM		%f8 = 1.0f / w
	addcc		%g5, %g5, %g5		! IEU1	Group	carry = sign(w)
	addx		%g0, 0x0, %g3		! IEU1	Group	index = carry
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(z)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < z
	ld		[%i0 + 0x04], %g4	! LSU	Group	y
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(y)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < y
	ld		[%i0 + 0x00], %g4	! LSU	Group	x
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(x)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < x
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group	mask = clip_table[index]
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU		clipMask[i] = mask (both paths)

	/* Clipped vertex: count it, fold the mask into both
	 * accumulators and emit (0, 0, 0, 1.0f). */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0		c++
	st		%g0, [%i5 + 0x00]	! LSU
	or		%g4, 0xff, %g4		! IEU0	Group	(mask << 8) | 0xff
	or		%g2, %g3, %g2		! IEU1		or-part |= mask
	st		%g0, [%i5 + 0x04]	! LSU
	and		%g2, %g4, %g2		! IEU0	Group	and-part &= mask
	st		%g0, [%i5 + 0x08]	! LSU
	b		3f			! CTI
	 st		%f4, [%i5 + 0x0c]	! LSU	Group

	/* Unclipped vertex: scale x, y, z by 1/w and store 1/w. */
2:	ld		[%i0 + 0x00], %f0	! LSU	Group
	ld		[%i0 + 0x04], %f1	! LSU	Group
	ld		[%i0 + 0x08], %f2	! LSU	Group
	fmuls		%f0, %f8, %f0		! FGM
	st		%f0, [%i5 + 0x00]	! LSU	Group
	fmuls		%f1, %f8, %f1		! FGM
	st		%f1, [%i5 + 0x04]	! LSU	Group
	fmuls		%f2, %f8, %f2		! FGM
	st		%f2, [%i5 + 0x08]	! LSU	Group
	st		%f8, [%i5 + 0x0c]	! LSU	Group

3:	add		%i5, 0x10, %i5		! IEU1		next output vertex
	add		%l0, 1, %l0		! IEU0	Group	i++
	add		%i2, 1, %i2		! IEU0	Group	next clipMask byte
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group	next input vertex

	/* Write back the masks.  The annulled delay slot zeroes the
	 * AND result only when the branch is taken, i.e. c < count. */
4:	stb		%g2, [%i3]		! LSU		*orMask
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group	*andMask
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0		! return proj_vec
156
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Clip-tests every 4-component vertex of clip_vec without producing
 * projected coordinates: only clipMask[], *orMask and *andMask are
 * written.
 *
 * In (after the register window is opened):
 *   %i0  GLvector4f *clip_vec   source vector (V4F_START/COUNT/STRIDE read)
 *   %i1  GLvector4f *proj_vec   not dereferenced; passed through as
 *                               the return value
 *   %i2  GLubyte clipMask[]     one mask byte is written per vertex
 *   %i3  GLubyte *orMask        read on entry, updated on exit
 *   %i4  GLubyte *andMask       read on entry, updated on exit
 *   %i5  viewport_z_enable      never read
 * Out:
 *   %o0  the value that arrived in %i1
 *
 * Per vertex: mask = clip_table[index]; clipMask[i] = mask; when the
 * mask is non-zero, c++, orMask |= mask, andMask &= mask.  At the end
 * *orMask gets the accumulated OR and *andMask gets the accumulated
 * AND, or 0 when c < count.
 *
 * Index construction: the raw IEEE bit patterns are handled as
 * integers.  "addcc r, r, r" moves the sign bit into the carry and
 * leaves the remaining bits shifted left by one; "subcc w2, v2" sets
 * the carry when the doubled w pattern is below the doubled coordinate
 * pattern as unsigned integers.  Every carry is shifted into %g3 with
 * addx, giving, from most to least significant bit:
 *   sign(w), sign(z), w<z, sign(y), w<y, sign(x), w<x
 *
 * Clobbers: %g1-%g5, integer condition codes.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/*
	 * A 64-bit register window needs a 128-byte save area at the
	 * stack pointer; a 64-byte frame would make a spill of this
	 * window land on top of the caller's saved locals.  Use the
	 * 176-byte V9 minimum frame there.  The 32-bit frame is kept
	 * at its original size.
	 */
#if __arch64__
	save		%sp, -176, %sp
#else
	save		%sp, -64, %sp
#endif

	call		__pc_tramp			! %o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1	! %g1 = &one_dot_zero
	add		%g1, 0x4, %g1			! %g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %l3
	LDPTR		[%i0 + V4F_START], %i0
	ldub		[%i3], %g2			! incoming *orMask
	ldub		[%i4], %g3			! incoming *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2			! %g2 = (and << 8) | or

	clr		%l2

	/*
	 * The loop below is bottom-tested with "bne": entered with
	 * count == 0 it would process a vertex anyway and then keep
	 * going until the 32-bit counter wraps.  Skip it and go
	 * straight to the mask write-back.
	 */
	cmp		%l3, 0
	be		3f
	 clr		%l0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

1:	ld		[%i0 + 0x0c], %g5	! LSU	Group	w as raw bits
	ld		[%i0 + 0x08], %g4	! LSU	Group	z
	addcc		%g5, %g5, %g5		! IEU1	Group	carry = sign(w)
	addx		%g0, 0x0, %g3		! IEU1	Group	index = carry
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(z)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < z
	ld		[%i0 + 0x04], %g4	! LSU	Group	y
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(y)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < y
	ld		[%i0 + 0x00], %g4	! LSU	Group	x
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group	carry = sign(x)
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group	carry = w < x
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group	mask = clip_table[index]
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU		clipMask[i] = mask (both paths)

	/* Clipped vertex: count it and fold the mask into both
	 * accumulators. */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0		c++
	or		%g4, 0xff, %g4		! IEU0	Group	(mask << 8) | 0xff
	or		%g2, %g3, %g2		! IEU1		or-part |= mask
	and		%g2, %g4, %g2		! IEU0	Group	and-part &= mask

2:	add		%l0, 1, %l0		! IEU0	Group	i++
	add		%i2, 1, %i2		! IEU0	Group	next clipMask byte
	cmp		%l0, %l3		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group	next input vertex

	/* Write back the masks.  The annulled delay slot zeroes the
	 * AND result only when the branch is taken, i.e. c < count. */
3:	stb		%g2, [%i3]		! LSU		*orMask
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %l3		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group	*andMask
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0		! return the second argument