new X86 CPU detection code (Petr Sebor)
[mesa.git] / src / mesa / sparc / clip.S
1 /* $Id: clip.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
2
/*
 * Byte offsets of the GLvector4f fields used by the routines in this file,
 * plus LDPTR, the load instruction that fetches one pointer-sized field.
 *
 * The data and start fields are pointers, so the layout depends on the
 * pointer size of the ABI being compiled for (8-byte slots in the first
 * branch, 4-byte slots in the second).
 *
 * The selection is made on the 64-bit ABI macros (__arch64__ from GCC,
 * __sparcv9 from the Sun toolchain) and not on __sparc_v9__: the latter
 * only says that V9 instructions are available and is also defined for
 * 32-bit code built with -mcpu=v9 / -mcpu=ultrasparc, where pointers are
 * still 4 bytes wide and the 64-bit offsets would be wrong.
 */
#if defined(__arch64__) || defined(__sparcv9)
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif

/*
 * Bit masks OR-ed into the flags field; VEC_SIZE_n has the low n bits set.
 * Only VEC_SIZE_4 is used by the code in this file.
 */
#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15
25
/*
 * Read-only constants for the clip test routines.  They live in .text so
 * that the routines can reach them PC-relative: each routine computes the
 * address of one_dot_zero and then adds 4 to get clip_table, so the table
 * must directly follow the 4-byte constant.
 */
	.text
	.align	64

one_dot_zero:
	.word	0x3f800000		/* 1.0f */

/*
 * Clip mask lookup table (128 entries).  The idea is borrowed from the x86
 * Mesa asm and works on Sparc as well because add-with-carry is available.
 *
 * The routines build a 7-bit index by shifting in one carry bit per step,
 * most significant bit first.  Writing c0..c3 for the vertex words at
 * offsets 0x00, 0x04, 0x08, 0x0c, and m(c) for c with its sign bit
 * shifted out:
 *
 *	bit 6: sign bit of c3
 *	bit 5: sign bit of c2
 *	bit 4: m(c3) < m(c2)
 *	bit 3: sign bit of c1
 *	bit 2: m(c3) < m(c1)
 *	bit 1: sign bit of c0
 *	bit 0: m(c3) < m(c0)
 *
 * Each row below holds 8 entries (index bits 2..0); the row number is
 * index bits 6..3.
 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* row  0 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* row  1 */
	.byte	32, 33, 32, 34, 36, 37, 36, 38	/* row  2 */
	.byte	32, 33, 32, 34, 40, 41, 40, 42	/* row  3 */
	.byte	 0,  1,  0,  2,  4,  5,  4,  6	/* row  4 */
	.byte	 0,  1,  0,  2,  8,  9,  8, 10	/* row  5 */
	.byte	16, 17, 16, 18, 20, 21, 20, 22	/* row  6 */
	.byte	16, 17, 16, 18, 24, 25, 24, 26	/* row  7 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* row  8 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* row  9 */
	.byte	47, 45, 47, 46, 39, 37, 39, 38	/* row 10 */
	.byte	47, 45, 47, 46, 43, 41, 43, 42	/* row 11 */
	.byte	63, 61, 63, 62, 55, 53, 55, 54	/* row 12 */
	.byte	63, 61, 63, 62, 59, 57, 59, 58	/* row 13 */
	.byte	31, 29, 31, 30, 23, 21, 23, 22	/* row 14 */
	.byte	31, 29, 31, 30, 27, 25, 27, 26	/* row 15 */
54
55 /* GLvector4f *clip_vec, GLvector4f *proj_vec,
56 GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */
57
/*
 * __pc_tramp: leaf routine that returns immediately.
 *
 * Callers use "call __pc_tramp" only for its side effect: the call
 * instruction leaves its own address in %o7, which the caller then uses
 * (in the delay slot of the call) to form PC-relative data addresses.
 */
	.align	64
__pc_tramp:
	retl				! straight back to the caller
	 nop				! delay slot, nothing to do
62
/*
 * _mesa_sparc_cliptest_points4(GLvector4f *clip_vec, GLvector4f *proj_vec,
 *                              GLubyte clipMask[], GLubyte *orMask,
 *                              GLubyte *andMask)
 *
 * In (after save):  %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *                   %i3 = orMask,   %i4 = andMask
 * Out:              %o0 = proj_vec
 *
 * Each input vertex is four 32-bit words at offsets 0x00..0x0c, called
 * x, y, z, w below (conventional names; the code only fixes the offsets).
 * Input vertices are clip_vec->stride bytes apart, output vertices are
 * packed 16 bytes apart.
 *
 * For every vertex a mask is looked up in clip_table and stored in
 * clipMask[i].
 *   mask == 0: the output vertex is (x/w, y/w, z/w, 1/w).
 *   mask != 0: the output vertex is (0, 0, 0, 1.0), the mask is OR-ed into
 *              the running or-mask and AND-ed into the running and-mask,
 *              and the clipped-vertex counter c is incremented.
 * On exit *orMask receives the running or-mask and *andMask receives the
 * running and-mask, or 0 if c < count.
 *
 * The vertex count is kept in %l3, not in a global register: %g7 is
 * reserved for the system by the SPARC ABI (it is the thread pointer on
 * Linux and Solaris) and must not be clobbered.
 * A count of zero skips the loop entirely; the loop itself is bottom-tested
 * and would otherwise run until the counter wrapped around.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -64, %sp

	/* The call leaves its own address in %o7.  "." in the delay slot is
	 * that address + 4, so the immediate is (call - one_dot_zero) and
	 * the subtraction yields the address of one_dot_zero. */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! %f4 = 1.0f
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! %l1 = input stride in bytes
	ld	[%i0 + V4F_COUNT], %l3		! %l3 = count
	LDPTR	[%i0 + V4F_START], %i0		! %i0 = first input vertex
	LDPTR	[%i1 + V4F_START], %i5		! %i5 = first output vertex
	ldub	[%i3], %g2			! %g2 = *orMask
	ldub	[%i4], %g3			! %g3 = *andMask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	/* NOTE(review): the size field is set to 3 although the flags get
	 * VEC_SIZE_4 and four components are written per vertex -- confirm
	 * that 3 (and not 4) is intended. */
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = clip_vec count
	clr	%l2				! c = 0
	clr	%l0				! i = 0

	cmp	%l3, 0
	be	4f				! nothing to process
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 *
	 * %g3 accumulates the clip_table index: every addx shifts it left
	 * by one and appends the carry produced by the preceding addcc
	 * (sign bit of a component) or subcc (borrow, i.e. w with its sign
	 * bit shifted out is below that component treated the same way).
	 * The instruction order is kept as scheduled by the original author.
	 */
1:	ld	[%i0 + 0x0c], %f3		! %f3 = w as float
	ld	[%i0 + 0x0c], %g5		! %g5 = w as raw bits
	ld	[%i0 + 0x08], %g4		! %g4 = z
	fdivs	%f4, %f3, %f8			! %f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! carry = sign(w), %g5 = w << 1
	addx	%g0, 0x0, %g3			! index = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z), %g4 = z << 1
	addx	%g3, %g3, %g3			! append sign(z)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (z << 1)
	ld	[%i0 + 0x04], %g4		! %g4 = y (flags untouched)
	addx	%g3, %g3, %g3			! append w-below-z bit
	addcc	%g4, %g4, %g4			! carry = sign(y), %g4 = y << 1
	addx	%g3, %g3, %g3			! append sign(y)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (y << 1)
	ld	[%i0 + 0x00], %g4		! %g4 = x (flags untouched)
	addx	%g3, %g3, %g3			! append w-below-y bit
	addcc	%g4, %g4, %g4			! carry = sign(x), %g4 = x << 1
	addx	%g3, %g3, %g3			! append sign(x)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (x << 1)
	addx	%g3, %g3, %g3			! append w-below-x bit
	ldub	[%g1 + %g3], %g3		! %g3 = mask = clip_table[index]
	cmp	%g3, 0
	be	2f				! mask == 0: project the vertex
	 stb	%g3, [%i2]			! delay slot: clipMask[i] = mask

	/* mask != 0: count it, fold it into both masks, emit (0,0,0,1). */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	st	%g0, [%i5 + 0x00]		! out x = 0
	or	%g4, 0xff, %g4			! %g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! or-mask byte |= mask
	st	%g0, [%i5 + 0x04]		! out y = 0
	and	%g2, %g4, %g2			! and-mask byte &= mask
	st	%g0, [%i5 + 0x08]		! out z = 0
	b	3f
	 st	%f4, [%i5 + 0x0c]		! delay slot: out w = 1.0

	/* mask == 0: emit (x/w, y/w, z/w, 1/w). */
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0
	st	%f0, [%i5 + 0x00]
	fmuls	%f1, %f8, %f1
	st	%f1, [%i5 + 0x04]
	fmuls	%f2, %f8, %f2
	st	%f2, [%i5 + 0x08]
	st	%f8, [%i5 + 0x0c]

3:	add	%i5, 0x10, %i5			! next output vertex
	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask byte
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next input vertex

4:	stb	%g2, [%i3]			! *orMask = low byte
	srl	%g2, 8, %g3			! %g3 = running and-mask
	cmp	%l2, %l3
	bl,a	1f				! c < count ...
	 clr	%g3				! ... (annulled otherwise) and-mask = 0
1:	stb	%g3, [%i4]			! *andMask
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
158
/*
 * _mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec, GLvector4f *proj_vec,
 *                                 GLubyte clipMask[], GLubyte *orMask,
 *                                 GLubyte *andMask)
 *
 * In (after save):  %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *                   %i3 = orMask,   %i4 = andMask
 * Out:              %o0 = proj_vec
 *
 * Variant of the clip test that writes no vertex data.  Each input vertex
 * is four 32-bit words at offsets 0x00..0x0c, called x, y, z, w below
 * (conventional names; the code only fixes the offsets); vertices are
 * clip_vec->stride bytes apart.
 *
 * For every vertex a mask is looked up in clip_table and stored in
 * clipMask[i].  A non-zero mask is OR-ed into the running or-mask, AND-ed
 * into the running and-mask, and counted in c.  On exit *orMask receives
 * the running or-mask and *andMask receives the running and-mask, or 0 if
 * c < count.
 *
 * The vertex count is kept in %l3, not in a global register: %g7 is
 * reserved for the system by the SPARC ABI (it is the thread pointer on
 * Linux and Solaris) and must not be clobbered.
 * A count of zero skips the loop entirely; the loop itself is bottom-tested
 * and would otherwise run until the counter wrapped around.
 * proj_vec->start is not loaded here because nothing in this routine
 * reads it.
 *
 * NOTE(review): the flags, size and count fields of proj_vec are updated
 * and proj_vec is returned even though its data is never written --
 * confirm that callers expect this.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -64, %sp

	/* The call leaves its own address in %o7.  "." in the delay slot is
	 * that address + 4, so the immediate is (call - one_dot_zero) and
	 * the subtraction yields the address of one_dot_zero; clip_table
	 * starts 4 bytes after it. */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! %g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! %l1 = input stride in bytes
	ld	[%i0 + V4F_COUNT], %l3		! %l3 = count
	LDPTR	[%i0 + V4F_START], %i0		! %i0 = first input vertex
	ldub	[%i3], %g2			! %g2 = *orMask
	ldub	[%i4], %g3			! %g3 = *andMask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	/* NOTE(review): the size field is set to 3 although the flags get
	 * VEC_SIZE_4 -- confirm that 3 (and not 4) is intended. */
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = clip_vec count
	clr	%l2				! c = 0
	clr	%l0				! i = 0

	cmp	%l3, 0
	be	4f				! nothing to process
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 *
	 * %g3 accumulates the clip_table index: every addx shifts it left
	 * by one and appends the carry produced by the preceding addcc
	 * (sign bit of a component) or subcc (borrow, i.e. w with its sign
	 * bit shifted out is below that component treated the same way).
	 * The instruction order is kept as scheduled by the original author.
	 */
1:	ld	[%i0 + 0x0c], %g5		! %g5 = w as raw bits
	ld	[%i0 + 0x08], %g4		! %g4 = z
	addcc	%g5, %g5, %g5			! carry = sign(w), %g5 = w << 1
	addx	%g0, 0x0, %g3			! index = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z), %g4 = z << 1
	addx	%g3, %g3, %g3			! append sign(z)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (z << 1)
	ld	[%i0 + 0x04], %g4		! %g4 = y (flags untouched)
	addx	%g3, %g3, %g3			! append w-below-z bit
	addcc	%g4, %g4, %g4			! carry = sign(y), %g4 = y << 1
	addx	%g3, %g3, %g3			! append sign(y)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (y << 1)
	ld	[%i0 + 0x00], %g4		! %g4 = x (flags untouched)
	addx	%g3, %g3, %g3			! append w-below-y bit
	addcc	%g4, %g4, %g4			! carry = sign(x), %g4 = x << 1
	addx	%g3, %g3, %g3			! append sign(x)
	subcc	%g5, %g4, %g0			! carry = (w << 1) < (x << 1)
	addx	%g3, %g3, %g3			! append w-below-x bit
	ldub	[%g1 + %g3], %g3		! %g3 = mask = clip_table[index]
	cmp	%g3, 0
	be	2f				! mask == 0: nothing to accumulate
	 stb	%g3, [%i2]			! delay slot: clipMask[i] = mask

	/* mask != 0: count it and fold it into both masks. */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	or	%g4, 0xff, %g4			! %g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! or-mask byte |= mask
	and	%g2, %g4, %g2			! and-mask byte &= mask

2:	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask byte
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! delay slot: next input vertex

4:	stb	%g2, [%i3]			! *orMask = low byte
	srl	%g2, 8, %g3			! %g3 = running and-mask
	cmp	%l2, %l3
	bl,a	1f				! c < count ...
	 clr	%g3				! ... (annulled otherwise) and-mask = 0
1:	stb	%g3, [%i4]			! *andMask
	ret
	 restore %i1, 0x0, %o0			! return proj_vec