Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / drivers / i965simple / brw_sf_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_defines.h"
34 #include "brw_context.h"
35 #include "brw_eu.h"
36 #include "brw_util.h"
37 #include "brw_sf.h"
38
39
40
41 /***********************************************************************
42 * Triangle setup.
43 */
44
45
46 static void alloc_regs( struct brw_sf_compile *c )
47 {
48 unsigned reg, i;
49
50 /* Values computed by fixed function unit:
51 */
52 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
53 c->det = brw_vec1_grf(1, 2);
54 c->dx0 = brw_vec1_grf(1, 3);
55 c->dx2 = brw_vec1_grf(1, 4);
56 c->dy0 = brw_vec1_grf(1, 5);
57 c->dy2 = brw_vec1_grf(1, 6);
58
59 /* z and 1/w passed in seperately:
60 */
61 c->z[0] = brw_vec1_grf(2, 0);
62 c->inv_w[0] = brw_vec1_grf(2, 1);
63 c->z[1] = brw_vec1_grf(2, 2);
64 c->inv_w[1] = brw_vec1_grf(2, 3);
65 c->z[2] = brw_vec1_grf(2, 4);
66 c->inv_w[2] = brw_vec1_grf(2, 5);
67
68 /* The vertices:
69 */
70 reg = 3;
71 for (i = 0; i < c->nr_verts; i++) {
72 c->vert[i] = brw_vec8_grf(reg, 0);
73 reg += c->nr_attr_regs;
74 }
75
76 /* Temporaries, allocated after last vertex reg.
77 */
78 c->inv_det = brw_vec1_grf(reg, 0); reg++;
79 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
80 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
81 c->tmp = brw_vec8_grf(reg, 0); reg++;
82
83 /* Note grf allocation:
84 */
85 c->prog_data.total_grf = reg;
86
87
88 /* Outputs of this program - interpolation coefficients for
89 * rasterization:
90 */
91 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
92 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
93 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
94 }
95
96
97 static void copy_z_inv_w( struct brw_sf_compile *c )
98 {
99 struct brw_compile *p = &c->func;
100 unsigned i;
101
102 brw_push_insn_state(p);
103
104 /* Copy both scalars with a single MOV:
105 */
106 for (i = 0; i < c->nr_verts; i++)
107 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
108
109 brw_pop_insn_state(p);
110 }
111
112
113 static void invert_det( struct brw_sf_compile *c)
114 {
115 brw_math(&c->func,
116 c->inv_det,
117 BRW_MATH_FUNCTION_INV,
118 BRW_MATH_SATURATE_NONE,
119 0,
120 c->det,
121 BRW_MATH_DATA_SCALAR,
122 BRW_MATH_PRECISION_FULL);
123
124 }
125
/* Bit set covering the window position and both colour inputs.
 * NOTE(review): the name (historical spelling kept) suggests these are
 * the attributes exempt from perspective correction; nothing in this
 * part of the file references the macro - confirm before relying on it.
 */
#define NON_PERPECTIVE_ATTRS \
   (FRAG_BIT_WPOS | FRAG_BIT_COL0 | FRAG_BIT_COL1)
129
130 static boolean calculate_masks( struct brw_sf_compile *c,
131 unsigned reg,
132 ushort *pc,
133 ushort *pc_persp,
134 ushort *pc_linear)
135 {
136 boolean is_last_attr = (reg == c->nr_setup_regs - 1);
137 unsigned persp_mask = c->key.persp_mask;
138 unsigned linear_mask = c->key.linear_mask;
139
140 debug_printf("persp_mask: %x\n", persp_mask);
141 debug_printf("linear_mask: %x\n", linear_mask);
142
143 *pc_persp = 0;
144 *pc_linear = 0;
145 *pc = 0xf;
146
147 if (persp_mask & (1 << (reg*2)))
148 *pc_persp = 0xf;
149
150 if (linear_mask & (1 << (reg*2)))
151 *pc_linear = 0xf;
152
153 /* Maybe only processs one attribute on the final round:
154 */
155 if (reg*2+1 < c->nr_setup_attrs) {
156 *pc |= 0xf0;
157
158 if (persp_mask & (1 << (reg*2+1)))
159 *pc_persp |= 0xf0;
160
161 if (linear_mask & (1 << (reg*2+1)))
162 *pc_linear |= 0xf0;
163 }
164
165 debug_printf("pc: %x\n", *pc);
166 debug_printf("pc_persp: %x\n", *pc_persp);
167 debug_printf("pc_linear: %x\n", *pc_linear);
168
169
170 return is_last_attr;
171 }
172
173
174
175 void brw_emit_tri_setup( struct brw_sf_compile *c )
176 {
177 struct brw_compile *p = &c->func;
178 unsigned i;
179
180 debug_printf("%s START ==============\n", __FUNCTION__);
181
182 c->nr_verts = 3;
183 alloc_regs(c);
184 invert_det(c);
185 copy_z_inv_w(c);
186
187
188 for (i = 0; i < c->nr_setup_regs; i++)
189 {
190 /* Pair of incoming attributes:
191 */
192 struct brw_reg a0 = offset(c->vert[0], i);
193 struct brw_reg a1 = offset(c->vert[1], i);
194 struct brw_reg a2 = offset(c->vert[2], i);
195 ushort pc = 0, pc_persp = 0, pc_linear = 0;
196 boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
197
198 if (pc_persp)
199 {
200 brw_set_predicate_control_flag_value(p, pc_persp);
201 brw_MUL(p, a0, a0, c->inv_w[0]);
202 brw_MUL(p, a1, a1, c->inv_w[1]);
203 brw_MUL(p, a2, a2, c->inv_w[2]);
204 }
205
206
207 /* Calculate coefficients for interpolated values:
208 */
209 if (pc_linear)
210 {
211 brw_set_predicate_control_flag_value(p, pc_linear);
212
213 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
214 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
215
216 /* calculate dA/dx
217 */
218 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
219 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
220 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
221
222 /* calculate dA/dy
223 */
224 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
225 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
226 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
227 }
228
229 {
230 brw_set_predicate_control_flag_value(p, pc);
231 /* start point for interpolation
232 */
233 brw_MOV(p, c->m3C0, a0);
234
235 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
236 * the send instruction:
237 */
238 brw_urb_WRITE(p,
239 brw_null_reg(),
240 0,
241 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
242 0, /* allocate */
243 1, /* used */
244 4, /* msg len */
245 0, /* response len */
246 last, /* eot */
247 last, /* writes complete */
248 i*4, /* offset */
249 BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
250 }
251 }
252
253 debug_printf("%s DONE ==============\n", __FUNCTION__);
254
255 }
256
257
258
259 void brw_emit_line_setup( struct brw_sf_compile *c )
260 {
261 struct brw_compile *p = &c->func;
262 unsigned i;
263
264
265 c->nr_verts = 2;
266 alloc_regs(c);
267 invert_det(c);
268 copy_z_inv_w(c);
269
270 for (i = 0; i < c->nr_setup_regs; i++)
271 {
272 /* Pair of incoming attributes:
273 */
274 struct brw_reg a0 = offset(c->vert[0], i);
275 struct brw_reg a1 = offset(c->vert[1], i);
276 ushort pc, pc_persp, pc_linear;
277 boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
278
279 if (pc_persp)
280 {
281 brw_set_predicate_control_flag_value(p, pc_persp);
282 brw_MUL(p, a0, a0, c->inv_w[0]);
283 brw_MUL(p, a1, a1, c->inv_w[1]);
284 }
285
286 /* Calculate coefficients for position, color:
287 */
288 if (pc_linear) {
289 brw_set_predicate_control_flag_value(p, pc_linear);
290
291 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
292
293 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
294 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
295
296 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
297 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
298 }
299
300 {
301 brw_set_predicate_control_flag_value(p, pc);
302
303 /* start point for interpolation
304 */
305 brw_MOV(p, c->m3C0, a0);
306
307 /* Copy m0..m3 to URB.
308 */
309 brw_urb_WRITE(p,
310 brw_null_reg(),
311 0,
312 brw_vec8_grf(0, 0),
313 0, /* allocate */
314 1, /* used */
315 4, /* msg len */
316 0, /* response len */
317 last, /* eot */
318 last, /* writes complete */
319 i*4, /* urb destination offset */
320 BRW_URB_SWIZZLE_TRANSPOSE);
321 }
322 }
323 }
324
325
326 /* Points setup - several simplifications as all attributes are
327 * constant across the face of the point (point sprites excluded!)
328 */
329 void brw_emit_point_setup( struct brw_sf_compile *c )
330 {
331 struct brw_compile *p = &c->func;
332 unsigned i;
333
334 c->nr_verts = 1;
335 alloc_regs(c);
336 copy_z_inv_w(c);
337
338 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
339 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
340
341 for (i = 0; i < c->nr_setup_regs; i++)
342 {
343 struct brw_reg a0 = offset(c->vert[0], i);
344 ushort pc, pc_persp, pc_linear;
345 boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
346
347 if (pc_persp)
348 {
349 /* This seems odd as the values are all constant, but the
350 * fragment shader will be expecting it:
351 */
352 brw_set_predicate_control_flag_value(p, pc_persp);
353 brw_MUL(p, a0, a0, c->inv_w[0]);
354 }
355
356
357 /* The delta values are always zero, just send the starting
358 * coordinate. Again, this is to fit in with the interpolation
359 * code in the fragment shader.
360 */
361 {
362 brw_set_predicate_control_flag_value(p, pc);
363
364 brw_MOV(p, c->m3C0, a0); /* constant value */
365
366 /* Copy m0..m3 to URB.
367 */
368 brw_urb_WRITE(p,
369 brw_null_reg(),
370 0,
371 brw_vec8_grf(0, 0),
372 0, /* allocate */
373 1, /* used */
374 4, /* msg len */
375 0, /* response len */
376 last, /* eot */
377 last, /* writes complete */
378 i*4, /* urb destination offset */
379 BRW_URB_SWIZZLE_TRANSPOSE);
380 }
381 }
382 }