Add Intel i965G/Q DRI driver.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_sf_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36
37 #include "intel_batchbuffer.h"
38
39 #include "brw_defines.h"
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "brw_util.h"
43 #include "brw_sf.h"
44
45
46 static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
47 struct brw_reg vert,
48 GLuint attr)
49 {
50 GLuint off = c->attr_to_idx[attr] / 2;
51 GLuint sub = c->attr_to_idx[attr] % 2;
52
53 return brw_vec4_grf(vert.nr + off, sub * 4);
54 }
55
56 static GLboolean have_attr(struct brw_sf_compile *c,
57 GLuint attr)
58 {
59 return (c->key.attrs & (1<<attr)) ? 1 : 0;
60 }
61
62
63
64 /***********************************************************************
65 * Twoside lighting
66 */
67 static void copy_bfc( struct brw_sf_compile *c,
68 struct brw_reg vert )
69 {
70 struct brw_compile *p = &c->func;
71 GLuint i;
72
73 for (i = 0; i < 2; i++) {
74 if (have_attr(c, VERT_RESULT_COL0+i) &&
75 have_attr(c, VERT_RESULT_BFC0+i))
76 brw_MOV(p,
77 get_vert_attr(c, vert, VERT_RESULT_COL0+i),
78 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
79 }
80 }
81
82
83 static void do_twoside_color( struct brw_sf_compile *c )
84 {
85 struct brw_compile *p = &c->func;
86 struct brw_instruction *if_insn;
87 GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
88
89 /* Already done in clip program:
90 */
91 if (c->key.primitive == SF_UNFILLED_TRIS)
92 return;
93
94 /* XXX: What happens if BFC isn't present? This could only happen
95 * for user-supplied vertex programs, as t_vp_build.c always does
96 * the right thing.
97 */
98 if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
99 !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
100 return;
101
102 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
103 * to get all channels active inside the IF. In the clipping code
104 * we run with NoMask, so it's not an option and we can use
105 * BRW_EXECUTE_1 for all comparisions.
106 */
107 brw_push_insn_state(p);
108 brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
109 if_insn = brw_IF(p, BRW_EXECUTE_4);
110 {
111 switch (c->nr_verts) {
112 case 3: copy_bfc(c, c->vert[2]);
113 case 2: copy_bfc(c, c->vert[1]);
114 case 1: copy_bfc(c, c->vert[0]);
115 }
116 }
117 brw_ENDIF(p, if_insn);
118 brw_pop_insn_state(p);
119 }
120
121
122
123 /***********************************************************************
124 * Flat shading
125 */
126
/* Key attribute bits for the two front-colour outputs (COL0 and COL1). */
#define VERT_RESULT_COLOR_BITS \
   ((1<<VERT_RESULT_COL0) | (1<<VERT_RESULT_COL1))
129
130 static void copy_colors( struct brw_sf_compile *c,
131 struct brw_reg dst,
132 struct brw_reg src)
133 {
134 struct brw_compile *p = &c->func;
135 GLuint i;
136
137 for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
138 if (have_attr(c,i))
139 brw_MOV(p,
140 get_vert_attr(c, dst, i),
141 get_vert_attr(c, src, i));
142 }
143 }
144
145
146
147 /* Need to use a computed jump to copy flatshaded attributes as the
148 * vertices are ordered according to y-coordinate before reaching this
149 * point, so the PV could be anywhere.
150 */
151 static void do_flatshade_triangle( struct brw_sf_compile *c )
152 {
153 struct brw_compile *p = &c->func;
154 struct brw_reg ip = brw_ip_reg();
155 GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
156 if (!nr)
157 return;
158
159 /* Already done in clip program:
160 */
161 if (c->key.primitive == SF_UNFILLED_TRIS)
162 return;
163
164 brw_push_insn_state(p);
165
166 brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
167 brw_JMPI(p, ip, ip, c->pv);
168
169 copy_colors(c, c->vert[1], c->vert[0]);
170 copy_colors(c, c->vert[2], c->vert[0]);
171 brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
172
173 copy_colors(c, c->vert[0], c->vert[1]);
174 copy_colors(c, c->vert[2], c->vert[1]);
175 brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
176
177 copy_colors(c, c->vert[0], c->vert[2]);
178 copy_colors(c, c->vert[1], c->vert[2]);
179
180 brw_pop_insn_state(p);
181 }
182
183
184 static void do_flatshade_line( struct brw_sf_compile *c )
185 {
186 struct brw_compile *p = &c->func;
187 struct brw_reg ip = brw_ip_reg();
188 GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
189
190 if (!nr)
191 return;
192
193 /* Already done in clip program:
194 */
195 if (c->key.primitive == SF_UNFILLED_TRIS)
196 return;
197
198 brw_push_insn_state(p);
199
200 brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
201 brw_JMPI(p, ip, ip, c->pv);
202 copy_colors(c, c->vert[1], c->vert[0]);
203
204 brw_JMPI(p, ip, ip, brw_imm_ud(nr));
205 copy_colors(c, c->vert[0], c->vert[1]);
206
207 brw_pop_insn_state(p);
208 }
209
210
211
212 /***********************************************************************
213 * Triangle setup.
214 */
215
216
217 static void alloc_regs( struct brw_sf_compile *c )
218 {
219 GLuint reg, i;
220
221 /* Values computed by fixed function unit:
222 */
223 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
224 c->det = brw_vec1_grf(1, 2);
225 c->dx0 = brw_vec1_grf(1, 3);
226 c->dx2 = brw_vec1_grf(1, 4);
227 c->dy0 = brw_vec1_grf(1, 5);
228 c->dy2 = brw_vec1_grf(1, 6);
229
230 /* z and 1/w passed in seperately:
231 */
232 c->z[0] = brw_vec1_grf(2, 0);
233 c->inv_w[0] = brw_vec1_grf(2, 1);
234 c->z[1] = brw_vec1_grf(2, 2);
235 c->inv_w[1] = brw_vec1_grf(2, 3);
236 c->z[2] = brw_vec1_grf(2, 4);
237 c->inv_w[2] = brw_vec1_grf(2, 5);
238
239 /* The vertices:
240 */
241 reg = 3;
242 for (i = 0; i < c->nr_verts; i++) {
243 c->vert[i] = brw_vec8_grf(reg, 0);
244 reg += c->nr_attr_regs;
245 }
246
247 /* Temporaries, allocated after last vertex reg.
248 */
249 c->inv_det = brw_vec1_grf(reg, 0); reg++;
250 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
251 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
252 c->tmp = brw_vec8_grf(reg, 0); reg++;
253
254 /* Note grf allocation:
255 */
256 c->prog_data.total_grf = reg;
257
258
259 /* Outputs of this program - interpolation coefficients for
260 * rasterization:
261 */
262 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
263 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
264 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
265 }
266
267
268 static void copy_z_inv_w( struct brw_sf_compile *c )
269 {
270 struct brw_compile *p = &c->func;
271 GLuint i;
272
273 brw_push_insn_state(p);
274
275 /* Copy both scalars with a single MOV:
276 */
277 for (i = 0; i < c->nr_verts; i++)
278 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
279
280 brw_pop_insn_state(p);
281 }
282
283
284 static void invert_det( struct brw_sf_compile *c)
285 {
286 /* Looks like we invert all 8 elements just to get 1/det in
287 * position 2 !?!
288 */
289 brw_math(&c->func,
290 c->inv_det,
291 BRW_MATH_FUNCTION_INV,
292 BRW_MATH_SATURATE_NONE,
293 0,
294 c->det,
295 BRW_MATH_DATA_SCALAR,
296 BRW_MATH_PRECISION_FULL);
297
298 }
299
/* Attributes that are excluded from the perspective (1/w) multiply:
 * window position and the two colours.
 */
#define NON_PERPECTIVE_ATTRS \
   (FRAG_BIT_WPOS | FRAG_BIT_COL0 | FRAG_BIT_COL1)
303
304 static GLboolean calculate_masks( struct brw_sf_compile *c,
305 GLuint reg,
306 GLushort *pc,
307 GLushort *pc_persp,
308 GLushort *pc_linear)
309 {
310 GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
311 GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
312 GLuint linear_mask;
313
314 if (c->key.do_flat_shading)
315 linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
316 else
317 linear_mask = c->key.attrs;
318
319 *pc_persp = 0;
320 *pc_linear = 0;
321 *pc = 0xf;
322
323 if (persp_mask & (1 << c->idx_to_attr[reg*2]))
324 *pc_persp = 0xf;
325
326 if (linear_mask & (1 << c->idx_to_attr[reg*2]))
327 *pc_linear = 0xf;
328
329 /* Maybe only processs one attribute on the final round:
330 */
331 if (reg*2+1 < c->nr_setup_attrs) {
332 *pc |= 0xf0;
333
334 if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
335 *pc_persp |= 0xf0;
336
337 if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
338 *pc_linear |= 0xf0;
339 }
340
341 return is_last_attr;
342 }
343
344
345
346 void brw_emit_tri_setup( struct brw_sf_compile *c )
347 {
348 struct brw_compile *p = &c->func;
349 GLuint i;
350
351 c->nr_verts = 3;
352 alloc_regs(c);
353 invert_det(c);
354 copy_z_inv_w(c);
355
356 if (c->key.do_twoside_color)
357 do_twoside_color(c);
358
359 if (c->key.do_flat_shading)
360 do_flatshade_triangle(c);
361
362
363 for (i = 0; i < c->nr_setup_regs; i++)
364 {
365 /* Pair of incoming attributes:
366 */
367 struct brw_reg a0 = offset(c->vert[0], i);
368 struct brw_reg a1 = offset(c->vert[1], i);
369 struct brw_reg a2 = offset(c->vert[2], i);
370 GLushort pc, pc_persp, pc_linear;
371 GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
372
373 if (pc_persp)
374 {
375 brw_set_predicate_control_flag_value(p, pc_persp);
376 brw_MUL(p, a0, a0, c->inv_w[0]);
377 brw_MUL(p, a1, a1, c->inv_w[1]);
378 brw_MUL(p, a2, a2, c->inv_w[2]);
379 }
380
381
382 /* Calculate coefficients for interpolated values:
383 */
384 if (pc_linear)
385 {
386 brw_set_predicate_control_flag_value(p, pc_linear);
387
388 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
389 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
390
391 /* calculate dA/dx
392 */
393 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
394 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
395 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
396
397 /* calculate dA/dy
398 */
399 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
400 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
401 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
402 }
403
404 {
405 brw_set_predicate_control_flag_value(p, pc);
406 /* start point for interpolation
407 */
408 brw_MOV(p, c->m3C0, a0);
409
410 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
411 * the send instruction:
412 */
413 brw_urb_WRITE(p,
414 brw_null_reg(),
415 0,
416 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
417 0, /* allocate */
418 1, /* used */
419 4, /* msg len */
420 0, /* response len */
421 last, /* eot */
422 last, /* writes complete */
423 i*4, /* offset */
424 BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
425 }
426 }
427 }
428
429
430
431 void brw_emit_line_setup( struct brw_sf_compile *c )
432 {
433 struct brw_compile *p = &c->func;
434 GLuint i;
435
436
437 c->nr_verts = 2;
438 alloc_regs(c);
439 invert_det(c);
440 copy_z_inv_w(c);
441
442 if (c->key.do_flat_shading)
443 do_flatshade_line(c);
444
445 for (i = 0; i < c->nr_setup_regs; i++)
446 {
447 /* Pair of incoming attributes:
448 */
449 struct brw_reg a0 = offset(c->vert[0], i);
450 struct brw_reg a1 = offset(c->vert[1], i);
451 GLushort pc, pc_persp, pc_linear;
452 GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
453
454 if (pc_persp)
455 {
456 brw_set_predicate_control_flag_value(p, pc_persp);
457 brw_MUL(p, a0, a0, c->inv_w[0]);
458 brw_MUL(p, a1, a1, c->inv_w[1]);
459 }
460
461 /* Calculate coefficients for position, color:
462 */
463 if (pc_linear) {
464 brw_set_predicate_control_flag_value(p, pc_linear);
465
466 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
467
468 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
469 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
470
471 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
472 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
473 }
474
475 {
476 brw_set_predicate_control_flag_value(p, pc);
477
478 /* start point for interpolation
479 */
480 brw_MOV(p, c->m3C0, a0);
481
482 /* Copy m0..m3 to URB.
483 */
484 brw_urb_WRITE(p,
485 brw_null_reg(),
486 0,
487 brw_vec8_grf(0, 0),
488 0, /* allocate */
489 1, /* used */
490 4, /* msg len */
491 0, /* response len */
492 last, /* eot */
493 last, /* writes complete */
494 i*4, /* urb destination offset */
495 BRW_URB_SWIZZLE_TRANSPOSE);
496 }
497 }
498 }
499
500
501 /* Points setup - several simplifications as all attributes are
502 * constant across the face of the point (point sprites excluded!)
503 */
504 void brw_emit_point_setup( struct brw_sf_compile *c )
505 {
506 struct brw_compile *p = &c->func;
507 GLuint i;
508
509 c->nr_verts = 1;
510 alloc_regs(c);
511 copy_z_inv_w(c);
512
513 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
514 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
515
516 for (i = 0; i < c->nr_setup_regs; i++)
517 {
518 struct brw_reg a0 = offset(c->vert[0], i);
519 GLushort pc, pc_persp, pc_linear;
520 GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
521
522 if (pc_persp)
523 {
524 /* This seems odd as the values are all constant, but the
525 * fragment shader will be expecting it:
526 */
527 brw_set_predicate_control_flag_value(p, pc_persp);
528 brw_MUL(p, a0, a0, c->inv_w[0]);
529 }
530
531
532 /* The delta values are always zero, just send the starting
533 * coordinate. Again, this is to fit in with the interpolation
534 * code in the fragment shader.
535 */
536 {
537 brw_set_predicate_control_flag_value(p, pc);
538
539 brw_MOV(p, c->m3C0, a0); /* constant value */
540
541 /* Copy m0..m3 to URB.
542 */
543 brw_urb_WRITE(p,
544 brw_null_reg(),
545 0,
546 brw_vec8_grf(0, 0),
547 0, /* allocate */
548 1, /* used */
549 4, /* msg len */
550 0, /* response len */
551 last, /* eot */
552 last, /* writes complete */
553 i*4, /* urb destination offset */
554 BRW_URB_SWIZZLE_TRANSPOSE);
555 }
556 }
557 }
558
559 void brw_emit_anyprim_setup( struct brw_sf_compile *c )
560 {
561 struct brw_compile *p = &c->func;
562 struct brw_reg ip = brw_ip_reg();
563 struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
564 struct brw_reg primmask;
565 struct brw_instruction *jmp;
566 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
567
568 alloc_regs(c);
569
570 primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
571
572 brw_MOV(p, primmask, brw_imm_ud(1));
573 brw_SHL(p, primmask, primmask, payload_prim);
574
575 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
576 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
577 (1<<_3DPRIM_TRISTRIP) |
578 (1<<_3DPRIM_TRIFAN) |
579 (1<<_3DPRIM_TRISTRIP_REVERSE) |
580 (1<<_3DPRIM_POLYGON) |
581 (1<<_3DPRIM_RECTLIST) |
582 (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
583 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
584 {
585 brw_emit_tri_setup( c );
586 /* note - thread killed in subroutine */
587 }
588 brw_land_fwd_jump(p, jmp);
589
590 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
591 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
592 (1<<_3DPRIM_LINESTRIP) |
593 (1<<_3DPRIM_LINELOOP) |
594 (1<<_3DPRIM_LINESTRIP_CONT) |
595 (1<<_3DPRIM_LINESTRIP_BF) |
596 (1<<_3DPRIM_LINESTRIP_CONT_BF)));
597 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
598 {
599 brw_emit_line_setup( c );
600 /* note - thread killed in subroutine */
601 }
602 brw_land_fwd_jump(p, jmp);
603
604 brw_emit_point_setup( c );
605 }
606
607
608
609