Merge commit 'origin/master' into i965g-restart
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
39 */
40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
41 {
42 if (reg.vstride)
43 reg.nr++;
44 return reg;
45 }
46
47
48 /* Payload R0:
49 *
50 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
51 * corresponding to each of the 16 execution channels.
52 * R0.1..8 -- ?
53 * R1.0 -- triangle vertex 0.X
54 * R1.1 -- triangle vertex 0.Y
55 * R1.2 -- tile 0 x,y coords (2 packed uwords)
56 * R1.3 -- tile 1 x,y coords (2 packed uwords)
57 * R1.4 -- tile 2 x,y coords (2 packed uwords)
58 * R1.5 -- tile 3 x,y coords (2 packed uwords)
59 * R1.6 -- ?
60 * R1.7 -- ?
61 * R1.8 -- ?
62 */
63
64 void emit_pixel_xy(struct brw_wm_compile *c,
65 const struct brw_reg *dst,
66 GLuint mask)
67 {
68 struct brw_compile *p = &c->func;
69 struct brw_reg r1 = brw_vec1_grf(1, 0);
70 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
71 struct brw_reg dst0_uw, dst1_uw;
72
73 brw_push_insn_state(p);
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 if (c->dispatch_width == 16) {
77 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
78 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
79 } else {
80 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
81 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
82 }
83
84 /* Calculate pixel centers by adding 1 or 0 to each of the
85 * micro-tile coordinates passed in r1.
86 */
87 if (mask & WRITEMASK_X) {
88 brw_ADD(p,
89 dst0_uw,
90 stride(suboffset(r1_uw, 4), 2, 4, 0),
91 brw_imm_v(0x10101010));
92 }
93
94 if (mask & WRITEMASK_Y) {
95 brw_ADD(p,
96 dst1_uw,
97 stride(suboffset(r1_uw,5), 2, 4, 0),
98 brw_imm_v(0x11001100));
99 }
100 brw_pop_insn_state(p);
101 }
102
103
104 void emit_delta_xy(struct brw_compile *p,
105 const struct brw_reg *dst,
106 GLuint mask,
107 const struct brw_reg *arg0)
108 {
109 struct brw_reg r1 = brw_vec1_grf(1, 0);
110
111 /* Calc delta X,Y by subtracting origin in r1 from the pixel
112 * centers.
113 */
114 if (mask & WRITEMASK_X) {
115 brw_ADD(p,
116 dst[0],
117 retype(arg0[0], BRW_REGISTER_TYPE_UW),
118 negate(r1));
119 }
120
121 if (mask & WRITEMASK_Y) {
122 brw_ADD(p,
123 dst[1],
124 retype(arg0[1], BRW_REGISTER_TYPE_UW),
125 negate(suboffset(r1,1)));
126
127 }
128 }
129
130 void emit_wpos_xy(struct brw_wm_compile *c,
131 const struct brw_reg *dst,
132 GLuint mask,
133 const struct brw_reg *arg0)
134 {
135 struct brw_compile *p = &c->func;
136
137 /* Calculate the pixel offset from window bottom left into destination
138 * X and Y channels.
139 */
140 if (mask & WRITEMASK_X) {
141 /* X' = X - origin */
142 brw_ADD(p,
143 dst[0],
144 retype(arg0[0], BRW_REGISTER_TYPE_W),
145 brw_imm_d(0 - c->key.origin_x));
146 }
147
148 if (mask & WRITEMASK_Y) {
149 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
150 brw_ADD(p,
151 dst[1],
152 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
153 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
154 }
155 }
156
157
158 void emit_pixel_w(struct brw_wm_compile *c,
159 const struct brw_reg *dst,
160 GLuint mask,
161 const struct brw_reg *arg0,
162 const struct brw_reg *deltas)
163 {
164 struct brw_compile *p = &c->func;
165
166 /* Don't need this if all you are doing is interpolating color, for
167 * instance.
168 */
169 if (mask & WRITEMASK_W) {
170 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
171
172 /* Calc 1/w - just linterp wpos[3] optimized by putting the
173 * result straight into a message reg.
174 */
175 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
176 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
177
178 /* Calc w */
179 if (c->dispatch_width == 16) {
180 brw_math_16(p, dst[3],
181 BRW_MATH_FUNCTION_INV,
182 BRW_MATH_SATURATE_NONE,
183 2, brw_null_reg(),
184 BRW_MATH_PRECISION_FULL);
185 } else {
186 brw_math(p, dst[3],
187 BRW_MATH_FUNCTION_INV,
188 BRW_MATH_SATURATE_NONE,
189 2, brw_null_reg(),
190 BRW_MATH_DATA_VECTOR,
191 BRW_MATH_PRECISION_FULL);
192 }
193 }
194 }
195
196
197 void emit_linterp(struct brw_compile *p,
198 const struct brw_reg *dst,
199 GLuint mask,
200 const struct brw_reg *arg0,
201 const struct brw_reg *deltas)
202 {
203 struct brw_reg interp[4];
204 GLuint nr = arg0[0].nr;
205 GLuint i;
206
207 interp[0] = brw_vec1_grf(nr, 0);
208 interp[1] = brw_vec1_grf(nr, 4);
209 interp[2] = brw_vec1_grf(nr+1, 0);
210 interp[3] = brw_vec1_grf(nr+1, 4);
211
212 for (i = 0; i < 4; i++) {
213 if (mask & (1<<i)) {
214 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
215 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
216 }
217 }
218 }
219
220
221 void emit_pinterp(struct brw_compile *p,
222 const struct brw_reg *dst,
223 GLuint mask,
224 const struct brw_reg *arg0,
225 const struct brw_reg *deltas,
226 const struct brw_reg *w)
227 {
228 struct brw_reg interp[4];
229 GLuint nr = arg0[0].nr;
230 GLuint i;
231
232 interp[0] = brw_vec1_grf(nr, 0);
233 interp[1] = brw_vec1_grf(nr, 4);
234 interp[2] = brw_vec1_grf(nr+1, 0);
235 interp[3] = brw_vec1_grf(nr+1, 4);
236
237 for (i = 0; i < 4; i++) {
238 if (mask & (1<<i)) {
239 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
240 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
241 }
242 }
243 for (i = 0; i < 4; i++) {
244 if (mask & (1<<i)) {
245 brw_MUL(p, dst[i], dst[i], w[3]);
246 }
247 }
248 }
249
250
251 void emit_cinterp(struct brw_compile *p,
252 const struct brw_reg *dst,
253 GLuint mask,
254 const struct brw_reg *arg0)
255 {
256 struct brw_reg interp[4];
257 GLuint nr = arg0[0].nr;
258 GLuint i;
259
260 interp[0] = brw_vec1_grf(nr, 0);
261 interp[1] = brw_vec1_grf(nr, 4);
262 interp[2] = brw_vec1_grf(nr+1, 0);
263 interp[3] = brw_vec1_grf(nr+1, 4);
264
265 for (i = 0; i < 4; i++) {
266 if (mask & (1<<i)) {
267 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
268 }
269 }
270 }
271
272 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
273 void emit_frontfacing(struct brw_compile *p,
274 const struct brw_reg *dst,
275 GLuint mask)
276 {
277 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
278 GLuint i;
279
280 if (!(mask & WRITEMASK_XYZW))
281 return;
282
283 for (i = 0; i < 4; i++) {
284 if (mask & (1<<i)) {
285 brw_MOV(p, dst[i], brw_imm_f(0.0));
286 }
287 }
288
289 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
290 * us front face
291 */
292 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
293 for (i = 0; i < 4; i++) {
294 if (mask & (1<<i)) {
295 brw_MOV(p, dst[i], brw_imm_f(1.0));
296 }
297 }
298 brw_set_predicate_control_flag_value(p, 0xff);
299 }
300
301 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
302 * looking like:
303 *
304 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
305 *
306 * and we're trying to produce:
307 *
308 * DDX DDY
309 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
310 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
311 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
312 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
313 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
314 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
315 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
316 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
317 *
318 * and add another set of two more subspans if in 16-pixel dispatch mode.
319 *
320 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
321 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
322 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
323 * between each other. We could probably do it like ddx and swizzle the right
324 * order later, but bail for now and just produce
325 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
326 */
327 void emit_ddxy(struct brw_compile *p,
328 const struct brw_reg *dst,
329 GLuint mask,
330 GLboolean is_ddx,
331 const struct brw_reg *arg0)
332 {
333 int i;
334 struct brw_reg src0, src1;
335
336 if (mask & SATURATE)
337 brw_set_saturate(p, 1);
338 for (i = 0; i < 4; i++ ) {
339 if (mask & (1<<i)) {
340 if (is_ddx) {
341 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
342 BRW_REGISTER_TYPE_F,
343 BRW_VERTICAL_STRIDE_2,
344 BRW_WIDTH_2,
345 BRW_HORIZONTAL_STRIDE_0,
346 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
347 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
348 BRW_REGISTER_TYPE_F,
349 BRW_VERTICAL_STRIDE_2,
350 BRW_WIDTH_2,
351 BRW_HORIZONTAL_STRIDE_0,
352 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
353 } else {
354 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
355 BRW_REGISTER_TYPE_F,
356 BRW_VERTICAL_STRIDE_4,
357 BRW_WIDTH_4,
358 BRW_HORIZONTAL_STRIDE_0,
359 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
360 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
361 BRW_REGISTER_TYPE_F,
362 BRW_VERTICAL_STRIDE_4,
363 BRW_WIDTH_4,
364 BRW_HORIZONTAL_STRIDE_0,
365 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
366 }
367 brw_ADD(p, dst[i], src0, negate(src1));
368 }
369 }
370 if (mask & SATURATE)
371 brw_set_saturate(p, 0);
372 }
373
374 void emit_alu1(struct brw_compile *p,
375 struct brw_instruction *(*func)(struct brw_compile *,
376 struct brw_reg,
377 struct brw_reg),
378 const struct brw_reg *dst,
379 GLuint mask,
380 const struct brw_reg *arg0)
381 {
382 GLuint i;
383
384 if (mask & SATURATE)
385 brw_set_saturate(p, 1);
386
387 for (i = 0; i < 4; i++) {
388 if (mask & (1<<i)) {
389 func(p, dst[i], arg0[i]);
390 }
391 }
392
393 if (mask & SATURATE)
394 brw_set_saturate(p, 0);
395 }
396
397
398 void emit_alu2(struct brw_compile *p,
399 struct brw_instruction *(*func)(struct brw_compile *,
400 struct brw_reg,
401 struct brw_reg,
402 struct brw_reg),
403 const struct brw_reg *dst,
404 GLuint mask,
405 const struct brw_reg *arg0,
406 const struct brw_reg *arg1)
407 {
408 GLuint i;
409
410 if (mask & SATURATE)
411 brw_set_saturate(p, 1);
412
413 for (i = 0; i < 4; i++) {
414 if (mask & (1<<i)) {
415 func(p, dst[i], arg0[i], arg1[i]);
416 }
417 }
418
419 if (mask & SATURATE)
420 brw_set_saturate(p, 0);
421 }
422
423
424 void emit_mad(struct brw_compile *p,
425 const struct brw_reg *dst,
426 GLuint mask,
427 const struct brw_reg *arg0,
428 const struct brw_reg *arg1,
429 const struct brw_reg *arg2)
430 {
431 GLuint i;
432
433 for (i = 0; i < 4; i++) {
434 if (mask & (1<<i)) {
435 brw_MUL(p, dst[i], arg0[i], arg1[i]);
436
437 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
438 brw_ADD(p, dst[i], dst[i], arg2[i]);
439 brw_set_saturate(p, 0);
440 }
441 }
442 }
443
444 void emit_lrp(struct brw_compile *p,
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0,
448 const struct brw_reg *arg1,
449 const struct brw_reg *arg2)
450 {
451 GLuint i;
452
453 /* Uses dst as a temporary:
454 */
455 for (i = 0; i < 4; i++) {
456 if (mask & (1<<i)) {
457 /* Can I use the LINE instruction for this?
458 */
459 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
460 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
461
462 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
463 brw_MAC(p, dst[i], arg0[i], arg1[i]);
464 brw_set_saturate(p, 0);
465 }
466 }
467 }
468
469 void emit_sop(struct brw_compile *p,
470 const struct brw_reg *dst,
471 GLuint mask,
472 GLuint cond,
473 const struct brw_reg *arg0,
474 const struct brw_reg *arg1)
475 {
476 GLuint i;
477
478 for (i = 0; i < 4; i++) {
479 if (mask & (1<<i)) {
480 brw_push_insn_state(p);
481 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
482 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
483 brw_MOV(p, dst[i], brw_imm_f(0));
484 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
485 brw_MOV(p, dst[i], brw_imm_f(1.0));
486 brw_pop_insn_state(p);
487 }
488 }
489 }
490
491 static void emit_slt( struct brw_compile *p,
492 const struct brw_reg *dst,
493 GLuint mask,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1 )
496 {
497 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
498 }
499
500 static void emit_sle( struct brw_compile *p,
501 const struct brw_reg *dst,
502 GLuint mask,
503 const struct brw_reg *arg0,
504 const struct brw_reg *arg1 )
505 {
506 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
507 }
508
509 static void emit_sgt( struct brw_compile *p,
510 const struct brw_reg *dst,
511 GLuint mask,
512 const struct brw_reg *arg0,
513 const struct brw_reg *arg1 )
514 {
515 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
516 }
517
518 static void emit_sge( struct brw_compile *p,
519 const struct brw_reg *dst,
520 GLuint mask,
521 const struct brw_reg *arg0,
522 const struct brw_reg *arg1 )
523 {
524 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
525 }
526
527 static void emit_seq( struct brw_compile *p,
528 const struct brw_reg *dst,
529 GLuint mask,
530 const struct brw_reg *arg0,
531 const struct brw_reg *arg1 )
532 {
533 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
534 }
535
536 static void emit_sne( struct brw_compile *p,
537 const struct brw_reg *dst,
538 GLuint mask,
539 const struct brw_reg *arg0,
540 const struct brw_reg *arg1 )
541 {
542 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
543 }
544
545 static void emit_cmp( struct brw_compile *p,
546 const struct brw_reg *dst,
547 GLuint mask,
548 const struct brw_reg *arg0,
549 const struct brw_reg *arg1,
550 const struct brw_reg *arg2 )
551 {
552 GLuint i;
553
554 for (i = 0; i < 4; i++) {
555 if (mask & (1<<i)) {
556 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
557 brw_MOV(p, dst[i], arg2[i]);
558 brw_set_saturate(p, 0);
559
560 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
561
562 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
563 brw_MOV(p, dst[i], arg1[i]);
564 brw_set_saturate(p, 0);
565 brw_set_predicate_control_flag_value(p, 0xff);
566 }
567 }
568 }
569
570 void emit_max(struct brw_compile *p,
571 const struct brw_reg *dst,
572 GLuint mask,
573 const struct brw_reg *arg0,
574 const struct brw_reg *arg1)
575 {
576 GLuint i;
577
578 for (i = 0; i < 4; i++) {
579 if (mask & (1<<i)) {
580 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
581 brw_MOV(p, dst[i], arg0[i]);
582 brw_set_saturate(p, 0);
583
584 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
585
586 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
587 brw_MOV(p, dst[i], arg1[i]);
588 brw_set_saturate(p, 0);
589 brw_set_predicate_control_flag_value(p, 0xff);
590 }
591 }
592 }
593
594 void emit_min(struct brw_compile *p,
595 const struct brw_reg *dst,
596 GLuint mask,
597 const struct brw_reg *arg0,
598 const struct brw_reg *arg1)
599 {
600 GLuint i;
601
602 for (i = 0; i < 4; i++) {
603 if (mask & (1<<i)) {
604 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
605 brw_MOV(p, dst[i], arg1[i]);
606 brw_set_saturate(p, 0);
607
608 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
609
610 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
611 brw_MOV(p, dst[i], arg0[i]);
612 brw_set_saturate(p, 0);
613 brw_set_predicate_control_flag_value(p, 0xff);
614 }
615 }
616 }
617
618
619 void emit_dp3(struct brw_compile *p,
620 const struct brw_reg *dst,
621 GLuint mask,
622 const struct brw_reg *arg0,
623 const struct brw_reg *arg1)
624 {
625 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
626
627 if (!(mask & WRITEMASK_XYZW))
628 return; /* Do not emit dead code */
629
630 assert(is_power_of_two(mask & WRITEMASK_XYZW));
631
632 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
633 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
634
635 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
636 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
637 brw_set_saturate(p, 0);
638 }
639
640
641 void emit_dp4(struct brw_compile *p,
642 const struct brw_reg *dst,
643 GLuint mask,
644 const struct brw_reg *arg0,
645 const struct brw_reg *arg1)
646 {
647 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
648
649 if (!(mask & WRITEMASK_XYZW))
650 return; /* Do not emit dead code */
651
652 assert(is_power_of_two(mask & WRITEMASK_XYZW));
653
654 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
655 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
656 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
657
658 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
659 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
660 brw_set_saturate(p, 0);
661 }
662
663
664 void emit_dph(struct brw_compile *p,
665 const struct brw_reg *dst,
666 GLuint mask,
667 const struct brw_reg *arg0,
668 const struct brw_reg *arg1)
669 {
670 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
671
672 if (!(mask & WRITEMASK_XYZW))
673 return; /* Do not emit dead code */
674
675 assert(is_power_of_two(mask & WRITEMASK_XYZW));
676
677 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
678 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
679 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
680
681 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
682 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
683 brw_set_saturate(p, 0);
684 }
685
686
687 void emit_xpd(struct brw_compile *p,
688 const struct brw_reg *dst,
689 GLuint mask,
690 const struct brw_reg *arg0,
691 const struct brw_reg *arg1)
692 {
693 GLuint i;
694
695 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
696
697 for (i = 0 ; i < 3; i++) {
698 if (mask & (1<<i)) {
699 GLuint i2 = (i+2)%3;
700 GLuint i1 = (i+1)%3;
701
702 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
703
704 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
705 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
706 brw_set_saturate(p, 0);
707 }
708 }
709 }
710
711
712 void emit_math1(struct brw_wm_compile *c,
713 GLuint function,
714 const struct brw_reg *dst,
715 GLuint mask,
716 const struct brw_reg *arg0)
717 {
718 struct brw_compile *p = &c->func;
719 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
720 GLuint saturate = ((mask & SATURATE) ?
721 BRW_MATH_SATURATE_SATURATE :
722 BRW_MATH_SATURATE_NONE);
723
724 if (!(mask & WRITEMASK_XYZW))
725 return; /* Do not emit dead code */
726
727 assert(is_power_of_two(mask & WRITEMASK_XYZW));
728
729 /* If compressed, this will write message reg 2,3 from arg0.x's 16
730 * channels.
731 */
732 brw_MOV(p, brw_message_reg(2), arg0[0]);
733
734 /* Send two messages to perform all 16 operations:
735 */
736 brw_push_insn_state(p);
737 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
738 brw_math(p,
739 dst[dst_chan],
740 function,
741 saturate,
742 2,
743 brw_null_reg(),
744 BRW_MATH_DATA_VECTOR,
745 BRW_MATH_PRECISION_FULL);
746
747 if (c->dispatch_width == 16) {
748 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
749 brw_math(p,
750 offset(dst[dst_chan],1),
751 function,
752 saturate,
753 3,
754 brw_null_reg(),
755 BRW_MATH_DATA_VECTOR,
756 BRW_MATH_PRECISION_FULL);
757 }
758 brw_pop_insn_state(p);
759 }
760
761
762 void emit_math2(struct brw_wm_compile *c,
763 GLuint function,
764 const struct brw_reg *dst,
765 GLuint mask,
766 const struct brw_reg *arg0,
767 const struct brw_reg *arg1)
768 {
769 struct brw_compile *p = &c->func;
770 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
771 GLuint saturate = ((mask & SATURATE) ?
772 BRW_MATH_SATURATE_SATURATE :
773 BRW_MATH_SATURATE_NONE);
774
775 if (!(mask & WRITEMASK_XYZW))
776 return; /* Do not emit dead code */
777
778 assert(is_power_of_two(mask & WRITEMASK_XYZW));
779
780 brw_push_insn_state(p);
781
782 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
783 brw_MOV(p, brw_message_reg(2), arg0[0]);
784 if (c->dispatch_width == 16) {
785 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
786 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
787 }
788
789 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
790 brw_MOV(p, brw_message_reg(3), arg1[0]);
791 if (c->dispatch_width == 16) {
792 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
793 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
794 }
795
796 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
797 brw_math(p,
798 dst[dst_chan],
799 function,
800 saturate,
801 2,
802 brw_null_reg(),
803 BRW_MATH_DATA_VECTOR,
804 BRW_MATH_PRECISION_FULL);
805
806 /* Send two messages to perform all 16 operations:
807 */
808 if (c->dispatch_width == 16) {
809 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
810 brw_math(p,
811 offset(dst[dst_chan],1),
812 function,
813 saturate,
814 4,
815 brw_null_reg(),
816 BRW_MATH_DATA_VECTOR,
817 BRW_MATH_PRECISION_FULL);
818 }
819 brw_pop_insn_state(p);
820 }
821
822
823 void emit_tex(struct brw_wm_compile *c,
824 struct brw_reg *dst,
825 GLuint dst_flags,
826 struct brw_reg *arg,
827 struct brw_reg depth_payload,
828 GLuint tex_idx,
829 GLuint sampler,
830 GLboolean shadow)
831 {
832 struct brw_compile *p = &c->func;
833 struct brw_reg dst_retyped;
834 GLuint cur_mrf = 2, response_length;
835 GLuint i, nr_texcoords;
836 GLuint emit;
837 GLuint msg_type;
838 GLuint mrf_per_channel;
839 GLuint simd_mode;
840
841 if (c->dispatch_width == 16) {
842 mrf_per_channel = 2;
843 response_length = 8;
844 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
845 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
846 } else {
847 mrf_per_channel = 1;
848 response_length = 4;
849 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
850 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
851 }
852
853 /* How many input regs are there?
854 */
855 switch (tex_idx) {
856 case TEXTURE_1D_INDEX:
857 emit = WRITEMASK_X;
858 nr_texcoords = 1;
859 break;
860 case TEXTURE_2D_INDEX:
861 case TEXTURE_RECT_INDEX:
862 emit = WRITEMASK_XY;
863 nr_texcoords = 2;
864 break;
865 case TEXTURE_3D_INDEX:
866 case TEXTURE_CUBE_INDEX:
867 emit = WRITEMASK_XYZ;
868 nr_texcoords = 3;
869 break;
870 default:
871 /* unexpected target */
872 abort();
873 }
874
875 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
876 if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
877 nr_texcoords = 3;
878
879 /* For shadow comparisons, we have to supply u,v,r. */
880 if (shadow)
881 nr_texcoords = 3;
882
883 /* Emit the texcoords. */
884 for (i = 0; i < nr_texcoords; i++) {
885 if (emit & (1<<i))
886 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
887 else
888 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
889 cur_mrf += mrf_per_channel;
890 }
891
892 /* Fill in the shadow comparison reference value. */
893 if (shadow) {
894 if (BRW_IS_IGDNG(p->brw)) {
895 /* Fill in the cube map array index value. */
896 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
897 cur_mrf += mrf_per_channel;
898 } else if (c->dispatch_width == 8) {
899 /* Fill in the LOD bias value. */
900 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
901 cur_mrf += mrf_per_channel;
902 }
903 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
904 cur_mrf += mrf_per_channel;
905 }
906
907 if (BRW_IS_IGDNG(p->brw)) {
908 if (shadow)
909 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
910 else
911 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
912 } else {
913 /* Note that G45 and older determines shadow compare and dispatch width
914 * from message length for most messages.
915 */
916 if (c->dispatch_width == 16 && shadow)
917 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
918 else
919 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
920 }
921
922 brw_SAMPLE(p,
923 dst_retyped,
924 1,
925 retype(depth_payload, BRW_REGISTER_TYPE_UW),
926 SURF_INDEX_TEXTURE(sampler),
927 sampler,
928 dst_flags & WRITEMASK_XYZW,
929 msg_type,
930 response_length,
931 cur_mrf - 1,
932 0,
933 1,
934 simd_mode);
935 }
936
937
938 void emit_txb(struct brw_wm_compile *c,
939 struct brw_reg *dst,
940 GLuint dst_flags,
941 struct brw_reg *arg,
942 struct brw_reg depth_payload,
943 GLuint tex_idx,
944 GLuint sampler)
945 {
946 struct brw_compile *p = &c->func;
947 GLuint msgLength;
948 GLuint msg_type;
949 GLuint mrf_per_channel;
950 GLuint response_length;
951 struct brw_reg dst_retyped;
952
953 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
954 * samples, so we'll use the 16-wide instruction, leave the second halves
955 * undefined, and trust the execution mask to keep the undefined pixels
956 * from mattering.
957 */
958 if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
959 if (BRW_IS_IGDNG(p->brw))
960 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
961 else
962 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
963 mrf_per_channel = 2;
964 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
965 response_length = 8;
966 } else {
967 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
968 mrf_per_channel = 1;
969 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
970 response_length = 4;
971 }
972
973 /* Shadow ignored for txb. */
974 switch (tex_idx) {
975 case TEXTURE_1D_INDEX:
976 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
977 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
978 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
979 break;
980 case TEXTURE_2D_INDEX:
981 case TEXTURE_RECT_INDEX:
982 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
983 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
984 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
985 break;
986 case TEXTURE_3D_INDEX:
987 case TEXTURE_CUBE_INDEX:
988 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
989 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
990 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
991 break;
992 default:
993 /* unexpected target */
994 abort();
995 }
996
997 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
998 msgLength = 2 + 4 * mrf_per_channel - 1;
999
1000 brw_SAMPLE(p,
1001 dst_retyped,
1002 1,
1003 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1004 SURF_INDEX_TEXTURE(sampler),
1005 sampler,
1006 dst_flags & WRITEMASK_XYZW,
1007 msg_type,
1008 response_length,
1009 msgLength,
1010 0,
1011 1,
1012 BRW_SAMPLER_SIMD_MODE_SIMD16);
1013 }
1014
1015
1016 static void emit_lit(struct brw_wm_compile *c,
1017 const struct brw_reg *dst,
1018 GLuint mask,
1019 const struct brw_reg *arg0)
1020 {
1021 struct brw_compile *p = &c->func;
1022
1023 assert((mask & WRITEMASK_XW) == 0);
1024
1025 if (mask & WRITEMASK_Y) {
1026 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1027 brw_MOV(p, dst[1], arg0[0]);
1028 brw_set_saturate(p, 0);
1029 }
1030
1031 if (mask & WRITEMASK_Z) {
1032 emit_math2(c, BRW_MATH_FUNCTION_POW,
1033 &dst[2],
1034 WRITEMASK_X | (mask & SATURATE),
1035 &arg0[1],
1036 &arg0[3]);
1037 }
1038
1039 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1040 * some of the POW calculations above, but 16-wide iff statements
1041 * seem to lock c1 hardware, so this is a nasty workaround:
1042 */
1043 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1044 {
1045 if (mask & WRITEMASK_Y)
1046 brw_MOV(p, dst[1], brw_imm_f(0));
1047
1048 if (mask & WRITEMASK_Z)
1049 brw_MOV(p, dst[2], brw_imm_f(0));
1050 }
1051 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1052 }
1053
1054
1055 /* Kill pixel - set execution mask to zero for those pixels which
1056 * fail.
1057 */
1058 static void emit_kil( struct brw_wm_compile *c,
1059 struct brw_reg *arg0)
1060 {
1061 struct brw_compile *p = &c->func;
1062 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1063 GLuint i;
1064
1065 /* XXX - usually won't need 4 compares!
1066 */
1067 for (i = 0; i < 4; i++) {
1068 brw_push_insn_state(p);
1069 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1070 brw_set_predicate_control_flag_value(p, 0xff);
1071 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1072 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1073 brw_pop_insn_state(p);
1074 }
1075 }
1076
1077 /* KIL_NV kills the pixels that are currently executing, not based on a test
1078 * of the arguments.
1079 */
1080 static void emit_kil_nv( struct brw_wm_compile *c )
1081 {
1082 struct brw_compile *p = &c->func;
1083 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1084
1085 brw_push_insn_state(p);
1086 brw_set_mask_control(p, BRW_MASK_DISABLE);
1087 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1088 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1089 brw_pop_insn_state(p);
1090 }
1091
1092 static void fire_fb_write( struct brw_wm_compile *c,
1093 GLuint base_reg,
1094 GLuint nr,
1095 GLuint target,
1096 GLuint eot )
1097 {
1098 struct brw_compile *p = &c->func;
1099 struct brw_reg dst;
1100
1101 if (c->dispatch_width == 16)
1102 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1103 else
1104 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1105
1106 /* Pass through control information:
1107 */
1108 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1109 {
1110 brw_push_insn_state(p);
1111 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1112 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1113 brw_MOV(p,
1114 brw_message_reg(base_reg + 1),
1115 brw_vec8_grf(1, 0));
1116 brw_pop_insn_state(p);
1117 }
1118
1119 /* Send framebuffer write message: */
1120 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1121 brw_fb_WRITE(p,
1122 dst,
1123 base_reg,
1124 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1125 target,
1126 nr,
1127 0,
1128 eot);
1129 }
1130
1131
1132 static void emit_aa( struct brw_wm_compile *c,
1133 struct brw_reg *arg1,
1134 GLuint reg )
1135 {
1136 struct brw_compile *p = &c->func;
1137 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1138 GLuint off = c->key.aa_dest_stencil_reg % 2;
1139 struct brw_reg aa = offset(arg1[comp], off);
1140
1141 brw_push_insn_state(p);
1142 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1143 brw_MOV(p, brw_message_reg(reg), aa);
1144 brw_pop_insn_state(p);
1145 }
1146
1147
1148 /* Post-fragment-program processing. Send the results to the
1149 * framebuffer.
1150 * \param arg0 the fragment color
1151 * \param arg1 the pass-through depth value
1152 * \param arg2 the shader-computed depth value
1153 */
1154 void emit_fb_write(struct brw_wm_compile *c,
1155 struct brw_reg *arg0,
1156 struct brw_reg *arg1,
1157 struct brw_reg *arg2,
1158 GLuint target,
1159 GLuint eot)
1160 {
1161 struct brw_compile *p = &c->func;
1162 struct brw_context *brw = p->brw;
1163 GLuint nr = 2;
1164 GLuint channel;
1165
1166 /* Reserve a space for AA - may not be needed:
1167 */
1168 if (c->key.aa_dest_stencil_reg)
1169 nr += 1;
1170
1171 /* I don't really understand how this achieves the color interleave
1172 * (ie RGBARGBA) in the result: [Do the saturation here]
1173 */
1174 brw_push_insn_state(p);
1175
1176 for (channel = 0; channel < 4; channel++) {
1177 if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
1178 /* By setting the high bit of the MRF register number, we indicate
1179 * that we want COMPR4 mode - instead of doing the usual destination
1180 * + 1 for the second half we get destination + 4.
1181 */
1182 brw_MOV(p,
1183 brw_message_reg(nr + channel + (1 << 7)),
1184 arg0[channel]);
1185 } else {
1186 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1187 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1188 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1189 brw_MOV(p,
1190 brw_message_reg(nr + channel),
1191 arg0[channel]);
1192
1193 if (c->dispatch_width == 16) {
1194 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1195 brw_MOV(p,
1196 brw_message_reg(nr + channel + 4),
1197 sechalf(arg0[channel]));
1198 }
1199 }
1200 }
1201 /* skip over the regs populated above:
1202 */
1203 nr += 8;
1204 brw_pop_insn_state(p);
1205
1206 if (c->key.source_depth_to_render_target)
1207 {
1208 if (c->key.computes_depth)
1209 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1210 else
1211 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1212
1213 nr += 2;
1214 }
1215
1216 if (c->key.dest_depth_reg)
1217 {
1218 GLuint comp = c->key.dest_depth_reg / 2;
1219 GLuint off = c->key.dest_depth_reg % 2;
1220
1221 if (off != 0) {
1222 brw_push_insn_state(p);
1223 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1224
1225 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1226 /* 2nd half? */
1227 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1228 brw_pop_insn_state(p);
1229 }
1230 else {
1231 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1232 }
1233 nr += 2;
1234 }
1235
1236 if (!c->key.runtime_check_aads_emit) {
1237 if (c->key.aa_dest_stencil_reg)
1238 emit_aa(c, arg1, 2);
1239
1240 fire_fb_write(c, 0, nr, target, eot);
1241 }
1242 else {
1243 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1244 struct brw_reg ip = brw_ip_reg();
1245 struct brw_instruction *jmp;
1246
1247 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1248 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1249 brw_AND(p,
1250 v1_null_ud,
1251 get_element_ud(brw_vec8_grf(1,0), 6),
1252 brw_imm_ud(1<<26));
1253
1254 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1255 {
1256 emit_aa(c, arg1, 2);
1257 fire_fb_write(c, 0, nr, target, eot);
1258 /* note - thread killed in subroutine */
1259 }
1260 brw_land_fwd_jump(p, jmp);
1261
1262 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1263 */
1264 fire_fb_write(c, 1, nr-1, target, eot);
1265 }
1266 }
1267
1268 /**
1269 * Move a GPR to scratch memory.
1270 */
1271 static void emit_spill( struct brw_wm_compile *c,
1272 struct brw_reg reg,
1273 GLuint slot )
1274 {
1275 struct brw_compile *p = &c->func;
1276
1277 /*
1278 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1279 */
1280 brw_MOV(p, brw_message_reg(2), reg);
1281
1282 /*
1283 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1284 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1285 */
1286 brw_dp_WRITE_16(p,
1287 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1288 slot);
1289 }
1290
1291
1292 /**
1293 * Load a GPR from scratch memory.
1294 */
1295 static void emit_unspill( struct brw_wm_compile *c,
1296 struct brw_reg reg,
1297 GLuint slot )
1298 {
1299 struct brw_compile *p = &c->func;
1300
1301 /* Slot 0 is the undef value.
1302 */
1303 if (slot == 0) {
1304 brw_MOV(p, reg, brw_imm_f(0));
1305 return;
1306 }
1307
1308 /*
1309 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1310 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1311 */
1312
1313 brw_dp_READ_16(p,
1314 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1315 slot);
1316 }
1317
1318
1319 /**
1320 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1321 * Args with unspill_reg != 0 will be loaded from scratch memory.
1322 */
1323 static void get_argument_regs( struct brw_wm_compile *c,
1324 struct brw_wm_ref *arg[],
1325 struct brw_reg *regs )
1326 {
1327 GLuint i;
1328
1329 for (i = 0; i < 4; i++) {
1330 if (arg[i]) {
1331 if (arg[i]->unspill_reg)
1332 emit_unspill(c,
1333 brw_vec8_grf(arg[i]->unspill_reg, 0),
1334 arg[i]->value->spill_slot);
1335
1336 regs[i] = arg[i]->hw_reg;
1337 }
1338 else {
1339 regs[i] = brw_null_reg();
1340 }
1341 }
1342 }
1343
1344
1345 /**
1346 * For values that have a spill_slot!=0, write those regs to scratch memory.
1347 */
1348 static void spill_values( struct brw_wm_compile *c,
1349 struct brw_wm_value *values,
1350 GLuint nr )
1351 {
1352 GLuint i;
1353
1354 for (i = 0; i < nr; i++)
1355 if (values[i].spill_slot)
1356 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1357 }
1358
1359
1360 /* Emit the fragment program instructions here.
1361 */
1362 void brw_wm_emit( struct brw_wm_compile *c )
1363 {
1364 struct brw_compile *p = &c->func;
1365 GLuint insn;
1366
1367 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1368
1369 /* Check if any of the payload regs need to be spilled:
1370 */
1371 spill_values(c, c->payload.depth, 4);
1372 spill_values(c, c->creg, c->nr_creg);
1373 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1374
1375
1376 for (insn = 0; insn < c->nr_insns; insn++) {
1377
1378 struct brw_wm_instruction *inst = &c->instruction[insn];
1379 struct brw_reg args[3][4], dst[4];
1380 GLuint i, dst_flags;
1381
1382 /* Get argument regs:
1383 */
1384 for (i = 0; i < 3; i++)
1385 get_argument_regs(c, inst->src[i], args[i]);
1386
1387 /* Get dest regs:
1388 */
1389 for (i = 0; i < 4; i++)
1390 if (inst->dst[i])
1391 dst[i] = inst->dst[i]->hw_reg;
1392 else
1393 dst[i] = brw_null_reg();
1394
1395 /* Flags
1396 */
1397 dst_flags = inst->writemask;
1398 if (inst->saturate)
1399 dst_flags |= SATURATE;
1400
1401 switch (inst->opcode) {
1402 /* Generated instructions for calculating triangle interpolants:
1403 */
1404 case WM_PIXELXY:
1405 emit_pixel_xy(c, dst, dst_flags);
1406 break;
1407
1408 case WM_DELTAXY:
1409 emit_delta_xy(p, dst, dst_flags, args[0]);
1410 break;
1411
1412 case WM_WPOSXY:
1413 emit_wpos_xy(c, dst, dst_flags, args[0]);
1414 break;
1415
1416 case WM_PIXELW:
1417 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1418 break;
1419
1420 case WM_LINTERP:
1421 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1422 break;
1423
1424 case WM_PINTERP:
1425 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1426 break;
1427
1428 case WM_CINTERP:
1429 emit_cinterp(p, dst, dst_flags, args[0]);
1430 break;
1431
1432 case WM_FB_WRITE:
1433 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1434 break;
1435
1436 case WM_FRONTFACING:
1437 emit_frontfacing(p, dst, dst_flags);
1438 break;
1439
1440 /* Straightforward arithmetic:
1441 */
1442 case OPCODE_ADD:
1443 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1444 break;
1445
1446 case OPCODE_FRC:
1447 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1448 break;
1449
1450 case OPCODE_FLR:
1451 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1452 break;
1453
1454 case OPCODE_DDX:
1455 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1456 break;
1457
1458 case OPCODE_DDY:
1459 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1460 break;
1461
1462 case OPCODE_DP3:
1463 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1464 break;
1465
1466 case OPCODE_DP4:
1467 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1468 break;
1469
1470 case OPCODE_DPH:
1471 emit_dph(p, dst, dst_flags, args[0], args[1]);
1472 break;
1473
1474 case OPCODE_TRUNC:
1475 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1476 break;
1477
1478 case OPCODE_LRP:
1479 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1480 break;
1481
1482 case OPCODE_MAD:
1483 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1484 break;
1485
1486 case OPCODE_MOV:
1487 case OPCODE_SWZ:
1488 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1489 break;
1490
1491 case OPCODE_MUL:
1492 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1493 break;
1494
1495 case OPCODE_XPD:
1496 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1497 break;
1498
1499 /* Higher math functions:
1500 */
1501 case OPCODE_RCP:
1502 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1503 break;
1504
1505 case OPCODE_RSQ:
1506 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1507 break;
1508
1509 case OPCODE_SIN:
1510 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1511 break;
1512
1513 case OPCODE_COS:
1514 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1515 break;
1516
1517 case OPCODE_EX2:
1518 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1519 break;
1520
1521 case OPCODE_LG2:
1522 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1523 break;
1524
1525 case OPCODE_SCS:
1526 /* There is an scs math function, but it would need some
1527 * fixup for 16-element execution.
1528 */
1529 if (dst_flags & WRITEMASK_X)
1530 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1531 if (dst_flags & WRITEMASK_Y)
1532 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1533 break;
1534
1535 case OPCODE_POW:
1536 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1537 break;
1538
1539 /* Comparisons:
1540 */
1541 case OPCODE_CMP:
1542 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1543 break;
1544
1545 case OPCODE_MAX:
1546 emit_max(p, dst, dst_flags, args[0], args[1]);
1547 break;
1548
1549 case OPCODE_MIN:
1550 emit_min(p, dst, dst_flags, args[0], args[1]);
1551 break;
1552
1553 case OPCODE_SLT:
1554 emit_slt(p, dst, dst_flags, args[0], args[1]);
1555 break;
1556
1557 case OPCODE_SLE:
1558 emit_sle(p, dst, dst_flags, args[0], args[1]);
1559 break;
1560 case OPCODE_SGT:
1561 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1562 break;
1563 case OPCODE_SGE:
1564 emit_sge(p, dst, dst_flags, args[0], args[1]);
1565 break;
1566 case OPCODE_SEQ:
1567 emit_seq(p, dst, dst_flags, args[0], args[1]);
1568 break;
1569 case OPCODE_SNE:
1570 emit_sne(p, dst, dst_flags, args[0], args[1]);
1571 break;
1572
1573 case OPCODE_LIT:
1574 emit_lit(c, dst, dst_flags, args[0]);
1575 break;
1576
1577 /* Texturing operations:
1578 */
1579 case OPCODE_TEX:
1580 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1581 inst->tex_idx, inst->tex_unit,
1582 inst->tex_shadow);
1583 break;
1584
1585 case OPCODE_TXB:
1586 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1587 inst->tex_idx, inst->tex_unit);
1588 break;
1589
1590 case OPCODE_KIL:
1591 emit_kil(c, args[0]);
1592 break;
1593
1594 case OPCODE_KIL_NV:
1595 emit_kil_nv(c);
1596 break;
1597
1598 default:
1599 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1600 inst->opcode, inst->opcode < MAX_OPCODE ?
1601 _mesa_opcode_string(inst->opcode) :
1602 "unknown");
1603 }
1604
1605 for (i = 0; i < 4; i++)
1606 if (inst->dst[i] && inst->dst[i]->spill_slot)
1607 emit_spill(c,
1608 inst->dst[i]->hw_reg,
1609 inst->dst[i]->spill_slot);
1610 }
1611
1612 if (INTEL_DEBUG & DEBUG_WM) {
1613 int i;
1614
1615 _mesa_printf("wm-native:\n");
1616 for (i = 0; i < p->nr_insn; i++)
1617 brw_disasm(stderr, &p->store[i]);
1618 _mesa_printf("\n");
1619 }
1620 }