bcaa793de161ebf77e418d6e3d90480c6e354776
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/macros.h"
34 #include "brw_context.h"
35 #include "brw_wm.h"
36
37 /* Not quite sure how correct this is - need to understand horiz
38 * vs. vertical strides a little better.
39 */
40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
41 {
42 if (reg.vstride)
43 reg.nr++;
44 return reg;
45 }
46
47
48 /* Payload R0:
49 *
50 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
51 * corresponding to each of the 16 execution channels.
52 * R0.1..8 -- ?
53 * R1.0 -- triangle vertex 0.X
54 * R1.1 -- triangle vertex 0.Y
55 * R1.2 -- tile 0 x,y coords (2 packed uwords)
56 * R1.3 -- tile 1 x,y coords (2 packed uwords)
57 * R1.4 -- tile 2 x,y coords (2 packed uwords)
58 * R1.5 -- tile 3 x,y coords (2 packed uwords)
59 * R1.6 -- ?
60 * R1.7 -- ?
61 * R1.8 -- ?
62 */
63
64 void emit_pixel_xy(struct brw_wm_compile *c,
65 const struct brw_reg *dst,
66 GLuint mask)
67 {
68 struct brw_compile *p = &c->func;
69 struct brw_reg r1 = brw_vec1_grf(1, 0);
70 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
71 struct brw_reg dst0_uw, dst1_uw;
72
73 brw_push_insn_state(p);
74 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
75
76 if (c->dispatch_width == 16) {
77 dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
78 dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
79 } else {
80 dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
81 dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
82 }
83
84 /* Calculate pixel centers by adding 1 or 0 to each of the
85 * micro-tile coordinates passed in r1.
86 */
87 if (mask & WRITEMASK_X) {
88 brw_ADD(p,
89 dst0_uw,
90 stride(suboffset(r1_uw, 4), 2, 4, 0),
91 brw_imm_v(0x10101010));
92 }
93
94 if (mask & WRITEMASK_Y) {
95 brw_ADD(p,
96 dst1_uw,
97 stride(suboffset(r1_uw,5), 2, 4, 0),
98 brw_imm_v(0x11001100));
99 }
100 brw_pop_insn_state(p);
101 }
102
103
104 void emit_delta_xy(struct brw_compile *p,
105 const struct brw_reg *dst,
106 GLuint mask,
107 const struct brw_reg *arg0)
108 {
109 struct brw_reg r1 = brw_vec1_grf(1, 0);
110
111 /* Calc delta X,Y by subtracting origin in r1 from the pixel
112 * centers.
113 */
114 if (mask & WRITEMASK_X) {
115 brw_ADD(p,
116 dst[0],
117 retype(arg0[0], BRW_REGISTER_TYPE_UW),
118 negate(r1));
119 }
120
121 if (mask & WRITEMASK_Y) {
122 brw_ADD(p,
123 dst[1],
124 retype(arg0[1], BRW_REGISTER_TYPE_UW),
125 negate(suboffset(r1,1)));
126
127 }
128 }
129
130 void emit_wpos_xy(struct brw_wm_compile *c,
131 const struct brw_reg *dst,
132 GLuint mask,
133 const struct brw_reg *arg0)
134 {
135 struct brw_compile *p = &c->func;
136
137 /* Calculate the pixel offset from window bottom left into destination
138 * X and Y channels.
139 */
140 if (mask & WRITEMASK_X) {
141 /* X' = X - origin */
142 brw_ADD(p,
143 dst[0],
144 retype(arg0[0], BRW_REGISTER_TYPE_W),
145 brw_imm_d(0 - c->key.origin_x));
146 }
147
148 if (mask & WRITEMASK_Y) {
149 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
150 brw_ADD(p,
151 dst[1],
152 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
153 brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
154 }
155 }
156
157
158 void emit_pixel_w(struct brw_wm_compile *c,
159 const struct brw_reg *dst,
160 GLuint mask,
161 const struct brw_reg *arg0,
162 const struct brw_reg *deltas)
163 {
164 struct brw_compile *p = &c->func;
165
166 /* Don't need this if all you are doing is interpolating color, for
167 * instance.
168 */
169 if (mask & WRITEMASK_W) {
170 struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
171
172 /* Calc 1/w - just linterp wpos[3] optimized by putting the
173 * result straight into a message reg.
174 */
175 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
176 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
177
178 /* Calc w */
179 if (c->dispatch_width == 16) {
180 brw_math_16(p, dst[3],
181 BRW_MATH_FUNCTION_INV,
182 BRW_MATH_SATURATE_NONE,
183 2, brw_null_reg(),
184 BRW_MATH_PRECISION_FULL);
185 } else {
186 brw_math(p, dst[3],
187 BRW_MATH_FUNCTION_INV,
188 BRW_MATH_SATURATE_NONE,
189 2, brw_null_reg(),
190 BRW_MATH_DATA_VECTOR,
191 BRW_MATH_PRECISION_FULL);
192 }
193 }
194 }
195
196
197 void emit_linterp(struct brw_compile *p,
198 const struct brw_reg *dst,
199 GLuint mask,
200 const struct brw_reg *arg0,
201 const struct brw_reg *deltas)
202 {
203 struct brw_reg interp[4];
204 GLuint nr = arg0[0].nr;
205 GLuint i;
206
207 interp[0] = brw_vec1_grf(nr, 0);
208 interp[1] = brw_vec1_grf(nr, 4);
209 interp[2] = brw_vec1_grf(nr+1, 0);
210 interp[3] = brw_vec1_grf(nr+1, 4);
211
212 for (i = 0; i < 4; i++) {
213 if (mask & (1<<i)) {
214 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
215 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
216 }
217 }
218 }
219
220
221 void emit_pinterp(struct brw_compile *p,
222 const struct brw_reg *dst,
223 GLuint mask,
224 const struct brw_reg *arg0,
225 const struct brw_reg *deltas,
226 const struct brw_reg *w)
227 {
228 struct brw_reg interp[4];
229 GLuint nr = arg0[0].nr;
230 GLuint i;
231
232 interp[0] = brw_vec1_grf(nr, 0);
233 interp[1] = brw_vec1_grf(nr, 4);
234 interp[2] = brw_vec1_grf(nr+1, 0);
235 interp[3] = brw_vec1_grf(nr+1, 4);
236
237 for (i = 0; i < 4; i++) {
238 if (mask & (1<<i)) {
239 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
240 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
241 }
242 }
243 for (i = 0; i < 4; i++) {
244 if (mask & (1<<i)) {
245 brw_MUL(p, dst[i], dst[i], w[3]);
246 }
247 }
248 }
249
250
251 void emit_cinterp(struct brw_compile *p,
252 const struct brw_reg *dst,
253 GLuint mask,
254 const struct brw_reg *arg0)
255 {
256 struct brw_reg interp[4];
257 GLuint nr = arg0[0].nr;
258 GLuint i;
259
260 interp[0] = brw_vec1_grf(nr, 0);
261 interp[1] = brw_vec1_grf(nr, 4);
262 interp[2] = brw_vec1_grf(nr+1, 0);
263 interp[3] = brw_vec1_grf(nr+1, 4);
264
265 for (i = 0; i < 4; i++) {
266 if (mask & (1<<i)) {
267 brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
268 }
269 }
270 }
271
272 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
273 void emit_frontfacing(struct brw_compile *p,
274 const struct brw_reg *dst,
275 GLuint mask)
276 {
277 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
278 GLuint i;
279
280 if (!(mask & WRITEMASK_XYZW))
281 return;
282
283 for (i = 0; i < 4; i++) {
284 if (mask & (1<<i)) {
285 brw_MOV(p, dst[i], brw_imm_f(0.0));
286 }
287 }
288
289 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
290 * us front face
291 */
292 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
293 for (i = 0; i < 4; i++) {
294 if (mask & (1<<i)) {
295 brw_MOV(p, dst[i], brw_imm_f(1.0));
296 }
297 }
298 brw_set_predicate_control_flag_value(p, 0xff);
299 }
300
301 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
302 * looking like:
303 *
304 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
305 *
306 * and we're trying to produce:
307 *
308 * DDX DDY
309 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
310 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
311 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
312 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
313 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
314 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
315 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
316 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
317 *
318 * and add another set of two more subspans if in 16-pixel dispatch mode.
319 *
320 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
321 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
322 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
323 * between each other. We could probably do it like ddx and swizzle the right
324 * order later, but bail for now and just produce
325 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
326 */
327 void emit_ddxy(struct brw_compile *p,
328 const struct brw_reg *dst,
329 GLuint mask,
330 GLboolean is_ddx,
331 const struct brw_reg *arg0)
332 {
333 int i;
334 struct brw_reg src0, src1;
335
336 if (mask & SATURATE)
337 brw_set_saturate(p, 1);
338 for (i = 0; i < 4; i++ ) {
339 if (mask & (1<<i)) {
340 if (is_ddx) {
341 src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
342 BRW_REGISTER_TYPE_F,
343 BRW_VERTICAL_STRIDE_2,
344 BRW_WIDTH_2,
345 BRW_HORIZONTAL_STRIDE_0,
346 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
347 src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
348 BRW_REGISTER_TYPE_F,
349 BRW_VERTICAL_STRIDE_2,
350 BRW_WIDTH_2,
351 BRW_HORIZONTAL_STRIDE_0,
352 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
353 } else {
354 src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
355 BRW_REGISTER_TYPE_F,
356 BRW_VERTICAL_STRIDE_4,
357 BRW_WIDTH_4,
358 BRW_HORIZONTAL_STRIDE_0,
359 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
360 src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
361 BRW_REGISTER_TYPE_F,
362 BRW_VERTICAL_STRIDE_4,
363 BRW_WIDTH_4,
364 BRW_HORIZONTAL_STRIDE_0,
365 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
366 }
367 brw_ADD(p, dst[i], src0, negate(src1));
368 }
369 }
370 if (mask & SATURATE)
371 brw_set_saturate(p, 0);
372 }
373
374 void emit_alu1(struct brw_compile *p,
375 struct brw_instruction *(*func)(struct brw_compile *,
376 struct brw_reg,
377 struct brw_reg),
378 const struct brw_reg *dst,
379 GLuint mask,
380 const struct brw_reg *arg0)
381 {
382 GLuint i;
383
384 if (mask & SATURATE)
385 brw_set_saturate(p, 1);
386
387 for (i = 0; i < 4; i++) {
388 if (mask & (1<<i)) {
389 func(p, dst[i], arg0[i]);
390 }
391 }
392
393 if (mask & SATURATE)
394 brw_set_saturate(p, 0);
395 }
396
397
398 void emit_alu2(struct brw_compile *p,
399 struct brw_instruction *(*func)(struct brw_compile *,
400 struct brw_reg,
401 struct brw_reg,
402 struct brw_reg),
403 const struct brw_reg *dst,
404 GLuint mask,
405 const struct brw_reg *arg0,
406 const struct brw_reg *arg1)
407 {
408 GLuint i;
409
410 if (mask & SATURATE)
411 brw_set_saturate(p, 1);
412
413 for (i = 0; i < 4; i++) {
414 if (mask & (1<<i)) {
415 func(p, dst[i], arg0[i], arg1[i]);
416 }
417 }
418
419 if (mask & SATURATE)
420 brw_set_saturate(p, 0);
421 }
422
423
424 void emit_mad(struct brw_compile *p,
425 const struct brw_reg *dst,
426 GLuint mask,
427 const struct brw_reg *arg0,
428 const struct brw_reg *arg1,
429 const struct brw_reg *arg2)
430 {
431 GLuint i;
432
433 for (i = 0; i < 4; i++) {
434 if (mask & (1<<i)) {
435 brw_MUL(p, dst[i], arg0[i], arg1[i]);
436
437 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
438 brw_ADD(p, dst[i], dst[i], arg2[i]);
439 brw_set_saturate(p, 0);
440 }
441 }
442 }
443
444 void emit_lrp(struct brw_compile *p,
445 const struct brw_reg *dst,
446 GLuint mask,
447 const struct brw_reg *arg0,
448 const struct brw_reg *arg1,
449 const struct brw_reg *arg2)
450 {
451 GLuint i;
452
453 /* Uses dst as a temporary:
454 */
455 for (i = 0; i < 4; i++) {
456 if (mask & (1<<i)) {
457 /* Can I use the LINE instruction for this?
458 */
459 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
460 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
461
462 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
463 brw_MAC(p, dst[i], arg0[i], arg1[i]);
464 brw_set_saturate(p, 0);
465 }
466 }
467 }
468
469 void emit_sop(struct brw_compile *p,
470 const struct brw_reg *dst,
471 GLuint mask,
472 GLuint cond,
473 const struct brw_reg *arg0,
474 const struct brw_reg *arg1)
475 {
476 GLuint i;
477
478 for (i = 0; i < 4; i++) {
479 if (mask & (1<<i)) {
480 brw_push_insn_state(p);
481 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
482 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
483 brw_MOV(p, dst[i], brw_imm_f(0));
484 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
485 brw_MOV(p, dst[i], brw_imm_f(1.0));
486 brw_pop_insn_state(p);
487 }
488 }
489 }
490
491 static void emit_slt( struct brw_compile *p,
492 const struct brw_reg *dst,
493 GLuint mask,
494 const struct brw_reg *arg0,
495 const struct brw_reg *arg1 )
496 {
497 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
498 }
499
500 static void emit_sle( struct brw_compile *p,
501 const struct brw_reg *dst,
502 GLuint mask,
503 const struct brw_reg *arg0,
504 const struct brw_reg *arg1 )
505 {
506 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
507 }
508
509 static void emit_sgt( struct brw_compile *p,
510 const struct brw_reg *dst,
511 GLuint mask,
512 const struct brw_reg *arg0,
513 const struct brw_reg *arg1 )
514 {
515 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
516 }
517
518 static void emit_sge( struct brw_compile *p,
519 const struct brw_reg *dst,
520 GLuint mask,
521 const struct brw_reg *arg0,
522 const struct brw_reg *arg1 )
523 {
524 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
525 }
526
527 static void emit_seq( struct brw_compile *p,
528 const struct brw_reg *dst,
529 GLuint mask,
530 const struct brw_reg *arg0,
531 const struct brw_reg *arg1 )
532 {
533 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
534 }
535
536 static void emit_sne( struct brw_compile *p,
537 const struct brw_reg *dst,
538 GLuint mask,
539 const struct brw_reg *arg0,
540 const struct brw_reg *arg1 )
541 {
542 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
543 }
544
545 static void emit_cmp( struct brw_compile *p,
546 const struct brw_reg *dst,
547 GLuint mask,
548 const struct brw_reg *arg0,
549 const struct brw_reg *arg1,
550 const struct brw_reg *arg2 )
551 {
552 GLuint i;
553
554 for (i = 0; i < 4; i++) {
555 if (mask & (1<<i)) {
556 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
557 brw_MOV(p, dst[i], arg2[i]);
558 brw_set_saturate(p, 0);
559
560 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
561
562 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
563 brw_MOV(p, dst[i], arg1[i]);
564 brw_set_saturate(p, 0);
565 brw_set_predicate_control_flag_value(p, 0xff);
566 }
567 }
568 }
569
570 void emit_max(struct brw_compile *p,
571 const struct brw_reg *dst,
572 GLuint mask,
573 const struct brw_reg *arg0,
574 const struct brw_reg *arg1)
575 {
576 GLuint i;
577
578 for (i = 0; i < 4; i++) {
579 if (mask & (1<<i)) {
580 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
581 brw_MOV(p, dst[i], arg0[i]);
582 brw_set_saturate(p, 0);
583
584 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
585
586 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
587 brw_MOV(p, dst[i], arg1[i]);
588 brw_set_saturate(p, 0);
589 brw_set_predicate_control_flag_value(p, 0xff);
590 }
591 }
592 }
593
594 void emit_min(struct brw_compile *p,
595 const struct brw_reg *dst,
596 GLuint mask,
597 const struct brw_reg *arg0,
598 const struct brw_reg *arg1)
599 {
600 GLuint i;
601
602 for (i = 0; i < 4; i++) {
603 if (mask & (1<<i)) {
604 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
605 brw_MOV(p, dst[i], arg1[i]);
606 brw_set_saturate(p, 0);
607
608 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
609
610 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
611 brw_MOV(p, dst[i], arg0[i]);
612 brw_set_saturate(p, 0);
613 brw_set_predicate_control_flag_value(p, 0xff);
614 }
615 }
616 }
617
618
619 void emit_dp3(struct brw_compile *p,
620 const struct brw_reg *dst,
621 GLuint mask,
622 const struct brw_reg *arg0,
623 const struct brw_reg *arg1)
624 {
625 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
626
627 if (!(mask & WRITEMASK_XYZW))
628 return; /* Do not emit dead code */
629
630 assert(is_power_of_two(mask & WRITEMASK_XYZW));
631
632 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
633 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
634
635 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
636 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
637 brw_set_saturate(p, 0);
638 }
639
640
641 void emit_dp4(struct brw_compile *p,
642 const struct brw_reg *dst,
643 GLuint mask,
644 const struct brw_reg *arg0,
645 const struct brw_reg *arg1)
646 {
647 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
648
649 if (!(mask & WRITEMASK_XYZW))
650 return; /* Do not emit dead code */
651
652 assert(is_power_of_two(mask & WRITEMASK_XYZW));
653
654 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
655 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
656 brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
657
658 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
659 brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
660 brw_set_saturate(p, 0);
661 }
662
663
664 void emit_dph(struct brw_compile *p,
665 const struct brw_reg *dst,
666 GLuint mask,
667 const struct brw_reg *arg0,
668 const struct brw_reg *arg1)
669 {
670 const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
671
672 if (!(mask & WRITEMASK_XYZW))
673 return; /* Do not emit dead code */
674
675 assert(is_power_of_two(mask & WRITEMASK_XYZW));
676
677 brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
678 brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
679 brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
680
681 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
682 brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
683 brw_set_saturate(p, 0);
684 }
685
686
687 void emit_xpd(struct brw_compile *p,
688 const struct brw_reg *dst,
689 GLuint mask,
690 const struct brw_reg *arg0,
691 const struct brw_reg *arg1)
692 {
693 GLuint i;
694
695 assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
696
697 for (i = 0 ; i < 3; i++) {
698 if (mask & (1<<i)) {
699 GLuint i2 = (i+2)%3;
700 GLuint i1 = (i+1)%3;
701
702 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
703
704 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
705 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
706 brw_set_saturate(p, 0);
707 }
708 }
709 }
710
711
712 void emit_math1(struct brw_wm_compile *c,
713 GLuint function,
714 const struct brw_reg *dst,
715 GLuint mask,
716 const struct brw_reg *arg0)
717 {
718 struct brw_compile *p = &c->func;
719 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
720 GLuint saturate = ((mask & SATURATE) ?
721 BRW_MATH_SATURATE_SATURATE :
722 BRW_MATH_SATURATE_NONE);
723
724 if (!(mask & WRITEMASK_XYZW))
725 return; /* Do not emit dead code */
726
727 assert(is_power_of_two(mask & WRITEMASK_XYZW));
728
729 /* If compressed, this will write message reg 2,3 from arg0.x's 16
730 * channels.
731 */
732 brw_MOV(p, brw_message_reg(2), arg0[0]);
733
734 /* Send two messages to perform all 16 operations:
735 */
736 brw_push_insn_state(p);
737 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
738 brw_math(p,
739 dst[dst_chan],
740 function,
741 saturate,
742 2,
743 brw_null_reg(),
744 BRW_MATH_DATA_VECTOR,
745 BRW_MATH_PRECISION_FULL);
746
747 if (c->dispatch_width == 16) {
748 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
749 brw_math(p,
750 offset(dst[dst_chan],1),
751 function,
752 saturate,
753 3,
754 brw_null_reg(),
755 BRW_MATH_DATA_VECTOR,
756 BRW_MATH_PRECISION_FULL);
757 }
758 brw_pop_insn_state(p);
759 }
760
761
762 void emit_math2(struct brw_wm_compile *c,
763 GLuint function,
764 const struct brw_reg *dst,
765 GLuint mask,
766 const struct brw_reg *arg0,
767 const struct brw_reg *arg1)
768 {
769 struct brw_compile *p = &c->func;
770 int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
771 GLuint saturate = ((mask & SATURATE) ?
772 BRW_MATH_SATURATE_SATURATE :
773 BRW_MATH_SATURATE_NONE);
774
775 if (!(mask & WRITEMASK_XYZW))
776 return; /* Do not emit dead code */
777
778 assert(is_power_of_two(mask & WRITEMASK_XYZW));
779
780 brw_push_insn_state(p);
781
782 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
783 brw_MOV(p, brw_message_reg(2), arg0[0]);
784 if (c->dispatch_width == 16) {
785 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
786 brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
787 }
788
789 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
790 brw_MOV(p, brw_message_reg(3), arg1[0]);
791 if (c->dispatch_width == 16) {
792 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
793 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
794 }
795
796 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
797 brw_math(p,
798 dst[dst_chan],
799 function,
800 saturate,
801 2,
802 brw_null_reg(),
803 BRW_MATH_DATA_VECTOR,
804 BRW_MATH_PRECISION_FULL);
805
806 /* Send two messages to perform all 16 operations:
807 */
808 if (c->dispatch_width == 16) {
809 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
810 brw_math(p,
811 offset(dst[dst_chan],1),
812 function,
813 saturate,
814 4,
815 brw_null_reg(),
816 BRW_MATH_DATA_VECTOR,
817 BRW_MATH_PRECISION_FULL);
818 }
819 brw_pop_insn_state(p);
820 }
821
822
823 void emit_tex(struct brw_wm_compile *c,
824 struct brw_reg *dst,
825 GLuint dst_flags,
826 struct brw_reg *arg,
827 struct brw_reg depth_payload,
828 GLuint tex_idx,
829 GLuint sampler,
830 GLboolean shadow)
831 {
832 struct brw_compile *p = &c->func;
833 struct intel_context *intel = &p->brw->intel;
834 struct brw_reg dst_retyped;
835 GLuint cur_mrf = 2, response_length;
836 GLuint i, nr_texcoords;
837 GLuint emit;
838 GLuint msg_type;
839 GLuint mrf_per_channel;
840 GLuint simd_mode;
841
842 if (c->dispatch_width == 16) {
843 mrf_per_channel = 2;
844 response_length = 8;
845 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
846 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
847 } else {
848 mrf_per_channel = 1;
849 response_length = 4;
850 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
851 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
852 }
853
854 /* How many input regs are there?
855 */
856 switch (tex_idx) {
857 case TEXTURE_1D_INDEX:
858 emit = WRITEMASK_X;
859 nr_texcoords = 1;
860 break;
861 case TEXTURE_2D_INDEX:
862 case TEXTURE_RECT_INDEX:
863 emit = WRITEMASK_XY;
864 nr_texcoords = 2;
865 break;
866 case TEXTURE_3D_INDEX:
867 case TEXTURE_CUBE_INDEX:
868 emit = WRITEMASK_XYZ;
869 nr_texcoords = 3;
870 break;
871 default:
872 /* unexpected target */
873 abort();
874 }
875
876 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
877 if (!intel->is_ironlake && c->dispatch_width == 8)
878 nr_texcoords = 3;
879
880 /* For shadow comparisons, we have to supply u,v,r. */
881 if (shadow)
882 nr_texcoords = 3;
883
884 /* Emit the texcoords. */
885 for (i = 0; i < nr_texcoords; i++) {
886 if (emit & (1<<i))
887 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
888 else
889 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
890 cur_mrf += mrf_per_channel;
891 }
892
893 /* Fill in the shadow comparison reference value. */
894 if (shadow) {
895 if (intel->is_ironlake) {
896 /* Fill in the cube map array index value. */
897 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
898 cur_mrf += mrf_per_channel;
899 } else if (c->dispatch_width == 8) {
900 /* Fill in the LOD bias value. */
901 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
902 cur_mrf += mrf_per_channel;
903 }
904 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
905 cur_mrf += mrf_per_channel;
906 }
907
908 if (intel->is_ironlake) {
909 if (shadow)
910 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
911 else
912 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
913 } else {
914 /* Note that G45 and older determines shadow compare and dispatch width
915 * from message length for most messages.
916 */
917 if (c->dispatch_width == 16 && shadow)
918 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
919 else
920 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
921 }
922
923 brw_SAMPLE(p,
924 dst_retyped,
925 1,
926 retype(depth_payload, BRW_REGISTER_TYPE_UW),
927 SURF_INDEX_TEXTURE(sampler),
928 sampler,
929 dst_flags & WRITEMASK_XYZW,
930 msg_type,
931 response_length,
932 cur_mrf - 1,
933 0,
934 1,
935 simd_mode);
936 }
937
938
939 void emit_txb(struct brw_wm_compile *c,
940 struct brw_reg *dst,
941 GLuint dst_flags,
942 struct brw_reg *arg,
943 struct brw_reg depth_payload,
944 GLuint tex_idx,
945 GLuint sampler)
946 {
947 struct brw_compile *p = &c->func;
948 struct intel_context *intel = &p->brw->intel;
949 GLuint msgLength;
950 GLuint msg_type;
951 GLuint mrf_per_channel;
952 GLuint response_length;
953 struct brw_reg dst_retyped;
954
955 /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
956 * samples, so we'll use the 16-wide instruction, leave the second halves
957 * undefined, and trust the execution mask to keep the undefined pixels
958 * from mattering.
959 */
960 if (c->dispatch_width == 16 || !intel->is_ironlake) {
961 if (intel->is_ironlake)
962 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
963 else
964 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
965 mrf_per_channel = 2;
966 dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
967 response_length = 8;
968 } else {
969 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
970 mrf_per_channel = 1;
971 dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
972 response_length = 4;
973 }
974
975 /* Shadow ignored for txb. */
976 switch (tex_idx) {
977 case TEXTURE_1D_INDEX:
978 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
979 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
980 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
981 break;
982 case TEXTURE_2D_INDEX:
983 case TEXTURE_RECT_INDEX:
984 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
985 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
986 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
987 break;
988 case TEXTURE_3D_INDEX:
989 case TEXTURE_CUBE_INDEX:
990 brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
991 brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
992 brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
993 break;
994 default:
995 /* unexpected target */
996 abort();
997 }
998
999 brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1000 msgLength = 2 + 4 * mrf_per_channel - 1;
1001
1002 brw_SAMPLE(p,
1003 dst_retyped,
1004 1,
1005 retype(depth_payload, BRW_REGISTER_TYPE_UW),
1006 SURF_INDEX_TEXTURE(sampler),
1007 sampler,
1008 dst_flags & WRITEMASK_XYZW,
1009 msg_type,
1010 response_length,
1011 msgLength,
1012 0,
1013 1,
1014 BRW_SAMPLER_SIMD_MODE_SIMD16);
1015 }
1016
1017
1018 static void emit_lit(struct brw_wm_compile *c,
1019 const struct brw_reg *dst,
1020 GLuint mask,
1021 const struct brw_reg *arg0)
1022 {
1023 struct brw_compile *p = &c->func;
1024
1025 assert((mask & WRITEMASK_XW) == 0);
1026
1027 if (mask & WRITEMASK_Y) {
1028 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1029 brw_MOV(p, dst[1], arg0[0]);
1030 brw_set_saturate(p, 0);
1031 }
1032
1033 if (mask & WRITEMASK_Z) {
1034 emit_math2(c, BRW_MATH_FUNCTION_POW,
1035 &dst[2],
1036 WRITEMASK_X | (mask & SATURATE),
1037 &arg0[1],
1038 &arg0[3]);
1039 }
1040
1041 /* Ordinarily you'd use an iff statement to skip or shortcircuit
1042 * some of the POW calculations above, but 16-wide iff statements
1043 * seem to lock c1 hardware, so this is a nasty workaround:
1044 */
1045 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1046 {
1047 if (mask & WRITEMASK_Y)
1048 brw_MOV(p, dst[1], brw_imm_f(0));
1049
1050 if (mask & WRITEMASK_Z)
1051 brw_MOV(p, dst[2], brw_imm_f(0));
1052 }
1053 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1054 }
1055
1056
1057 /* Kill pixel - set execution mask to zero for those pixels which
1058 * fail.
1059 */
1060 static void emit_kil( struct brw_wm_compile *c,
1061 struct brw_reg *arg0)
1062 {
1063 struct brw_compile *p = &c->func;
1064 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1065 GLuint i;
1066
1067 /* XXX - usually won't need 4 compares!
1068 */
1069 for (i = 0; i < 4; i++) {
1070 brw_push_insn_state(p);
1071 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1072 brw_set_predicate_control_flag_value(p, 0xff);
1073 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1074 brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1075 brw_pop_insn_state(p);
1076 }
1077 }
1078
1079 /* KIL_NV kills the pixels that are currently executing, not based on a test
1080 * of the arguments.
1081 */
1082 static void emit_kil_nv( struct brw_wm_compile *c )
1083 {
1084 struct brw_compile *p = &c->func;
1085 struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1086
1087 brw_push_insn_state(p);
1088 brw_set_mask_control(p, BRW_MASK_DISABLE);
1089 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1090 brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1091 brw_pop_insn_state(p);
1092 }
1093
1094 static void fire_fb_write( struct brw_wm_compile *c,
1095 GLuint base_reg,
1096 GLuint nr,
1097 GLuint target,
1098 GLuint eot )
1099 {
1100 struct brw_compile *p = &c->func;
1101 struct brw_reg dst;
1102
1103 if (c->dispatch_width == 16)
1104 dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1105 else
1106 dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1107
1108 /* Pass through control information:
1109 */
1110 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
1111 {
1112 brw_push_insn_state(p);
1113 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1114 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1115 brw_MOV(p,
1116 brw_message_reg(base_reg + 1),
1117 brw_vec8_grf(1, 0));
1118 brw_pop_insn_state(p);
1119 }
1120
1121 /* Send framebuffer write message: */
1122 /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
1123 brw_fb_WRITE(p,
1124 dst,
1125 base_reg,
1126 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1127 target,
1128 nr,
1129 0,
1130 eot);
1131 }
1132
1133
1134 static void emit_aa( struct brw_wm_compile *c,
1135 struct brw_reg *arg1,
1136 GLuint reg )
1137 {
1138 struct brw_compile *p = &c->func;
1139 GLuint comp = c->key.aa_dest_stencil_reg / 2;
1140 GLuint off = c->key.aa_dest_stencil_reg % 2;
1141 struct brw_reg aa = offset(arg1[comp], off);
1142
1143 brw_push_insn_state(p);
1144 brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1145 brw_MOV(p, brw_message_reg(reg), aa);
1146 brw_pop_insn_state(p);
1147 }
1148
1149
1150 /* Post-fragment-program processing. Send the results to the
1151 * framebuffer.
1152 * \param arg0 the fragment color
1153 * \param arg1 the pass-through depth value
1154 * \param arg2 the shader-computed depth value
1155 */
1156 void emit_fb_write(struct brw_wm_compile *c,
1157 struct brw_reg *arg0,
1158 struct brw_reg *arg1,
1159 struct brw_reg *arg2,
1160 GLuint target,
1161 GLuint eot)
1162 {
1163 struct brw_compile *p = &c->func;
1164 struct brw_context *brw = p->brw;
1165 struct intel_context *intel = &brw->intel;
1166 GLuint nr = 2;
1167 GLuint channel;
1168
1169 /* Reserve a space for AA - may not be needed:
1170 */
1171 if (c->key.aa_dest_stencil_reg)
1172 nr += 1;
1173
1174 /* I don't really understand how this achieves the color interleave
1175 * (ie RGBARGBA) in the result: [Do the saturation here]
1176 */
1177 brw_push_insn_state(p);
1178
1179 for (channel = 0; channel < 4; channel++) {
1180 if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || intel->is_ironlake)) {
1181 /* By setting the high bit of the MRF register number, we indicate
1182 * that we want COMPR4 mode - instead of doing the usual destination
1183 * + 1 for the second half we get destination + 4.
1184 */
1185 brw_MOV(p,
1186 brw_message_reg(nr + channel + (1 << 7)),
1187 arg0[channel]);
1188 } else {
1189 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
1190 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
1191 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1192 brw_MOV(p,
1193 brw_message_reg(nr + channel),
1194 arg0[channel]);
1195
1196 if (c->dispatch_width == 16) {
1197 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1198 brw_MOV(p,
1199 brw_message_reg(nr + channel + 4),
1200 sechalf(arg0[channel]));
1201 }
1202 }
1203 }
1204 /* skip over the regs populated above:
1205 */
1206 nr += 8;
1207 brw_pop_insn_state(p);
1208
1209 if (c->key.source_depth_to_render_target)
1210 {
1211 if (c->key.computes_depth)
1212 brw_MOV(p, brw_message_reg(nr), arg2[2]);
1213 else
1214 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1215
1216 nr += 2;
1217 }
1218
1219 if (c->key.dest_depth_reg)
1220 {
1221 GLuint comp = c->key.dest_depth_reg / 2;
1222 GLuint off = c->key.dest_depth_reg % 2;
1223
1224 if (off != 0) {
1225 brw_push_insn_state(p);
1226 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1227
1228 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1229 /* 2nd half? */
1230 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1231 brw_pop_insn_state(p);
1232 }
1233 else {
1234 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1235 }
1236 nr += 2;
1237 }
1238
1239 if (!c->key.runtime_check_aads_emit) {
1240 if (c->key.aa_dest_stencil_reg)
1241 emit_aa(c, arg1, 2);
1242
1243 fire_fb_write(c, 0, nr, target, eot);
1244 }
1245 else {
1246 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1247 struct brw_reg ip = brw_ip_reg();
1248 struct brw_instruction *jmp;
1249
1250 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1251 brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1252 brw_AND(p,
1253 v1_null_ud,
1254 get_element_ud(brw_vec8_grf(1,0), 6),
1255 brw_imm_ud(1<<26));
1256
1257 jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1258 {
1259 emit_aa(c, arg1, 2);
1260 fire_fb_write(c, 0, nr, target, eot);
1261 /* note - thread killed in subroutine */
1262 }
1263 brw_land_fwd_jump(p, jmp);
1264
1265 /* ELSE: Shuffle up one register to fill in the hole left for AA:
1266 */
1267 fire_fb_write(c, 1, nr-1, target, eot);
1268 }
1269 }
1270
1271 /**
1272 * Move a GPR to scratch memory.
1273 */
1274 static void emit_spill( struct brw_wm_compile *c,
1275 struct brw_reg reg,
1276 GLuint slot )
1277 {
1278 struct brw_compile *p = &c->func;
1279
1280 /*
1281 mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
1282 */
1283 brw_MOV(p, brw_message_reg(2), reg);
1284
1285 /*
1286 mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
1287 send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
1288 */
1289 brw_dp_WRITE_16(p,
1290 retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1291 slot);
1292 }
1293
1294
1295 /**
1296 * Load a GPR from scratch memory.
1297 */
1298 static void emit_unspill( struct brw_wm_compile *c,
1299 struct brw_reg reg,
1300 GLuint slot )
1301 {
1302 struct brw_compile *p = &c->func;
1303
1304 /* Slot 0 is the undef value.
1305 */
1306 if (slot == 0) {
1307 brw_MOV(p, reg, brw_imm_f(0));
1308 return;
1309 }
1310
1311 /*
1312 mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
1313 send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
1314 */
1315
1316 brw_dp_READ_16(p,
1317 retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1318 slot);
1319 }
1320
1321
1322 /**
1323 * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1324 * Args with unspill_reg != 0 will be loaded from scratch memory.
1325 */
1326 static void get_argument_regs( struct brw_wm_compile *c,
1327 struct brw_wm_ref *arg[],
1328 struct brw_reg *regs )
1329 {
1330 GLuint i;
1331
1332 for (i = 0; i < 4; i++) {
1333 if (arg[i]) {
1334 if (arg[i]->unspill_reg)
1335 emit_unspill(c,
1336 brw_vec8_grf(arg[i]->unspill_reg, 0),
1337 arg[i]->value->spill_slot);
1338
1339 regs[i] = arg[i]->hw_reg;
1340 }
1341 else {
1342 regs[i] = brw_null_reg();
1343 }
1344 }
1345 }
1346
1347
1348 /**
1349 * For values that have a spill_slot!=0, write those regs to scratch memory.
1350 */
1351 static void spill_values( struct brw_wm_compile *c,
1352 struct brw_wm_value *values,
1353 GLuint nr )
1354 {
1355 GLuint i;
1356
1357 for (i = 0; i < nr; i++)
1358 if (values[i].spill_slot)
1359 emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1360 }
1361
1362
1363 /* Emit the fragment program instructions here.
1364 */
1365 void brw_wm_emit( struct brw_wm_compile *c )
1366 {
1367 struct brw_compile *p = &c->func;
1368 GLuint insn;
1369
1370 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1371
1372 /* Check if any of the payload regs need to be spilled:
1373 */
1374 spill_values(c, c->payload.depth, 4);
1375 spill_values(c, c->creg, c->nr_creg);
1376 spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1377
1378
1379 for (insn = 0; insn < c->nr_insns; insn++) {
1380
1381 struct brw_wm_instruction *inst = &c->instruction[insn];
1382 struct brw_reg args[3][4], dst[4];
1383 GLuint i, dst_flags;
1384
1385 /* Get argument regs:
1386 */
1387 for (i = 0; i < 3; i++)
1388 get_argument_regs(c, inst->src[i], args[i]);
1389
1390 /* Get dest regs:
1391 */
1392 for (i = 0; i < 4; i++)
1393 if (inst->dst[i])
1394 dst[i] = inst->dst[i]->hw_reg;
1395 else
1396 dst[i] = brw_null_reg();
1397
1398 /* Flags
1399 */
1400 dst_flags = inst->writemask;
1401 if (inst->saturate)
1402 dst_flags |= SATURATE;
1403
1404 switch (inst->opcode) {
1405 /* Generated instructions for calculating triangle interpolants:
1406 */
1407 case WM_PIXELXY:
1408 emit_pixel_xy(c, dst, dst_flags);
1409 break;
1410
1411 case WM_DELTAXY:
1412 emit_delta_xy(p, dst, dst_flags, args[0]);
1413 break;
1414
1415 case WM_WPOSXY:
1416 emit_wpos_xy(c, dst, dst_flags, args[0]);
1417 break;
1418
1419 case WM_PIXELW:
1420 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1421 break;
1422
1423 case WM_LINTERP:
1424 emit_linterp(p, dst, dst_flags, args[0], args[1]);
1425 break;
1426
1427 case WM_PINTERP:
1428 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1429 break;
1430
1431 case WM_CINTERP:
1432 emit_cinterp(p, dst, dst_flags, args[0]);
1433 break;
1434
1435 case WM_FB_WRITE:
1436 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1437 break;
1438
1439 case WM_FRONTFACING:
1440 emit_frontfacing(p, dst, dst_flags);
1441 break;
1442
1443 /* Straightforward arithmetic:
1444 */
1445 case OPCODE_ADD:
1446 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1447 break;
1448
1449 case OPCODE_FRC:
1450 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1451 break;
1452
1453 case OPCODE_FLR:
1454 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1455 break;
1456
1457 case OPCODE_DDX:
1458 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1459 break;
1460
1461 case OPCODE_DDY:
1462 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1463 break;
1464
1465 case OPCODE_DP3:
1466 emit_dp3(p, dst, dst_flags, args[0], args[1]);
1467 break;
1468
1469 case OPCODE_DP4:
1470 emit_dp4(p, dst, dst_flags, args[0], args[1]);
1471 break;
1472
1473 case OPCODE_DPH:
1474 emit_dph(p, dst, dst_flags, args[0], args[1]);
1475 break;
1476
1477 case OPCODE_TRUNC:
1478 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1479 break;
1480
1481 case OPCODE_LRP:
1482 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1483 break;
1484
1485 case OPCODE_MAD:
1486 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1487 break;
1488
1489 case OPCODE_MOV:
1490 case OPCODE_SWZ:
1491 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1492 break;
1493
1494 case OPCODE_MUL:
1495 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1496 break;
1497
1498 case OPCODE_XPD:
1499 emit_xpd(p, dst, dst_flags, args[0], args[1]);
1500 break;
1501
1502 /* Higher math functions:
1503 */
1504 case OPCODE_RCP:
1505 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1506 break;
1507
1508 case OPCODE_RSQ:
1509 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1510 break;
1511
1512 case OPCODE_SIN:
1513 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1514 break;
1515
1516 case OPCODE_COS:
1517 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1518 break;
1519
1520 case OPCODE_EX2:
1521 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1522 break;
1523
1524 case OPCODE_LG2:
1525 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1526 break;
1527
1528 case OPCODE_SCS:
1529 /* There is an scs math function, but it would need some
1530 * fixup for 16-element execution.
1531 */
1532 if (dst_flags & WRITEMASK_X)
1533 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1534 if (dst_flags & WRITEMASK_Y)
1535 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1536 break;
1537
1538 case OPCODE_POW:
1539 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1540 break;
1541
1542 /* Comparisons:
1543 */
1544 case OPCODE_CMP:
1545 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1546 break;
1547
1548 case OPCODE_MAX:
1549 emit_max(p, dst, dst_flags, args[0], args[1]);
1550 break;
1551
1552 case OPCODE_MIN:
1553 emit_min(p, dst, dst_flags, args[0], args[1]);
1554 break;
1555
1556 case OPCODE_SLT:
1557 emit_slt(p, dst, dst_flags, args[0], args[1]);
1558 break;
1559
1560 case OPCODE_SLE:
1561 emit_sle(p, dst, dst_flags, args[0], args[1]);
1562 break;
1563 case OPCODE_SGT:
1564 emit_sgt(p, dst, dst_flags, args[0], args[1]);
1565 break;
1566 case OPCODE_SGE:
1567 emit_sge(p, dst, dst_flags, args[0], args[1]);
1568 break;
1569 case OPCODE_SEQ:
1570 emit_seq(p, dst, dst_flags, args[0], args[1]);
1571 break;
1572 case OPCODE_SNE:
1573 emit_sne(p, dst, dst_flags, args[0], args[1]);
1574 break;
1575
1576 case OPCODE_LIT:
1577 emit_lit(c, dst, dst_flags, args[0]);
1578 break;
1579
1580 /* Texturing operations:
1581 */
1582 case OPCODE_TEX:
1583 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1584 inst->tex_idx, inst->tex_unit,
1585 inst->tex_shadow);
1586 break;
1587
1588 case OPCODE_TXB:
1589 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1590 inst->tex_idx, inst->tex_unit);
1591 break;
1592
1593 case OPCODE_KIL:
1594 emit_kil(c, args[0]);
1595 break;
1596
1597 case OPCODE_KIL_NV:
1598 emit_kil_nv(c);
1599 break;
1600
1601 default:
1602 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1603 inst->opcode, inst->opcode < MAX_OPCODE ?
1604 _mesa_opcode_string(inst->opcode) :
1605 "unknown");
1606 }
1607
1608 for (i = 0; i < 4; i++)
1609 if (inst->dst[i] && inst->dst[i]->spill_slot)
1610 emit_spill(c,
1611 inst->dst[i]->hw_reg,
1612 inst->dst[i]->spill_slot);
1613 }
1614
1615 if (INTEL_DEBUG & DEBUG_WM) {
1616 int i;
1617
1618 _mesa_printf("wm-native:\n");
1619 for (i = 0; i < p->nr_insn; i++)
1620 brw_disasm(stderr, &p->store[i]);
1621 _mesa_printf("\n");
1622 }
1623 }