Fix SOP in fragment shader, brick is ok now.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_glsl.c
1 #include "macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
4 #include "brw_eu.h"
5 #include "brw_wm.h"
6
7 /* Only guess, need a flag in gl_fragment_program later */
8 GLboolean brw_wm_is_glsl(struct gl_fragment_program *fp)
9 {
10 int i;
11 for (i = 0; i < fp->Base.NumInstructions; i++) {
12 struct prog_instruction *inst = &fp->Base.Instructions[i];
13 switch (inst->Opcode) {
14 case OPCODE_IF:
15 case OPCODE_ENDIF:
16 case OPCODE_CAL:
17 case OPCODE_BRK:
18 case OPCODE_RET:
19 case OPCODE_BGNLOOP:
20 return GL_TRUE;
21 default:
22 break;
23 }
24 }
25 return GL_FALSE;
26 }
27
28 static void set_reg(struct brw_wm_compile *c, int file, int index,
29 int component, struct brw_reg reg)
30 {
31 c->wm_regs[file][index][component].reg = reg;
32 c->wm_regs[file][index][component].inited = GL_TRUE;
33 }
34
35 static int get_scalar_dst_index(struct prog_instruction *inst)
36 {
37 int i;
38 for (i = 0; i < 4; i++)
39 if (inst->DstReg.WriteMask & (1<<i))
40 break;
41 return i;
42 }
43
44 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
45 {
46 struct brw_reg reg;
47 reg = brw_vec8_grf(c->tmp_index--, 0);
48 return reg;
49 }
50
51 static void release_tmps(struct brw_wm_compile *c)
52 {
53 c->tmp_index = 127;
54 }
55
56 static struct brw_reg
57 get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
58 {
59 struct brw_reg reg;
60 switch (file) {
61 case PROGRAM_STATE_VAR:
62 case PROGRAM_CONSTANT:
63 case PROGRAM_UNIFORM:
64 file = PROGRAM_STATE_VAR;
65 break;
66 case PROGRAM_UNDEFINED:
67 return brw_null_reg();
68 default:
69 break;
70 }
71
72 if(c->wm_regs[file][index][component].inited)
73 reg = c->wm_regs[file][index][component].reg;
74 else
75 reg = brw_vec8_grf(c->reg_index, 0);
76
77 if(!c->wm_regs[file][index][component].inited) {
78 set_reg(c, file, index, component, reg);
79 c->reg_index++;
80 }
81
82 if (neg & (1<< component)) {
83 reg = negate(reg);
84 }
85 if (abs)
86 reg = brw_abs(reg);
87 return reg;
88 }
89
90 static void prealloc_reg(struct brw_wm_compile *c)
91 {
92 int i, j;
93 struct brw_reg reg;
94 int nr_interp_regs = 0;
95 GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
96
97 for (i = 0; i < 4; i++) {
98 reg = (i < c->key.nr_depth_regs)
99 ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
100 set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
101 }
102 c->reg_index += 2*c->key.nr_depth_regs;
103 {
104 int nr_params = c->fp->program.Base.Parameters->NumParameters;
105 struct gl_program_parameter_list *plist =
106 c->fp->program.Base.Parameters;
107 int index = 0;
108 c->prog_data.nr_params = 4*nr_params;
109 for (i = 0; i < nr_params; i++) {
110 for (j = 0; j < 4; j++, index++) {
111 reg = brw_vec1_grf(c->reg_index + index/8,
112 index%8);
113 c->prog_data.param[index] =
114 &plist->ParameterValues[i][j];
115 set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
116 }
117 }
118 c->nr_creg = 2*((4*nr_params+15)/16);
119 c->reg_index += c->nr_creg;
120 }
121 for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
122 if (inputs & (1<<i)) {
123 nr_interp_regs++;
124 reg = brw_vec8_grf(c->reg_index, 0);
125 for (j = 0; j < 4; j++)
126 set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
127 c->reg_index += 2;
128
129 }
130 }
131 c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
132 c->prog_data.urb_read_length = nr_interp_regs * 2;
133 c->prog_data.curb_read_length = c->nr_creg;
134 c->ret_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
135 c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 1);
136 c->reg_index++;
137 }
138
139 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
140 struct prog_instruction *inst, int component, int nr)
141 {
142 return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
143 0, 0);
144 }
145
146 static struct brw_reg get_src_reg(struct brw_wm_compile *c,
147 struct prog_src_register *src, int index, int nr)
148 {
149 int component = GET_SWZ(src->Swizzle, index);
150 return get_reg(c, src->File, src->Index, component, nr,
151 src->NegateBase, src->Abs);
152 }
153
154 static void emit_abs( struct brw_wm_compile *c,
155 struct prog_instruction *inst)
156 {
157 int i;
158 struct brw_compile *p = &c->func;
159 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
160 for (i = 0; i < 4; i++) {
161 if (inst->DstReg.WriteMask & (1<<i)) {
162 struct brw_reg src, dst;
163 dst = get_dst_reg(c, inst, i, 1);
164 src = get_src_reg(c, &inst->SrcReg[0], i, 1);
165 brw_MOV(p, dst, brw_abs(src));
166 }
167 }
168 brw_set_saturate(p, 0);
169 }
170
171 static void emit_mov( struct brw_wm_compile *c,
172 struct prog_instruction *inst)
173 {
174 int i;
175 struct brw_compile *p = &c->func;
176 GLuint mask = inst->DstReg.WriteMask;
177 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
178 for (i = 0; i < 4; i++) {
179 if (mask & (1<<i)) {
180 struct brw_reg src, dst;
181 dst = get_dst_reg(c, inst, i, 1);
182 src = get_src_reg(c, &inst->SrcReg[0], i, 1);
183 brw_MOV(p, dst, src);
184 }
185 }
186 brw_set_saturate(p, 0);
187 }
188
189 static void emit_pixel_xy(struct brw_wm_compile *c,
190 struct prog_instruction *inst)
191 {
192 struct brw_reg r1 = brw_vec1_grf(1, 0);
193 struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
194
195 struct brw_reg dst0, dst1;
196 struct brw_compile *p = &c->func;
197 GLuint mask = inst->DstReg.WriteMask;
198
199 dst0 = get_dst_reg(c, inst, 0, 1);
200 dst1 = get_dst_reg(c, inst, 1, 1);
201 /* Calculate pixel centers by adding 1 or 0 to each of the
202 * micro-tile coordinates passed in r1.
203 */
204 if (mask & WRITEMASK_X) {
205 brw_ADD(p,
206 vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
207 stride(suboffset(r1_uw, 4), 2, 4, 0),
208 brw_imm_v(0x10101010));
209 }
210
211 if (mask & WRITEMASK_Y) {
212 brw_ADD(p,
213 vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
214 stride(suboffset(r1_uw, 5), 2, 4, 0),
215 brw_imm_v(0x11001100));
216 }
217
218 }
219
220 static void emit_delta_xy(struct brw_wm_compile *c,
221 struct prog_instruction *inst)
222 {
223 struct brw_reg r1 = brw_vec1_grf(1, 0);
224 struct brw_reg dst0, dst1, src0, src1;
225 struct brw_compile *p = &c->func;
226 GLuint mask = inst->DstReg.WriteMask;
227
228 dst0 = get_dst_reg(c, inst, 0, 1);
229 dst1 = get_dst_reg(c, inst, 1, 1);
230 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
231 src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
232 /* Calc delta X,Y by subtracting origin in r1 from the pixel
233 * centers.
234 */
235 if (mask & WRITEMASK_X) {
236 brw_ADD(p,
237 dst0,
238 retype(src0, BRW_REGISTER_TYPE_UW),
239 negate(r1));
240 }
241
242 if (mask & WRITEMASK_Y) {
243 brw_ADD(p,
244 dst1,
245 retype(src1, BRW_REGISTER_TYPE_UW),
246 negate(suboffset(r1,1)));
247
248 }
249
250 }
251
252
253 static void fire_fb_write( struct brw_wm_compile *c,
254 GLuint base_reg,
255 GLuint nr )
256 {
257 struct brw_compile *p = &c->func;
258
259 /* Pass through control information:
260 */
261 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
262 {
263 brw_push_insn_state(p);
264 brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
265 brw_MOV(p,
266 brw_message_reg(base_reg + 1),
267 brw_vec8_grf(1, 0));
268 brw_pop_insn_state(p);
269 }
270 /* Send framebuffer write message: */
271 brw_fb_WRITE(p,
272 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
273 base_reg,
274 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
275 0, /* render surface always 0 */
276 nr,
277 0,
278 1);
279 }
280
281 static void emit_fb_write(struct brw_wm_compile *c,
282 struct prog_instruction *inst)
283 {
284 struct brw_compile *p = &c->func;
285 int nr = 2;
286 int channel;
287 struct brw_reg src0;//, src1, src2, dst;
288
289 /* Reserve a space for AA - may not be needed:
290 */
291 if (c->key.aa_dest_stencil_reg)
292 nr += 1;
293 {
294 brw_push_insn_state(p);
295 for (channel = 0; channel < 4; channel++) {
296 src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
297 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
298 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
299 brw_MOV(p, brw_message_reg(nr + channel), src0);
300 }
301 /* skip over the regs populated above: */
302 nr += 8;
303 brw_pop_insn_state(p);
304 }
305 fire_fb_write(c, 0, nr);
306 }
307
308 static void emit_pixel_w( struct brw_wm_compile *c,
309 struct prog_instruction *inst)
310 {
311 struct brw_compile *p = &c->func;
312 GLuint mask = inst->DstReg.WriteMask;
313 if (mask & WRITEMASK_W) {
314 struct brw_reg dst, src0, delta0, delta1;
315 struct brw_reg interp3;
316
317 dst = get_dst_reg(c, inst, 3, 1);
318 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
319 delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
320 delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
321
322 interp3 = brw_vec1_grf(src0.nr+1, 4);
323 /* Calc 1/w - just linterp wpos[3] optimized by putting the
324 * result straight into a message reg.
325 */
326 brw_LINE(p, brw_null_reg(), interp3, delta0);
327 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
328
329 /* Calc w */
330 brw_math_16( p, dst,
331 BRW_MATH_FUNCTION_INV,
332 BRW_MATH_SATURATE_NONE,
333 2, brw_null_reg(),
334 BRW_MATH_PRECISION_FULL);
335 }
336 }
337
338 static void emit_linterp(struct brw_wm_compile *c,
339 struct prog_instruction *inst)
340 {
341 struct brw_compile *p = &c->func;
342 GLuint mask = inst->DstReg.WriteMask;
343 struct brw_reg interp[4];
344 struct brw_reg dst, delta0, delta1;
345 struct brw_reg src0;
346
347 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
348 delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
349 delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
350 GLuint nr = src0.nr;
351 int i;
352
353 interp[0] = brw_vec1_grf(nr, 0);
354 interp[1] = brw_vec1_grf(nr, 4);
355 interp[2] = brw_vec1_grf(nr+1, 0);
356 interp[3] = brw_vec1_grf(nr+1, 4);
357
358 for(i = 0; i < 4; i++ ) {
359 if (mask & (1<<i)) {
360 dst = get_dst_reg(c, inst, i, 1);
361 brw_LINE(p, brw_null_reg(), interp[i], delta0);
362 brw_MAC(p, dst, suboffset(interp[i],1), delta1);
363 }
364 }
365 }
366
367 static void emit_cinterp(struct brw_wm_compile *c,
368 struct prog_instruction *inst)
369 {
370 struct brw_compile *p = &c->func;
371 GLuint mask = inst->DstReg.WriteMask;
372
373 struct brw_reg interp[4];
374 struct brw_reg dst, src0;
375
376 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
377 GLuint nr = src0.nr;
378 int i;
379
380 interp[0] = brw_vec1_grf(nr, 0);
381 interp[1] = brw_vec1_grf(nr, 4);
382 interp[2] = brw_vec1_grf(nr+1, 0);
383 interp[3] = brw_vec1_grf(nr+1, 4);
384
385 for(i = 0; i < 4; i++ ) {
386 if (mask & (1<<i)) {
387 dst = get_dst_reg(c, inst, i, 1);
388 brw_MOV(p, dst, suboffset(interp[i],3));
389 }
390 }
391 }
392
393 static void emit_pinterp(struct brw_wm_compile *c,
394 struct prog_instruction *inst)
395 {
396 struct brw_compile *p = &c->func;
397 GLuint mask = inst->DstReg.WriteMask;
398
399 struct brw_reg interp[4];
400 struct brw_reg dst, delta0, delta1;
401 struct brw_reg src0, w;
402
403 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
404 delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
405 delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
406 w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
407 GLuint nr = src0.nr;
408 int i;
409
410 interp[0] = brw_vec1_grf(nr, 0);
411 interp[1] = brw_vec1_grf(nr, 4);
412 interp[2] = brw_vec1_grf(nr+1, 0);
413 interp[3] = brw_vec1_grf(nr+1, 4);
414
415 for(i = 0; i < 4; i++ ) {
416 if (mask & (1<<i)) {
417 dst = get_dst_reg(c, inst, i, 1);
418 brw_LINE(p, brw_null_reg(), interp[i], delta0);
419 brw_MAC(p, dst, suboffset(interp[i],1),
420 delta1);
421 brw_MUL(p, dst, dst, w);
422 }
423 }
424 }
425
426 static void emit_xpd(struct brw_wm_compile *c,
427 struct prog_instruction *inst)
428 {
429 int i;
430 struct brw_compile *p = &c->func;
431 GLuint mask = inst->DstReg.WriteMask;
432 for (i = 0; i < 4; i++) {
433 GLuint i2 = (i+2)%3;
434 GLuint i1 = (i+1)%3;
435 if (mask & (1<<i)) {
436 struct brw_reg src0, src1, dst;
437 dst = get_dst_reg(c, inst, i, 1);
438 src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
439 src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
440 brw_MUL(p, brw_null_reg(), src0, src1);
441 src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
442 src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
443 brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
444 brw_MAC(p, dst, src0, src1);
445 brw_set_saturate(p, 0);
446 }
447 }
448 brw_set_saturate(p, 0);
449 }
450
451 static void emit_dp3(struct brw_wm_compile *c,
452 struct prog_instruction *inst)
453 {
454 struct brw_reg src0[3], src1[3], dst;
455 int i;
456 struct brw_compile *p = &c->func;
457 for (i = 0; i < 3; i++) {
458 src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
459 src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
460 }
461
462 dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
463 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
464 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
465 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
466 brw_MAC(p, dst, src0[2], src1[2]);
467 brw_set_saturate(p, 0);
468 }
469
470 static void emit_dp4(struct brw_wm_compile *c,
471 struct prog_instruction *inst)
472 {
473 struct brw_reg src0[4], src1[4], dst;
474 int i;
475 struct brw_compile *p = &c->func;
476 for (i = 0; i < 4; i++) {
477 src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
478 src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
479 }
480 dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
481 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
482 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
483 brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
484 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
485 brw_MAC(p, dst, src0[3], src1[3]);
486 brw_set_saturate(p, 0);
487 }
488
489 static void emit_dph(struct brw_wm_compile *c,
490 struct prog_instruction *inst)
491 {
492 struct brw_reg src0[4], src1[4], dst;
493 int i;
494 struct brw_compile *p = &c->func;
495 for (i = 0; i < 4; i++) {
496 src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
497 src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
498 }
499 dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
500 brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
501 brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
502 brw_MAC(p, dst, src0[2], src1[2]);
503 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
504 brw_ADD(p, dst, src0[3], src1[3]);
505 brw_set_saturate(p, 0);
506 }
507
508 static void emit_math1(struct brw_wm_compile *c,
509 struct prog_instruction *inst, GLuint func)
510 {
511 struct brw_compile *p = &c->func;
512 struct brw_reg src0, dst;
513
514 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
515 dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
516 brw_MOV(p, brw_message_reg(2), src0);
517 brw_math(p,
518 dst,
519 func,
520 (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
521 2,
522 brw_null_reg(),
523 BRW_MATH_DATA_VECTOR,
524 BRW_MATH_PRECISION_FULL);
525 }
526
527 static void emit_rcp(struct brw_wm_compile *c,
528 struct prog_instruction *inst)
529 {
530 emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
531 }
532
533 static void emit_rsq(struct brw_wm_compile *c,
534 struct prog_instruction *inst)
535 {
536 emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
537 }
538
539 static void emit_sin(struct brw_wm_compile *c,
540 struct prog_instruction *inst)
541 {
542 emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
543 }
544
545 static void emit_cos(struct brw_wm_compile *c,
546 struct prog_instruction *inst)
547 {
548 emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
549 }
550
551 static void emit_ex2(struct brw_wm_compile *c,
552 struct prog_instruction *inst)
553 {
554 emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
555 }
556
557 static void emit_lg2(struct brw_wm_compile *c,
558 struct prog_instruction *inst)
559 {
560 emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
561 }
562
563 static void emit_add(struct brw_wm_compile *c,
564 struct prog_instruction *inst)
565 {
566 struct brw_compile *p = &c->func;
567 struct brw_reg src0, src1, dst;
568 GLuint mask = inst->DstReg.WriteMask;
569 int i;
570 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
571 for (i = 0 ; i < 4; i++) {
572 if (mask & (1<<i)) {
573 dst = get_dst_reg(c, inst, i, 1);
574 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
575 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
576 brw_ADD(p, dst, src0, src1);
577 }
578 }
579 brw_set_saturate(p, 0);
580 }
581
582 static void emit_sub(struct brw_wm_compile *c,
583 struct prog_instruction *inst)
584 {
585 struct brw_compile *p = &c->func;
586 struct brw_reg src0, src1, dst;
587 GLuint mask = inst->DstReg.WriteMask;
588 int i;
589 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
590 for (i = 0 ; i < 4; i++) {
591 if (mask & (1<<i)) {
592 dst = get_dst_reg(c, inst, i, 1);
593 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
594 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
595 brw_ADD(p, dst, src0, negate(src1));
596 }
597 }
598 brw_set_saturate(p, 0);
599 }
600
601 static void emit_mul(struct brw_wm_compile *c,
602 struct prog_instruction *inst)
603 {
604 struct brw_compile *p = &c->func;
605 struct brw_reg src0, src1, dst;
606 GLuint mask = inst->DstReg.WriteMask;
607 int i;
608 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
609 for (i = 0 ; i < 4; i++) {
610 if (mask & (1<<i)) {
611 dst = get_dst_reg(c, inst, i, 1);
612 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
613 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
614 brw_MUL(p, dst, src0, src1);
615 }
616 }
617 brw_set_saturate(p, 0);
618 }
619
620 static void emit_frc(struct brw_wm_compile *c,
621 struct prog_instruction *inst)
622 {
623 struct brw_compile *p = &c->func;
624 struct brw_reg src0, dst;
625 GLuint mask = inst->DstReg.WriteMask;
626 int i;
627 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
628 for (i = 0 ; i < 4; i++) {
629 if (mask & (1<<i)) {
630 dst = get_dst_reg(c, inst, i, 1);
631 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
632 brw_FRC(p, dst, src0);
633 }
634 }
635 if (inst->SaturateMode != SATURATE_OFF)
636 brw_set_saturate(p, 0);
637 }
638
639 static void emit_flr(struct brw_wm_compile *c,
640 struct prog_instruction *inst)
641 {
642 struct brw_compile *p = &c->func;
643 struct brw_reg src0, dst;
644 GLuint mask = inst->DstReg.WriteMask;
645 int i;
646 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
647 for (i = 0 ; i < 4; i++) {
648 if (mask & (1<<i)) {
649 dst = get_dst_reg(c, inst, i, 1);
650 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
651 brw_RNDD(p, dst, src0);
652 }
653 }
654 brw_set_saturate(p, 0);
655 }
656
657 static void emit_max(struct brw_wm_compile *c,
658 struct prog_instruction *inst)
659 {
660 struct brw_compile *p = &c->func;
661 GLuint mask = inst->DstReg.WriteMask;
662 struct brw_reg src0, src1, dst;
663 int i;
664 brw_push_insn_state(p);
665 for (i = 0; i < 4; i++) {
666 if (mask & (1<<i)) {
667 dst = get_dst_reg(c, inst, i, 1);
668 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
669 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
670 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
671 brw_MOV(p, dst, src0);
672 brw_set_saturate(p, 0);
673
674 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
675 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
676 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
677 brw_MOV(p, dst, src1);
678 brw_set_saturate(p, 0);
679 brw_set_predicate_control_flag_value(p, 0xff);
680 }
681 }
682 brw_pop_insn_state(p);
683 }
684
685 static void emit_min(struct brw_wm_compile *c,
686 struct prog_instruction *inst)
687 {
688 struct brw_compile *p = &c->func;
689 GLuint mask = inst->DstReg.WriteMask;
690 struct brw_reg src0, src1, dst;
691 int i;
692 brw_push_insn_state(p);
693 for (i = 0; i < 4; i++) {
694 if (mask & (1<<i)) {
695 dst = get_dst_reg(c, inst, i, 1);
696 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
697 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
698 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
699 brw_MOV(p, dst, src0);
700 brw_set_saturate(p, 0);
701
702 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
703 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
704 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
705 brw_MOV(p, dst, src1);
706 brw_set_saturate(p, 0);
707 brw_set_predicate_control_flag_value(p, 0xff);
708 }
709 }
710 brw_pop_insn_state(p);
711 }
712
713 static void emit_pow(struct brw_wm_compile *c,
714 struct prog_instruction *inst)
715 {
716 struct brw_compile *p = &c->func;
717 struct brw_reg dst, src0, src1;
718 dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
719 src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
720 src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
721
722 brw_MOV(p, brw_message_reg(2), src0);
723 brw_MOV(p, brw_message_reg(3), src1);
724
725 brw_math(p,
726 dst,
727 BRW_MATH_FUNCTION_POW,
728 (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
729 2,
730 brw_null_reg(),
731 BRW_MATH_DATA_VECTOR,
732 BRW_MATH_PRECISION_FULL);
733 }
734
735 static void emit_lrp(struct brw_wm_compile *c,
736 struct prog_instruction *inst)
737 {
738 struct brw_compile *p = &c->func;
739 GLuint mask = inst->DstReg.WriteMask;
740 struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
741 int i;
742 for (i = 0; i < 4; i++) {
743 if (mask & (1<<i)) {
744 dst = get_dst_reg(c, inst, i, 1);
745 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
746
747 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
748
749 if (src1.nr == dst.nr) {
750 tmp1 = alloc_tmp(c);
751 brw_MOV(p, tmp1, src1);
752 } else
753 tmp1 = src1;
754
755 src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
756 if (src2.nr == dst.nr) {
757 tmp2 = alloc_tmp(c);
758 brw_MOV(p, tmp2, src2);
759 } else
760 tmp2 = src2;
761
762 brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
763 brw_MUL(p, brw_null_reg(), dst, tmp2);
764 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
765 brw_MAC(p, dst, src0, tmp1);
766 brw_set_saturate(p, 0);
767 }
768 release_tmps(c);
769 }
770 }
771
772 static void emit_kil(struct brw_wm_compile *c)
773 {
774 struct brw_compile *p = &c->func;
775 struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
776 brw_push_insn_state(p);
777 brw_set_mask_control(p, BRW_MASK_DISABLE);
778 brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
779 brw_AND(p, depth, c->emit_mask_reg, depth);
780 brw_pop_insn_state(p);
781 }
782
783 static void emit_mad(struct brw_wm_compile *c,
784 struct prog_instruction *inst)
785 {
786 struct brw_compile *p = &c->func;
787 GLuint mask = inst->DstReg.WriteMask;
788 struct brw_reg dst, src0, src1, src2;
789 int i;
790
791 for (i = 0; i < 4; i++) {
792 if (mask & (1<<i)) {
793 dst = get_dst_reg(c, inst, i, 1);
794 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
795 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
796 src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
797 brw_MUL(p, dst, src0, src1);
798
799 brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
800 brw_ADD(p, dst, dst, src2);
801 brw_set_saturate(p, 0);
802 }
803 }
804 }
805
806 static void emit_sop(struct brw_wm_compile *c,
807 struct prog_instruction *inst, GLuint cond)
808 {
809 struct brw_compile *p = &c->func;
810 GLuint mask = inst->DstReg.WriteMask;
811 struct brw_reg dst, src0, src1;
812 int i;
813
814 brw_push_insn_state(p);
815 for (i = 0; i < 4; i++) {
816 if (mask & (1<<i)) {
817 dst = get_dst_reg(c, inst, i, 1);
818 src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
819 src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
820 brw_CMP(p, brw_null_reg(), cond, src0, src1);
821 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
822 brw_MOV(p, dst, brw_imm_f(0.0));
823 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
824 brw_MOV(p, dst, brw_imm_f(1.0));
825 }
826 }
827 brw_pop_insn_state(p);
828 }
829
830 static void emit_slt(struct brw_wm_compile *c,
831 struct prog_instruction *inst)
832 {
833 emit_sop(c, inst, BRW_CONDITIONAL_L);
834 }
835
836 static void emit_sle(struct brw_wm_compile *c,
837 struct prog_instruction *inst)
838 {
839 emit_sop(c, inst, BRW_CONDITIONAL_LE);
840 }
841
842 static void emit_sgt(struct brw_wm_compile *c,
843 struct prog_instruction *inst)
844 {
845 emit_sop(c, inst, BRW_CONDITIONAL_G);
846 }
847
848 static void emit_sge(struct brw_wm_compile *c,
849 struct prog_instruction *inst)
850 {
851 emit_sop(c, inst, BRW_CONDITIONAL_GE);
852 }
853
854 static void emit_seq(struct brw_wm_compile *c,
855 struct prog_instruction *inst)
856 {
857 emit_sop(c, inst, BRW_CONDITIONAL_EQ);
858 }
859
860 static void emit_sne(struct brw_wm_compile *c,
861 struct prog_instruction *inst)
862 {
863 emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
864 }
865 /* TODO
866 BIAS on SIMD8 not workind yet...
867 */
868 static void emit_txb(struct brw_wm_compile *c,
869 struct prog_instruction *inst)
870 {
871 struct brw_compile *p = &c->func;
872 struct brw_reg dst[4], src[4], payload_reg;
873 GLuint i;
874 payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
875 for (i = 0; i < 4; i++)
876 dst[i] = get_dst_reg(c, inst, i, 1);
877 for (i = 0; i < 4; i++)
878 src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
879
880 switch (inst->TexSrcTarget) {
881 case TEXTURE_1D_INDEX:
882 brw_MOV(p, brw_message_reg(2), src[0]);
883 brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
884 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
885 break;
886 case TEXTURE_2D_INDEX:
887 case TEXTURE_RECT_INDEX:
888 brw_MOV(p, brw_message_reg(2), src[0]);
889 brw_MOV(p, brw_message_reg(3), src[1]);
890 brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
891 break;
892 default:
893 brw_MOV(p, brw_message_reg(2), src[0]);
894 brw_MOV(p, brw_message_reg(3), src[1]);
895 brw_MOV(p, brw_message_reg(4), src[2]);
896 break;
897 }
898 brw_MOV(p, brw_message_reg(5), src[3]);
899 brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
900 brw_SAMPLE(p,
901 retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
902 1,
903 retype(payload_reg, BRW_REGISTER_TYPE_UW),
904 inst->TexSrcUnit + 1, /* surface */
905 inst->TexSrcUnit, /* sampler */
906 inst->DstReg.WriteMask,
907 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
908 4,
909 4,
910 0);
911 }
912
913 static void emit_tex(struct brw_wm_compile *c,
914 struct prog_instruction *inst)
915 {
916 struct brw_compile *p = &c->func;
917 struct brw_reg dst[4], src[4], payload_reg;
918 GLuint msg_len;
919 GLuint i, nr;
920 GLuint emit;
921
922 payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
923 for (i = 0; i < 4; i++)
924 dst[i] = get_dst_reg(c, inst, i, 1);
925 for (i = 0; i < 4; i++)
926 src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
927
928
929 switch (inst->TexSrcTarget) {
930 case TEXTURE_1D_INDEX:
931 emit = WRITEMASK_X;
932 nr = 1;
933 break;
934 case TEXTURE_2D_INDEX:
935 case TEXTURE_RECT_INDEX:
936 emit = WRITEMASK_XY;
937 nr = 2;
938 break;
939 default:
940 emit = WRITEMASK_XYZ;
941 nr = 3;
942 break;
943 }
944 msg_len = 1;
945
946 for (i = 0; i < nr; i++) {
947 static const GLuint swz[4] = {0,1,2,2};
948 if (emit & (1<<i))
949 brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
950 else
951 brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
952 msg_len += 1;
953 }
954
955 brw_SAMPLE(p,
956 retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
957 1,
958 retype(payload_reg, BRW_REGISTER_TYPE_UW),
959 inst->TexSrcUnit + 1, /* surface */
960 inst->TexSrcUnit, /* sampler */
961 inst->DstReg.WriteMask,
962 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
963 4,
964 4,
965 0);
966 }
967
968 static void post_wm_emit( struct brw_wm_compile *c )
969 {
970 GLuint nr_insns = c->fp->program.Base.NumInstructions;
971 GLuint insn, target_insn;
972 struct prog_instruction *inst1, *inst2;
973 struct brw_instruction *brw_inst1, *brw_inst2;
974 int offset;
975 for (insn = 0; insn < nr_insns; insn++) {
976 inst1 = &c->fp->program.Base.Instructions[insn];
977 brw_inst1 = inst1->Data;
978 switch (inst1->Opcode) {
979 case OPCODE_CAL:
980 target_insn = inst1->BranchTarget;
981 inst2 = &c->fp->program.Base.Instructions[target_insn];
982 brw_inst2 = inst2->Data;
983 offset = brw_inst2 - brw_inst1;
984 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
985 break;
986 default:
987 break;
988 }
989 }
990 }
991
992 static void brw_wm_emit_glsl(struct brw_wm_compile *c)
993
994 {
995 #define MAX_IFSN 32
996 #define MAX_LOOP_DEPTH 32
997 struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
998 struct brw_instruction *inst0, *inst1;
999 int i, if_insn = 0, loop_insn = 0;
1000 struct brw_compile *p = &c->func;
1001 brw_init_compile(&c->func);
1002 c->reg_index = 0;
1003 prealloc_reg(c);
1004 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1005 for (i = 0; i < c->nr_fp_insns; i++) {
1006 struct prog_instruction *inst = &c->prog_instructions[i];
1007 struct prog_instruction *orig_inst;
1008
1009 if ((orig_inst = inst->Data) != 0)
1010 orig_inst->Data = current_insn(p);
1011
1012 switch (inst->Opcode) {
1013 case WM_PIXELXY:
1014 emit_pixel_xy(c, inst);
1015 break;
1016 case WM_DELTAXY:
1017 emit_delta_xy(c, inst);
1018 break;
1019 case WM_PIXELW:
1020 emit_pixel_w(c, inst);
1021 break;
1022 case WM_LINTERP:
1023 emit_linterp(c, inst);
1024 break;
1025 case WM_PINTERP:
1026 emit_pinterp(c, inst);
1027 break;
1028 case WM_CINTERP:
1029 emit_cinterp(c, inst);
1030 break;
1031 case WM_FB_WRITE:
1032 emit_fb_write(c, inst);
1033 break;
1034 case OPCODE_ABS:
1035 emit_abs(c, inst);
1036 break;
1037 case OPCODE_ADD:
1038 emit_add(c, inst);
1039 break;
1040 case OPCODE_SUB:
1041 emit_sub(c, inst);
1042 break;
1043 case OPCODE_FRC:
1044 emit_frc(c, inst);
1045 break;
1046 case OPCODE_FLR:
1047 emit_flr(c, inst);
1048 break;
1049 case OPCODE_LRP:
1050 emit_lrp(c, inst);
1051 break;
1052 case OPCODE_MOV:
1053 emit_mov(c, inst);
1054 break;
1055 case OPCODE_DP3:
1056 emit_dp3(c, inst);
1057 break;
1058 case OPCODE_DP4:
1059 emit_dp4(c, inst);
1060 break;
1061 case OPCODE_XPD:
1062 emit_xpd(c, inst);
1063 break;
1064 case OPCODE_DPH:
1065 emit_dph(c, inst);
1066 break;
1067 case OPCODE_RCP:
1068 emit_rcp(c, inst);
1069 break;
1070 case OPCODE_RSQ:
1071 emit_rsq(c, inst);
1072 break;
1073 case OPCODE_SIN:
1074 emit_sin(c, inst);
1075 break;
1076 case OPCODE_COS:
1077 emit_cos(c, inst);
1078 break;
1079 case OPCODE_EX2:
1080 emit_ex2(c, inst);
1081 break;
1082 case OPCODE_LG2:
1083 emit_lg2(c, inst);
1084 break;
1085 case OPCODE_MAX:
1086 emit_max(c, inst);
1087 break;
1088 case OPCODE_MIN:
1089 emit_min(c, inst);
1090 break;
1091 case OPCODE_SLT:
1092 emit_slt(c, inst);
1093 break;
1094 case OPCODE_SLE:
1095 emit_sle(c, inst);
1096 break;
1097 case OPCODE_SGT:
1098 emit_sgt(c, inst);
1099 break;
1100 case OPCODE_SGE:
1101 emit_sge(c, inst);
1102 break;
1103 case OPCODE_SEQ:
1104 emit_seq(c, inst);
1105 break;
1106 case OPCODE_SNE:
1107 emit_sne(c, inst);
1108 break;
1109 case OPCODE_MUL:
1110 emit_mul(c, inst);
1111 break;
1112 case OPCODE_POW:
1113 emit_pow(c, inst);
1114 break;
1115 case OPCODE_MAD:
1116 emit_mad(c, inst);
1117 break;
1118 case OPCODE_TEX:
1119 emit_tex(c, inst);
1120 break;
1121 case OPCODE_TXB:
1122 emit_txb(c, inst);
1123 break;
1124 case OPCODE_KIL_NV:
1125 emit_kil(c);
1126 break;
1127 case OPCODE_IF:
1128 assert(if_insn < MAX_IFSN);
1129 if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
1130 break;
1131 case OPCODE_ELSE:
1132 if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
1133 break;
1134 case OPCODE_ENDIF:
1135 assert(if_insn > 0);
1136 brw_ENDIF(p, if_inst[--if_insn]);
1137 break;
1138 case OPCODE_BGNSUB:
1139 case OPCODE_ENDSUB:
1140 break;
1141 case OPCODE_CAL:
1142 brw_push_insn_state(p);
1143 brw_set_mask_control(p, BRW_MASK_DISABLE);
1144 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1145 brw_ADD(p, c->ret_reg, brw_ip_reg(), brw_imm_d(2*16));
1146 orig_inst = inst->Data;
1147 orig_inst->Data = current_insn(p);
1148 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1149 brw_pop_insn_state(p);
1150 break;
1151 case OPCODE_RET:
1152 brw_push_insn_state(p);
1153 brw_set_mask_control(p, BRW_MASK_DISABLE);
1154 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1155 brw_MOV(p, brw_ip_reg(), c->ret_reg);
1156 brw_pop_insn_state(p);
1157 break;
1158 case OPCODE_BGNLOOP:
1159 loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
1160 break;
1161 case OPCODE_BRK:
1162 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
1163 brw_BREAK(p);
1164 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1165 break;
1166 case OPCODE_ENDLOOP:
1167 loop_insn--;
1168 inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
1169 /* patch all the BREAK instructions from
1170 last BEGINLOOP */
1171 while (inst0 > loop_inst[loop_insn]) {
1172 inst0--;
1173 if (inst0->header.opcode == BRW_OPCODE_BREAK) {
1174 inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
1175 inst0->bits3.if_else.pop_count = 0;
1176 }
1177 }
1178 break;
1179 default:
1180 _mesa_printf("unsupported IR in fragment shader %d\n",
1181 inst->Opcode);
1182 }
1183 }
1184 post_wm_emit(c);
1185 }
1186
1187 void brw_wm_glsl_emit(struct brw_wm_compile *c)
1188 {
1189 brw_wm_pass_fp(c);
1190 c->tmp_index = 127;
1191 brw_wm_emit_glsl(c);
1192 c->prog_data.total_grf = c->reg_index;
1193 c->prog_data.total_scratch = 0;
1194 }