1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
11 /* This is only a guess; gl_fragment_program needs a flag for this later. */
12 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
15 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
16 struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
17 switch (inst
->Opcode
) {
39 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
40 int component
, struct brw_reg reg
)
42 c
->wm_regs
[file
][index
][component
].reg
= reg
;
43 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
46 static int get_scalar_dst_index(struct prog_instruction
*inst
)
49 for (i
= 0; i
< 4; i
++)
50 if (inst
->DstReg
.WriteMask
& (1<<i
))
55 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
58 if(c
->tmp_index
== c
->tmp_max
)
59 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
61 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
65 static int mark_tmps(struct brw_wm_compile
*c
)
70 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
72 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
75 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
81 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
, int nr
, GLuint neg
, GLuint abs
)
85 case PROGRAM_STATE_VAR
:
86 case PROGRAM_CONSTANT
:
88 file
= PROGRAM_STATE_VAR
;
90 case PROGRAM_UNDEFINED
:
91 return brw_null_reg();
96 if(c
->wm_regs
[file
][index
][component
].inited
)
97 reg
= c
->wm_regs
[file
][index
][component
].reg
;
99 reg
= brw_vec8_grf(c
->reg_index
, 0);
101 if(!c
->wm_regs
[file
][index
][component
].inited
) {
102 set_reg(c
, file
, index
, component
, reg
);
106 if (neg
& (1<< component
)) {
114 static void prealloc_reg(struct brw_wm_compile
*c
)
118 int nr_interp_regs
= 0;
119 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
121 for (i
= 0; i
< 4; i
++) {
122 reg
= (i
< c
->key
.nr_depth_regs
)
123 ? brw_vec8_grf(i
*2, 0) : brw_vec8_grf(0, 0);
124 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
126 c
->reg_index
+= 2*c
->key
.nr_depth_regs
;
128 int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
129 struct gl_program_parameter_list
*plist
=
130 c
->fp
->program
.Base
.Parameters
;
132 c
->prog_data
.nr_params
= 4*nr_params
;
133 for (i
= 0; i
< nr_params
; i
++) {
134 for (j
= 0; j
< 4; j
++, index
++) {
135 reg
= brw_vec1_grf(c
->reg_index
+ index
/8,
137 c
->prog_data
.param
[index
] =
138 &plist
->ParameterValues
[i
][j
];
139 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
142 c
->nr_creg
= 2*((4*nr_params
+15)/16);
143 c
->reg_index
+= c
->nr_creg
;
145 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
146 if (inputs
& (1<<i
)) {
148 reg
= brw_vec8_grf(c
->reg_index
, 0);
149 for (j
= 0; j
< 4; j
++)
150 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
155 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
156 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
157 c
->prog_data
.curb_read_length
= c
->nr_creg
;
158 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
160 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
164 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
165 struct prog_instruction
*inst
, int component
, int nr
)
167 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
171 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
172 struct prog_src_register
*src
, int index
, int nr
)
174 int component
= GET_SWZ(src
->Swizzle
, index
);
175 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
176 src
->NegateBase
, src
->Abs
);
179 /* Subroutines are minimal support for reusable instruction sequences.
180 They are implemented as simply as possible to minimise overhead: there
181 is no explicit support for communication between the caller and callee
182 other than saving the return address in a temporary register, nor is
183 there any automatic local storage. This implies that great care is
184 required before attempting reentrancy or any kind of nested
185 subroutine invocations. */
186 static void invoke_subroutine( struct brw_wm_compile
*c
,
187 enum _subroutine subroutine
,
188 void (*emit
)( struct brw_wm_compile
* ) )
190 struct brw_compile
*p
= &c
->func
;
192 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
194 if( c
->subroutines
[ subroutine
] ) {
195 /* subroutine previously emitted: reuse existing instructions */
197 int mark
= mark_tmps( c
);
198 struct brw_reg return_address
= retype( alloc_tmp( c
),
199 BRW_REGISTER_TYPE_UD
);
200 int here
= p
->nr_insn
;
202 brw_push_insn_state(p
);
203 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
204 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
206 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
207 brw_imm_d( ( c
->subroutines
[ subroutine
] -
209 brw_pop_insn_state(p
);
211 release_tmps( c
, mark
);
213 /* previously unused subroutine: emit, and mark for later reuse */
215 int mark
= mark_tmps( c
);
216 struct brw_reg return_address
= retype( alloc_tmp( c
),
217 BRW_REGISTER_TYPE_UD
);
218 struct brw_instruction
*calc
;
219 int base
= p
->nr_insn
;
221 brw_push_insn_state(p
);
222 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
223 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
224 brw_pop_insn_state(p
);
226 c
->subroutines
[ subroutine
] = p
->nr_insn
;
230 brw_push_insn_state(p
);
231 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
232 brw_MOV( p
, brw_ip_reg(), return_address
);
233 brw_pop_insn_state(p
);
235 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
237 release_tmps( c
, mark
);
241 static void emit_abs( struct brw_wm_compile
*c
,
242 struct prog_instruction
*inst
)
245 struct brw_compile
*p
= &c
->func
;
246 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
247 for (i
= 0; i
< 4; i
++) {
248 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
249 struct brw_reg src
, dst
;
250 dst
= get_dst_reg(c
, inst
, i
, 1);
251 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
252 brw_MOV(p
, dst
, brw_abs(src
));
255 brw_set_saturate(p
, 0);
258 static void emit_trunc( struct brw_wm_compile
*c
,
259 struct prog_instruction
*inst
)
262 struct brw_compile
*p
= &c
->func
;
263 GLuint mask
= inst
->DstReg
.WriteMask
;
264 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
265 for (i
= 0; i
< 4; i
++) {
267 struct brw_reg src
, dst
;
268 dst
= get_dst_reg(c
, inst
, i
, 1) ;
269 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
270 brw_RNDD(p
, dst
, src
);
273 brw_set_saturate(p
, 0);
276 static void emit_mov( struct brw_wm_compile
*c
,
277 struct prog_instruction
*inst
)
280 struct brw_compile
*p
= &c
->func
;
281 GLuint mask
= inst
->DstReg
.WriteMask
;
282 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
283 for (i
= 0; i
< 4; i
++) {
285 struct brw_reg src
, dst
;
286 dst
= get_dst_reg(c
, inst
, i
, 1);
287 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
288 brw_MOV(p
, dst
, src
);
291 brw_set_saturate(p
, 0);
294 static void emit_pixel_xy(struct brw_wm_compile
*c
,
295 struct prog_instruction
*inst
)
297 struct brw_reg r1
= brw_vec1_grf(1, 0);
298 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
300 struct brw_reg dst0
, dst1
;
301 struct brw_compile
*p
= &c
->func
;
302 GLuint mask
= inst
->DstReg
.WriteMask
;
304 dst0
= get_dst_reg(c
, inst
, 0, 1);
305 dst1
= get_dst_reg(c
, inst
, 1, 1);
306 /* Calculate pixel centers by adding 1 or 0 to each of the
307 * micro-tile coordinates passed in r1.
309 if (mask
& WRITEMASK_X
) {
311 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
312 stride(suboffset(r1_uw
, 4), 2, 4, 0),
313 brw_imm_v(0x10101010));
316 if (mask
& WRITEMASK_Y
) {
318 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
319 stride(suboffset(r1_uw
, 5), 2, 4, 0),
320 brw_imm_v(0x11001100));
325 static void emit_delta_xy(struct brw_wm_compile
*c
,
326 struct prog_instruction
*inst
)
328 struct brw_reg r1
= brw_vec1_grf(1, 0);
329 struct brw_reg dst0
, dst1
, src0
, src1
;
330 struct brw_compile
*p
= &c
->func
;
331 GLuint mask
= inst
->DstReg
.WriteMask
;
333 dst0
= get_dst_reg(c
, inst
, 0, 1);
334 dst1
= get_dst_reg(c
, inst
, 1, 1);
335 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
336 src1
= get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
337 /* Calc delta X,Y by subtracting origin in r1 from the pixel
340 if (mask
& WRITEMASK_X
) {
343 retype(src0
, BRW_REGISTER_TYPE_UW
),
347 if (mask
& WRITEMASK_Y
) {
350 retype(src1
, BRW_REGISTER_TYPE_UW
),
351 negate(suboffset(r1
,1)));
358 static void fire_fb_write( struct brw_wm_compile
*c
,
364 struct brw_compile
*p
= &c
->func
;
365 /* Pass through control information:
367 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
369 brw_push_insn_state(p
);
370 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
372 brw_message_reg(base_reg
+ 1),
374 brw_pop_insn_state(p
);
376 /* Send framebuffer write message: */
378 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
380 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
387 static void emit_fb_write(struct brw_wm_compile
*c
,
388 struct prog_instruction
*inst
)
390 struct brw_compile
*p
= &c
->func
;
396 /* Reserve a space for AA - may not be needed:
398 if (c
->key
.aa_dest_stencil_reg
)
401 brw_push_insn_state(p
);
402 for (channel
= 0; channel
< 4; channel
++) {
403 src0
= get_src_reg(c
, &inst
->SrcReg
[0], channel
, 1);
404 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
405 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
406 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
408 /* skip over the regs populated above: */
410 brw_pop_insn_state(p
);
413 if (c
->key
.source_depth_to_render_target
)
415 if (c
->key
.computes_depth
) {
416 src0
= get_src_reg(c
, &inst
->SrcReg
[2], 2, 1);
417 brw_MOV(p
, brw_message_reg(nr
), src0
);
419 src0
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
420 brw_MOV(p
, brw_message_reg(nr
), src0
);
425 target
= inst
->Sampler
>> 1;
426 eot
= inst
->Sampler
& 1;
427 fire_fb_write(c
, 0, nr
, target
, eot
);
430 static void emit_pixel_w( struct brw_wm_compile
*c
,
431 struct prog_instruction
*inst
)
433 struct brw_compile
*p
= &c
->func
;
434 GLuint mask
= inst
->DstReg
.WriteMask
;
435 if (mask
& WRITEMASK_W
) {
436 struct brw_reg dst
, src0
, delta0
, delta1
;
437 struct brw_reg interp3
;
439 dst
= get_dst_reg(c
, inst
, 3, 1);
440 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
441 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
442 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
444 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
445 /* Calc 1/w - just linterp wpos[3] optimized by putting the
446 * result straight into a message reg.
448 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
449 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
453 BRW_MATH_FUNCTION_INV
,
454 BRW_MATH_SATURATE_NONE
,
456 BRW_MATH_PRECISION_FULL
);
460 static void emit_linterp(struct brw_wm_compile
*c
,
461 struct prog_instruction
*inst
)
463 struct brw_compile
*p
= &c
->func
;
464 GLuint mask
= inst
->DstReg
.WriteMask
;
465 struct brw_reg interp
[4];
466 struct brw_reg dst
, delta0
, delta1
;
469 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
470 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
471 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
475 interp
[0] = brw_vec1_grf(nr
, 0);
476 interp
[1] = brw_vec1_grf(nr
, 4);
477 interp
[2] = brw_vec1_grf(nr
+1, 0);
478 interp
[3] = brw_vec1_grf(nr
+1, 4);
480 for(i
= 0; i
< 4; i
++ ) {
482 dst
= get_dst_reg(c
, inst
, i
, 1);
483 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
484 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
489 static void emit_cinterp(struct brw_wm_compile
*c
,
490 struct prog_instruction
*inst
)
492 struct brw_compile
*p
= &c
->func
;
493 GLuint mask
= inst
->DstReg
.WriteMask
;
495 struct brw_reg interp
[4];
496 struct brw_reg dst
, src0
;
498 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
502 interp
[0] = brw_vec1_grf(nr
, 0);
503 interp
[1] = brw_vec1_grf(nr
, 4);
504 interp
[2] = brw_vec1_grf(nr
+1, 0);
505 interp
[3] = brw_vec1_grf(nr
+1, 4);
507 for(i
= 0; i
< 4; i
++ ) {
509 dst
= get_dst_reg(c
, inst
, i
, 1);
510 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
515 static void emit_pinterp(struct brw_wm_compile
*c
,
516 struct prog_instruction
*inst
)
518 struct brw_compile
*p
= &c
->func
;
519 GLuint mask
= inst
->DstReg
.WriteMask
;
521 struct brw_reg interp
[4];
522 struct brw_reg dst
, delta0
, delta1
;
523 struct brw_reg src0
, w
;
525 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
526 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
527 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
528 w
= get_src_reg(c
, &inst
->SrcReg
[2], 3, 1);
532 interp
[0] = brw_vec1_grf(nr
, 0);
533 interp
[1] = brw_vec1_grf(nr
, 4);
534 interp
[2] = brw_vec1_grf(nr
+1, 0);
535 interp
[3] = brw_vec1_grf(nr
+1, 4);
537 for(i
= 0; i
< 4; i
++ ) {
539 dst
= get_dst_reg(c
, inst
, i
, 1);
540 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
541 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
543 brw_MUL(p
, dst
, dst
, w
);
548 static void emit_xpd(struct brw_wm_compile
*c
,
549 struct prog_instruction
*inst
)
552 struct brw_compile
*p
= &c
->func
;
553 GLuint mask
= inst
->DstReg
.WriteMask
;
554 for (i
= 0; i
< 4; i
++) {
558 struct brw_reg src0
, src1
, dst
;
559 dst
= get_dst_reg(c
, inst
, i
, 1);
560 src0
= negate(get_src_reg(c
, &inst
->SrcReg
[0], i2
, 1));
561 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i1
, 1);
562 brw_MUL(p
, brw_null_reg(), src0
, src1
);
563 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i1
, 1);
564 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i2
, 1);
565 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
566 brw_MAC(p
, dst
, src0
, src1
);
567 brw_set_saturate(p
, 0);
570 brw_set_saturate(p
, 0);
573 static void emit_dp3(struct brw_wm_compile
*c
,
574 struct prog_instruction
*inst
)
576 struct brw_reg src0
[3], src1
[3], dst
;
578 struct brw_compile
*p
= &c
->func
;
579 for (i
= 0; i
< 3; i
++) {
580 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
581 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
584 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
585 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
586 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
587 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
588 brw_MAC(p
, dst
, src0
[2], src1
[2]);
589 brw_set_saturate(p
, 0);
592 static void emit_dp4(struct brw_wm_compile
*c
,
593 struct prog_instruction
*inst
)
595 struct brw_reg src0
[4], src1
[4], dst
;
597 struct brw_compile
*p
= &c
->func
;
598 for (i
= 0; i
< 4; i
++) {
599 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
600 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
602 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
603 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
604 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
605 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
606 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
607 brw_MAC(p
, dst
, src0
[3], src1
[3]);
608 brw_set_saturate(p
, 0);
611 static void emit_dph(struct brw_wm_compile
*c
,
612 struct prog_instruction
*inst
)
614 struct brw_reg src0
[4], src1
[4], dst
;
616 struct brw_compile
*p
= &c
->func
;
617 for (i
= 0; i
< 4; i
++) {
618 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
619 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
621 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
622 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
623 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
624 brw_MAC(p
, dst
, src0
[2], src1
[2]);
625 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
626 brw_ADD(p
, dst
, src0
[3], src1
[3]);
627 brw_set_saturate(p
, 0);
630 static void emit_math1(struct brw_wm_compile
*c
,
631 struct prog_instruction
*inst
, GLuint func
)
633 struct brw_compile
*p
= &c
->func
;
634 struct brw_reg src0
, dst
;
636 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
637 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
638 brw_MOV(p
, brw_message_reg(2), src0
);
642 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
645 BRW_MATH_DATA_VECTOR
,
646 BRW_MATH_PRECISION_FULL
);
649 static void emit_rcp(struct brw_wm_compile
*c
,
650 struct prog_instruction
*inst
)
652 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
655 static void emit_rsq(struct brw_wm_compile
*c
,
656 struct prog_instruction
*inst
)
658 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
661 static void emit_sin(struct brw_wm_compile
*c
,
662 struct prog_instruction
*inst
)
664 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
667 static void emit_cos(struct brw_wm_compile
*c
,
668 struct prog_instruction
*inst
)
670 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
673 static void emit_ex2(struct brw_wm_compile
*c
,
674 struct prog_instruction
*inst
)
676 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
679 static void emit_lg2(struct brw_wm_compile
*c
,
680 struct prog_instruction
*inst
)
682 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
685 static void emit_add(struct brw_wm_compile
*c
,
686 struct prog_instruction
*inst
)
688 struct brw_compile
*p
= &c
->func
;
689 struct brw_reg src0
, src1
, dst
;
690 GLuint mask
= inst
->DstReg
.WriteMask
;
692 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
693 for (i
= 0 ; i
< 4; i
++) {
695 dst
= get_dst_reg(c
, inst
, i
, 1);
696 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
697 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
698 brw_ADD(p
, dst
, src0
, src1
);
701 brw_set_saturate(p
, 0);
704 static void emit_sub(struct brw_wm_compile
*c
,
705 struct prog_instruction
*inst
)
707 struct brw_compile
*p
= &c
->func
;
708 struct brw_reg src0
, src1
, dst
;
709 GLuint mask
= inst
->DstReg
.WriteMask
;
711 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
712 for (i
= 0 ; i
< 4; i
++) {
714 dst
= get_dst_reg(c
, inst
, i
, 1);
715 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
716 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
717 brw_ADD(p
, dst
, src0
, negate(src1
));
720 brw_set_saturate(p
, 0);
723 static void emit_mul(struct brw_wm_compile
*c
,
724 struct prog_instruction
*inst
)
726 struct brw_compile
*p
= &c
->func
;
727 struct brw_reg src0
, src1
, dst
;
728 GLuint mask
= inst
->DstReg
.WriteMask
;
730 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
731 for (i
= 0 ; i
< 4; i
++) {
733 dst
= get_dst_reg(c
, inst
, i
, 1);
734 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
735 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
736 brw_MUL(p
, dst
, src0
, src1
);
739 brw_set_saturate(p
, 0);
742 static void emit_frc(struct brw_wm_compile
*c
,
743 struct prog_instruction
*inst
)
745 struct brw_compile
*p
= &c
->func
;
746 struct brw_reg src0
, dst
;
747 GLuint mask
= inst
->DstReg
.WriteMask
;
749 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
750 for (i
= 0 ; i
< 4; i
++) {
752 dst
= get_dst_reg(c
, inst
, i
, 1);
753 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
754 brw_FRC(p
, dst
, src0
);
757 if (inst
->SaturateMode
!= SATURATE_OFF
)
758 brw_set_saturate(p
, 0);
761 static void emit_flr(struct brw_wm_compile
*c
,
762 struct prog_instruction
*inst
)
764 struct brw_compile
*p
= &c
->func
;
765 struct brw_reg src0
, dst
;
766 GLuint mask
= inst
->DstReg
.WriteMask
;
768 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
769 for (i
= 0 ; i
< 4; i
++) {
771 dst
= get_dst_reg(c
, inst
, i
, 1);
772 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
773 brw_RNDD(p
, dst
, src0
);
776 brw_set_saturate(p
, 0);
779 static void emit_max(struct brw_wm_compile
*c
,
780 struct prog_instruction
*inst
)
782 struct brw_compile
*p
= &c
->func
;
783 GLuint mask
= inst
->DstReg
.WriteMask
;
784 struct brw_reg src0
, src1
, dst
;
786 brw_push_insn_state(p
);
787 for (i
= 0; i
< 4; i
++) {
789 dst
= get_dst_reg(c
, inst
, i
, 1);
790 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
791 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
792 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
793 brw_MOV(p
, dst
, src0
);
794 brw_set_saturate(p
, 0);
796 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src0
, src1
);
797 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
798 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
799 brw_MOV(p
, dst
, src1
);
800 brw_set_saturate(p
, 0);
801 brw_set_predicate_control_flag_value(p
, 0xff);
804 brw_pop_insn_state(p
);
807 static void emit_min(struct brw_wm_compile
*c
,
808 struct prog_instruction
*inst
)
810 struct brw_compile
*p
= &c
->func
;
811 GLuint mask
= inst
->DstReg
.WriteMask
;
812 struct brw_reg src0
, src1
, dst
;
814 brw_push_insn_state(p
);
815 for (i
= 0; i
< 4; i
++) {
817 dst
= get_dst_reg(c
, inst
, i
, 1);
818 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
819 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
820 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
821 brw_MOV(p
, dst
, src0
);
822 brw_set_saturate(p
, 0);
824 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
825 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
826 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
827 brw_MOV(p
, dst
, src1
);
828 brw_set_saturate(p
, 0);
829 brw_set_predicate_control_flag_value(p
, 0xff);
832 brw_pop_insn_state(p
);
835 static void emit_pow(struct brw_wm_compile
*c
,
836 struct prog_instruction
*inst
)
838 struct brw_compile
*p
= &c
->func
;
839 struct brw_reg dst
, src0
, src1
;
840 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
841 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
842 src1
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
844 brw_MOV(p
, brw_message_reg(2), src0
);
845 brw_MOV(p
, brw_message_reg(3), src1
);
849 BRW_MATH_FUNCTION_POW
,
850 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
853 BRW_MATH_DATA_VECTOR
,
854 BRW_MATH_PRECISION_FULL
);
857 static void emit_lrp(struct brw_wm_compile
*c
,
858 struct prog_instruction
*inst
)
860 struct brw_compile
*p
= &c
->func
;
861 GLuint mask
= inst
->DstReg
.WriteMask
;
862 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
864 int mark
= mark_tmps(c
);
865 for (i
= 0; i
< 4; i
++) {
867 dst
= get_dst_reg(c
, inst
, i
, 1);
868 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
870 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
872 if (src1
.nr
== dst
.nr
) {
874 brw_MOV(p
, tmp1
, src1
);
878 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
879 if (src2
.nr
== dst
.nr
) {
881 brw_MOV(p
, tmp2
, src2
);
885 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
886 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
887 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
888 brw_MAC(p
, dst
, src0
, tmp1
);
889 brw_set_saturate(p
, 0);
891 release_tmps(c
, mark
);
895 static void emit_kil(struct brw_wm_compile
*c
)
897 struct brw_compile
*p
= &c
->func
;
898 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
899 brw_push_insn_state(p
);
900 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
901 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
902 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
903 brw_pop_insn_state(p
);
906 static void emit_mad(struct brw_wm_compile
*c
,
907 struct prog_instruction
*inst
)
909 struct brw_compile
*p
= &c
->func
;
910 GLuint mask
= inst
->DstReg
.WriteMask
;
911 struct brw_reg dst
, src0
, src1
, src2
;
914 for (i
= 0; i
< 4; i
++) {
916 dst
= get_dst_reg(c
, inst
, i
, 1);
917 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
918 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
919 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
920 brw_MUL(p
, dst
, src0
, src1
);
922 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
923 brw_ADD(p
, dst
, dst
, src2
);
924 brw_set_saturate(p
, 0);
929 static void emit_sop(struct brw_wm_compile
*c
,
930 struct prog_instruction
*inst
, GLuint cond
)
932 struct brw_compile
*p
= &c
->func
;
933 GLuint mask
= inst
->DstReg
.WriteMask
;
934 struct brw_reg dst
, src0
, src1
;
937 for (i
= 0; i
< 4; i
++) {
939 dst
= get_dst_reg(c
, inst
, i
, 1);
940 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
941 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
942 brw_push_insn_state(p
);
943 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
944 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
945 brw_MOV(p
, dst
, brw_imm_f(0.0));
946 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
947 brw_MOV(p
, dst
, brw_imm_f(1.0));
948 brw_pop_insn_state(p
);
953 static void emit_slt(struct brw_wm_compile
*c
,
954 struct prog_instruction
*inst
)
956 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
959 static void emit_sle(struct brw_wm_compile
*c
,
960 struct prog_instruction
*inst
)
962 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
965 static void emit_sgt(struct brw_wm_compile
*c
,
966 struct prog_instruction
*inst
)
968 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
971 static void emit_sge(struct brw_wm_compile
*c
,
972 struct prog_instruction
*inst
)
974 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
977 static void emit_seq(struct brw_wm_compile
*c
,
978 struct prog_instruction
*inst
)
980 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
983 static void emit_sne(struct brw_wm_compile
*c
,
984 struct prog_instruction
*inst
)
986 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
989 static void emit_ddx(struct brw_wm_compile
*c
,
990 struct prog_instruction
*inst
)
992 struct brw_compile
*p
= &c
->func
;
993 GLuint mask
= inst
->DstReg
.WriteMask
;
994 struct brw_reg interp
[4];
996 struct brw_reg src0
, w
;
998 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
999 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1001 interp
[0] = brw_vec1_grf(nr
, 0);
1002 interp
[1] = brw_vec1_grf(nr
, 4);
1003 interp
[2] = brw_vec1_grf(nr
+1, 0);
1004 interp
[3] = brw_vec1_grf(nr
+1, 4);
1005 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1006 for(i
= 0; i
< 4; i
++ ) {
1007 if (mask
& (1<<i
)) {
1008 dst
= get_dst_reg(c
, inst
, i
, 1);
1009 brw_MOV(p
, dst
, interp
[i
]);
1010 brw_MUL(p
, dst
, dst
, w
);
1013 brw_set_saturate(p
, 0);
1016 static void emit_ddy(struct brw_wm_compile
*c
,
1017 struct prog_instruction
*inst
)
1019 struct brw_compile
*p
= &c
->func
;
1020 GLuint mask
= inst
->DstReg
.WriteMask
;
1021 struct brw_reg interp
[4];
1023 struct brw_reg src0
, w
;
1026 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1028 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1029 interp
[0] = brw_vec1_grf(nr
, 0);
1030 interp
[1] = brw_vec1_grf(nr
, 4);
1031 interp
[2] = brw_vec1_grf(nr
+1, 0);
1032 interp
[3] = brw_vec1_grf(nr
+1, 4);
1033 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1034 for(i
= 0; i
< 4; i
++ ) {
1035 if (mask
& (1<<i
)) {
1036 dst
= get_dst_reg(c
, inst
, i
, 1);
1037 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1038 brw_MUL(p
, dst
, dst
, w
);
1041 brw_set_saturate(p
, 0);
1044 static __inline
struct brw_reg
high_words( struct brw_reg reg
)
1046 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1050 static __inline
struct brw_reg
low_words( struct brw_reg reg
)
1052 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1055 static __inline
struct brw_reg
even_bytes( struct brw_reg reg
)
1057 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1060 static __inline
struct brw_reg
odd_bytes( struct brw_reg reg
)
1062 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1066 /* One-, two- and three-dimensional Perlin noise, similar to the description
1067 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1068 static void noise1_sub( struct brw_wm_compile
*c
) {
1070 struct brw_compile
*p
= &c
->func
;
1071 struct brw_reg param
,
1072 x0
, x1
, /* gradients at each end */
1073 t
, tmp
[ 2 ], /* float temporaries */
1074 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1076 int mark
= mark_tmps( c
);
1078 x0
= alloc_tmp( c
);
1079 x1
= alloc_tmp( c
);
1081 tmp
[ 0 ] = alloc_tmp( c
);
1082 tmp
[ 1 ] = alloc_tmp( c
);
1083 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1084 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1085 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1086 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1087 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1089 param
= lookup_tmp( c
, mark
- 2 );
1091 brw_set_access_mode( p
, BRW_ALIGN_1
);
1093 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1095 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1096 be hashed. Also compute the remainder (offset within the unit
1097 length), interleaved to reduce register dependency penalties. */
1098 brw_RNDD( p
, itmp
[ 0 ], param
);
1099 brw_FRC( p
, param
, param
);
1100 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1101 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1102 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1104 /* We're now ready to perform the hashing. The two hashes are
1105 interleaved for performance. The hash function used is
1106 designed to rapidly achieve avalanche and require only 32x16
1107 bit multiplication, and 16-bit swizzles (which we get for
1108 free). We can't use immediate operands in the multiplies,
1109 because immediates are permitted only in src1 and the 16-bit
1110 factor is permitted only in src0. */
1111 for( i
= 0; i
< 2; i
++ )
1112 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1113 for( i
= 0; i
< 2; i
++ )
1114 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1115 high_words( itmp
[ i
] ) );
1116 for( i
= 0; i
< 2; i
++ )
1117 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1118 for( i
= 0; i
< 2; i
++ )
1119 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1120 high_words( itmp
[ i
] ) );
1121 for( i
= 0; i
< 2; i
++ )
1122 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1123 for( i
= 0; i
< 2; i
++ )
1124 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1125 high_words( itmp
[ i
] ) );
1127 /* Now we want to initialise the two gradients based on the
1128 hashes. Format conversion from signed integer to float leaves
1129 everything scaled too high by a factor of pow( 2, 31 ), but
1130 we correct for that right at the end. */
1131 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1132 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1133 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1135 brw_MUL( p
, x0
, x0
, param
);
1136 brw_MUL( p
, x1
, x1
, t
);
1138 /* We interpolate between the gradients using the polynomial
1139 6t^5 - 15t^4 + 10t^3 (Perlin). */
1140 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1141 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1142 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1143 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1144 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1145 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1147 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1148 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1149 brw_MUL( p
, x1
, x1
, param
);
1150 brw_ADD( p
, x0
, x0
, x1
);
1151 /* scale by pow( 2, -30 ), to compensate for the format conversion
1152 above and an extra factor of 2 so that a single gradient covers
1154 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1156 release_tmps( c
, mark
);
1159 static void emit_noise1( struct brw_wm_compile
*c
,
1160 struct prog_instruction
*inst
)
1162 struct brw_compile
*p
= &c
->func
;
1163 struct brw_reg src
, param
, dst
;
1164 GLuint mask
= inst
->DstReg
.WriteMask
;
1166 int mark
= mark_tmps( c
);
1168 assert( mark
== 0 );
1170 src
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1172 param
= alloc_tmp( c
);
1174 brw_MOV( p
, param
, src
);
1176 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1178 /* Fill in the result: */
1179 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1180 for (i
= 0 ; i
< 4; i
++) {
1181 if (mask
& (1<<i
)) {
1182 dst
= get_dst_reg(c
, inst
, i
, 1);
1183 brw_MOV( p
, dst
, param
);
1186 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1187 brw_set_saturate( p
, 0 );
1189 release_tmps( c
, mark
);
1192 static void noise2_sub( struct brw_wm_compile
*c
) {
1194 struct brw_compile
*p
= &c
->func
;
1195 struct brw_reg param0
, param1
,
1196 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1197 t
, tmp
[ 4 ], /* float temporaries */
1198 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1200 int mark
= mark_tmps( c
);
1202 x0y0
= alloc_tmp( c
);
1203 x0y1
= alloc_tmp( c
);
1204 x1y0
= alloc_tmp( c
);
1205 x1y1
= alloc_tmp( c
);
1207 for( i
= 0; i
< 4; i
++ ) {
1208 tmp
[ i
] = alloc_tmp( c
);
1209 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1211 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1212 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1213 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1215 param0
= lookup_tmp( c
, mark
- 3 );
1216 param1
= lookup_tmp( c
, mark
- 2 );
1218 brw_set_access_mode( p
, BRW_ALIGN_1
);
1220 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1221 be hashed. Also compute the remainders (offsets within the unit
1222 square), interleaved to reduce register dependency penalties. */
1223 brw_RNDD( p
, itmp
[ 0 ], param0
);
1224 brw_RNDD( p
, itmp
[ 1 ], param1
);
1225 brw_FRC( p
, param0
, param0
);
1226 brw_FRC( p
, param1
, param1
);
1227 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1228 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1229 low_words( itmp
[ 1 ] ) );
1230 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1231 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1232 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1233 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1234 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1236 /* We're now ready to perform the hashing. The four hashes are
1237 interleaved for performance. The hash function used is
1238 designed to rapidly achieve avalanche and require only 32x16
1239 bit multiplication, and 16-bit swizzles (which we get for
1240 free). We can't use immediate operands in the multiplies,
1241 because immediates are permitted only in src1 and the 16-bit
1242 factor is permitted only in src0. */
1243 for( i
= 0; i
< 4; i
++ )
1244 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1245 for( i
= 0; i
< 4; i
++ )
1246 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1247 high_words( itmp
[ i
] ) );
1248 for( i
= 0; i
< 4; i
++ )
1249 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1250 for( i
= 0; i
< 4; i
++ )
1251 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1252 high_words( itmp
[ i
] ) );
1253 for( i
= 0; i
< 4; i
++ )
1254 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1255 for( i
= 0; i
< 4; i
++ )
1256 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1257 high_words( itmp
[ i
] ) );
1259 /* Now we want to initialise the four gradients based on the
1260 hashes. Format conversion from signed integer to float leaves
1261 everything scaled too high by a factor of pow( 2, 15 ), but
1262 we correct for that right at the end. */
1263 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1264 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1265 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1266 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1267 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1269 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1270 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1271 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1272 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1274 brw_MUL( p
, x1y0
, x1y0
, t
);
1275 brw_MUL( p
, x1y1
, x1y1
, t
);
1276 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1277 brw_MUL( p
, x0y0
, x0y0
, param0
);
1278 brw_MUL( p
, x0y1
, x0y1
, param0
);
1280 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1281 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1282 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1283 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1285 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1286 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1287 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1288 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1290 /* We interpolate between the gradients using the polynomial
1291 6t^5 - 15t^4 + 10t^3 (Perlin). */
1292 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1293 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1294 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1295 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1296 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1297 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1298 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1300 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1301 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1302 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1303 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1304 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1306 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1307 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1308 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1309 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1311 /* Here we interpolate in the y dimension... */
1312 brw_MUL( p
, x0y1
, x0y1
, param1
);
1313 brw_MUL( p
, x1y1
, x1y1
, param1
);
1314 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1315 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1317 /* And now in x. There are horrible register dependencies here,
1318 but we have nothing else to do. */
1319 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1320 brw_MUL( p
, x1y0
, x1y0
, param0
);
1321 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1323 /* scale by pow( 2, -15 ), as described above */
1324 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1326 release_tmps( c
, mark
);
1329 static void emit_noise2( struct brw_wm_compile
*c
,
1330 struct prog_instruction
*inst
)
1332 struct brw_compile
*p
= &c
->func
;
1333 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1334 GLuint mask
= inst
->DstReg
.WriteMask
;
1336 int mark
= mark_tmps( c
);
1338 assert( mark
== 0 );
1340 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1341 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1343 param0
= alloc_tmp( c
);
1344 param1
= alloc_tmp( c
);
1346 brw_MOV( p
, param0
, src0
);
1347 brw_MOV( p
, param1
, src1
);
1349 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1351 /* Fill in the result: */
1352 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1353 for (i
= 0 ; i
< 4; i
++) {
1354 if (mask
& (1<<i
)) {
1355 dst
= get_dst_reg(c
, inst
, i
, 1);
1356 brw_MOV( p
, dst
, param0
);
1359 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1360 brw_set_saturate( p
, 0 );
1362 release_tmps( c
, mark
);
1365 /* The three-dimensional case is much like the one- and two- versions above,
1366 but since the number of corners is rapidly growing we now pack 16 16-bit
1367 hashes into each register to extract more parallelism from the EUs. */
1368 static void noise3_sub( struct brw_wm_compile
*c
) {
1370 struct brw_compile
*p
= &c
->func
;
1371 struct brw_reg param0
, param1
, param2
,
1372 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1373 xi
, yi
, zi
, /* interpolation coefficients */
1374 t
, tmp
[ 8 ], /* float temporaries */
1375 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1376 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1378 int mark
= mark_tmps( c
);
1380 x0y0
= alloc_tmp( c
);
1381 x0y1
= alloc_tmp( c
);
1382 x1y0
= alloc_tmp( c
);
1383 x1y1
= alloc_tmp( c
);
1384 xi
= alloc_tmp( c
);
1385 yi
= alloc_tmp( c
);
1386 zi
= alloc_tmp( c
);
1388 for( i
= 0; i
< 8; i
++ ) {
1389 tmp
[ i
] = alloc_tmp( c
);
1390 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1391 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1394 param0
= lookup_tmp( c
, mark
- 4 );
1395 param1
= lookup_tmp( c
, mark
- 3 );
1396 param2
= lookup_tmp( c
, mark
- 2 );
1398 brw_set_access_mode( p
, BRW_ALIGN_1
);
1400 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1401 be hashed. Also compute the remainders (offsets within the unit
1402 cube), interleaved to reduce register dependency penalties. */
1403 brw_RNDD( p
, itmp
[ 0 ], param0
);
1404 brw_RNDD( p
, itmp
[ 1 ], param1
);
1405 brw_RNDD( p
, itmp
[ 2 ], param2
);
1406 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */
1407 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */
1408 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */
1409 brw_FRC( p
, param0
, param0
);
1410 brw_FRC( p
, param1
, param1
);
1411 brw_FRC( p
, param2
, param2
);
1412 /* Since we now have only 16 bits of precision in the hash, we must
1413 be more careful about thorough mixing to maintain entropy as we
1414 squash the input vector into a small scalar. */
1415 brw_MUL( p
, brw_acc_reg(), itmp
[ 4 ], itmp
[ 0 ] );
1416 brw_MAC( p
, brw_acc_reg(), itmp
[ 5 ], itmp
[ 1 ] );
1417 brw_MAC( p
, itmp
[ 0 ], itmp
[ 6 ], itmp
[ 2 ] );
1418 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1419 brw_imm_uw( 0xBC8F ) );
1421 /* Temporarily disable the execution mask while we work with ExecSize=16
1422 channels (the mask is set for ExecSize=8 and is probably incorrect).
1423 Although this might cause execution of unwanted channels, the code
1424 writes only to temporary registers and has no side effects, so
1425 disabling the mask is harmless. */
1426 brw_push_insn_state( p
);
1427 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1428 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1429 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1430 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1432 /* We're now ready to perform the hashing. The eight hashes are
1433 interleaved for performance. The hash function used is
1434 designed to rapidly achieve avalanche and require only 16x16
1435 bit multiplication, and 8-bit swizzles (which we get for
1437 for( i
= 0; i
< 4; i
++ )
1438 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1439 for( i
= 0; i
< 4; i
++ )
1440 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1441 odd_bytes( wtmp
[ i
] ) );
1442 for( i
= 0; i
< 4; i
++ )
1443 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1444 for( i
= 0; i
< 4; i
++ )
1445 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1446 odd_bytes( wtmp
[ i
] ) );
1447 brw_pop_insn_state( p
);
1449 /* Now we want to initialise the four rear gradients based on the
1450 hashes. Format conversion from signed integer to float leaves
1451 everything scaled too high by a factor of pow( 2, 15 ), but
1452 we correct for that right at the end. */
1454 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1455 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1456 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1457 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1458 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1460 brw_push_insn_state( p
);
1461 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1462 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1463 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1464 brw_pop_insn_state( p
);
1466 brw_MUL( p
, x1y0
, x1y0
, t
);
1467 brw_MUL( p
, x1y1
, x1y1
, t
);
1468 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1469 brw_MUL( p
, x0y0
, x0y0
, param0
);
1470 brw_MUL( p
, x0y1
, x0y1
, param0
);
1473 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1474 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1475 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1476 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1478 brw_push_insn_state( p
);
1479 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1480 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1481 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1482 brw_pop_insn_state( p
);
1484 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1485 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1486 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1487 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1488 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1490 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1491 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1492 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1493 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1496 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1497 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1498 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1499 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1501 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1502 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1503 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1504 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1506 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1507 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1508 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1509 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1511 /* We interpolate between the gradients using the polynomial
1512 6t^5 - 15t^4 + 10t^3 (Perlin). */
1513 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1514 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1515 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1516 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1517 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1518 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1519 brw_MUL( p
, xi
, xi
, param0
);
1520 brw_MUL( p
, yi
, yi
, param1
);
1521 brw_MUL( p
, zi
, zi
, param2
);
1522 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1523 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1524 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1525 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1526 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1527 brw_MUL( p
, xi
, xi
, param0
);
1528 brw_MUL( p
, yi
, yi
, param1
);
1529 brw_MUL( p
, zi
, zi
, param2
);
1530 brw_MUL( p
, xi
, xi
, param0
);
1531 brw_MUL( p
, yi
, yi
, param1
);
1532 brw_MUL( p
, zi
, zi
, param2
);
1533 brw_MUL( p
, xi
, xi
, param0
);
1534 brw_MUL( p
, yi
, yi
, param1
);
1535 brw_MUL( p
, zi
, zi
, param2
);
1537 /* Here we interpolate in the y dimension... */
1538 brw_MUL( p
, x0y1
, x0y1
, yi
);
1539 brw_MUL( p
, x1y1
, x1y1
, yi
);
1540 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1541 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1543 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1544 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1545 brw_MUL( p
, x1y0
, x1y0
, xi
);
1546 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1548 /* Now do the same thing for the front four gradients... */
1550 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1551 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1552 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1553 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1555 brw_push_insn_state( p
);
1556 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1557 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1558 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1559 brw_pop_insn_state( p
);
1561 brw_MUL( p
, x1y0
, x1y0
, t
);
1562 brw_MUL( p
, x1y1
, x1y1
, t
);
1563 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1564 brw_MUL( p
, x0y0
, x0y0
, param0
);
1565 brw_MUL( p
, x0y1
, x0y1
, param0
);
1568 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1569 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1570 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1571 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1573 brw_push_insn_state( p
);
1574 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1575 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1576 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1577 brw_pop_insn_state( p
);
1579 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1580 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1581 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1582 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1583 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1585 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1586 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1587 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1588 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1591 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1592 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1593 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1594 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1596 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1597 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1598 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1599 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1601 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1602 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1603 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1604 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1606 /* The interpolation coefficients are still around from last time, so
1607 again interpolate in the y dimension... */
1608 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1609 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1610 brw_MUL( p
, x0y1
, x0y1
, yi
);
1611 brw_MUL( p
, x1y1
, x1y1
, yi
);
1612 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1613 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1615 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1616 time put the front face in tmp[ 1 ] and we're nearly there... */
1617 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1618 brw_MUL( p
, x1y0
, x1y0
, xi
);
1619 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1621 /* The final interpolation, in the z dimension: */
1622 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1623 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1624 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1626 /* scale by pow( 2, -15 ), as described above */
1627 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1629 release_tmps( c
, mark
);
1632 static void emit_noise3( struct brw_wm_compile
*c
,
1633 struct prog_instruction
*inst
)
1635 struct brw_compile
*p
= &c
->func
;
1636 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1637 GLuint mask
= inst
->DstReg
.WriteMask
;
1639 int mark
= mark_tmps( c
);
1641 assert( mark
== 0 );
1643 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1644 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1645 src2
= get_src_reg( c
, inst
->SrcReg
, 2, 1 );
1647 param0
= alloc_tmp( c
);
1648 param1
= alloc_tmp( c
);
1649 param2
= alloc_tmp( c
);
1651 brw_MOV( p
, param0
, src0
);
1652 brw_MOV( p
, param1
, src1
);
1653 brw_MOV( p
, param2
, src2
);
1655 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1657 /* Fill in the result: */
1658 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1659 for (i
= 0 ; i
< 4; i
++) {
1660 if (mask
& (1<<i
)) {
1661 dst
= get_dst_reg(c
, inst
, i
, 1);
1662 brw_MOV( p
, dst
, param0
);
1665 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1666 brw_set_saturate( p
, 0 );
1668 release_tmps( c
, mark
);
1671 static void emit_wpos_xy(struct brw_wm_compile
*c
,
1672 struct prog_instruction
*inst
)
1674 struct brw_compile
*p
= &c
->func
;
1675 GLuint mask
= inst
->DstReg
.WriteMask
;
1676 struct brw_reg src0
[2], dst
[2];
1678 dst
[0] = get_dst_reg(c
, inst
, 0, 1);
1679 dst
[1] = get_dst_reg(c
, inst
, 1, 1);
1681 src0
[0] = get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1682 src0
[1] = get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
1684 /* Calculate the pixel offset from window bottom left into destination
1687 if (mask
& WRITEMASK_X
) {
1688 /* X' = X - origin_x */
1691 retype(src0
[0], BRW_REGISTER_TYPE_W
),
1692 brw_imm_d(0 - c
->key
.origin_x
));
1695 if (mask
& WRITEMASK_Y
) {
1696 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
1699 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
1700 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
/* BIAS on SIMD8 not working yet... */
/*
 * Emit a texture lookup with bias.
 *
 * Looks up the sampler unit for the instruction, fetches the four
 * destination channels and the four components of source 0, loads message
 * registers 2..4 with the texture coordinate (unused coordinates are set
 * to 0 for the 1D and 2D/RECT targets), message register 5 with source
 * component 3 and message register 6 with 0.  The remaining lines are the
 * arguments of the sampler message, which uses
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing (the declaration of i, the break/default lines of the switch,
 * the head of the final call and some of its arguments) -- compare with
 * the upstream file before changing any code here.
 */
1707 static void emit_txb(struct brw_wm_compile
*c
,
1708 struct prog_instruction
*inst
)
1710 struct brw_compile
*p
= &c
->func
;
1711 struct brw_reg dst
[4], src
[4], payload_reg
;
1712 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
1715 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
1716 for (i
= 0; i
< 4; i
++)
1717 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
1718 for (i
= 0; i
< 4; i
++)
1719 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Texture coordinate goes into message registers 2..4. */
1721 switch (inst
->TexSrcTarget
) {
1722 case TEXTURE_1D_INDEX
:
1723 brw_MOV(p
, brw_message_reg(2), src
[0]);
1724 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0));
1725 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
1727 case TEXTURE_2D_INDEX
:
1728 case TEXTURE_RECT_INDEX
:
1729 brw_MOV(p
, brw_message_reg(2), src
[0]);
1730 brw_MOV(p
, brw_message_reg(3), src
[1]);
1731 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* Remaining targets pass all three coordinates through. */
1734 brw_MOV(p
, brw_message_reg(2), src
[0]);
1735 brw_MOV(p
, brw_message_reg(3), src
[1]);
1736 brw_MOV(p
, brw_message_reg(4), src
[2]);
/* Source component 3 (presumably the bias -- TODO confirm) goes into
   message register 5. */
1739 brw_MOV(p
, brw_message_reg(5), src
[3]);
1740 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
/* Arguments of the sampler message; the call head is not visible here. */
1742 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
1744 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
1745 unit
+ MAX_DRAW_BUFFERS
, /* surface */
1747 inst
->DstReg
.WriteMask
,
1748 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
/*
 * Emit a plain texture lookup.
 *
 * Looks up the sampler unit for the instruction and whether that unit is
 * flagged in c->key.shadowtex_mask, fetches the four destination channels
 * and the four components of source 0, and picks which coordinate
 * components to send from the texture target.  Coordinates are written to
 * consecutive message registers (0 is written instead of a coordinate in
 * one branch); the shadow path additionally writes message registers 5
 * and 6 and finally sets dst[3] to 1.0.  The sampler message uses
 * BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing (declarations of i, nr, emit and msg_len, the assignments to nr
 * and msg_len, the conditions guarding the MOVs, the head of the sampler
 * call and some of its arguments) -- compare with the upstream file
 * before changing any code here.
 */
1754 static void emit_tex(struct brw_wm_compile
*c
,
1755 struct prog_instruction
*inst
)
1757 struct brw_compile
*p
= &c
->func
;
1758 struct brw_reg dst
[4], src
[4], payload_reg
;
1759 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when this unit's bit is set in the key's shadow-texture mask. */
1764 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
1766 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
1768 for (i
= 0; i
< 4; i
++)
1769 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
1770 for (i
= 0; i
< 4; i
++)
1771 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Choose the coordinate components to send, by texture target. */
1774 switch (inst
->TexSrcTarget
) {
1775 case TEXTURE_1D_INDEX
:
1779 case TEXTURE_2D_INDEX
:
1780 case TEXTURE_RECT_INDEX
:
1781 emit
= WRITEMASK_XY
;
1785 emit
= WRITEMASK_XYZ
;
/* One message register per coordinate, starting at msg_len + 1. */
1791 for (i
= 0; i
< nr
; i
++) {
1792 static const GLuint swz
[4] = {0,1,2,2};
1794 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
1796 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
/* NOTE(review): presumably guarded by "if (shadow)" -- the guard is not
   visible in this listing. */
1801 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0));
1802 brw_MOV(p
, brw_message_reg(6), src
[2]);
/* Arguments of the sampler message; the call head is not visible here. */
1806 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
1808 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
1809 unit
+ MAX_DRAW_BUFFERS
, /* surface */
1811 inst
->DstReg
.WriteMask
,
1812 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
,
1818 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
/*
 * Post-emission fix-up pass over the original fragment program.
 *
 * For the instructions selected by the switch below, takes the native
 * instruction recorded in the instruction's Data pointer and the one
 * recorded for its BranchTarget, and stores the distance between them,
 * multiplied by 16, as an immediate in src1 of the former.
 *
 * NOTE(review): the case label selecting the opcode, the declaration of
 * offset and the closing braces are absent from this listing -- compare
 * with the upstream file before changing any code here.
 */
1821 static void post_wm_emit( struct brw_wm_compile
*c
)
1823 GLuint nr_insns
= c
->fp
->program
.Base
.NumInstructions
;
1824 GLuint insn
, target_insn
;
1825 struct prog_instruction
*inst1
, *inst2
;
1826 struct brw_instruction
*brw_inst1
, *brw_inst2
;
1828 for (insn
= 0; insn
< nr_insns
; insn
++) {
1829 inst1
= &c
->fp
->program
.Base
.Instructions
[insn
];
1830 brw_inst1
= inst1
->Data
;
1831 switch (inst1
->Opcode
) {
1833 target_insn
= inst1
->BranchTarget
;
1834 inst2
= &c
->fp
->program
.Base
.Instructions
[target_insn
];
1835 brw_inst2
= inst2
->Data
;
/* Pointer difference, i.e. a count of native instructions; scaled by 16
   below (presumably the size of one native instruction in bytes -- TODO
   confirm). */
1836 offset
= brw_inst2
- brw_inst1
;
1837 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
/*
 * Walk c->prog_instructions and emit native code for each entry through
 * c->func, dispatching on inst->Opcode.  Open IF and loop instructions are
 * tracked in the local if_inst[] / loop_inst[] stacks, and stack_index
 * addresses the region starting at c->stack that the call/return
 * sequences below push to and pop from.  Before returning, the Data
 * pointer of every instruction of the original program is reset to NULL.
 *
 * NOTE(review): most case labels, break statements and braces of this
 * function are absent from this listing -- compare with the upstream file
 * before changing any code here.
 */
1845 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1848 #define MAX_LOOP_DEPTH 32
1849 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
1850 struct brw_instruction
*inst0
, *inst1
;
1851 int i
, if_insn
= 0, loop_insn
= 0;
1852 struct brw_compile
*p
= &c
->func
;
1853 struct brw_indirect stack_index
= brw_indirect(0, 0);
1857 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the address register at the start of the stack area. */
1858 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1860 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
1861 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
1862 struct prog_instruction
*orig_inst
;
/* Record where the native code for the originating instruction starts;
   post_wm_emit() reads these Data pointers back. */
1864 if ((orig_inst
= inst
->Data
) != 0)
1865 orig_inst
->Data
= current_insn(p
);
1867 if (inst
->CondUpdate
)
1868 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
1870 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
1872 switch (inst
->Opcode
) {
1874 emit_pixel_xy(c
, inst
);
1877 emit_delta_xy(c
, inst
);
1880 emit_pixel_w(c
, inst
);
1883 emit_linterp(c
, inst
);
1886 emit_pinterp(c
, inst
);
1889 emit_cinterp(c
, inst
);
1892 emit_wpos_xy(c
, inst
);
1895 emit_fb_write(c
, inst
);
1916 emit_trunc(c
, inst
);
1991 emit_noise1(c
, inst
);
1994 emit_noise2(c
, inst
);
1997 emit_noise3(c
, inst
);
1999 /* case OPCODE_NOISE4: */
2000 /* not yet implemented */
/* IF / ELSE / ENDIF: if_inst[] is a stack of open IF instructions. */
2011 assert(if_insn
< MAX_IFSN
);
2012 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2015 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2018 assert(if_insn
> 0);
2019 brw_ENDIF(p
, if_inst
[--if_insn
]);
/* Call sequence: store ip + 3*16 at the current stack slot, advance the
   stack pointer by 4, then add 1*16 to ip.  post_wm_emit() later patches
   the recorded instruction's src1 with the real offset. */
2025 brw_push_insn_state(p
);
2026 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2027 brw_set_access_mode(p
, BRW_ALIGN_1
);
2028 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2029 brw_set_access_mode(p
, BRW_ALIGN_16
);
2030 brw_ADD(p
, get_addr_reg(stack_index
),
2031 get_addr_reg(stack_index
), brw_imm_d(4));
2032 orig_inst
= inst
->Data
;
2033 orig_inst
->Data
= &p
->store
[p
->nr_insn
];
2034 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2035 brw_pop_insn_state(p
);
/* Return sequence: step the stack pointer back by 4 and load ip from the
   slot it now addresses. */
2039 brw_push_insn_state(p
);
2040 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2041 brw_ADD(p
, get_addr_reg(stack_index
),
2042 get_addr_reg(stack_index
), brw_imm_d(-4));
2043 brw_set_access_mode(p
, BRW_ALIGN_1
);
2044 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2045 brw_set_access_mode(p
, BRW_ALIGN_16
);
2046 brw_pop_insn_state(p
);
/* NOTE(review): if_insn is checked with assert() before if_inst[] is
   written, but no equivalent check against MAX_LOOP_DEPTH is visible
   before loop_inst[loop_insn++] below -- consider adding one. */
2049 case OPCODE_BGNLOOP
:
2050 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2054 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2058 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2060 case OPCODE_ENDLOOP
:
2062 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2063 /* patch all the BREAK instructions from
2065 while (inst0
> loop_inst
[loop_insn
]) {
2067 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2068 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2069 inst0
->bits3
.if_else
.pop_count
= 0;
2070 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2071 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2072 inst0
->bits3
.if_else
.pop_count
= 0;
2077 _mesa_printf("unsupported IR in fragment shader %d\n",
2080 if (inst
->CondUpdate
)
2081 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2083 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2086 for (i
= 0; i
< c
->fp
->program
.Base
.NumInstructions
; i
++)
2087 c
->fp
->program
.Base
.Instructions
[i
].Data
= NULL
;
2090 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2093 brw_wm_emit_glsl(brw
, c
);
2094 c
->prog_data
.total_grf
= c
->reg_index
;
2095 c
->prog_data
.total_scratch
= 0;