1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
11 /* Only guess, need a flag in gl_fragment_program later */
12 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
15 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
16 struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
17 switch (inst
->Opcode
) {
39 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
40 int component
, struct brw_reg reg
)
42 c
->wm_regs
[file
][index
][component
].reg
= reg
;
43 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
46 static int get_scalar_dst_index(struct prog_instruction
*inst
)
49 for (i
= 0; i
< 4; i
++)
50 if (inst
->DstReg
.WriteMask
& (1<<i
))
55 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
58 if(c
->tmp_index
== c
->tmp_max
)
59 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
61 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
65 static int mark_tmps(struct brw_wm_compile
*c
)
70 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
72 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
75 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
81 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
, int nr
, GLuint neg
, GLuint abs
)
85 case PROGRAM_STATE_VAR
:
86 case PROGRAM_CONSTANT
:
88 file
= PROGRAM_STATE_VAR
;
90 case PROGRAM_UNDEFINED
:
91 return brw_null_reg();
96 if(c
->wm_regs
[file
][index
][component
].inited
)
97 reg
= c
->wm_regs
[file
][index
][component
].reg
;
99 reg
= brw_vec8_grf(c
->reg_index
, 0);
101 if(!c
->wm_regs
[file
][index
][component
].inited
) {
102 set_reg(c
, file
, index
, component
, reg
);
106 if (neg
& (1<< component
)) {
114 static void prealloc_reg(struct brw_wm_compile
*c
)
118 int nr_interp_regs
= 0;
119 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
121 for (i
= 0; i
< 4; i
++) {
122 reg
= (i
< c
->key
.nr_depth_regs
)
123 ? brw_vec8_grf(i
*2, 0) : brw_vec8_grf(0, 0);
124 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
126 c
->reg_index
+= 2*c
->key
.nr_depth_regs
;
128 int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
129 struct gl_program_parameter_list
*plist
=
130 c
->fp
->program
.Base
.Parameters
;
132 c
->prog_data
.nr_params
= 4*nr_params
;
133 for (i
= 0; i
< nr_params
; i
++) {
134 for (j
= 0; j
< 4; j
++, index
++) {
135 reg
= brw_vec1_grf(c
->reg_index
+ index
/8,
137 c
->prog_data
.param
[index
] =
138 &plist
->ParameterValues
[i
][j
];
139 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
142 c
->nr_creg
= 2*((4*nr_params
+15)/16);
143 c
->reg_index
+= c
->nr_creg
;
145 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
146 if (inputs
& (1<<i
)) {
148 reg
= brw_vec8_grf(c
->reg_index
, 0);
149 for (j
= 0; j
< 4; j
++)
150 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
155 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
156 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
157 c
->prog_data
.curb_read_length
= c
->nr_creg
;
158 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
160 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
164 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
165 struct prog_instruction
*inst
, int component
, int nr
)
167 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
171 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
172 struct prog_src_register
*src
, int index
, int nr
)
174 int component
= GET_SWZ(src
->Swizzle
, index
);
175 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
176 src
->NegateBase
, src
->Abs
);
179 /* Subroutines are minimal support for reusable instruction sequences.
180 They are implemented as simply as possible to minimise overhead: there
181 is no explicit support for communication between the caller and callee
182 other than saving the return address in a temporary register, nor is
183 there any automatic local storage. This implies that great care is
184 required before attempting reentrancy or any kind of nested
185 subroutine invocations. */
186 static void invoke_subroutine( struct brw_wm_compile
*c
,
187 enum _subroutine subroutine
,
188 void (*emit
)( struct brw_wm_compile
* ) )
190 struct brw_compile
*p
= &c
->func
;
192 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
194 if( c
->subroutines
[ subroutine
] ) {
195 /* subroutine previously emitted: reuse existing instructions */
197 int mark
= mark_tmps( c
);
198 struct brw_reg return_address
= retype( alloc_tmp( c
),
199 BRW_REGISTER_TYPE_UD
);
200 int here
= p
->nr_insn
;
202 brw_push_insn_state(p
);
203 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
204 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
206 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
207 brw_imm_d( ( c
->subroutines
[ subroutine
] -
209 brw_pop_insn_state(p
);
211 release_tmps( c
, mark
);
213 /* previously unused subroutine: emit, and mark for later reuse */
215 int mark
= mark_tmps( c
);
216 struct brw_reg return_address
= retype( alloc_tmp( c
),
217 BRW_REGISTER_TYPE_UD
);
218 struct brw_instruction
*calc
;
219 int base
= p
->nr_insn
;
221 brw_push_insn_state(p
);
222 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
223 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
224 brw_pop_insn_state(p
);
226 c
->subroutines
[ subroutine
] = p
->nr_insn
;
230 brw_push_insn_state(p
);
231 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
232 brw_MOV( p
, brw_ip_reg(), return_address
);
233 brw_pop_insn_state(p
);
235 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
237 release_tmps( c
, mark
);
241 static void emit_abs( struct brw_wm_compile
*c
,
242 struct prog_instruction
*inst
)
245 struct brw_compile
*p
= &c
->func
;
246 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
247 for (i
= 0; i
< 4; i
++) {
248 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
249 struct brw_reg src
, dst
;
250 dst
= get_dst_reg(c
, inst
, i
, 1);
251 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
252 brw_MOV(p
, dst
, brw_abs(src
));
255 brw_set_saturate(p
, 0);
258 static void emit_trunc( struct brw_wm_compile
*c
,
259 struct prog_instruction
*inst
)
262 struct brw_compile
*p
= &c
->func
;
263 GLuint mask
= inst
->DstReg
.WriteMask
;
264 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
265 for (i
= 0; i
< 4; i
++) {
267 struct brw_reg src
, dst
;
268 dst
= get_dst_reg(c
, inst
, i
, 1) ;
269 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
270 brw_RNDZ(p
, dst
, src
);
273 brw_set_saturate(p
, 0);
276 static void emit_mov( struct brw_wm_compile
*c
,
277 struct prog_instruction
*inst
)
280 struct brw_compile
*p
= &c
->func
;
281 GLuint mask
= inst
->DstReg
.WriteMask
;
282 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
283 for (i
= 0; i
< 4; i
++) {
285 struct brw_reg src
, dst
;
286 dst
= get_dst_reg(c
, inst
, i
, 1);
287 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
288 brw_MOV(p
, dst
, src
);
291 brw_set_saturate(p
, 0);
294 static void emit_pixel_xy(struct brw_wm_compile
*c
,
295 struct prog_instruction
*inst
)
297 struct brw_reg r1
= brw_vec1_grf(1, 0);
298 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
300 struct brw_reg dst0
, dst1
;
301 struct brw_compile
*p
= &c
->func
;
302 GLuint mask
= inst
->DstReg
.WriteMask
;
304 dst0
= get_dst_reg(c
, inst
, 0, 1);
305 dst1
= get_dst_reg(c
, inst
, 1, 1);
306 /* Calculate pixel centers by adding 1 or 0 to each of the
307 * micro-tile coordinates passed in r1.
309 if (mask
& WRITEMASK_X
) {
311 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
312 stride(suboffset(r1_uw
, 4), 2, 4, 0),
313 brw_imm_v(0x10101010));
316 if (mask
& WRITEMASK_Y
) {
318 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
319 stride(suboffset(r1_uw
, 5), 2, 4, 0),
320 brw_imm_v(0x11001100));
324 static void emit_delta_xy(struct brw_wm_compile
*c
,
325 struct prog_instruction
*inst
)
327 struct brw_reg r1
= brw_vec1_grf(1, 0);
328 struct brw_reg dst0
, dst1
, src0
, src1
;
329 struct brw_compile
*p
= &c
->func
;
330 GLuint mask
= inst
->DstReg
.WriteMask
;
332 dst0
= get_dst_reg(c
, inst
, 0, 1);
333 dst1
= get_dst_reg(c
, inst
, 1, 1);
334 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
335 src1
= get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
336 /* Calc delta X,Y by subtracting origin in r1 from the pixel
339 if (mask
& WRITEMASK_X
) {
342 retype(src0
, BRW_REGISTER_TYPE_UW
),
346 if (mask
& WRITEMASK_Y
) {
349 retype(src1
, BRW_REGISTER_TYPE_UW
),
350 negate(suboffset(r1
,1)));
355 static void fire_fb_write( struct brw_wm_compile
*c
,
361 struct brw_compile
*p
= &c
->func
;
362 /* Pass through control information:
364 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
366 brw_push_insn_state(p
);
367 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
369 brw_message_reg(base_reg
+ 1),
371 brw_pop_insn_state(p
);
373 /* Send framebuffer write message: */
375 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
377 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
384 static void emit_fb_write(struct brw_wm_compile
*c
,
385 struct prog_instruction
*inst
)
387 struct brw_compile
*p
= &c
->func
;
393 /* Reserve a space for AA - may not be needed:
395 if (c
->key
.aa_dest_stencil_reg
)
398 brw_push_insn_state(p
);
399 for (channel
= 0; channel
< 4; channel
++) {
400 src0
= get_src_reg(c
, &inst
->SrcReg
[0], channel
, 1);
401 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
402 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
403 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
405 /* skip over the regs populated above: */
407 brw_pop_insn_state(p
);
409 if (c
->key
.source_depth_to_render_target
) {
410 if (c
->key
.computes_depth
) {
411 src0
= get_src_reg(c
, &inst
->SrcReg
[2], 2, 1);
412 brw_MOV(p
, brw_message_reg(nr
), src0
);
415 src0
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
416 brw_MOV(p
, brw_message_reg(nr
), src0
);
422 target
= inst
->Sampler
>> 1;
423 eot
= inst
->Sampler
& 1;
424 fire_fb_write(c
, 0, nr
, target
, eot
);
427 static void emit_pixel_w( struct brw_wm_compile
*c
,
428 struct prog_instruction
*inst
)
430 struct brw_compile
*p
= &c
->func
;
431 GLuint mask
= inst
->DstReg
.WriteMask
;
432 if (mask
& WRITEMASK_W
) {
433 struct brw_reg dst
, src0
, delta0
, delta1
;
434 struct brw_reg interp3
;
436 dst
= get_dst_reg(c
, inst
, 3, 1);
437 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
438 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
439 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
441 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
442 /* Calc 1/w - just linterp wpos[3] optimized by putting the
443 * result straight into a message reg.
445 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
446 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
450 BRW_MATH_FUNCTION_INV
,
451 BRW_MATH_SATURATE_NONE
,
453 BRW_MATH_PRECISION_FULL
);
457 static void emit_linterp(struct brw_wm_compile
*c
,
458 struct prog_instruction
*inst
)
460 struct brw_compile
*p
= &c
->func
;
461 GLuint mask
= inst
->DstReg
.WriteMask
;
462 struct brw_reg interp
[4];
463 struct brw_reg dst
, delta0
, delta1
;
466 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
467 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
468 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
472 interp
[0] = brw_vec1_grf(nr
, 0);
473 interp
[1] = brw_vec1_grf(nr
, 4);
474 interp
[2] = brw_vec1_grf(nr
+1, 0);
475 interp
[3] = brw_vec1_grf(nr
+1, 4);
477 for(i
= 0; i
< 4; i
++ ) {
479 dst
= get_dst_reg(c
, inst
, i
, 1);
480 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
481 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
486 static void emit_cinterp(struct brw_wm_compile
*c
,
487 struct prog_instruction
*inst
)
489 struct brw_compile
*p
= &c
->func
;
490 GLuint mask
= inst
->DstReg
.WriteMask
;
492 struct brw_reg interp
[4];
493 struct brw_reg dst
, src0
;
495 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
499 interp
[0] = brw_vec1_grf(nr
, 0);
500 interp
[1] = brw_vec1_grf(nr
, 4);
501 interp
[2] = brw_vec1_grf(nr
+1, 0);
502 interp
[3] = brw_vec1_grf(nr
+1, 4);
504 for(i
= 0; i
< 4; i
++ ) {
506 dst
= get_dst_reg(c
, inst
, i
, 1);
507 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
512 static void emit_pinterp(struct brw_wm_compile
*c
,
513 struct prog_instruction
*inst
)
515 struct brw_compile
*p
= &c
->func
;
516 GLuint mask
= inst
->DstReg
.WriteMask
;
518 struct brw_reg interp
[4];
519 struct brw_reg dst
, delta0
, delta1
;
520 struct brw_reg src0
, w
;
522 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
523 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
524 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
525 w
= get_src_reg(c
, &inst
->SrcReg
[2], 3, 1);
529 interp
[0] = brw_vec1_grf(nr
, 0);
530 interp
[1] = brw_vec1_grf(nr
, 4);
531 interp
[2] = brw_vec1_grf(nr
+1, 0);
532 interp
[3] = brw_vec1_grf(nr
+1, 4);
534 for(i
= 0; i
< 4; i
++ ) {
536 dst
= get_dst_reg(c
, inst
, i
, 1);
537 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
538 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
540 brw_MUL(p
, dst
, dst
, w
);
545 static void emit_xpd(struct brw_wm_compile
*c
,
546 struct prog_instruction
*inst
)
549 struct brw_compile
*p
= &c
->func
;
550 GLuint mask
= inst
->DstReg
.WriteMask
;
551 for (i
= 0; i
< 4; i
++) {
555 struct brw_reg src0
, src1
, dst
;
556 dst
= get_dst_reg(c
, inst
, i
, 1);
557 src0
= negate(get_src_reg(c
, &inst
->SrcReg
[0], i2
, 1));
558 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i1
, 1);
559 brw_MUL(p
, brw_null_reg(), src0
, src1
);
560 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i1
, 1);
561 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i2
, 1);
562 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
563 brw_MAC(p
, dst
, src0
, src1
);
564 brw_set_saturate(p
, 0);
567 brw_set_saturate(p
, 0);
570 static void emit_dp3(struct brw_wm_compile
*c
,
571 struct prog_instruction
*inst
)
573 struct brw_reg src0
[3], src1
[3], dst
;
575 struct brw_compile
*p
= &c
->func
;
576 for (i
= 0; i
< 3; i
++) {
577 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
578 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
581 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
582 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
583 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
584 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
585 brw_MAC(p
, dst
, src0
[2], src1
[2]);
586 brw_set_saturate(p
, 0);
589 static void emit_dp4(struct brw_wm_compile
*c
,
590 struct prog_instruction
*inst
)
592 struct brw_reg src0
[4], src1
[4], dst
;
594 struct brw_compile
*p
= &c
->func
;
595 for (i
= 0; i
< 4; i
++) {
596 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
597 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
599 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
600 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
601 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
602 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
603 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
604 brw_MAC(p
, dst
, src0
[3], src1
[3]);
605 brw_set_saturate(p
, 0);
608 static void emit_dph(struct brw_wm_compile
*c
,
609 struct prog_instruction
*inst
)
611 struct brw_reg src0
[4], src1
[4], dst
;
613 struct brw_compile
*p
= &c
->func
;
614 for (i
= 0; i
< 4; i
++) {
615 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
616 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
618 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
619 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
620 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
621 brw_MAC(p
, dst
, src0
[2], src1
[2]);
622 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
623 brw_ADD(p
, dst
, dst
, src1
[3]);
624 brw_set_saturate(p
, 0);
627 static void emit_math1(struct brw_wm_compile
*c
,
628 struct prog_instruction
*inst
, GLuint func
)
630 struct brw_compile
*p
= &c
->func
;
631 struct brw_reg src0
, dst
;
633 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
634 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
635 brw_MOV(p
, brw_message_reg(2), src0
);
639 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
642 BRW_MATH_DATA_VECTOR
,
643 BRW_MATH_PRECISION_FULL
);
646 static void emit_rcp(struct brw_wm_compile
*c
,
647 struct prog_instruction
*inst
)
649 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
652 static void emit_rsq(struct brw_wm_compile
*c
,
653 struct prog_instruction
*inst
)
655 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
658 static void emit_sin(struct brw_wm_compile
*c
,
659 struct prog_instruction
*inst
)
661 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
664 static void emit_cos(struct brw_wm_compile
*c
,
665 struct prog_instruction
*inst
)
667 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
670 static void emit_ex2(struct brw_wm_compile
*c
,
671 struct prog_instruction
*inst
)
673 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
676 static void emit_lg2(struct brw_wm_compile
*c
,
677 struct prog_instruction
*inst
)
679 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
682 static void emit_add(struct brw_wm_compile
*c
,
683 struct prog_instruction
*inst
)
685 struct brw_compile
*p
= &c
->func
;
686 struct brw_reg src0
, src1
, dst
;
687 GLuint mask
= inst
->DstReg
.WriteMask
;
689 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
690 for (i
= 0 ; i
< 4; i
++) {
692 dst
= get_dst_reg(c
, inst
, i
, 1);
693 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
694 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
695 brw_ADD(p
, dst
, src0
, src1
);
698 brw_set_saturate(p
, 0);
701 static void emit_sub(struct brw_wm_compile
*c
,
702 struct prog_instruction
*inst
)
704 struct brw_compile
*p
= &c
->func
;
705 struct brw_reg src0
, src1
, dst
;
706 GLuint mask
= inst
->DstReg
.WriteMask
;
708 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
709 for (i
= 0 ; i
< 4; i
++) {
711 dst
= get_dst_reg(c
, inst
, i
, 1);
712 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
713 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
714 brw_ADD(p
, dst
, src0
, negate(src1
));
717 brw_set_saturate(p
, 0);
720 static void emit_mul(struct brw_wm_compile
*c
,
721 struct prog_instruction
*inst
)
723 struct brw_compile
*p
= &c
->func
;
724 struct brw_reg src0
, src1
, dst
;
725 GLuint mask
= inst
->DstReg
.WriteMask
;
727 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
728 for (i
= 0 ; i
< 4; i
++) {
730 dst
= get_dst_reg(c
, inst
, i
, 1);
731 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
732 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
733 brw_MUL(p
, dst
, src0
, src1
);
736 brw_set_saturate(p
, 0);
739 static void emit_frc(struct brw_wm_compile
*c
,
740 struct prog_instruction
*inst
)
742 struct brw_compile
*p
= &c
->func
;
743 struct brw_reg src0
, dst
;
744 GLuint mask
= inst
->DstReg
.WriteMask
;
746 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
747 for (i
= 0 ; i
< 4; i
++) {
749 dst
= get_dst_reg(c
, inst
, i
, 1);
750 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
751 brw_FRC(p
, dst
, src0
);
754 if (inst
->SaturateMode
!= SATURATE_OFF
)
755 brw_set_saturate(p
, 0);
758 static void emit_flr(struct brw_wm_compile
*c
,
759 struct prog_instruction
*inst
)
761 struct brw_compile
*p
= &c
->func
;
762 struct brw_reg src0
, dst
;
763 GLuint mask
= inst
->DstReg
.WriteMask
;
765 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
766 for (i
= 0 ; i
< 4; i
++) {
768 dst
= get_dst_reg(c
, inst
, i
, 1);
769 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
770 brw_RNDD(p
, dst
, src0
);
773 brw_set_saturate(p
, 0);
776 static void emit_max(struct brw_wm_compile
*c
,
777 struct prog_instruction
*inst
)
779 struct brw_compile
*p
= &c
->func
;
780 GLuint mask
= inst
->DstReg
.WriteMask
;
781 struct brw_reg src0
, src1
, dst
;
783 brw_push_insn_state(p
);
784 for (i
= 0; i
< 4; i
++) {
786 dst
= get_dst_reg(c
, inst
, i
, 1);
787 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
788 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
789 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
790 brw_MOV(p
, dst
, src0
);
791 brw_set_saturate(p
, 0);
793 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src0
, src1
);
794 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
795 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
796 brw_MOV(p
, dst
, src1
);
797 brw_set_saturate(p
, 0);
798 brw_set_predicate_control_flag_value(p
, 0xff);
801 brw_pop_insn_state(p
);
804 static void emit_min(struct brw_wm_compile
*c
,
805 struct prog_instruction
*inst
)
807 struct brw_compile
*p
= &c
->func
;
808 GLuint mask
= inst
->DstReg
.WriteMask
;
809 struct brw_reg src0
, src1
, dst
;
811 brw_push_insn_state(p
);
812 for (i
= 0; i
< 4; i
++) {
814 dst
= get_dst_reg(c
, inst
, i
, 1);
815 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
816 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
817 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
818 brw_MOV(p
, dst
, src0
);
819 brw_set_saturate(p
, 0);
821 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
822 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
823 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
824 brw_MOV(p
, dst
, src1
);
825 brw_set_saturate(p
, 0);
826 brw_set_predicate_control_flag_value(p
, 0xff);
829 brw_pop_insn_state(p
);
832 static void emit_pow(struct brw_wm_compile
*c
,
833 struct prog_instruction
*inst
)
835 struct brw_compile
*p
= &c
->func
;
836 struct brw_reg dst
, src0
, src1
;
837 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
838 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
839 src1
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
841 brw_MOV(p
, brw_message_reg(2), src0
);
842 brw_MOV(p
, brw_message_reg(3), src1
);
846 BRW_MATH_FUNCTION_POW
,
847 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
850 BRW_MATH_DATA_VECTOR
,
851 BRW_MATH_PRECISION_FULL
);
854 static void emit_lrp(struct brw_wm_compile
*c
,
855 struct prog_instruction
*inst
)
857 struct brw_compile
*p
= &c
->func
;
858 GLuint mask
= inst
->DstReg
.WriteMask
;
859 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
861 int mark
= mark_tmps(c
);
862 for (i
= 0; i
< 4; i
++) {
864 dst
= get_dst_reg(c
, inst
, i
, 1);
865 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
867 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
869 if (src1
.nr
== dst
.nr
) {
871 brw_MOV(p
, tmp1
, src1
);
875 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
876 if (src2
.nr
== dst
.nr
) {
878 brw_MOV(p
, tmp2
, src2
);
882 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
883 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
884 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
885 brw_MAC(p
, dst
, src0
, tmp1
);
886 brw_set_saturate(p
, 0);
888 release_tmps(c
, mark
);
893 * For GLSL shaders, this KIL will be unconditional.
894 * It may be contained inside an IF/ENDIF structure of course.
896 static void emit_kil(struct brw_wm_compile
*c
)
898 struct brw_compile
*p
= &c
->func
;
899 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
900 brw_push_insn_state(p
);
901 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
902 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
903 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
904 brw_pop_insn_state(p
);
907 static void emit_mad(struct brw_wm_compile
*c
,
908 struct prog_instruction
*inst
)
910 struct brw_compile
*p
= &c
->func
;
911 GLuint mask
= inst
->DstReg
.WriteMask
;
912 struct brw_reg dst
, src0
, src1
, src2
;
915 for (i
= 0; i
< 4; i
++) {
917 dst
= get_dst_reg(c
, inst
, i
, 1);
918 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
919 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
920 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
921 brw_MUL(p
, dst
, src0
, src1
);
923 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
924 brw_ADD(p
, dst
, dst
, src2
);
925 brw_set_saturate(p
, 0);
930 static void emit_sop(struct brw_wm_compile
*c
,
931 struct prog_instruction
*inst
, GLuint cond
)
933 struct brw_compile
*p
= &c
->func
;
934 GLuint mask
= inst
->DstReg
.WriteMask
;
935 struct brw_reg dst
, src0
, src1
;
938 for (i
= 0; i
< 4; i
++) {
940 dst
= get_dst_reg(c
, inst
, i
, 1);
941 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
942 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
943 brw_push_insn_state(p
);
944 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
945 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
946 brw_MOV(p
, dst
, brw_imm_f(0.0));
947 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
948 brw_MOV(p
, dst
, brw_imm_f(1.0));
949 brw_pop_insn_state(p
);
954 static void emit_slt(struct brw_wm_compile
*c
,
955 struct prog_instruction
*inst
)
957 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
960 static void emit_sle(struct brw_wm_compile
*c
,
961 struct prog_instruction
*inst
)
963 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
966 static void emit_sgt(struct brw_wm_compile
*c
,
967 struct prog_instruction
*inst
)
969 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
972 static void emit_sge(struct brw_wm_compile
*c
,
973 struct prog_instruction
*inst
)
975 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
978 static void emit_seq(struct brw_wm_compile
*c
,
979 struct prog_instruction
*inst
)
981 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
984 static void emit_sne(struct brw_wm_compile
*c
,
985 struct prog_instruction
*inst
)
987 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
990 static void emit_ddx(struct brw_wm_compile
*c
,
991 struct prog_instruction
*inst
)
993 struct brw_compile
*p
= &c
->func
;
994 GLuint mask
= inst
->DstReg
.WriteMask
;
995 struct brw_reg interp
[4];
997 struct brw_reg src0
, w
;
999 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1000 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1002 interp
[0] = brw_vec1_grf(nr
, 0);
1003 interp
[1] = brw_vec1_grf(nr
, 4);
1004 interp
[2] = brw_vec1_grf(nr
+1, 0);
1005 interp
[3] = brw_vec1_grf(nr
+1, 4);
1006 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1007 for(i
= 0; i
< 4; i
++ ) {
1008 if (mask
& (1<<i
)) {
1009 dst
= get_dst_reg(c
, inst
, i
, 1);
1010 brw_MOV(p
, dst
, interp
[i
]);
1011 brw_MUL(p
, dst
, dst
, w
);
1014 brw_set_saturate(p
, 0);
1017 static void emit_ddy(struct brw_wm_compile
*c
,
1018 struct prog_instruction
*inst
)
1020 struct brw_compile
*p
= &c
->func
;
1021 GLuint mask
= inst
->DstReg
.WriteMask
;
1022 struct brw_reg interp
[4];
1024 struct brw_reg src0
, w
;
1027 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1029 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1030 interp
[0] = brw_vec1_grf(nr
, 0);
1031 interp
[1] = brw_vec1_grf(nr
, 4);
1032 interp
[2] = brw_vec1_grf(nr
+1, 0);
1033 interp
[3] = brw_vec1_grf(nr
+1, 4);
1034 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1035 for(i
= 0; i
< 4; i
++ ) {
1036 if (mask
& (1<<i
)) {
1037 dst
= get_dst_reg(c
, inst
, i
, 1);
1038 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1039 brw_MUL(p
, dst
, dst
, w
);
1042 brw_set_saturate(p
, 0);
1045 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1047 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1051 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1053 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1056 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1058 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1061 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1063 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1067 /* One-, two- and three-dimensional Perlin noise, similar to the description
1068 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1069 static void noise1_sub( struct brw_wm_compile
*c
) {
1071 struct brw_compile
*p
= &c
->func
;
1072 struct brw_reg param
,
1073 x0
, x1
, /* gradients at each end */
1074 t
, tmp
[ 2 ], /* float temporaries */
1075 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1077 int mark
= mark_tmps( c
);
1079 x0
= alloc_tmp( c
);
1080 x1
= alloc_tmp( c
);
1082 tmp
[ 0 ] = alloc_tmp( c
);
1083 tmp
[ 1 ] = alloc_tmp( c
);
1084 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1085 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1086 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1087 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1088 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1090 param
= lookup_tmp( c
, mark
- 2 );
1092 brw_set_access_mode( p
, BRW_ALIGN_1
);
1094 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1096 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1097 be hashed. Also compute the remainder (offset within the unit
1098 length), interleaved to reduce register dependency penalties. */
1099 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1100 brw_FRC( p
, param
, param
);
1101 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1102 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1103 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1105 /* We're now ready to perform the hashing. The two hashes are
1106 interleaved for performance. The hash function used is
1107 designed to rapidly achieve avalanche and require only 32x16
1108 bit multiplication, and 16-bit swizzles (which we get for
1109 free). We can't use immediate operands in the multiplies,
1110 because immediates are permitted only in src1 and the 16-bit
1111 factor is permitted only in src0. */
1112 for( i
= 0; i
< 2; i
++ )
1113 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1114 for( i
= 0; i
< 2; i
++ )
1115 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1116 high_words( itmp
[ i
] ) );
1117 for( i
= 0; i
< 2; i
++ )
1118 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1119 for( i
= 0; i
< 2; i
++ )
1120 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1121 high_words( itmp
[ i
] ) );
1122 for( i
= 0; i
< 2; i
++ )
1123 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1124 for( i
= 0; i
< 2; i
++ )
1125 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1126 high_words( itmp
[ i
] ) );
1128 /* Now we want to initialise the two gradients based on the
1129 hashes. Format conversion from signed integer to float leaves
1130 everything scaled too high by a factor of pow( 2, 31 ), but
1131 we correct for that right at the end. */
1132 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1133 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1134 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1136 brw_MUL( p
, x0
, x0
, param
);
1137 brw_MUL( p
, x1
, x1
, t
);
1139 /* We interpolate between the gradients using the polynomial
1140 6t^5 - 15t^4 + 10t^3 (Perlin). */
1141 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1142 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1143 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1144 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1145 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1146 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1148 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1149 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1150 brw_MUL( p
, x1
, x1
, param
);
1151 brw_ADD( p
, x0
, x0
, x1
);
1152 /* scale by pow( 2, -30 ), to compensate for the format conversion
1153 above and an extra factor of 2 so that a single gradient covers
1155 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1157 release_tmps( c
, mark
);
1160 static void emit_noise1( struct brw_wm_compile
*c
,
1161 struct prog_instruction
*inst
)
1163 struct brw_compile
*p
= &c
->func
;
1164 struct brw_reg src
, param
, dst
;
1165 GLuint mask
= inst
->DstReg
.WriteMask
;
1167 int mark
= mark_tmps( c
);
1169 assert( mark
== 0 );
1171 src
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1173 param
= alloc_tmp( c
);
1175 brw_MOV( p
, param
, src
);
1177 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1179 /* Fill in the result: */
1180 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1181 for (i
= 0 ; i
< 4; i
++) {
1182 if (mask
& (1<<i
)) {
1183 dst
= get_dst_reg(c
, inst
, i
, 1);
1184 brw_MOV( p
, dst
, param
);
1187 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1188 brw_set_saturate( p
, 0 );
1190 release_tmps( c
, mark
);
1193 static void noise2_sub( struct brw_wm_compile
*c
) {
1195 struct brw_compile
*p
= &c
->func
;
1196 struct brw_reg param0
, param1
,
1197 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1198 t
, tmp
[ 4 ], /* float temporaries */
1199 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1201 int mark
= mark_tmps( c
);
1203 x0y0
= alloc_tmp( c
);
1204 x0y1
= alloc_tmp( c
);
1205 x1y0
= alloc_tmp( c
);
1206 x1y1
= alloc_tmp( c
);
1208 for( i
= 0; i
< 4; i
++ ) {
1209 tmp
[ i
] = alloc_tmp( c
);
1210 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1212 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1213 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1214 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1216 param0
= lookup_tmp( c
, mark
- 3 );
1217 param1
= lookup_tmp( c
, mark
- 2 );
1219 brw_set_access_mode( p
, BRW_ALIGN_1
);
1221 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1222 be hashed. Also compute the remainders (offsets within the unit
1223 square), interleaved to reduce register dependency penalties. */
1224 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1225 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1226 brw_FRC( p
, param0
, param0
);
1227 brw_FRC( p
, param1
, param1
);
1228 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1229 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1230 low_words( itmp
[ 1 ] ) );
1231 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1232 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1233 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1234 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1235 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1237 /* We're now ready to perform the hashing. The four hashes are
1238 interleaved for performance. The hash function used is
1239 designed to rapidly achieve avalanche and require only 32x16
1240 bit multiplication, and 16-bit swizzles (which we get for
1241 free). We can't use immediate operands in the multiplies,
1242 because immediates are permitted only in src1 and the 16-bit
1243 factor is permitted only in src0. */
1244 for( i
= 0; i
< 4; i
++ )
1245 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1246 for( i
= 0; i
< 4; i
++ )
1247 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1248 high_words( itmp
[ i
] ) );
1249 for( i
= 0; i
< 4; i
++ )
1250 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1251 for( i
= 0; i
< 4; i
++ )
1252 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1253 high_words( itmp
[ i
] ) );
1254 for( i
= 0; i
< 4; i
++ )
1255 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1256 for( i
= 0; i
< 4; i
++ )
1257 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1258 high_words( itmp
[ i
] ) );
1260 /* Now we want to initialise the four gradients based on the
1261 hashes. Format conversion from signed integer to float leaves
1262 everything scaled too high by a factor of pow( 2, 15 ), but
1263 we correct for that right at the end. */
1264 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1265 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1266 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1267 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1268 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1270 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1271 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1272 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1273 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1275 brw_MUL( p
, x1y0
, x1y0
, t
);
1276 brw_MUL( p
, x1y1
, x1y1
, t
);
1277 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1278 brw_MUL( p
, x0y0
, x0y0
, param0
);
1279 brw_MUL( p
, x0y1
, x0y1
, param0
);
1281 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1282 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1283 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1284 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1286 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1287 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1288 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1289 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1291 /* We interpolate between the gradients using the polynomial
1292 6t^5 - 15t^4 + 10t^3 (Perlin). */
1293 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1294 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1295 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1296 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1297 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1298 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1299 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1301 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1302 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1303 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1304 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1305 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1307 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1308 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1309 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1310 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1312 /* Here we interpolate in the y dimension... */
1313 brw_MUL( p
, x0y1
, x0y1
, param1
);
1314 brw_MUL( p
, x1y1
, x1y1
, param1
);
1315 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1316 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1318 /* And now in x. There are horrible register dependencies here,
1319 but we have nothing else to do. */
1320 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1321 brw_MUL( p
, x1y0
, x1y0
, param0
);
1322 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1324 /* scale by pow( 2, -15 ), as described above */
1325 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1327 release_tmps( c
, mark
);
1330 static void emit_noise2( struct brw_wm_compile
*c
,
1331 struct prog_instruction
*inst
)
1333 struct brw_compile
*p
= &c
->func
;
1334 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1335 GLuint mask
= inst
->DstReg
.WriteMask
;
1337 int mark
= mark_tmps( c
);
1339 assert( mark
== 0 );
1341 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1342 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1344 param0
= alloc_tmp( c
);
1345 param1
= alloc_tmp( c
);
1347 brw_MOV( p
, param0
, src0
);
1348 brw_MOV( p
, param1
, src1
);
1350 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1352 /* Fill in the result: */
1353 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1354 for (i
= 0 ; i
< 4; i
++) {
1355 if (mask
& (1<<i
)) {
1356 dst
= get_dst_reg(c
, inst
, i
, 1);
1357 brw_MOV( p
, dst
, param0
);
1360 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1361 brw_set_saturate( p
, 0 );
1363 release_tmps( c
, mark
);
1366 /* The three-dimensional case is much like the one- and two- versions above,
1367 but since the number of corners is rapidly growing we now pack 16 16-bit
1368 hashes into each register to extract more parallelism from the EUs. */
1369 static void noise3_sub( struct brw_wm_compile
*c
) {
1371 struct brw_compile
*p
= &c
->func
;
1372 struct brw_reg param0
, param1
, param2
,
1373 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1374 xi
, yi
, zi
, /* interpolation coefficients */
1375 t
, tmp
[ 8 ], /* float temporaries */
1376 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1377 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1379 int mark
= mark_tmps( c
);
1381 x0y0
= alloc_tmp( c
);
1382 x0y1
= alloc_tmp( c
);
1383 x1y0
= alloc_tmp( c
);
1384 x1y1
= alloc_tmp( c
);
1385 xi
= alloc_tmp( c
);
1386 yi
= alloc_tmp( c
);
1387 zi
= alloc_tmp( c
);
1389 for( i
= 0; i
< 8; i
++ ) {
1390 tmp
[ i
] = alloc_tmp( c
);
1391 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1392 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1395 param0
= lookup_tmp( c
, mark
- 4 );
1396 param1
= lookup_tmp( c
, mark
- 3 );
1397 param2
= lookup_tmp( c
, mark
- 2 );
1399 brw_set_access_mode( p
, BRW_ALIGN_1
);
1401 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1402 be hashed. Also compute the remainders (offsets within the unit
1403 cube), interleaved to reduce register dependency penalties. */
1404 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1405 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1406 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1407 brw_FRC( p
, param0
, param0
);
1408 brw_FRC( p
, param1
, param1
);
1409 brw_FRC( p
, param2
, param2
);
1410 /* Since we now have only 16 bits of precision in the hash, we must
1411 be more careful about thorough mixing to maintain entropy as we
1412 squash the input vector into a small scalar. */
1413 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1414 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1415 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1416 brw_imm_uw( 0x9B93 ) );
1417 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1418 brw_imm_uw( 0xBC8F ) );
1420 /* Temporarily disable the execution mask while we work with ExecSize=16
1421 channels (the mask is set for ExecSize=8 and is probably incorrect).
1422 Although this might cause execution of unwanted channels, the code
1423 writes only to temporary registers and has no side effects, so
1424 disabling the mask is harmless. */
1425 brw_push_insn_state( p
);
1426 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1427 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1428 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1429 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1431 /* We're now ready to perform the hashing. The eight hashes are
1432 interleaved for performance. The hash function used is
1433 designed to rapidly achieve avalanche and require only 16x16
1434 bit multiplication, and 8-bit swizzles (which we get for
1436 for( i
= 0; i
< 4; i
++ )
1437 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1438 for( i
= 0; i
< 4; i
++ )
1439 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1440 odd_bytes( wtmp
[ i
] ) );
1441 for( i
= 0; i
< 4; i
++ )
1442 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1443 for( i
= 0; i
< 4; i
++ )
1444 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1445 odd_bytes( wtmp
[ i
] ) );
1446 brw_pop_insn_state( p
);
1448 /* Now we want to initialise the four rear gradients based on the
1449 hashes. Format conversion from signed integer to float leaves
1450 everything scaled too high by a factor of pow( 2, 15 ), but
1451 we correct for that right at the end. */
1453 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1454 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1455 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1456 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1457 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1459 brw_push_insn_state( p
);
1460 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1461 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1462 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1463 brw_pop_insn_state( p
);
1465 brw_MUL( p
, x1y0
, x1y0
, t
);
1466 brw_MUL( p
, x1y1
, x1y1
, t
);
1467 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1468 brw_MUL( p
, x0y0
, x0y0
, param0
);
1469 brw_MUL( p
, x0y1
, x0y1
, param0
);
1472 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1473 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1474 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1475 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1477 brw_push_insn_state( p
);
1478 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1479 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1480 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1481 brw_pop_insn_state( p
);
1483 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1484 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1485 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1486 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1487 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1489 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1490 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1491 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1492 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1495 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1496 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1497 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1498 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1500 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1501 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1502 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1503 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1505 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1506 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1507 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1508 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1510 /* We interpolate between the gradients using the polynomial
1511 6t^5 - 15t^4 + 10t^3 (Perlin). */
1512 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1513 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1514 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1515 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1516 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1517 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1518 brw_MUL( p
, xi
, xi
, param0
);
1519 brw_MUL( p
, yi
, yi
, param1
);
1520 brw_MUL( p
, zi
, zi
, param2
);
1521 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1522 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1523 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1524 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1525 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1526 brw_MUL( p
, xi
, xi
, param0
);
1527 brw_MUL( p
, yi
, yi
, param1
);
1528 brw_MUL( p
, zi
, zi
, param2
);
1529 brw_MUL( p
, xi
, xi
, param0
);
1530 brw_MUL( p
, yi
, yi
, param1
);
1531 brw_MUL( p
, zi
, zi
, param2
);
1532 brw_MUL( p
, xi
, xi
, param0
);
1533 brw_MUL( p
, yi
, yi
, param1
);
1534 brw_MUL( p
, zi
, zi
, param2
);
1536 /* Here we interpolate in the y dimension... */
1537 brw_MUL( p
, x0y1
, x0y1
, yi
);
1538 brw_MUL( p
, x1y1
, x1y1
, yi
);
1539 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1540 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1542 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1543 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1544 brw_MUL( p
, x1y0
, x1y0
, xi
);
1545 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1547 /* Now do the same thing for the front four gradients... */
1549 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1550 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1551 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1552 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1554 brw_push_insn_state( p
);
1555 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1556 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1557 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1558 brw_pop_insn_state( p
);
1560 brw_MUL( p
, x1y0
, x1y0
, t
);
1561 brw_MUL( p
, x1y1
, x1y1
, t
);
1562 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1563 brw_MUL( p
, x0y0
, x0y0
, param0
);
1564 brw_MUL( p
, x0y1
, x0y1
, param0
);
1567 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1568 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1569 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1570 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1572 brw_push_insn_state( p
);
1573 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1574 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1575 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1576 brw_pop_insn_state( p
);
1578 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1579 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1580 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1581 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1582 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1584 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1585 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1586 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1587 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1590 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1591 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1592 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1593 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1595 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1596 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1597 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1598 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1600 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1601 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1602 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1603 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1605 /* The interpolation coefficients are still around from last time, so
1606 again interpolate in the y dimension... */
1607 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1608 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1609 brw_MUL( p
, x0y1
, x0y1
, yi
);
1610 brw_MUL( p
, x1y1
, x1y1
, yi
);
1611 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1612 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1614 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1615 time put the front face in tmp[ 1 ] and we're nearly there... */
1616 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1617 brw_MUL( p
, x1y0
, x1y0
, xi
);
1618 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1620 /* The final interpolation, in the z dimension: */
1621 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1622 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1623 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1625 /* scale by pow( 2, -15 ), as described above */
1626 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1628 release_tmps( c
, mark
);
1631 static void emit_noise3( struct brw_wm_compile
*c
,
1632 struct prog_instruction
*inst
)
1634 struct brw_compile
*p
= &c
->func
;
1635 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1636 GLuint mask
= inst
->DstReg
.WriteMask
;
1638 int mark
= mark_tmps( c
);
1640 assert( mark
== 0 );
1642 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1643 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1644 src2
= get_src_reg( c
, inst
->SrcReg
, 2, 1 );
1646 param0
= alloc_tmp( c
);
1647 param1
= alloc_tmp( c
);
1648 param2
= alloc_tmp( c
);
1650 brw_MOV( p
, param0
, src0
);
1651 brw_MOV( p
, param1
, src1
);
1652 brw_MOV( p
, param2
, src2
);
1654 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1656 /* Fill in the result: */
1657 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1658 for (i
= 0 ; i
< 4; i
++) {
1659 if (mask
& (1<<i
)) {
1660 dst
= get_dst_reg(c
, inst
, i
, 1);
1661 brw_MOV( p
, dst
, param0
);
1664 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1665 brw_set_saturate( p
, 0 );
1667 release_tmps( c
, mark
);
1670 /* For the four-dimensional case, the little micro-optimisation benefits
1671 we obtain by unrolling all the loops aren't worth the massive bloat it
1672 now causes. Instead, we loop twice around performing a similar operation
1673 to noise3, once for the w=0 cube and once for the w=1, with a bit more
1674 code to glue it all together. */
1675 static void noise4_sub( struct brw_wm_compile
*c
)
1677 struct brw_compile
*p
= &c
->func
;
1678 struct brw_reg param
[ 4 ],
1679 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1680 w0
, /* noise for the w=0 cube */
1681 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1682 interp
[ 4 ], /* interpolation coefficients */
1683 t
, tmp
[ 8 ], /* float temporaries */
1684 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1685 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1687 int mark
= mark_tmps( c
);
1688 GLuint loop
, origin
;
1690 x0y0
= alloc_tmp( c
);
1691 x0y1
= alloc_tmp( c
);
1692 x1y0
= alloc_tmp( c
);
1693 x1y1
= alloc_tmp( c
);
1695 w0
= alloc_tmp( c
);
1696 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1697 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1699 for( i
= 0; i
< 4; i
++ ) {
1700 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1701 interp
[ i
] = alloc_tmp( c
);
1704 for( i
= 0; i
< 8; i
++ ) {
1705 tmp
[ i
] = alloc_tmp( c
);
1706 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1707 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1710 brw_set_access_mode( p
, BRW_ALIGN_1
);
1712 /* We only want 16 bits of precision from the integral part of each
1713 co-ordinate, but unfortunately the RNDD semantics would saturate
1714 at 16 bits if we performed the operation directly to a 16-bit
1715 destination. Therefore, we round to 32-bit temporaries where
1716 appropriate, and then store only the lower 16 bits. */
1717 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1718 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1719 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1720 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1721 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1722 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1724 /* Modify the flag register here, because the side effect is useful
1725 later (see below). We know for certain that all flags will be
1726 cleared, since the FRC instruction cannot possibly generate
1727 negative results. Even for exceptional inputs (infinities, denormals,
1728 NaNs), the architecture guarantees that the L conditional is false. */
1729 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1730 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1731 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1732 for( i
= 1; i
< 4; i
++ )
1733 brw_FRC( p
, param
[ i
], param
[ i
] );
1735 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1737 for( i
= 0; i
< 4; i
++ )
1738 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1739 for( i
= 0; i
< 4; i
++ )
1740 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1741 for( i
= 0; i
< 4; i
++ )
1742 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1743 for( i
= 0; i
< 4; i
++ )
1744 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1745 for( j
= 0; j
< 3; j
++ )
1746 for( i
= 0; i
< 4; i
++ )
1747 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1749 /* Mark the current address, as it will be a jump destination. The
1750 following code will be executed twice: first, with the flag
1751 register clear indicating the w=0 case, and second with flags
1755 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1756 be hashed. Since we have only 16 bits of precision in the hash, we
1757 must be careful about thorough mixing to maintain entropy as we
1758 squash the input vector into a small scalar. */
1759 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1760 brw_imm_uw( 0xBC8F ) );
1761 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1762 brw_imm_uw( 0xD0BD ) );
1763 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1764 brw_imm_uw( 0x9B93 ) );
1765 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1766 brw_imm_uw( 0xA359 ) );
1767 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1768 brw_imm_uw( 0xBC8F ) );
1770 /* Temporarily disable the execution mask while we work with ExecSize=16
1771 channels (the mask is set for ExecSize=8 and is probably incorrect).
1772 Although this might cause execution of unwanted channels, the code
1773 writes only to temporary registers and has no side effects, so
1774 disabling the mask is harmless. */
1775 brw_push_insn_state( p
);
1776 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1777 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1778 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1779 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1781 /* We're now ready to perform the hashing. The eight hashes are
1782 interleaved for performance. The hash function used is
1783 designed to rapidly achieve avalanche and require only 16x16
1784 bit multiplication, and 8-bit swizzles (which we get for
1786 for( i
= 0; i
< 4; i
++ )
1787 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1788 for( i
= 0; i
< 4; i
++ )
1789 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1790 odd_bytes( wtmp
[ i
] ) );
1791 for( i
= 0; i
< 4; i
++ )
1792 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1793 for( i
= 0; i
< 4; i
++ )
1794 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1795 odd_bytes( wtmp
[ i
] ) );
1796 brw_pop_insn_state( p
);
1798 /* Now we want to initialise the four rear gradients based on the
1799 hashes. Format conversion from signed integer to float leaves
1800 everything scaled too high by a factor of pow( 2, 15 ), but
1801 we correct for that right at the end. */
1803 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1804 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1805 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1806 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1807 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1809 brw_push_insn_state( p
);
1810 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1811 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1812 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1813 brw_pop_insn_state( p
);
1815 brw_MUL( p
, x1y0
, x1y0
, t
);
1816 brw_MUL( p
, x1y1
, x1y1
, t
);
1817 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1818 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1819 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1822 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1823 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1824 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1825 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1827 brw_push_insn_state( p
);
1828 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1829 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1830 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1831 brw_pop_insn_state( p
);
1833 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1834 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1835 /* prepare t for the w component (used below): w the first time through
1836 the loop; w - 1 the second time) */
1837 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1838 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1839 p
->current
->header
.predicate_inverse
= 1;
1840 brw_MOV( p
, t
, param
[ 3 ] );
1841 p
->current
->header
.predicate_inverse
= 0;
1842 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1843 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1844 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1846 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1847 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1848 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1849 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1852 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1853 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1854 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1855 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1857 brw_push_insn_state( p
);
1858 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1859 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1860 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1861 brw_pop_insn_state( p
);
1863 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
1864 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
1865 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
1866 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
1868 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1869 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1870 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1871 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1874 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1875 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1876 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1877 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1879 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1880 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1881 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1882 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1883 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1885 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1886 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1887 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1888 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1890 /* Here we interpolate in the y dimension... */
1891 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1892 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1893 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1894 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1895 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1896 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1898 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1899 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1900 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1901 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1903 /* Now do the same thing for the front four gradients... */
1905 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1906 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1907 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1908 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1910 brw_push_insn_state( p
);
1911 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1912 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1913 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1914 brw_pop_insn_state( p
);
1916 brw_MUL( p
, x1y0
, x1y0
, t
);
1917 brw_MUL( p
, x1y1
, x1y1
, t
);
1918 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1919 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1920 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1923 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1924 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1925 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1926 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1928 brw_push_insn_state( p
);
1929 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1930 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1931 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1932 brw_pop_insn_state( p
);
1934 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1935 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1936 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
1937 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1938 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1940 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1941 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1942 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1943 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1946 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1947 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1948 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1949 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1951 brw_push_insn_state( p
);
1952 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1953 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1954 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1955 brw_pop_insn_state( p
);
1957 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1958 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1959 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1960 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1961 /* prepare t for the w component (used below): w the first time through
1962 the loop; w - 1 the second time) */
1963 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1964 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1965 p
->current
->header
.predicate_inverse
= 1;
1966 brw_MOV( p
, t
, param
[ 3 ] );
1967 p
->current
->header
.predicate_inverse
= 0;
1968 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1970 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1971 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1972 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1973 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1976 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1977 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1978 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1979 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1981 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1982 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1983 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1984 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1986 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1987 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1988 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1989 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1991 /* Interpolate in the y dimension: */
1992 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1993 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1994 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1995 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1996 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1997 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1999 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2000 time put the front face in tmp[ 1 ] and we're nearly there... */
2001 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2002 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2003 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2005 /* Another interpolation, in the z dimension: */
2006 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2007 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2008 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2010 /* Exit the loop if we've computed both cubes... */
2011 origin
= p
->nr_insn
;
2012 brw_push_insn_state( p
);
2013 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2014 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2015 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2016 brw_pop_insn_state( p
);
2018 /* Save the result for the w=0 case, and increment the w coordinate: */
2019 brw_MOV( p
, w0
, tmp
[ 0 ] );
2020 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2023 /* Loop around for the other cube. Explicitly set the flag register
2024 (unfortunately we must spend an extra instruction to do this: we
2025 can't rely on a side effect of the previous MOV or ADD because
2026 conditional modifiers which are normally true might be false in
2027 exceptional circumstances, e.g. given a NaN input; the add to
2028 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2029 brw_push_insn_state( p
);
2030 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2031 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2032 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2033 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2034 brw_pop_insn_state( p
);
2036 /* Patch the previous conditional branch now that we know the
2037 destination address. */
2038 brw_set_src1( p
->store
+ origin
,
2039 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2041 /* The very last interpolation. */
2042 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2043 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2044 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2046 /* scale by pow( 2, -15 ), as described above */
2047 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2049 release_tmps( c
, mark
);
2052 static void emit_noise4( struct brw_wm_compile
*c
,
2053 struct prog_instruction
*inst
)
2055 struct brw_compile
*p
= &c
->func
;
2056 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2057 GLuint mask
= inst
->DstReg
.WriteMask
;
2059 int mark
= mark_tmps( c
);
2061 assert( mark
== 0 );
2063 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
2064 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
2065 src2
= get_src_reg( c
, inst
->SrcReg
, 2, 1 );
2066 src3
= get_src_reg( c
, inst
->SrcReg
, 3, 1 );
2068 param0
= alloc_tmp( c
);
2069 param1
= alloc_tmp( c
);
2070 param2
= alloc_tmp( c
);
2071 param3
= alloc_tmp( c
);
2073 brw_MOV( p
, param0
, src0
);
2074 brw_MOV( p
, param1
, src1
);
2075 brw_MOV( p
, param2
, src2
);
2076 brw_MOV( p
, param3
, src3
);
2078 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2080 /* Fill in the result: */
2081 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2082 for (i
= 0 ; i
< 4; i
++) {
2083 if (mask
& (1<<i
)) {
2084 dst
= get_dst_reg(c
, inst
, i
, 1);
2085 brw_MOV( p
, dst
, param0
);
2088 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2089 brw_set_saturate( p
, 0 );
2091 release_tmps( c
, mark
);
/* emit_wpos_xy: window position adjustment for the X and Y channels.
 *
 * Fetches destination components 0 and 1 and components 0 and 1 of the
 * first source operand, then handles the X and Y write-mask bits
 * separately.  The operands visible for X are the source retyped to
 * BRW_REGISTER_TYPE_W and the immediate (0 - origin_x); for Y they are the
 * negated, retyped source and the immediate
 * (origin_y + drawable_height - 1).
 *
 * NOTE(review): the inline Y formula comment below omits the -1 that the
 * immediate operand includes -- confirm which one is intended.
 */
2094 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2095 struct prog_instruction
*inst
)
2097 struct brw_compile
*p
= &c
->func
;
2098 GLuint mask
= inst
->DstReg
.WriteMask
;
2099 struct brw_reg src0
[2], dst
[2];
/* Only channels 0 and 1 are looked up, for both destination and source. */
2101 dst
[0] = get_dst_reg(c
, inst
, 0, 1);
2102 dst
[1] = get_dst_reg(c
, inst
, 1, 1);
2104 src0
[0] = get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
2105 src0
[1] = get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
2107 /* Calculate the pixel offset from window bottom left into destination
2110 if (mask
& WRITEMASK_X
) {
2111 /* X' = X - origin_x */
2114 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2115 brw_imm_d(0 - c
->key
.origin_x
));
2118 if (mask
& WRITEMASK_Y
) {
2119 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2122 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2123 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2128 BIAS on SIMD8 not working yet...
/* emit_txb: build the payload for a texture sample with an extra operand
 * taken from source component 3 (presumably the LOD bias -- confirm).
 *
 * The sampler unit comes from SamplerUnits[inst->TexSrcUnit].  All four
 * destination components and all four components of the first source
 * operand are looked up.  Depending on inst->TexSrcTarget the coordinates
 * are moved into message registers 2..4, with 0.0 written for coordinates
 * the target does not use (1D: only src[0]; 2D/RECT: src[0] and src[1];
 * the remaining case: src[0..2]).  src[3] then goes to message register 5
 * and 0.0 to message register 6.  The sample arguments visible here are
 * dst[0] and the payload register retyped to UW, the surface index
 * unit + MAX_DRAW_BUFFERS, the instruction write mask and the message type
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS.
 */
2130 static void emit_txb(struct brw_wm_compile
*c
,
2131 struct prog_instruction
*inst
)
2133 struct brw_compile
*p
= &c
->func
;
2134 struct brw_reg dst
[4], src
[4], payload_reg
;
2135 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2138 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2139 for (i
= 0; i
< 4; i
++)
2140 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
2141 for (i
= 0; i
< 4; i
++)
2142 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Load the texture coordinates into message registers 2..4. */
2144 switch (inst
->TexSrcTarget
) {
2145 case TEXTURE_1D_INDEX
:
2146 brw_MOV(p
, brw_message_reg(2), src
[0]);
2147 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0));
2148 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2150 case TEXTURE_2D_INDEX
:
2151 case TEXTURE_RECT_INDEX
:
2152 brw_MOV(p
, brw_message_reg(2), src
[0]);
2153 brw_MOV(p
, brw_message_reg(3), src
[1]);
2154 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2157 brw_MOV(p
, brw_message_reg(2), src
[0]);
2158 brw_MOV(p
, brw_message_reg(3), src
[1]);
2159 brw_MOV(p
, brw_message_reg(4), src
[2]);
/* Source component 3 goes to message register 5; register 6 is zeroed. */
2162 brw_MOV(p
, brw_message_reg(5), src
[3]);
2163 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
2165 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2167 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2168 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2170 inst
->DstReg
.WriteMask
,
2171 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
/* emit_tex: build the payload for a plain texture sample.
 *
 * The sampler unit comes from SamplerUnits[inst->TexSrcUnit]; shadow is 1
 * when the bit for that unit is set in c->key.shadowtex_mask.  All four
 * destination components and all four components of the first source
 * operand are looked up.  The emit mask is WRITEMASK_XY for the 2D and
 * RECT targets and WRITEMASK_XYZ in a later case.  Coordinates are moved
 * into consecutive message registers through the swizzle table
 * swz = {0,1,2,2}, or 0.0 is written instead.  One visible path writes 0.0
 * to message register 5 and src[2] to message register 6.  The sample
 * arguments visible here are dst[0] and the payload register retyped to UW,
 * the surface index unit + MAX_DRAW_BUFFERS, the instruction write mask and
 * the message type BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE.  A final visible MOV
 * writes 1.0 to dst[3].
 *
 * NOTE(review): the conditions selecting between the coordinate MOV and
 * the zero MOV, and those guarding the m5/m6 writes and the dst[3] write,
 * should be confirmed (presumably the emit mask and the shadow flag).
 */
2177 static void emit_tex(struct brw_wm_compile
*c
,
2178 struct prog_instruction
*inst
)
2180 struct brw_compile
*p
= &c
->func
;
2181 struct brw_reg dst
[4], src
[4], payload_reg
;
2182 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2187 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2189 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2191 for (i
= 0; i
< 4; i
++)
2192 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
2193 for (i
= 0; i
< 4; i
++)
2194 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Choose which coordinate channels are sent, based on the texture target. */
2197 switch (inst
->TexSrcTarget
) {
2198 case TEXTURE_1D_INDEX
:
2202 case TEXTURE_2D_INDEX
:
2203 case TEXTURE_RECT_INDEX
:
2204 emit
= WRITEMASK_XY
;
2208 emit
= WRITEMASK_XYZ
;
2214 for (i
= 0; i
< nr
; i
++) {
2215 static const GLuint swz
[4] = {0,1,2,2};
2217 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2219 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2224 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0));
2225 brw_MOV(p
, brw_message_reg(6), src
[2]);
2229 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2231 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2232 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2234 inst
->DstReg
.WriteMask
,
2235 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
,
2241 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
/* post_wm_emit: fix up branch offsets after code generation.
 *
 * Walks every instruction of the fragment program.  For the opcode(s)
 * handled by the switch, it looks up the instruction named by
 * BranchTarget, takes the difference between the Data pointers of the
 * target and of the branching instruction (both treated as
 * struct brw_instruction pointers), and patches src1 of the branching
 * native instruction with that difference times 16.
 *
 * NOTE(review): the factor 16 is presumably the size in bytes of one
 * native instruction -- confirm.  Data must have been filled in for both
 * instructions before this runs.
 */
2244 static void post_wm_emit( struct brw_wm_compile
*c
)
2246 GLuint nr_insns
= c
->fp
->program
.Base
.NumInstructions
;
2247 GLuint insn
, target_insn
;
2248 struct prog_instruction
*inst1
, *inst2
;
2249 struct brw_instruction
*brw_inst1
, *brw_inst2
;
2251 for (insn
= 0; insn
< nr_insns
; insn
++) {
2252 inst1
= &c
->fp
->program
.Base
.Instructions
[insn
];
2253 brw_inst1
= inst1
->Data
;
2254 switch (inst1
->Opcode
) {
/* Resolve the branch target and patch the relative offset into src1. */
2256 target_insn
= inst1
->BranchTarget
;
2257 inst2
= &c
->fp
->program
.Base
.Instructions
[target_insn
];
2258 brw_inst2
= inst2
->Data
;
2259 offset
= brw_inst2
- brw_inst1
;
2260 brw_set_src1(brw_inst1
, brw_imm_d(offset
*16));
/* brw_wm_emit_glsl: main code generation loop for the GLSL path.
 *
 * Disables instruction compression, loads the address of c->stack into
 * the address register used as the call stack index, then walks
 * c->prog_instructions.  For each instruction it:
 *   - stores the current native instruction in orig_inst->Data when
 *     inst->Data is non-null,
 *   - sets the conditional modifier to NZ or NONE (inst->CondUpdate is
 *     tested just before),
 *   - dispatches on the opcode to the matching emit_* helper or to the
 *     inline control-flow handling,
 *   - sets predicate control to NORMAL or NONE (inst->CondUpdate is
 *     tested again).
 *
 * IF/ELSE/ENDIF are tracked on the if_inst stack (asserted against
 * MAX_IFSN); loops are tracked in loop_inst (MAX_LOOP_DEPTH entries).  At
 * ENDLOOP the native instructions between the WHILE and the matching DO
 * are scanned and every BREAK or CONTINUE gets its jump count patched.
 * After the loop, Data is reset to NULL on every instruction of the
 * original fragment program.
 *
 * NOTE(review): the push onto loop_inst has no visible bound check against
 * MAX_LOOP_DEPTH, unlike the if_inst push which asserts -- confirm that
 * nesting depth is limited elsewhere.
 */
2268 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2271 #define MAX_LOOP_DEPTH 32
2272 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2273 struct brw_instruction
*inst0
, *inst1
;
2274 int i
, if_insn
= 0, loop_insn
= 0;
2275 struct brw_compile
*p
= &c
->func
;
2276 struct brw_indirect stack_index
= brw_indirect(0, 0);
/* Emit uncompressed instructions and point the stack index at c->stack. */
2280 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2281 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2283 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2284 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2285 struct prog_instruction
*orig_inst
;
/* When the instruction carries a back pointer in Data, record the current
 * native instruction position on the instruction it points to. */
2287 if ((orig_inst
= inst
->Data
) != 0)
2288 orig_inst
->Data
= current_insn(p
);
2290 if (inst
->CondUpdate
)
2291 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2293 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2295 switch (inst
->Opcode
) {
2297 emit_pixel_xy(c
, inst
);
2300 emit_delta_xy(c
, inst
);
2303 emit_pixel_w(c
, inst
);
2306 emit_linterp(c
, inst
);
2309 emit_pinterp(c
, inst
);
2312 emit_cinterp(c
, inst
);
2315 emit_wpos_xy(c
, inst
);
2318 emit_fb_write(c
, inst
);
2339 emit_trunc(c
, inst
);
2414 emit_noise1(c
, inst
);
2417 emit_noise2(c
, inst
);
2420 emit_noise3(c
, inst
);
2423 emit_noise4(c
, inst
);
/* Push the new IF onto the if_inst stack. */
2435 assert(if_insn
< MAX_IFSN
);
2436 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2439 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2442 assert(if_insn
> 0);
2443 brw_ENDIF(p
, if_inst
[--if_insn
]);
/* Store IP plus 3*16 at the current stack slot, advance the stack index by
 * 4, then emit an IP-relative ADD whose position is recorded in
 * orig_inst->Data (presumably patched later by post_wm_emit -- confirm). */
2449 brw_push_insn_state(p
);
2450 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2451 brw_set_access_mode(p
, BRW_ALIGN_1
);
2452 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2453 brw_set_access_mode(p
, BRW_ALIGN_16
);
2454 brw_ADD(p
, get_addr_reg(stack_index
),
2455 get_addr_reg(stack_index
), brw_imm_d(4));
2456 orig_inst
= inst
->Data
;
2457 orig_inst
->Data
= &p
->store
[p
->nr_insn
];
2458 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2459 brw_pop_insn_state(p
);
/* Step the stack index back by 4 and load IP from that stack slot. */
2463 brw_push_insn_state(p
);
2464 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2465 brw_ADD(p
, get_addr_reg(stack_index
),
2466 get_addr_reg(stack_index
), brw_imm_d(-4));
2467 brw_set_access_mode(p
, BRW_ALIGN_1
);
2468 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2469 brw_set_access_mode(p
, BRW_ALIGN_16
);
2470 brw_pop_insn_state(p
);
/* Remember the DO instruction so the matching ENDLOOP can find it. */
2473 case OPCODE_BGNLOOP
:
2474 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2478 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2482 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2484 case OPCODE_ENDLOOP
:
2486 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2487 /* patch all the BREAK instructions from
2489 while (inst0
> loop_inst
[loop_insn
]) {
2491 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2492 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2493 inst0
->bits3
.if_else
.pop_count
= 0;
2494 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2495 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2496 inst0
->bits3
.if_else
.pop_count
= 0;
2501 _mesa_printf("unsupported IR in fragment shader %d\n",
2504 if (inst
->CondUpdate
)
2505 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2507 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2510 for (i
= 0; i
< c
->fp
->program
.Base
.NumInstructions
; i
++)
2511 c
->fp
->program
.Base
.Instructions
[i
].Data
= NULL
;
/* brw_wm_glsl_emit: public entry point of the GLSL fragment code generator.
 *
 * Runs brw_wm_emit_glsl() and then publishes the results in c->prog_data:
 * total_grf is set to c->reg_index (the register allocation counter used by
 * the allocators in this file) and total_scratch is set to 0.
 */
2514 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2517 brw_wm_emit_glsl(brw
, c
);
2518 c
->prog_data
.total_grf
= c
->reg_index
;
2519 c
->prog_data
.total_scratch
= 0;