1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
44 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
45 int component
, struct brw_reg reg
)
47 c
->wm_regs
[file
][index
][component
].reg
= reg
;
48 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
51 static int get_scalar_dst_index(struct prog_instruction
*inst
)
54 for (i
= 0; i
< 4; i
++)
55 if (inst
->DstReg
.WriteMask
& (1<<i
))
60 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
63 if(c
->tmp_index
== c
->tmp_max
)
64 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
66 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
70 static int mark_tmps(struct brw_wm_compile
*c
)
75 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
77 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
80 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
86 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
, int nr
, GLuint neg
, GLuint abs
)
90 case PROGRAM_STATE_VAR
:
91 case PROGRAM_CONSTANT
:
93 file
= PROGRAM_STATE_VAR
;
95 case PROGRAM_UNDEFINED
:
96 return brw_null_reg();
101 if(c
->wm_regs
[file
][index
][component
].inited
)
102 reg
= c
->wm_regs
[file
][index
][component
].reg
;
104 reg
= brw_vec8_grf(c
->reg_index
, 0);
106 if(!c
->wm_regs
[file
][index
][component
].inited
) {
107 set_reg(c
, file
, index
, component
, reg
);
111 if (neg
& (1<< component
)) {
119 static void prealloc_reg(struct brw_wm_compile
*c
)
123 int nr_interp_regs
= 0;
124 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
126 for (i
= 0; i
< 4; i
++) {
127 reg
= (i
< c
->key
.nr_depth_regs
)
128 ? brw_vec8_grf(i
*2, 0) : brw_vec8_grf(0, 0);
129 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
131 c
->reg_index
+= 2*c
->key
.nr_depth_regs
;
133 int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
134 struct gl_program_parameter_list
*plist
=
135 c
->fp
->program
.Base
.Parameters
;
137 c
->prog_data
.nr_params
= 4*nr_params
;
138 for (i
= 0; i
< nr_params
; i
++) {
139 for (j
= 0; j
< 4; j
++, index
++) {
140 reg
= brw_vec1_grf(c
->reg_index
+ index
/8,
142 c
->prog_data
.param
[index
] =
143 &plist
->ParameterValues
[i
][j
];
144 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
147 c
->nr_creg
= 2*((4*nr_params
+15)/16);
148 c
->reg_index
+= c
->nr_creg
;
150 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
151 if (inputs
& (1<<i
)) {
153 reg
= brw_vec8_grf(c
->reg_index
, 0);
154 for (j
= 0; j
< 4; j
++)
155 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
160 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
161 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
162 c
->prog_data
.curb_read_length
= c
->nr_creg
;
163 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
165 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
169 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
170 struct prog_instruction
*inst
, int component
, int nr
)
172 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
176 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
177 struct prog_src_register
*src
, int index
, int nr
)
179 int component
= GET_SWZ(src
->Swizzle
, index
);
180 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
181 src
->NegateBase
, src
->Abs
);
/*
 * Subroutines are minimal support for reusable instruction sequences.
 * They are implemented as simply as possible to minimise overhead: there
 * is no explicit support for communication between the caller and callee
 * other than saving the return address in a temporary register, nor is
 * there any automatic local storage.  This implies that great care is
 * required before attempting reentrancy or any kind of nested
 * subroutine invocations.
 */
193 static void invoke_subroutine( struct brw_wm_compile
*c
,
194 enum _subroutine subroutine
,
195 void (*emit
)( struct brw_wm_compile
* ) )
197 struct brw_compile
*p
= &c
->func
;
199 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
201 if( c
->subroutines
[ subroutine
] ) {
202 /* subroutine previously emitted: reuse existing instructions */
204 int mark
= mark_tmps( c
);
205 struct brw_reg return_address
= retype( alloc_tmp( c
),
206 BRW_REGISTER_TYPE_UD
);
207 int here
= p
->nr_insn
;
209 brw_push_insn_state(p
);
210 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
211 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
213 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
214 brw_imm_d( ( c
->subroutines
[ subroutine
] -
216 brw_pop_insn_state(p
);
218 release_tmps( c
, mark
);
220 /* previously unused subroutine: emit, and mark for later reuse */
222 int mark
= mark_tmps( c
);
223 struct brw_reg return_address
= retype( alloc_tmp( c
),
224 BRW_REGISTER_TYPE_UD
);
225 struct brw_instruction
*calc
;
226 int base
= p
->nr_insn
;
228 brw_push_insn_state(p
);
229 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
230 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
231 brw_pop_insn_state(p
);
233 c
->subroutines
[ subroutine
] = p
->nr_insn
;
237 brw_push_insn_state(p
);
238 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
239 brw_MOV( p
, brw_ip_reg(), return_address
);
240 brw_pop_insn_state(p
);
242 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
244 release_tmps( c
, mark
);
248 static void emit_abs( struct brw_wm_compile
*c
,
249 struct prog_instruction
*inst
)
252 struct brw_compile
*p
= &c
->func
;
253 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
254 for (i
= 0; i
< 4; i
++) {
255 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
256 struct brw_reg src
, dst
;
257 dst
= get_dst_reg(c
, inst
, i
, 1);
258 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
259 brw_MOV(p
, dst
, brw_abs(src
));
262 brw_set_saturate(p
, 0);
265 static void emit_trunc( struct brw_wm_compile
*c
,
266 struct prog_instruction
*inst
)
269 struct brw_compile
*p
= &c
->func
;
270 GLuint mask
= inst
->DstReg
.WriteMask
;
271 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
272 for (i
= 0; i
< 4; i
++) {
274 struct brw_reg src
, dst
;
275 dst
= get_dst_reg(c
, inst
, i
, 1) ;
276 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
277 brw_RNDZ(p
, dst
, src
);
280 brw_set_saturate(p
, 0);
283 static void emit_mov( struct brw_wm_compile
*c
,
284 struct prog_instruction
*inst
)
287 struct brw_compile
*p
= &c
->func
;
288 GLuint mask
= inst
->DstReg
.WriteMask
;
289 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
290 for (i
= 0; i
< 4; i
++) {
292 struct brw_reg src
, dst
;
293 dst
= get_dst_reg(c
, inst
, i
, 1);
294 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
295 brw_MOV(p
, dst
, src
);
298 brw_set_saturate(p
, 0);
301 static void emit_pixel_xy(struct brw_wm_compile
*c
,
302 struct prog_instruction
*inst
)
304 struct brw_reg r1
= brw_vec1_grf(1, 0);
305 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
307 struct brw_reg dst0
, dst1
;
308 struct brw_compile
*p
= &c
->func
;
309 GLuint mask
= inst
->DstReg
.WriteMask
;
311 dst0
= get_dst_reg(c
, inst
, 0, 1);
312 dst1
= get_dst_reg(c
, inst
, 1, 1);
313 /* Calculate pixel centers by adding 1 or 0 to each of the
314 * micro-tile coordinates passed in r1.
316 if (mask
& WRITEMASK_X
) {
318 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
319 stride(suboffset(r1_uw
, 4), 2, 4, 0),
320 brw_imm_v(0x10101010));
323 if (mask
& WRITEMASK_Y
) {
325 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
326 stride(suboffset(r1_uw
, 5), 2, 4, 0),
327 brw_imm_v(0x11001100));
331 static void emit_delta_xy(struct brw_wm_compile
*c
,
332 struct prog_instruction
*inst
)
334 struct brw_reg r1
= brw_vec1_grf(1, 0);
335 struct brw_reg dst0
, dst1
, src0
, src1
;
336 struct brw_compile
*p
= &c
->func
;
337 GLuint mask
= inst
->DstReg
.WriteMask
;
339 dst0
= get_dst_reg(c
, inst
, 0, 1);
340 dst1
= get_dst_reg(c
, inst
, 1, 1);
341 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
342 src1
= get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
343 /* Calc delta X,Y by subtracting origin in r1 from the pixel
346 if (mask
& WRITEMASK_X
) {
349 retype(src0
, BRW_REGISTER_TYPE_UW
),
353 if (mask
& WRITEMASK_Y
) {
356 retype(src1
, BRW_REGISTER_TYPE_UW
),
357 negate(suboffset(r1
,1)));
362 static void fire_fb_write( struct brw_wm_compile
*c
,
368 struct brw_compile
*p
= &c
->func
;
369 /* Pass through control information:
371 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
373 brw_push_insn_state(p
);
374 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
376 brw_message_reg(base_reg
+ 1),
378 brw_pop_insn_state(p
);
380 /* Send framebuffer write message: */
382 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
384 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
391 static void emit_fb_write(struct brw_wm_compile
*c
,
392 struct prog_instruction
*inst
)
394 struct brw_compile
*p
= &c
->func
;
400 /* Reserve a space for AA - may not be needed:
402 if (c
->key
.aa_dest_stencil_reg
)
405 brw_push_insn_state(p
);
406 for (channel
= 0; channel
< 4; channel
++) {
407 src0
= get_src_reg(c
, &inst
->SrcReg
[0], channel
, 1);
408 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
409 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
410 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
412 /* skip over the regs populated above: */
414 brw_pop_insn_state(p
);
416 if (c
->key
.source_depth_to_render_target
) {
417 if (c
->key
.computes_depth
) {
418 src0
= get_src_reg(c
, &inst
->SrcReg
[2], 2, 1);
419 brw_MOV(p
, brw_message_reg(nr
), src0
);
422 src0
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
423 brw_MOV(p
, brw_message_reg(nr
), src0
);
429 target
= inst
->Sampler
>> 1;
430 eot
= inst
->Sampler
& 1;
431 fire_fb_write(c
, 0, nr
, target
, eot
);
434 static void emit_pixel_w( struct brw_wm_compile
*c
,
435 struct prog_instruction
*inst
)
437 struct brw_compile
*p
= &c
->func
;
438 GLuint mask
= inst
->DstReg
.WriteMask
;
439 if (mask
& WRITEMASK_W
) {
440 struct brw_reg dst
, src0
, delta0
, delta1
;
441 struct brw_reg interp3
;
443 dst
= get_dst_reg(c
, inst
, 3, 1);
444 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
445 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
446 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
448 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
449 /* Calc 1/w - just linterp wpos[3] optimized by putting the
450 * result straight into a message reg.
452 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
453 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
457 BRW_MATH_FUNCTION_INV
,
458 BRW_MATH_SATURATE_NONE
,
460 BRW_MATH_PRECISION_FULL
);
464 static void emit_linterp(struct brw_wm_compile
*c
,
465 struct prog_instruction
*inst
)
467 struct brw_compile
*p
= &c
->func
;
468 GLuint mask
= inst
->DstReg
.WriteMask
;
469 struct brw_reg interp
[4];
470 struct brw_reg dst
, delta0
, delta1
;
473 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
474 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
475 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
479 interp
[0] = brw_vec1_grf(nr
, 0);
480 interp
[1] = brw_vec1_grf(nr
, 4);
481 interp
[2] = brw_vec1_grf(nr
+1, 0);
482 interp
[3] = brw_vec1_grf(nr
+1, 4);
484 for(i
= 0; i
< 4; i
++ ) {
486 dst
= get_dst_reg(c
, inst
, i
, 1);
487 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
488 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
493 static void emit_cinterp(struct brw_wm_compile
*c
,
494 struct prog_instruction
*inst
)
496 struct brw_compile
*p
= &c
->func
;
497 GLuint mask
= inst
->DstReg
.WriteMask
;
499 struct brw_reg interp
[4];
500 struct brw_reg dst
, src0
;
502 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
506 interp
[0] = brw_vec1_grf(nr
, 0);
507 interp
[1] = brw_vec1_grf(nr
, 4);
508 interp
[2] = brw_vec1_grf(nr
+1, 0);
509 interp
[3] = brw_vec1_grf(nr
+1, 4);
511 for(i
= 0; i
< 4; i
++ ) {
513 dst
= get_dst_reg(c
, inst
, i
, 1);
514 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
519 static void emit_pinterp(struct brw_wm_compile
*c
,
520 struct prog_instruction
*inst
)
522 struct brw_compile
*p
= &c
->func
;
523 GLuint mask
= inst
->DstReg
.WriteMask
;
525 struct brw_reg interp
[4];
526 struct brw_reg dst
, delta0
, delta1
;
527 struct brw_reg src0
, w
;
529 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
530 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
531 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1, 1);
532 w
= get_src_reg(c
, &inst
->SrcReg
[2], 3, 1);
536 interp
[0] = brw_vec1_grf(nr
, 0);
537 interp
[1] = brw_vec1_grf(nr
, 4);
538 interp
[2] = brw_vec1_grf(nr
+1, 0);
539 interp
[3] = brw_vec1_grf(nr
+1, 4);
541 for(i
= 0; i
< 4; i
++ ) {
543 dst
= get_dst_reg(c
, inst
, i
, 1);
544 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
545 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
547 brw_MUL(p
, dst
, dst
, w
);
552 static void emit_xpd(struct brw_wm_compile
*c
,
553 struct prog_instruction
*inst
)
556 struct brw_compile
*p
= &c
->func
;
557 GLuint mask
= inst
->DstReg
.WriteMask
;
558 for (i
= 0; i
< 4; i
++) {
562 struct brw_reg src0
, src1
, dst
;
563 dst
= get_dst_reg(c
, inst
, i
, 1);
564 src0
= negate(get_src_reg(c
, &inst
->SrcReg
[0], i2
, 1));
565 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i1
, 1);
566 brw_MUL(p
, brw_null_reg(), src0
, src1
);
567 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i1
, 1);
568 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i2
, 1);
569 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
570 brw_MAC(p
, dst
, src0
, src1
);
571 brw_set_saturate(p
, 0);
574 brw_set_saturate(p
, 0);
577 static void emit_dp3(struct brw_wm_compile
*c
,
578 struct prog_instruction
*inst
)
580 struct brw_reg src0
[3], src1
[3], dst
;
582 struct brw_compile
*p
= &c
->func
;
583 for (i
= 0; i
< 3; i
++) {
584 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
585 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
588 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
589 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
590 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
591 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
592 brw_MAC(p
, dst
, src0
[2], src1
[2]);
593 brw_set_saturate(p
, 0);
596 static void emit_dp4(struct brw_wm_compile
*c
,
597 struct prog_instruction
*inst
)
599 struct brw_reg src0
[4], src1
[4], dst
;
601 struct brw_compile
*p
= &c
->func
;
602 for (i
= 0; i
< 4; i
++) {
603 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
604 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
606 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
607 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
608 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
609 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
610 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
611 brw_MAC(p
, dst
, src0
[3], src1
[3]);
612 brw_set_saturate(p
, 0);
615 static void emit_dph(struct brw_wm_compile
*c
,
616 struct prog_instruction
*inst
)
618 struct brw_reg src0
[4], src1
[4], dst
;
620 struct brw_compile
*p
= &c
->func
;
621 for (i
= 0; i
< 4; i
++) {
622 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
623 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
625 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
626 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
627 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
628 brw_MAC(p
, dst
, src0
[2], src1
[2]);
629 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
630 brw_ADD(p
, dst
, dst
, src1
[3]);
631 brw_set_saturate(p
, 0);
634 static void emit_math1(struct brw_wm_compile
*c
,
635 struct prog_instruction
*inst
, GLuint func
)
637 struct brw_compile
*p
= &c
->func
;
638 struct brw_reg src0
, dst
;
640 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
641 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
642 brw_MOV(p
, brw_message_reg(2), src0
);
646 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
649 BRW_MATH_DATA_VECTOR
,
650 BRW_MATH_PRECISION_FULL
);
653 static void emit_rcp(struct brw_wm_compile
*c
,
654 struct prog_instruction
*inst
)
656 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
659 static void emit_rsq(struct brw_wm_compile
*c
,
660 struct prog_instruction
*inst
)
662 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
665 static void emit_sin(struct brw_wm_compile
*c
,
666 struct prog_instruction
*inst
)
668 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
671 static void emit_cos(struct brw_wm_compile
*c
,
672 struct prog_instruction
*inst
)
674 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
677 static void emit_ex2(struct brw_wm_compile
*c
,
678 struct prog_instruction
*inst
)
680 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
683 static void emit_lg2(struct brw_wm_compile
*c
,
684 struct prog_instruction
*inst
)
686 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
689 static void emit_add(struct brw_wm_compile
*c
,
690 struct prog_instruction
*inst
)
692 struct brw_compile
*p
= &c
->func
;
693 struct brw_reg src0
, src1
, dst
;
694 GLuint mask
= inst
->DstReg
.WriteMask
;
696 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
697 for (i
= 0 ; i
< 4; i
++) {
699 dst
= get_dst_reg(c
, inst
, i
, 1);
700 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
701 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
702 brw_ADD(p
, dst
, src0
, src1
);
705 brw_set_saturate(p
, 0);
708 static void emit_sub(struct brw_wm_compile
*c
,
709 struct prog_instruction
*inst
)
711 struct brw_compile
*p
= &c
->func
;
712 struct brw_reg src0
, src1
, dst
;
713 GLuint mask
= inst
->DstReg
.WriteMask
;
715 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
716 for (i
= 0 ; i
< 4; i
++) {
718 dst
= get_dst_reg(c
, inst
, i
, 1);
719 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
720 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
721 brw_ADD(p
, dst
, src0
, negate(src1
));
724 brw_set_saturate(p
, 0);
727 static void emit_mul(struct brw_wm_compile
*c
,
728 struct prog_instruction
*inst
)
730 struct brw_compile
*p
= &c
->func
;
731 struct brw_reg src0
, src1
, dst
;
732 GLuint mask
= inst
->DstReg
.WriteMask
;
734 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
735 for (i
= 0 ; i
< 4; i
++) {
737 dst
= get_dst_reg(c
, inst
, i
, 1);
738 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
739 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
740 brw_MUL(p
, dst
, src0
, src1
);
743 brw_set_saturate(p
, 0);
746 static void emit_frc(struct brw_wm_compile
*c
,
747 struct prog_instruction
*inst
)
749 struct brw_compile
*p
= &c
->func
;
750 struct brw_reg src0
, dst
;
751 GLuint mask
= inst
->DstReg
.WriteMask
;
753 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
754 for (i
= 0 ; i
< 4; i
++) {
756 dst
= get_dst_reg(c
, inst
, i
, 1);
757 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
758 brw_FRC(p
, dst
, src0
);
761 if (inst
->SaturateMode
!= SATURATE_OFF
)
762 brw_set_saturate(p
, 0);
765 static void emit_flr(struct brw_wm_compile
*c
,
766 struct prog_instruction
*inst
)
768 struct brw_compile
*p
= &c
->func
;
769 struct brw_reg src0
, dst
;
770 GLuint mask
= inst
->DstReg
.WriteMask
;
772 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
773 for (i
= 0 ; i
< 4; i
++) {
775 dst
= get_dst_reg(c
, inst
, i
, 1);
776 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
777 brw_RNDD(p
, dst
, src0
);
780 brw_set_saturate(p
, 0);
783 static void emit_max(struct brw_wm_compile
*c
,
784 struct prog_instruction
*inst
)
786 struct brw_compile
*p
= &c
->func
;
787 GLuint mask
= inst
->DstReg
.WriteMask
;
788 struct brw_reg src0
, src1
, dst
;
790 brw_push_insn_state(p
);
791 for (i
= 0; i
< 4; i
++) {
793 dst
= get_dst_reg(c
, inst
, i
, 1);
794 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
795 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
796 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
797 brw_MOV(p
, dst
, src0
);
798 brw_set_saturate(p
, 0);
800 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src0
, src1
);
801 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
802 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
803 brw_MOV(p
, dst
, src1
);
804 brw_set_saturate(p
, 0);
805 brw_set_predicate_control_flag_value(p
, 0xff);
808 brw_pop_insn_state(p
);
811 static void emit_min(struct brw_wm_compile
*c
,
812 struct prog_instruction
*inst
)
814 struct brw_compile
*p
= &c
->func
;
815 GLuint mask
= inst
->DstReg
.WriteMask
;
816 struct brw_reg src0
, src1
, dst
;
818 brw_push_insn_state(p
);
819 for (i
= 0; i
< 4; i
++) {
821 dst
= get_dst_reg(c
, inst
, i
, 1);
822 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
823 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
824 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
825 brw_MOV(p
, dst
, src0
);
826 brw_set_saturate(p
, 0);
828 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
829 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
830 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
831 brw_MOV(p
, dst
, src1
);
832 brw_set_saturate(p
, 0);
833 brw_set_predicate_control_flag_value(p
, 0xff);
836 brw_pop_insn_state(p
);
839 static void emit_pow(struct brw_wm_compile
*c
,
840 struct prog_instruction
*inst
)
842 struct brw_compile
*p
= &c
->func
;
843 struct brw_reg dst
, src0
, src1
;
844 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
), 1);
845 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
846 src1
= get_src_reg(c
, &inst
->SrcReg
[1], 0, 1);
848 brw_MOV(p
, brw_message_reg(2), src0
);
849 brw_MOV(p
, brw_message_reg(3), src1
);
853 BRW_MATH_FUNCTION_POW
,
854 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
857 BRW_MATH_DATA_VECTOR
,
858 BRW_MATH_PRECISION_FULL
);
861 static void emit_lrp(struct brw_wm_compile
*c
,
862 struct prog_instruction
*inst
)
864 struct brw_compile
*p
= &c
->func
;
865 GLuint mask
= inst
->DstReg
.WriteMask
;
866 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
868 int mark
= mark_tmps(c
);
869 for (i
= 0; i
< 4; i
++) {
871 dst
= get_dst_reg(c
, inst
, i
, 1);
872 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
874 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
876 if (src1
.nr
== dst
.nr
) {
878 brw_MOV(p
, tmp1
, src1
);
882 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
883 if (src2
.nr
== dst
.nr
) {
885 brw_MOV(p
, tmp2
, src2
);
889 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
890 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
891 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
892 brw_MAC(p
, dst
, src0
, tmp1
);
893 brw_set_saturate(p
, 0);
895 release_tmps(c
, mark
);
900 * For GLSL shaders, this KIL will be unconditional.
901 * It may be contained inside an IF/ENDIF structure of course.
903 static void emit_kil(struct brw_wm_compile
*c
)
905 struct brw_compile
*p
= &c
->func
;
906 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
907 brw_push_insn_state(p
);
908 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
909 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
910 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
911 brw_pop_insn_state(p
);
914 static void emit_mad(struct brw_wm_compile
*c
,
915 struct prog_instruction
*inst
)
917 struct brw_compile
*p
= &c
->func
;
918 GLuint mask
= inst
->DstReg
.WriteMask
;
919 struct brw_reg dst
, src0
, src1
, src2
;
922 for (i
= 0; i
< 4; i
++) {
924 dst
= get_dst_reg(c
, inst
, i
, 1);
925 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
926 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
927 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
, 1);
928 brw_MUL(p
, dst
, src0
, src1
);
930 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
931 brw_ADD(p
, dst
, dst
, src2
);
932 brw_set_saturate(p
, 0);
937 static void emit_sop(struct brw_wm_compile
*c
,
938 struct prog_instruction
*inst
, GLuint cond
)
940 struct brw_compile
*p
= &c
->func
;
941 GLuint mask
= inst
->DstReg
.WriteMask
;
942 struct brw_reg dst
, src0
, src1
;
945 for (i
= 0; i
< 4; i
++) {
947 dst
= get_dst_reg(c
, inst
, i
, 1);
948 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
949 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
, 1);
950 brw_push_insn_state(p
);
951 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
952 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
953 brw_MOV(p
, dst
, brw_imm_f(0.0));
954 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
955 brw_MOV(p
, dst
, brw_imm_f(1.0));
956 brw_pop_insn_state(p
);
961 static void emit_slt(struct brw_wm_compile
*c
,
962 struct prog_instruction
*inst
)
964 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
967 static void emit_sle(struct brw_wm_compile
*c
,
968 struct prog_instruction
*inst
)
970 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
973 static void emit_sgt(struct brw_wm_compile
*c
,
974 struct prog_instruction
*inst
)
976 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
979 static void emit_sge(struct brw_wm_compile
*c
,
980 struct prog_instruction
*inst
)
982 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
985 static void emit_seq(struct brw_wm_compile
*c
,
986 struct prog_instruction
*inst
)
988 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
991 static void emit_sne(struct brw_wm_compile
*c
,
992 struct prog_instruction
*inst
)
994 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
997 static void emit_ddx(struct brw_wm_compile
*c
,
998 struct prog_instruction
*inst
)
1000 struct brw_compile
*p
= &c
->func
;
1001 GLuint mask
= inst
->DstReg
.WriteMask
;
1002 struct brw_reg interp
[4];
1004 struct brw_reg src0
, w
;
1006 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1007 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1009 interp
[0] = brw_vec1_grf(nr
, 0);
1010 interp
[1] = brw_vec1_grf(nr
, 4);
1011 interp
[2] = brw_vec1_grf(nr
+1, 0);
1012 interp
[3] = brw_vec1_grf(nr
+1, 4);
1013 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1014 for(i
= 0; i
< 4; i
++ ) {
1015 if (mask
& (1<<i
)) {
1016 dst
= get_dst_reg(c
, inst
, i
, 1);
1017 brw_MOV(p
, dst
, interp
[i
]);
1018 brw_MUL(p
, dst
, dst
, w
);
1021 brw_set_saturate(p
, 0);
1024 static void emit_ddy(struct brw_wm_compile
*c
,
1025 struct prog_instruction
*inst
)
1027 struct brw_compile
*p
= &c
->func
;
1028 GLuint mask
= inst
->DstReg
.WriteMask
;
1029 struct brw_reg interp
[4];
1031 struct brw_reg src0
, w
;
1034 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
1036 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3, 1);
1037 interp
[0] = brw_vec1_grf(nr
, 0);
1038 interp
[1] = brw_vec1_grf(nr
, 4);
1039 interp
[2] = brw_vec1_grf(nr
+1, 0);
1040 interp
[3] = brw_vec1_grf(nr
+1, 4);
1041 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1042 for(i
= 0; i
< 4; i
++ ) {
1043 if (mask
& (1<<i
)) {
1044 dst
= get_dst_reg(c
, inst
, i
, 1);
1045 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1046 brw_MUL(p
, dst
, dst
, w
);
1049 brw_set_saturate(p
, 0);
1052 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1054 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1058 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1060 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1063 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1065 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1068 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1070 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1074 /* One-, two- and three-dimensional Perlin noise, similar to the description
1075 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1076 static void noise1_sub( struct brw_wm_compile
*c
) {
1078 struct brw_compile
*p
= &c
->func
;
1079 struct brw_reg param
,
1080 x0
, x1
, /* gradients at each end */
1081 t
, tmp
[ 2 ], /* float temporaries */
1082 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1084 int mark
= mark_tmps( c
);
1086 x0
= alloc_tmp( c
);
1087 x1
= alloc_tmp( c
);
1089 tmp
[ 0 ] = alloc_tmp( c
);
1090 tmp
[ 1 ] = alloc_tmp( c
);
1091 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1092 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1093 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1094 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1095 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1097 param
= lookup_tmp( c
, mark
- 2 );
1099 brw_set_access_mode( p
, BRW_ALIGN_1
);
1101 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1103 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1104 be hashed. Also compute the remainder (offset within the unit
1105 length), interleaved to reduce register dependency penalties. */
1106 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1107 brw_FRC( p
, param
, param
);
1108 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1109 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1110 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1112 /* We're now ready to perform the hashing. The two hashes are
1113 interleaved for performance. The hash function used is
1114 designed to rapidly achieve avalanche and require only 32x16
1115 bit multiplication, and 16-bit swizzles (which we get for
1116 free). We can't use immediate operands in the multiplies,
1117 because immediates are permitted only in src1 and the 16-bit
1118 factor is permitted only in src0. */
1119 for( i
= 0; i
< 2; i
++ )
1120 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1121 for( i
= 0; i
< 2; i
++ )
1122 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1123 high_words( itmp
[ i
] ) );
1124 for( i
= 0; i
< 2; i
++ )
1125 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1126 for( i
= 0; i
< 2; i
++ )
1127 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1128 high_words( itmp
[ i
] ) );
1129 for( i
= 0; i
< 2; i
++ )
1130 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1131 for( i
= 0; i
< 2; i
++ )
1132 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1133 high_words( itmp
[ i
] ) );
1135 /* Now we want to initialise the two gradients based on the
1136 hashes. Format conversion from signed integer to float leaves
1137 everything scaled too high by a factor of pow( 2, 31 ), but
1138 we correct for that right at the end. */
1139 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1140 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1141 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1143 brw_MUL( p
, x0
, x0
, param
);
1144 brw_MUL( p
, x1
, x1
, t
);
1146 /* We interpolate between the gradients using the polynomial
1147 6t^5 - 15t^4 + 10t^3 (Perlin). */
1148 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1149 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1150 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1151 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1152 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1153 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1155 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1156 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1157 brw_MUL( p
, x1
, x1
, param
);
1158 brw_ADD( p
, x0
, x0
, x1
);
1159 /* scale by pow( 2, -30 ), to compensate for the format conversion
1160 above and an extra factor of 2 so that a single gradient covers
1162 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1164 release_tmps( c
, mark
);
1167 static void emit_noise1( struct brw_wm_compile
*c
,
1168 struct prog_instruction
*inst
)
1170 struct brw_compile
*p
= &c
->func
;
1171 struct brw_reg src
, param
, dst
;
1172 GLuint mask
= inst
->DstReg
.WriteMask
;
1174 int mark
= mark_tmps( c
);
1176 assert( mark
== 0 );
1178 src
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1180 param
= alloc_tmp( c
);
1182 brw_MOV( p
, param
, src
);
1184 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1186 /* Fill in the result: */
1187 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1188 for (i
= 0 ; i
< 4; i
++) {
1189 if (mask
& (1<<i
)) {
1190 dst
= get_dst_reg(c
, inst
, i
, 1);
1191 brw_MOV( p
, dst
, param
);
1194 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1195 brw_set_saturate( p
, 0 );
1197 release_tmps( c
, mark
);
1200 static void noise2_sub( struct brw_wm_compile
*c
) {
1202 struct brw_compile
*p
= &c
->func
;
1203 struct brw_reg param0
, param1
,
1204 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1205 t
, tmp
[ 4 ], /* float temporaries */
1206 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1208 int mark
= mark_tmps( c
);
1210 x0y0
= alloc_tmp( c
);
1211 x0y1
= alloc_tmp( c
);
1212 x1y0
= alloc_tmp( c
);
1213 x1y1
= alloc_tmp( c
);
1215 for( i
= 0; i
< 4; i
++ ) {
1216 tmp
[ i
] = alloc_tmp( c
);
1217 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1219 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1220 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1221 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1223 param0
= lookup_tmp( c
, mark
- 3 );
1224 param1
= lookup_tmp( c
, mark
- 2 );
1226 brw_set_access_mode( p
, BRW_ALIGN_1
);
1228 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1229 be hashed. Also compute the remainders (offsets within the unit
1230 square), interleaved to reduce register dependency penalties. */
1231 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1232 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1233 brw_FRC( p
, param0
, param0
);
1234 brw_FRC( p
, param1
, param1
);
1235 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1236 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1237 low_words( itmp
[ 1 ] ) );
1238 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1239 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1240 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1241 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1242 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1244 /* We're now ready to perform the hashing. The four hashes are
1245 interleaved for performance. The hash function used is
1246 designed to rapidly achieve avalanche and require only 32x16
1247 bit multiplication, and 16-bit swizzles (which we get for
1248 free). We can't use immediate operands in the multiplies,
1249 because immediates are permitted only in src1 and the 16-bit
1250 factor is permitted only in src0. */
1251 for( i
= 0; i
< 4; i
++ )
1252 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1253 for( i
= 0; i
< 4; i
++ )
1254 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1255 high_words( itmp
[ i
] ) );
1256 for( i
= 0; i
< 4; i
++ )
1257 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1258 for( i
= 0; i
< 4; i
++ )
1259 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1260 high_words( itmp
[ i
] ) );
1261 for( i
= 0; i
< 4; i
++ )
1262 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1263 for( i
= 0; i
< 4; i
++ )
1264 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1265 high_words( itmp
[ i
] ) );
1267 /* Now we want to initialise the four gradients based on the
1268 hashes. Format conversion from signed integer to float leaves
1269 everything scaled too high by a factor of pow( 2, 15 ), but
1270 we correct for that right at the end. */
1271 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1272 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1273 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1274 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1275 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1277 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1278 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1279 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1280 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1282 brw_MUL( p
, x1y0
, x1y0
, t
);
1283 brw_MUL( p
, x1y1
, x1y1
, t
);
1284 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1285 brw_MUL( p
, x0y0
, x0y0
, param0
);
1286 brw_MUL( p
, x0y1
, x0y1
, param0
);
1288 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1289 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1290 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1291 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1293 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1294 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1295 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1296 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1298 /* We interpolate between the gradients using the polynomial
1299 6t^5 - 15t^4 + 10t^3 (Perlin). */
1300 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1301 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1302 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1303 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1304 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1305 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1306 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1308 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1309 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1310 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1311 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1312 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1314 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1315 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1316 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1317 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1319 /* Here we interpolate in the y dimension... */
1320 brw_MUL( p
, x0y1
, x0y1
, param1
);
1321 brw_MUL( p
, x1y1
, x1y1
, param1
);
1322 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1323 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1325 /* And now in x. There are horrible register dependencies here,
1326 but we have nothing else to do. */
1327 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1328 brw_MUL( p
, x1y0
, x1y0
, param0
);
1329 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1331 /* scale by pow( 2, -15 ), as described above */
1332 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1334 release_tmps( c
, mark
);
1337 static void emit_noise2( struct brw_wm_compile
*c
,
1338 struct prog_instruction
*inst
)
1340 struct brw_compile
*p
= &c
->func
;
1341 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1342 GLuint mask
= inst
->DstReg
.WriteMask
;
1344 int mark
= mark_tmps( c
);
1346 assert( mark
== 0 );
1348 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1349 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1351 param0
= alloc_tmp( c
);
1352 param1
= alloc_tmp( c
);
1354 brw_MOV( p
, param0
, src0
);
1355 brw_MOV( p
, param1
, src1
);
1357 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1359 /* Fill in the result: */
1360 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1361 for (i
= 0 ; i
< 4; i
++) {
1362 if (mask
& (1<<i
)) {
1363 dst
= get_dst_reg(c
, inst
, i
, 1);
1364 brw_MOV( p
, dst
, param0
);
1367 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1368 brw_set_saturate( p
, 0 );
1370 release_tmps( c
, mark
);
1374 * The three-dimensional case is much like the one- and two- versions above,
1375 * but since the number of corners is rapidly growing we now pack 16 16-bit
1376 * hashes into each register to extract more parallelism from the EUs.
1378 static void noise3_sub( struct brw_wm_compile
*c
) {
1380 struct brw_compile
*p
= &c
->func
;
1381 struct brw_reg param0
, param1
, param2
,
1382 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1383 xi
, yi
, zi
, /* interpolation coefficients */
1384 t
, tmp
[ 8 ], /* float temporaries */
1385 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1386 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1388 int mark
= mark_tmps( c
);
1390 x0y0
= alloc_tmp( c
);
1391 x0y1
= alloc_tmp( c
);
1392 x1y0
= alloc_tmp( c
);
1393 x1y1
= alloc_tmp( c
);
1394 xi
= alloc_tmp( c
);
1395 yi
= alloc_tmp( c
);
1396 zi
= alloc_tmp( c
);
1398 for( i
= 0; i
< 8; i
++ ) {
1399 tmp
[ i
] = alloc_tmp( c
);
1400 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1401 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1404 param0
= lookup_tmp( c
, mark
- 4 );
1405 param1
= lookup_tmp( c
, mark
- 3 );
1406 param2
= lookup_tmp( c
, mark
- 2 );
1408 brw_set_access_mode( p
, BRW_ALIGN_1
);
1410 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1411 be hashed. Also compute the remainders (offsets within the unit
1412 cube), interleaved to reduce register dependency penalties. */
1413 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1414 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1415 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1416 brw_FRC( p
, param0
, param0
);
1417 brw_FRC( p
, param1
, param1
);
1418 brw_FRC( p
, param2
, param2
);
1419 /* Since we now have only 16 bits of precision in the hash, we must
1420 be more careful about thorough mixing to maintain entropy as we
1421 squash the input vector into a small scalar. */
1422 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1423 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1424 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1425 brw_imm_uw( 0x9B93 ) );
1426 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1427 brw_imm_uw( 0xBC8F ) );
1429 /* Temporarily disable the execution mask while we work with ExecSize=16
1430 channels (the mask is set for ExecSize=8 and is probably incorrect).
1431 Although this might cause execution of unwanted channels, the code
1432 writes only to temporary registers and has no side effects, so
1433 disabling the mask is harmless. */
1434 brw_push_insn_state( p
);
1435 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1436 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1437 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1438 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1440 /* We're now ready to perform the hashing. The eight hashes are
1441 interleaved for performance. The hash function used is
1442 designed to rapidly achieve avalanche and require only 16x16
1443 bit multiplication, and 8-bit swizzles (which we get for
1445 for( i
= 0; i
< 4; i
++ )
1446 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1447 for( i
= 0; i
< 4; i
++ )
1448 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1449 odd_bytes( wtmp
[ i
] ) );
1450 for( i
= 0; i
< 4; i
++ )
1451 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1452 for( i
= 0; i
< 4; i
++ )
1453 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1454 odd_bytes( wtmp
[ i
] ) );
1455 brw_pop_insn_state( p
);
1457 /* Now we want to initialise the four rear gradients based on the
1458 hashes. Format conversion from signed integer to float leaves
1459 everything scaled too high by a factor of pow( 2, 15 ), but
1460 we correct for that right at the end. */
1462 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1463 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1464 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1465 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1466 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1468 brw_push_insn_state( p
);
1469 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1470 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1471 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1472 brw_pop_insn_state( p
);
1474 brw_MUL( p
, x1y0
, x1y0
, t
);
1475 brw_MUL( p
, x1y1
, x1y1
, t
);
1476 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1477 brw_MUL( p
, x0y0
, x0y0
, param0
);
1478 brw_MUL( p
, x0y1
, x0y1
, param0
);
1481 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1482 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1483 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1484 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1486 brw_push_insn_state( p
);
1487 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1488 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1489 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1490 brw_pop_insn_state( p
);
1492 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1493 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1494 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1495 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1496 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1498 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1499 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1500 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1501 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1504 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1505 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1506 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1507 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1509 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1510 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1511 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1512 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1514 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1515 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1516 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1517 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1519 /* We interpolate between the gradients using the polynomial
1520 6t^5 - 15t^4 + 10t^3 (Perlin). */
1521 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1522 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1523 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1524 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1525 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1526 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1527 brw_MUL( p
, xi
, xi
, param0
);
1528 brw_MUL( p
, yi
, yi
, param1
);
1529 brw_MUL( p
, zi
, zi
, param2
);
1530 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1531 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1532 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1533 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1534 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1535 brw_MUL( p
, xi
, xi
, param0
);
1536 brw_MUL( p
, yi
, yi
, param1
);
1537 brw_MUL( p
, zi
, zi
, param2
);
1538 brw_MUL( p
, xi
, xi
, param0
);
1539 brw_MUL( p
, yi
, yi
, param1
);
1540 brw_MUL( p
, zi
, zi
, param2
);
1541 brw_MUL( p
, xi
, xi
, param0
);
1542 brw_MUL( p
, yi
, yi
, param1
);
1543 brw_MUL( p
, zi
, zi
, param2
);
1545 /* Here we interpolate in the y dimension... */
1546 brw_MUL( p
, x0y1
, x0y1
, yi
);
1547 brw_MUL( p
, x1y1
, x1y1
, yi
);
1548 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1549 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1551 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1552 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1553 brw_MUL( p
, x1y0
, x1y0
, xi
);
1554 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1556 /* Now do the same thing for the front four gradients... */
1558 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1559 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1560 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1561 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1563 brw_push_insn_state( p
);
1564 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1565 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1566 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1567 brw_pop_insn_state( p
);
1569 brw_MUL( p
, x1y0
, x1y0
, t
);
1570 brw_MUL( p
, x1y1
, x1y1
, t
);
1571 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1572 brw_MUL( p
, x0y0
, x0y0
, param0
);
1573 brw_MUL( p
, x0y1
, x0y1
, param0
);
1576 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1577 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1578 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1579 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1581 brw_push_insn_state( p
);
1582 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1583 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1584 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1585 brw_pop_insn_state( p
);
1587 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1588 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1589 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1590 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1591 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1593 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1594 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1595 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1596 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1599 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1600 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1601 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1602 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1604 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1605 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1606 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1607 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1609 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1610 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1611 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1612 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1614 /* The interpolation coefficients are still around from last time, so
1615 again interpolate in the y dimension... */
1616 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1617 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1618 brw_MUL( p
, x0y1
, x0y1
, yi
);
1619 brw_MUL( p
, x1y1
, x1y1
, yi
);
1620 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1621 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1623 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1624 time put the front face in tmp[ 1 ] and we're nearly there... */
1625 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1626 brw_MUL( p
, x1y0
, x1y0
, xi
);
1627 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1629 /* The final interpolation, in the z dimension: */
1630 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1631 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1632 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1634 /* scale by pow( 2, -15 ), as described above */
1635 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1637 release_tmps( c
, mark
);
1640 static void emit_noise3( struct brw_wm_compile
*c
,
1641 struct prog_instruction
*inst
)
1643 struct brw_compile
*p
= &c
->func
;
1644 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1645 GLuint mask
= inst
->DstReg
.WriteMask
;
1647 int mark
= mark_tmps( c
);
1649 assert( mark
== 0 );
1651 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
1652 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
1653 src2
= get_src_reg( c
, inst
->SrcReg
, 2, 1 );
1655 param0
= alloc_tmp( c
);
1656 param1
= alloc_tmp( c
);
1657 param2
= alloc_tmp( c
);
1659 brw_MOV( p
, param0
, src0
);
1660 brw_MOV( p
, param1
, src1
);
1661 brw_MOV( p
, param2
, src2
);
1663 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1665 /* Fill in the result: */
1666 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1667 for (i
= 0 ; i
< 4; i
++) {
1668 if (mask
& (1<<i
)) {
1669 dst
= get_dst_reg(c
, inst
, i
, 1);
1670 brw_MOV( p
, dst
, param0
);
1673 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1674 brw_set_saturate( p
, 0 );
1676 release_tmps( c
, mark
);
1680 * For the four-dimensional case, the little micro-optimisation benefits
1681 * we obtain by unrolling all the loops aren't worth the massive bloat it
1682 * now causes. Instead, we loop twice around performing a similar operation
1683 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1684 * code to glue it all together.
1686 static void noise4_sub( struct brw_wm_compile
*c
)
1688 struct brw_compile
*p
= &c
->func
;
1689 struct brw_reg param
[ 4 ],
1690 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1691 w0
, /* noise for the w=0 cube */
1692 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1693 interp
[ 4 ], /* interpolation coefficients */
1694 t
, tmp
[ 8 ], /* float temporaries */
1695 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1696 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1698 int mark
= mark_tmps( c
);
1699 GLuint loop
, origin
;
1701 x0y0
= alloc_tmp( c
);
1702 x0y1
= alloc_tmp( c
);
1703 x1y0
= alloc_tmp( c
);
1704 x1y1
= alloc_tmp( c
);
1706 w0
= alloc_tmp( c
);
1707 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1708 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1710 for( i
= 0; i
< 4; i
++ ) {
1711 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1712 interp
[ i
] = alloc_tmp( c
);
1715 for( i
= 0; i
< 8; i
++ ) {
1716 tmp
[ i
] = alloc_tmp( c
);
1717 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1718 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1721 brw_set_access_mode( p
, BRW_ALIGN_1
);
1723 /* We only want 16 bits of precision from the integral part of each
1724 co-ordinate, but unfortunately the RNDD semantics would saturate
1725 at 16 bits if we performed the operation directly to a 16-bit
1726 destination. Therefore, we round to 32-bit temporaries where
1727 appropriate, and then store only the lower 16 bits. */
1728 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1729 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1730 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1731 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1732 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1733 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1735 /* Modify the flag register here, because the side effect is useful
1736 later (see below). We know for certain that all flags will be
1737 cleared, since the FRC instruction cannot possibly generate
1738 negative results. Even for exceptional inputs (infinities, denormals,
1739 NaNs), the architecture guarantees that the L conditional is false. */
1740 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1741 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1742 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1743 for( i
= 1; i
< 4; i
++ )
1744 brw_FRC( p
, param
[ i
], param
[ i
] );
1746 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1748 for( i
= 0; i
< 4; i
++ )
1749 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1750 for( i
= 0; i
< 4; i
++ )
1751 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1752 for( i
= 0; i
< 4; i
++ )
1753 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1754 for( i
= 0; i
< 4; i
++ )
1755 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1756 for( j
= 0; j
< 3; j
++ )
1757 for( i
= 0; i
< 4; i
++ )
1758 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1760 /* Mark the current address, as it will be a jump destination. The
1761 following code will be executed twice: first, with the flag
1762 register clear indicating the w=0 case, and second with flags
1766 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1767 be hashed. Since we have only 16 bits of precision in the hash, we
1768 must be careful about thorough mixing to maintain entropy as we
1769 squash the input vector into a small scalar. */
1770 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1771 brw_imm_uw( 0xBC8F ) );
1772 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1773 brw_imm_uw( 0xD0BD ) );
1774 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1775 brw_imm_uw( 0x9B93 ) );
1776 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1777 brw_imm_uw( 0xA359 ) );
1778 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1779 brw_imm_uw( 0xBC8F ) );
1781 /* Temporarily disable the execution mask while we work with ExecSize=16
1782 channels (the mask is set for ExecSize=8 and is probably incorrect).
1783 Although this might cause execution of unwanted channels, the code
1784 writes only to temporary registers and has no side effects, so
1785 disabling the mask is harmless. */
1786 brw_push_insn_state( p
);
1787 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1788 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1789 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1790 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1792 /* We're now ready to perform the hashing. The eight hashes are
1793 interleaved for performance. The hash function used is
1794 designed to rapidly achieve avalanche and require only 16x16
1795 bit multiplication, and 8-bit swizzles (which we get for
1797 for( i
= 0; i
< 4; i
++ )
1798 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1799 for( i
= 0; i
< 4; i
++ )
1800 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1801 odd_bytes( wtmp
[ i
] ) );
1802 for( i
= 0; i
< 4; i
++ )
1803 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1804 for( i
= 0; i
< 4; i
++ )
1805 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1806 odd_bytes( wtmp
[ i
] ) );
1807 brw_pop_insn_state( p
);
1809 /* Now we want to initialise the four rear gradients based on the
1810 hashes. Format conversion from signed integer to float leaves
1811 everything scaled too high by a factor of pow( 2, 15 ), but
1812 we correct for that right at the end. */
1814 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1815 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1816 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1817 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1818 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1820 brw_push_insn_state( p
);
1821 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1822 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1823 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1824 brw_pop_insn_state( p
);
1826 brw_MUL( p
, x1y0
, x1y0
, t
);
1827 brw_MUL( p
, x1y1
, x1y1
, t
);
1828 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1829 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1830 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1833 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1834 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1835 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1836 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1838 brw_push_insn_state( p
);
1839 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1840 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1841 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1842 brw_pop_insn_state( p
);
1844 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1845 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1846 /* prepare t for the w component (used below): w the first time through
1847 the loop; w - 1 the second time) */
1848 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1849 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1850 p
->current
->header
.predicate_inverse
= 1;
1851 brw_MOV( p
, t
, param
[ 3 ] );
1852 p
->current
->header
.predicate_inverse
= 0;
1853 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1854 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1855 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1857 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1858 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1859 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1860 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1863 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1864 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1865 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1866 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1868 brw_push_insn_state( p
);
1869 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1870 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1871 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1872 brw_pop_insn_state( p
);
1874 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
1875 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
1876 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
1877 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
1879 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1880 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1881 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1882 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1885 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1886 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1887 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1888 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1890 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1891 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1892 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1893 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1894 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1896 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1897 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1898 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1899 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1901 /* Here we interpolate in the y dimension... */
1902 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1903 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1904 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1905 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1906 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1907 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1909 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1910 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1911 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1912 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1914 /* Now do the same thing for the front four gradients... */
1916 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1917 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1918 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1919 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1921 brw_push_insn_state( p
);
1922 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1923 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1924 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1925 brw_pop_insn_state( p
);
1927 brw_MUL( p
, x1y0
, x1y0
, t
);
1928 brw_MUL( p
, x1y1
, x1y1
, t
);
1929 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1930 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1931 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1934 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1935 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1936 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1937 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1939 brw_push_insn_state( p
);
1940 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1941 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1942 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1943 brw_pop_insn_state( p
);
1945 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1946 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1947 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
1948 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1949 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1951 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1952 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1953 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1954 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1957 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1958 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1959 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1960 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1962 brw_push_insn_state( p
);
1963 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1964 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1965 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1966 brw_pop_insn_state( p
);
1968 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1969 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1970 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1971 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1972 /* prepare t for the w component (used below): w the first time through
1973 the loop; w - 1 the second time) */
1974 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1975 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1976 p
->current
->header
.predicate_inverse
= 1;
1977 brw_MOV( p
, t
, param
[ 3 ] );
1978 p
->current
->header
.predicate_inverse
= 0;
1979 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1981 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1982 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1983 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1984 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1987 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1988 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1989 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1990 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1992 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1993 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1994 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1995 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1997 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1998 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1999 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2000 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2002 /* Interpolate in the y dimension: */
2003 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2004 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2005 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2006 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2007 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2008 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2010 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2011 time put the front face in tmp[ 1 ] and we're nearly there... */
2012 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2013 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2014 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2016 /* Another interpolation, in the z dimension: */
2017 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2018 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2019 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2021 /* Exit the loop if we've computed both cubes... */
2022 origin
= p
->nr_insn
;
2023 brw_push_insn_state( p
);
2024 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2025 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2026 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2027 brw_pop_insn_state( p
);
2029 /* Save the result for the w=0 case, and increment the w coordinate: */
2030 brw_MOV( p
, w0
, tmp
[ 0 ] );
2031 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2034 /* Loop around for the other cube. Explicitly set the flag register
2035 (unfortunately we must spend an extra instruction to do this: we
2036 can't rely on a side effect of the previous MOV or ADD because
2037 conditional modifiers which are normally true might be false in
2038 exceptional circumstances, e.g. given a NaN input; the add to
2039 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2040 brw_push_insn_state( p
);
2041 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2042 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2043 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2044 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2045 brw_pop_insn_state( p
);
2047 /* Patch the previous conditional branch now that we know the
2048 destination address. */
2049 brw_set_src1( p
->store
+ origin
,
2050 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2052 /* The very last interpolation. */
2053 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2054 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2055 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2057 /* scale by pow( 2, -15 ), as described above */
2058 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2060 release_tmps( c
, mark
);
/* Emit code for a four-input noise instruction.
 *
 * The four source components are copied into freshly allocated temporaries
 * (param0..param3), the SUB_NOISE4 subroutine (noise4_sub) is invoked, and
 * param0 is then moved into every destination channel selected by the
 * instruction's write mask.
 *
 * c    - compile state; c->func is the brw_compile being emitted into.
 * inst - the program instruction being translated.
 *
 * NOTE(review): this extract appears to be missing lines (braces, and the
 * declaration of the loop index `i`); confirm against the full file.
 */
2063 static void emit_noise4( struct brw_wm_compile
*c
,
2064 struct prog_instruction
*inst
)
2066 struct brw_compile
*p
= &c
->func
;
2067 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2068 GLuint mask
= inst
->DstReg
.WriteMask
;
/* Remember the temporary-allocation mark so the temporaries can be
   released at the end. */
2070 int mark
= mark_tmps( c
);
/* No temporaries may be live on entry; presumably the subroutine locates
   its parameters by fixed temporary index -- TODO confirm in noise4_sub. */
2072 assert( mark
== 0 );
/* Fetch components 0..3 of the source operand. */
2074 src0
= get_src_reg( c
, inst
->SrcReg
, 0, 1 );
2075 src1
= get_src_reg( c
, inst
->SrcReg
, 1, 1 );
2076 src2
= get_src_reg( c
, inst
->SrcReg
, 2, 1 );
2077 src3
= get_src_reg( c
, inst
->SrcReg
, 3, 1 );
/* Allocate one temporary per parameter and copy the sources into them. */
2079 param0
= alloc_tmp( c
);
2080 param1
= alloc_tmp( c
);
2081 param2
= alloc_tmp( c
);
2082 param3
= alloc_tmp( c
);
2084 brw_MOV( p
, param0
, src0
);
2085 brw_MOV( p
, param1
, src1
);
2086 brw_MOV( p
, param2
, src2
);
2087 brw_MOV( p
, param3
, src3
);
2089 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2091 /* Fill in the result: */
2092 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
/* The same value (param0) is written to every enabled channel. */
2093 for (i
= 0 ; i
< 4; i
++) {
2094 if (mask
& (1<<i
)) {
2095 dst
= get_dst_reg(c
, inst
, i
, 1);
2096 brw_MOV( p
, dst
, param0
);
/* Turn saturation back off if it was enabled above. */
2099 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2100 brw_set_saturate( p
, 0 );
2102 release_tmps( c
, mark
);
/* Emit code producing the window-position X and/or Y value for the
 * channels enabled in the instruction's write mask.
 *
 * Per the comments and operands below, X is offset by -origin_x, and Y is
 * negated and offset by origin_y + drawable_height - 1 (all taken from
 * c->key).  The sources are reinterpreted as BRW_REGISTER_TYPE_W.
 *
 * NOTE(review): the opening lines of the two emitting calls (opcode and
 * destination operand) are not visible in this extract; presumably they
 * are adds into dst[0] and dst[1] -- confirm against the full file.
 */
2105 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2106 struct prog_instruction
*inst
)
2108 struct brw_compile
*p
= &c
->func
;
2109 GLuint mask
= inst
->DstReg
.WriteMask
;
2110 struct brw_reg src0
[2], dst
[2];
/* Only the first two channels (0 and 1) of the destination and of source
   operand 0 are used. */
2112 dst
[0] = get_dst_reg(c
, inst
, 0, 1);
2113 dst
[1] = get_dst_reg(c
, inst
, 1, 1);
2115 src0
[0] = get_src_reg(c
, &inst
->SrcReg
[0], 0, 1);
2116 src0
[1] = get_src_reg(c
, &inst
->SrcReg
[0], 1, 1);
2118 /* Calculate the pixel offset from window bottom left into destination
2121 if (mask
& WRITEMASK_X
) {
2122 /* X' = X - origin_x */
2125 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2126 brw_imm_d(0 - c
->key
.origin_x
));
2129 if (mask
& WRITEMASK_Y
) {
2130 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2133 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2134 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2139 BIAS on SIMD8 not working yet...
/* Emit a texture lookup using the SIMD16 "sample with bias" sampler
 * message (BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS).
 *
 * Message registers 2..4 receive the texture coordinates, with unused
 * coordinates zero-filled according to inst->TexSrcTarget; register 5
 * receives source component 3 (presumably the bias value -- TODO confirm)
 * and register 6 is zeroed.
 *
 * NOTE(review): the case label of the third coordinate group, the `break`
 * statements, the declaration of `i` and the opening line of the sampler
 * call are not visible in this extract; confirm against the full file.
 */
2141 static void emit_txb(struct brw_wm_compile
*c
,
2142 struct prog_instruction
*inst
)
2144 struct brw_compile
*p
= &c
->func
;
2145 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Map the instruction's sampler index to a texture unit. */
2146 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2149 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination channels and all four components of
   source operand 0. */
2150 for (i
= 0; i
< 4; i
++)
2151 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
2152 for (i
= 0; i
< 4; i
++)
2153 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Load the coordinates into message registers 2..4. */
2155 switch (inst
->TexSrcTarget
) {
2156 case TEXTURE_1D_INDEX
:
2157 brw_MOV(p
, brw_message_reg(2), src
[0]);
2158 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0));
2159 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2161 case TEXTURE_2D_INDEX
:
2162 case TEXTURE_RECT_INDEX
:
2163 brw_MOV(p
, brw_message_reg(2), src
[0]);
2164 brw_MOV(p
, brw_message_reg(3), src
[1]);
2165 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* Remaining targets (label not visible here): all three coordinates. */
2168 brw_MOV(p
, brw_message_reg(2), src
[0]);
2169 brw_MOV(p
, brw_message_reg(3), src
[1]);
2170 brw_MOV(p
, brw_message_reg(4), src
[2]);
2173 brw_MOV(p
, brw_message_reg(5), src
[3]);
2174 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
/* Arguments of the sampler call whose opening line is not visible. */
2176 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2178 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2179 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2181 inst
->DstReg
.WriteMask
,
2182 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
/* Emit a plain texture lookup using the SIMD8 sample sampler message
 * (BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE).
 *
 * A coordinate write mask (`emit`) is chosen from inst->TexSrcTarget, the
 * coordinates are loaded into consecutive message registers, and, on a
 * path guarded by conditions not visible here (presumably the `shadow`
 * flag), message register 5 is zeroed and register 6 receives source
 * component 2.  After the sample, 1.0 is written to dst[3] on a path
 * whose guard is likewise not visible.
 *
 * NOTE(review): the declarations of `i`, `emit`, `nr` and `msg_len`, every
 * conditional, and the opening line of the sampler call are missing from
 * this extract; confirm against the full file.
 */
2188 static void emit_tex(struct brw_wm_compile
*c
,
2189 struct prog_instruction
*inst
)
2191 struct brw_compile
*p
= &c
->func
;
2192 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Map the instruction's sampler index to a texture unit. */
2193 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when this unit's bit is set in the key's shadow-texture mask. */
2198 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2200 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2202 for (i
= 0; i
< 4; i
++)
2203 dst
[i
] = get_dst_reg(c
, inst
, i
, 1);
2204 for (i
= 0; i
< 4; i
++)
2205 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
, 1);
/* Pick which coordinate components are sent for this texture target. */
2208 switch (inst
->TexSrcTarget
) {
2209 case TEXTURE_1D_INDEX
:
2213 case TEXTURE_2D_INDEX
:
2214 case TEXTURE_RECT_INDEX
:
2215 emit
= WRITEMASK_XY
;
2219 emit
= WRITEMASK_XYZ
;
/* Fill message registers: either the swizzled source component or zero
   (the condition selecting between the two moves is not visible). */
2225 for (i
= 0; i
< nr
; i
++) {
2226 static const GLuint swz
[4] = {0,1,2,2};
2228 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2230 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2235 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0));
2236 brw_MOV(p
, brw_message_reg(6), src
[2]);
/* Arguments of the sampler call whose opening line is not visible. */
2240 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2242 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2243 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2245 inst
->DstReg
.WriteMask
,
2246 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
,
2252 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2256 * Resolve subroutine calls after code emit is done.
/* Post-emit fixup step: hands the compile's instruction store (c->func)
   to brw_resolve_cals().  The function body's braces are not visible in
   this extract. */
2258 static void post_wm_emit( struct brw_wm_compile
*c
)
2260 brw_resolve_cals(&c
->func
);
/* Main code-generation loop: walks c->prog_instructions[0 .. nr_fp_insns)
 * and emits hardware instructions for each one by dispatching on its
 * opcode to the emit_* helpers, handling IF/ELSE/ENDIF, subroutine
 * call/return and loops inline.
 *
 * brw - driver context (not referenced in the lines visible here).
 * c   - compile state; c->func receives the emitted instructions.
 *
 * NOTE(review): most `case` labels, all `break`s and several statements
 * are missing from this extract, so the mapping from opcode to the code
 * below is only partly visible; confirm against the full file.
 */
2263 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2266 #define MAX_LOOP_DEPTH 32
2267 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2268 struct brw_instruction
*inst0
, *inst1
;
2269 int i
, if_insn
= 0, loop_insn
= 0;
2270 struct brw_compile
*p
= &c
->func
;
2271 struct brw_indirect stack_index
= brw_indirect(0, 0);
/* Disable compression and point the stack address register at c->stack;
   the call/return sequences below push and pop return addresses there. */
2275 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2276 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
/* Translate each program instruction in turn. */
2278 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2279 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
/* Select the conditional modifier for this instruction: NZ when it
   updates condition codes, NONE on the other path (the `else` keyword is
   not visible in this extract). */
2281 if (inst
->CondUpdate
)
2282 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2284 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
/* Opcode dispatch; the case labels for the helpers below are not
   visible in this extract. */
2286 switch (inst
->Opcode
) {
2288 emit_pixel_xy(c
, inst
);
2291 emit_delta_xy(c
, inst
);
2294 emit_pixel_w(c
, inst
);
2297 emit_linterp(c
, inst
);
2300 emit_pinterp(c
, inst
);
2303 emit_cinterp(c
, inst
);
2306 emit_wpos_xy(c
, inst
);
2309 emit_fb_write(c
, inst
);
2330 emit_trunc(c
, inst
);
2405 emit_noise1(c
, inst
);
2408 emit_noise2(c
, inst
);
2411 emit_noise3(c
, inst
);
2414 emit_noise4(c
, inst
);
/* if_inst[] is used as a stack of open IF instructions: brw_IF pushes,
   brw_ELSE replaces the top entry, brw_ENDIF pops. */
2426 assert(if_insn
< MAX_IFSN
);
2427 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2430 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2433 assert(if_insn
> 0);
2434 brw_ENDIF(p
, if_inst
[--if_insn
]);
2437 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Call sequence (presumably the subroutine-call opcode; label not
   visible): store IP + 3*16 at the stack slot, advance the stack pointer
   by 4, record the call site under the instruction's Comment name, then
   emit an IP add with a placeholder offset.  Presumably 3*16 skips the
   three instructions of this sequence and the placeholder is patched
   later -- TODO confirm. */
2443 brw_push_insn_state(p
);
2444 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2445 brw_set_access_mode(p
, BRW_ALIGN_1
);
2446 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2447 brw_set_access_mode(p
, BRW_ALIGN_16
);
2448 brw_ADD(p
, get_addr_reg(stack_index
),
2449 get_addr_reg(stack_index
), brw_imm_d(4));
2450 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2451 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2452 brw_pop_insn_state(p
);
/* Return sequence (label not visible): step the stack pointer back by 4
   and load IP from the stack slot. */
2456 brw_push_insn_state(p
);
2457 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2458 brw_ADD(p
, get_addr_reg(stack_index
),
2459 get_addr_reg(stack_index
), brw_imm_d(-4));
2460 brw_set_access_mode(p
, BRW_ALIGN_1
);
2461 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2462 brw_set_access_mode(p
, BRW_ALIGN_16
);
2463 brw_pop_insn_state(p
);
/* Loops: loop_inst[] is a stack of open DO instructions.
   NOTE(review): no bound check against MAX_LOOP_DEPTH is visible for the
   push below, unlike the if_inst push above -- confirm in the full file. */
2466 case OPCODE_BGNLOOP
:
2467 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2471 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2475 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2477 case OPCODE_ENDLOOP
:
2479 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2480 /* patch all the BREAK instructions from
2482 while (inst0
> loop_inst
[loop_insn
]) {
2484 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2485 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2486 inst0
->bits3
.if_else
.pop_count
= 0;
2487 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2488 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2489 inst0
->bits3
.if_else
.pop_count
= 0;
2494 _mesa_printf("unsupported IR in fragment shader %d\n",
2497 if (inst
->CondUpdate
)
2498 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2500 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2507 * Do GPU code generation for shaders that use GLSL features such as
2508 * flow control. Other shaders will be compiled with the
2510 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2512 if (INTEL_DEBUG
& DEBUG_WM
) {
2513 _mesa_printf("brw_wm_glsl_emit:\n");
2516 /* initial instruction translation/simplification */
2519 /* actual code generation */
2520 brw_wm_emit_glsl(brw
, c
);
2522 if (INTEL_DEBUG
& DEBUG_WM
) {
2523 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2526 c
->prog_data
.total_grf
= c
->reg_index
;
2527 c
->prog_data
.total_scratch
= 0;