1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
46 * Record the mapping of a Mesa register to a hardware register.
48 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
49 int component
, struct brw_reg reg
)
51 c
->wm_regs
[file
][index
][component
].reg
= reg
;
52 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
56 * Examine instruction's write mask to find index of first component
57 * enabled for writing.
59 static int get_scalar_dst_index(struct prog_instruction
*inst
)
62 for (i
= 0; i
< 4; i
++)
63 if (inst
->DstReg
.WriteMask
& (1<<i
))
68 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
71 if(c
->tmp_index
== c
->tmp_max
)
72 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
74 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
79 * Save current temp register info.
80 * There must be a matching call to release_tmps().
82 static int mark_tmps(struct brw_wm_compile
*c
)
87 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
89 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
92 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
98 * Convert Mesa src register to brw register.
100 * Since we're running in SOA mode each Mesa register corresponds to four
101 * hardware registers. We allocate the hardware registers as needed here.
103 * \param file register file, one of PROGRAM_x
104 * \param index register number
105 * \param component src component (X=0, Y=1, Z=2, W=3)
106 * \param nr not used?!?
107 * \param neg negate value?
108 * \param abs take absolute value?
110 static struct brw_reg
111 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
112 int nr
, GLuint neg
, GLuint abs
)
116 case PROGRAM_STATE_VAR
:
117 case PROGRAM_CONSTANT
:
118 case PROGRAM_UNIFORM
:
119 file
= PROGRAM_STATE_VAR
;
121 case PROGRAM_UNDEFINED
:
122 return brw_null_reg();
123 case PROGRAM_TEMPORARY
:
126 case PROGRAM_PAYLOAD
:
129 _mesa_problem(NULL
, "Unexpected file in get_reg()");
130 return brw_null_reg();
133 /* see if we've already allocated a HW register for this Mesa register */
134 if (c
->wm_regs
[file
][index
][component
].inited
) {
136 reg
= c
->wm_regs
[file
][index
][component
].reg
;
139 /* no, allocate new register */
140 reg
= brw_vec8_grf(c
->reg_index
, 0);
143 /* if this is a new register allocation, record it in the table */
144 if (!c
->wm_regs
[file
][index
][component
].inited
) {
145 set_reg(c
, file
, index
, component
, reg
);
149 if (c
->reg_index
>= BRW_WM_MAX_GRF
- 12) {
150 /* ran out of temporary registers! */
152 /* This is a big hack for now.
153 * Return bad register index, just don't hang the GPU.
155 _mesa_fprintf(stderr
, "out of regs %d\n", c
->reg_index
);
156 c
->reg_index
= BRW_WM_MAX_GRF
- 13;
158 return brw_null_reg();
162 if (neg
& (1 << component
)) {
172 * Preallocate registers. This sets up the Mesa to hardware register
173 * mapping for certain registers, such as constants (uniforms/state vars)
176 static void prealloc_reg(struct brw_wm_compile
*c
)
180 int nr_interp_regs
= 0;
181 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
183 for (i
= 0; i
< 4; i
++) {
184 if (i
< c
->key
.nr_depth_regs
)
185 reg
= brw_vec8_grf(i
* 2, 0);
187 reg
= brw_vec8_grf(0, 0);
188 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
190 c
->reg_index
+= 2 * c
->key
.nr_depth_regs
;
194 const int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
196 if (1 /* XXX threshold: nr_params <= 8 */) {
197 const struct gl_program_parameter_list
*plist
=
198 c
->fp
->program
.Base
.Parameters
;
201 /* number of float constants in CURBE */
202 c
->prog_data
.nr_params
= 4 * nr_params
;
204 /* loop over program constants (float[4]) */
205 for (i
= 0; i
< nr_params
; i
++) {
206 /* loop over XYZW channels */
207 for (j
= 0; j
< 4; j
++, index
++) {
208 reg
= brw_vec1_grf(c
->reg_index
+ index
/ 8, index
% 8);
209 /* Save pointer to parameter/constant value.
210 * Constants will be copied in prepare_constant_buffer()
212 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
213 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
216 /* number of constant regs used (each reg is float[8]) */
217 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
218 c
->reg_index
+= c
->nr_creg
;
221 /* number of float constants in CURBE */
222 c
->prog_data
.nr_params
= 0;
224 /* When there's a lot of FP constants we'll store them in a
225 * texture-like buffer instead of using the CURBE buffer.
226 * This means we won't use GRF registers for constants and we'll
227 * have to fetch constants with a dataport read.
232 /* fragment shader inputs */
233 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
234 if (inputs
& (1<<i
)) {
236 reg
= brw_vec8_grf(c
->reg_index
, 0);
237 for (j
= 0; j
< 4; j
++)
238 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
243 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
244 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
245 c
->prog_data
.curb_read_length
= c
->nr_creg
;
246 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
248 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
254 * Convert Mesa dst register to brw register.
256 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
257 const struct prog_instruction
*inst
,
261 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
267 * Convert Mesa src register to brw register.
269 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
270 const struct prog_instruction
*inst
,
271 GLuint srcRegIndex
, GLuint channel
)
273 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
275 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
277 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
278 src
->NegateBase
, src
->Abs
);
283 * Same as \sa get_src_reg() but if the register is a literal, emit
284 * a brw_reg encoding the literal.
285 * Note that a brw instruction only allows one src operand to be a literal.
286 * For instructions with more than one operand, only the second can be a literal.
288 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
289 const struct prog_instruction
*inst
,
290 GLuint srcRegIndex
, GLuint channel
)
292 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
293 if (src
->File
== PROGRAM_CONSTANT
) {
295 const int component
= GET_SWZ(src
->Swizzle
, channel
);
296 const GLfloat
*param
=
297 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
298 GLfloat value
= param
[component
];
302 value
= FABSF(value
);
303 return brw_imm_f(value
);
306 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
312 * Subroutines are minimal support for reusable instruction sequences.
313 * They are implemented as simply as possible to minimise overhead: there
314 * is no explicit support for communication between the caller and callee
315 * other than saving the return address in a temporary register, nor is
316 * there any automatic local storage. This implies that great care is
317 * required before attempting reentrancy or any kind of nested
318 * subroutine invocations.
320 static void invoke_subroutine( struct brw_wm_compile
*c
,
321 enum _subroutine subroutine
,
322 void (*emit
)( struct brw_wm_compile
* ) )
324 struct brw_compile
*p
= &c
->func
;
326 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
328 if( c
->subroutines
[ subroutine
] ) {
329 /* subroutine previously emitted: reuse existing instructions */
331 int mark
= mark_tmps( c
);
332 struct brw_reg return_address
= retype( alloc_tmp( c
),
333 BRW_REGISTER_TYPE_UD
);
334 int here
= p
->nr_insn
;
336 brw_push_insn_state(p
);
337 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
338 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
340 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
341 brw_imm_d( ( c
->subroutines
[ subroutine
] -
343 brw_pop_insn_state(p
);
345 release_tmps( c
, mark
);
347 /* previously unused subroutine: emit, and mark for later reuse */
349 int mark
= mark_tmps( c
);
350 struct brw_reg return_address
= retype( alloc_tmp( c
),
351 BRW_REGISTER_TYPE_UD
);
352 struct brw_instruction
*calc
;
353 int base
= p
->nr_insn
;
355 brw_push_insn_state(p
);
356 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
357 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
358 brw_pop_insn_state(p
);
360 c
->subroutines
[ subroutine
] = p
->nr_insn
;
364 brw_push_insn_state(p
);
365 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
366 brw_MOV( p
, brw_ip_reg(), return_address
);
367 brw_pop_insn_state(p
);
369 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
371 release_tmps( c
, mark
);
375 static void emit_abs( struct brw_wm_compile
*c
,
376 struct prog_instruction
*inst
)
379 struct brw_compile
*p
= &c
->func
;
380 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
381 for (i
= 0; i
< 4; i
++) {
382 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
383 struct brw_reg src
, dst
;
384 dst
= get_dst_reg(c
, inst
, i
);
385 src
= get_src_reg(c
, inst
, 0, i
);
386 brw_MOV(p
, dst
, brw_abs(src
));
389 brw_set_saturate(p
, 0);
392 static void emit_trunc( struct brw_wm_compile
*c
,
393 struct prog_instruction
*inst
)
396 struct brw_compile
*p
= &c
->func
;
397 GLuint mask
= inst
->DstReg
.WriteMask
;
398 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
399 for (i
= 0; i
< 4; i
++) {
401 struct brw_reg src
, dst
;
402 dst
= get_dst_reg(c
, inst
, i
);
403 src
= get_src_reg(c
, inst
, 0, i
);
404 brw_RNDZ(p
, dst
, src
);
407 brw_set_saturate(p
, 0);
410 static void emit_mov( struct brw_wm_compile
*c
,
411 struct prog_instruction
*inst
)
414 struct brw_compile
*p
= &c
->func
;
415 GLuint mask
= inst
->DstReg
.WriteMask
;
416 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
417 for (i
= 0; i
< 4; i
++) {
419 struct brw_reg src
, dst
;
420 dst
= get_dst_reg(c
, inst
, i
);
421 src
= get_src_reg_imm(c
, inst
, 0, i
);
422 brw_MOV(p
, dst
, src
);
425 brw_set_saturate(p
, 0);
428 static void emit_pixel_xy(struct brw_wm_compile
*c
,
429 struct prog_instruction
*inst
)
431 struct brw_reg r1
= brw_vec1_grf(1, 0);
432 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
434 struct brw_reg dst0
, dst1
;
435 struct brw_compile
*p
= &c
->func
;
436 GLuint mask
= inst
->DstReg
.WriteMask
;
438 dst0
= get_dst_reg(c
, inst
, 0);
439 dst1
= get_dst_reg(c
, inst
, 1);
440 /* Calculate pixel centers by adding 1 or 0 to each of the
441 * micro-tile coordinates passed in r1.
443 if (mask
& WRITEMASK_X
) {
445 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
446 stride(suboffset(r1_uw
, 4), 2, 4, 0),
447 brw_imm_v(0x10101010));
450 if (mask
& WRITEMASK_Y
) {
452 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
453 stride(suboffset(r1_uw
, 5), 2, 4, 0),
454 brw_imm_v(0x11001100));
458 static void emit_delta_xy(struct brw_wm_compile
*c
,
459 struct prog_instruction
*inst
)
461 struct brw_reg r1
= brw_vec1_grf(1, 0);
462 struct brw_reg dst0
, dst1
, src0
, src1
;
463 struct brw_compile
*p
= &c
->func
;
464 GLuint mask
= inst
->DstReg
.WriteMask
;
466 dst0
= get_dst_reg(c
, inst
, 0);
467 dst1
= get_dst_reg(c
, inst
, 1);
468 src0
= get_src_reg(c
, inst
, 0, 0);
469 src1
= get_src_reg(c
, inst
, 0, 1);
470 /* Calc delta X,Y by subtracting origin in r1 from the pixel
473 if (mask
& WRITEMASK_X
) {
476 retype(src0
, BRW_REGISTER_TYPE_UW
),
480 if (mask
& WRITEMASK_Y
) {
483 retype(src1
, BRW_REGISTER_TYPE_UW
),
484 negate(suboffset(r1
,1)));
489 static void fire_fb_write( struct brw_wm_compile
*c
,
495 struct brw_compile
*p
= &c
->func
;
496 /* Pass through control information:
498 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
500 brw_push_insn_state(p
);
501 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
503 brw_message_reg(base_reg
+ 1),
505 brw_pop_insn_state(p
);
507 /* Send framebuffer write message: */
509 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
511 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
518 static void emit_fb_write(struct brw_wm_compile
*c
,
519 struct prog_instruction
*inst
)
521 struct brw_compile
*p
= &c
->func
;
527 /* Reserve a space for AA - may not be needed:
529 if (c
->key
.aa_dest_stencil_reg
)
532 brw_push_insn_state(p
);
533 for (channel
= 0; channel
< 4; channel
++) {
534 src0
= get_src_reg(c
, inst
, 0, channel
);
535 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
536 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
537 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
539 /* skip over the regs populated above: */
541 brw_pop_insn_state(p
);
543 if (c
->key
.source_depth_to_render_target
) {
544 if (c
->key
.computes_depth
) {
545 src0
= get_src_reg(c
, inst
, 2, 2);
546 brw_MOV(p
, brw_message_reg(nr
), src0
);
549 src0
= get_src_reg(c
, inst
, 1, 1);
550 brw_MOV(p
, brw_message_reg(nr
), src0
);
556 if (c
->key
.dest_depth_reg
) {
557 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
558 GLuint off
= c
->key
.dest_depth_reg
% 2;
563 /* XXX do we need this code? comp always 1, off always 0, it seems */
565 brw_push_insn_state(p
);
566 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
568 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
570 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
571 brw_pop_insn_state(p
);
576 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
577 brw_MOV(p
, brw_message_reg(nr
), src
);
582 target
= inst
->Aux
>> 1;
584 fire_fb_write(c
, 0, nr
, target
, eot
);
587 static void emit_pixel_w( struct brw_wm_compile
*c
,
588 struct prog_instruction
*inst
)
590 struct brw_compile
*p
= &c
->func
;
591 GLuint mask
= inst
->DstReg
.WriteMask
;
592 if (mask
& WRITEMASK_W
) {
593 struct brw_reg dst
, src0
, delta0
, delta1
;
594 struct brw_reg interp3
;
596 dst
= get_dst_reg(c
, inst
, 3);
597 src0
= get_src_reg(c
, inst
, 0, 0);
598 delta0
= get_src_reg(c
, inst
, 1, 0);
599 delta1
= get_src_reg(c
, inst
, 1, 1);
601 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
602 /* Calc 1/w - just linterp wpos[3] optimized by putting the
603 * result straight into a message reg.
605 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
606 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
610 BRW_MATH_FUNCTION_INV
,
611 BRW_MATH_SATURATE_NONE
,
613 BRW_MATH_PRECISION_FULL
);
617 static void emit_linterp(struct brw_wm_compile
*c
,
618 struct prog_instruction
*inst
)
620 struct brw_compile
*p
= &c
->func
;
621 GLuint mask
= inst
->DstReg
.WriteMask
;
622 struct brw_reg interp
[4];
623 struct brw_reg dst
, delta0
, delta1
;
627 src0
= get_src_reg(c
, inst
, 0, 0);
628 delta0
= get_src_reg(c
, inst
, 1, 0);
629 delta1
= get_src_reg(c
, inst
, 1, 1);
632 interp
[0] = brw_vec1_grf(nr
, 0);
633 interp
[1] = brw_vec1_grf(nr
, 4);
634 interp
[2] = brw_vec1_grf(nr
+1, 0);
635 interp
[3] = brw_vec1_grf(nr
+1, 4);
637 for(i
= 0; i
< 4; i
++ ) {
639 dst
= get_dst_reg(c
, inst
, i
);
640 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
641 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
646 static void emit_cinterp(struct brw_wm_compile
*c
,
647 struct prog_instruction
*inst
)
649 struct brw_compile
*p
= &c
->func
;
650 GLuint mask
= inst
->DstReg
.WriteMask
;
652 struct brw_reg interp
[4];
653 struct brw_reg dst
, src0
;
656 src0
= get_src_reg(c
, inst
, 0, 0);
659 interp
[0] = brw_vec1_grf(nr
, 0);
660 interp
[1] = brw_vec1_grf(nr
, 4);
661 interp
[2] = brw_vec1_grf(nr
+1, 0);
662 interp
[3] = brw_vec1_grf(nr
+1, 4);
664 for(i
= 0; i
< 4; i
++ ) {
666 dst
= get_dst_reg(c
, inst
, i
);
667 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
672 static void emit_pinterp(struct brw_wm_compile
*c
,
673 struct prog_instruction
*inst
)
675 struct brw_compile
*p
= &c
->func
;
676 GLuint mask
= inst
->DstReg
.WriteMask
;
678 struct brw_reg interp
[4];
679 struct brw_reg dst
, delta0
, delta1
;
680 struct brw_reg src0
, w
;
683 src0
= get_src_reg(c
, inst
, 0, 0);
684 delta0
= get_src_reg(c
, inst
, 1, 0);
685 delta1
= get_src_reg(c
, inst
, 1, 1);
686 w
= get_src_reg(c
, inst
, 2, 3);
689 interp
[0] = brw_vec1_grf(nr
, 0);
690 interp
[1] = brw_vec1_grf(nr
, 4);
691 interp
[2] = brw_vec1_grf(nr
+1, 0);
692 interp
[3] = brw_vec1_grf(nr
+1, 4);
694 for(i
= 0; i
< 4; i
++ ) {
696 dst
= get_dst_reg(c
, inst
, i
);
697 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
698 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
700 brw_MUL(p
, dst
, dst
, w
);
705 /* Sets the destination channels to 1.0 or 0.0 according to gl_FrontFacing. */
706 static void emit_frontfacing(struct brw_wm_compile
*c
,
707 struct prog_instruction
*inst
)
709 struct brw_compile
*p
= &c
->func
;
710 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
712 GLuint mask
= inst
->DstReg
.WriteMask
;
715 for (i
= 0; i
< 4; i
++) {
717 dst
= get_dst_reg(c
, inst
, i
);
718 brw_MOV(p
, dst
, brw_imm_f(0.0));
722 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
725 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
726 for (i
= 0; i
< 4; i
++) {
728 dst
= get_dst_reg(c
, inst
, i
);
729 brw_MOV(p
, dst
, brw_imm_f(1.0));
732 brw_set_predicate_control_flag_value(p
, 0xff);
735 static void emit_xpd(struct brw_wm_compile
*c
,
736 struct prog_instruction
*inst
)
739 struct brw_compile
*p
= &c
->func
;
740 GLuint mask
= inst
->DstReg
.WriteMask
;
741 for (i
= 0; i
< 4; i
++) {
745 struct brw_reg src0
, src1
, dst
;
746 dst
= get_dst_reg(c
, inst
, i
);
747 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
748 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
749 brw_MUL(p
, brw_null_reg(), src0
, src1
);
750 src0
= get_src_reg(c
, inst
, 0, i1
);
751 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
752 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
753 brw_MAC(p
, dst
, src0
, src1
);
754 brw_set_saturate(p
, 0);
757 brw_set_saturate(p
, 0);
760 static void emit_dp3(struct brw_wm_compile
*c
,
761 struct prog_instruction
*inst
)
763 struct brw_reg src0
[3], src1
[3], dst
;
765 struct brw_compile
*p
= &c
->func
;
766 for (i
= 0; i
< 3; i
++) {
767 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
768 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
771 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
772 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
773 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
774 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
775 brw_MAC(p
, dst
, src0
[2], src1
[2]);
776 brw_set_saturate(p
, 0);
779 static void emit_dp4(struct brw_wm_compile
*c
,
780 struct prog_instruction
*inst
)
782 struct brw_reg src0
[4], src1
[4], dst
;
784 struct brw_compile
*p
= &c
->func
;
785 for (i
= 0; i
< 4; i
++) {
786 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
787 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
789 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
790 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
791 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
792 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
793 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
794 brw_MAC(p
, dst
, src0
[3], src1
[3]);
795 brw_set_saturate(p
, 0);
798 static void emit_dph(struct brw_wm_compile
*c
,
799 struct prog_instruction
*inst
)
801 struct brw_reg src0
[4], src1
[4], dst
;
803 struct brw_compile
*p
= &c
->func
;
804 for (i
= 0; i
< 4; i
++) {
805 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
806 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
808 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
809 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
810 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
811 brw_MAC(p
, dst
, src0
[2], src1
[2]);
812 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
813 brw_ADD(p
, dst
, dst
, src1
[3]);
814 brw_set_saturate(p
, 0);
818 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
819 * Note that the result of the function is smeared across the dest
820 * register's X, Y, Z and W channels (subject to writemasking of course).
822 static void emit_math1(struct brw_wm_compile
*c
,
823 struct prog_instruction
*inst
, GLuint func
)
825 struct brw_compile
*p
= &c
->func
;
826 struct brw_reg src0
, dst
, tmp
;
827 const int mark
= mark_tmps( c
);
832 /* Get first component of source register */
833 src0
= get_src_reg(c
, inst
, 0, 0);
835 /* tmp = func(src0) */
836 brw_MOV(p
, brw_message_reg(2), src0
);
840 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
843 BRW_MATH_DATA_VECTOR
,
844 BRW_MATH_PRECISION_FULL
);
846 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
848 /* replicate tmp value across enabled dest channels */
849 for (i
= 0; i
< 4; i
++) {
850 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
851 dst
= get_dst_reg(c
, inst
, i
);
852 brw_MOV(p
, dst
, tmp
);
856 release_tmps(c
, mark
);
859 static void emit_rcp(struct brw_wm_compile
*c
,
860 struct prog_instruction
*inst
)
862 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
865 static void emit_rsq(struct brw_wm_compile
*c
,
866 struct prog_instruction
*inst
)
868 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
871 static void emit_sin(struct brw_wm_compile
*c
,
872 struct prog_instruction
*inst
)
874 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
877 static void emit_cos(struct brw_wm_compile
*c
,
878 struct prog_instruction
*inst
)
880 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
883 static void emit_ex2(struct brw_wm_compile
*c
,
884 struct prog_instruction
*inst
)
886 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
889 static void emit_lg2(struct brw_wm_compile
*c
,
890 struct prog_instruction
*inst
)
892 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
895 static void emit_add(struct brw_wm_compile
*c
,
896 struct prog_instruction
*inst
)
898 struct brw_compile
*p
= &c
->func
;
899 struct brw_reg src0
, src1
, dst
;
900 GLuint mask
= inst
->DstReg
.WriteMask
;
902 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
903 for (i
= 0 ; i
< 4; i
++) {
905 dst
= get_dst_reg(c
, inst
, i
);
906 src0
= get_src_reg(c
, inst
, 0, i
);
907 src1
= get_src_reg_imm(c
, inst
, 1, i
);
908 brw_ADD(p
, dst
, src0
, src1
);
911 brw_set_saturate(p
, 0);
914 static void emit_arl(struct brw_wm_compile
*c
,
915 struct prog_instruction
*inst
)
917 struct brw_compile
*p
= &c
->func
;
918 struct brw_reg src0
, addr_reg
;
919 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
920 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
922 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
923 brw_MOV(p
, addr_reg
, src0
);
924 brw_set_saturate(p
, 0);
927 static void emit_sub(struct brw_wm_compile
*c
,
928 struct prog_instruction
*inst
)
930 struct brw_compile
*p
= &c
->func
;
931 struct brw_reg src0
, src1
, dst
;
932 GLuint mask
= inst
->DstReg
.WriteMask
;
934 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
935 for (i
= 0 ; i
< 4; i
++) {
937 dst
= get_dst_reg(c
, inst
, i
);
938 src0
= get_src_reg(c
, inst
, 0, i
);
939 src1
= get_src_reg_imm(c
, inst
, 1, i
);
940 brw_ADD(p
, dst
, src0
, negate(src1
));
943 brw_set_saturate(p
, 0);
946 static void emit_mul(struct brw_wm_compile
*c
,
947 struct prog_instruction
*inst
)
949 struct brw_compile
*p
= &c
->func
;
950 struct brw_reg src0
, src1
, dst
;
951 GLuint mask
= inst
->DstReg
.WriteMask
;
953 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
954 for (i
= 0 ; i
< 4; i
++) {
956 dst
= get_dst_reg(c
, inst
, i
);
957 src0
= get_src_reg(c
, inst
, 0, i
);
958 src1
= get_src_reg_imm(c
, inst
, 1, i
);
959 brw_MUL(p
, dst
, src0
, src1
);
962 brw_set_saturate(p
, 0);
965 static void emit_frc(struct brw_wm_compile
*c
,
966 struct prog_instruction
*inst
)
968 struct brw_compile
*p
= &c
->func
;
969 struct brw_reg src0
, dst
;
970 GLuint mask
= inst
->DstReg
.WriteMask
;
972 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
973 for (i
= 0 ; i
< 4; i
++) {
975 dst
= get_dst_reg(c
, inst
, i
);
976 src0
= get_src_reg_imm(c
, inst
, 0, i
);
977 brw_FRC(p
, dst
, src0
);
980 if (inst
->SaturateMode
!= SATURATE_OFF
)
981 brw_set_saturate(p
, 0);
984 static void emit_flr(struct brw_wm_compile
*c
,
985 struct prog_instruction
*inst
)
987 struct brw_compile
*p
= &c
->func
;
988 struct brw_reg src0
, dst
;
989 GLuint mask
= inst
->DstReg
.WriteMask
;
991 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
992 for (i
= 0 ; i
< 4; i
++) {
994 dst
= get_dst_reg(c
, inst
, i
);
995 src0
= get_src_reg_imm(c
, inst
, 0, i
);
996 brw_RNDD(p
, dst
, src0
);
999 brw_set_saturate(p
, 0);
1002 static void emit_max(struct brw_wm_compile
*c
,
1003 struct prog_instruction
*inst
)
1005 struct brw_compile
*p
= &c
->func
;
1006 GLuint mask
= inst
->DstReg
.WriteMask
;
1007 struct brw_reg src0
, src1
, dst
;
1009 brw_push_insn_state(p
);
1010 for (i
= 0; i
< 4; i
++) {
1011 if (mask
& (1<<i
)) {
1012 dst
= get_dst_reg(c
, inst
, i
);
1013 src0
= get_src_reg(c
, inst
, 0, i
);
1014 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1015 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1016 brw_MOV(p
, dst
, src0
);
1017 brw_set_saturate(p
, 0);
1019 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src0
, src1
);
1020 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1021 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1022 brw_MOV(p
, dst
, src1
);
1023 brw_set_saturate(p
, 0);
1024 brw_set_predicate_control_flag_value(p
, 0xff);
1027 brw_pop_insn_state(p
);
1030 static void emit_min(struct brw_wm_compile
*c
,
1031 struct prog_instruction
*inst
)
1033 struct brw_compile
*p
= &c
->func
;
1034 GLuint mask
= inst
->DstReg
.WriteMask
;
1035 struct brw_reg src0
, src1
, dst
;
1037 brw_push_insn_state(p
);
1038 for (i
= 0; i
< 4; i
++) {
1039 if (mask
& (1<<i
)) {
1040 dst
= get_dst_reg(c
, inst
, i
);
1041 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1042 src1
= get_src_reg(c
, inst
, 1, i
);
1043 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1044 brw_MOV(p
, dst
, src0
);
1045 brw_set_saturate(p
, 0);
1047 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1048 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1049 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1050 brw_MOV(p
, dst
, src1
);
1051 brw_set_saturate(p
, 0);
1052 brw_set_predicate_control_flag_value(p
, 0xff);
1055 brw_pop_insn_state(p
);
1058 static void emit_pow(struct brw_wm_compile
*c
,
1059 struct prog_instruction
*inst
)
1061 struct brw_compile
*p
= &c
->func
;
1062 struct brw_reg dst
, src0
, src1
;
1063 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1064 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1065 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1067 brw_MOV(p
, brw_message_reg(2), src0
);
1068 brw_MOV(p
, brw_message_reg(3), src1
);
1072 BRW_MATH_FUNCTION_POW
,
1073 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1076 BRW_MATH_DATA_VECTOR
,
1077 BRW_MATH_PRECISION_FULL
);
1080 static void emit_lrp(struct brw_wm_compile
*c
,
1081 struct prog_instruction
*inst
)
1083 struct brw_compile
*p
= &c
->func
;
1084 GLuint mask
= inst
->DstReg
.WriteMask
;
1085 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1087 int mark
= mark_tmps(c
);
1088 for (i
= 0; i
< 4; i
++) {
1089 if (mask
& (1<<i
)) {
1090 dst
= get_dst_reg(c
, inst
, i
);
1091 src0
= get_src_reg(c
, inst
, 0, i
);
1093 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1095 if (src1
.nr
== dst
.nr
) {
1096 tmp1
= alloc_tmp(c
);
1097 brw_MOV(p
, tmp1
, src1
);
1101 src2
= get_src_reg(c
, inst
, 2, i
);
1102 if (src2
.nr
== dst
.nr
) {
1103 tmp2
= alloc_tmp(c
);
1104 brw_MOV(p
, tmp2
, src2
);
1108 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1109 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1110 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1111 brw_MAC(p
, dst
, src0
, tmp1
);
1112 brw_set_saturate(p
, 0);
1114 release_tmps(c
, mark
);
1119 * For GLSL shaders, this KIL will be unconditional.
1120 * It may be contained inside an IF/ENDIF structure of course.
1122 static void emit_kil(struct brw_wm_compile
*c
)
1124 struct brw_compile
*p
= &c
->func
;
1125 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1126 brw_push_insn_state(p
);
1127 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1128 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1129 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1130 brw_pop_insn_state(p
);
1133 static void emit_mad(struct brw_wm_compile
*c
,
1134 struct prog_instruction
*inst
)
1136 struct brw_compile
*p
= &c
->func
;
1137 GLuint mask
= inst
->DstReg
.WriteMask
;
1138 struct brw_reg dst
, src0
, src1
, src2
;
1141 for (i
= 0; i
< 4; i
++) {
1142 if (mask
& (1<<i
)) {
1143 dst
= get_dst_reg(c
, inst
, i
);
1144 src0
= get_src_reg(c
, inst
, 0, i
);
1145 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1146 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1147 brw_MUL(p
, dst
, src0
, src1
);
1149 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1150 brw_ADD(p
, dst
, dst
, src2
);
1151 brw_set_saturate(p
, 0);
1156 static void emit_sop(struct brw_wm_compile
*c
,
1157 struct prog_instruction
*inst
, GLuint cond
)
1159 struct brw_compile
*p
= &c
->func
;
1160 GLuint mask
= inst
->DstReg
.WriteMask
;
1161 struct brw_reg dst
, src0
, src1
;
1164 for (i
= 0; i
< 4; i
++) {
1165 if (mask
& (1<<i
)) {
1166 dst
= get_dst_reg(c
, inst
, i
);
1167 src0
= get_src_reg(c
, inst
, 0, i
);
1168 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1169 brw_push_insn_state(p
);
1170 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1171 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1172 brw_MOV(p
, dst
, brw_imm_f(0.0));
1173 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1174 brw_MOV(p
, dst
, brw_imm_f(1.0));
1175 brw_pop_insn_state(p
);
1180 static void emit_slt(struct brw_wm_compile
*c
,
1181 struct prog_instruction
*inst
)
1183 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1186 static void emit_sle(struct brw_wm_compile
*c
,
1187 struct prog_instruction
*inst
)
1189 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1192 static void emit_sgt(struct brw_wm_compile
*c
,
1193 struct prog_instruction
*inst
)
1195 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1198 static void emit_sge(struct brw_wm_compile
*c
,
1199 struct prog_instruction
*inst
)
1201 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1204 static void emit_seq(struct brw_wm_compile
*c
,
1205 struct prog_instruction
*inst
)
1207 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1210 static void emit_sne(struct brw_wm_compile
*c
,
1211 struct prog_instruction
*inst
)
1213 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1216 static void emit_ddx(struct brw_wm_compile
*c
,
1217 struct prog_instruction
*inst
)
1219 struct brw_compile
*p
= &c
->func
;
1220 GLuint mask
= inst
->DstReg
.WriteMask
;
1221 struct brw_reg interp
[4];
1223 struct brw_reg src0
, w
;
1225 src0
= get_src_reg(c
, inst
, 0, 0);
1226 w
= get_src_reg(c
, inst
, 1, 3);
1228 interp
[0] = brw_vec1_grf(nr
, 0);
1229 interp
[1] = brw_vec1_grf(nr
, 4);
1230 interp
[2] = brw_vec1_grf(nr
+1, 0);
1231 interp
[3] = brw_vec1_grf(nr
+1, 4);
1232 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1233 for(i
= 0; i
< 4; i
++ ) {
1234 if (mask
& (1<<i
)) {
1235 dst
= get_dst_reg(c
, inst
, i
);
1236 brw_MOV(p
, dst
, interp
[i
]);
1237 brw_MUL(p
, dst
, dst
, w
);
1240 brw_set_saturate(p
, 0);
1243 static void emit_ddy(struct brw_wm_compile
*c
,
1244 struct prog_instruction
*inst
)
1246 struct brw_compile
*p
= &c
->func
;
1247 GLuint mask
= inst
->DstReg
.WriteMask
;
1248 struct brw_reg interp
[4];
1250 struct brw_reg src0
, w
;
1253 src0
= get_src_reg(c
, inst
, 0, 0);
1255 w
= get_src_reg(c
, inst
, 1, 3);
1256 interp
[0] = brw_vec1_grf(nr
, 0);
1257 interp
[1] = brw_vec1_grf(nr
, 4);
1258 interp
[2] = brw_vec1_grf(nr
+1, 0);
1259 interp
[3] = brw_vec1_grf(nr
+1, 4);
1260 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1261 for(i
= 0; i
< 4; i
++ ) {
1262 if (mask
& (1<<i
)) {
1263 dst
= get_dst_reg(c
, inst
, i
);
1264 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1265 brw_MUL(p
, dst
, dst
, w
);
1268 brw_set_saturate(p
, 0);
1271 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1273 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1277 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1279 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1282 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1284 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1287 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1289 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1293 /* One-, two- and three-dimensional Perlin noise, similar to the description
1294 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1295 static void noise1_sub( struct brw_wm_compile
*c
) {
1297 struct brw_compile
*p
= &c
->func
;
1298 struct brw_reg param
,
1299 x0
, x1
, /* gradients at each end */
1300 t
, tmp
[ 2 ], /* float temporaries */
1301 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1303 int mark
= mark_tmps( c
);
1305 x0
= alloc_tmp( c
);
1306 x1
= alloc_tmp( c
);
1308 tmp
[ 0 ] = alloc_tmp( c
);
1309 tmp
[ 1 ] = alloc_tmp( c
);
1310 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1311 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1312 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1313 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1314 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1316 param
= lookup_tmp( c
, mark
- 2 );
1318 brw_set_access_mode( p
, BRW_ALIGN_1
);
1320 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1322 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1323 be hashed. Also compute the remainder (offset within the unit
1324 length), interleaved to reduce register dependency penalties. */
1325 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1326 brw_FRC( p
, param
, param
);
1327 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1328 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1329 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1331 /* We're now ready to perform the hashing. The two hashes are
1332 interleaved for performance. The hash function used is
1333 designed to rapidly achieve avalanche and require only 32x16
1334 bit multiplication, and 16-bit swizzles (which we get for
1335 free). We can't use immediate operands in the multiplies,
1336 because immediates are permitted only in src1 and the 16-bit
1337 factor is permitted only in src0. */
1338 for( i
= 0; i
< 2; i
++ )
1339 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1340 for( i
= 0; i
< 2; i
++ )
1341 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1342 high_words( itmp
[ i
] ) );
1343 for( i
= 0; i
< 2; i
++ )
1344 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1345 for( i
= 0; i
< 2; i
++ )
1346 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1347 high_words( itmp
[ i
] ) );
1348 for( i
= 0; i
< 2; i
++ )
1349 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1350 for( i
= 0; i
< 2; i
++ )
1351 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1352 high_words( itmp
[ i
] ) );
1354 /* Now we want to initialise the two gradients based on the
1355 hashes. Format conversion from signed integer to float leaves
1356 everything scaled too high by a factor of pow( 2, 31 ), but
1357 we correct for that right at the end. */
1358 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1359 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1360 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1362 brw_MUL( p
, x0
, x0
, param
);
1363 brw_MUL( p
, x1
, x1
, t
);
1365 /* We interpolate between the gradients using the polynomial
1366 6t^5 - 15t^4 + 10t^3 (Perlin). */
1367 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1368 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1369 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1370 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1371 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1372 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1374 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1375 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1376 brw_MUL( p
, x1
, x1
, param
);
1377 brw_ADD( p
, x0
, x0
, x1
);
1378 /* scale by pow( 2, -30 ), to compensate for the format conversion
1379 above and an extra factor of 2 so that a single gradient covers
1381 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1383 release_tmps( c
, mark
);
1386 static void emit_noise1( struct brw_wm_compile
*c
,
1387 struct prog_instruction
*inst
)
1389 struct brw_compile
*p
= &c
->func
;
1390 struct brw_reg src
, param
, dst
;
1391 GLuint mask
= inst
->DstReg
.WriteMask
;
1393 int mark
= mark_tmps( c
);
1395 assert( mark
== 0 );
1397 src
= get_src_reg( c
, inst
, 0, 0 );
1399 param
= alloc_tmp( c
);
1401 brw_MOV( p
, param
, src
);
1403 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1405 /* Fill in the result: */
1406 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1407 for (i
= 0 ; i
< 4; i
++) {
1408 if (mask
& (1<<i
)) {
1409 dst
= get_dst_reg(c
, inst
, i
);
1410 brw_MOV( p
, dst
, param
);
1413 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1414 brw_set_saturate( p
, 0 );
1416 release_tmps( c
, mark
);
1419 static void noise2_sub( struct brw_wm_compile
*c
) {
1421 struct brw_compile
*p
= &c
->func
;
1422 struct brw_reg param0
, param1
,
1423 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1424 t
, tmp
[ 4 ], /* float temporaries */
1425 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1427 int mark
= mark_tmps( c
);
1429 x0y0
= alloc_tmp( c
);
1430 x0y1
= alloc_tmp( c
);
1431 x1y0
= alloc_tmp( c
);
1432 x1y1
= alloc_tmp( c
);
1434 for( i
= 0; i
< 4; i
++ ) {
1435 tmp
[ i
] = alloc_tmp( c
);
1436 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1438 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1439 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1440 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1442 param0
= lookup_tmp( c
, mark
- 3 );
1443 param1
= lookup_tmp( c
, mark
- 2 );
1445 brw_set_access_mode( p
, BRW_ALIGN_1
);
1447 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1448 be hashed. Also compute the remainders (offsets within the unit
1449 square), interleaved to reduce register dependency penalties. */
1450 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1451 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1452 brw_FRC( p
, param0
, param0
);
1453 brw_FRC( p
, param1
, param1
);
1454 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1455 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1456 low_words( itmp
[ 1 ] ) );
1457 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1458 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1459 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1460 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1461 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1463 /* We're now ready to perform the hashing. The four hashes are
1464 interleaved for performance. The hash function used is
1465 designed to rapidly achieve avalanche and require only 32x16
1466 bit multiplication, and 16-bit swizzles (which we get for
1467 free). We can't use immediate operands in the multiplies,
1468 because immediates are permitted only in src1 and the 16-bit
1469 factor is permitted only in src0. */
1470 for( i
= 0; i
< 4; i
++ )
1471 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1472 for( i
= 0; i
< 4; i
++ )
1473 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1474 high_words( itmp
[ i
] ) );
1475 for( i
= 0; i
< 4; i
++ )
1476 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1477 for( i
= 0; i
< 4; i
++ )
1478 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1479 high_words( itmp
[ i
] ) );
1480 for( i
= 0; i
< 4; i
++ )
1481 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1482 for( i
= 0; i
< 4; i
++ )
1483 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1484 high_words( itmp
[ i
] ) );
1486 /* Now we want to initialise the four gradients based on the
1487 hashes. Format conversion from signed integer to float leaves
1488 everything scaled too high by a factor of pow( 2, 15 ), but
1489 we correct for that right at the end. */
1490 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1491 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1492 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1493 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1494 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1496 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1497 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1498 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1499 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1501 brw_MUL( p
, x1y0
, x1y0
, t
);
1502 brw_MUL( p
, x1y1
, x1y1
, t
);
1503 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1504 brw_MUL( p
, x0y0
, x0y0
, param0
);
1505 brw_MUL( p
, x0y1
, x0y1
, param0
);
1507 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1508 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1509 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1510 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1512 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1513 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1514 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1515 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1517 /* We interpolate between the gradients using the polynomial
1518 6t^5 - 15t^4 + 10t^3 (Perlin). */
1519 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1520 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1521 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1522 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1523 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1524 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1525 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1527 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1528 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1529 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1530 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1531 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1533 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1534 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1535 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1536 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1538 /* Here we interpolate in the y dimension... */
1539 brw_MUL( p
, x0y1
, x0y1
, param1
);
1540 brw_MUL( p
, x1y1
, x1y1
, param1
);
1541 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1542 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1544 /* And now in x. There are horrible register dependencies here,
1545 but we have nothing else to do. */
1546 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1547 brw_MUL( p
, x1y0
, x1y0
, param0
);
1548 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1550 /* scale by pow( 2, -15 ), as described above */
1551 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1553 release_tmps( c
, mark
);
1556 static void emit_noise2( struct brw_wm_compile
*c
,
1557 struct prog_instruction
*inst
)
1559 struct brw_compile
*p
= &c
->func
;
1560 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1561 GLuint mask
= inst
->DstReg
.WriteMask
;
1563 int mark
= mark_tmps( c
);
1565 assert( mark
== 0 );
1567 src0
= get_src_reg( c
, inst
, 0, 0 );
1568 src1
= get_src_reg( c
, inst
, 0, 1 );
1570 param0
= alloc_tmp( c
);
1571 param1
= alloc_tmp( c
);
1573 brw_MOV( p
, param0
, src0
);
1574 brw_MOV( p
, param1
, src1
);
1576 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1578 /* Fill in the result: */
1579 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1580 for (i
= 0 ; i
< 4; i
++) {
1581 if (mask
& (1<<i
)) {
1582 dst
= get_dst_reg(c
, inst
, i
);
1583 brw_MOV( p
, dst
, param0
);
1586 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1587 brw_set_saturate( p
, 0 );
1589 release_tmps( c
, mark
);
1593 * The three-dimensional case is much like the one- and two- versions above,
1594 * but since the number of corners is rapidly growing we now pack 16 16-bit
1595 * hashes into each register to extract more parallelism from the EUs.
1597 static void noise3_sub( struct brw_wm_compile
*c
) {
1599 struct brw_compile
*p
= &c
->func
;
1600 struct brw_reg param0
, param1
, param2
,
1601 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1602 xi
, yi
, zi
, /* interpolation coefficients */
1603 t
, tmp
[ 8 ], /* float temporaries */
1604 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1605 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1607 int mark
= mark_tmps( c
);
1609 x0y0
= alloc_tmp( c
);
1610 x0y1
= alloc_tmp( c
);
1611 x1y0
= alloc_tmp( c
);
1612 x1y1
= alloc_tmp( c
);
1613 xi
= alloc_tmp( c
);
1614 yi
= alloc_tmp( c
);
1615 zi
= alloc_tmp( c
);
1617 for( i
= 0; i
< 8; i
++ ) {
1618 tmp
[ i
] = alloc_tmp( c
);
1619 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1620 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1623 param0
= lookup_tmp( c
, mark
- 4 );
1624 param1
= lookup_tmp( c
, mark
- 3 );
1625 param2
= lookup_tmp( c
, mark
- 2 );
1627 brw_set_access_mode( p
, BRW_ALIGN_1
);
1629 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1630 be hashed. Also compute the remainders (offsets within the unit
1631 cube), interleaved to reduce register dependency penalties. */
1632 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1633 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1634 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1635 brw_FRC( p
, param0
, param0
);
1636 brw_FRC( p
, param1
, param1
);
1637 brw_FRC( p
, param2
, param2
);
1638 /* Since we now have only 16 bits of precision in the hash, we must
1639 be more careful about thorough mixing to maintain entropy as we
1640 squash the input vector into a small scalar. */
1641 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1642 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1643 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1644 brw_imm_uw( 0x9B93 ) );
1645 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1646 brw_imm_uw( 0xBC8F ) );
1648 /* Temporarily disable the execution mask while we work with ExecSize=16
1649 channels (the mask is set for ExecSize=8 and is probably incorrect).
1650 Although this might cause execution of unwanted channels, the code
1651 writes only to temporary registers and has no side effects, so
1652 disabling the mask is harmless. */
1653 brw_push_insn_state( p
);
1654 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1655 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1656 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1657 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1659 /* We're now ready to perform the hashing. The eight hashes are
1660 interleaved for performance. The hash function used is
1661 designed to rapidly achieve avalanche and require only 16x16
1662 bit multiplication, and 8-bit swizzles (which we get for
1664 for( i
= 0; i
< 4; i
++ )
1665 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1666 for( i
= 0; i
< 4; i
++ )
1667 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1668 odd_bytes( wtmp
[ i
] ) );
1669 for( i
= 0; i
< 4; i
++ )
1670 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1671 for( i
= 0; i
< 4; i
++ )
1672 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1673 odd_bytes( wtmp
[ i
] ) );
1674 brw_pop_insn_state( p
);
1676 /* Now we want to initialise the four rear gradients based on the
1677 hashes. Format conversion from signed integer to float leaves
1678 everything scaled too high by a factor of pow( 2, 15 ), but
1679 we correct for that right at the end. */
1681 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1682 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1683 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1684 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1685 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1687 brw_push_insn_state( p
);
1688 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1689 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1690 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1691 brw_pop_insn_state( p
);
1693 brw_MUL( p
, x1y0
, x1y0
, t
);
1694 brw_MUL( p
, x1y1
, x1y1
, t
);
1695 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1696 brw_MUL( p
, x0y0
, x0y0
, param0
);
1697 brw_MUL( p
, x0y1
, x0y1
, param0
);
1700 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1701 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1702 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1703 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1705 brw_push_insn_state( p
);
1706 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1707 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1708 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1709 brw_pop_insn_state( p
);
1711 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1712 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1713 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1714 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1715 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1717 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1718 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1719 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1720 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1723 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1724 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1725 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1726 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1728 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1729 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1730 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1731 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1733 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1734 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1735 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1736 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1738 /* We interpolate between the gradients using the polynomial
1739 6t^5 - 15t^4 + 10t^3 (Perlin). */
1740 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1741 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1742 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1743 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1744 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1745 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1746 brw_MUL( p
, xi
, xi
, param0
);
1747 brw_MUL( p
, yi
, yi
, param1
);
1748 brw_MUL( p
, zi
, zi
, param2
);
1749 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1750 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1751 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1752 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1753 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1754 brw_MUL( p
, xi
, xi
, param0
);
1755 brw_MUL( p
, yi
, yi
, param1
);
1756 brw_MUL( p
, zi
, zi
, param2
);
1757 brw_MUL( p
, xi
, xi
, param0
);
1758 brw_MUL( p
, yi
, yi
, param1
);
1759 brw_MUL( p
, zi
, zi
, param2
);
1760 brw_MUL( p
, xi
, xi
, param0
);
1761 brw_MUL( p
, yi
, yi
, param1
);
1762 brw_MUL( p
, zi
, zi
, param2
);
1764 /* Here we interpolate in the y dimension... */
1765 brw_MUL( p
, x0y1
, x0y1
, yi
);
1766 brw_MUL( p
, x1y1
, x1y1
, yi
);
1767 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1768 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1770 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1771 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1772 brw_MUL( p
, x1y0
, x1y0
, xi
);
1773 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1775 /* Now do the same thing for the front four gradients... */
1777 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1778 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1779 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1780 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1782 brw_push_insn_state( p
);
1783 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1784 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1785 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1786 brw_pop_insn_state( p
);
1788 brw_MUL( p
, x1y0
, x1y0
, t
);
1789 brw_MUL( p
, x1y1
, x1y1
, t
);
1790 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1791 brw_MUL( p
, x0y0
, x0y0
, param0
);
1792 brw_MUL( p
, x0y1
, x0y1
, param0
);
1795 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1796 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1797 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1798 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1800 brw_push_insn_state( p
);
1801 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1802 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1803 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1804 brw_pop_insn_state( p
);
1806 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1807 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1808 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1809 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1810 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1812 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1813 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1814 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1815 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1818 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1819 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1820 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1821 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1823 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1824 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1825 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1826 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1828 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1829 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1830 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1831 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1833 /* The interpolation coefficients are still around from last time, so
1834 again interpolate in the y dimension... */
1835 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1836 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1837 brw_MUL( p
, x0y1
, x0y1
, yi
);
1838 brw_MUL( p
, x1y1
, x1y1
, yi
);
1839 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1840 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1842 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1843 time put the front face in tmp[ 1 ] and we're nearly there... */
1844 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1845 brw_MUL( p
, x1y0
, x1y0
, xi
);
1846 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1848 /* The final interpolation, in the z dimension: */
1849 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1850 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1851 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1853 /* scale by pow( 2, -15 ), as described above */
1854 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1856 release_tmps( c
, mark
);
1859 static void emit_noise3( struct brw_wm_compile
*c
,
1860 struct prog_instruction
*inst
)
1862 struct brw_compile
*p
= &c
->func
;
1863 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1864 GLuint mask
= inst
->DstReg
.WriteMask
;
1866 int mark
= mark_tmps( c
);
1868 assert( mark
== 0 );
1870 src0
= get_src_reg( c
, inst
, 0, 0 );
1871 src1
= get_src_reg( c
, inst
, 0, 1 );
1872 src2
= get_src_reg( c
, inst
, 0, 2 );
1874 param0
= alloc_tmp( c
);
1875 param1
= alloc_tmp( c
);
1876 param2
= alloc_tmp( c
);
1878 brw_MOV( p
, param0
, src0
);
1879 brw_MOV( p
, param1
, src1
);
1880 brw_MOV( p
, param2
, src2
);
1882 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1884 /* Fill in the result: */
1885 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1886 for (i
= 0 ; i
< 4; i
++) {
1887 if (mask
& (1<<i
)) {
1888 dst
= get_dst_reg(c
, inst
, i
);
1889 brw_MOV( p
, dst
, param0
);
1892 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1893 brw_set_saturate( p
, 0 );
1895 release_tmps( c
, mark
);
1899 * For the four-dimensional case, the little micro-optimisation benefits
1900 * we obtain by unrolling all the loops aren't worth the massive bloat it
1901 * now causes. Instead, we loop twice around performing a similar operation
1902 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1903 * code to glue it all together.
1905 static void noise4_sub( struct brw_wm_compile
*c
)
1907 struct brw_compile
*p
= &c
->func
;
1908 struct brw_reg param
[ 4 ],
1909 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1910 w0
, /* noise for the w=0 cube */
1911 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1912 interp
[ 4 ], /* interpolation coefficients */
1913 t
, tmp
[ 8 ], /* float temporaries */
1914 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1915 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1917 int mark
= mark_tmps( c
);
1918 GLuint loop
, origin
;
1920 x0y0
= alloc_tmp( c
);
1921 x0y1
= alloc_tmp( c
);
1922 x1y0
= alloc_tmp( c
);
1923 x1y1
= alloc_tmp( c
);
1925 w0
= alloc_tmp( c
);
1926 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1927 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1929 for( i
= 0; i
< 4; i
++ ) {
1930 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1931 interp
[ i
] = alloc_tmp( c
);
1934 for( i
= 0; i
< 8; i
++ ) {
1935 tmp
[ i
] = alloc_tmp( c
);
1936 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1937 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1940 brw_set_access_mode( p
, BRW_ALIGN_1
);
1942 /* We only want 16 bits of precision from the integral part of each
1943 co-ordinate, but unfortunately the RNDD semantics would saturate
1944 at 16 bits if we performed the operation directly to a 16-bit
1945 destination. Therefore, we round to 32-bit temporaries where
1946 appropriate, and then store only the lower 16 bits. */
1947 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1948 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1949 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1950 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1951 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1952 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1954 /* Modify the flag register here, because the side effect is useful
1955 later (see below). We know for certain that all flags will be
1956 cleared, since the FRC instruction cannot possibly generate
1957 negative results. Even for exceptional inputs (infinities, denormals,
1958 NaNs), the architecture guarantees that the L conditional is false. */
1959 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1960 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1961 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1962 for( i
= 1; i
< 4; i
++ )
1963 brw_FRC( p
, param
[ i
], param
[ i
] );
1965 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1967 for( i
= 0; i
< 4; i
++ )
1968 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1969 for( i
= 0; i
< 4; i
++ )
1970 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1971 for( i
= 0; i
< 4; i
++ )
1972 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1973 for( i
= 0; i
< 4; i
++ )
1974 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1975 for( j
= 0; j
< 3; j
++ )
1976 for( i
= 0; i
< 4; i
++ )
1977 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1979 /* Mark the current address, as it will be a jump destination. The
1980 following code will be executed twice: first, with the flag
1981 register clear indicating the w=0 case, and second with flags
1985 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1986 be hashed. Since we have only 16 bits of precision in the hash, we
1987 must be careful about thorough mixing to maintain entropy as we
1988 squash the input vector into a small scalar. */
1989 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1990 brw_imm_uw( 0xBC8F ) );
1991 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1992 brw_imm_uw( 0xD0BD ) );
1993 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1994 brw_imm_uw( 0x9B93 ) );
1995 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1996 brw_imm_uw( 0xA359 ) );
1997 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1998 brw_imm_uw( 0xBC8F ) );
2000 /* Temporarily disable the execution mask while we work with ExecSize=16
2001 channels (the mask is set for ExecSize=8 and is probably incorrect).
2002 Although this might cause execution of unwanted channels, the code
2003 writes only to temporary registers and has no side effects, so
2004 disabling the mask is harmless. */
2005 brw_push_insn_state( p
);
2006 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2007 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2008 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2009 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2011 /* We're now ready to perform the hashing. The eight hashes are
2012 interleaved for performance. The hash function used is
2013 designed to rapidly achieve avalanche and require only 16x16
2014 bit multiplication, and 8-bit swizzles (which we get for
2016 for( i
= 0; i
< 4; i
++ )
2017 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2018 for( i
= 0; i
< 4; i
++ )
2019 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2020 odd_bytes( wtmp
[ i
] ) );
2021 for( i
= 0; i
< 4; i
++ )
2022 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2023 for( i
= 0; i
< 4; i
++ )
2024 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2025 odd_bytes( wtmp
[ i
] ) );
2026 brw_pop_insn_state( p
);
2028 /* Now we want to initialise the four rear gradients based on the
2029 hashes. Format conversion from signed integer to float leaves
2030 everything scaled too high by a factor of pow( 2, 15 ), but
2031 we correct for that right at the end. */
2033 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2034 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2035 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2036 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2037 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2039 brw_push_insn_state( p
);
2040 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2041 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2042 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2043 brw_pop_insn_state( p
);
2045 brw_MUL( p
, x1y0
, x1y0
, t
);
2046 brw_MUL( p
, x1y1
, x1y1
, t
);
2047 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2048 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2049 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2052 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2053 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2054 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2055 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2057 brw_push_insn_state( p
);
2058 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2059 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2060 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2061 brw_pop_insn_state( p
);
2063 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2064 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2065 /* prepare t for the w component (used below): w the first time through
2066 the loop; w - 1 the second time) */
2067 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2068 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2069 p
->current
->header
.predicate_inverse
= 1;
2070 brw_MOV( p
, t
, param
[ 3 ] );
2071 p
->current
->header
.predicate_inverse
= 0;
2072 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2073 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2074 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2076 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2077 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2078 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2079 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2082 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2083 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2084 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2085 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2087 brw_push_insn_state( p
);
2088 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2089 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2090 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2091 brw_pop_insn_state( p
);
2093 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2094 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2095 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2096 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2098 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2099 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2100 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2101 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2104 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2105 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2106 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2107 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2109 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2110 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2111 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2112 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2113 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2115 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2116 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2117 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2118 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2120 /* Here we interpolate in the y dimension... */
2121 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2122 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2123 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2124 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2125 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2126 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2128 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2129 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2130 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2131 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2133 /* Now do the same thing for the front four gradients... */
2135 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2136 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2137 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2138 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2140 brw_push_insn_state( p
);
2141 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2142 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2143 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2144 brw_pop_insn_state( p
);
2146 brw_MUL( p
, x1y0
, x1y0
, t
);
2147 brw_MUL( p
, x1y1
, x1y1
, t
);
2148 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2149 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2150 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2153 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2154 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2155 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2156 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2158 brw_push_insn_state( p
);
2159 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2160 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2161 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2162 brw_pop_insn_state( p
);
2164 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2165 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2166 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2167 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2168 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2170 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2171 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2172 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2173 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2176 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2177 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2178 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2179 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2181 brw_push_insn_state( p
);
2182 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2183 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2184 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2185 brw_pop_insn_state( p
);
2187 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2188 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2189 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2190 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2191 /* prepare t for the w component (used below): w the first time through
2192 the loop; w - 1 the second time) */
2193 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2194 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2195 p
->current
->header
.predicate_inverse
= 1;
2196 brw_MOV( p
, t
, param
[ 3 ] );
2197 p
->current
->header
.predicate_inverse
= 0;
2198 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2200 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2201 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2202 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2203 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2206 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2207 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2208 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2209 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2211 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2212 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2213 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2214 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2216 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2217 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2218 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2219 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2221 /* Interpolate in the y dimension: */
2222 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2223 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2224 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2225 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2226 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2227 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2229 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2230 time put the front face in tmp[ 1 ] and we're nearly there... */
2231 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2232 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2233 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2235 /* Another interpolation, in the z dimension: */
2236 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2237 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2238 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2240 /* Exit the loop if we've computed both cubes... */
2241 origin
= p
->nr_insn
;
2242 brw_push_insn_state( p
);
2243 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2244 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2245 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2246 brw_pop_insn_state( p
);
2248 /* Save the result for the w=0 case, and increment the w coordinate: */
2249 brw_MOV( p
, w0
, tmp
[ 0 ] );
2250 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2253 /* Loop around for the other cube. Explicitly set the flag register
2254 (unfortunately we must spend an extra instruction to do this: we
2255 can't rely on a side effect of the previous MOV or ADD because
2256 conditional modifiers which are normally true might be false in
2257 exceptional circumstances, e.g. given a NaN input; the add to
2258 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2259 brw_push_insn_state( p
);
2260 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2261 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2262 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2263 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2264 brw_pop_insn_state( p
);
2266 /* Patch the previous conditional branch now that we know the
2267 destination address. */
2268 brw_set_src1( p
->store
+ origin
,
2269 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2271 /* The very last interpolation. */
2272 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2273 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2274 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2276 /* scale by pow( 2, -15 ), as described above */
2277 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2279 release_tmps( c
, mark
);
2282 static void emit_noise4( struct brw_wm_compile
*c
,
2283 struct prog_instruction
*inst
)
2285 struct brw_compile
*p
= &c
->func
;
2286 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2287 GLuint mask
= inst
->DstReg
.WriteMask
;
2289 int mark
= mark_tmps( c
);
2291 assert( mark
== 0 );
2293 src0
= get_src_reg( c
, inst
, 0, 0 );
2294 src1
= get_src_reg( c
, inst
, 0, 1 );
2295 src2
= get_src_reg( c
, inst
, 0, 2 );
2296 src3
= get_src_reg( c
, inst
, 0, 3 );
2298 param0
= alloc_tmp( c
);
2299 param1
= alloc_tmp( c
);
2300 param2
= alloc_tmp( c
);
2301 param3
= alloc_tmp( c
);
2303 brw_MOV( p
, param0
, src0
);
2304 brw_MOV( p
, param1
, src1
);
2305 brw_MOV( p
, param2
, src2
);
2306 brw_MOV( p
, param3
, src3
);
2308 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2310 /* Fill in the result: */
2311 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2312 for (i
= 0 ; i
< 4; i
++) {
2313 if (mask
& (1<<i
)) {
2314 dst
= get_dst_reg(c
, inst
, i
);
2315 brw_MOV( p
, dst
, param0
);
2318 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2319 brw_set_saturate( p
, 0 );
2321 release_tmps( c
, mark
);
/**
 * Emit code for the window-position X/Y computation.
 *
 * Looks up destination channels 0 and 1 and components 0 and 1 of source
 * operand 0.  For each of X and Y that is enabled in the write mask, the
 * source (retyped as a word register) is combined with an immediate built
 * from the drawable origin:
 *   X: src0[0] with immediate (0 - origin_x)
 *   Y: negated src0[1] with immediate (origin_y + drawable_height - 1)
 *
 * NOTE(review): the Y immediate carries an extra minus one that the
 * formula in the inline comment does not mention -- confirm which of the
 * two is intended.
 *
 * \param c     WM compile state (supplies key.origin_x, key.origin_y and
 *              key.drawable_height)
 * \param inst  instruction being translated
 */
2324 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2325 struct prog_instruction
*inst
)
2327 struct brw_compile
*p
= &c
->func
;
2328 GLuint mask
= inst
->DstReg
.WriteMask
;
2329 struct brw_reg src0
[2], dst
[2];
2331 dst
[0] = get_dst_reg(c
, inst
, 0);
2332 dst
[1] = get_dst_reg(c
, inst
, 1);
2334 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2335 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2337 /* Calculate the pixel offset from window bottom left into destination
2340 if (mask
& WRITEMASK_X
) {
2341 /* X' = X - origin_x */
2344 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2345 brw_imm_d(0 - c
->key
.origin_x
));
2348 if (mask
& WRITEMASK_Y
) {
2349 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2352 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2353 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2358 BIAS on SIMD8 not working yet...
/**
 * Emit a texture sample with LOD bias.
 *
 * Fetches all four destination and source component registers and loads
 * the texture coordinates into message registers 2-4; for the 1D and
 * 2D/RECT targets the unused coordinates are filled with 0.  The bias
 * (source component 3) goes into message register 5 and a zero reference
 * value into message register 6.  The sampler message uses surface index
 * unit + MAX_DRAW_BUFFERS, the instruction write mask, message type
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS and a response length of 4.
 *
 * NOTE(review): emit_tex() uses a SIMD8 message type while this path uses
 * a SIMD16 one; together with the remark that BIAS on SIMD8 is not working
 * yet, this path looks unfinished -- confirm before relying on it.
 *
 * \param c     WM compile state
 * \param inst  the texture instruction; TexSrcUnit selects the sampler
 *              unit and TexSrcTarget the coordinate layout
 */
2360 static void emit_txb(struct brw_wm_compile
*c
,
2361 struct prog_instruction
*inst
)
2363 struct brw_compile
*p
= &c
->func
;
2364 struct brw_reg dst
[4], src
[4], payload_reg
;
2365 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* The payload depth register is handed to the sampler message as src0. */
2368 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2370 for (i
= 0; i
< 4; i
++)
2371 dst
[i
] = get_dst_reg(c
, inst
, i
);
2372 for (i
= 0; i
< 4; i
++)
2373 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2375 switch (inst
->TexSrcTarget
) {
2376 case TEXTURE_1D_INDEX
:
2377 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2378 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2379 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2381 case TEXTURE_2D_INDEX
:
2382 case TEXTURE_RECT_INDEX
:
2383 brw_MOV(p
, brw_message_reg(2), src
[0]);
2384 brw_MOV(p
, brw_message_reg(3), src
[1]);
2385 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2388 brw_MOV(p
, brw_message_reg(2), src
[0]);
2389 brw_MOV(p
, brw_message_reg(3), src
[1]);
2390 brw_MOV(p
, brw_message_reg(4), src
[2]);
2393 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2394 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
2396 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2398 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2399 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2401 inst
->DstReg
.WriteMask
, /* writemask */
2402 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2403 4, /* response_length */
/**
 * Emit a texture sample.
 *
 * Fetches all four destination and source component registers, then
 * selects which coordinates to send from the texture target (XY for the
 * 2D/RECT targets, XYZ for the last case shown).  Coordinates are moved
 * into consecutive message registers: a source component picked through
 * the table {0,1,2,2}, or 0.0 on the alternative path.  Message registers
 * 5 and 6 can additionally be loaded with a zero lod/bias and with source
 * component 2 as the reference value.  The sampler message uses surface
 * index unit + MAX_DRAW_BUFFERS, the instruction write mask, message type
 * BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, a response length of 4 and a message
 * length of 6 for shadow samplers, 4 otherwise.  Destination component 3
 * can finally be overwritten with 1.0.
 *
 * NOTE(review): presumably the writes to message registers 5 and 6 and
 * the final move of 1.0 apply to shadow lookups only -- confirm the
 * guarding conditions.
 *
 * \param c     WM compile state; key.shadowtex_mask flags shadow units
 * \param inst  the texture instruction; TexSrcUnit selects the sampler
 *              unit and TexSrcTarget the coordinate layout
 */
2409 static void emit_tex(struct brw_wm_compile
*c
,
2410 struct prog_instruction
*inst
)
2412 struct brw_compile
*p
= &c
->func
;
2413 struct brw_reg dst
[4], src
[4], payload_reg
;
2414 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when this sampler unit is flagged in the shadow-texture mask. */
2418 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2420 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2422 for (i
= 0; i
< 4; i
++)
2423 dst
[i
] = get_dst_reg(c
, inst
, i
);
2424 for (i
= 0; i
< 4; i
++)
2425 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2427 switch (inst
->TexSrcTarget
) {
2428 case TEXTURE_1D_INDEX
:
2432 case TEXTURE_2D_INDEX
:
2433 case TEXTURE_RECT_INDEX
:
2434 emit
= WRITEMASK_XY
;
2438 emit
= WRITEMASK_XYZ
;
2444 /* move/load S, T, R coords */
2445 for (i
= 0; i
< nr
; i
++) {
2446 static const GLuint swz
[4] = {0,1,2,2};
2448 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2450 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2455 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2456 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
2460 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2462 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2463 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2465 inst
->DstReg
.WriteMask
, /* writemask */
2466 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2467 4, /* response_length */
2468 shadow
? 6 : 4, /* msg_length */
2472 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
/**
 * Fetch one float[4] constant from the constant buffer and copy it to the
 * destination of the instruction.
 *
 * Four temporaries are allocated, but only writeback_reg[0] is named as
 * the read destination and used afterwards (see the XXX note below).  The
 * read is issued without relative addressing, at byte offset
 * 16 * constIndex, from binding table entry BRW_WM_MAX_SURF - 1.  Each of
 * the four floats is then addressed through a stride(..., 0, 1, 0) view of
 * the writeback register at sub-register byte offset i * 4 and moved into
 * destination component i.  The temporaries are released before returning.
 *
 * \param brw   driver context
 * \param c     WM compile state
 * \param inst  instruction whose destination receives the constant
 *
 * The remaining argument is the constant index; the caller passes
 * inst->SrcReg[0].Index for it.
 */
2476 static void emit_get_constant(struct brw_context
*brw
,
2477 struct brw_wm_compile
*c
,
2478 struct prog_instruction
*inst
,
2481 struct brw_compile
*p
= &c
->func
;
2482 struct brw_reg dst
[4];
2484 const int mark
= mark_tmps( c
);
2485 struct brw_reg writeback_reg
[4];
2487 /* XXX only need 1 temp reg??? */
2488 for (i
= 0; i
< 4; i
++) {
2489 writeback_reg
[i
] = alloc_tmp(c
);
2492 for (i
= 0; i
< 4; i
++) {
2493 dst
[i
] = get_dst_reg(c
, inst
, i
);
2496 /* Get float[4] vector from constant buffer */
2498 writeback_reg
[0], /* first writeback dest */
2500 GL_FALSE
, /* rel addr? */
2501 16 * constIndex
, /* byte offset */
2502 BRW_WM_MAX_SURF
- 1 /* surface, binding table index */
2505 /* Extract the four channel values, smear across dest registers */
2506 for (i
= 0; i
< 4; i
++) {
2507 /* extract 1 float from the writeback reg */
2508 struct brw_reg new_src
= stride(writeback_reg
[0], 0, 1, 0);
2509 new_src
.subnr
= i
* 4;
2510 /* and smear it into the dest register */
2511 brw_MOV(p
, dst
[i
], new_src
);
2514 release_tmps( c
, mark
);
2519 * Resolve subroutine calls after code emit is done.
2521 static void post_wm_emit( struct brw_wm_compile
*c
)
2523 brw_resolve_cals(&c
->func
);
/**
 * Main code generation loop for the GLSL fragment program path.
 *
 * Turns instruction compression off, loads the address register behind
 * stack_index with the address of c->stack, and then walks
 * c->prog_instructions.  For every instruction the conditional modifier is
 * set from inst->CondUpdate, the opcode is dispatched to the matching
 * emit_*() helper or handled inline, and predicate control is set from
 * inst->CondUpdate again afterwards.
 *
 * Flow control handled inline:
 *  - IF / ELSE / ENDIF keep their instructions on the if_inst stack.
 *  - Loops keep the DO instruction on the loop_inst stack; when the loop
 *    is closed, every BREAK and CONTINUE found between the DO and the
 *    WHILE gets its jump count patched (BREAK jumps one instruction
 *    further than CONTINUE) and its pop count cleared.
 *  - Subroutine call and return save and restore the instruction pointer
 *    through the stack addressed by stack_index.
 *
 * A problem is reported when c->reg_index reaches BRW_WM_MAX_GRF.
 *
 * NOTE(review): pushes onto if_inst are guarded by an assert against
 * MAX_IFSN, but no equivalent check against MAX_LOOP_DEPTH is visible for
 * pushes onto loop_inst -- confirm and add one if it is really missing.
 *
 * \param brw  driver context, forwarded to emit_get_constant()
 * \param c    WM compile state holding the program and the emit buffer
 */
2526 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2529 #define MAX_LOOP_DEPTH 32
2530 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2531 struct brw_instruction
*inst0
, *inst1
;
2532 int i
, if_insn
= 0, loop_insn
= 0;
2533 struct brw_compile
*p
= &c
->func
;
2534 struct brw_indirect stack_index
= brw_indirect(0, 0);
2538 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2539 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2541 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2542 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2544 if (inst
->CondUpdate
)
2545 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2547 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2549 switch (inst
->Opcode
) {
2551 emit_pixel_xy(c
, inst
);
2554 emit_delta_xy(c
, inst
);
2557 emit_pixel_w(c
, inst
);
2560 emit_linterp(c
, inst
);
2563 emit_pinterp(c
, inst
);
2566 emit_cinterp(c
, inst
);
2569 emit_wpos_xy(c
, inst
);
2572 emit_fb_write(c
, inst
);
2574 case WM_FRONTFACING
:
2575 emit_frontfacing(c
, inst
);
2599 emit_trunc(c
, inst
);
2603 /* test hook for new constant buffer code */
2604 if (inst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) {
2605 emit_get_constant(brw
, c
, inst
, inst
->SrcReg
[0].Index
);
2684 emit_noise1(c
, inst
);
2687 emit_noise2(c
, inst
);
2690 emit_noise3(c
, inst
);
2693 emit_noise4(c
, inst
);
2705 assert(if_insn
< MAX_IFSN
);
2706 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2709 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2712 assert(if_insn
> 0);
2713 brw_ENDIF(p
, if_inst
[--if_insn
]);
2716 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Call sequence: store IP + 3*16 bytes at the top of the stack, advance
   the stack pointer by 4 bytes, record the call site under the name in
   inst->Comment (presumably patched later by brw_resolve_cals()), then
   emit the jump itself. */
2722 brw_push_insn_state(p
);
2723 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2724 brw_set_access_mode(p
, BRW_ALIGN_1
);
2725 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2726 brw_set_access_mode(p
, BRW_ALIGN_16
);
2727 brw_ADD(p
, get_addr_reg(stack_index
),
2728 get_addr_reg(stack_index
), brw_imm_d(4));
2729 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2730 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2731 brw_pop_insn_state(p
);
/* Return sequence: step the stack pointer back by 4 bytes and load the
   saved address into IP. */
2735 brw_push_insn_state(p
);
2736 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2737 brw_ADD(p
, get_addr_reg(stack_index
),
2738 get_addr_reg(stack_index
), brw_imm_d(-4));
2739 brw_set_access_mode(p
, BRW_ALIGN_1
);
2740 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2741 brw_set_access_mode(p
, BRW_ALIGN_16
);
2742 brw_pop_insn_state(p
);
/* NOTE(review): unlike the if_inst push, no check against MAX_LOOP_DEPTH
   is visible before this push -- confirm. */
2745 case OPCODE_BGNLOOP
:
2746 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2750 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2754 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2756 case OPCODE_ENDLOOP
:
2758 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2759 /* patch all the BREAK instructions from
2761 while (inst0
> loop_inst
[loop_insn
]) {
2763 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2764 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2765 inst0
->bits3
.if_else
.pop_count
= 0;
2766 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2767 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2768 inst0
->bits3
.if_else
.pop_count
= 0;
2773 _mesa_printf("unsupported IR in fragment shader %d\n",
2776 if (inst
->CondUpdate
)
2777 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2779 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2783 if (c
->reg_index
>= BRW_WM_MAX_GRF
) {
2784 _mesa_problem(NULL
, "Ran out of registers in brw_wm_emit_glsl()");
2785 /* XXX we need to do some proper error recovery here */
2791 * Do GPU code generation for shaders that use GLSL features such as
2792 * flow control. Other shaders will be compiled with the
/**
 * Entry point for GPU code generation on the GLSL fragment program path.
 *
 * Prints a banner when DEBUG_WM is set in INTEL_DEBUG, runs the code
 * generator brw_wm_emit_glsl(), dumps the program again under DEBUG_WM,
 * and finally records the register usage: prog_data.total_grf is taken
 * from c->reg_index and prog_data.total_scratch is set to 0.
 *
 * \param brw  driver context
 * \param c    WM compile state to generate code for
 */
2794 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2796 if (INTEL_DEBUG
& DEBUG_WM
) {
2797 _mesa_printf("brw_wm_glsl_emit:\n");
2800 /* initial instruction translation/simplification */
2803 /* actual code generation */
2804 brw_wm_emit_glsl(brw
, c
);
2806 if (INTEL_DEBUG
& DEBUG_WM
) {
2807 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2810 c
->prog_data
.total_grf
= c
->reg_index
;
2811 c
->prog_data
.total_scratch
= 0;