1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
14 const struct prog_instruction
*inst
,
18 * Determine if the given fragment program uses GLSL features such
19 * as flow conditionals, loops, subroutines.
20 * Some GLSL shaders may use these features, others might not.
22 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
26 if (INTEL_DEBUG
& DEBUG_GLSL_FORCE
)
29 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
30 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
31 switch (inst
->Opcode
) {
54 reclaim_temps(struct brw_wm_compile
*c
);
57 /** Mark GRF register as used. */
59 prealloc_grf(struct brw_wm_compile
*c
, int r
)
61 c
->used_grf
[r
] = GL_TRUE
;
65 /** Mark given GRF register as not in use. */
67 release_grf(struct brw_wm_compile
*c
, int r
)
69 /*assert(c->used_grf[r]);*/
70 c
->used_grf
[r
] = GL_FALSE
;
71 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
75 /** Return index of a free GRF, mark it as used. */
77 alloc_grf(struct brw_wm_compile
*c
)
80 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
81 if (!c
->used_grf
[r
]) {
82 c
->used_grf
[r
] = GL_TRUE
;
83 c
->first_free_grf
= r
+ 1; /* a guess */
88 /* no free temps, try to reclaim some */
90 c
->first_free_grf
= 0;
93 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
94 if (!c
->used_grf
[r
]) {
95 c
->used_grf
[r
] = GL_TRUE
;
96 c
->first_free_grf
= r
+ 1; /* a guess */
101 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
102 assert(c
->used_grf
[r
]);
105 /* really, no free GRF regs found */
106 if (!c
->out_of_regs
) {
107 /* print warning once per compilation */
108 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
109 c
->out_of_regs
= GL_TRUE
;
116 /** Return number of GRF registers used */
118 num_grf_used(const struct brw_wm_compile
*c
)
121 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
130 * Record the mapping of a Mesa register to a hardware register.
132 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
133 int component
, struct brw_reg reg
)
135 c
->wm_regs
[file
][index
][component
].reg
= reg
;
136 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
139 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
143 /* if we need to allocate another temp, grow the tmp_regs[] array */
144 if (c
->tmp_index
== c
->tmp_max
) {
145 int r
= alloc_grf(c
);
147 /*printf("Out of temps in %s\n", __FUNCTION__);*/
148 r
= 50; /* XXX random register! */
150 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
153 /* form the GRF register */
154 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
155 /*printf("alloc_temp %d\n", reg.nr);*/
156 assert(reg
.nr
< BRW_WM_MAX_GRF
);
162 * Save current temp register info.
163 * There must be a matching call to release_tmps().
165 static int mark_tmps(struct brw_wm_compile
*c
)
170 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
172 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
175 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
181 * Convert Mesa src register to brw register.
183 * Since we're running in SOA mode each Mesa register corresponds to four
184 * hardware registers. We allocate the hardware registers as needed here.
186 * \param file register file, one of PROGRAM_x
187 * \param index register number
188 * \param component src component (X=0, Y=1, Z=2, W=3)
189 * \param nr not used?!?
190 * \param neg negate value?
191 * \param abs take absolute value?
193 static struct brw_reg
194 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
195 int nr
, GLuint neg
, GLuint abs
)
199 case PROGRAM_STATE_VAR
:
200 case PROGRAM_CONSTANT
:
201 case PROGRAM_UNIFORM
:
202 file
= PROGRAM_STATE_VAR
;
204 case PROGRAM_UNDEFINED
:
205 return brw_null_reg();
206 case PROGRAM_TEMPORARY
:
209 case PROGRAM_PAYLOAD
:
212 _mesa_problem(NULL
, "Unexpected file in get_reg()");
213 return brw_null_reg();
217 assert(component
< 4);
219 /* see if we've already allocated a HW register for this Mesa register */
220 if (c
->wm_regs
[file
][index
][component
].inited
) {
222 reg
= c
->wm_regs
[file
][index
][component
].reg
;
225 /* no, allocate new register */
226 int grf
= alloc_grf(c
);
227 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
229 /* totally out of temps */
230 grf
= 51; /* XXX random register! */
233 reg
= brw_vec8_grf(grf
, 0);
234 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
236 set_reg(c
, file
, index
, component
, reg
);
239 if (neg
& (1 << component
)) {
250 * This is called if we run out of GRF registers. Examine the live intervals
251 * of temp regs in the program and free those which won't be used again.
254 reclaim_temps(struct brw_wm_compile
*c
)
256 GLint intBegin
[MAX_PROGRAM_TEMPS
];
257 GLint intEnd
[MAX_PROGRAM_TEMPS
];
260 /*printf("Reclaim temps:\n");*/
262 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
265 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
266 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
267 /* program temp[i] can be freed */
269 /*printf(" temp[%d] is dead\n", index);*/
270 for (component
= 0; component
< 4; component
++) {
271 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
272 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
275 printf(" Reclaim temp %d, reg %d at inst %d\n",
276 index, r, c->cur_inst);
278 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
289 * Preallocate registers. This sets up the Mesa to hardware register
290 * mapping for certain registers, such as constants (uniforms/state vars)
293 static void prealloc_reg(struct brw_wm_compile
*c
)
295 struct intel_context
*intel
= &c
->func
.brw
->intel
;
298 int urb_read_length
= 0;
299 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
;
300 GLuint reg_index
= 0;
302 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
303 c
->first_free_grf
= 0;
305 for (i
= 0; i
< 4; i
++) {
306 if (i
< c
->key
.nr_depth_regs
)
307 reg
= brw_vec8_grf(i
* 2, 0);
309 reg
= brw_vec8_grf(0, 0);
310 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
312 reg_index
+= 2 * c
->key
.nr_depth_regs
;
316 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
317 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
319 /* use a real constant buffer, or just use a section of the GRF? */
320 /* XXX this heuristic may need adjustment... */
321 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
322 c
->fp
->use_const_buffer
= GL_TRUE
;
324 c
->fp
->use_const_buffer
= GL_FALSE
;
325 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
327 if (c
->fp
->use_const_buffer
) {
328 /* We'll use a real constant buffer and fetch constants from
329 * it with a dataport read message.
332 /* number of float constants in CURBE */
333 c
->prog_data
.nr_params
= 0;
336 const struct gl_program_parameter_list
*plist
=
337 c
->fp
->program
.Base
.Parameters
;
340 /* number of float constants in CURBE */
341 c
->prog_data
.nr_params
= 4 * nr_params
;
343 /* loop over program constants (float[4]) */
344 for (i
= 0; i
< nr_params
; i
++) {
345 /* loop over XYZW channels */
346 for (j
= 0; j
< 4; j
++, index
++) {
347 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
348 /* Save pointer to parameter/constant value.
349 * Constants will be copied in prepare_constant_buffer()
351 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
352 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
355 /* number of constant regs used (each reg is float[8]) */
356 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
357 reg_index
+= c
->nr_creg
;
361 /* fragment shader inputs */
362 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
365 if (i
>= VERT_RESULT_VAR0
)
366 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
367 else if (i
<= VERT_RESULT_TEX7
)
372 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
373 urb_read_length
= reg_index
;
374 reg
= brw_vec8_grf(reg_index
, 0);
375 for (j
= 0; j
< 4; j
++)
376 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
378 if (c
->key
.vp_outputs_written
& BITFIELD64_BIT(i
)) {
383 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
384 c
->prog_data
.urb_read_length
= urb_read_length
;
385 c
->prog_data
.curb_read_length
= c
->nr_creg
;
386 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
388 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
391 /* mark GRF regs [0..reg_index-1] as in-use */
392 for (i
= 0; i
< reg_index
; i
++)
395 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
396 prealloc_grf(c
, 126);
397 prealloc_grf(c
, 127);
399 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
400 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
401 struct brw_reg dst
[4];
403 switch (inst
->Opcode
) {
406 /* Allocate the channels of texture results contiguously,
407 * since they are written out that way by the sampler unit.
409 for (j
= 0; j
< 4; j
++) {
410 dst
[j
] = get_dst_reg(c
, inst
, j
);
412 assert(dst
[j
].nr
== dst
[j
- 1].nr
+ 1);
420 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
421 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
423 switch (inst
->Opcode
) {
425 /* Allocate WM_DELTAXY destination on G45/GM45 to an
426 * even-numbered GRF if possible so that we can use the PLN
429 if (inst
->DstReg
.WriteMask
== WRITEMASK_XY
&&
430 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][0].inited
&&
431 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][1].inited
&&
432 (IS_G4X(intel
->intelScreen
->deviceID
) || intel
->gen
== 5)) {
435 for (grf
= c
->first_free_grf
& ~1;
436 grf
< BRW_WM_MAX_GRF
;
439 if (!c
->used_grf
[grf
] && !c
->used_grf
[grf
+ 1]) {
440 c
->used_grf
[grf
] = GL_TRUE
;
441 c
->used_grf
[grf
+ 1] = GL_TRUE
;
442 c
->first_free_grf
= grf
+ 2; /* a guess */
444 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 0,
445 brw_vec8_grf(grf
, 0));
446 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 1,
447 brw_vec8_grf(grf
+ 1, 0));
457 /* An instruction may reference up to three constants.
458 * They'll be found in these registers.
459 * XXX alloc these on demand!
461 if (c
->fp
->use_const_buffer
) {
462 for (i
= 0; i
< 3; i
++) {
463 c
->current_const
[i
].index
= -1;
464 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
468 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
469 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
475 * Check if any of the instruction's src registers are constants, uniforms,
476 * or statevars. If so, fetch any constants that we don't already have in
477 * the three GRF slots.
479 static void fetch_constants(struct brw_wm_compile
*c
,
480 const struct prog_instruction
*inst
)
482 struct brw_compile
*p
= &c
->func
;
485 /* loop over instruction src regs */
486 for (i
= 0; i
< 3; i
++) {
487 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
488 if (src
->File
== PROGRAM_STATE_VAR
||
489 src
->File
== PROGRAM_CONSTANT
||
490 src
->File
== PROGRAM_UNIFORM
) {
491 c
->current_const
[i
].index
= src
->Index
;
494 printf(" fetch const[%d] for arg %d into reg %d\n",
495 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
498 /* need to fetch the constant now */
500 c
->current_const
[i
].reg
, /* writeback dest */
501 src
->RelAddr
, /* relative indexing? */
502 16 * src
->Index
, /* byte offset */
503 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
511 * Convert Mesa dst register to brw register.
513 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
514 const struct prog_instruction
*inst
,
518 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
523 static struct brw_reg
524 get_src_reg_const(struct brw_wm_compile
*c
,
525 const struct prog_instruction
*inst
,
526 GLuint srcRegIndex
, GLuint component
)
528 /* We should have already fetched the constant from the constant
529 * buffer in fetch_constants(). Now we just have to return a
530 * register description that extracts the needed component and
531 * smears it across all eight vector components.
533 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
534 struct brw_reg const_reg
;
536 assert(component
< 4);
537 assert(srcRegIndex
< 3);
538 assert(c
->current_const
[srcRegIndex
].index
!= -1);
539 const_reg
= c
->current_const
[srcRegIndex
].reg
;
541 /* extract desired float from the const_reg, and smear */
542 const_reg
= stride(const_reg
, 0, 1, 0);
543 const_reg
.subnr
= component
* 4;
545 if (src
->Negate
& (1 << component
))
546 const_reg
= negate(const_reg
);
548 const_reg
= brw_abs(const_reg
);
551 printf(" form const[%d].%d for arg %d, reg %d\n",
552 c
->current_const
[srcRegIndex
].index
,
563 * Convert Mesa src register to brw register.
565 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
566 const struct prog_instruction
*inst
,
567 GLuint srcRegIndex
, GLuint channel
)
569 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
571 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
573 /* Only one immediate value can be used per native opcode, and it
574 * has be in the src1 slot, so not all Mesa instructions will get
575 * to take advantage of immediate constants.
577 if (brw_wm_arg_can_be_immediate(inst
->Opcode
, srcRegIndex
)) {
578 const struct gl_program_parameter_list
*params
;
580 params
= c
->fp
->program
.Base
.Parameters
;
582 /* Extended swizzle terms */
583 if (component
== SWIZZLE_ZERO
) {
584 return brw_imm_f(0.0F
);
585 } else if (component
== SWIZZLE_ONE
) {
586 return brw_imm_f(1.0F
);
589 if (src
->File
== PROGRAM_CONSTANT
) {
590 return brw_imm_f(params
->ParameterValues
[src
->Index
][component
]);
594 if (c
->fp
->use_const_buffer
&&
595 (src
->File
== PROGRAM_STATE_VAR
||
596 src
->File
== PROGRAM_CONSTANT
||
597 src
->File
== PROGRAM_UNIFORM
)) {
598 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
601 /* other type of source register */
602 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
603 src
->Negate
, src
->Abs
);
608 * Subroutines are minimal support for resusable instruction sequences.
609 * They are implemented as simply as possible to minimise overhead: there
610 * is no explicit support for communication between the caller and callee
611 * other than saving the return address in a temporary register, nor is
612 * there any automatic local storage. This implies that great care is
613 * required before attempting reentrancy or any kind of nested
614 * subroutine invocations.
616 static void invoke_subroutine( struct brw_wm_compile
*c
,
617 enum _subroutine subroutine
,
618 void (*emit
)( struct brw_wm_compile
* ) )
620 struct brw_compile
*p
= &c
->func
;
622 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
624 if( c
->subroutines
[ subroutine
] ) {
625 /* subroutine previously emitted: reuse existing instructions */
627 int mark
= mark_tmps( c
);
628 struct brw_reg return_address
= retype( alloc_tmp( c
),
629 BRW_REGISTER_TYPE_UD
);
630 int here
= p
->nr_insn
;
632 brw_push_insn_state(p
);
633 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
634 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
636 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
637 brw_imm_d( ( c
->subroutines
[ subroutine
] -
639 brw_pop_insn_state(p
);
641 release_tmps( c
, mark
);
643 /* previously unused subroutine: emit, and mark for later reuse */
645 int mark
= mark_tmps( c
);
646 struct brw_reg return_address
= retype( alloc_tmp( c
),
647 BRW_REGISTER_TYPE_UD
);
648 struct brw_instruction
*calc
;
649 int base
= p
->nr_insn
;
651 brw_push_insn_state(p
);
652 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
653 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
654 brw_pop_insn_state(p
);
656 c
->subroutines
[ subroutine
] = p
->nr_insn
;
660 brw_push_insn_state(p
);
661 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
662 brw_MOV( p
, brw_ip_reg(), return_address
);
663 brw_pop_insn_state(p
);
665 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
667 release_tmps( c
, mark
);
671 static void emit_arl(struct brw_wm_compile
*c
,
672 const struct prog_instruction
*inst
)
674 struct brw_compile
*p
= &c
->func
;
675 struct brw_reg src0
, addr_reg
;
676 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
677 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
679 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
680 brw_MOV(p
, addr_reg
, src0
);
681 brw_set_saturate(p
, 0);
685 * For GLSL shaders, this KIL will be unconditional.
686 * It may be contained inside an IF/ENDIF structure of course.
688 static void emit_kil(struct brw_wm_compile
*c
)
690 struct brw_compile
*p
= &c
->func
;
691 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
692 brw_push_insn_state(p
);
693 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
694 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
695 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
696 brw_pop_insn_state(p
);
699 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
701 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
705 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
707 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
710 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
712 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
715 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
717 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
721 /* One-, two- and three-dimensional Perlin noise, similar to the description
722 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
723 static void noise1_sub( struct brw_wm_compile
*c
) {
725 struct brw_compile
*p
= &c
->func
;
726 struct brw_reg param
,
727 x0
, x1
, /* gradients at each end */
728 t
, tmp
[ 2 ], /* float temporaries */
729 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
731 int mark
= mark_tmps( c
);
736 tmp
[ 0 ] = alloc_tmp( c
);
737 tmp
[ 1 ] = alloc_tmp( c
);
738 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
739 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
740 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
741 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
742 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
744 param
= lookup_tmp( c
, mark
- 2 );
746 brw_set_access_mode( p
, BRW_ALIGN_1
);
748 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
750 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
751 be hashed. Also compute the remainder (offset within the unit
752 length), interleaved to reduce register dependency penalties. */
753 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
754 brw_FRC( p
, param
, param
);
755 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
756 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
757 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
759 /* We're now ready to perform the hashing. The two hashes are
760 interleaved for performance. The hash function used is
761 designed to rapidly achieve avalanche and require only 32x16
762 bit multiplication, and 16-bit swizzles (which we get for
763 free). We can't use immediate operands in the multiplies,
764 because immediates are permitted only in src1 and the 16-bit
765 factor is permitted only in src0. */
766 for( i
= 0; i
< 2; i
++ )
767 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
768 for( i
= 0; i
< 2; i
++ )
769 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
770 high_words( itmp
[ i
] ) );
771 for( i
= 0; i
< 2; i
++ )
772 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
773 for( i
= 0; i
< 2; i
++ )
774 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
775 high_words( itmp
[ i
] ) );
776 for( i
= 0; i
< 2; i
++ )
777 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
778 for( i
= 0; i
< 2; i
++ )
779 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
780 high_words( itmp
[ i
] ) );
782 /* Now we want to initialise the two gradients based on the
783 hashes. Format conversion from signed integer to float leaves
784 everything scaled too high by a factor of pow( 2, 31 ), but
785 we correct for that right at the end. */
786 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
787 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
788 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
790 brw_MUL( p
, x0
, x0
, param
);
791 brw_MUL( p
, x1
, x1
, t
);
793 /* We interpolate between the gradients using the polynomial
794 6t^5 - 15t^4 + 10t^3 (Perlin). */
795 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
796 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
797 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
798 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
799 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
800 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
802 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
803 brw_MUL( p
, param
, tmp
[ 0 ], param
);
804 brw_MUL( p
, x1
, x1
, param
);
805 brw_ADD( p
, x0
, x0
, x1
);
806 /* scale by pow( 2, -30 ), to compensate for the format conversion
807 above and an extra factor of 2 so that a single gradient covers
809 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
811 release_tmps( c
, mark
);
814 static void emit_noise1( struct brw_wm_compile
*c
,
815 const struct prog_instruction
*inst
)
817 struct brw_compile
*p
= &c
->func
;
818 struct brw_reg src
, param
, dst
;
819 GLuint mask
= inst
->DstReg
.WriteMask
;
821 int mark
= mark_tmps( c
);
825 src
= get_src_reg( c
, inst
, 0, 0 );
827 param
= alloc_tmp( c
);
829 brw_MOV( p
, param
, src
);
831 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
833 /* Fill in the result: */
834 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
835 for (i
= 0 ; i
< 4; i
++) {
837 dst
= get_dst_reg(c
, inst
, i
);
838 brw_MOV( p
, dst
, param
);
841 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
842 brw_set_saturate( p
, 0 );
844 release_tmps( c
, mark
);
847 static void noise2_sub( struct brw_wm_compile
*c
) {
849 struct brw_compile
*p
= &c
->func
;
850 struct brw_reg param0
, param1
,
851 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
852 t
, tmp
[ 4 ], /* float temporaries */
853 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
855 int mark
= mark_tmps( c
);
857 x0y0
= alloc_tmp( c
);
858 x0y1
= alloc_tmp( c
);
859 x1y0
= alloc_tmp( c
);
860 x1y1
= alloc_tmp( c
);
862 for( i
= 0; i
< 4; i
++ ) {
863 tmp
[ i
] = alloc_tmp( c
);
864 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
866 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
867 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
868 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
870 param0
= lookup_tmp( c
, mark
- 3 );
871 param1
= lookup_tmp( c
, mark
- 2 );
873 brw_set_access_mode( p
, BRW_ALIGN_1
);
875 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
876 be hashed. Also compute the remainders (offsets within the unit
877 square), interleaved to reduce register dependency penalties. */
878 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
879 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
880 brw_FRC( p
, param0
, param0
);
881 brw_FRC( p
, param1
, param1
);
882 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
883 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
884 low_words( itmp
[ 1 ] ) );
885 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
886 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
887 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
888 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
889 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
891 /* We're now ready to perform the hashing. The four hashes are
892 interleaved for performance. The hash function used is
893 designed to rapidly achieve avalanche and require only 32x16
894 bit multiplication, and 16-bit swizzles (which we get for
895 free). We can't use immediate operands in the multiplies,
896 because immediates are permitted only in src1 and the 16-bit
897 factor is permitted only in src0. */
898 for( i
= 0; i
< 4; i
++ )
899 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
900 for( i
= 0; i
< 4; i
++ )
901 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
902 high_words( itmp
[ i
] ) );
903 for( i
= 0; i
< 4; i
++ )
904 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
905 for( i
= 0; i
< 4; i
++ )
906 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
907 high_words( itmp
[ i
] ) );
908 for( i
= 0; i
< 4; i
++ )
909 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
910 for( i
= 0; i
< 4; i
++ )
911 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
912 high_words( itmp
[ i
] ) );
914 /* Now we want to initialise the four gradients based on the
915 hashes. Format conversion from signed integer to float leaves
916 everything scaled too high by a factor of pow( 2, 15 ), but
917 we correct for that right at the end. */
918 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
919 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
920 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
921 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
922 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
924 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
925 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
926 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
927 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
929 brw_MUL( p
, x1y0
, x1y0
, t
);
930 brw_MUL( p
, x1y1
, x1y1
, t
);
931 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
932 brw_MUL( p
, x0y0
, x0y0
, param0
);
933 brw_MUL( p
, x0y1
, x0y1
, param0
);
935 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
936 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
937 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
938 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
940 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
941 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
942 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
943 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
945 /* We interpolate between the gradients using the polynomial
946 6t^5 - 15t^4 + 10t^3 (Perlin). */
947 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
948 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
949 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
950 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
951 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
952 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
953 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
955 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
956 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
957 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
958 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
959 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
961 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
962 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
963 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
964 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
966 /* Here we interpolate in the y dimension... */
967 brw_MUL( p
, x0y1
, x0y1
, param1
);
968 brw_MUL( p
, x1y1
, x1y1
, param1
);
969 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
970 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
972 /* And now in x. There are horrible register dependencies here,
973 but we have nothing else to do. */
974 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
975 brw_MUL( p
, x1y0
, x1y0
, param0
);
976 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
978 /* scale by pow( 2, -15 ), as described above */
979 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
981 release_tmps( c
, mark
);
984 static void emit_noise2( struct brw_wm_compile
*c
,
985 const struct prog_instruction
*inst
)
987 struct brw_compile
*p
= &c
->func
;
988 struct brw_reg src0
, src1
, param0
, param1
, dst
;
989 GLuint mask
= inst
->DstReg
.WriteMask
;
991 int mark
= mark_tmps( c
);
995 src0
= get_src_reg( c
, inst
, 0, 0 );
996 src1
= get_src_reg( c
, inst
, 0, 1 );
998 param0
= alloc_tmp( c
);
999 param1
= alloc_tmp( c
);
1001 brw_MOV( p
, param0
, src0
);
1002 brw_MOV( p
, param1
, src1
);
1004 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1006 /* Fill in the result: */
1007 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1008 for (i
= 0 ; i
< 4; i
++) {
1009 if (mask
& (1<<i
)) {
1010 dst
= get_dst_reg(c
, inst
, i
);
1011 brw_MOV( p
, dst
, param0
);
1014 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1015 brw_set_saturate( p
, 0 );
1017 release_tmps( c
, mark
);
1021 * The three-dimensional case is much like the one- and two- versions above,
1022 * but since the number of corners is rapidly growing we now pack 16 16-bit
1023 * hashes into each register to extract more parallelism from the EUs.
1025 static void noise3_sub( struct brw_wm_compile
*c
) {
1027 struct brw_compile
*p
= &c
->func
;
1028 struct brw_reg param0
, param1
, param2
,
1029 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1030 xi
, yi
, zi
, /* interpolation coefficients */
1031 t
, tmp
[ 8 ], /* float temporaries */
1032 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1033 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1035 int mark
= mark_tmps( c
);
1037 x0y0
= alloc_tmp( c
);
1038 x0y1
= alloc_tmp( c
);
1039 x1y0
= alloc_tmp( c
);
1040 x1y1
= alloc_tmp( c
);
1041 xi
= alloc_tmp( c
);
1042 yi
= alloc_tmp( c
);
1043 zi
= alloc_tmp( c
);
1045 for( i
= 0; i
< 8; i
++ ) {
1046 tmp
[ i
] = alloc_tmp( c
);
1047 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1048 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1051 param0
= lookup_tmp( c
, mark
- 4 );
1052 param1
= lookup_tmp( c
, mark
- 3 );
1053 param2
= lookup_tmp( c
, mark
- 2 );
1055 brw_set_access_mode( p
, BRW_ALIGN_1
);
1057 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1058 be hashed. Also compute the remainders (offsets within the unit
1059 cube), interleaved to reduce register dependency penalties. */
1060 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1061 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1062 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1063 brw_FRC( p
, param0
, param0
);
1064 brw_FRC( p
, param1
, param1
);
1065 brw_FRC( p
, param2
, param2
);
1066 /* Since we now have only 16 bits of precision in the hash, we must
1067 be more careful about thorough mixing to maintain entropy as we
1068 squash the input vector into a small scalar. */
1069 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1070 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1071 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1072 brw_imm_uw( 0x9B93 ) );
1073 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1074 brw_imm_uw( 0xBC8F ) );
1076 /* Temporarily disable the execution mask while we work with ExecSize=16
1077 channels (the mask is set for ExecSize=8 and is probably incorrect).
1078 Although this might cause execution of unwanted channels, the code
1079 writes only to temporary registers and has no side effects, so
1080 disabling the mask is harmless. */
1081 brw_push_insn_state( p
);
1082 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1083 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1084 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1085 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1087 /* We're now ready to perform the hashing. The eight hashes are
1088 interleaved for performance. The hash function used is
1089 designed to rapidly achieve avalanche and require only 16x16
1090 bit multiplication, and 8-bit swizzles (which we get for
1092 for( i
= 0; i
< 4; i
++ )
1093 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1094 for( i
= 0; i
< 4; i
++ )
1095 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1096 odd_bytes( wtmp
[ i
] ) );
1097 for( i
= 0; i
< 4; i
++ )
1098 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1099 for( i
= 0; i
< 4; i
++ )
1100 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1101 odd_bytes( wtmp
[ i
] ) );
1102 brw_pop_insn_state( p
);
1104 /* Now we want to initialise the four rear gradients based on the
1105 hashes. Format conversion from signed integer to float leaves
1106 everything scaled too high by a factor of pow( 2, 15 ), but
1107 we correct for that right at the end. */
1109 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1110 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1111 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1112 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1113 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1115 brw_push_insn_state( p
);
1116 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1117 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1118 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1119 brw_pop_insn_state( p
);
1121 brw_MUL( p
, x1y0
, x1y0
, t
);
1122 brw_MUL( p
, x1y1
, x1y1
, t
);
1123 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1124 brw_MUL( p
, x0y0
, x0y0
, param0
);
1125 brw_MUL( p
, x0y1
, x0y1
, param0
);
1128 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1129 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1130 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1131 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1133 brw_push_insn_state( p
);
1134 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1135 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1136 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1137 brw_pop_insn_state( p
);
1139 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1140 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1141 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1142 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1143 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1145 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1146 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1147 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1148 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1151 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1152 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1153 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1154 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1156 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1157 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1158 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1159 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1161 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1162 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1163 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1164 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1166 /* We interpolate between the gradients using the polynomial
1167 6t^5 - 15t^4 + 10t^3 (Perlin). */
1168 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1169 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1170 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1171 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1172 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1173 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1174 brw_MUL( p
, xi
, xi
, param0
);
1175 brw_MUL( p
, yi
, yi
, param1
);
1176 brw_MUL( p
, zi
, zi
, param2
);
1177 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1178 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1179 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1180 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1181 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1182 brw_MUL( p
, xi
, xi
, param0
);
1183 brw_MUL( p
, yi
, yi
, param1
);
1184 brw_MUL( p
, zi
, zi
, param2
);
1185 brw_MUL( p
, xi
, xi
, param0
);
1186 brw_MUL( p
, yi
, yi
, param1
);
1187 brw_MUL( p
, zi
, zi
, param2
);
1188 brw_MUL( p
, xi
, xi
, param0
);
1189 brw_MUL( p
, yi
, yi
, param1
);
1190 brw_MUL( p
, zi
, zi
, param2
);
1192 /* Here we interpolate in the y dimension... */
1193 brw_MUL( p
, x0y1
, x0y1
, yi
);
1194 brw_MUL( p
, x1y1
, x1y1
, yi
);
1195 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1196 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1198 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1199 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1200 brw_MUL( p
, x1y0
, x1y0
, xi
);
1201 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1203 /* Now do the same thing for the front four gradients... */
1205 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1206 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1207 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1208 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1210 brw_push_insn_state( p
);
1211 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1212 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1213 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1214 brw_pop_insn_state( p
);
1216 brw_MUL( p
, x1y0
, x1y0
, t
);
1217 brw_MUL( p
, x1y1
, x1y1
, t
);
1218 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1219 brw_MUL( p
, x0y0
, x0y0
, param0
);
1220 brw_MUL( p
, x0y1
, x0y1
, param0
);
1223 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1224 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1225 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1226 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1228 brw_push_insn_state( p
);
1229 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1230 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1231 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1232 brw_pop_insn_state( p
);
1234 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1235 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1236 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1237 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1238 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1240 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1241 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1242 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1243 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1246 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1247 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1248 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1249 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1251 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1252 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1253 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1254 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1256 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1257 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1258 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1259 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1261 /* The interpolation coefficients are still around from last time, so
1262 again interpolate in the y dimension... */
1263 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1264 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1265 brw_MUL( p
, x0y1
, x0y1
, yi
);
1266 brw_MUL( p
, x1y1
, x1y1
, yi
);
1267 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1268 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1270 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1271 time put the front face in tmp[ 1 ] and we're nearly there... */
1272 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1273 brw_MUL( p
, x1y0
, x1y0
, xi
);
1274 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1276 /* The final interpolation, in the z dimension: */
1277 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1278 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1279 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1281 /* scale by pow( 2, -15 ), as described above */
1282 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1284 release_tmps( c
, mark
);
1287 static void emit_noise3( struct brw_wm_compile
*c
,
1288 const struct prog_instruction
*inst
)
1290 struct brw_compile
*p
= &c
->func
;
1291 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1292 GLuint mask
= inst
->DstReg
.WriteMask
;
1294 int mark
= mark_tmps( c
);
1296 assert( mark
== 0 );
1298 src0
= get_src_reg( c
, inst
, 0, 0 );
1299 src1
= get_src_reg( c
, inst
, 0, 1 );
1300 src2
= get_src_reg( c
, inst
, 0, 2 );
1302 param0
= alloc_tmp( c
);
1303 param1
= alloc_tmp( c
);
1304 param2
= alloc_tmp( c
);
1306 brw_MOV( p
, param0
, src0
);
1307 brw_MOV( p
, param1
, src1
);
1308 brw_MOV( p
, param2
, src2
);
1310 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1312 /* Fill in the result: */
1313 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1314 for (i
= 0 ; i
< 4; i
++) {
1315 if (mask
& (1<<i
)) {
1316 dst
= get_dst_reg(c
, inst
, i
);
1317 brw_MOV( p
, dst
, param0
);
1320 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1321 brw_set_saturate( p
, 0 );
1323 release_tmps( c
, mark
);
1327 * For the four-dimensional case, the little micro-optimisation benefits
1328 * we obtain by unrolling all the loops aren't worth the massive bloat it
1329 * now causes. Instead, we loop twice around performing a similar operation
1330 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1331 * code to glue it all together.
1333 static void noise4_sub( struct brw_wm_compile
*c
)
1335 struct brw_compile
*p
= &c
->func
;
1336 struct brw_reg param
[ 4 ],
1337 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1338 w0
, /* noise for the w=0 cube */
1339 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1340 interp
[ 4 ], /* interpolation coefficients */
1341 t
, tmp
[ 8 ], /* float temporaries */
1342 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1343 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1345 int mark
= mark_tmps( c
);
1346 GLuint loop
, origin
;
1348 x0y0
= alloc_tmp( c
);
1349 x0y1
= alloc_tmp( c
);
1350 x1y0
= alloc_tmp( c
);
1351 x1y1
= alloc_tmp( c
);
1353 w0
= alloc_tmp( c
);
1354 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1355 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1357 for( i
= 0; i
< 4; i
++ ) {
1358 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1359 interp
[ i
] = alloc_tmp( c
);
1362 for( i
= 0; i
< 8; i
++ ) {
1363 tmp
[ i
] = alloc_tmp( c
);
1364 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1365 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1368 brw_set_access_mode( p
, BRW_ALIGN_1
);
1370 /* We only want 16 bits of precision from the integral part of each
1371 co-ordinate, but unfortunately the RNDD semantics would saturate
1372 at 16 bits if we performed the operation directly to a 16-bit
1373 destination. Therefore, we round to 32-bit temporaries where
1374 appropriate, and then store only the lower 16 bits. */
1375 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1376 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1377 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1378 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1379 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1380 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1382 /* Modify the flag register here, because the side effect is useful
1383 later (see below). We know for certain that all flags will be
1384 cleared, since the FRC instruction cannot possibly generate
1385 negative results. Even for exceptional inputs (infinities, denormals,
1386 NaNs), the architecture guarantees that the L conditional is false. */
1387 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1388 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1389 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1390 for( i
= 1; i
< 4; i
++ )
1391 brw_FRC( p
, param
[ i
], param
[ i
] );
1393 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1395 for( i
= 0; i
< 4; i
++ )
1396 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1397 for( i
= 0; i
< 4; i
++ )
1398 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1399 for( i
= 0; i
< 4; i
++ )
1400 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1401 for( i
= 0; i
< 4; i
++ )
1402 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1403 for( j
= 0; j
< 3; j
++ )
1404 for( i
= 0; i
< 4; i
++ )
1405 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1407 /* Mark the current address, as it will be a jump destination. The
1408 following code will be executed twice: first, with the flag
1409 register clear indicating the w=0 case, and second with flags
1413 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1414 be hashed. Since we have only 16 bits of precision in the hash, we
1415 must be careful about thorough mixing to maintain entropy as we
1416 squash the input vector into a small scalar. */
1417 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1418 brw_imm_uw( 0xBC8F ) );
1419 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1420 brw_imm_uw( 0xD0BD ) );
1421 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1422 brw_imm_uw( 0x9B93 ) );
1423 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1424 brw_imm_uw( 0xA359 ) );
1425 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1426 brw_imm_uw( 0xBC8F ) );
1428 /* Temporarily disable the execution mask while we work with ExecSize=16
1429 channels (the mask is set for ExecSize=8 and is probably incorrect).
1430 Although this might cause execution of unwanted channels, the code
1431 writes only to temporary registers and has no side effects, so
1432 disabling the mask is harmless. */
1433 brw_push_insn_state( p
);
1434 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1435 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1436 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1437 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1439 /* We're now ready to perform the hashing. The eight hashes are
1440 interleaved for performance. The hash function used is
1441 designed to rapidly achieve avalanche and require only 16x16
1442 bit multiplication, and 8-bit swizzles (which we get for
1444 for( i
= 0; i
< 4; i
++ )
1445 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1446 for( i
= 0; i
< 4; i
++ )
1447 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1448 odd_bytes( wtmp
[ i
] ) );
1449 for( i
= 0; i
< 4; i
++ )
1450 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1451 for( i
= 0; i
< 4; i
++ )
1452 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1453 odd_bytes( wtmp
[ i
] ) );
1454 brw_pop_insn_state( p
);
1456 /* Now we want to initialise the four rear gradients based on the
1457 hashes. Format conversion from signed integer to float leaves
1458 everything scaled too high by a factor of pow( 2, 15 ), but
1459 we correct for that right at the end. */
1461 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1462 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1463 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1464 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1465 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1467 brw_push_insn_state( p
);
1468 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1469 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1470 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1471 brw_pop_insn_state( p
);
1473 brw_MUL( p
, x1y0
, x1y0
, t
);
1474 brw_MUL( p
, x1y1
, x1y1
, t
);
1475 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1476 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1477 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1480 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1481 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1482 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1483 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1485 brw_push_insn_state( p
);
1486 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1487 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1488 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1489 brw_pop_insn_state( p
);
1491 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1492 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1493 /* prepare t for the w component (used below): w the first time through
1494 the loop; w - 1 the second time) */
1495 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1496 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1497 p
->current
->header
.predicate_inverse
= 1;
1498 brw_MOV( p
, t
, param
[ 3 ] );
1499 p
->current
->header
.predicate_inverse
= 0;
1500 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1501 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1502 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1504 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1505 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1506 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1507 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1510 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1511 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1512 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1513 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1515 brw_push_insn_state( p
);
1516 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1517 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1518 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1519 brw_pop_insn_state( p
);
1521 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
1522 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
1523 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
1524 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
1526 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1527 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1528 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1529 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1532 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1533 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1534 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1535 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1537 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1538 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1539 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1540 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1541 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1543 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1544 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1545 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1546 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1548 /* Here we interpolate in the y dimension... */
1549 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1550 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1551 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1552 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1553 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1554 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1556 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1557 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1558 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1559 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1561 /* Now do the same thing for the front four gradients... */
1563 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1564 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1565 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1566 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1568 brw_push_insn_state( p
);
1569 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1570 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1571 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1572 brw_pop_insn_state( p
);
1574 brw_MUL( p
, x1y0
, x1y0
, t
);
1575 brw_MUL( p
, x1y1
, x1y1
, t
);
1576 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1577 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1578 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1581 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1582 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1583 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1584 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1586 brw_push_insn_state( p
);
1587 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1588 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1589 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1590 brw_pop_insn_state( p
);
1592 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1593 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1594 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
1595 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1596 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1598 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1599 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1600 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1601 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1604 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1605 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1606 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1607 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1609 brw_push_insn_state( p
);
1610 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1611 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1612 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1613 brw_pop_insn_state( p
);
1615 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1616 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1617 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1618 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1619 /* prepare t for the w component (used below): w the first time through
1620 the loop; w - 1 the second time) */
1621 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1622 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1623 p
->current
->header
.predicate_inverse
= 1;
1624 brw_MOV( p
, t
, param
[ 3 ] );
1625 p
->current
->header
.predicate_inverse
= 0;
1626 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1628 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1629 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1630 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1631 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1634 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1635 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1636 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1637 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1639 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1640 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1641 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1642 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1644 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1645 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1646 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1647 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1649 /* Interpolate in the y dimension: */
1650 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1651 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1652 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1653 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1654 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1655 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1657 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1658 time put the front face in tmp[ 1 ] and we're nearly there... */
1659 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1660 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1661 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1663 /* Another interpolation, in the z dimension: */
1664 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1665 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
1666 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1668 /* Exit the loop if we've computed both cubes... */
1669 origin
= p
->nr_insn
;
1670 brw_push_insn_state( p
);
1671 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1672 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1673 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
1674 brw_pop_insn_state( p
);
1676 /* Save the result for the w=0 case, and increment the w coordinate: */
1677 brw_MOV( p
, w0
, tmp
[ 0 ] );
1678 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
1681 /* Loop around for the other cube. Explicitly set the flag register
1682 (unfortunately we must spend an extra instruction to do this: we
1683 can't rely on a side effect of the previous MOV or ADD because
1684 conditional modifiers which are normally true might be false in
1685 exceptional circumstances, e.g. given a NaN input; the add to
1686 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
1687 brw_push_insn_state( p
);
1688 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1689 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
1690 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
1691 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
1692 brw_pop_insn_state( p
);
1694 /* Patch the previous conditional branch now that we know the
1695 destination address. */
1696 brw_set_src1( p
->store
+ origin
,
1697 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
1699 /* The very last interpolation. */
1700 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
1701 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
1702 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
1704 /* scale by pow( 2, -15 ), as described above */
1705 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1707 release_tmps( c
, mark
);
1710 static void emit_noise4( struct brw_wm_compile
*c
,
1711 const struct prog_instruction
*inst
)
1713 struct brw_compile
*p
= &c
->func
;
1714 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
1715 GLuint mask
= inst
->DstReg
.WriteMask
;
1717 int mark
= mark_tmps( c
);
1719 assert( mark
== 0 );
1721 src0
= get_src_reg( c
, inst
, 0, 0 );
1722 src1
= get_src_reg( c
, inst
, 0, 1 );
1723 src2
= get_src_reg( c
, inst
, 0, 2 );
1724 src3
= get_src_reg( c
, inst
, 0, 3 );
1726 param0
= alloc_tmp( c
);
1727 param1
= alloc_tmp( c
);
1728 param2
= alloc_tmp( c
);
1729 param3
= alloc_tmp( c
);
1731 brw_MOV( p
, param0
, src0
);
1732 brw_MOV( p
, param1
, src1
);
1733 brw_MOV( p
, param2
, src2
);
1734 brw_MOV( p
, param3
, src3
);
1736 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
1738 /* Fill in the result: */
1739 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1740 for (i
= 0 ; i
< 4; i
++) {
1741 if (mask
& (1<<i
)) {
1742 dst
= get_dst_reg(c
, inst
, i
);
1743 brw_MOV( p
, dst
, param0
);
1746 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1747 brw_set_saturate( p
, 0 );
1749 release_tmps( c
, mark
);
1753 * Resolve subroutine calls after code emit is done.
1755 static void post_wm_emit( struct brw_wm_compile
*c
)
1757 brw_resolve_cals(&c
->func
);
1761 get_argument_regs(struct brw_wm_compile
*c
,
1762 const struct prog_instruction
*inst
,
1764 struct brw_reg
*dst
,
1765 struct brw_reg
*regs
,
1768 struct brw_compile
*p
= &c
->func
;
1771 for (i
= 0; i
< 4; i
++) {
1772 if (mask
& (1 << i
)) {
1773 regs
[i
] = get_src_reg(c
, inst
, index
, i
);
1775 /* Unalias destination registers from our sources. */
1776 if (regs
[i
].file
== BRW_GENERAL_REGISTER_FILE
) {
1777 for (j
= 0; j
< 4; j
++) {
1778 if (memcmp(®s
[i
], &dst
[j
], sizeof(regs
[0])) == 0) {
1779 struct brw_reg tmp
= alloc_tmp(c
);
1780 brw_MOV(p
, tmp
, regs
[i
]);
1790 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1792 struct intel_context
*intel
= &brw
->intel
;
1793 #define MAX_IF_DEPTH 32
1794 #define MAX_LOOP_DEPTH 32
1795 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
1796 GLuint i
, if_depth
= 0, loop_depth
= 0;
1797 struct brw_compile
*p
= &c
->func
;
1798 struct brw_indirect stack_index
= brw_indirect(0, 0);
1800 c
->out_of_regs
= GL_FALSE
;
1803 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1804 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1806 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
1807 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
1809 struct brw_reg args
[3][4], dst
[4];
1811 int mark
= mark_tmps( c
);
1816 printf("Inst %d: ", i
);
1817 _mesa_print_instruction(inst
);
1820 /* fetch any constants that this instruction needs */
1821 if (c
->fp
->use_const_buffer
)
1822 fetch_constants(c
, inst
);
1824 if (inst
->Opcode
!= OPCODE_ARL
) {
1825 for (j
= 0; j
< 4; j
++) {
1826 if (inst
->DstReg
.WriteMask
& (1 << j
))
1827 dst
[j
] = get_dst_reg(c
, inst
, j
);
1829 dst
[j
] = brw_null_reg();
1832 for (j
= 0; j
< brw_wm_nr_args(inst
->Opcode
); j
++)
1833 get_argument_regs(c
, inst
, j
, dst
, args
[j
], WRITEMASK_XYZW
);
1835 dst_flags
= inst
->DstReg
.WriteMask
;
1836 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1837 dst_flags
|= SATURATE
;
1839 if (inst
->CondUpdate
)
1840 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
1842 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
1844 switch (inst
->Opcode
) {
1846 emit_pixel_xy(c
, dst
, dst_flags
);
1849 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1852 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1855 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1858 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1861 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1864 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1867 emit_fb_write(c
, args
[0], args
[1], args
[2],
1868 INST_AUX_GET_TARGET(inst
->Aux
),
1869 inst
->Aux
& INST_AUX_EOT
);
1871 case WM_FRONTFACING
:
1872 emit_frontfacing(p
, dst
, dst_flags
);
1875 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1881 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1884 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1887 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1890 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1894 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1897 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1900 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1903 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1906 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1909 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1912 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1915 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1918 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1921 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1924 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1927 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1930 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1933 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1937 emit_ddxy(p
, dst
, dst_flags
, (inst
->Opcode
== OPCODE_DDX
),
1941 emit_sop(p
, dst
, dst_flags
,
1942 BRW_CONDITIONAL_L
, args
[0], args
[1]);
1945 emit_sop(p
, dst
, dst_flags
,
1946 BRW_CONDITIONAL_LE
, args
[0], args
[1]);
1949 emit_sop(p
, dst
, dst_flags
,
1950 BRW_CONDITIONAL_G
, args
[0], args
[1]);
1953 emit_sop(p
, dst
, dst_flags
,
1954 BRW_CONDITIONAL_GE
, args
[0], args
[1]);
1957 emit_sop(p
, dst
, dst_flags
,
1958 BRW_CONDITIONAL_EQ
, args
[0], args
[1]);
1961 emit_sop(p
, dst
, dst_flags
,
1962 BRW_CONDITIONAL_NEQ
, args
[0], args
[1]);
1965 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1968 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1969 dst
, dst_flags
, args
[0], args
[1]);
1972 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1975 emit_noise1(c
, inst
);
1978 emit_noise2(c
, inst
);
1981 emit_noise3(c
, inst
);
1984 emit_noise4(c
, inst
);
1987 emit_tex(c
, dst
, dst_flags
, args
[0],
1988 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
1992 (c
->key
.shadowtex_mask
& (1 << inst
->TexSrcUnit
)) != 0);
1995 emit_txb(c
, dst
, dst_flags
, args
[0],
1996 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
1999 c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
]);
2005 assert(if_depth
< MAX_IF_DEPTH
);
2006 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
2009 assert(if_depth
> 0);
2010 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2013 assert(if_depth
> 0);
2014 brw_ENDIF(p
, if_inst
[--if_depth
]);
2017 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2023 brw_push_insn_state(p
);
2024 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2025 brw_set_access_mode(p
, BRW_ALIGN_1
);
2026 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2027 brw_set_access_mode(p
, BRW_ALIGN_16
);
2028 brw_ADD(p
, get_addr_reg(stack_index
),
2029 get_addr_reg(stack_index
), brw_imm_d(4));
2030 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2031 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2032 brw_pop_insn_state(p
);
2036 brw_push_insn_state(p
);
2037 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2038 brw_ADD(p
, get_addr_reg(stack_index
),
2039 get_addr_reg(stack_index
), brw_imm_d(-4));
2040 brw_set_access_mode(p
, BRW_ALIGN_1
);
2041 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2042 brw_set_access_mode(p
, BRW_ALIGN_16
);
2043 brw_pop_insn_state(p
);
2046 case OPCODE_BGNLOOP
:
2047 /* XXX may need to invalidate the current_constant regs */
2048 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2052 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2056 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2058 case OPCODE_ENDLOOP
:
2060 struct brw_instruction
*inst0
, *inst1
;
2063 if (intel
->is_ironlake
)
2066 assert(loop_depth
> 0);
2068 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2069 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2070 while (inst0
> loop_inst
[loop_depth
]) {
2072 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2073 inst0
->bits3
.if_else
.jump_count
== 0) {
2074 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2075 inst0
->bits3
.if_else
.pop_count
= 0;
2077 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2078 inst0
->bits3
.if_else
.jump_count
== 0) {
2079 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2080 inst0
->bits3
.if_else
.pop_count
= 0;
2086 printf("unsupported opcode %d (%s) in fragment shader\n",
2087 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
2088 _mesa_opcode_string(inst
->Opcode
) : "unknown");
2091 /* Release temporaries containing any unaliased source regs. */
2092 release_tmps( c
, mark
);
2094 if (inst
->CondUpdate
)
2095 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2097 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2101 if (INTEL_DEBUG
& DEBUG_WM
) {
2102 printf("wm-native:\n");
2103 for (i
= 0; i
< p
->nr_insn
; i
++)
2104 brw_disasm(stderr
, &p
->store
[i
]);
 * Do GPU code generation for shaders that use GLSL features such as
 * flow control.  Other shaders will be compiled with the simpler
 * non-GLSL code path (see brw_wm_emit()).
2113 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2115 if (INTEL_DEBUG
& DEBUG_WM
) {
2116 printf("brw_wm_glsl_emit:\n");
2119 /* initial instruction translation/simplification */
2122 /* actual code generation */
2123 brw_wm_emit_glsl(brw
, c
);
2125 if (INTEL_DEBUG
& DEBUG_WM
) {
2126 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2129 c
->prog_data
.total_grf
= num_grf_used(c
);
2130 c
->prog_data
.total_scratch
= 0;