1 #include "main/macros.h"
2 #include "program/prog_parameter.h"
3 #include "program/prog_print.h"
4 #include "program/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
14 const struct prog_instruction
*inst
,
18 * Determine if the given fragment program uses GLSL features such
19 * as flow conditionals, loops, subroutines.
20 * Some GLSL shaders may use these features, others might not.
22 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
26 if (INTEL_DEBUG
& DEBUG_GLSL_FORCE
)
29 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
30 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
31 switch (inst
->Opcode
) {
54 reclaim_temps(struct brw_wm_compile
*c
);
57 /** Mark GRF register as used. */
59 prealloc_grf(struct brw_wm_compile
*c
, int r
)
61 c
->used_grf
[r
] = GL_TRUE
;
65 /** Mark given GRF register as not in use. */
67 release_grf(struct brw_wm_compile
*c
, int r
)
69 /*assert(c->used_grf[r]);*/
70 c
->used_grf
[r
] = GL_FALSE
;
71 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
75 /** Return index of a free GRF, mark it as used. */
77 alloc_grf(struct brw_wm_compile
*c
)
80 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
81 if (!c
->used_grf
[r
]) {
82 c
->used_grf
[r
] = GL_TRUE
;
83 c
->first_free_grf
= r
+ 1; /* a guess */
88 /* no free temps, try to reclaim some */
90 c
->first_free_grf
= 0;
93 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
94 if (!c
->used_grf
[r
]) {
95 c
->used_grf
[r
] = GL_TRUE
;
96 c
->first_free_grf
= r
+ 1; /* a guess */
101 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
102 assert(c
->used_grf
[r
]);
105 /* really, no free GRF regs found */
106 if (!c
->out_of_regs
) {
107 /* print warning once per compilation */
108 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
109 c
->out_of_regs
= GL_TRUE
;
116 /** Return number of GRF registers used */
118 num_grf_used(const struct brw_wm_compile
*c
)
121 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
130 * Record the mapping of a Mesa register to a hardware register.
132 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
133 int component
, struct brw_reg reg
)
135 c
->wm_regs
[file
][index
][component
].reg
= reg
;
136 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
139 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
143 /* if we need to allocate another temp, grow the tmp_regs[] array */
144 if (c
->tmp_index
== c
->tmp_max
) {
145 int r
= alloc_grf(c
);
147 /*printf("Out of temps in %s\n", __FUNCTION__);*/
148 r
= 50; /* XXX random register! */
150 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
153 /* form the GRF register */
154 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
155 /*printf("alloc_temp %d\n", reg.nr);*/
156 assert(reg
.nr
< BRW_WM_MAX_GRF
);
162 * Save current temp register info.
163 * There must be a matching call to release_tmps().
165 static int mark_tmps(struct brw_wm_compile
*c
)
170 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
172 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
175 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
181 * Convert Mesa src register to brw register.
183 * Since we're running in SOA mode each Mesa register corresponds to four
184 * hardware registers. We allocate the hardware registers as needed here.
186 * \param file register file, one of PROGRAM_x
187 * \param index register number
188 * \param component src component (X=0, Y=1, Z=2, W=3)
189 * \param nr not used?!?
190 * \param neg negate value?
191 * \param abs take absolute value?
193 static struct brw_reg
194 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
195 int nr
, GLuint neg
, GLuint abs
)
199 case PROGRAM_STATE_VAR
:
200 case PROGRAM_CONSTANT
:
201 case PROGRAM_UNIFORM
:
202 file
= PROGRAM_STATE_VAR
;
204 case PROGRAM_UNDEFINED
:
205 return brw_null_reg();
206 case PROGRAM_TEMPORARY
:
209 case PROGRAM_PAYLOAD
:
212 _mesa_problem(NULL
, "Unexpected file in get_reg()");
213 return brw_null_reg();
217 assert(component
< 4);
219 /* see if we've already allocated a HW register for this Mesa register */
220 if (c
->wm_regs
[file
][index
][component
].inited
) {
222 reg
= c
->wm_regs
[file
][index
][component
].reg
;
225 /* no, allocate new register */
226 int grf
= alloc_grf(c
);
227 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
229 /* totally out of temps */
230 grf
= 51; /* XXX random register! */
233 reg
= brw_vec8_grf(grf
, 0);
234 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
236 set_reg(c
, file
, index
, component
, reg
);
239 if (neg
& (1 << component
)) {
250 * This is called if we run out of GRF registers. Examine the live intervals
251 * of temp regs in the program and free those which won't be used again.
254 reclaim_temps(struct brw_wm_compile
*c
)
256 GLint intBegin
[MAX_PROGRAM_TEMPS
];
257 GLint intEnd
[MAX_PROGRAM_TEMPS
];
260 /*printf("Reclaim temps:\n");*/
262 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
265 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
266 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
267 /* program temp[i] can be freed */
269 /*printf(" temp[%d] is dead\n", index);*/
270 for (component
= 0; component
< 4; component
++) {
271 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
272 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
275 printf(" Reclaim temp %d, reg %d at inst %d\n",
276 index, r, c->cur_inst);
278 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
289 * Preallocate registers. This sets up the Mesa to hardware register
290 * mapping for certain registers, such as constants (uniforms/state vars)
293 static void prealloc_reg(struct brw_wm_compile
*c
)
295 struct intel_context
*intel
= &c
->func
.brw
->intel
;
298 int urb_read_length
= 0;
299 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
;
300 GLuint reg_index
= 0;
302 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
303 c
->first_free_grf
= 0;
305 for (i
= 0; i
< 4; i
++) {
306 if (i
< c
->key
.nr_depth_regs
)
307 reg
= brw_vec8_grf(i
* 2, 0);
309 reg
= brw_vec8_grf(0, 0);
310 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
312 reg_index
+= 2 * c
->key
.nr_depth_regs
;
316 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
317 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
319 /* use a real constant buffer, or just use a section of the GRF? */
320 /* XXX this heuristic may need adjustment... */
321 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
322 c
->fp
->use_const_buffer
= GL_TRUE
;
324 c
->fp
->use_const_buffer
= GL_FALSE
;
325 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
327 if (c
->fp
->use_const_buffer
) {
328 /* We'll use a real constant buffer and fetch constants from
329 * it with a dataport read message.
332 /* number of float constants in CURBE */
333 c
->prog_data
.nr_params
= 0;
336 const struct gl_program_parameter_list
*plist
=
337 c
->fp
->program
.Base
.Parameters
;
340 /* number of float constants in CURBE */
341 c
->prog_data
.nr_params
= 4 * nr_params
;
343 /* loop over program constants (float[4]) */
344 for (i
= 0; i
< nr_params
; i
++) {
345 /* loop over XYZW channels */
346 for (j
= 0; j
< 4; j
++, index
++) {
347 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
348 /* Save pointer to parameter/constant value.
349 * Constants will be copied in prepare_constant_buffer()
351 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
352 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
355 /* number of constant regs used (each reg is float[8]) */
356 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
357 reg_index
+= c
->nr_creg
;
361 /* fragment shader inputs */
362 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
365 if (i
>= VERT_RESULT_VAR0
)
366 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
367 else if (i
<= VERT_RESULT_TEX7
)
372 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
373 urb_read_length
= reg_index
;
374 reg
= brw_vec8_grf(reg_index
, 0);
375 for (j
= 0; j
< 4; j
++)
376 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
378 if (c
->key
.vp_outputs_written
& BITFIELD64_BIT(i
)) {
383 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
384 c
->prog_data
.urb_read_length
= urb_read_length
;
385 c
->prog_data
.curb_read_length
= c
->nr_creg
;
386 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
388 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
391 /* mark GRF regs [0..reg_index-1] as in-use */
392 for (i
= 0; i
< reg_index
; i
++)
395 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
396 prealloc_grf(c
, 126);
397 prealloc_grf(c
, 127);
399 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
400 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
401 struct brw_reg dst
[4];
403 switch (inst
->Opcode
) {
406 /* Allocate the channels of texture results contiguously,
407 * since they are written out that way by the sampler unit.
409 for (j
= 0; j
< 4; j
++) {
410 dst
[j
] = get_dst_reg(c
, inst
, j
);
412 assert(dst
[j
].nr
== dst
[j
- 1].nr
+ 1);
420 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
421 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
423 switch (inst
->Opcode
) {
425 /* Allocate WM_DELTAXY destination on G45/GM45 to an
426 * even-numbered GRF if possible so that we can use the PLN
429 if (inst
->DstReg
.WriteMask
== WRITEMASK_XY
&&
430 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][0].inited
&&
431 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][1].inited
&&
432 (IS_G4X(intel
->intelScreen
->deviceID
) || intel
->gen
== 5)) {
435 for (grf
= c
->first_free_grf
& ~1;
436 grf
< BRW_WM_MAX_GRF
;
439 if (!c
->used_grf
[grf
] && !c
->used_grf
[grf
+ 1]) {
440 c
->used_grf
[grf
] = GL_TRUE
;
441 c
->used_grf
[grf
+ 1] = GL_TRUE
;
442 c
->first_free_grf
= grf
+ 2; /* a guess */
444 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 0,
445 brw_vec8_grf(grf
, 0));
446 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 1,
447 brw_vec8_grf(grf
+ 1, 0));
457 /* An instruction may reference up to three constants.
458 * They'll be found in these registers.
459 * XXX alloc these on demand!
461 if (c
->fp
->use_const_buffer
) {
462 for (i
= 0; i
< 3; i
++) {
463 c
->current_const
[i
].index
= -1;
464 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
468 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
469 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
475 * Check if any of the instruction's src registers are constants, uniforms,
476 * or statevars. If so, fetch any constants that we don't already have in
477 * the three GRF slots.
479 static void fetch_constants(struct brw_wm_compile
*c
,
480 const struct prog_instruction
*inst
)
482 struct brw_compile
*p
= &c
->func
;
485 /* loop over instruction src regs */
486 for (i
= 0; i
< 3; i
++) {
487 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
488 if (src
->File
== PROGRAM_STATE_VAR
||
489 src
->File
== PROGRAM_CONSTANT
||
490 src
->File
== PROGRAM_UNIFORM
) {
491 c
->current_const
[i
].index
= src
->Index
;
494 printf(" fetch const[%d] for arg %d into reg %d\n",
495 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
498 /* need to fetch the constant now */
500 c
->current_const
[i
].reg
, /* writeback dest */
501 src
->RelAddr
, /* relative indexing? */
502 16 * src
->Index
, /* byte offset */
503 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
511 * Convert Mesa dst register to brw register.
513 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
514 const struct prog_instruction
*inst
,
518 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
523 static struct brw_reg
524 get_src_reg_const(struct brw_wm_compile
*c
,
525 const struct prog_instruction
*inst
,
526 GLuint srcRegIndex
, GLuint component
)
528 /* We should have already fetched the constant from the constant
529 * buffer in fetch_constants(). Now we just have to return a
530 * register description that extracts the needed component and
531 * smears it across all eight vector components.
533 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
534 struct brw_reg const_reg
;
536 assert(component
< 4);
537 assert(srcRegIndex
< 3);
538 assert(c
->current_const
[srcRegIndex
].index
!= -1);
539 const_reg
= c
->current_const
[srcRegIndex
].reg
;
541 /* extract desired float from the const_reg, and smear */
542 const_reg
= stride(const_reg
, 0, 1, 0);
543 const_reg
.subnr
= component
* 4;
545 if (src
->Negate
& (1 << component
))
546 const_reg
= negate(const_reg
);
548 const_reg
= brw_abs(const_reg
);
551 printf(" form const[%d].%d for arg %d, reg %d\n",
552 c
->current_const
[srcRegIndex
].index
,
563 * Convert Mesa src register to brw register.
565 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
566 const struct prog_instruction
*inst
,
567 GLuint srcRegIndex
, GLuint channel
)
569 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
571 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
573 /* Only one immediate value can be used per native opcode, and it
574 * has be in the src1 slot, so not all Mesa instructions will get
575 * to take advantage of immediate constants.
577 if (brw_wm_arg_can_be_immediate(inst
->Opcode
, srcRegIndex
)) {
578 const struct gl_program_parameter_list
*params
;
580 params
= c
->fp
->program
.Base
.Parameters
;
582 /* Extended swizzle terms */
583 if (component
== SWIZZLE_ZERO
) {
584 return brw_imm_f(0.0F
);
585 } else if (component
== SWIZZLE_ONE
) {
587 return brw_imm_f(-1.0F
);
589 return brw_imm_f(1.0F
);
592 if (src
->File
== PROGRAM_CONSTANT
) {
593 float f
= params
->ParameterValues
[src
->Index
][component
];
604 if (c
->fp
->use_const_buffer
&&
605 (src
->File
== PROGRAM_STATE_VAR
||
606 src
->File
== PROGRAM_CONSTANT
||
607 src
->File
== PROGRAM_UNIFORM
)) {
608 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
611 /* other type of source register */
612 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
613 src
->Negate
, src
->Abs
);
618 * Subroutines are minimal support for resusable instruction sequences.
619 * They are implemented as simply as possible to minimise overhead: there
620 * is no explicit support for communication between the caller and callee
621 * other than saving the return address in a temporary register, nor is
622 * there any automatic local storage. This implies that great care is
623 * required before attempting reentrancy or any kind of nested
624 * subroutine invocations.
626 static void invoke_subroutine( struct brw_wm_compile
*c
,
627 enum _subroutine subroutine
,
628 void (*emit
)( struct brw_wm_compile
* ) )
630 struct brw_compile
*p
= &c
->func
;
632 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
634 if( c
->subroutines
[ subroutine
] ) {
635 /* subroutine previously emitted: reuse existing instructions */
637 int mark
= mark_tmps( c
);
638 struct brw_reg return_address
= retype( alloc_tmp( c
),
639 BRW_REGISTER_TYPE_UD
);
640 int here
= p
->nr_insn
;
642 brw_push_insn_state(p
);
643 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
644 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
646 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
647 brw_imm_d( ( c
->subroutines
[ subroutine
] -
649 brw_pop_insn_state(p
);
651 release_tmps( c
, mark
);
653 /* previously unused subroutine: emit, and mark for later reuse */
655 int mark
= mark_tmps( c
);
656 struct brw_reg return_address
= retype( alloc_tmp( c
),
657 BRW_REGISTER_TYPE_UD
);
658 struct brw_instruction
*calc
;
659 int base
= p
->nr_insn
;
661 brw_push_insn_state(p
);
662 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
663 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
664 brw_pop_insn_state(p
);
666 c
->subroutines
[ subroutine
] = p
->nr_insn
;
670 brw_push_insn_state(p
);
671 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
672 brw_MOV( p
, brw_ip_reg(), return_address
);
673 brw_pop_insn_state(p
);
675 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
677 release_tmps( c
, mark
);
681 static void emit_arl(struct brw_wm_compile
*c
,
682 const struct prog_instruction
*inst
)
684 struct brw_compile
*p
= &c
->func
;
685 struct brw_reg src0
, addr_reg
;
686 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
687 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
689 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
690 brw_MOV(p
, addr_reg
, src0
);
691 brw_set_saturate(p
, 0);
695 * For GLSL shaders, this KIL will be unconditional.
696 * It may be contained inside an IF/ENDIF structure of course.
698 static void emit_kil(struct brw_wm_compile
*c
)
700 struct brw_compile
*p
= &c
->func
;
701 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
702 brw_push_insn_state(p
);
703 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
704 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
705 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
706 brw_pop_insn_state(p
);
709 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
711 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
715 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
717 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
720 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
722 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
725 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
727 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
731 /* One-, two- and three-dimensional Perlin noise, similar to the description
732 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
733 static void noise1_sub( struct brw_wm_compile
*c
) {
735 struct brw_compile
*p
= &c
->func
;
736 struct brw_reg param
,
737 x0
, x1
, /* gradients at each end */
738 t
, tmp
[ 2 ], /* float temporaries */
739 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
741 int mark
= mark_tmps( c
);
746 tmp
[ 0 ] = alloc_tmp( c
);
747 tmp
[ 1 ] = alloc_tmp( c
);
748 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
749 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
750 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
751 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
752 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
754 param
= lookup_tmp( c
, mark
- 2 );
756 brw_set_access_mode( p
, BRW_ALIGN_1
);
758 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
760 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
761 be hashed. Also compute the remainder (offset within the unit
762 length), interleaved to reduce register dependency penalties. */
763 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
764 brw_FRC( p
, param
, param
);
765 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
766 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
767 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
769 /* We're now ready to perform the hashing. The two hashes are
770 interleaved for performance. The hash function used is
771 designed to rapidly achieve avalanche and require only 32x16
772 bit multiplication, and 16-bit swizzles (which we get for
773 free). We can't use immediate operands in the multiplies,
774 because immediates are permitted only in src1 and the 16-bit
775 factor is permitted only in src0. */
776 for( i
= 0; i
< 2; i
++ )
777 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
778 for( i
= 0; i
< 2; i
++ )
779 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
780 high_words( itmp
[ i
] ) );
781 for( i
= 0; i
< 2; i
++ )
782 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
783 for( i
= 0; i
< 2; i
++ )
784 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
785 high_words( itmp
[ i
] ) );
786 for( i
= 0; i
< 2; i
++ )
787 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
788 for( i
= 0; i
< 2; i
++ )
789 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
790 high_words( itmp
[ i
] ) );
792 /* Now we want to initialise the two gradients based on the
793 hashes. Format conversion from signed integer to float leaves
794 everything scaled too high by a factor of pow( 2, 31 ), but
795 we correct for that right at the end. */
796 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
797 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
798 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
800 brw_MUL( p
, x0
, x0
, param
);
801 brw_MUL( p
, x1
, x1
, t
);
803 /* We interpolate between the gradients using the polynomial
804 6t^5 - 15t^4 + 10t^3 (Perlin). */
805 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
806 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
807 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
808 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
809 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
810 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
812 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
813 brw_MUL( p
, param
, tmp
[ 0 ], param
);
814 brw_MUL( p
, x1
, x1
, param
);
815 brw_ADD( p
, x0
, x0
, x1
);
816 /* scale by pow( 2, -30 ), to compensate for the format conversion
817 above and an extra factor of 2 so that a single gradient covers
819 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
821 release_tmps( c
, mark
);
824 static void emit_noise1( struct brw_wm_compile
*c
,
825 const struct prog_instruction
*inst
)
827 struct brw_compile
*p
= &c
->func
;
828 struct brw_reg src
, param
, dst
;
829 GLuint mask
= inst
->DstReg
.WriteMask
;
831 int mark
= mark_tmps( c
);
835 src
= get_src_reg( c
, inst
, 0, 0 );
837 param
= alloc_tmp( c
);
839 brw_MOV( p
, param
, src
);
841 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
843 /* Fill in the result: */
844 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
845 for (i
= 0 ; i
< 4; i
++) {
847 dst
= get_dst_reg(c
, inst
, i
);
848 brw_MOV( p
, dst
, param
);
851 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
852 brw_set_saturate( p
, 0 );
854 release_tmps( c
, mark
);
857 static void noise2_sub( struct brw_wm_compile
*c
) {
859 struct brw_compile
*p
= &c
->func
;
860 struct brw_reg param0
, param1
,
861 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
862 t
, tmp
[ 4 ], /* float temporaries */
863 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
865 int mark
= mark_tmps( c
);
867 x0y0
= alloc_tmp( c
);
868 x0y1
= alloc_tmp( c
);
869 x1y0
= alloc_tmp( c
);
870 x1y1
= alloc_tmp( c
);
872 for( i
= 0; i
< 4; i
++ ) {
873 tmp
[ i
] = alloc_tmp( c
);
874 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
876 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
877 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
878 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
880 param0
= lookup_tmp( c
, mark
- 3 );
881 param1
= lookup_tmp( c
, mark
- 2 );
883 brw_set_access_mode( p
, BRW_ALIGN_1
);
885 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
886 be hashed. Also compute the remainders (offsets within the unit
887 square), interleaved to reduce register dependency penalties. */
888 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
889 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
890 brw_FRC( p
, param0
, param0
);
891 brw_FRC( p
, param1
, param1
);
892 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
893 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
894 low_words( itmp
[ 1 ] ) );
895 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
896 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
897 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
898 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
899 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
901 /* We're now ready to perform the hashing. The four hashes are
902 interleaved for performance. The hash function used is
903 designed to rapidly achieve avalanche and require only 32x16
904 bit multiplication, and 16-bit swizzles (which we get for
905 free). We can't use immediate operands in the multiplies,
906 because immediates are permitted only in src1 and the 16-bit
907 factor is permitted only in src0. */
908 for( i
= 0; i
< 4; i
++ )
909 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
910 for( i
= 0; i
< 4; i
++ )
911 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
912 high_words( itmp
[ i
] ) );
913 for( i
= 0; i
< 4; i
++ )
914 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
915 for( i
= 0; i
< 4; i
++ )
916 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
917 high_words( itmp
[ i
] ) );
918 for( i
= 0; i
< 4; i
++ )
919 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
920 for( i
= 0; i
< 4; i
++ )
921 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
922 high_words( itmp
[ i
] ) );
924 /* Now we want to initialise the four gradients based on the
925 hashes. Format conversion from signed integer to float leaves
926 everything scaled too high by a factor of pow( 2, 15 ), but
927 we correct for that right at the end. */
928 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
929 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
930 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
931 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
932 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
934 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
935 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
936 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
937 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
939 brw_MUL( p
, x1y0
, x1y0
, t
);
940 brw_MUL( p
, x1y1
, x1y1
, t
);
941 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
942 brw_MUL( p
, x0y0
, x0y0
, param0
);
943 brw_MUL( p
, x0y1
, x0y1
, param0
);
945 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
946 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
947 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
948 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
950 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
951 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
952 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
953 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
955 /* We interpolate between the gradients using the polynomial
956 6t^5 - 15t^4 + 10t^3 (Perlin). */
957 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
958 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
959 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
960 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
961 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
962 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
963 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
965 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
966 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
967 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
968 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
969 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
971 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
972 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
973 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
974 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
976 /* Here we interpolate in the y dimension... */
977 brw_MUL( p
, x0y1
, x0y1
, param1
);
978 brw_MUL( p
, x1y1
, x1y1
, param1
);
979 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
980 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
982 /* And now in x. There are horrible register dependencies here,
983 but we have nothing else to do. */
984 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
985 brw_MUL( p
, x1y0
, x1y0
, param0
);
986 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
988 /* scale by pow( 2, -15 ), as described above */
989 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
991 release_tmps( c
, mark
);
994 static void emit_noise2( struct brw_wm_compile
*c
,
995 const struct prog_instruction
*inst
)
997 struct brw_compile
*p
= &c
->func
;
998 struct brw_reg src0
, src1
, param0
, param1
, dst
;
999 GLuint mask
= inst
->DstReg
.WriteMask
;
1001 int mark
= mark_tmps( c
);
1003 assert( mark
== 0 );
1005 src0
= get_src_reg( c
, inst
, 0, 0 );
1006 src1
= get_src_reg( c
, inst
, 0, 1 );
1008 param0
= alloc_tmp( c
);
1009 param1
= alloc_tmp( c
);
1011 brw_MOV( p
, param0
, src0
);
1012 brw_MOV( p
, param1
, src1
);
1014 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1016 /* Fill in the result: */
1017 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1018 for (i
= 0 ; i
< 4; i
++) {
1019 if (mask
& (1<<i
)) {
1020 dst
= get_dst_reg(c
, inst
, i
);
1021 brw_MOV( p
, dst
, param0
);
1024 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1025 brw_set_saturate( p
, 0 );
1027 release_tmps( c
, mark
);
1031 * The three-dimensional case is much like the one- and two- versions above,
1032 * but since the number of corners is rapidly growing we now pack 16 16-bit
1033 * hashes into each register to extract more parallelism from the EUs.
1035 static void noise3_sub( struct brw_wm_compile
*c
) {
1037 struct brw_compile
*p
= &c
->func
;
1038 struct brw_reg param0
, param1
, param2
,
1039 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1040 xi
, yi
, zi
, /* interpolation coefficients */
1041 t
, tmp
[ 8 ], /* float temporaries */
1042 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1043 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1045 int mark
= mark_tmps( c
);
1047 x0y0
= alloc_tmp( c
);
1048 x0y1
= alloc_tmp( c
);
1049 x1y0
= alloc_tmp( c
);
1050 x1y1
= alloc_tmp( c
);
1051 xi
= alloc_tmp( c
);
1052 yi
= alloc_tmp( c
);
1053 zi
= alloc_tmp( c
);
1055 for( i
= 0; i
< 8; i
++ ) {
1056 tmp
[ i
] = alloc_tmp( c
);
1057 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1058 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1061 param0
= lookup_tmp( c
, mark
- 4 );
1062 param1
= lookup_tmp( c
, mark
- 3 );
1063 param2
= lookup_tmp( c
, mark
- 2 );
1065 brw_set_access_mode( p
, BRW_ALIGN_1
);
1067 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1068 be hashed. Also compute the remainders (offsets within the unit
1069 cube), interleaved to reduce register dependency penalties. */
1070 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1071 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1072 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1073 brw_FRC( p
, param0
, param0
);
1074 brw_FRC( p
, param1
, param1
);
1075 brw_FRC( p
, param2
, param2
);
1076 /* Since we now have only 16 bits of precision in the hash, we must
1077 be more careful about thorough mixing to maintain entropy as we
1078 squash the input vector into a small scalar. */
1079 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1080 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1081 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1082 brw_imm_uw( 0x9B93 ) );
1083 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1084 brw_imm_uw( 0xBC8F ) );
1086 /* Temporarily disable the execution mask while we work with ExecSize=16
1087 channels (the mask is set for ExecSize=8 and is probably incorrect).
1088 Although this might cause execution of unwanted channels, the code
1089 writes only to temporary registers and has no side effects, so
1090 disabling the mask is harmless. */
1091 brw_push_insn_state( p
);
1092 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1093 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1094 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1095 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1097 /* We're now ready to perform the hashing. The eight hashes are
1098 interleaved for performance. The hash function used is
1099 designed to rapidly achieve avalanche and require only 16x16
1100 bit multiplication, and 8-bit swizzles (which we get for
1102 for( i
= 0; i
< 4; i
++ )
1103 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1104 for( i
= 0; i
< 4; i
++ )
1105 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1106 odd_bytes( wtmp
[ i
] ) );
1107 for( i
= 0; i
< 4; i
++ )
1108 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1109 for( i
= 0; i
< 4; i
++ )
1110 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1111 odd_bytes( wtmp
[ i
] ) );
1112 brw_pop_insn_state( p
);
1114 /* Now we want to initialise the four rear gradients based on the
1115 hashes. Format conversion from signed integer to float leaves
1116 everything scaled too high by a factor of pow( 2, 15 ), but
1117 we correct for that right at the end. */
1119 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1120 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1121 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1122 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1123 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1125 brw_push_insn_state( p
);
1126 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1127 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1128 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1129 brw_pop_insn_state( p
);
1131 brw_MUL( p
, x1y0
, x1y0
, t
);
1132 brw_MUL( p
, x1y1
, x1y1
, t
);
1133 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1134 brw_MUL( p
, x0y0
, x0y0
, param0
);
1135 brw_MUL( p
, x0y1
, x0y1
, param0
);
1138 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1139 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1140 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1141 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1143 brw_push_insn_state( p
);
1144 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1145 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1146 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1147 brw_pop_insn_state( p
);
1149 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1150 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1151 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1152 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1153 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1155 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1156 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1157 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1158 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1161 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1162 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1163 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1164 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1166 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1167 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1168 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1169 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1171 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1172 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1173 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1174 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1176 /* We interpolate between the gradients using the polynomial
1177 6t^5 - 15t^4 + 10t^3 (Perlin). */
1178 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1179 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1180 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1181 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1182 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1183 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1184 brw_MUL( p
, xi
, xi
, param0
);
1185 brw_MUL( p
, yi
, yi
, param1
);
1186 brw_MUL( p
, zi
, zi
, param2
);
1187 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1188 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1189 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1190 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1191 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1192 brw_MUL( p
, xi
, xi
, param0
);
1193 brw_MUL( p
, yi
, yi
, param1
);
1194 brw_MUL( p
, zi
, zi
, param2
);
1195 brw_MUL( p
, xi
, xi
, param0
);
1196 brw_MUL( p
, yi
, yi
, param1
);
1197 brw_MUL( p
, zi
, zi
, param2
);
1198 brw_MUL( p
, xi
, xi
, param0
);
1199 brw_MUL( p
, yi
, yi
, param1
);
1200 brw_MUL( p
, zi
, zi
, param2
);
1202 /* Here we interpolate in the y dimension... */
1203 brw_MUL( p
, x0y1
, x0y1
, yi
);
1204 brw_MUL( p
, x1y1
, x1y1
, yi
);
1205 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1206 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1208 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1209 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1210 brw_MUL( p
, x1y0
, x1y0
, xi
);
1211 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1213 /* Now do the same thing for the front four gradients... */
1215 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1216 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1217 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1218 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1220 brw_push_insn_state( p
);
1221 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1222 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1223 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1224 brw_pop_insn_state( p
);
1226 brw_MUL( p
, x1y0
, x1y0
, t
);
1227 brw_MUL( p
, x1y1
, x1y1
, t
);
1228 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1229 brw_MUL( p
, x0y0
, x0y0
, param0
);
1230 brw_MUL( p
, x0y1
, x0y1
, param0
);
1233 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1234 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1235 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1236 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1238 brw_push_insn_state( p
);
1239 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1240 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1241 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1242 brw_pop_insn_state( p
);
1244 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1245 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1246 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1247 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1248 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1250 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1251 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1252 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1253 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1256 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1257 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1258 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1259 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1261 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1262 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1263 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1264 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1266 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1267 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1268 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1269 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1271 /* The interpolation coefficients are still around from last time, so
1272 again interpolate in the y dimension... */
1273 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1274 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1275 brw_MUL( p
, x0y1
, x0y1
, yi
);
1276 brw_MUL( p
, x1y1
, x1y1
, yi
);
1277 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1278 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1280 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1281 time put the front face in tmp[ 1 ] and we're nearly there... */
1282 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1283 brw_MUL( p
, x1y0
, x1y0
, xi
);
1284 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1286 /* The final interpolation, in the z dimension: */
1287 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1288 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1289 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1291 /* scale by pow( 2, -15 ), as described above */
1292 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1294 release_tmps( c
, mark
);
1297 static void emit_noise3( struct brw_wm_compile
*c
,
1298 const struct prog_instruction
*inst
)
1300 struct brw_compile
*p
= &c
->func
;
1301 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1302 GLuint mask
= inst
->DstReg
.WriteMask
;
1304 int mark
= mark_tmps( c
);
1306 assert( mark
== 0 );
1308 src0
= get_src_reg( c
, inst
, 0, 0 );
1309 src1
= get_src_reg( c
, inst
, 0, 1 );
1310 src2
= get_src_reg( c
, inst
, 0, 2 );
1312 param0
= alloc_tmp( c
);
1313 param1
= alloc_tmp( c
);
1314 param2
= alloc_tmp( c
);
1316 brw_MOV( p
, param0
, src0
);
1317 brw_MOV( p
, param1
, src1
);
1318 brw_MOV( p
, param2
, src2
);
1320 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1322 /* Fill in the result: */
1323 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1324 for (i
= 0 ; i
< 4; i
++) {
1325 if (mask
& (1<<i
)) {
1326 dst
= get_dst_reg(c
, inst
, i
);
1327 brw_MOV( p
, dst
, param0
);
1330 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1331 brw_set_saturate( p
, 0 );
1333 release_tmps( c
, mark
);
1337 * For the four-dimensional case, the little micro-optimisation benefits
1338 * we obtain by unrolling all the loops aren't worth the massive bloat it
1339 * now causes. Instead, we loop twice around performing a similar operation
1340 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1341 * code to glue it all together.
1343 static void noise4_sub( struct brw_wm_compile
*c
)
1345 struct brw_compile
*p
= &c
->func
;
1346 struct brw_reg param
[ 4 ],
1347 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1348 w0
, /* noise for the w=0 cube */
1349 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1350 interp
[ 4 ], /* interpolation coefficients */
1351 t
, tmp
[ 8 ], /* float temporaries */
1352 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1353 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1355 int mark
= mark_tmps( c
);
1356 GLuint loop
, origin
;
1358 x0y0
= alloc_tmp( c
);
1359 x0y1
= alloc_tmp( c
);
1360 x1y0
= alloc_tmp( c
);
1361 x1y1
= alloc_tmp( c
);
1363 w0
= alloc_tmp( c
);
1364 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1365 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1367 for( i
= 0; i
< 4; i
++ ) {
1368 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1369 interp
[ i
] = alloc_tmp( c
);
1372 for( i
= 0; i
< 8; i
++ ) {
1373 tmp
[ i
] = alloc_tmp( c
);
1374 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1375 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1378 brw_set_access_mode( p
, BRW_ALIGN_1
);
1380 /* We only want 16 bits of precision from the integral part of each
1381 co-ordinate, but unfortunately the RNDD semantics would saturate
1382 at 16 bits if we performed the operation directly to a 16-bit
1383 destination. Therefore, we round to 32-bit temporaries where
1384 appropriate, and then store only the lower 16 bits. */
1385 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1386 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1387 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1388 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1389 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1390 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1392 /* Modify the flag register here, because the side effect is useful
1393 later (see below). We know for certain that all flags will be
1394 cleared, since the FRC instruction cannot possibly generate
1395 negative results. Even for exceptional inputs (infinities, denormals,
1396 NaNs), the architecture guarantees that the L conditional is false. */
1397 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1398 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1399 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1400 for( i
= 1; i
< 4; i
++ )
1401 brw_FRC( p
, param
[ i
], param
[ i
] );
1403 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1405 for( i
= 0; i
< 4; i
++ )
1406 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1407 for( i
= 0; i
< 4; i
++ )
1408 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1409 for( i
= 0; i
< 4; i
++ )
1410 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1411 for( i
= 0; i
< 4; i
++ )
1412 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1413 for( j
= 0; j
< 3; j
++ )
1414 for( i
= 0; i
< 4; i
++ )
1415 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1417 /* Mark the current address, as it will be a jump destination. The
1418 following code will be executed twice: first, with the flag
1419 register clear indicating the w=0 case, and second with flags
1423 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1424 be hashed. Since we have only 16 bits of precision in the hash, we
1425 must be careful about thorough mixing to maintain entropy as we
1426 squash the input vector into a small scalar. */
1427 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1428 brw_imm_uw( 0xBC8F ) );
1429 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1430 brw_imm_uw( 0xD0BD ) );
1431 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1432 brw_imm_uw( 0x9B93 ) );
1433 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1434 brw_imm_uw( 0xA359 ) );
1435 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1436 brw_imm_uw( 0xBC8F ) );
1438 /* Temporarily disable the execution mask while we work with ExecSize=16
1439 channels (the mask is set for ExecSize=8 and is probably incorrect).
1440 Although this might cause execution of unwanted channels, the code
1441 writes only to temporary registers and has no side effects, so
1442 disabling the mask is harmless. */
1443 brw_push_insn_state( p
);
1444 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1445 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1446 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1447 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1449 /* We're now ready to perform the hashing. The eight hashes are
1450 interleaved for performance. The hash function used is
1451 designed to rapidly achieve avalanche and require only 16x16
1452 bit multiplication, and 8-bit swizzles (which we get for
1454 for( i
= 0; i
< 4; i
++ )
1455 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1456 for( i
= 0; i
< 4; i
++ )
1457 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1458 odd_bytes( wtmp
[ i
] ) );
1459 for( i
= 0; i
< 4; i
++ )
1460 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1461 for( i
= 0; i
< 4; i
++ )
1462 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1463 odd_bytes( wtmp
[ i
] ) );
1464 brw_pop_insn_state( p
);
1466 /* Now we want to initialise the four rear gradients based on the
1467 hashes. Format conversion from signed integer to float leaves
1468 everything scaled too high by a factor of pow( 2, 15 ), but
1469 we correct for that right at the end. */
1471 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1472 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1473 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1474 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1475 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1477 brw_push_insn_state( p
);
1478 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1479 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1480 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1481 brw_pop_insn_state( p
);
1483 brw_MUL( p
, x1y0
, x1y0
, t
);
1484 brw_MUL( p
, x1y1
, x1y1
, t
);
1485 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1486 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1487 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1490 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1491 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1492 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1493 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1495 brw_push_insn_state( p
);
1496 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1497 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1498 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1499 brw_pop_insn_state( p
);
1501 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1502 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1503 /* prepare t for the w component (used below): w the first time through
1504 the loop; w - 1 the second time) */
1505 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1506 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1507 p
->current
->header
.predicate_inverse
= 1;
1508 brw_MOV( p
, t
, param
[ 3 ] );
1509 p
->current
->header
.predicate_inverse
= 0;
1510 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1511 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1512 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1514 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1515 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1516 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1517 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1520 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1521 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1522 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1523 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1525 brw_push_insn_state( p
);
1526 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1527 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1528 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1529 brw_pop_insn_state( p
);
1531 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
1532 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
1533 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
1534 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
1536 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1537 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1538 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1539 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1542 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1543 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1544 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1545 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1547 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1548 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1549 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1550 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1551 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1553 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1554 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1555 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1556 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1558 /* Here we interpolate in the y dimension... */
1559 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1560 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1561 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1562 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1563 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1564 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1566 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1567 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1568 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1569 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1571 /* Now do the same thing for the front four gradients... */
1573 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1574 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1575 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1576 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1578 brw_push_insn_state( p
);
1579 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1580 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1581 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1582 brw_pop_insn_state( p
);
1584 brw_MUL( p
, x1y0
, x1y0
, t
);
1585 brw_MUL( p
, x1y1
, x1y1
, t
);
1586 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1587 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1588 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1591 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1592 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1593 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1594 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1596 brw_push_insn_state( p
);
1597 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1598 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1599 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1600 brw_pop_insn_state( p
);
1602 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1603 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1604 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
1605 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1606 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1608 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1609 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1610 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1611 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1614 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1615 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1616 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1617 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1619 brw_push_insn_state( p
);
1620 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1621 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1622 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1623 brw_pop_insn_state( p
);
1625 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1626 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1627 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1628 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1629 /* prepare t for the w component (used below): w the first time through
1630 the loop; w - 1 the second time) */
1631 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1632 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1633 p
->current
->header
.predicate_inverse
= 1;
1634 brw_MOV( p
, t
, param
[ 3 ] );
1635 p
->current
->header
.predicate_inverse
= 0;
1636 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1638 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1639 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1640 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1641 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1644 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1645 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1646 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1647 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1649 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1650 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1651 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1652 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1654 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1655 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1656 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1657 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1659 /* Interpolate in the y dimension: */
1660 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1661 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1662 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1663 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1664 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1665 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1667 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1668 time put the front face in tmp[ 1 ] and we're nearly there... */
1669 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1670 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1671 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1673 /* Another interpolation, in the z dimension: */
1674 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1675 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
1676 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1678 /* Exit the loop if we've computed both cubes... */
1679 origin
= p
->nr_insn
;
1680 brw_push_insn_state( p
);
1681 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1682 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1683 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
1684 brw_pop_insn_state( p
);
1686 /* Save the result for the w=0 case, and increment the w coordinate: */
1687 brw_MOV( p
, w0
, tmp
[ 0 ] );
1688 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
1691 /* Loop around for the other cube. Explicitly set the flag register
1692 (unfortunately we must spend an extra instruction to do this: we
1693 can't rely on a side effect of the previous MOV or ADD because
1694 conditional modifiers which are normally true might be false in
1695 exceptional circumstances, e.g. given a NaN input; the add to
1696 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
1697 brw_push_insn_state( p
);
1698 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1699 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
1700 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
1701 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
1702 brw_pop_insn_state( p
);
1704 /* Patch the previous conditional branch now that we know the
1705 destination address. */
1706 brw_set_src1( p
->store
+ origin
,
1707 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
1709 /* The very last interpolation. */
1710 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
1711 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
1712 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
1714 /* scale by pow( 2, -15 ), as described above */
1715 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1717 release_tmps( c
, mark
);
1720 static void emit_noise4( struct brw_wm_compile
*c
,
1721 const struct prog_instruction
*inst
)
1723 struct brw_compile
*p
= &c
->func
;
1724 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
1725 GLuint mask
= inst
->DstReg
.WriteMask
;
1727 int mark
= mark_tmps( c
);
1729 assert( mark
== 0 );
1731 src0
= get_src_reg( c
, inst
, 0, 0 );
1732 src1
= get_src_reg( c
, inst
, 0, 1 );
1733 src2
= get_src_reg( c
, inst
, 0, 2 );
1734 src3
= get_src_reg( c
, inst
, 0, 3 );
1736 param0
= alloc_tmp( c
);
1737 param1
= alloc_tmp( c
);
1738 param2
= alloc_tmp( c
);
1739 param3
= alloc_tmp( c
);
1741 brw_MOV( p
, param0
, src0
);
1742 brw_MOV( p
, param1
, src1
);
1743 brw_MOV( p
, param2
, src2
);
1744 brw_MOV( p
, param3
, src3
);
1746 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
1748 /* Fill in the result: */
1749 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1750 for (i
= 0 ; i
< 4; i
++) {
1751 if (mask
& (1<<i
)) {
1752 dst
= get_dst_reg(c
, inst
, i
);
1753 brw_MOV( p
, dst
, param0
);
1756 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1757 brw_set_saturate( p
, 0 );
1759 release_tmps( c
, mark
);
/**
 * Resolve subroutine calls after code emit is done.
 *
 * Presumably patches the CAL jump targets recorded via brw_save_call()
 * during emission (see invoke_subroutine) — confirm in brw_resolve_cals.
 */
static void post_wm_emit( struct brw_wm_compile *c )
{
   brw_resolve_cals(&c->func);
}
1771 get_argument_regs(struct brw_wm_compile
*c
,
1772 const struct prog_instruction
*inst
,
1774 struct brw_reg
*dst
,
1775 struct brw_reg
*regs
,
1778 struct brw_compile
*p
= &c
->func
;
1781 for (i
= 0; i
< 4; i
++) {
1782 if (mask
& (1 << i
)) {
1783 regs
[i
] = get_src_reg(c
, inst
, index
, i
);
1785 /* Unalias destination registers from our sources. */
1786 if (regs
[i
].file
== BRW_GENERAL_REGISTER_FILE
) {
1787 for (j
= 0; j
< 4; j
++) {
1788 if (memcmp(®s
[i
], &dst
[j
], sizeof(regs
[0])) == 0) {
1789 struct brw_reg tmp
= alloc_tmp(c
);
1790 brw_MOV(p
, tmp
, regs
[i
]);
1800 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1802 struct intel_context
*intel
= &brw
->intel
;
1803 #define MAX_IF_DEPTH 32
1804 #define MAX_LOOP_DEPTH 32
1805 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
1806 GLuint i
, if_depth
= 0, loop_depth
= 0;
1807 struct brw_compile
*p
= &c
->func
;
1808 struct brw_indirect stack_index
= brw_indirect(0, 0);
1810 c
->out_of_regs
= GL_FALSE
;
1813 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1814 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1816 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
1817 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
1819 struct brw_reg args
[3][4], dst
[4];
1821 int mark
= mark_tmps( c
);
1826 printf("Inst %d: ", i
);
1827 _mesa_print_instruction(inst
);
1830 /* fetch any constants that this instruction needs */
1831 if (c
->fp
->use_const_buffer
)
1832 fetch_constants(c
, inst
);
1834 if (inst
->Opcode
!= OPCODE_ARL
) {
1835 for (j
= 0; j
< 4; j
++) {
1836 if (inst
->DstReg
.WriteMask
& (1 << j
))
1837 dst
[j
] = get_dst_reg(c
, inst
, j
);
1839 dst
[j
] = brw_null_reg();
1842 for (j
= 0; j
< brw_wm_nr_args(inst
->Opcode
); j
++)
1843 get_argument_regs(c
, inst
, j
, dst
, args
[j
], WRITEMASK_XYZW
);
1845 dst_flags
= inst
->DstReg
.WriteMask
;
1846 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1847 dst_flags
|= SATURATE
;
1849 if (inst
->CondUpdate
)
1850 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
1852 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
1854 switch (inst
->Opcode
) {
1856 emit_pixel_xy(c
, dst
, dst_flags
);
1859 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1862 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1865 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1868 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1871 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1874 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1877 emit_fb_write(c
, args
[0], args
[1], args
[2],
1878 INST_AUX_GET_TARGET(inst
->Aux
),
1879 inst
->Aux
& INST_AUX_EOT
);
1881 case WM_FRONTFACING
:
1882 emit_frontfacing(p
, dst
, dst_flags
);
1885 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1891 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1894 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1897 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1900 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1904 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1907 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1910 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1913 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1916 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1919 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1922 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1925 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1928 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1931 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1934 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1937 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1940 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
1943 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
1947 emit_ddxy(p
, dst
, dst_flags
, (inst
->Opcode
== OPCODE_DDX
),
1951 emit_sop(p
, dst
, dst_flags
,
1952 BRW_CONDITIONAL_L
, args
[0], args
[1]);
1955 emit_sop(p
, dst
, dst_flags
,
1956 BRW_CONDITIONAL_LE
, args
[0], args
[1]);
1959 emit_sop(p
, dst
, dst_flags
,
1960 BRW_CONDITIONAL_G
, args
[0], args
[1]);
1963 emit_sop(p
, dst
, dst_flags
,
1964 BRW_CONDITIONAL_GE
, args
[0], args
[1]);
1967 emit_sop(p
, dst
, dst_flags
,
1968 BRW_CONDITIONAL_EQ
, args
[0], args
[1]);
1971 emit_sop(p
, dst
, dst_flags
,
1972 BRW_CONDITIONAL_NEQ
, args
[0], args
[1]);
1975 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
1978 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
1979 dst
, dst_flags
, args
[0], args
[1]);
1982 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1985 emit_noise1(c
, inst
);
1988 emit_noise2(c
, inst
);
1991 emit_noise3(c
, inst
);
1994 emit_noise4(c
, inst
);
1997 emit_tex(c
, dst
, dst_flags
, args
[0],
1998 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
2002 (c
->key
.shadowtex_mask
& (1 << inst
->TexSrcUnit
)) != 0);
2005 emit_txb(c
, dst
, dst_flags
, args
[0],
2006 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
2009 c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
]);
2015 assert(if_depth
< MAX_IF_DEPTH
);
2016 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
2019 assert(if_depth
> 0);
2020 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2023 assert(if_depth
> 0);
2024 brw_ENDIF(p
, if_inst
[--if_depth
]);
2027 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2033 brw_push_insn_state(p
);
2034 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2035 brw_set_access_mode(p
, BRW_ALIGN_1
);
2036 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2037 brw_set_access_mode(p
, BRW_ALIGN_16
);
2038 brw_ADD(p
, get_addr_reg(stack_index
),
2039 get_addr_reg(stack_index
), brw_imm_d(4));
2040 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2041 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2042 brw_pop_insn_state(p
);
2046 brw_push_insn_state(p
);
2047 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2048 brw_ADD(p
, get_addr_reg(stack_index
),
2049 get_addr_reg(stack_index
), brw_imm_d(-4));
2050 brw_set_access_mode(p
, BRW_ALIGN_1
);
2051 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2052 brw_set_access_mode(p
, BRW_ALIGN_16
);
2053 brw_pop_insn_state(p
);
2056 case OPCODE_BGNLOOP
:
2057 /* XXX may need to invalidate the current_constant regs */
2058 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2062 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2066 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2068 case OPCODE_ENDLOOP
:
2070 struct brw_instruction
*inst0
, *inst1
;
2073 if (intel
->gen
== 5)
2076 assert(loop_depth
> 0);
2078 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2079 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2080 while (inst0
> loop_inst
[loop_depth
]) {
2082 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2083 inst0
->bits3
.if_else
.jump_count
== 0) {
2084 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2085 inst0
->bits3
.if_else
.pop_count
= 0;
2087 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2088 inst0
->bits3
.if_else
.jump_count
== 0) {
2089 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2090 inst0
->bits3
.if_else
.pop_count
= 0;
2096 printf("unsupported opcode %d (%s) in fragment shader\n",
2097 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
2098 _mesa_opcode_string(inst
->Opcode
) : "unknown");
2101 /* Release temporaries containing any unaliased source regs. */
2102 release_tmps( c
, mark
);
2104 if (inst
->CondUpdate
)
2105 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2107 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2111 if (INTEL_DEBUG
& DEBUG_WM
) {
2112 printf("wm-native:\n");
2113 for (i
= 0; i
< p
->nr_insn
; i
++)
2114 brw_disasm(stderr
, &p
->store
[i
], intel
->gen
);
2120 * Do GPU code generation for shaders that use GLSL features such as
2121 * flow control. Other shaders will be compiled with the
2123 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2125 if (INTEL_DEBUG
& DEBUG_WM
) {
2126 printf("brw_wm_glsl_emit:\n");
2129 /* initial instruction translation/simplification */
2132 /* actual code generation */
2133 brw_wm_emit_glsl(brw
, c
);
2135 if (INTEL_DEBUG
& DEBUG_WM
) {
2136 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2139 c
->prog_data
.total_grf
= num_grf_used(c
);
2140 c
->prog_data
.total_scratch
= 0;