1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
14 const struct prog_instruction
*inst
,
/**
 * Determine if the given fragment program uses GLSL features such
 * as flow conditionals, loops, subroutines.
 * Some GLSL shaders may use these features, others might not.
 */
22 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
26 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
27 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
28 switch (inst
->Opcode
) {
51 reclaim_temps(struct brw_wm_compile
*c
);
54 /** Mark GRF register as used. */
56 prealloc_grf(struct brw_wm_compile
*c
, int r
)
58 c
->used_grf
[r
] = GL_TRUE
;
62 /** Mark given GRF register as not in use. */
64 release_grf(struct brw_wm_compile
*c
, int r
)
66 /*assert(c->used_grf[r]);*/
67 c
->used_grf
[r
] = GL_FALSE
;
68 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
72 /** Return index of a free GRF, mark it as used. */
74 alloc_grf(struct brw_wm_compile
*c
)
77 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
78 if (!c
->used_grf
[r
]) {
79 c
->used_grf
[r
] = GL_TRUE
;
80 c
->first_free_grf
= r
+ 1; /* a guess */
85 /* no free temps, try to reclaim some */
87 c
->first_free_grf
= 0;
90 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
91 if (!c
->used_grf
[r
]) {
92 c
->used_grf
[r
] = GL_TRUE
;
93 c
->first_free_grf
= r
+ 1; /* a guess */
98 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
99 assert(c
->used_grf
[r
]);
102 /* really, no free GRF regs found */
103 if (!c
->out_of_regs
) {
104 /* print warning once per compilation */
105 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
106 c
->out_of_regs
= GL_TRUE
;
113 /** Return number of GRF registers used */
115 num_grf_used(const struct brw_wm_compile
*c
)
118 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
127 * Record the mapping of a Mesa register to a hardware register.
129 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
130 int component
, struct brw_reg reg
)
132 c
->wm_regs
[file
][index
][component
].reg
= reg
;
133 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
136 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
140 /* if we need to allocate another temp, grow the tmp_regs[] array */
141 if (c
->tmp_index
== c
->tmp_max
) {
142 int r
= alloc_grf(c
);
144 /*printf("Out of temps in %s\n", __FUNCTION__);*/
145 r
= 50; /* XXX random register! */
147 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
150 /* form the GRF register */
151 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
152 /*printf("alloc_temp %d\n", reg.nr);*/
153 assert(reg
.nr
< BRW_WM_MAX_GRF
);
159 * Save current temp register info.
160 * There must be a matching call to release_tmps().
162 static int mark_tmps(struct brw_wm_compile
*c
)
167 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
169 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
172 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
178 * Convert Mesa src register to brw register.
180 * Since we're running in SOA mode each Mesa register corresponds to four
181 * hardware registers. We allocate the hardware registers as needed here.
183 * \param file register file, one of PROGRAM_x
184 * \param index register number
185 * \param component src component (X=0, Y=1, Z=2, W=3)
186 * \param nr not used?!?
187 * \param neg negate value?
188 * \param abs take absolute value?
190 static struct brw_reg
191 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
192 int nr
, GLuint neg
, GLuint abs
)
196 case PROGRAM_STATE_VAR
:
197 case PROGRAM_CONSTANT
:
198 case PROGRAM_UNIFORM
:
199 file
= PROGRAM_STATE_VAR
;
201 case PROGRAM_UNDEFINED
:
202 return brw_null_reg();
203 case PROGRAM_TEMPORARY
:
206 case PROGRAM_PAYLOAD
:
209 _mesa_problem(NULL
, "Unexpected file in get_reg()");
210 return brw_null_reg();
214 assert(component
< 4);
216 /* see if we've already allocated a HW register for this Mesa register */
217 if (c
->wm_regs
[file
][index
][component
].inited
) {
219 reg
= c
->wm_regs
[file
][index
][component
].reg
;
222 /* no, allocate new register */
223 int grf
= alloc_grf(c
);
224 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
226 /* totally out of temps */
227 grf
= 51; /* XXX random register! */
230 reg
= brw_vec8_grf(grf
, 0);
231 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
233 set_reg(c
, file
, index
, component
, reg
);
236 if (neg
& (1 << component
)) {
247 * This is called if we run out of GRF registers. Examine the live intervals
248 * of temp regs in the program and free those which won't be used again.
251 reclaim_temps(struct brw_wm_compile
*c
)
253 GLint intBegin
[MAX_PROGRAM_TEMPS
];
254 GLint intEnd
[MAX_PROGRAM_TEMPS
];
257 /*printf("Reclaim temps:\n");*/
259 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
262 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
263 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
264 /* program temp[i] can be freed */
266 /*printf(" temp[%d] is dead\n", index);*/
267 for (component
= 0; component
< 4; component
++) {
268 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
269 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
272 printf(" Reclaim temp %d, reg %d at inst %d\n",
273 index, r, c->cur_inst);
275 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
286 * Preallocate registers. This sets up the Mesa to hardware register
287 * mapping for certain registers, such as constants (uniforms/state vars)
290 static void prealloc_reg(struct brw_wm_compile
*c
)
294 int urb_read_length
= 0;
295 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
;
296 GLuint reg_index
= 0;
298 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
299 c
->first_free_grf
= 0;
301 for (i
= 0; i
< 4; i
++) {
302 if (i
< c
->key
.nr_depth_regs
)
303 reg
= brw_vec8_grf(i
* 2, 0);
305 reg
= brw_vec8_grf(0, 0);
306 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
308 reg_index
+= 2 * c
->key
.nr_depth_regs
;
312 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
313 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
315 /* use a real constant buffer, or just use a section of the GRF? */
316 /* XXX this heuristic may need adjustment... */
317 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
318 c
->fp
->use_const_buffer
= GL_TRUE
;
320 c
->fp
->use_const_buffer
= GL_FALSE
;
321 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
323 if (c
->fp
->use_const_buffer
) {
324 /* We'll use a real constant buffer and fetch constants from
325 * it with a dataport read message.
328 /* number of float constants in CURBE */
329 c
->prog_data
.nr_params
= 0;
332 const struct gl_program_parameter_list
*plist
=
333 c
->fp
->program
.Base
.Parameters
;
336 /* number of float constants in CURBE */
337 c
->prog_data
.nr_params
= 4 * nr_params
;
339 /* loop over program constants (float[4]) */
340 for (i
= 0; i
< nr_params
; i
++) {
341 /* loop over XYZW channels */
342 for (j
= 0; j
< 4; j
++, index
++) {
343 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
344 /* Save pointer to parameter/constant value.
345 * Constants will be copied in prepare_constant_buffer()
347 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
348 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
351 /* number of constant regs used (each reg is float[8]) */
352 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
353 reg_index
+= c
->nr_creg
;
357 /* fragment shader inputs */
358 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
361 if (i
>= VERT_RESULT_VAR0
)
362 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
363 else if (i
<= VERT_RESULT_TEX7
)
368 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
369 urb_read_length
= reg_index
;
370 reg
= brw_vec8_grf(reg_index
, 0);
371 for (j
= 0; j
< 4; j
++)
372 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
374 if (c
->key
.vp_outputs_written
& BITFIELD64_BIT(i
)) {
379 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
380 c
->prog_data
.urb_read_length
= urb_read_length
;
381 c
->prog_data
.curb_read_length
= c
->nr_creg
;
382 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
384 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
387 /* mark GRF regs [0..reg_index-1] as in-use */
388 for (i
= 0; i
< reg_index
; i
++)
391 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
392 prealloc_grf(c
, 126);
393 prealloc_grf(c
, 127);
395 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
396 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
397 struct brw_reg dst
[4];
399 switch (inst
->Opcode
) {
402 /* Allocate the channels of texture results contiguously,
403 * since they are written out that way by the sampler unit.
405 for (j
= 0; j
< 4; j
++) {
406 dst
[j
] = get_dst_reg(c
, inst
, j
);
408 assert(dst
[j
].nr
== dst
[j
- 1].nr
+ 1);
416 /* An instruction may reference up to three constants.
417 * They'll be found in these registers.
418 * XXX alloc these on demand!
420 if (c
->fp
->use_const_buffer
) {
421 for (i
= 0; i
< 3; i
++) {
422 c
->current_const
[i
].index
= -1;
423 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
427 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
428 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
434 * Check if any of the instruction's src registers are constants, uniforms,
435 * or statevars. If so, fetch any constants that we don't already have in
436 * the three GRF slots.
438 static void fetch_constants(struct brw_wm_compile
*c
,
439 const struct prog_instruction
*inst
)
441 struct brw_compile
*p
= &c
->func
;
444 /* loop over instruction src regs */
445 for (i
= 0; i
< 3; i
++) {
446 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
447 if (src
->File
== PROGRAM_STATE_VAR
||
448 src
->File
== PROGRAM_CONSTANT
||
449 src
->File
== PROGRAM_UNIFORM
) {
450 c
->current_const
[i
].index
= src
->Index
;
453 printf(" fetch const[%d] for arg %d into reg %d\n",
454 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
457 /* need to fetch the constant now */
459 c
->current_const
[i
].reg
, /* writeback dest */
460 src
->RelAddr
, /* relative indexing? */
461 16 * src
->Index
, /* byte offset */
462 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
470 * Convert Mesa dst register to brw register.
472 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
473 const struct prog_instruction
*inst
,
477 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
482 static struct brw_reg
483 get_src_reg_const(struct brw_wm_compile
*c
,
484 const struct prog_instruction
*inst
,
485 GLuint srcRegIndex
, GLuint component
)
487 /* We should have already fetched the constant from the constant
488 * buffer in fetch_constants(). Now we just have to return a
489 * register description that extracts the needed component and
490 * smears it across all eight vector components.
492 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
493 struct brw_reg const_reg
;
495 assert(component
< 4);
496 assert(srcRegIndex
< 3);
497 assert(c
->current_const
[srcRegIndex
].index
!= -1);
498 const_reg
= c
->current_const
[srcRegIndex
].reg
;
500 /* extract desired float from the const_reg, and smear */
501 const_reg
= stride(const_reg
, 0, 1, 0);
502 const_reg
.subnr
= component
* 4;
504 if (src
->Negate
& (1 << component
))
505 const_reg
= negate(const_reg
);
507 const_reg
= brw_abs(const_reg
);
510 printf(" form const[%d].%d for arg %d, reg %d\n",
511 c
->current_const
[srcRegIndex
].index
,
522 * Convert Mesa src register to brw register.
524 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
525 const struct prog_instruction
*inst
,
526 GLuint srcRegIndex
, GLuint channel
)
528 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
530 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
532 /* Extended swizzle terms */
533 if (component
== SWIZZLE_ZERO
) {
534 return brw_imm_f(0.0F
);
536 else if (component
== SWIZZLE_ONE
) {
537 return brw_imm_f(1.0F
);
540 if (c
->fp
->use_const_buffer
&&
541 (src
->File
== PROGRAM_STATE_VAR
||
542 src
->File
== PROGRAM_CONSTANT
||
543 src
->File
== PROGRAM_UNIFORM
)) {
544 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
547 /* other type of source register */
548 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
549 src
->Negate
, src
->Abs
);
/**
 * Subroutines are minimal support for reusable instruction sequences.
 * They are implemented as simply as possible to minimise overhead: there
 * is no explicit support for communication between the caller and callee
 * other than saving the return address in a temporary register, nor is
 * there any automatic local storage.  This implies that great care is
 * required before attempting reentrancy or any kind of nested
 * subroutine invocations.
 */
562 static void invoke_subroutine( struct brw_wm_compile
*c
,
563 enum _subroutine subroutine
,
564 void (*emit
)( struct brw_wm_compile
* ) )
566 struct brw_compile
*p
= &c
->func
;
568 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
570 if( c
->subroutines
[ subroutine
] ) {
571 /* subroutine previously emitted: reuse existing instructions */
573 int mark
= mark_tmps( c
);
574 struct brw_reg return_address
= retype( alloc_tmp( c
),
575 BRW_REGISTER_TYPE_UD
);
576 int here
= p
->nr_insn
;
578 brw_push_insn_state(p
);
579 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
580 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
582 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
583 brw_imm_d( ( c
->subroutines
[ subroutine
] -
585 brw_pop_insn_state(p
);
587 release_tmps( c
, mark
);
589 /* previously unused subroutine: emit, and mark for later reuse */
591 int mark
= mark_tmps( c
);
592 struct brw_reg return_address
= retype( alloc_tmp( c
),
593 BRW_REGISTER_TYPE_UD
);
594 struct brw_instruction
*calc
;
595 int base
= p
->nr_insn
;
597 brw_push_insn_state(p
);
598 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
599 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
600 brw_pop_insn_state(p
);
602 c
->subroutines
[ subroutine
] = p
->nr_insn
;
606 brw_push_insn_state(p
);
607 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
608 brw_MOV( p
, brw_ip_reg(), return_address
);
609 brw_pop_insn_state(p
);
611 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
613 release_tmps( c
, mark
);
617 /* Workaround for using brw_wm_emit.c's emit functions, which expect
618 * destination regs to be uniquely written. Moves arguments out to
619 * temporaries as necessary for instructions which use their destination as
623 unalias3(struct brw_wm_compile
*c
,
624 void (*func
)(struct brw_compile
*c
,
625 const struct brw_reg
*dst
,
627 const struct brw_reg
*arg0
,
628 const struct brw_reg
*arg1
,
629 const struct brw_reg
*arg2
),
630 const struct brw_reg
*dst
,
632 const struct brw_reg
*arg0
,
633 const struct brw_reg
*arg1
,
634 const struct brw_reg
*arg2
)
636 struct brw_compile
*p
= &c
->func
;
637 struct brw_reg tmp_arg0
[4], tmp_arg1
[4], tmp_arg2
[4];
639 int mark
= mark_tmps(c
);
641 for (j
= 0; j
< 4; j
++) {
642 tmp_arg0
[j
] = arg0
[j
];
643 tmp_arg1
[j
] = arg1
[j
];
644 tmp_arg2
[j
] = arg2
[j
];
647 for (i
= 0; i
< 4; i
++) {
649 for (j
= 0; j
< 4; j
++) {
650 if (arg0
[j
].file
== dst
[i
].file
&&
651 dst
[i
].nr
== arg0
[j
].nr
) {
652 tmp_arg0
[j
] = alloc_tmp(c
);
653 brw_MOV(p
, tmp_arg0
[j
], arg0
[j
]);
655 if (arg1
[j
].file
== dst
[i
].file
&&
656 dst
[i
].nr
== arg1
[j
].nr
) {
657 tmp_arg1
[j
] = alloc_tmp(c
);
658 brw_MOV(p
, tmp_arg1
[j
], arg1
[j
]);
660 if (arg2
[j
].file
== dst
[i
].file
&&
661 dst
[i
].nr
== arg2
[j
].nr
) {
662 tmp_arg2
[j
] = alloc_tmp(c
);
663 brw_MOV(p
, tmp_arg2
[j
], arg2
[j
]);
669 func(p
, dst
, mask
, tmp_arg0
, tmp_arg1
, tmp_arg2
);
671 release_tmps(c
, mark
);
674 /* Workaround for using brw_wm_emit.c's emit functions, which expect
675 * destination regs to be uniquely written. Moves arguments out to
676 * temporaries as necessary for instructions which use their destination as
680 unalias2(struct brw_wm_compile
*c
,
681 void (*func
)(struct brw_compile
*c
,
682 const struct brw_reg
*dst
,
684 const struct brw_reg
*arg0
,
685 const struct brw_reg
*arg1
),
686 const struct brw_reg
*dst
,
688 const struct brw_reg
*arg0
,
689 const struct brw_reg
*arg1
)
691 struct brw_compile
*p
= &c
->func
;
692 struct brw_reg tmp_arg0
[4], tmp_arg1
[4];
694 int mark
= mark_tmps(c
);
696 for (j
= 0; j
< 4; j
++) {
697 tmp_arg0
[j
] = arg0
[j
];
698 tmp_arg1
[j
] = arg1
[j
];
701 for (i
= 0; i
< 4; i
++) {
703 for (j
= 0; j
< 4; j
++) {
704 if (arg0
[j
].file
== dst
[i
].file
&&
705 dst
[i
].nr
== arg0
[j
].nr
) {
706 tmp_arg0
[j
] = alloc_tmp(c
);
707 brw_MOV(p
, tmp_arg0
[j
], arg0
[j
]);
709 if (arg1
[j
].file
== dst
[i
].file
&&
710 dst
[i
].nr
== arg1
[j
].nr
) {
711 tmp_arg1
[j
] = alloc_tmp(c
);
712 brw_MOV(p
, tmp_arg1
[j
], arg1
[j
]);
718 func(p
, dst
, mask
, tmp_arg0
, tmp_arg1
);
720 release_tmps(c
, mark
);
723 static void emit_arl(struct brw_wm_compile
*c
,
724 const struct prog_instruction
*inst
)
726 struct brw_compile
*p
= &c
->func
;
727 struct brw_reg src0
, addr_reg
;
728 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
729 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
731 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
732 brw_MOV(p
, addr_reg
, src0
);
733 brw_set_saturate(p
, 0);
737 * For GLSL shaders, this KIL will be unconditional.
738 * It may be contained inside an IF/ENDIF structure of course.
740 static void emit_kil(struct brw_wm_compile
*c
)
742 struct brw_compile
*p
= &c
->func
;
743 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
744 brw_push_insn_state(p
);
745 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
746 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); /* IMASK */
747 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
748 brw_pop_insn_state(p
);
751 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
753 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
757 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
759 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
762 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
764 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
767 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
769 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
/* One-, two- and three-dimensional Perlin noise, similar to the description
   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
775 static void noise1_sub( struct brw_wm_compile
*c
) {
777 struct brw_compile
*p
= &c
->func
;
778 struct brw_reg param
,
779 x0
, x1
, /* gradients at each end */
780 t
, tmp
[ 2 ], /* float temporaries */
781 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
783 int mark
= mark_tmps( c
);
788 tmp
[ 0 ] = alloc_tmp( c
);
789 tmp
[ 1 ] = alloc_tmp( c
);
790 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
791 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
792 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
793 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
794 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
796 param
= lookup_tmp( c
, mark
- 2 );
798 brw_set_access_mode( p
, BRW_ALIGN_1
);
800 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
802 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
803 be hashed. Also compute the remainder (offset within the unit
804 length), interleaved to reduce register dependency penalties. */
805 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
806 brw_FRC( p
, param
, param
);
807 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
808 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
809 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
811 /* We're now ready to perform the hashing. The two hashes are
812 interleaved for performance. The hash function used is
813 designed to rapidly achieve avalanche and require only 32x16
814 bit multiplication, and 16-bit swizzles (which we get for
815 free). We can't use immediate operands in the multiplies,
816 because immediates are permitted only in src1 and the 16-bit
817 factor is permitted only in src0. */
818 for( i
= 0; i
< 2; i
++ )
819 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
820 for( i
= 0; i
< 2; i
++ )
821 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
822 high_words( itmp
[ i
] ) );
823 for( i
= 0; i
< 2; i
++ )
824 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
825 for( i
= 0; i
< 2; i
++ )
826 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
827 high_words( itmp
[ i
] ) );
828 for( i
= 0; i
< 2; i
++ )
829 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
830 for( i
= 0; i
< 2; i
++ )
831 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
832 high_words( itmp
[ i
] ) );
834 /* Now we want to initialise the two gradients based on the
835 hashes. Format conversion from signed integer to float leaves
836 everything scaled too high by a factor of pow( 2, 31 ), but
837 we correct for that right at the end. */
838 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
839 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
840 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
842 brw_MUL( p
, x0
, x0
, param
);
843 brw_MUL( p
, x1
, x1
, t
);
845 /* We interpolate between the gradients using the polynomial
846 6t^5 - 15t^4 + 10t^3 (Perlin). */
847 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
848 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
849 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
850 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
851 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
852 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
854 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
855 brw_MUL( p
, param
, tmp
[ 0 ], param
);
856 brw_MUL( p
, x1
, x1
, param
);
857 brw_ADD( p
, x0
, x0
, x1
);
858 /* scale by pow( 2, -30 ), to compensate for the format conversion
859 above and an extra factor of 2 so that a single gradient covers
861 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
863 release_tmps( c
, mark
);
866 static void emit_noise1( struct brw_wm_compile
*c
,
867 const struct prog_instruction
*inst
)
869 struct brw_compile
*p
= &c
->func
;
870 struct brw_reg src
, param
, dst
;
871 GLuint mask
= inst
->DstReg
.WriteMask
;
873 int mark
= mark_tmps( c
);
877 src
= get_src_reg( c
, inst
, 0, 0 );
879 param
= alloc_tmp( c
);
881 brw_MOV( p
, param
, src
);
883 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
885 /* Fill in the result: */
886 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
887 for (i
= 0 ; i
< 4; i
++) {
889 dst
= get_dst_reg(c
, inst
, i
);
890 brw_MOV( p
, dst
, param
);
893 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
894 brw_set_saturate( p
, 0 );
896 release_tmps( c
, mark
);
899 static void noise2_sub( struct brw_wm_compile
*c
) {
901 struct brw_compile
*p
= &c
->func
;
902 struct brw_reg param0
, param1
,
903 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
904 t
, tmp
[ 4 ], /* float temporaries */
905 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
907 int mark
= mark_tmps( c
);
909 x0y0
= alloc_tmp( c
);
910 x0y1
= alloc_tmp( c
);
911 x1y0
= alloc_tmp( c
);
912 x1y1
= alloc_tmp( c
);
914 for( i
= 0; i
< 4; i
++ ) {
915 tmp
[ i
] = alloc_tmp( c
);
916 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
918 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
919 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
920 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
922 param0
= lookup_tmp( c
, mark
- 3 );
923 param1
= lookup_tmp( c
, mark
- 2 );
925 brw_set_access_mode( p
, BRW_ALIGN_1
);
927 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
928 be hashed. Also compute the remainders (offsets within the unit
929 square), interleaved to reduce register dependency penalties. */
930 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
931 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
932 brw_FRC( p
, param0
, param0
);
933 brw_FRC( p
, param1
, param1
);
934 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
935 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
936 low_words( itmp
[ 1 ] ) );
937 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
938 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
939 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
940 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
941 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
943 /* We're now ready to perform the hashing. The four hashes are
944 interleaved for performance. The hash function used is
945 designed to rapidly achieve avalanche and require only 32x16
946 bit multiplication, and 16-bit swizzles (which we get for
947 free). We can't use immediate operands in the multiplies,
948 because immediates are permitted only in src1 and the 16-bit
949 factor is permitted only in src0. */
950 for( i
= 0; i
< 4; i
++ )
951 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
952 for( i
= 0; i
< 4; i
++ )
953 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
954 high_words( itmp
[ i
] ) );
955 for( i
= 0; i
< 4; i
++ )
956 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
957 for( i
= 0; i
< 4; i
++ )
958 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
959 high_words( itmp
[ i
] ) );
960 for( i
= 0; i
< 4; i
++ )
961 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
962 for( i
= 0; i
< 4; i
++ )
963 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
964 high_words( itmp
[ i
] ) );
966 /* Now we want to initialise the four gradients based on the
967 hashes. Format conversion from signed integer to float leaves
968 everything scaled too high by a factor of pow( 2, 15 ), but
969 we correct for that right at the end. */
970 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
971 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
972 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
973 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
974 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
976 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
977 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
978 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
979 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
981 brw_MUL( p
, x1y0
, x1y0
, t
);
982 brw_MUL( p
, x1y1
, x1y1
, t
);
983 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
984 brw_MUL( p
, x0y0
, x0y0
, param0
);
985 brw_MUL( p
, x0y1
, x0y1
, param0
);
987 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
988 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
989 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
990 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
992 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
993 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
994 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
995 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
997 /* We interpolate between the gradients using the polynomial
998 6t^5 - 15t^4 + 10t^3 (Perlin). */
999 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1000 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1001 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1002 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1003 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1004 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1005 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1007 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1008 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1009 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1010 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1011 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1013 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1014 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1015 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1016 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1018 /* Here we interpolate in the y dimension... */
1019 brw_MUL( p
, x0y1
, x0y1
, param1
);
1020 brw_MUL( p
, x1y1
, x1y1
, param1
);
1021 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1022 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1024 /* And now in x. There are horrible register dependencies here,
1025 but we have nothing else to do. */
1026 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1027 brw_MUL( p
, x1y0
, x1y0
, param0
);
1028 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1030 /* scale by pow( 2, -15 ), as described above */
1031 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1033 release_tmps( c
, mark
);
1036 static void emit_noise2( struct brw_wm_compile
*c
,
1037 const struct prog_instruction
*inst
)
1039 struct brw_compile
*p
= &c
->func
;
1040 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1041 GLuint mask
= inst
->DstReg
.WriteMask
;
1043 int mark
= mark_tmps( c
);
1045 assert( mark
== 0 );
1047 src0
= get_src_reg( c
, inst
, 0, 0 );
1048 src1
= get_src_reg( c
, inst
, 0, 1 );
1050 param0
= alloc_tmp( c
);
1051 param1
= alloc_tmp( c
);
1053 brw_MOV( p
, param0
, src0
);
1054 brw_MOV( p
, param1
, src1
);
1056 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1058 /* Fill in the result: */
1059 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1060 for (i
= 0 ; i
< 4; i
++) {
1061 if (mask
& (1<<i
)) {
1062 dst
= get_dst_reg(c
, inst
, i
);
1063 brw_MOV( p
, dst
, param0
);
1066 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1067 brw_set_saturate( p
, 0 );
1069 release_tmps( c
, mark
);
/**
 * The three-dimensional case is much like the one- and two- versions above,
 * but since the number of corners is rapidly growing we now pack 16 16-bit
 * hashes into each register to extract more parallelism from the EUs.
 */
/*
 * noise3_sub: emits the instruction sequence of the 3-D noise subroutine
 * into c->func.
 *
 * The three inputs are read from the temporaries at slots mark-4, mark-3
 * and mark-2 (param0..param2, via lookup_tmp).  They are overwritten with
 * their fractional parts, and the final value is written back to param0.
 * Every temporary allocated here is released again by the closing
 * release_tmps( c, mark ).
 *
 * NOTE(review): in this listing no declaration of `i` and no assignment
 * to `t` (e.g. an alloc_tmp) is visible before their first use -- confirm
 * against the complete file.
 */
1077 static void noise3_sub( struct brw_wm_compile
*c
) {
1079 struct brw_compile
*p
= &c
->func
;
1080 struct brw_reg param0
, param1
, param2
,
1081 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1082 xi
, yi
, zi
, /* interpolation coefficients */
1083 t
, tmp
[ 8 ], /* float temporaries */
1084 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1085 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
/* Remember the temp watermark so everything allocated below can be
   released in one go at the end. */
1087 int mark
= mark_tmps( c
);
1089 x0y0
= alloc_tmp( c
);
1090 x0y1
= alloc_tmp( c
);
1091 x1y0
= alloc_tmp( c
);
1092 x1y1
= alloc_tmp( c
);
1093 xi
= alloc_tmp( c
);
1094 yi
= alloc_tmp( c
);
1095 zi
= alloc_tmp( c
);
/* tmp[ i ], itmp[ i ] and wtmp[ i ] all refer to the same register
   (tmp[ i ].nr): as allocated, retyped to UD, and as a uw16 view. */
1097 for( i
= 0; i
< 8; i
++ ) {
1098 tmp
[ i
] = alloc_tmp( c
);
1099 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1100 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
/* The inputs live in temporaries that sit below this block's mark;
   presumably the caller and invoke_subroutine allocated them -- the
   exact offsets depend on code not shown here. */
1103 param0
= lookup_tmp( c
, mark
- 4 );
1104 param1
= lookup_tmp( c
, mark
- 3 );
1105 param2
= lookup_tmp( c
, mark
- 2 );
1107 brw_set_access_mode( p
, BRW_ALIGN_1
);
1109 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1110 be hashed. Also compute the remainders (offsets within the unit
1111 cube), interleaved to reduce register dependency penalties. */
1112 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1113 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1114 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1115 brw_FRC( p
, param0
, param0
);
1116 brw_FRC( p
, param1
, param1
);
1117 brw_FRC( p
, param2
, param2
);
1118 /* Since we now have only 16 bits of precision in the hash, we must
1119 be more careful about thorough mixing to maintain entropy as we
1120 squash the input vector into a small scalar. */
1121 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1122 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1123 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1124 brw_imm_uw( 0x9B93 ) );
/* High word = low word + 0xBC8F, the constant that multiplied the first
   coordinate above; assuming the MUL/MAC chain accumulates a weighted
   sum, this is the mixed value for the first coordinate plus one. */
1125 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1126 brw_imm_uw( 0xBC8F ) );
1128 /* Temporarily disable the execution mask while we work with ExecSize=16
1129 channels (the mask is set for ExecSize=8 and is probably incorrect).
1130 Although this might cause execution of unwanted channels, the code
1131 writes only to temporary registers and has no side effects, so
1132 disabling the mask is harmless. */
1133 brw_push_insn_state( p
);
1134 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
/* Derive the remaining corners by adding the per-coordinate constants:
   wtmp[ 1 ] = wtmp[ 0 ] + 0xD0BD, wtmp[ 2 ] = wtmp[ 0 ] + 0x9B93,
   wtmp[ 3 ] = wtmp[ 1 ] + 0x9B93. */
1135 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1136 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1137 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1139 /* We're now ready to perform the hashing. The eight hashes are
1140 interleaved for performance. The hash function used is
1141 designed to rapidly achieve avalanche and require only 16x16
1142 bit multiplication, and 8-bit swizzles (which we get for
1144 for( i
= 0; i
< 4; i
++ )
1145 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1146 for( i
= 0; i
< 4; i
++ )
1147 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1148 odd_bytes( wtmp
[ i
] ) );
1149 for( i
= 0; i
< 4; i
++ )
1150 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1151 for( i
= 0; i
< 4; i
++ )
1152 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1153 odd_bytes( wtmp
[ i
] ) );
1154 brw_pop_insn_state( p
);
1156 /* Now we want to initialise the four rear gradients based on the
1157 hashes. Format conversion from signed integer to float leaves
1158 everything scaled too high by a factor of pow( 2, 15 ), but
1159 we correct for that right at the end. */
/* t = param0 - 1, where param0 now holds the fractional part produced
   by the FRC above; it scales the x1* corners below. */
1161 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1162 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1163 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1164 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1165 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
/* Shift the hash words left by 5 before they are read again below --
   presumably so the next gradient component is taken from different
   hash bits. */
1167 brw_push_insn_state( p
);
1168 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1169 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1170 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1171 brw_pop_insn_state( p
);
1173 brw_MUL( p
, x1y0
, x1y0
, t
);
1174 brw_MUL( p
, x1y1
, x1y1
, t
);
1175 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1176 brw_MUL( p
, x0y0
, x0y0
, param0
);
1177 brw_MUL( p
, x0y1
, x0y1
, param0
);
/* Second component: values read from tmp[ 1 ] (tmp[ 5 ], tmp[ 7 ]) are
   scaled by t = param1 - 1, those from tmp[ 0 ] (tmp[ 4 ], tmp[ 6 ]) by
   param1, and each is added to the matching corner accumulator. */
1180 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1181 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1182 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1183 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1185 brw_push_insn_state( p
);
1186 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1187 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1188 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1189 brw_pop_insn_state( p
);
1191 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1192 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
/* Reload t = param0 - 1 for the second (front) pass further down;
   nothing in between reads t. */
1193 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1194 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1195 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1197 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1198 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1199 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1200 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
/* Third component: in this first pass all four corners are scaled by
   param2 itself. */
1203 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1204 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1205 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1206 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1208 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1209 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1210 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1211 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1213 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1214 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1215 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1216 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1218 /* We interpolate between the gradients using the polynomial
1219 6t^5 - 15t^4 + 10t^3 (Perlin). */
/* Evaluated in nested form: ( ( 6t - 15 ) * t + 10 ) * t * t * t, for
   all three coordinates in lock step. */
1220 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1221 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1222 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1223 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1224 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1225 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1226 brw_MUL( p
, xi
, xi
, param0
);
1227 brw_MUL( p
, yi
, yi
, param1
);
1228 brw_MUL( p
, zi
, zi
, param2
);
1229 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1230 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1231 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1232 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1233 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1234 brw_MUL( p
, xi
, xi
, param0
);
1235 brw_MUL( p
, yi
, yi
, param1
);
1236 brw_MUL( p
, zi
, zi
, param2
);
1237 brw_MUL( p
, xi
, xi
, param0
);
1238 brw_MUL( p
, yi
, yi
, param1
);
1239 brw_MUL( p
, zi
, zi
, param2
);
1240 brw_MUL( p
, xi
, xi
, param0
);
1241 brw_MUL( p
, yi
, yi
, param1
);
1242 brw_MUL( p
, zi
, zi
, param2
);
1244 /* Here we interpolate in the y dimension... */
1245 brw_MUL( p
, x0y1
, x0y1
, yi
);
1246 brw_MUL( p
, x1y1
, x1y1
, yi
);
1247 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1248 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1250 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1251 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1252 brw_MUL( p
, x1y0
, x1y0
, xi
);
1253 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1255 /* Now do the same thing for the front four gradients... */
/* tmp[ 2 ] and tmp[ 3 ] are the hashes that had 0x9B93 (the third
   coordinate's constant) added above. */
1257 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1258 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1259 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1260 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1262 brw_push_insn_state( p
);
1263 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1264 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1265 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1266 brw_pop_insn_state( p
);
1268 brw_MUL( p
, x1y0
, x1y0
, t
);
1269 brw_MUL( p
, x1y1
, x1y1
, t
);
1270 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1271 brw_MUL( p
, x0y0
, x0y0
, param0
);
1272 brw_MUL( p
, x0y1
, x0y1
, param0
);
1275 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1276 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1277 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1278 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1280 brw_push_insn_state( p
);
1281 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1282 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1283 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1284 brw_pop_insn_state( p
);
1286 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1287 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
/* t = param2 - 1: in this second pass the third component of every
   corner is scaled by this offset instead of param2. */
1288 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1289 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1290 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1292 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1293 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1294 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1295 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1298 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1299 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1300 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1301 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1303 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1304 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1305 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1306 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1308 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1309 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1310 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1311 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1313 /* The interpolation coefficients are still around from last time, so
1314 again interpolate in the y dimension... */
1315 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1316 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1317 brw_MUL( p
, x0y1
, x0y1
, yi
);
1318 brw_MUL( p
, x1y1
, x1y1
, yi
);
1319 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1320 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1322 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1323 time put the front face in tmp[ 1 ] and we're nearly there... */
1324 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1325 brw_MUL( p
, x1y0
, x1y0
, xi
);
1326 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1328 /* The final interpolation, in the z dimension: */
1329 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1330 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1331 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1333 /* scale by pow( 2, -15 ), as described above */
1334 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
/* Hand back every temporary allocated since the mark taken on entry. */
1336 release_tmps( c
, mark
);
1339 static void emit_noise3( struct brw_wm_compile
*c
,
1340 const struct prog_instruction
*inst
)
1342 struct brw_compile
*p
= &c
->func
;
1343 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1344 GLuint mask
= inst
->DstReg
.WriteMask
;
1346 int mark
= mark_tmps( c
);
1348 assert( mark
== 0 );
1350 src0
= get_src_reg( c
, inst
, 0, 0 );
1351 src1
= get_src_reg( c
, inst
, 0, 1 );
1352 src2
= get_src_reg( c
, inst
, 0, 2 );
1354 param0
= alloc_tmp( c
);
1355 param1
= alloc_tmp( c
);
1356 param2
= alloc_tmp( c
);
1358 brw_MOV( p
, param0
, src0
);
1359 brw_MOV( p
, param1
, src1
);
1360 brw_MOV( p
, param2
, src2
);
1362 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1364 /* Fill in the result: */
1365 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1366 for (i
= 0 ; i
< 4; i
++) {
1367 if (mask
& (1<<i
)) {
1368 dst
= get_dst_reg(c
, inst
, i
);
1369 brw_MOV( p
, dst
, param0
);
1372 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1373 brw_set_saturate( p
, 0 );
1375 release_tmps( c
, mark
);
/**
1379 * For the four-dimensional case, the little micro-optimisation benefits
1380 * we obtain by unrolling all the loops aren't worth the massive bloat it
1381 * now causes. Instead, we loop twice around performing a similar operation
1382 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1383 * code to glue it all together.
 */
/*
 * noise4_sub: emits the instruction sequence of the 4-D noise subroutine
 * into c->func.
 *
 * The four inputs are the temporaries at slots mark-5 .. mark-2
 * (param[ 0..3 ], via lookup_tmp).  Each is replaced by its fractional
 * part, and the final value is written to param[ 0 ].  The hashing and
 * gradient code is emitted once but runs twice, selected by the flag
 * register: the first pass's result is kept in w0 and then blended with
 * the second pass's result using interp[ 3 ].  Every temporary allocated
 * here is released by the closing release_tmps( c, mark ).
 *
 * NOTE(review): in this listing the following are not visible and should
 * be confirmed against the complete file: declarations of `i` and `j`,
 * any assignment to `t`, the assignment to `loop` before it is used in
 * the backward branch, and the last operand of the ADD that increments
 * the high word of floors[ 1 ].
 */
1385 static void noise4_sub( struct brw_wm_compile
*c
)
1387 struct brw_compile
*p
= &c
->func
;
1388 struct brw_reg param
[ 4 ],
1389 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1390 w0
, /* noise for the w=0 cube */
1391 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1392 interp
[ 4 ], /* interpolation coefficients */
1393 t
, tmp
[ 8 ], /* float temporaries */
1394 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1395 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
/* Remember the temp watermark so everything allocated below can be
   released in one go at the end. */
1397 int mark
= mark_tmps( c
);
1398 GLuint loop
, origin
;
1400 x0y0
= alloc_tmp( c
);
1401 x0y1
= alloc_tmp( c
);
1402 x1y0
= alloc_tmp( c
);
1403 x1y1
= alloc_tmp( c
);
1405 w0
= alloc_tmp( c
);
1406 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1407 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
/* param[ i ] are existing temporaries below the mark (slot mark-5+i);
   interp[ i ] are newly allocated. */
1409 for( i
= 0; i
< 4; i
++ ) {
1410 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1411 interp
[ i
] = alloc_tmp( c
);
/* tmp[ i ], itmp[ i ] and wtmp[ i ] all refer to the same register
   (tmp[ i ].nr): as allocated, retyped to UD, and as a uw16 view. */
1414 for( i
= 0; i
< 8; i
++ ) {
1415 tmp
[ i
] = alloc_tmp( c
);
1416 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1417 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1420 brw_set_access_mode( p
, BRW_ALIGN_1
);
1422 /* We only want 16 bits of precision from the integral part of each
1423 co-ordinate, but unfortunately the RNDD semantics would saturate
1424 at 16 bits if we performed the operation directly to a 16-bit
1425 destination. Therefore, we round to 32-bit temporaries where
1426 appropriate, and then store only the lower 16 bits. */
1427 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1428 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1429 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1430 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
/* Pack two floors per register: floors[ 0 ] = low words from param[ 0 ],
   high words from param[ 1 ]; floors[ 1 ] = low words from param[ 2 ],
   high words from param[ 3 ]. */
1431 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1432 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1434 /* Modify the flag register here, because the side effect is useful
1435 later (see below). We know for certain that all flags will be
1436 cleared, since the FRC instruction cannot possibly generate
1437 negative results. Even for exceptional inputs (infinities, denormals,
1438 NaNs), the architecture guarantees that the L conditional is false. */
1439 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1440 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
/* NOTE(review): presumably emitting an instruction with a conditional
   modifier leaves predication switched on in the default state, hence
   the explicit reset -- confirm in the emitter. */
1441 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1442 for( i
= 1; i
< 4; i
++ )
1443 brw_FRC( p
, param
[ i
], param
[ i
] );
1445 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1447 for( i
= 0; i
< 4; i
++ )
1448 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1449 for( i
= 0; i
< 4; i
++ )
1450 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1451 for( i
= 0; i
< 4; i
++ )
1452 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1453 for( i
= 0; i
< 4; i
++ )
1454 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1455 for( j
= 0; j
< 3; j
++ )
1456 for( i
= 0; i
< 4; i
++ )
1457 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1459 /* Mark the current address, as it will be a jump destination. The
1460 following code will be executed twice: first, with the flag
1461 register clear indicating the w=0 case, and second with flags
1465 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1466 be hashed. Since we have only 16 bits of precision in the hash, we
1467 must be careful about thorough mixing to maintain entropy as we
1468 squash the input vector into a small scalar. */
1469 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1470 brw_imm_uw( 0xBC8F ) );
1471 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1472 brw_imm_uw( 0xD0BD ) );
1473 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1474 brw_imm_uw( 0x9B93 ) );
1475 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1476 brw_imm_uw( 0xA359 ) );
/* High word = low word + 0xBC8F, the constant that multiplied the first
   coordinate above; assuming the MUL/MAC chain accumulates a weighted
   sum, this is the mixed value for the first coordinate plus one. */
1477 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1478 brw_imm_uw( 0xBC8F ) );
1480 /* Temporarily disable the execution mask while we work with ExecSize=16
1481 channels (the mask is set for ExecSize=8 and is probably incorrect).
1482 Although this might cause execution of unwanted channels, the code
1483 writes only to temporary registers and has no side effects, so
1484 disabling the mask is harmless. */
1485 brw_push_insn_state( p
);
1486 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
/* Derive the remaining corners of this cube by adding the constants of
   the second and third coordinates: wtmp[ 1 ] = wtmp[ 0 ] + 0xD0BD,
   wtmp[ 2 ] = wtmp[ 0 ] + 0x9B93, wtmp[ 3 ] = wtmp[ 1 ] + 0x9B93. */
1487 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1488 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1489 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1491 /* We're now ready to perform the hashing. The eight hashes are
1492 interleaved for performance. The hash function used is
1493 designed to rapidly achieve avalanche and require only 16x16
1494 bit multiplication, and 8-bit swizzles (which we get for
1496 for( i
= 0; i
< 4; i
++ )
1497 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1498 for( i
= 0; i
< 4; i
++ )
1499 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1500 odd_bytes( wtmp
[ i
] ) );
1501 for( i
= 0; i
< 4; i
++ )
1502 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1503 for( i
= 0; i
< 4; i
++ )
1504 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1505 odd_bytes( wtmp
[ i
] ) );
1506 brw_pop_insn_state( p
);
1508 /* Now we want to initialise the four rear gradients based on the
1509 hashes. Format conversion from signed integer to float leaves
1510 everything scaled too high by a factor of pow( 2, 15 ), but
1511 we correct for that right at the end. */
/* t = param[ 0 ] - 1, where param[ 0 ] holds the fractional part from
   the FRC above; it scales the x1* corners below. */
1513 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1514 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1515 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1516 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1517 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
/* The hash words are shifted left by 4 here and twice more below,
   before each re-read -- presumably so each of the four gradient
   components is taken from different hash bits. */
1519 brw_push_insn_state( p
);
1520 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1521 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1522 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1523 brw_pop_insn_state( p
);
1525 brw_MUL( p
, x1y0
, x1y0
, t
);
1526 brw_MUL( p
, x1y1
, x1y1
, t
);
1527 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1528 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1529 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1532 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1533 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1534 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1535 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1537 brw_push_insn_state( p
);
1538 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1539 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1540 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1541 brw_pop_insn_state( p
);
1543 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1544 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1545 /* prepare t for the w component (used below): w the first time through
1546 the loop; w - 1 the second time) */
/* Predicated pair: the ADD ( t = param[ 3 ] - 1 ) is emitted with normal
   predication, the MOV ( t = param[ 3 ] ) with the predicate inverted,
   so the flag register decides which of the two takes effect. */
1547 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1548 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1549 p
->current
->header
.predicate_inverse
= 1;
1550 brw_MOV( p
, t
, param
[ 3 ] );
1551 p
->current
->header
.predicate_inverse
= 0;
1552 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1553 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1554 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1556 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1557 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1558 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1559 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1562 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1563 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1564 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1565 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1567 brw_push_insn_state( p
);
1568 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1569 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1570 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1571 brw_pop_insn_state( p
);
1573 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
1574 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
1575 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
1576 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
1578 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1579 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1580 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1581 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1584 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1585 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1586 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1587 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1589 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1590 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1591 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1592 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
/* Reload t = param[ 0 ] - 1 for the tmp[ 2 ] / tmp[ 3 ] corners handled
   further down; the interpolation in between does not read t. */
1593 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1595 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1596 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1597 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1598 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1600 /* Here we interpolate in the y dimension... */
1601 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1602 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1603 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1604 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1605 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1606 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1608 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1609 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1610 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1611 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1613 /* Now do the same thing for the front four gradients... */
1615 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1616 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1617 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1618 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1620 brw_push_insn_state( p
);
1621 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1622 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1623 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1624 brw_pop_insn_state( p
);
1626 brw_MUL( p
, x1y0
, x1y0
, t
);
1627 brw_MUL( p
, x1y1
, x1y1
, t
);
1628 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1629 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1630 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1633 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1634 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1635 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1636 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1638 brw_push_insn_state( p
);
1639 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1640 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1641 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1642 brw_pop_insn_state( p
);
1644 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1645 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1646 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
1647 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
1648 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
1650 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1651 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1652 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1653 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1656 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1657 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1658 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1659 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1661 brw_push_insn_state( p
);
1662 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1663 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
1664 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
1665 brw_pop_insn_state( p
);
1667 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1668 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1669 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1670 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1671 /* prepare t for the w component (used below): w the first time through
1672 the loop; w - 1 the second time) */
1673 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1674 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
1675 p
->current
->header
.predicate_inverse
= 1;
1676 brw_MOV( p
, t
, param
[ 3 ] );
1677 p
->current
->header
.predicate_inverse
= 0;
1678 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1680 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1681 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1682 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1683 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1686 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1687 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1688 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1689 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1691 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1692 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1693 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1694 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1696 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1697 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1698 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1699 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1701 /* Interpolate in the y dimension: */
1702 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1703 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1704 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
1705 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
1706 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1707 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1709 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1710 time put the front face in tmp[ 1 ] and we're nearly there... */
1711 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1712 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
1713 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1715 /* Another interpolation, in the z dimension: */
1716 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1717 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
1718 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1720 /* Exit the loop if we've computed both cubes... */
/* origin records the index of the predicated forward branch emitted
   next; its offset is a placeholder ( 0 ) that is patched further down
   once the destination is known. */
1721 origin
= p
->nr_insn
;
1722 brw_push_insn_state( p
);
1723 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
1724 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1725 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
1726 brw_pop_insn_state( p
);
1728 /* Save the result for the w=0 case, and increment the w coordinate: */
1729 brw_MOV( p
, w0
, tmp
[ 0 ] );
/* NOTE(review): only the destination and first source of this ADD are
   visible in this listing; the increment operand is missing -- confirm
   against the complete file. */
1730 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
1733 /* Loop around for the other cube. Explicitly set the flag register
1734 (unfortunately we must spend an extra instruction to do this: we
1735 can't rely on a side effect of the previous MOV or ADD because
1736 conditional modifiers which are normally true might be false in
1737 exceptional circumstances, e.g. given a NaN input; the add to
1738 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
1739 brw_push_insn_state( p
);
1740 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1741 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
/* Backward jump to `loop`: the distance in instructions is shifted left
   by 4, presumably converting an instruction count to a byte offset. */
1742 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
1743 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
1744 brw_pop_insn_state( p
);
1746 /* Patch the previous conditional branch now that we know the
1747 destination address. */
1748 brw_set_src1( p
->store
+ origin
,
1749 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
1751 /* The very last interpolation. */
1752 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
1753 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
1754 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
1756 /* scale by pow( 2, -15 ), as described above */
1757 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
/* Hand back every temporary allocated since the mark taken on entry. */
1759 release_tmps( c
, mark
);
1762 static void emit_noise4( struct brw_wm_compile
*c
,
1763 const struct prog_instruction
*inst
)
1765 struct brw_compile
*p
= &c
->func
;
1766 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
1767 GLuint mask
= inst
->DstReg
.WriteMask
;
1769 int mark
= mark_tmps( c
);
1771 assert( mark
== 0 );
1773 src0
= get_src_reg( c
, inst
, 0, 0 );
1774 src1
= get_src_reg( c
, inst
, 0, 1 );
1775 src2
= get_src_reg( c
, inst
, 0, 2 );
1776 src3
= get_src_reg( c
, inst
, 0, 3 );
1778 param0
= alloc_tmp( c
);
1779 param1
= alloc_tmp( c
);
1780 param2
= alloc_tmp( c
);
1781 param3
= alloc_tmp( c
);
1783 brw_MOV( p
, param0
, src0
);
1784 brw_MOV( p
, param1
, src1
);
1785 brw_MOV( p
, param2
, src2
);
1786 brw_MOV( p
, param3
, src3
);
1788 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
1790 /* Fill in the result: */
1791 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1792 for (i
= 0 ; i
< 4; i
++) {
1793 if (mask
& (1<<i
)) {
1794 dst
= get_dst_reg(c
, inst
, i
);
1795 brw_MOV( p
, dst
, param0
);
1798 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1799 brw_set_saturate( p
, 0 );
1801 release_tmps( c
, mark
);
1805 * Resolve subroutine calls after code emit is done.
1807 static void post_wm_emit( struct brw_wm_compile
*c
)
1809 brw_resolve_cals(&c
->func
);
/*
 * get_argument_regs: for each of the four channels whose bit is set in
 * `mask`, stores get_src_reg( c, inst, index, channel ) into
 * regs[ channel ].  Entries of regs[] whose bit is clear are not written.
 *
 * NOTE(review): the return type and the declarations of `index`, `mask`
 * and `i` are not visible in this listing -- confirm against the
 * complete file.
 */
1813 get_argument_regs(struct brw_wm_compile
*c
,
1814 const struct prog_instruction
*inst
,
1816 struct brw_reg
*regs
,
1821 for (i
= 0; i
< 4; i
++) {
1822 if (mask
& (1 << i
))
1823 regs
[i
] = get_src_reg(c
, inst
, index
, i
);
1827 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1829 #define MAX_IF_DEPTH 32
1830 #define MAX_LOOP_DEPTH 32
1831 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
1832 GLuint i
, if_depth
= 0, loop_depth
= 0;
1833 struct brw_compile
*p
= &c
->func
;
1834 struct brw_indirect stack_index
= brw_indirect(0, 0);
1836 c
->out_of_regs
= GL_FALSE
;
1839 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1840 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1842 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
1843 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
1845 struct brw_reg args
[3][4], dst
[4];
1851 _mesa_printf("Inst %d: ", i
);
1852 _mesa_print_instruction(inst
);
1855 /* fetch any constants that this instruction needs */
1856 if (c
->fp
->use_const_buffer
)
1857 fetch_constants(c
, inst
);
1859 if (inst
->Opcode
!= OPCODE_ARL
) {
1860 for (j
= 0; j
< 4; j
++) {
1861 if (inst
->DstReg
.WriteMask
& (1 << j
))
1862 dst
[j
] = get_dst_reg(c
, inst
, j
);
1864 dst
[j
] = brw_null_reg();
1867 for (j
= 0; j
< brw_wm_nr_args(inst
->Opcode
); j
++)
1868 get_argument_regs(c
, inst
, j
, args
[j
], WRITEMASK_XYZW
);
1870 dst_flags
= inst
->DstReg
.WriteMask
;
1871 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1872 dst_flags
|= SATURATE
;
1874 if (inst
->CondUpdate
)
1875 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
1877 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
1879 dst_flags
= inst
->DstReg
.WriteMask
;
1880 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1881 dst_flags
|= SATURATE
;
1883 switch (inst
->Opcode
) {
1885 emit_pixel_xy(c
, dst
, dst_flags
);
1888 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
1891 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
1894 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
1897 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
1900 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
1903 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
1906 emit_fb_write(c
, args
[0], args
[1], args
[2],
1907 INST_AUX_GET_TARGET(inst
->Aux
),
1908 inst
->Aux
& INST_AUX_EOT
);
1910 case WM_FRONTFACING
:
1911 emit_frontfacing(p
, dst
, dst_flags
);
1914 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
1920 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
1923 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
1926 unalias3(c
, emit_lrp
,
1927 dst
, dst_flags
, args
[0], args
[1], args
[2]);
1930 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
1934 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
1937 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
1940 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
1943 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
1946 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
1949 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
1952 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
1955 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
1958 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
1961 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
1964 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
1967 unalias2(c
, emit_min
, dst
, dst_flags
, args
[0], args
[1]);
1970 unalias2(c
, emit_max
, dst
, dst_flags
, args
[0], args
[1]);
1974 emit_ddxy(p
, dst
, dst_flags
, (inst
->Opcode
== OPCODE_DDX
),
1978 emit_sop(p
, dst
, dst_flags
,
1979 BRW_CONDITIONAL_L
, args
[0], args
[1]);
1982 emit_sop(p
, dst
, dst_flags
,
1983 BRW_CONDITIONAL_LE
, args
[0], args
[1]);
1986 emit_sop(p
, dst
, dst_flags
,
1987 BRW_CONDITIONAL_G
, args
[0], args
[1]);
1990 emit_sop(p
, dst
, dst_flags
,
1991 BRW_CONDITIONAL_GE
, args
[0], args
[1]);
1994 emit_sop(p
, dst
, dst_flags
,
1995 BRW_CONDITIONAL_EQ
, args
[0], args
[1]);
1998 emit_sop(p
, dst
, dst_flags
,
1999 BRW_CONDITIONAL_NEQ
, args
[0], args
[1]);
2002 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
2005 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
2006 dst
, dst_flags
, args
[0], args
[1]);
2009 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
2012 emit_noise1(c
, inst
);
2015 emit_noise2(c
, inst
);
2018 emit_noise3(c
, inst
);
2021 emit_noise4(c
, inst
);
2024 emit_tex(c
, dst
, dst_flags
, args
[0],
2025 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
2029 (c
->key
.shadowtex_mask
& (1 << inst
->TexSrcUnit
)) != 0);
2032 emit_txb(c
, dst
, dst_flags
, args
[0],
2033 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
2036 c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
]);
2042 assert(if_depth
< MAX_IF_DEPTH
);
2043 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
2046 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2049 assert(if_depth
> 0);
2050 brw_ENDIF(p
, if_inst
[--if_depth
]);
2053 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2059 brw_push_insn_state(p
);
2060 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2061 brw_set_access_mode(p
, BRW_ALIGN_1
);
2062 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2063 brw_set_access_mode(p
, BRW_ALIGN_16
);
2064 brw_ADD(p
, get_addr_reg(stack_index
),
2065 get_addr_reg(stack_index
), brw_imm_d(4));
2066 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2067 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2068 brw_pop_insn_state(p
);
2072 brw_push_insn_state(p
);
2073 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2074 brw_ADD(p
, get_addr_reg(stack_index
),
2075 get_addr_reg(stack_index
), brw_imm_d(-4));
2076 brw_set_access_mode(p
, BRW_ALIGN_1
);
2077 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2078 brw_set_access_mode(p
, BRW_ALIGN_16
);
2079 brw_pop_insn_state(p
);
2082 case OPCODE_BGNLOOP
:
2083 /* XXX may need to invalidate the current_constant regs */
2084 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2088 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2092 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2094 case OPCODE_ENDLOOP
:
2096 struct brw_instruction
*inst0
, *inst1
;
2099 if (BRW_IS_IGDNG(brw
))
2103 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2104 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2105 while (inst0
> loop_inst
[loop_depth
]) {
2107 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2108 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2109 inst0
->bits3
.if_else
.pop_count
= 0;
2111 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2112 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2113 inst0
->bits3
.if_else
.pop_count
= 0;
2119 _mesa_printf("unsupported IR in fragment shader %d\n",
2123 if (inst
->CondUpdate
)
2124 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2126 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2130 if (INTEL_DEBUG
& DEBUG_WM
) {
2131 _mesa_printf("wm-native:\n");
2132 for (i
= 0; i
< p
->nr_insn
; i
++)
2133 brw_disasm(stderr
, &p
->store
[i
]);
2139 * Do GPU code generation for shaders that use GLSL features such as
2140 * flow control. Other shaders will be compiled with the
/*
 * Drive GPU code generation for a fragment program on the GLSL path.
 *
 * Steps visible in this block, in order:
 *   1. when the DEBUG_WM bit is set in INTEL_DEBUG, print a start banner;
 *   2. call brw_wm_emit_glsl(brw, c) to generate the code;
 *   3. when DEBUG_WM is set, call brw_wm_print_program() with the label
 *      "brw_wm_glsl_emit done";
 *   4. store the register totals in c->prog_data.
 *
 * brw: passed through unchanged to brw_wm_emit_glsl().
 * c:   compile state; c->prog_data.total_grf and
 *      c->prog_data.total_scratch are written here.
 *
 * NOTE(review): this excerpt shows no braces and no statement under the
 * "initial instruction translation/simplification" comment below; confirm
 * against the complete file before relying on this summary.
 */
2142 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
/* Debug tracing only: guarded by the DEBUG_WM bit of INTEL_DEBUG. */
2144 if (INTEL_DEBUG
& DEBUG_WM
) {
2145 _mesa_printf("brw_wm_glsl_emit:\n");
2148 /* initial instruction translation/simplification */
2151 /* actual code generation */
2152 brw_wm_emit_glsl(brw
, c
);
/* Same DEBUG_WM guard as above, this time after code generation. */
2154 if (INTEL_DEBUG
& DEBUG_WM
) {
2155 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* total_grf takes the count returned by num_grf_used(c). */
2158 c
->prog_data
.total_grf
= num_grf_used(c
);
/* total_scratch is set to 0 unconditionally in this function. */
2159 c
->prog_data
.total_scratch
= 0;