1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
46 * Record the mapping of a Mesa register to a hardware register.
48 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
49 int component
, struct brw_reg reg
)
51 c
->wm_regs
[file
][index
][component
].reg
= reg
;
52 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
56 * Examine instruction's write mask to find index of first component
57 * enabled for writing.
59 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
62 for (i
= 0; i
< 4; i
++)
63 if (inst
->DstReg
.WriteMask
& (1<<i
))
68 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
71 if(c
->tmp_index
== c
->tmp_max
)
72 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
74 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
79 * Save current temp register info.
80 * There must be a matching call to release_tmps().
82 static int mark_tmps(struct brw_wm_compile
*c
)
87 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
89 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
92 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
98 * Convert Mesa src register to brw register.
100 * Since we're running in SOA mode each Mesa register corresponds to four
101 * hardware registers. We allocate the hardware registers as needed here.
103 * \param file register file, one of PROGRAM_x
104 * \param index register number
105 * \param component src component (X=0, Y=1, Z=2, W=3)
106 * \param nr not used?!?
107 * \param neg negate value?
108 * \param abs take absolute value?
110 static struct brw_reg
111 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
112 int nr
, GLuint neg
, GLuint abs
)
116 case PROGRAM_STATE_VAR
:
117 case PROGRAM_CONSTANT
:
118 case PROGRAM_UNIFORM
:
119 file
= PROGRAM_STATE_VAR
;
121 case PROGRAM_UNDEFINED
:
122 return brw_null_reg();
123 case PROGRAM_TEMPORARY
:
126 case PROGRAM_PAYLOAD
:
129 _mesa_problem(NULL
, "Unexpected file in get_reg()");
130 return brw_null_reg();
133 /* see if we've already allocated a HW register for this Mesa register */
134 if (c
->wm_regs
[file
][index
][component
].inited
) {
136 reg
= c
->wm_regs
[file
][index
][component
].reg
;
139 /* no, allocate new register */
140 reg
= brw_vec8_grf(c
->reg_index
, 0);
143 /* if this is a new register allocation, record it in the table */
144 if (!c
->wm_regs
[file
][index
][component
].inited
) {
145 set_reg(c
, file
, index
, component
, reg
);
149 if (c
->reg_index
>= BRW_WM_MAX_GRF
- 12) {
150 /* ran out of temporary registers! */
152 /* This is a big hack for now.
153 * Return bad register index, just don't hang the GPU.
155 _mesa_fprintf(stderr
, "out of regs %d\n", c
->reg_index
);
156 c
->reg_index
= BRW_WM_MAX_GRF
- 13;
158 return brw_null_reg();
162 if (neg
& (1 << component
)) {
172 * Preallocate registers. This sets up the Mesa to hardware register
173 * mapping for certain registers, such as constants (uniforms/state vars)
176 static void prealloc_reg(struct brw_wm_compile
*c
)
180 int urb_read_length
= 0;
181 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
183 for (i
= 0; i
< 4; i
++) {
184 if (i
< c
->key
.nr_depth_regs
)
185 reg
= brw_vec8_grf(i
* 2, 0);
187 reg
= brw_vec8_grf(0, 0);
188 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
190 c
->reg_index
+= 2 * c
->key
.nr_depth_regs
;
194 const int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
196 /* use a real constant buffer, or just use a section of the GRF? */
197 c
->fp
->use_const_buffer
= GL_FALSE
; /* (nr_params > 8);*/
199 if (c
->fp
->use_const_buffer
) {
200 /* We'll use a real constant buffer and fetch constants from
201 * it with a dataport read message.
204 /* number of float constants in CURBE */
205 c
->prog_data
.nr_params
= 0;
208 const struct gl_program_parameter_list
*plist
=
209 c
->fp
->program
.Base
.Parameters
;
212 /* number of float constants in CURBE */
213 c
->prog_data
.nr_params
= 4 * nr_params
;
215 /* loop over program constants (float[4]) */
216 for (i
= 0; i
< nr_params
; i
++) {
217 /* loop over XYZW channels */
218 for (j
= 0; j
< 4; j
++, index
++) {
219 reg
= brw_vec1_grf(c
->reg_index
+ index
/ 8, index
% 8);
220 /* Save pointer to parameter/constant value.
221 * Constants will be copied in prepare_constant_buffer()
223 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
224 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
227 /* number of constant regs used (each reg is float[8]) */
228 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
229 c
->reg_index
+= c
->nr_creg
;
233 /* fragment shader inputs */
234 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
237 if (i
>= VERT_RESULT_VAR0
)
238 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
239 else if (i
<= VERT_RESULT_TEX7
)
244 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
245 urb_read_length
= c
->reg_index
;
246 reg
= brw_vec8_grf(c
->reg_index
, 0);
247 for (j
= 0; j
< 4; j
++)
248 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
250 if (c
->key
.vp_outputs_written
& (1 << i
)) {
255 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
256 c
->prog_data
.urb_read_length
= urb_read_length
;
257 c
->prog_data
.curb_read_length
= c
->nr_creg
;
258 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
260 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
263 /* An instruction may reference up to three constants.
264 * They'll be found in these registers.
265 * XXX alloc these on demand!
267 if (c
->fp
->use_const_buffer
) {
268 for (i
= 0; i
< 3; i
++) {
269 c
->current_const
[i
].index
= -1;
270 c
->current_const
[i
].reg
= alloc_tmp(c
);
274 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
275 printf("AFTER PRE_ALLOC, reg_index = %d\n", c
->reg_index
);
281 * Check if any of the instruction's src registers are constants, uniforms,
282 * or statevars. If so, fetch any constants that we don't already have in
283 * the three GRF slots.
285 static void fetch_constants(struct brw_wm_compile
*c
,
286 const struct prog_instruction
*inst
)
288 struct brw_compile
*p
= &c
->func
;
291 /* loop over instruction src regs */
292 for (i
= 0; i
< 3; i
++) {
293 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
294 if (src
->File
== PROGRAM_STATE_VAR
||
295 src
->File
== PROGRAM_CONSTANT
||
296 src
->File
== PROGRAM_UNIFORM
) {
297 c
->current_const
[i
].index
= src
->Index
;
300 printf(" fetch const[%d] for arg %d into reg %d\n",
301 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
304 /* need to fetch the constant now */
306 c
->current_const
[i
].reg
, /* writeback dest */
307 src
->RelAddr
, /* relative indexing? */
308 16 * src
->Index
, /* byte offset */
309 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
317 * Convert Mesa dst register to brw register.
319 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
320 const struct prog_instruction
*inst
,
324 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
329 static struct brw_reg
330 get_src_reg_const(struct brw_wm_compile
*c
,
331 const struct prog_instruction
*inst
,
332 GLuint srcRegIndex
, GLuint component
)
334 /* We should have already fetched the constant from the constant
335 * buffer in fetch_constants(). Now we just have to return a
336 * register description that extracts the needed component and
337 * smears it across all eight vector components.
339 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
340 struct brw_reg const_reg
;
342 assert(component
< 4);
343 assert(srcRegIndex
< 3);
344 assert(c
->current_const
[srcRegIndex
].index
!= -1);
345 const_reg
= c
->current_const
[srcRegIndex
].reg
;
347 /* extract desired float from the const_reg, and smear */
348 const_reg
= stride(const_reg
, 0, 1, 0);
349 const_reg
.subnr
= component
* 4;
351 if (src
->Negate
& (1 << component
))
352 const_reg
= negate(const_reg
);
354 const_reg
= brw_abs(const_reg
);
357 printf(" form const[%d].%d for arg %d, reg %d\n",
358 c
->current_const
[srcRegIndex
].index
,
369 * Convert Mesa src register to brw register.
371 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
372 const struct prog_instruction
*inst
,
373 GLuint srcRegIndex
, GLuint channel
)
375 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
377 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
379 if (c
->fp
->use_const_buffer
&&
380 (src
->File
== PROGRAM_STATE_VAR
||
381 src
->File
== PROGRAM_CONSTANT
||
382 src
->File
== PROGRAM_UNIFORM
)) {
383 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
386 /* other type of source register */
387 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
388 src
->Negate
, src
->Abs
);
394 * Same as \sa get_src_reg() but if the register is a literal, emit
395 * a brw_reg encoding the literal.
396 * Note that a brw instruction only allows one src operand to be a literal.
397 * For instructions with more than one operand, only the second can be a
398 * literal. This means that we treat some literals as constants/uniforms
399 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
402 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
403 const struct prog_instruction
*inst
,
404 GLuint srcRegIndex
, GLuint channel
)
406 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
407 if (src
->File
== PROGRAM_CONSTANT
) {
409 const int component
= GET_SWZ(src
->Swizzle
, channel
);
410 const GLfloat
*param
=
411 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
412 GLfloat value
= param
[component
];
413 if (src
->Negate
& (1 << channel
))
416 value
= FABSF(value
);
418 printf(" form immed value %f for chan %d\n", value
, channel
);
420 return brw_imm_f(value
);
423 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
429 * Subroutines are minimal support for reusable instruction sequences.
430 * They are implemented as simply as possible to minimise overhead: there
431 * is no explicit support for communication between the caller and callee
432 * other than saving the return address in a temporary register, nor is
433 * there any automatic local storage. This implies that great care is
434 * required before attempting reentrancy or any kind of nested
435 * subroutine invocations.
437 static void invoke_subroutine( struct brw_wm_compile
*c
,
438 enum _subroutine subroutine
,
439 void (*emit
)( struct brw_wm_compile
* ) )
441 struct brw_compile
*p
= &c
->func
;
443 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
445 if( c
->subroutines
[ subroutine
] ) {
446 /* subroutine previously emitted: reuse existing instructions */
448 int mark
= mark_tmps( c
);
449 struct brw_reg return_address
= retype( alloc_tmp( c
),
450 BRW_REGISTER_TYPE_UD
);
451 int here
= p
->nr_insn
;
453 brw_push_insn_state(p
);
454 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
455 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
457 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
458 brw_imm_d( ( c
->subroutines
[ subroutine
] -
460 brw_pop_insn_state(p
);
462 release_tmps( c
, mark
);
464 /* previously unused subroutine: emit, and mark for later reuse */
466 int mark
= mark_tmps( c
);
467 struct brw_reg return_address
= retype( alloc_tmp( c
),
468 BRW_REGISTER_TYPE_UD
);
469 struct brw_instruction
*calc
;
470 int base
= p
->nr_insn
;
472 brw_push_insn_state(p
);
473 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
474 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
475 brw_pop_insn_state(p
);
477 c
->subroutines
[ subroutine
] = p
->nr_insn
;
481 brw_push_insn_state(p
);
482 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
483 brw_MOV( p
, brw_ip_reg(), return_address
);
484 brw_pop_insn_state(p
);
486 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
488 release_tmps( c
, mark
);
492 static void emit_abs( struct brw_wm_compile
*c
,
493 const struct prog_instruction
*inst
)
496 struct brw_compile
*p
= &c
->func
;
497 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
498 for (i
= 0; i
< 4; i
++) {
499 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
500 struct brw_reg src
, dst
;
501 dst
= get_dst_reg(c
, inst
, i
);
502 src
= get_src_reg(c
, inst
, 0, i
);
503 brw_MOV(p
, dst
, brw_abs(src
));
506 brw_set_saturate(p
, 0);
509 static void emit_trunc( struct brw_wm_compile
*c
,
510 const struct prog_instruction
*inst
)
513 struct brw_compile
*p
= &c
->func
;
514 GLuint mask
= inst
->DstReg
.WriteMask
;
515 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
516 for (i
= 0; i
< 4; i
++) {
518 struct brw_reg src
, dst
;
519 dst
= get_dst_reg(c
, inst
, i
);
520 src
= get_src_reg(c
, inst
, 0, i
);
521 brw_RNDZ(p
, dst
, src
);
524 brw_set_saturate(p
, 0);
527 static void emit_mov( struct brw_wm_compile
*c
,
528 const struct prog_instruction
*inst
)
531 struct brw_compile
*p
= &c
->func
;
532 GLuint mask
= inst
->DstReg
.WriteMask
;
533 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
534 for (i
= 0; i
< 4; i
++) {
536 struct brw_reg src
, dst
;
537 dst
= get_dst_reg(c
, inst
, i
);
538 /* XXX some moves from immediate value don't work reliably!!! */
539 /*src = get_src_reg_imm(c, inst, 0, i);*/
540 src
= get_src_reg(c
, inst
, 0, i
);
541 brw_MOV(p
, dst
, src
);
544 brw_set_saturate(p
, 0);
547 static void emit_pixel_xy(struct brw_wm_compile
*c
,
548 const struct prog_instruction
*inst
)
550 struct brw_reg r1
= brw_vec1_grf(1, 0);
551 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
553 struct brw_reg dst0
, dst1
;
554 struct brw_compile
*p
= &c
->func
;
555 GLuint mask
= inst
->DstReg
.WriteMask
;
557 dst0
= get_dst_reg(c
, inst
, 0);
558 dst1
= get_dst_reg(c
, inst
, 1);
559 /* Calculate pixel centers by adding 1 or 0 to each of the
560 * micro-tile coordinates passed in r1.
562 if (mask
& WRITEMASK_X
) {
564 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
565 stride(suboffset(r1_uw
, 4), 2, 4, 0),
566 brw_imm_v(0x10101010));
569 if (mask
& WRITEMASK_Y
) {
571 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
572 stride(suboffset(r1_uw
, 5), 2, 4, 0),
573 brw_imm_v(0x11001100));
577 static void emit_delta_xy(struct brw_wm_compile
*c
,
578 const struct prog_instruction
*inst
)
580 struct brw_reg r1
= brw_vec1_grf(1, 0);
581 struct brw_reg dst0
, dst1
, src0
, src1
;
582 struct brw_compile
*p
= &c
->func
;
583 GLuint mask
= inst
->DstReg
.WriteMask
;
585 dst0
= get_dst_reg(c
, inst
, 0);
586 dst1
= get_dst_reg(c
, inst
, 1);
587 src0
= get_src_reg(c
, inst
, 0, 0);
588 src1
= get_src_reg(c
, inst
, 0, 1);
589 /* Calc delta X,Y by subtracting origin in r1 from the pixel
592 if (mask
& WRITEMASK_X
) {
595 retype(src0
, BRW_REGISTER_TYPE_UW
),
599 if (mask
& WRITEMASK_Y
) {
602 retype(src1
, BRW_REGISTER_TYPE_UW
),
603 negate(suboffset(r1
,1)));
608 static void fire_fb_write( struct brw_wm_compile
*c
,
614 struct brw_compile
*p
= &c
->func
;
615 /* Pass through control information:
617 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
619 brw_push_insn_state(p
);
620 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
622 brw_message_reg(base_reg
+ 1),
624 brw_pop_insn_state(p
);
626 /* Send framebuffer write message: */
628 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
630 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
637 static void emit_fb_write(struct brw_wm_compile
*c
,
638 const struct prog_instruction
*inst
)
640 struct brw_compile
*p
= &c
->func
;
646 /* Reserve a space for AA - may not be needed:
648 if (c
->key
.aa_dest_stencil_reg
)
651 brw_push_insn_state(p
);
652 for (channel
= 0; channel
< 4; channel
++) {
653 src0
= get_src_reg(c
, inst
, 0, channel
);
654 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
655 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
656 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
658 /* skip over the regs populated above: */
660 brw_pop_insn_state(p
);
662 if (c
->key
.source_depth_to_render_target
) {
663 if (c
->key
.computes_depth
) {
664 src0
= get_src_reg(c
, inst
, 2, 2);
665 brw_MOV(p
, brw_message_reg(nr
), src0
);
668 src0
= get_src_reg(c
, inst
, 1, 1);
669 brw_MOV(p
, brw_message_reg(nr
), src0
);
675 if (c
->key
.dest_depth_reg
) {
676 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
677 GLuint off
= c
->key
.dest_depth_reg
% 2;
682 /* XXX do we need this code? comp always 1, off always 0, it seems */
684 brw_push_insn_state(p
);
685 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
687 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
689 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
690 brw_pop_insn_state(p
);
695 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
696 brw_MOV(p
, brw_message_reg(nr
), src
);
701 target
= inst
->Aux
>> 1;
703 fire_fb_write(c
, 0, nr
, target
, eot
);
706 static void emit_pixel_w( struct brw_wm_compile
*c
,
707 const struct prog_instruction
*inst
)
709 struct brw_compile
*p
= &c
->func
;
710 GLuint mask
= inst
->DstReg
.WriteMask
;
711 if (mask
& WRITEMASK_W
) {
712 struct brw_reg dst
, src0
, delta0
, delta1
;
713 struct brw_reg interp3
;
715 dst
= get_dst_reg(c
, inst
, 3);
716 src0
= get_src_reg(c
, inst
, 0, 0);
717 delta0
= get_src_reg(c
, inst
, 1, 0);
718 delta1
= get_src_reg(c
, inst
, 1, 1);
720 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
721 /* Calc 1/w - just linterp wpos[3] optimized by putting the
722 * result straight into a message reg.
724 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
725 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
729 BRW_MATH_FUNCTION_INV
,
730 BRW_MATH_SATURATE_NONE
,
732 BRW_MATH_PRECISION_FULL
);
736 static void emit_linterp(struct brw_wm_compile
*c
,
737 const struct prog_instruction
*inst
)
739 struct brw_compile
*p
= &c
->func
;
740 GLuint mask
= inst
->DstReg
.WriteMask
;
741 struct brw_reg interp
[4];
742 struct brw_reg dst
, delta0
, delta1
;
746 src0
= get_src_reg(c
, inst
, 0, 0);
747 delta0
= get_src_reg(c
, inst
, 1, 0);
748 delta1
= get_src_reg(c
, inst
, 1, 1);
751 interp
[0] = brw_vec1_grf(nr
, 0);
752 interp
[1] = brw_vec1_grf(nr
, 4);
753 interp
[2] = brw_vec1_grf(nr
+1, 0);
754 interp
[3] = brw_vec1_grf(nr
+1, 4);
756 for(i
= 0; i
< 4; i
++ ) {
758 dst
= get_dst_reg(c
, inst
, i
);
759 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
760 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
765 static void emit_cinterp(struct brw_wm_compile
*c
,
766 const struct prog_instruction
*inst
)
768 struct brw_compile
*p
= &c
->func
;
769 GLuint mask
= inst
->DstReg
.WriteMask
;
771 struct brw_reg interp
[4];
772 struct brw_reg dst
, src0
;
775 src0
= get_src_reg(c
, inst
, 0, 0);
778 interp
[0] = brw_vec1_grf(nr
, 0);
779 interp
[1] = brw_vec1_grf(nr
, 4);
780 interp
[2] = brw_vec1_grf(nr
+1, 0);
781 interp
[3] = brw_vec1_grf(nr
+1, 4);
783 for(i
= 0; i
< 4; i
++ ) {
785 dst
= get_dst_reg(c
, inst
, i
);
786 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
791 static void emit_pinterp(struct brw_wm_compile
*c
,
792 const struct prog_instruction
*inst
)
794 struct brw_compile
*p
= &c
->func
;
795 GLuint mask
= inst
->DstReg
.WriteMask
;
797 struct brw_reg interp
[4];
798 struct brw_reg dst
, delta0
, delta1
;
799 struct brw_reg src0
, w
;
802 src0
= get_src_reg(c
, inst
, 0, 0);
803 delta0
= get_src_reg(c
, inst
, 1, 0);
804 delta1
= get_src_reg(c
, inst
, 1, 1);
805 w
= get_src_reg(c
, inst
, 2, 3);
808 interp
[0] = brw_vec1_grf(nr
, 0);
809 interp
[1] = brw_vec1_grf(nr
, 4);
810 interp
[2] = brw_vec1_grf(nr
+1, 0);
811 interp
[3] = brw_vec1_grf(nr
+1, 4);
813 for(i
= 0; i
< 4; i
++ ) {
815 dst
= get_dst_reg(c
, inst
, i
);
816 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
817 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
819 brw_MUL(p
, dst
, dst
, w
);
824 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
825 static void emit_frontfacing(struct brw_wm_compile
*c
,
826 const struct prog_instruction
*inst
)
828 struct brw_compile
*p
= &c
->func
;
829 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
831 GLuint mask
= inst
->DstReg
.WriteMask
;
834 for (i
= 0; i
< 4; i
++) {
836 dst
= get_dst_reg(c
, inst
, i
);
837 brw_MOV(p
, dst
, brw_imm_f(0.0));
841 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
844 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
845 for (i
= 0; i
< 4; i
++) {
847 dst
= get_dst_reg(c
, inst
, i
);
848 brw_MOV(p
, dst
, brw_imm_f(1.0));
851 brw_set_predicate_control_flag_value(p
, 0xff);
854 static void emit_xpd(struct brw_wm_compile
*c
,
855 const struct prog_instruction
*inst
)
858 struct brw_compile
*p
= &c
->func
;
859 GLuint mask
= inst
->DstReg
.WriteMask
;
860 for (i
= 0; i
< 4; i
++) {
864 struct brw_reg src0
, src1
, dst
;
865 dst
= get_dst_reg(c
, inst
, i
);
866 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
867 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
868 brw_MUL(p
, brw_null_reg(), src0
, src1
);
869 src0
= get_src_reg(c
, inst
, 0, i1
);
870 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
871 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
872 brw_MAC(p
, dst
, src0
, src1
);
873 brw_set_saturate(p
, 0);
876 brw_set_saturate(p
, 0);
879 static void emit_dp3(struct brw_wm_compile
*c
,
880 const struct prog_instruction
*inst
)
882 struct brw_reg src0
[3], src1
[3], dst
;
884 struct brw_compile
*p
= &c
->func
;
885 for (i
= 0; i
< 3; i
++) {
886 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
887 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
890 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
891 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
892 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
893 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
894 brw_MAC(p
, dst
, src0
[2], src1
[2]);
895 brw_set_saturate(p
, 0);
898 static void emit_dp4(struct brw_wm_compile
*c
,
899 const struct prog_instruction
*inst
)
901 struct brw_reg src0
[4], src1
[4], dst
;
903 struct brw_compile
*p
= &c
->func
;
904 for (i
= 0; i
< 4; i
++) {
905 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
906 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
908 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
909 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
910 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
911 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
912 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
913 brw_MAC(p
, dst
, src0
[3], src1
[3]);
914 brw_set_saturate(p
, 0);
917 static void emit_dph(struct brw_wm_compile
*c
,
918 const struct prog_instruction
*inst
)
920 struct brw_reg src0
[4], src1
[4], dst
;
922 struct brw_compile
*p
= &c
->func
;
923 for (i
= 0; i
< 4; i
++) {
924 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
925 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
927 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
928 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
929 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
930 brw_MAC(p
, dst
, src0
[2], src1
[2]);
931 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
932 brw_ADD(p
, dst
, dst
, src1
[3]);
933 brw_set_saturate(p
, 0);
937 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
938 * Note that the result of the function is smeared across the dest
939 * register's X, Y, Z and W channels (subject to writemasking of course).
941 static void emit_math1(struct brw_wm_compile
*c
,
942 const struct prog_instruction
*inst
, GLuint func
)
944 struct brw_compile
*p
= &c
->func
;
945 struct brw_reg src0
, dst
, tmp
;
946 const int mark
= mark_tmps( c
);
951 /* Get first component of source register */
952 src0
= get_src_reg(c
, inst
, 0, 0);
954 /* tmp = func(src0) */
955 brw_MOV(p
, brw_message_reg(2), src0
);
959 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
962 BRW_MATH_DATA_VECTOR
,
963 BRW_MATH_PRECISION_FULL
);
965 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
967 /* replicate tmp value across enabled dest channels */
968 for (i
= 0; i
< 4; i
++) {
969 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
970 dst
= get_dst_reg(c
, inst
, i
);
971 brw_MOV(p
, dst
, tmp
);
975 release_tmps(c
, mark
);
978 static void emit_rcp(struct brw_wm_compile
*c
,
979 const struct prog_instruction
*inst
)
981 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
984 static void emit_rsq(struct brw_wm_compile
*c
,
985 const struct prog_instruction
*inst
)
987 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
990 static void emit_sin(struct brw_wm_compile
*c
,
991 const struct prog_instruction
*inst
)
993 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
996 static void emit_cos(struct brw_wm_compile
*c
,
997 const struct prog_instruction
*inst
)
999 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1002 static void emit_ex2(struct brw_wm_compile
*c
,
1003 const struct prog_instruction
*inst
)
1005 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1008 static void emit_lg2(struct brw_wm_compile
*c
,
1009 const struct prog_instruction
*inst
)
1011 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1014 static void emit_add(struct brw_wm_compile
*c
,
1015 const struct prog_instruction
*inst
)
1017 struct brw_compile
*p
= &c
->func
;
1018 struct brw_reg src0
, src1
, dst
;
1019 GLuint mask
= inst
->DstReg
.WriteMask
;
1021 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1022 for (i
= 0 ; i
< 4; i
++) {
1023 if (mask
& (1<<i
)) {
1024 dst
= get_dst_reg(c
, inst
, i
);
1025 src0
= get_src_reg(c
, inst
, 0, i
);
1026 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1027 brw_ADD(p
, dst
, src0
, src1
);
1030 brw_set_saturate(p
, 0);
1033 static void emit_arl(struct brw_wm_compile
*c
,
1034 const struct prog_instruction
*inst
)
1036 struct brw_compile
*p
= &c
->func
;
1037 struct brw_reg src0
, addr_reg
;
1038 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1039 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1040 BRW_ARF_ADDRESS
, 0);
1041 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1042 brw_MOV(p
, addr_reg
, src0
);
1043 brw_set_saturate(p
, 0);
1046 static void emit_sub(struct brw_wm_compile
*c
,
1047 const struct prog_instruction
*inst
)
1049 struct brw_compile
*p
= &c
->func
;
1050 struct brw_reg src0
, src1
, dst
;
1051 GLuint mask
= inst
->DstReg
.WriteMask
;
1053 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1054 for (i
= 0 ; i
< 4; i
++) {
1055 if (mask
& (1<<i
)) {
1056 dst
= get_dst_reg(c
, inst
, i
);
1057 src0
= get_src_reg(c
, inst
, 0, i
);
1058 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1059 brw_ADD(p
, dst
, src0
, negate(src1
));
1062 brw_set_saturate(p
, 0);
1065 static void emit_mul(struct brw_wm_compile
*c
,
1066 const struct prog_instruction
*inst
)
1068 struct brw_compile
*p
= &c
->func
;
1069 struct brw_reg src0
, src1
, dst
;
1070 GLuint mask
= inst
->DstReg
.WriteMask
;
1072 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1073 for (i
= 0 ; i
< 4; i
++) {
1074 if (mask
& (1<<i
)) {
1075 dst
= get_dst_reg(c
, inst
, i
);
1076 src0
= get_src_reg(c
, inst
, 0, i
);
1077 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1078 brw_MUL(p
, dst
, src0
, src1
);
1081 brw_set_saturate(p
, 0);
1084 static void emit_frc(struct brw_wm_compile
*c
,
1085 const struct prog_instruction
*inst
)
1087 struct brw_compile
*p
= &c
->func
;
1088 struct brw_reg src0
, dst
;
1089 GLuint mask
= inst
->DstReg
.WriteMask
;
1091 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1092 for (i
= 0 ; i
< 4; i
++) {
1093 if (mask
& (1<<i
)) {
1094 dst
= get_dst_reg(c
, inst
, i
);
1095 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1096 brw_FRC(p
, dst
, src0
);
1099 if (inst
->SaturateMode
!= SATURATE_OFF
)
1100 brw_set_saturate(p
, 0);
1103 static void emit_flr(struct brw_wm_compile
*c
,
1104 const struct prog_instruction
*inst
)
1106 struct brw_compile
*p
= &c
->func
;
1107 struct brw_reg src0
, dst
;
1108 GLuint mask
= inst
->DstReg
.WriteMask
;
1110 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1111 for (i
= 0 ; i
< 4; i
++) {
1112 if (mask
& (1<<i
)) {
1113 dst
= get_dst_reg(c
, inst
, i
);
1114 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1115 brw_RNDD(p
, dst
, src0
);
1118 brw_set_saturate(p
, 0);
1122 static void emit_min_max(struct brw_wm_compile
*c
,
1123 const struct prog_instruction
*inst
)
1125 struct brw_compile
*p
= &c
->func
;
1126 const GLuint mask
= inst
->DstReg
.WriteMask
;
1127 const int mark
= mark_tmps(c
);
1129 brw_push_insn_state(p
);
1130 for (i
= 0; i
< 4; i
++) {
1131 if (mask
& (1<<i
)) {
1132 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1133 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1134 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1136 /* if dst==src0 or dst==src1 we need to use a temp reg */
1137 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1138 brw_same_reg(dst
, src1
);
1145 printf(" Min/max: dst %d src0 %d src1 %d\n",
1146 dst.nr, src0.nr, src1.nr);
1148 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1149 brw_MOV(p
, dst
, src0
);
1150 brw_set_saturate(p
, 0);
1152 if (inst
->Opcode
== OPCODE_MIN
)
1153 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1155 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1157 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1158 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1159 brw_MOV(p
, dst
, src1
);
1160 brw_set_saturate(p
, 0);
1161 brw_set_predicate_control_flag_value(p
, 0xff);
1163 brw_MOV(p
, real_dst
, dst
);
1166 brw_pop_insn_state(p
);
1167 release_tmps(c
, mark
);
1170 static void emit_pow(struct brw_wm_compile
*c
,
1171 const struct prog_instruction
*inst
)
1173 struct brw_compile
*p
= &c
->func
;
1174 struct brw_reg dst
, src0
, src1
;
1175 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1176 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1177 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1179 brw_MOV(p
, brw_message_reg(2), src0
);
1180 brw_MOV(p
, brw_message_reg(3), src1
);
1184 BRW_MATH_FUNCTION_POW
,
1185 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1188 BRW_MATH_DATA_VECTOR
,
1189 BRW_MATH_PRECISION_FULL
);
1192 static void emit_lrp(struct brw_wm_compile
*c
,
1193 const struct prog_instruction
*inst
)
1195 struct brw_compile
*p
= &c
->func
;
1196 GLuint mask
= inst
->DstReg
.WriteMask
;
1197 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1199 int mark
= mark_tmps(c
);
1200 for (i
= 0; i
< 4; i
++) {
1201 if (mask
& (1<<i
)) {
1202 dst
= get_dst_reg(c
, inst
, i
);
1203 src0
= get_src_reg(c
, inst
, 0, i
);
1205 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1207 if (src1
.nr
== dst
.nr
) {
1208 tmp1
= alloc_tmp(c
);
1209 brw_MOV(p
, tmp1
, src1
);
1213 src2
= get_src_reg(c
, inst
, 2, i
);
1214 if (src2
.nr
== dst
.nr
) {
1215 tmp2
= alloc_tmp(c
);
1216 brw_MOV(p
, tmp2
, src2
);
1220 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1221 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1222 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1223 brw_MAC(p
, dst
, src0
, tmp1
);
1224 brw_set_saturate(p
, 0);
1226 release_tmps(c
, mark
);
1231 * For GLSL shaders, this KIL will be unconditional.
1232 * It may be contained inside an IF/ENDIF structure of course.
1234 static void emit_kil(struct brw_wm_compile
*c
)
1236 struct brw_compile
*p
= &c
->func
;
1237 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1238 brw_push_insn_state(p
);
1239 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1240 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1241 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1242 brw_pop_insn_state(p
);
1245 static void emit_mad(struct brw_wm_compile
*c
,
1246 const struct prog_instruction
*inst
)
1248 struct brw_compile
*p
= &c
->func
;
1249 GLuint mask
= inst
->DstReg
.WriteMask
;
1250 struct brw_reg dst
, src0
, src1
, src2
;
1253 for (i
= 0; i
< 4; i
++) {
1254 if (mask
& (1<<i
)) {
1255 dst
= get_dst_reg(c
, inst
, i
);
1256 src0
= get_src_reg(c
, inst
, 0, i
);
1257 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1258 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1259 brw_MUL(p
, dst
, src0
, src1
);
1261 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1262 brw_ADD(p
, dst
, dst
, src2
);
1263 brw_set_saturate(p
, 0);
1268 static void emit_sop(struct brw_wm_compile
*c
,
1269 const struct prog_instruction
*inst
, GLuint cond
)
1271 struct brw_compile
*p
= &c
->func
;
1272 GLuint mask
= inst
->DstReg
.WriteMask
;
1273 struct brw_reg dst
, src0
, src1
;
1276 for (i
= 0; i
< 4; i
++) {
1277 if (mask
& (1<<i
)) {
1278 dst
= get_dst_reg(c
, inst
, i
);
1279 src0
= get_src_reg(c
, inst
, 0, i
);
1280 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1281 brw_push_insn_state(p
);
1282 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1283 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1284 brw_MOV(p
, dst
, brw_imm_f(0.0));
1285 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1286 brw_MOV(p
, dst
, brw_imm_f(1.0));
1287 brw_pop_insn_state(p
);
1292 static void emit_slt(struct brw_wm_compile
*c
,
1293 const struct prog_instruction
*inst
)
1295 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1298 static void emit_sle(struct brw_wm_compile
*c
,
1299 const struct prog_instruction
*inst
)
1301 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1304 static void emit_sgt(struct brw_wm_compile
*c
,
1305 const struct prog_instruction
*inst
)
1307 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1310 static void emit_sge(struct brw_wm_compile
*c
,
1311 const struct prog_instruction
*inst
)
1313 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1316 static void emit_seq(struct brw_wm_compile
*c
,
1317 const struct prog_instruction
*inst
)
1319 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1322 static void emit_sne(struct brw_wm_compile
*c
,
1323 const struct prog_instruction
*inst
)
1325 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1328 static void emit_ddx(struct brw_wm_compile
*c
,
1329 const struct prog_instruction
*inst
)
1331 struct brw_compile
*p
= &c
->func
;
1332 GLuint mask
= inst
->DstReg
.WriteMask
;
1333 struct brw_reg interp
[4];
1335 struct brw_reg src0
, w
;
1337 src0
= get_src_reg(c
, inst
, 0, 0);
1338 w
= get_src_reg(c
, inst
, 1, 3);
1340 interp
[0] = brw_vec1_grf(nr
, 0);
1341 interp
[1] = brw_vec1_grf(nr
, 4);
1342 interp
[2] = brw_vec1_grf(nr
+1, 0);
1343 interp
[3] = brw_vec1_grf(nr
+1, 4);
1344 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1345 for(i
= 0; i
< 4; i
++ ) {
1346 if (mask
& (1<<i
)) {
1347 dst
= get_dst_reg(c
, inst
, i
);
1348 brw_MOV(p
, dst
, interp
[i
]);
1349 brw_MUL(p
, dst
, dst
, w
);
1352 brw_set_saturate(p
, 0);
1355 static void emit_ddy(struct brw_wm_compile
*c
,
1356 const struct prog_instruction
*inst
)
1358 struct brw_compile
*p
= &c
->func
;
1359 GLuint mask
= inst
->DstReg
.WriteMask
;
1360 struct brw_reg interp
[4];
1362 struct brw_reg src0
, w
;
1365 src0
= get_src_reg(c
, inst
, 0, 0);
1367 w
= get_src_reg(c
, inst
, 1, 3);
1368 interp
[0] = brw_vec1_grf(nr
, 0);
1369 interp
[1] = brw_vec1_grf(nr
, 4);
1370 interp
[2] = brw_vec1_grf(nr
+1, 0);
1371 interp
[3] = brw_vec1_grf(nr
+1, 4);
1372 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1373 for(i
= 0; i
< 4; i
++ ) {
1374 if (mask
& (1<<i
)) {
1375 dst
= get_dst_reg(c
, inst
, i
);
1376 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1377 brw_MUL(p
, dst
, dst
, w
);
1380 brw_set_saturate(p
, 0);
1383 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1385 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1389 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1391 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1394 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1396 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1399 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1401 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1405 /* One-, two- and three-dimensional Perlin noise, similar to the description
1406 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1407 static void noise1_sub( struct brw_wm_compile
*c
) {
1409 struct brw_compile
*p
= &c
->func
;
1410 struct brw_reg param
,
1411 x0
, x1
, /* gradients at each end */
1412 t
, tmp
[ 2 ], /* float temporaries */
1413 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1415 int mark
= mark_tmps( c
);
1417 x0
= alloc_tmp( c
);
1418 x1
= alloc_tmp( c
);
1420 tmp
[ 0 ] = alloc_tmp( c
);
1421 tmp
[ 1 ] = alloc_tmp( c
);
1422 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1423 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1424 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1425 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1426 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1428 param
= lookup_tmp( c
, mark
- 2 );
1430 brw_set_access_mode( p
, BRW_ALIGN_1
);
1432 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1434 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1435 be hashed. Also compute the remainder (offset within the unit
1436 length), interleaved to reduce register dependency penalties. */
1437 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1438 brw_FRC( p
, param
, param
);
1439 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1440 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1441 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1443 /* We're now ready to perform the hashing. The two hashes are
1444 interleaved for performance. The hash function used is
1445 designed to rapidly achieve avalanche and require only 32x16
1446 bit multiplication, and 16-bit swizzles (which we get for
1447 free). We can't use immediate operands in the multiplies,
1448 because immediates are permitted only in src1 and the 16-bit
1449 factor is permitted only in src0. */
1450 for( i
= 0; i
< 2; i
++ )
1451 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1452 for( i
= 0; i
< 2; i
++ )
1453 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1454 high_words( itmp
[ i
] ) );
1455 for( i
= 0; i
< 2; i
++ )
1456 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1457 for( i
= 0; i
< 2; i
++ )
1458 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1459 high_words( itmp
[ i
] ) );
1460 for( i
= 0; i
< 2; i
++ )
1461 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1462 for( i
= 0; i
< 2; i
++ )
1463 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1464 high_words( itmp
[ i
] ) );
1466 /* Now we want to initialise the two gradients based on the
1467 hashes. Format conversion from signed integer to float leaves
1468 everything scaled too high by a factor of pow( 2, 31 ), but
1469 we correct for that right at the end. */
1470 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1471 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1472 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1474 brw_MUL( p
, x0
, x0
, param
);
1475 brw_MUL( p
, x1
, x1
, t
);
1477 /* We interpolate between the gradients using the polynomial
1478 6t^5 - 15t^4 + 10t^3 (Perlin). */
1479 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1480 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1481 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1482 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1483 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1484 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1486 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1487 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1488 brw_MUL( p
, x1
, x1
, param
);
1489 brw_ADD( p
, x0
, x0
, x1
);
1490 /* scale by pow( 2, -30 ), to compensate for the format conversion
1491 above and an extra factor of 2 so that a single gradient covers
1493 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1495 release_tmps( c
, mark
);
1498 static void emit_noise1( struct brw_wm_compile
*c
,
1499 const struct prog_instruction
*inst
)
1501 struct brw_compile
*p
= &c
->func
;
1502 struct brw_reg src
, param
, dst
;
1503 GLuint mask
= inst
->DstReg
.WriteMask
;
1505 int mark
= mark_tmps( c
);
1507 assert( mark
== 0 );
1509 src
= get_src_reg( c
, inst
, 0, 0 );
1511 param
= alloc_tmp( c
);
1513 brw_MOV( p
, param
, src
);
1515 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1517 /* Fill in the result: */
1518 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1519 for (i
= 0 ; i
< 4; i
++) {
1520 if (mask
& (1<<i
)) {
1521 dst
= get_dst_reg(c
, inst
, i
);
1522 brw_MOV( p
, dst
, param
);
1525 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1526 brw_set_saturate( p
, 0 );
1528 release_tmps( c
, mark
);
1531 static void noise2_sub( struct brw_wm_compile
*c
) {
1533 struct brw_compile
*p
= &c
->func
;
1534 struct brw_reg param0
, param1
,
1535 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1536 t
, tmp
[ 4 ], /* float temporaries */
1537 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1539 int mark
= mark_tmps( c
);
1541 x0y0
= alloc_tmp( c
);
1542 x0y1
= alloc_tmp( c
);
1543 x1y0
= alloc_tmp( c
);
1544 x1y1
= alloc_tmp( c
);
1546 for( i
= 0; i
< 4; i
++ ) {
1547 tmp
[ i
] = alloc_tmp( c
);
1548 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1550 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1551 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1552 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1554 param0
= lookup_tmp( c
, mark
- 3 );
1555 param1
= lookup_tmp( c
, mark
- 2 );
1557 brw_set_access_mode( p
, BRW_ALIGN_1
);
1559 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1560 be hashed. Also compute the remainders (offsets within the unit
1561 square), interleaved to reduce register dependency penalties. */
1562 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1563 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1564 brw_FRC( p
, param0
, param0
);
1565 brw_FRC( p
, param1
, param1
);
1566 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1567 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1568 low_words( itmp
[ 1 ] ) );
1569 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1570 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1571 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1572 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1573 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1575 /* We're now ready to perform the hashing. The four hashes are
1576 interleaved for performance. The hash function used is
1577 designed to rapidly achieve avalanche and require only 32x16
1578 bit multiplication, and 16-bit swizzles (which we get for
1579 free). We can't use immediate operands in the multiplies,
1580 because immediates are permitted only in src1 and the 16-bit
1581 factor is permitted only in src0. */
1582 for( i
= 0; i
< 4; i
++ )
1583 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1584 for( i
= 0; i
< 4; i
++ )
1585 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1586 high_words( itmp
[ i
] ) );
1587 for( i
= 0; i
< 4; i
++ )
1588 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1589 for( i
= 0; i
< 4; i
++ )
1590 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1591 high_words( itmp
[ i
] ) );
1592 for( i
= 0; i
< 4; i
++ )
1593 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1594 for( i
= 0; i
< 4; i
++ )
1595 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1596 high_words( itmp
[ i
] ) );
1598 /* Now we want to initialise the four gradients based on the
1599 hashes. Format conversion from signed integer to float leaves
1600 everything scaled too high by a factor of pow( 2, 15 ), but
1601 we correct for that right at the end. */
1602 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1603 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1604 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1605 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1606 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1608 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1609 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1610 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1611 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1613 brw_MUL( p
, x1y0
, x1y0
, t
);
1614 brw_MUL( p
, x1y1
, x1y1
, t
);
1615 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1616 brw_MUL( p
, x0y0
, x0y0
, param0
);
1617 brw_MUL( p
, x0y1
, x0y1
, param0
);
1619 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1620 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1621 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1622 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1624 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1625 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1626 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1627 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1629 /* We interpolate between the gradients using the polynomial
1630 6t^5 - 15t^4 + 10t^3 (Perlin). */
1631 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1632 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1633 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1634 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1635 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1636 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1637 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1639 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1640 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1641 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1642 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1643 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1645 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1646 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1647 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1648 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1650 /* Here we interpolate in the y dimension... */
1651 brw_MUL( p
, x0y1
, x0y1
, param1
);
1652 brw_MUL( p
, x1y1
, x1y1
, param1
);
1653 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1654 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1656 /* And now in x. There are horrible register dependencies here,
1657 but we have nothing else to do. */
1658 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1659 brw_MUL( p
, x1y0
, x1y0
, param0
);
1660 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1662 /* scale by pow( 2, -15 ), as described above */
1663 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1665 release_tmps( c
, mark
);
1668 static void emit_noise2( struct brw_wm_compile
*c
,
1669 const struct prog_instruction
*inst
)
1671 struct brw_compile
*p
= &c
->func
;
1672 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1673 GLuint mask
= inst
->DstReg
.WriteMask
;
1675 int mark
= mark_tmps( c
);
1677 assert( mark
== 0 );
1679 src0
= get_src_reg( c
, inst
, 0, 0 );
1680 src1
= get_src_reg( c
, inst
, 0, 1 );
1682 param0
= alloc_tmp( c
);
1683 param1
= alloc_tmp( c
);
1685 brw_MOV( p
, param0
, src0
);
1686 brw_MOV( p
, param1
, src1
);
1688 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1690 /* Fill in the result: */
1691 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1692 for (i
= 0 ; i
< 4; i
++) {
1693 if (mask
& (1<<i
)) {
1694 dst
= get_dst_reg(c
, inst
, i
);
1695 brw_MOV( p
, dst
, param0
);
1698 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1699 brw_set_saturate( p
, 0 );
1701 release_tmps( c
, mark
);
1705 * The three-dimensional case is much like the one- and two- versions above,
1706 * but since the number of corners is rapidly growing we now pack 16 16-bit
1707 * hashes into each register to extract more parallelism from the EUs.
1709 static void noise3_sub( struct brw_wm_compile
*c
) {
1711 struct brw_compile
*p
= &c
->func
;
1712 struct brw_reg param0
, param1
, param2
,
1713 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1714 xi
, yi
, zi
, /* interpolation coefficients */
1715 t
, tmp
[ 8 ], /* float temporaries */
1716 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1717 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1719 int mark
= mark_tmps( c
);
1721 x0y0
= alloc_tmp( c
);
1722 x0y1
= alloc_tmp( c
);
1723 x1y0
= alloc_tmp( c
);
1724 x1y1
= alloc_tmp( c
);
1725 xi
= alloc_tmp( c
);
1726 yi
= alloc_tmp( c
);
1727 zi
= alloc_tmp( c
);
1729 for( i
= 0; i
< 8; i
++ ) {
1730 tmp
[ i
] = alloc_tmp( c
);
1731 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1732 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1735 param0
= lookup_tmp( c
, mark
- 4 );
1736 param1
= lookup_tmp( c
, mark
- 3 );
1737 param2
= lookup_tmp( c
, mark
- 2 );
1739 brw_set_access_mode( p
, BRW_ALIGN_1
);
1741 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1742 be hashed. Also compute the remainders (offsets within the unit
1743 cube), interleaved to reduce register dependency penalties. */
1744 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1745 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1746 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1747 brw_FRC( p
, param0
, param0
);
1748 brw_FRC( p
, param1
, param1
);
1749 brw_FRC( p
, param2
, param2
);
1750 /* Since we now have only 16 bits of precision in the hash, we must
1751 be more careful about thorough mixing to maintain entropy as we
1752 squash the input vector into a small scalar. */
1753 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1754 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1755 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1756 brw_imm_uw( 0x9B93 ) );
1757 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1758 brw_imm_uw( 0xBC8F ) );
1760 /* Temporarily disable the execution mask while we work with ExecSize=16
1761 channels (the mask is set for ExecSize=8 and is probably incorrect).
1762 Although this might cause execution of unwanted channels, the code
1763 writes only to temporary registers and has no side effects, so
1764 disabling the mask is harmless. */
1765 brw_push_insn_state( p
);
1766 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1767 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1768 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1769 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1771 /* We're now ready to perform the hashing. The eight hashes are
1772 interleaved for performance. The hash function used is
1773 designed to rapidly achieve avalanche and require only 16x16
1774 bit multiplication, and 8-bit swizzles (which we get for
1776 for( i
= 0; i
< 4; i
++ )
1777 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1778 for( i
= 0; i
< 4; i
++ )
1779 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1780 odd_bytes( wtmp
[ i
] ) );
1781 for( i
= 0; i
< 4; i
++ )
1782 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1783 for( i
= 0; i
< 4; i
++ )
1784 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1785 odd_bytes( wtmp
[ i
] ) );
1786 brw_pop_insn_state( p
);
1788 /* Now we want to initialise the four rear gradients based on the
1789 hashes. Format conversion from signed integer to float leaves
1790 everything scaled too high by a factor of pow( 2, 15 ), but
1791 we correct for that right at the end. */
1793 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1794 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1795 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1796 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1797 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1799 brw_push_insn_state( p
);
1800 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1801 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1802 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1803 brw_pop_insn_state( p
);
1805 brw_MUL( p
, x1y0
, x1y0
, t
);
1806 brw_MUL( p
, x1y1
, x1y1
, t
);
1807 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1808 brw_MUL( p
, x0y0
, x0y0
, param0
);
1809 brw_MUL( p
, x0y1
, x0y1
, param0
);
1812 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1813 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1814 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1815 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1817 brw_push_insn_state( p
);
1818 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1819 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1820 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1821 brw_pop_insn_state( p
);
1823 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1824 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1825 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1826 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1827 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1829 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1830 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1831 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1832 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1835 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1836 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1837 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1838 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1840 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1841 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1842 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1843 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1845 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1846 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1847 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1848 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1850 /* We interpolate between the gradients using the polynomial
1851 6t^5 - 15t^4 + 10t^3 (Perlin). */
1852 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1853 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1854 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1855 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1856 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1857 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1858 brw_MUL( p
, xi
, xi
, param0
);
1859 brw_MUL( p
, yi
, yi
, param1
);
1860 brw_MUL( p
, zi
, zi
, param2
);
1861 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1862 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1863 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1864 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1865 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1866 brw_MUL( p
, xi
, xi
, param0
);
1867 brw_MUL( p
, yi
, yi
, param1
);
1868 brw_MUL( p
, zi
, zi
, param2
);
1869 brw_MUL( p
, xi
, xi
, param0
);
1870 brw_MUL( p
, yi
, yi
, param1
);
1871 brw_MUL( p
, zi
, zi
, param2
);
1872 brw_MUL( p
, xi
, xi
, param0
);
1873 brw_MUL( p
, yi
, yi
, param1
);
1874 brw_MUL( p
, zi
, zi
, param2
);
1876 /* Here we interpolate in the y dimension... */
1877 brw_MUL( p
, x0y1
, x0y1
, yi
);
1878 brw_MUL( p
, x1y1
, x1y1
, yi
);
1879 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1880 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1882 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1883 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1884 brw_MUL( p
, x1y0
, x1y0
, xi
);
1885 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1887 /* Now do the same thing for the front four gradients... */
1889 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1890 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1891 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1892 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1894 brw_push_insn_state( p
);
1895 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1896 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1897 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1898 brw_pop_insn_state( p
);
1900 brw_MUL( p
, x1y0
, x1y0
, t
);
1901 brw_MUL( p
, x1y1
, x1y1
, t
);
1902 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1903 brw_MUL( p
, x0y0
, x0y0
, param0
);
1904 brw_MUL( p
, x0y1
, x0y1
, param0
);
1907 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1908 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1909 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1910 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1912 brw_push_insn_state( p
);
1913 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1914 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1915 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1916 brw_pop_insn_state( p
);
1918 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1919 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1920 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1921 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1922 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1924 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1925 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1926 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1927 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1930 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1931 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1932 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1933 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1935 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1936 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1937 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1938 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1940 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1941 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1942 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1943 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1945 /* The interpolation coefficients are still around from last time, so
1946 again interpolate in the y dimension... */
1947 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1948 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1949 brw_MUL( p
, x0y1
, x0y1
, yi
);
1950 brw_MUL( p
, x1y1
, x1y1
, yi
);
1951 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1952 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1954 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1955 time put the front face in tmp[ 1 ] and we're nearly there... */
1956 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1957 brw_MUL( p
, x1y0
, x1y0
, xi
);
1958 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1960 /* The final interpolation, in the z dimension: */
1961 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1962 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1963 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1965 /* scale by pow( 2, -15 ), as described above */
1966 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1968 release_tmps( c
, mark
);
1971 static void emit_noise3( struct brw_wm_compile
*c
,
1972 const struct prog_instruction
*inst
)
1974 struct brw_compile
*p
= &c
->func
;
1975 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1976 GLuint mask
= inst
->DstReg
.WriteMask
;
1978 int mark
= mark_tmps( c
);
1980 assert( mark
== 0 );
1982 src0
= get_src_reg( c
, inst
, 0, 0 );
1983 src1
= get_src_reg( c
, inst
, 0, 1 );
1984 src2
= get_src_reg( c
, inst
, 0, 2 );
1986 param0
= alloc_tmp( c
);
1987 param1
= alloc_tmp( c
);
1988 param2
= alloc_tmp( c
);
1990 brw_MOV( p
, param0
, src0
);
1991 brw_MOV( p
, param1
, src1
);
1992 brw_MOV( p
, param2
, src2
);
1994 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1996 /* Fill in the result: */
1997 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1998 for (i
= 0 ; i
< 4; i
++) {
1999 if (mask
& (1<<i
)) {
2000 dst
= get_dst_reg(c
, inst
, i
);
2001 brw_MOV( p
, dst
, param0
);
2004 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2005 brw_set_saturate( p
, 0 );
2007 release_tmps( c
, mark
);
2011 * For the four-dimensional case, the little micro-optimisation benefits
2012 * we obtain by unrolling all the loops aren't worth the massive bloat it
2013 * now causes. Instead, we loop twice around performing a similar operation
2014 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2015 * code to glue it all together.
2017 static void noise4_sub( struct brw_wm_compile
*c
)
2019 struct brw_compile
*p
= &c
->func
;
2020 struct brw_reg param
[ 4 ],
2021 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2022 w0
, /* noise for the w=0 cube */
2023 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2024 interp
[ 4 ], /* interpolation coefficients */
2025 t
, tmp
[ 8 ], /* float temporaries */
2026 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2027 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2029 int mark
= mark_tmps( c
);
2030 GLuint loop
, origin
;
2032 x0y0
= alloc_tmp( c
);
2033 x0y1
= alloc_tmp( c
);
2034 x1y0
= alloc_tmp( c
);
2035 x1y1
= alloc_tmp( c
);
2037 w0
= alloc_tmp( c
);
2038 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2039 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2041 for( i
= 0; i
< 4; i
++ ) {
2042 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2043 interp
[ i
] = alloc_tmp( c
);
2046 for( i
= 0; i
< 8; i
++ ) {
2047 tmp
[ i
] = alloc_tmp( c
);
2048 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2049 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2052 brw_set_access_mode( p
, BRW_ALIGN_1
);
2054 /* We only want 16 bits of precision from the integral part of each
2055 co-ordinate, but unfortunately the RNDD semantics would saturate
2056 at 16 bits if we performed the operation directly to a 16-bit
2057 destination. Therefore, we round to 32-bit temporaries where
2058 appropriate, and then store only the lower 16 bits. */
2059 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2060 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2061 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2062 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2063 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2064 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2066 /* Modify the flag register here, because the side effect is useful
2067 later (see below). We know for certain that all flags will be
2068 cleared, since the FRC instruction cannot possibly generate
2069 negative results. Even for exceptional inputs (infinities, denormals,
2070 NaNs), the architecture guarantees that the L conditional is false. */
2071 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2072 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2073 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2074 for( i
= 1; i
< 4; i
++ )
2075 brw_FRC( p
, param
[ i
], param
[ i
] );
2077 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2079 for( i
= 0; i
< 4; i
++ )
2080 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2081 for( i
= 0; i
< 4; i
++ )
2082 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2083 for( i
= 0; i
< 4; i
++ )
2084 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2085 for( i
= 0; i
< 4; i
++ )
2086 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2087 for( j
= 0; j
< 3; j
++ )
2088 for( i
= 0; i
< 4; i
++ )
2089 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2091 /* Mark the current address, as it will be a jump destination. The
2092 following code will be executed twice: first, with the flag
2093 register clear indicating the w=0 case, and second with flags
2097 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2098 be hashed. Since we have only 16 bits of precision in the hash, we
2099 must be careful about thorough mixing to maintain entropy as we
2100 squash the input vector into a small scalar. */
2101 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2102 brw_imm_uw( 0xBC8F ) );
2103 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2104 brw_imm_uw( 0xD0BD ) );
2105 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2106 brw_imm_uw( 0x9B93 ) );
2107 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2108 brw_imm_uw( 0xA359 ) );
2109 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2110 brw_imm_uw( 0xBC8F ) );
2112 /* Temporarily disable the execution mask while we work with ExecSize=16
2113 channels (the mask is set for ExecSize=8 and is probably incorrect).
2114 Although this might cause execution of unwanted channels, the code
2115 writes only to temporary registers and has no side effects, so
2116 disabling the mask is harmless. */
2117 brw_push_insn_state( p
);
2118 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2119 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2120 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2121 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2123 /* We're now ready to perform the hashing. The eight hashes are
2124 interleaved for performance. The hash function used is
2125 designed to rapidly achieve avalanche and require only 16x16
2126 bit multiplication, and 8-bit swizzles (which we get for
2128 for( i
= 0; i
< 4; i
++ )
2129 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2130 for( i
= 0; i
< 4; i
++ )
2131 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2132 odd_bytes( wtmp
[ i
] ) );
2133 for( i
= 0; i
< 4; i
++ )
2134 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2135 for( i
= 0; i
< 4; i
++ )
2136 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2137 odd_bytes( wtmp
[ i
] ) );
2138 brw_pop_insn_state( p
);
2140 /* Now we want to initialise the four rear gradients based on the
2141 hashes. Format conversion from signed integer to float leaves
2142 everything scaled too high by a factor of pow( 2, 15 ), but
2143 we correct for that right at the end. */
2145 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2146 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2147 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2148 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2149 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2151 brw_push_insn_state( p
);
2152 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2153 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2154 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2155 brw_pop_insn_state( p
);
2157 brw_MUL( p
, x1y0
, x1y0
, t
);
2158 brw_MUL( p
, x1y1
, x1y1
, t
);
2159 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2160 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2161 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2164 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2165 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2166 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2167 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2169 brw_push_insn_state( p
);
2170 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2171 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2172 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2173 brw_pop_insn_state( p
);
2175 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2176 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2177 /* prepare t for the w component (used below): w the first time through
2178 the loop; w - 1 the second time) */
2179 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2180 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2181 p
->current
->header
.predicate_inverse
= 1;
2182 brw_MOV( p
, t
, param
[ 3 ] );
2183 p
->current
->header
.predicate_inverse
= 0;
2184 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2185 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2186 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2188 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2189 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2190 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2191 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2194 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2195 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2196 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2197 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2199 brw_push_insn_state( p
);
2200 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2201 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2202 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2203 brw_pop_insn_state( p
);
2205 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2206 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2207 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2208 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2210 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2211 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2212 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2213 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2216 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2217 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2218 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2219 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2221 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2222 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2223 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2224 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2225 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2227 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2228 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2229 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2230 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2232 /* Here we interpolate in the y dimension... */
2233 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2234 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2235 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2236 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2237 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2238 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2240 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2241 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2242 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2243 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2245 /* Now do the same thing for the front four gradients... */
2247 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2248 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2249 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2250 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2252 brw_push_insn_state( p
);
2253 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2254 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2255 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2256 brw_pop_insn_state( p
);
2258 brw_MUL( p
, x1y0
, x1y0
, t
);
2259 brw_MUL( p
, x1y1
, x1y1
, t
);
2260 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2261 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2262 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2265 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2266 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2267 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2268 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2270 brw_push_insn_state( p
);
2271 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2272 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2273 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2274 brw_pop_insn_state( p
);
2276 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2277 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2278 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2279 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2280 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2282 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2283 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2284 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2285 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2288 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2289 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2290 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2291 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2293 brw_push_insn_state( p
);
2294 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2295 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2296 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2297 brw_pop_insn_state( p
);
2299 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2300 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2301 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2302 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2303 /* prepare t for the w component (used below): w the first time through
2304 the loop; w - 1 the second time) */
2305 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2306 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2307 p
->current
->header
.predicate_inverse
= 1;
2308 brw_MOV( p
, t
, param
[ 3 ] );
2309 p
->current
->header
.predicate_inverse
= 0;
2310 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2312 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2313 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2314 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2315 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2318 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2319 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2320 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2321 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2323 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2324 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2325 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2326 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2328 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2329 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2330 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2331 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2333 /* Interpolate in the y dimension: */
2334 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2335 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2336 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2337 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2338 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2339 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2341 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2342 time put the front face in tmp[ 1 ] and we're nearly there... */
2343 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2344 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2345 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2347 /* Another interpolation, in the z dimension: */
2348 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2349 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2350 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2352 /* Exit the loop if we've computed both cubes... */
2353 origin
= p
->nr_insn
;
2354 brw_push_insn_state( p
);
2355 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2356 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2357 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2358 brw_pop_insn_state( p
);
2360 /* Save the result for the w=0 case, and increment the w coordinate: */
2361 brw_MOV( p
, w0
, tmp
[ 0 ] );
2362 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2365 /* Loop around for the other cube. Explicitly set the flag register
2366 (unfortunately we must spend an extra instruction to do this: we
2367 can't rely on a side effect of the previous MOV or ADD because
2368 conditional modifiers which are normally true might be false in
2369 exceptional circumstances, e.g. given a NaN input; the add to
2370 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2371 brw_push_insn_state( p
);
2372 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2373 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2374 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2375 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2376 brw_pop_insn_state( p
);
2378 /* Patch the previous conditional branch now that we know the
2379 destination address. */
2380 brw_set_src1( p
->store
+ origin
,
2381 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2383 /* The very last interpolation. */
2384 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2385 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2386 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2388 /* scale by pow( 2, -15 ), as described above */
2389 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2391 release_tmps( c
, mark
);
/*
 * Emit code for a four-argument noise instruction.
 *
 * Components 0..3 of source operand 0 are copied into four newly
 * allocated temporaries (param0..param3), the SUB_NOISE4 subroutine
 * (generated by noise4_sub) is invoked, and the value left in param0 is
 * then copied to every destination component enabled in the write mask
 * of the instruction.  Saturation is applied to those final moves when
 * SaturateMode is SATURATE_ZERO_ONE.
 *
 * c     WM compile state (instruction store, temporary allocator)
 * inst  the Mesa instruction being translated
 */
2394 static void emit_noise4( struct brw_wm_compile
*c
,
2395 const struct prog_instruction
*inst
)
2397 struct brw_compile
*p
= &c
->func
;
2398 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2399 GLuint mask
= inst
->DstReg
.WriteMask
;
/* Remember the temporary allocation level so it can be restored below. */
2401 int mark
= mark_tmps( c
);
/* NOTE(review): presumably required so that the subroutine finds its
   parameters in fixed temporary slots -- confirm against noise4_sub. */
2403 assert( mark
== 0 );
/* Fetch the four components (X, Y, Z, W) of source operand 0. */
2405 src0
= get_src_reg( c
, inst
, 0, 0 );
2406 src1
= get_src_reg( c
, inst
, 0, 1 );
2407 src2
= get_src_reg( c
, inst
, 0, 2 );
2408 src3
= get_src_reg( c
, inst
, 0, 3 );
/* Allocate the parameter temporaries and load them from the sources. */
2410 param0
= alloc_tmp( c
);
2411 param1
= alloc_tmp( c
);
2412 param2
= alloc_tmp( c
);
2413 param3
= alloc_tmp( c
);
2415 brw_MOV( p
, param0
, src0
);
2416 brw_MOV( p
, param1
, src1
);
2417 brw_MOV( p
, param2
, src2
);
2418 brw_MOV( p
, param3
, src3
);
/* Call the shared noise subroutine; its result is read back from param0. */
2420 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2422 /* Fill in the result: */
2423 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2424 for (i
= 0 ; i
< 4; i
++) {
2425 if (mask
& (1<<i
)) {
2426 dst
= get_dst_reg(c
, inst
, i
);
2427 brw_MOV( p
, dst
, param0
);
/* Turn saturation back off if it was enabled for the result moves. */
2430 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2431 brw_set_saturate( p
, 0 );
2433 release_tmps( c
, mark
);
/*
 * Emit code that converts the X and Y pixel coordinates held in source
 * operand 0 into window-relative coordinates, for the components that
 * are enabled in the write mask.
 *
 * X uses source component 0 (retyped to W) and the immediate
 * (0 - origin_x).  Y uses the negated source component 1 (retyped to W)
 * and the immediate (origin_y + drawable_height - 1).
 *
 * NOTE(review): the instructions that consume these operands are not
 * shown next to them here; going by the inline formulas they are
 * presumably ADDs into dst[0] and dst[1] -- confirm.
 * NOTE(review): the Y immediate also subtracts 1, which the inline
 * formula for Y does not mention -- confirm which one is intended.
 *
 * c     WM compile state; origin_x, origin_y and drawable_height are
 *       read from c->key
 * inst  the Mesa instruction being translated
 */
2436 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2437 const struct prog_instruction
*inst
)
2439 struct brw_compile
*p
= &c
->func
;
2440 GLuint mask
= inst
->DstReg
.WriteMask
;
2441 struct brw_reg src0
[2], dst
[2];
/* Only the X and Y components of destination and source are used. */
2443 dst
[0] = get_dst_reg(c
, inst
, 0);
2444 dst
[1] = get_dst_reg(c
, inst
, 1);
2446 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2447 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2449 /* Calculate the pixel offset from window bottom left into destination
2452 if (mask
& WRITEMASK_X
) {
2453 /* X' = X - origin_x */
2456 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2457 brw_imm_d(0 - c
->key
.origin_x
));
2460 if (mask
& WRITEMASK_Y
) {
2461 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2464 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2465 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2470 BIAS on SIMD8 not working yet...
/*
 * Emit code for a texture lookup with LOD bias.
 *
 * The texture coordinates from source operand 0 are loaded into message
 * registers 2..4 according to the texture target, the bias (source
 * component 3) goes into message register 5 and a zero reference value
 * into message register 6.  The sampler message is then built with
 * dst[0] as destination, the payload depth register as src0, the surface
 * index of the bound sampler unit, the write mask of the instruction,
 * the SIMD16 sample-bias message type and a response length of 4.
 *
 * c     WM compile state
 * inst  the Mesa instruction being translated; TexSrcUnit selects the
 *       sampler unit and TexSrcTarget the coordinate layout
 */
2472 static void emit_txb(struct brw_wm_compile
*c
,
2473 const struct prog_instruction
*inst
)
2475 struct brw_compile
*p
= &c
->func
;
2476 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Map the texture unit named by the instruction to its sampler unit. */
2477 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2480 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination and source component registers. */
2482 for (i
= 0; i
< 4; i
++)
2483 dst
[i
] = get_dst_reg(c
, inst
, i
);
2484 for (i
= 0; i
< 4; i
++)
2485 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load s/t/r into message registers 2..4; coordinates that the target
   does not use are filled with zero. */
2487 switch (inst
->TexSrcTarget
) {
2488 case TEXTURE_1D_INDEX
:
2489 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2490 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2491 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2493 case TEXTURE_2D_INDEX
:
2494 case TEXTURE_RECT_INDEX
:
2495 brw_MOV(p
, brw_message_reg(2), src
[0]);
2496 brw_MOV(p
, brw_message_reg(3), src
[1]);
2497 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* NOTE(review): presumably the case for all remaining targets, which
   pass all three coordinates -- confirm. */
2500 brw_MOV(p
, brw_message_reg(2), src
[0]);
2501 brw_MOV(p
, brw_message_reg(3), src
[1]);
2502 brw_MOV(p
, brw_message_reg(4), src
[2]);
2505 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2506 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
/* Arguments of the sampler message follow. */
2508 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2510 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2511 SURF_INDEX_TEXTURE(unit
),
2513 inst
->DstReg
.WriteMask
, /* writemask */
2514 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2515 4, /* response_length */
/*
 * Emit code for a plain texture lookup, optionally a shadow comparison.
 *
 * The coordinate components required by the texture target are moved
 * into consecutive message registers.  For shadow lookups, message
 * register 5 is additionally set to zero (lod / bias) and message
 * register 6 to source component 2 (reference value).  The sampler
 * message is built with dst[0] as destination, the payload depth
 * register as src0, the surface index of the bound sampler unit, the
 * write mask of the instruction, the SIMD8 sample message type, a
 * response length of 4 and a message length of 6 (shadow) or 4.
 *
 * c     WM compile state; c->key.shadowtex_mask selects shadow lookups
 * inst  the Mesa instruction being translated; TexSrcUnit selects the
 *       sampler unit and TexSrcTarget the coordinate layout
 */
2521 static void emit_tex(struct brw_wm_compile
*c
,
2522 const struct prog_instruction
*inst
)
2524 struct brw_compile
*p
= &c
->func
;
2525 struct brw_reg dst
[4], src
[4], payload_reg
;
2526 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when the shadowtex_mask bit for this sampler unit is set. */
2530 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2532 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2534 for (i
= 0; i
< 4; i
++)
2535 dst
[i
] = get_dst_reg(c
, inst
, i
);
2536 for (i
= 0; i
< 4; i
++)
2537 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Pick the set of coordinate components to send for this target. */
2539 switch (inst
->TexSrcTarget
) {
2540 case TEXTURE_1D_INDEX
:
2544 case TEXTURE_2D_INDEX
:
2545 case TEXTURE_RECT_INDEX
:
2546 emit
= WRITEMASK_XY
;
2550 emit
= WRITEMASK_XYZ
;
2556 /* move/load S, T, R coords */
2557 for (i
= 0; i
< nr
; i
++) {
2558 static const GLuint swz
[4] = {0,1,2,2};
/* NOTE(review): presumably the source component is sent when it is in
   the emit mask and zero is sent otherwise -- confirm. */
2560 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2562 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
/* Extra payload that goes with the 6-register shadow message. */
2567 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2568 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
/* Arguments of the sampler message follow. */
2572 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2574 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2575 SURF_INDEX_TEXTURE(unit
),
2577 inst
->DstReg
.WriteMask
, /* writemask */
2578 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2579 4, /* response_length */
2580 shadow
? 6 : 4, /* msg_length */
/* Force the fourth result component to 1.0.
   NOTE(review): presumably only done for shadow lookups -- confirm. */
2584 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2589 * Resolve subroutine calls after code emit is done.
/*
 * Final fix-up step after all instructions have been emitted: hands the
 * instruction store (c->func) to brw_resolve_cals().
 * NOTE(review): presumably this patches the jump targets of the call
 * sites recorded with brw_save_call() -- confirm.
 */
2591 static void post_wm_emit( struct brw_wm_compile
*c
)
2593 brw_resolve_cals(&c
->func
);
/*
 * Main code generation loop for fragment programs that use GLSL flow
 * control.
 *
 * Walks c->prog_instructions[0 .. c->nr_fp_insns) and emits hardware
 * instructions for each one:
 *   - constants are fetched first when c->fp->use_const_buffer is set;
 *   - the conditional modifier is NZ for CondUpdate instructions and
 *     NONE otherwise;
 *   - the opcode selects an emit_* helper, or one of the flow-control
 *     sequences handled inline (conditionals, subroutine call and
 *     return, loops with BREAK / CONTINUE patching);
 *   - predicate control is set to NORMAL for CondUpdate instructions
 *     and to NONE otherwise.
 * A problem is reported through _mesa_problem() if c->reg_index has
 * reached BRW_WM_MAX_GRF.
 *
 * brw  driver context
 * c    WM compile state
 */
2596 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2599 #define MAX_LOOP_DEPTH 32
2600 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2601 struct brw_instruction
*inst0
, *inst1
;
2602 int i
, if_insn
= 0, loop_insn
= 0;
2603 struct brw_compile
*p
= &c
->func
;
2604 struct brw_indirect stack_index
= brw_indirect(0, 0);
/* Emit uncompressed instructions and point the stack address register
   at the start of c->stack. */
2608 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2609 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
/* Translate the program instructions one by one, in order. */
2611 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2612 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2615 _mesa_printf("Inst %d: ", i
);
2616 _mesa_print_instruction(inst
);
2619 /* fetch any constants that this instruction needs */
2620 if (c
->fp
->use_const_buffer
)
2621 fetch_constants(c
, inst
);
/* CondUpdate instructions are emitted with the NZ conditional modifier,
   all others with none. */
2623 if (inst
->CondUpdate
)
2624 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2626 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2628 switch (inst
->Opcode
) {
2630 emit_pixel_xy(c
, inst
);
2633 emit_delta_xy(c
, inst
);
2636 emit_pixel_w(c
, inst
);
2639 emit_linterp(c
, inst
);
2642 emit_pinterp(c
, inst
);
2645 emit_cinterp(c
, inst
);
2648 emit_wpos_xy(c
, inst
);
2651 emit_fb_write(c
, inst
);
2653 case WM_FRONTFACING
:
2654 emit_frontfacing(c
, inst
);
2678 emit_trunc(c
, inst
);
2716 emit_min_max(c
, inst
);
2752 emit_noise1(c
, inst
);
2755 emit_noise2(c
, inst
);
2758 emit_noise3(c
, inst
);
2761 emit_noise4(c
, inst
);
/* Conditionals: if_inst[] is a stack of the currently open IF
   instructions and if_insn is its depth. */
2773 assert(if_insn
< MAX_IFSN
);
2774 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2777 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2780 assert(if_insn
> 0);
2781 brw_ENDIF(p
, if_inst
[--if_insn
]);
2784 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Subroutine call sequence: store IP + 3*16 in the current stack slot,
   advance the stack index by 4, record the call site under the name in
   inst->Comment, then emit an IP add with a placeholder offset.
   NOTE(review): the stored value is presumably the return address and
   the placeholder is presumably patched by brw_resolve_cals() -- confirm. */
2790 brw_push_insn_state(p
);
2791 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2792 brw_set_access_mode(p
, BRW_ALIGN_1
);
2793 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2794 brw_set_access_mode(p
, BRW_ALIGN_16
);
2795 brw_ADD(p
, get_addr_reg(stack_index
),
2796 get_addr_reg(stack_index
), brw_imm_d(4));
2797 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2798 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2799 brw_pop_insn_state(p
);
/* Subroutine return sequence: step the stack index back by 4 and load
   IP from that stack slot. */
2803 brw_push_insn_state(p
);
2804 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2805 brw_ADD(p
, get_addr_reg(stack_index
),
2806 get_addr_reg(stack_index
), brw_imm_d(-4));
2807 brw_set_access_mode(p
, BRW_ALIGN_1
);
2808 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2809 brw_set_access_mode(p
, BRW_ALIGN_16
);
2810 brw_pop_insn_state(p
);
/* Loops: loop_inst[] is a stack of the DO instructions of open loops.
   NOTE(review): unlike if_insn, loop_insn is not checked against
   MAX_LOOP_DEPTH before the store below -- consider adding an assert. */
2813 case OPCODE_BGNLOOP
:
2814 /* XXX may need to invalidate the current_constant regs */
2815 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2819 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2823 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2825 case OPCODE_ENDLOOP
:
2827 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2828 /* patch all the BREAK instructions from
2830 while (inst0
> loop_inst
[loop_insn
]) {
2832 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2833 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2834 inst0
->bits3
.if_else
.pop_count
= 0;
2835 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2836 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2837 inst0
->bits3
.if_else
.pop_count
= 0;
2842 _mesa_printf("unsupported IR in fragment shader %d\n",
2845 if (inst
->CondUpdate
)
2846 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2848 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2852 if (c
->reg_index
>= BRW_WM_MAX_GRF
) {
2853 _mesa_problem(NULL
, "Ran out of registers in brw_wm_emit_glsl()");
2854 /* XXX we need to do some proper error recovery here */
2860 * Do GPU code generation for shaders that use GLSL features such as
2861 * flow control. Other shaders will be compiled with the
/*
 * Entry point for compiling a fragment program through the GLSL path.
 *
 * Runs code generation via brw_wm_emit_glsl() and then records the
 * results in c->prog_data: total_grf is set to the final value of
 * c->reg_index and total_scratch to 0.  When the DEBUG_WM bit of
 * INTEL_DEBUG is set, a banner is printed before code generation and
 * the program is printed afterwards.
 *
 * brw  driver context, passed through to brw_wm_emit_glsl()
 * c    WM compile state; updated in place
 */
2863 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2865 if (INTEL_DEBUG
& DEBUG_WM
) {
2866 _mesa_printf("brw_wm_glsl_emit:\n");
2869 /* initial instruction translation/simplification */
2872 /* actual code generation */
2873 brw_wm_emit_glsl(brw
, c
);
2875 if (INTEL_DEBUG
& DEBUG_WM
) {
2876 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* Publish the register usage of the generated program. */
2879 c
->prog_data
.total_grf
= c
->reg_index
;
2880 c
->prog_data
.total_scratch
= 0;