1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
46 * Record the mapping of a Mesa register to a hardware register.
48 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
49 int component
, struct brw_reg reg
)
51 c
->wm_regs
[file
][index
][component
].reg
= reg
;
52 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
56 * Examine instruction's write mask to find index of first component
57 * enabled for writing.
59 static int get_scalar_dst_index(struct prog_instruction
*inst
)
62 for (i
= 0; i
< 4; i
++)
63 if (inst
->DstReg
.WriteMask
& (1<<i
))
68 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
71 if(c
->tmp_index
== c
->tmp_max
)
72 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
74 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
79 * Save current temp register info.
80 * There must be a matching call to release_tmps().
82 static int mark_tmps(struct brw_wm_compile
*c
)
87 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
89 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
92 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
98 * Convert Mesa src register to brw register.
100 * Since we're running in SOA mode each Mesa register corresponds to four
101 * hardware registers. We allocate the hardware registers as needed here.
103 * \param file register file, one of PROGRAM_x
104 * \param index register number
105 * \param component src component (X=0, Y=1, Z=2, W=3)
106 * \param nr not used?!?
107 * \param neg negate value?
108 * \param abs take absolute value?
110 static struct brw_reg
111 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
112 int nr
, GLuint neg
, GLuint abs
)
116 case PROGRAM_STATE_VAR
:
117 case PROGRAM_CONSTANT
:
118 case PROGRAM_UNIFORM
:
119 file
= PROGRAM_STATE_VAR
;
121 case PROGRAM_UNDEFINED
:
122 return brw_null_reg();
123 case PROGRAM_TEMPORARY
:
126 case PROGRAM_PAYLOAD
:
129 _mesa_problem(NULL
, "Unexpected file in get_reg()");
130 return brw_null_reg();
133 /* see if we've already allocated a HW register for this Mesa register */
134 if (c
->wm_regs
[file
][index
][component
].inited
) {
136 reg
= c
->wm_regs
[file
][index
][component
].reg
;
139 /* no, allocate new register */
140 reg
= brw_vec8_grf(c
->reg_index
, 0);
143 /* if this is a new register allocation, record it in the table */
144 if (!c
->wm_regs
[file
][index
][component
].inited
) {
145 set_reg(c
, file
, index
, component
, reg
);
149 if (c
->reg_index
>= BRW_WM_MAX_GRF
- 12) {
150 /* ran out of temporary registers! */
152 /* This is a big hack for now.
153 * Return bad register index, just don't hang the GPU.
155 _mesa_fprintf(stderr
, "out of regs %d\n", c
->reg_index
);
156 c
->reg_index
= BRW_WM_MAX_GRF
- 13;
158 return brw_null_reg();
162 if (neg
& (1 << component
)) {
172 * Preallocate registers. This sets up the Mesa to hardware register
173 * mapping for certain registers, such as constants (uniforms/state vars)
176 static void prealloc_reg(struct brw_wm_compile
*c
)
180 int nr_interp_regs
= 0;
181 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
183 for (i
= 0; i
< 4; i
++) {
184 if (i
< c
->key
.nr_depth_regs
)
185 reg
= brw_vec8_grf(i
* 2, 0);
187 reg
= brw_vec8_grf(0, 0);
188 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
190 c
->reg_index
+= 2 * c
->key
.nr_depth_regs
;
194 const int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
196 /* use a real constant buffer, or just use a section of the GRF? */
197 c
->use_const_buffer
= GL_FALSE
; /* (nr_params > 8);*/
199 if (c
->use_const_buffer
) {
200 /* We'll use a real constant buffer and fetch constants from
201 * it with a dataport read message.
204 /* number of float constants in CURBE */
205 c
->prog_data
.nr_params
= 0;
208 const struct gl_program_parameter_list
*plist
=
209 c
->fp
->program
.Base
.Parameters
;
212 /* number of float constants in CURBE */
213 c
->prog_data
.nr_params
= 4 * nr_params
;
215 /* loop over program constants (float[4]) */
216 for (i
= 0; i
< nr_params
; i
++) {
217 /* loop over XYZW channels */
218 for (j
= 0; j
< 4; j
++, index
++) {
219 reg
= brw_vec1_grf(c
->reg_index
+ index
/ 8, index
% 8);
220 /* Save pointer to parameter/constant value.
221 * Constants will be copied in prepare_constant_buffer()
223 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
224 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
227 /* number of constant regs used (each reg is float[8]) */
228 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
229 c
->reg_index
+= c
->nr_creg
;
233 /* fragment shader inputs */
234 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
235 if (inputs
& (1<<i
)) {
237 reg
= brw_vec8_grf(c
->reg_index
, 0);
238 for (j
= 0; j
< 4; j
++)
239 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
244 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
245 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
246 c
->prog_data
.curb_read_length
= c
->nr_creg
;
247 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
249 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
252 /* An instruction may reference up to three constants.
253 * They'll be found in these registers.
254 * XXX alloc these on demand!
256 if (c
->use_const_buffer
) {
257 for (i
= 0; i
< 3; i
++) {
258 c
->current_const
[i
].index
= -1;
259 c
->current_const
[i
].reg
= alloc_tmp(c
);
263 printf("USE CONST BUFFER? %d\n", c
->use_const_buffer
);
264 printf("AFTER PRE_ALLOC, reg_index = %d\n", c
->reg_index
);
270 * Check if any of the instruction's src registers are constants, uniforms,
271 * or statevars. If so, fetch any constants that we don't already have in
272 * the three GRF slots.
274 static void fetch_constants(struct brw_wm_compile
*c
,
275 const struct prog_instruction
*inst
)
277 struct brw_compile
*p
= &c
->func
;
280 /* loop over instruction src regs */
281 for (i
= 0; i
< 3; i
++) {
282 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
283 if (src
->File
== PROGRAM_STATE_VAR
||
284 src
->File
== PROGRAM_CONSTANT
||
285 src
->File
== PROGRAM_UNIFORM
) {
286 if (c
->current_const
[i
].index
!= src
->Index
) {
287 c
->current_const
[i
].index
= src
->Index
;
290 printf(" fetch const[%d] for arg %d into reg %d\n",
291 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
294 /* need to fetch the constant now */
296 c
->current_const
[i
].reg
, /* writeback dest */
298 src
->RelAddr
, /* relative indexing? */
299 16 * src
->Index
, /* byte offset */
300 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
309 * Convert Mesa dst register to brw register.
311 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
312 const struct prog_instruction
*inst
,
316 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
321 static struct brw_reg
322 get_src_reg_const(struct brw_wm_compile
*c
,
323 const struct prog_instruction
*inst
,
324 GLuint srcRegIndex
, GLuint component
)
326 /* We should have already fetched the constant from the constant
327 * buffer in fetch_constants(). Now we just have to return a
328 * register description that extracts the needed component and
329 * smears it across all eight vector components.
331 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
332 struct brw_reg const_reg
;
334 assert(component
< 4);
335 assert(srcRegIndex
< 3);
336 assert(c
->current_const
[srcRegIndex
].index
!= -1);
337 const_reg
= c
->current_const
[srcRegIndex
].reg
;
339 /* extract desired float from the const_reg, and smear */
340 const_reg
= stride(const_reg
, 0, 1, 0);
341 const_reg
.subnr
= component
* 4;
344 const_reg
= negate(const_reg
);
346 const_reg
= brw_abs(const_reg
);
349 printf(" form const[%d] for arg %d, comp %d, reg %d\n",
350 c
->current_const
[srcRegIndex
].index
,
361 * Convert Mesa src register to brw register.
363 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
364 const struct prog_instruction
*inst
,
365 GLuint srcRegIndex
, GLuint channel
)
367 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
369 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
371 if (c
->use_const_buffer
&&
372 (src
->File
== PROGRAM_STATE_VAR
||
373 src
->File
== PROGRAM_CONSTANT
||
374 src
->File
== PROGRAM_UNIFORM
)) {
375 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
378 /* other type of source register */
379 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
380 src
->NegateBase
, src
->Abs
);
386 * Same as \sa get_src_reg() but if the register is a literal, emit
387 * a brw_reg encoding the literal.
388 * Note that a brw instruction only allows one src operand to be a literal.
389 * For instructions with more than one operand, only the second can be a
390 * literal. This means that we treat some literals as constants/uniforms
391 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
394 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
395 const struct prog_instruction
*inst
,
396 GLuint srcRegIndex
, GLuint channel
)
398 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
399 if (src
->File
== PROGRAM_CONSTANT
) {
401 const int component
= GET_SWZ(src
->Swizzle
, channel
);
402 const GLfloat
*param
=
403 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
404 GLfloat value
= param
[component
];
408 value
= FABSF(value
);
410 printf(" form imm reg %f\n", value
);
412 return brw_imm_f(value
);
415 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
421 * Subroutines are minimal support for reusable instruction sequences.
422 * They are implemented as simply as possible to minimise overhead: there
423 * is no explicit support for communication between the caller and callee
424 * other than saving the return address in a temporary register, nor is
425 * there any automatic local storage. This implies that great care is
426 * required before attempting reentrancy or any kind of nested
427 * subroutine invocations.
429 static void invoke_subroutine( struct brw_wm_compile
*c
,
430 enum _subroutine subroutine
,
431 void (*emit
)( struct brw_wm_compile
* ) )
433 struct brw_compile
*p
= &c
->func
;
435 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
437 if( c
->subroutines
[ subroutine
] ) {
438 /* subroutine previously emitted: reuse existing instructions */
440 int mark
= mark_tmps( c
);
441 struct brw_reg return_address
= retype( alloc_tmp( c
),
442 BRW_REGISTER_TYPE_UD
);
443 int here
= p
->nr_insn
;
445 brw_push_insn_state(p
);
446 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
447 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
449 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
450 brw_imm_d( ( c
->subroutines
[ subroutine
] -
452 brw_pop_insn_state(p
);
454 release_tmps( c
, mark
);
456 /* previously unused subroutine: emit, and mark for later reuse */
458 int mark
= mark_tmps( c
);
459 struct brw_reg return_address
= retype( alloc_tmp( c
),
460 BRW_REGISTER_TYPE_UD
);
461 struct brw_instruction
*calc
;
462 int base
= p
->nr_insn
;
464 brw_push_insn_state(p
);
465 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
466 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
467 brw_pop_insn_state(p
);
469 c
->subroutines
[ subroutine
] = p
->nr_insn
;
473 brw_push_insn_state(p
);
474 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
475 brw_MOV( p
, brw_ip_reg(), return_address
);
476 brw_pop_insn_state(p
);
478 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
480 release_tmps( c
, mark
);
484 static void emit_abs( struct brw_wm_compile
*c
,
485 struct prog_instruction
*inst
)
488 struct brw_compile
*p
= &c
->func
;
489 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
490 for (i
= 0; i
< 4; i
++) {
491 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
492 struct brw_reg src
, dst
;
493 dst
= get_dst_reg(c
, inst
, i
);
494 src
= get_src_reg(c
, inst
, 0, i
);
495 brw_MOV(p
, dst
, brw_abs(src
));
498 brw_set_saturate(p
, 0);
501 static void emit_trunc( struct brw_wm_compile
*c
,
502 struct prog_instruction
*inst
)
505 struct brw_compile
*p
= &c
->func
;
506 GLuint mask
= inst
->DstReg
.WriteMask
;
507 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
508 for (i
= 0; i
< 4; i
++) {
510 struct brw_reg src
, dst
;
511 dst
= get_dst_reg(c
, inst
, i
);
512 src
= get_src_reg(c
, inst
, 0, i
);
513 brw_RNDZ(p
, dst
, src
);
516 brw_set_saturate(p
, 0);
519 static void emit_mov( struct brw_wm_compile
*c
,
520 struct prog_instruction
*inst
)
523 struct brw_compile
*p
= &c
->func
;
524 GLuint mask
= inst
->DstReg
.WriteMask
;
525 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
526 for (i
= 0; i
< 4; i
++) {
528 struct brw_reg src
, dst
;
529 dst
= get_dst_reg(c
, inst
, i
);
530 src
= get_src_reg_imm(c
, inst
, 0, i
);
531 brw_MOV(p
, dst
, src
);
534 brw_set_saturate(p
, 0);
537 static void emit_pixel_xy(struct brw_wm_compile
*c
,
538 struct prog_instruction
*inst
)
540 struct brw_reg r1
= brw_vec1_grf(1, 0);
541 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
543 struct brw_reg dst0
, dst1
;
544 struct brw_compile
*p
= &c
->func
;
545 GLuint mask
= inst
->DstReg
.WriteMask
;
547 dst0
= get_dst_reg(c
, inst
, 0);
548 dst1
= get_dst_reg(c
, inst
, 1);
549 /* Calculate pixel centers by adding 1 or 0 to each of the
550 * micro-tile coordinates passed in r1.
552 if (mask
& WRITEMASK_X
) {
554 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
555 stride(suboffset(r1_uw
, 4), 2, 4, 0),
556 brw_imm_v(0x10101010));
559 if (mask
& WRITEMASK_Y
) {
561 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
562 stride(suboffset(r1_uw
, 5), 2, 4, 0),
563 brw_imm_v(0x11001100));
567 static void emit_delta_xy(struct brw_wm_compile
*c
,
568 struct prog_instruction
*inst
)
570 struct brw_reg r1
= brw_vec1_grf(1, 0);
571 struct brw_reg dst0
, dst1
, src0
, src1
;
572 struct brw_compile
*p
= &c
->func
;
573 GLuint mask
= inst
->DstReg
.WriteMask
;
575 dst0
= get_dst_reg(c
, inst
, 0);
576 dst1
= get_dst_reg(c
, inst
, 1);
577 src0
= get_src_reg(c
, inst
, 0, 0);
578 src1
= get_src_reg(c
, inst
, 0, 1);
579 /* Calc delta X,Y by subtracting origin in r1 from the pixel
582 if (mask
& WRITEMASK_X
) {
585 retype(src0
, BRW_REGISTER_TYPE_UW
),
589 if (mask
& WRITEMASK_Y
) {
592 retype(src1
, BRW_REGISTER_TYPE_UW
),
593 negate(suboffset(r1
,1)));
598 static void fire_fb_write( struct brw_wm_compile
*c
,
604 struct brw_compile
*p
= &c
->func
;
605 /* Pass through control information:
607 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
609 brw_push_insn_state(p
);
610 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
612 brw_message_reg(base_reg
+ 1),
614 brw_pop_insn_state(p
);
616 /* Send framebuffer write message: */
618 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
620 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
627 static void emit_fb_write(struct brw_wm_compile
*c
,
628 struct prog_instruction
*inst
)
630 struct brw_compile
*p
= &c
->func
;
636 /* Reserve a space for AA - may not be needed:
638 if (c
->key
.aa_dest_stencil_reg
)
641 brw_push_insn_state(p
);
642 for (channel
= 0; channel
< 4; channel
++) {
643 src0
= get_src_reg(c
, inst
, 0, channel
);
644 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
645 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
646 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
648 /* skip over the regs populated above: */
650 brw_pop_insn_state(p
);
652 if (c
->key
.source_depth_to_render_target
) {
653 if (c
->key
.computes_depth
) {
654 src0
= get_src_reg(c
, inst
, 2, 2);
655 brw_MOV(p
, brw_message_reg(nr
), src0
);
658 src0
= get_src_reg(c
, inst
, 1, 1);
659 brw_MOV(p
, brw_message_reg(nr
), src0
);
665 if (c
->key
.dest_depth_reg
) {
666 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
667 GLuint off
= c
->key
.dest_depth_reg
% 2;
672 /* XXX do we need this code? comp always 1, off always 0, it seems */
674 brw_push_insn_state(p
);
675 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
677 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
679 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
680 brw_pop_insn_state(p
);
685 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
686 brw_MOV(p
, brw_message_reg(nr
), src
);
691 target
= inst
->Aux
>> 1;
693 fire_fb_write(c
, 0, nr
, target
, eot
);
696 static void emit_pixel_w( struct brw_wm_compile
*c
,
697 struct prog_instruction
*inst
)
699 struct brw_compile
*p
= &c
->func
;
700 GLuint mask
= inst
->DstReg
.WriteMask
;
701 if (mask
& WRITEMASK_W
) {
702 struct brw_reg dst
, src0
, delta0
, delta1
;
703 struct brw_reg interp3
;
705 dst
= get_dst_reg(c
, inst
, 3);
706 src0
= get_src_reg(c
, inst
, 0, 0);
707 delta0
= get_src_reg(c
, inst
, 1, 0);
708 delta1
= get_src_reg(c
, inst
, 1, 1);
710 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
711 /* Calc 1/w - just linterp wpos[3] optimized by putting the
712 * result straight into a message reg.
714 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
715 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
719 BRW_MATH_FUNCTION_INV
,
720 BRW_MATH_SATURATE_NONE
,
722 BRW_MATH_PRECISION_FULL
);
726 static void emit_linterp(struct brw_wm_compile
*c
,
727 struct prog_instruction
*inst
)
729 struct brw_compile
*p
= &c
->func
;
730 GLuint mask
= inst
->DstReg
.WriteMask
;
731 struct brw_reg interp
[4];
732 struct brw_reg dst
, delta0
, delta1
;
736 src0
= get_src_reg(c
, inst
, 0, 0);
737 delta0
= get_src_reg(c
, inst
, 1, 0);
738 delta1
= get_src_reg(c
, inst
, 1, 1);
741 interp
[0] = brw_vec1_grf(nr
, 0);
742 interp
[1] = brw_vec1_grf(nr
, 4);
743 interp
[2] = brw_vec1_grf(nr
+1, 0);
744 interp
[3] = brw_vec1_grf(nr
+1, 4);
746 for(i
= 0; i
< 4; i
++ ) {
748 dst
= get_dst_reg(c
, inst
, i
);
749 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
750 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
755 static void emit_cinterp(struct brw_wm_compile
*c
,
756 struct prog_instruction
*inst
)
758 struct brw_compile
*p
= &c
->func
;
759 GLuint mask
= inst
->DstReg
.WriteMask
;
761 struct brw_reg interp
[4];
762 struct brw_reg dst
, src0
;
765 src0
= get_src_reg(c
, inst
, 0, 0);
768 interp
[0] = brw_vec1_grf(nr
, 0);
769 interp
[1] = brw_vec1_grf(nr
, 4);
770 interp
[2] = brw_vec1_grf(nr
+1, 0);
771 interp
[3] = brw_vec1_grf(nr
+1, 4);
773 for(i
= 0; i
< 4; i
++ ) {
775 dst
= get_dst_reg(c
, inst
, i
);
776 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
781 static void emit_pinterp(struct brw_wm_compile
*c
,
782 struct prog_instruction
*inst
)
784 struct brw_compile
*p
= &c
->func
;
785 GLuint mask
= inst
->DstReg
.WriteMask
;
787 struct brw_reg interp
[4];
788 struct brw_reg dst
, delta0
, delta1
;
789 struct brw_reg src0
, w
;
792 src0
= get_src_reg(c
, inst
, 0, 0);
793 delta0
= get_src_reg(c
, inst
, 1, 0);
794 delta1
= get_src_reg(c
, inst
, 1, 1);
795 w
= get_src_reg(c
, inst
, 2, 3);
798 interp
[0] = brw_vec1_grf(nr
, 0);
799 interp
[1] = brw_vec1_grf(nr
, 4);
800 interp
[2] = brw_vec1_grf(nr
+1, 0);
801 interp
[3] = brw_vec1_grf(nr
+1, 4);
803 for(i
= 0; i
< 4; i
++ ) {
805 dst
= get_dst_reg(c
, inst
, i
);
806 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
807 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
809 brw_MUL(p
, dst
, dst
, w
);
814 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
815 static void emit_frontfacing(struct brw_wm_compile
*c
,
816 struct prog_instruction
*inst
)
818 struct brw_compile
*p
= &c
->func
;
819 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
821 GLuint mask
= inst
->DstReg
.WriteMask
;
824 for (i
= 0; i
< 4; i
++) {
826 dst
= get_dst_reg(c
, inst
, i
);
827 brw_MOV(p
, dst
, brw_imm_f(0.0));
831 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
834 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
835 for (i
= 0; i
< 4; i
++) {
837 dst
= get_dst_reg(c
, inst
, i
);
838 brw_MOV(p
, dst
, brw_imm_f(1.0));
841 brw_set_predicate_control_flag_value(p
, 0xff);
844 static void emit_xpd(struct brw_wm_compile
*c
,
845 struct prog_instruction
*inst
)
848 struct brw_compile
*p
= &c
->func
;
849 GLuint mask
= inst
->DstReg
.WriteMask
;
850 for (i
= 0; i
< 4; i
++) {
854 struct brw_reg src0
, src1
, dst
;
855 dst
= get_dst_reg(c
, inst
, i
);
856 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
857 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
858 brw_MUL(p
, brw_null_reg(), src0
, src1
);
859 src0
= get_src_reg(c
, inst
, 0, i1
);
860 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
861 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
862 brw_MAC(p
, dst
, src0
, src1
);
863 brw_set_saturate(p
, 0);
866 brw_set_saturate(p
, 0);
869 static void emit_dp3(struct brw_wm_compile
*c
,
870 struct prog_instruction
*inst
)
872 struct brw_reg src0
[3], src1
[3], dst
;
874 struct brw_compile
*p
= &c
->func
;
875 for (i
= 0; i
< 3; i
++) {
876 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
877 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
880 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
881 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
882 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
883 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
884 brw_MAC(p
, dst
, src0
[2], src1
[2]);
885 brw_set_saturate(p
, 0);
888 static void emit_dp4(struct brw_wm_compile
*c
,
889 struct prog_instruction
*inst
)
891 struct brw_reg src0
[4], src1
[4], dst
;
893 struct brw_compile
*p
= &c
->func
;
894 for (i
= 0; i
< 4; i
++) {
895 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
896 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
898 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
899 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
900 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
901 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
902 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
903 brw_MAC(p
, dst
, src0
[3], src1
[3]);
904 brw_set_saturate(p
, 0);
907 static void emit_dph(struct brw_wm_compile
*c
,
908 struct prog_instruction
*inst
)
910 struct brw_reg src0
[4], src1
[4], dst
;
912 struct brw_compile
*p
= &c
->func
;
913 for (i
= 0; i
< 4; i
++) {
914 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
915 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
917 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
918 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
919 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
920 brw_MAC(p
, dst
, src0
[2], src1
[2]);
921 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
922 brw_ADD(p
, dst
, dst
, src1
[3]);
923 brw_set_saturate(p
, 0);
927 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
928 * Note that the result of the function is smeared across the dest
929 * register's X, Y, Z and W channels (subject to writemasking of course).
931 static void emit_math1(struct brw_wm_compile
*c
,
932 struct prog_instruction
*inst
, GLuint func
)
934 struct brw_compile
*p
= &c
->func
;
935 struct brw_reg src0
, dst
, tmp
;
936 const int mark
= mark_tmps( c
);
941 /* Get first component of source register */
942 src0
= get_src_reg(c
, inst
, 0, 0);
944 /* tmp = func(src0) */
945 brw_MOV(p
, brw_message_reg(2), src0
);
949 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
952 BRW_MATH_DATA_VECTOR
,
953 BRW_MATH_PRECISION_FULL
);
955 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
957 /* replicate tmp value across enabled dest channels */
958 for (i
= 0; i
< 4; i
++) {
959 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
960 dst
= get_dst_reg(c
, inst
, i
);
961 brw_MOV(p
, dst
, tmp
);
965 release_tmps(c
, mark
);
968 static void emit_rcp(struct brw_wm_compile
*c
,
969 struct prog_instruction
*inst
)
971 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
974 static void emit_rsq(struct brw_wm_compile
*c
,
975 struct prog_instruction
*inst
)
977 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
980 static void emit_sin(struct brw_wm_compile
*c
,
981 struct prog_instruction
*inst
)
983 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
986 static void emit_cos(struct brw_wm_compile
*c
,
987 struct prog_instruction
*inst
)
989 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
992 static void emit_ex2(struct brw_wm_compile
*c
,
993 struct prog_instruction
*inst
)
995 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
998 static void emit_lg2(struct brw_wm_compile
*c
,
999 struct prog_instruction
*inst
)
1001 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1004 static void emit_add(struct brw_wm_compile
*c
,
1005 struct prog_instruction
*inst
)
1007 struct brw_compile
*p
= &c
->func
;
1008 struct brw_reg src0
, src1
, dst
;
1009 GLuint mask
= inst
->DstReg
.WriteMask
;
1011 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1012 for (i
= 0 ; i
< 4; i
++) {
1013 if (mask
& (1<<i
)) {
1014 dst
= get_dst_reg(c
, inst
, i
);
1015 src0
= get_src_reg(c
, inst
, 0, i
);
1016 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1017 brw_ADD(p
, dst
, src0
, src1
);
1020 brw_set_saturate(p
, 0);
1023 static void emit_arl(struct brw_wm_compile
*c
,
1024 struct prog_instruction
*inst
)
1026 struct brw_compile
*p
= &c
->func
;
1027 struct brw_reg src0
, addr_reg
;
1028 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1029 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1030 BRW_ARF_ADDRESS
, 0);
1031 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1032 brw_MOV(p
, addr_reg
, src0
);
1033 brw_set_saturate(p
, 0);
1036 static void emit_sub(struct brw_wm_compile
*c
,
1037 struct prog_instruction
*inst
)
1039 struct brw_compile
*p
= &c
->func
;
1040 struct brw_reg src0
, src1
, dst
;
1041 GLuint mask
= inst
->DstReg
.WriteMask
;
1043 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1044 for (i
= 0 ; i
< 4; i
++) {
1045 if (mask
& (1<<i
)) {
1046 dst
= get_dst_reg(c
, inst
, i
);
1047 src0
= get_src_reg(c
, inst
, 0, i
);
1048 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1049 brw_ADD(p
, dst
, src0
, negate(src1
));
1052 brw_set_saturate(p
, 0);
1055 static void emit_mul(struct brw_wm_compile
*c
,
1056 struct prog_instruction
*inst
)
1058 struct brw_compile
*p
= &c
->func
;
1059 struct brw_reg src0
, src1
, dst
;
1060 GLuint mask
= inst
->DstReg
.WriteMask
;
1062 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1063 for (i
= 0 ; i
< 4; i
++) {
1064 if (mask
& (1<<i
)) {
1065 dst
= get_dst_reg(c
, inst
, i
);
1066 src0
= get_src_reg(c
, inst
, 0, i
);
1067 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1068 brw_MUL(p
, dst
, src0
, src1
);
1071 brw_set_saturate(p
, 0);
1074 static void emit_frc(struct brw_wm_compile
*c
,
1075 struct prog_instruction
*inst
)
1077 struct brw_compile
*p
= &c
->func
;
1078 struct brw_reg src0
, dst
;
1079 GLuint mask
= inst
->DstReg
.WriteMask
;
1081 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1082 for (i
= 0 ; i
< 4; i
++) {
1083 if (mask
& (1<<i
)) {
1084 dst
= get_dst_reg(c
, inst
, i
);
1085 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1086 brw_FRC(p
, dst
, src0
);
1089 if (inst
->SaturateMode
!= SATURATE_OFF
)
1090 brw_set_saturate(p
, 0);
1093 static void emit_flr(struct brw_wm_compile
*c
,
1094 struct prog_instruction
*inst
)
1096 struct brw_compile
*p
= &c
->func
;
1097 struct brw_reg src0
, dst
;
1098 GLuint mask
= inst
->DstReg
.WriteMask
;
1100 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1101 for (i
= 0 ; i
< 4; i
++) {
1102 if (mask
& (1<<i
)) {
1103 dst
= get_dst_reg(c
, inst
, i
);
1104 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1105 brw_RNDD(p
, dst
, src0
);
1108 brw_set_saturate(p
, 0);
1112 static void emit_min_max(struct brw_wm_compile
*c
,
1113 const struct prog_instruction
*inst
)
1115 struct brw_compile
*p
= &c
->func
;
1116 const GLuint mask
= inst
->DstReg
.WriteMask
;
1117 const int mark
= mark_tmps(c
);
1119 brw_push_insn_state(p
);
1120 for (i
= 0; i
< 4; i
++) {
1121 if (mask
& (1<<i
)) {
1122 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1123 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1124 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1126 /* if dst==src0 or dst==src1 we need to use a temp reg */
1127 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1128 brw_same_reg(dst
, src1
);
1135 printf(" Min/max: dst %d src0 %d src1 %d\n",
1136 dst.nr, src0.nr, src1.nr);
1138 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1139 brw_MOV(p
, dst
, src0
);
1140 brw_set_saturate(p
, 0);
1142 if (inst
->Opcode
== OPCODE_MIN
)
1143 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1145 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1147 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1148 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1149 brw_MOV(p
, dst
, src1
);
1150 brw_set_saturate(p
, 0);
1151 brw_set_predicate_control_flag_value(p
, 0xff);
1153 brw_MOV(p
, real_dst
, dst
);
1156 brw_pop_insn_state(p
);
1157 release_tmps(c
, mark
);
1160 static void emit_pow(struct brw_wm_compile
*c
,
1161 struct prog_instruction
*inst
)
1163 struct brw_compile
*p
= &c
->func
;
1164 struct brw_reg dst
, src0
, src1
;
1165 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1166 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1167 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1169 brw_MOV(p
, brw_message_reg(2), src0
);
1170 brw_MOV(p
, brw_message_reg(3), src1
);
1174 BRW_MATH_FUNCTION_POW
,
1175 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1178 BRW_MATH_DATA_VECTOR
,
1179 BRW_MATH_PRECISION_FULL
);
1182 static void emit_lrp(struct brw_wm_compile
*c
,
1183 struct prog_instruction
*inst
)
1185 struct brw_compile
*p
= &c
->func
;
1186 GLuint mask
= inst
->DstReg
.WriteMask
;
1187 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1189 int mark
= mark_tmps(c
);
1190 for (i
= 0; i
< 4; i
++) {
1191 if (mask
& (1<<i
)) {
1192 dst
= get_dst_reg(c
, inst
, i
);
1193 src0
= get_src_reg(c
, inst
, 0, i
);
1195 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1197 if (src1
.nr
== dst
.nr
) {
1198 tmp1
= alloc_tmp(c
);
1199 brw_MOV(p
, tmp1
, src1
);
1203 src2
= get_src_reg(c
, inst
, 2, i
);
1204 if (src2
.nr
== dst
.nr
) {
1205 tmp2
= alloc_tmp(c
);
1206 brw_MOV(p
, tmp2
, src2
);
1210 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1211 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1212 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1213 brw_MAC(p
, dst
, src0
, tmp1
);
1214 brw_set_saturate(p
, 0);
1216 release_tmps(c
, mark
);
1221 * For GLSL shaders, this KIL will be unconditional.
1222 * It may be contained inside an IF/ENDIF structure of course.
1224 static void emit_kil(struct brw_wm_compile
*c
)
1226 struct brw_compile
*p
= &c
->func
;
1227 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1228 brw_push_insn_state(p
);
1229 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1230 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1231 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1232 brw_pop_insn_state(p
);
1235 static void emit_mad(struct brw_wm_compile
*c
,
1236 struct prog_instruction
*inst
)
1238 struct brw_compile
*p
= &c
->func
;
1239 GLuint mask
= inst
->DstReg
.WriteMask
;
1240 struct brw_reg dst
, src0
, src1
, src2
;
1243 for (i
= 0; i
< 4; i
++) {
1244 if (mask
& (1<<i
)) {
1245 dst
= get_dst_reg(c
, inst
, i
);
1246 src0
= get_src_reg(c
, inst
, 0, i
);
1247 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1248 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1249 brw_MUL(p
, dst
, src0
, src1
);
1251 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1252 brw_ADD(p
, dst
, dst
, src2
);
1253 brw_set_saturate(p
, 0);
1258 static void emit_sop(struct brw_wm_compile
*c
,
1259 struct prog_instruction
*inst
, GLuint cond
)
1261 struct brw_compile
*p
= &c
->func
;
1262 GLuint mask
= inst
->DstReg
.WriteMask
;
1263 struct brw_reg dst
, src0
, src1
;
1266 for (i
= 0; i
< 4; i
++) {
1267 if (mask
& (1<<i
)) {
1268 dst
= get_dst_reg(c
, inst
, i
);
1269 src0
= get_src_reg(c
, inst
, 0, i
);
1270 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1271 brw_push_insn_state(p
);
1272 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1273 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1274 brw_MOV(p
, dst
, brw_imm_f(0.0));
1275 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1276 brw_MOV(p
, dst
, brw_imm_f(1.0));
1277 brw_pop_insn_state(p
);
1282 static void emit_slt(struct brw_wm_compile
*c
,
1283 struct prog_instruction
*inst
)
1285 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1288 static void emit_sle(struct brw_wm_compile
*c
,
1289 struct prog_instruction
*inst
)
1291 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1294 static void emit_sgt(struct brw_wm_compile
*c
,
1295 struct prog_instruction
*inst
)
1297 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1300 static void emit_sge(struct brw_wm_compile
*c
,
1301 struct prog_instruction
*inst
)
1303 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1306 static void emit_seq(struct brw_wm_compile
*c
,
1307 struct prog_instruction
*inst
)
1309 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1312 static void emit_sne(struct brw_wm_compile
*c
,
1313 struct prog_instruction
*inst
)
1315 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1318 static void emit_ddx(struct brw_wm_compile
*c
,
1319 struct prog_instruction
*inst
)
1321 struct brw_compile
*p
= &c
->func
;
1322 GLuint mask
= inst
->DstReg
.WriteMask
;
1323 struct brw_reg interp
[4];
1325 struct brw_reg src0
, w
;
1327 src0
= get_src_reg(c
, inst
, 0, 0);
1328 w
= get_src_reg(c
, inst
, 1, 3);
1330 interp
[0] = brw_vec1_grf(nr
, 0);
1331 interp
[1] = brw_vec1_grf(nr
, 4);
1332 interp
[2] = brw_vec1_grf(nr
+1, 0);
1333 interp
[3] = brw_vec1_grf(nr
+1, 4);
1334 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1335 for(i
= 0; i
< 4; i
++ ) {
1336 if (mask
& (1<<i
)) {
1337 dst
= get_dst_reg(c
, inst
, i
);
1338 brw_MOV(p
, dst
, interp
[i
]);
1339 brw_MUL(p
, dst
, dst
, w
);
1342 brw_set_saturate(p
, 0);
1345 static void emit_ddy(struct brw_wm_compile
*c
,
1346 struct prog_instruction
*inst
)
1348 struct brw_compile
*p
= &c
->func
;
1349 GLuint mask
= inst
->DstReg
.WriteMask
;
1350 struct brw_reg interp
[4];
1352 struct brw_reg src0
, w
;
1355 src0
= get_src_reg(c
, inst
, 0, 0);
1357 w
= get_src_reg(c
, inst
, 1, 3);
1358 interp
[0] = brw_vec1_grf(nr
, 0);
1359 interp
[1] = brw_vec1_grf(nr
, 4);
1360 interp
[2] = brw_vec1_grf(nr
+1, 0);
1361 interp
[3] = brw_vec1_grf(nr
+1, 4);
1362 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1363 for(i
= 0; i
< 4; i
++ ) {
1364 if (mask
& (1<<i
)) {
1365 dst
= get_dst_reg(c
, inst
, i
);
1366 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1367 brw_MUL(p
, dst
, dst
, w
);
1370 brw_set_saturate(p
, 0);
/**
 * Return a view of reg retyped to BRW_REGISTER_TYPE_W, advanced by one
 * element with suboffset(), and then passed through stride().
 *
 * Presumably this selects the upper 16-bit word of each 32-bit element
 * (the counterpart of low_words()).
 * NOTE(review): the remaining stride() arguments are not visible in this
 * listing (embedded line 1376 is absent) - confirm against the full file.
 */
1373 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1375 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
/**
 * Return a view of reg retyped to BRW_REGISTER_TYPE_W and restrided with
 * stride( ..., 0, 8, 2 ).
 *
 * Presumably this addresses every second 16-bit word, i.e. the lower word
 * of each 32-bit element - TODO confirm the stride() argument meaning.
 */
1379 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1381 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
/**
 * Return a view of reg retyped to BRW_REGISTER_TYPE_B and restrided with
 * stride( ..., 0, 16, 2 ).
 *
 * Presumably this addresses every second byte, starting with byte 0 -
 * TODO confirm the stride() argument meaning.
 */
1384 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1386 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
/**
 * Return a view of reg retyped to BRW_REGISTER_TYPE_B, advanced by one
 * element with suboffset(), and then passed through stride().
 *
 * Presumably this addresses every second byte starting with byte 1 (the
 * counterpart of even_bytes()).
 * NOTE(review): the remaining stride() arguments are not visible in this
 * listing (embedded line 1392 is absent) - confirm against the full file.
 */
1389 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1391 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1395 /* One-, two- and three-dimensional Perlin noise, similar to the description
1396 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
/**
 * Emit the body of the one-dimensional noise routine.
 *
 * Input and output share one register: param is fetched with
 * lookup_tmp( c, mark - 2 ), i.e. from a temporary slot that was
 * allocated before this function took its mark, and the final MUL writes
 * the scaled result back into param.
 *
 * Emitted sequence, in order:
 *  - RNDD of param into itmp[0] and itmp[0] + 1 into itmp[1] (the two end
 *    coordinates), FRC of param in place (the offset within the unit);
 *  - three rounds of "multiply by a constant, XOR low words with high
 *    words" applied to itmp[0] and itmp[1], using the constants 0xBA97,
 *    0x79D9 and 0xD5B1 held in itmp[2..4];
 *  - conversion of both hashes to float gradients (x0, x1), weighted by
 *    param and by param - 1 respectively;
 *  - blend of the two gradients with the polynomial named in the comment
 *    below, then a final scale by 2^-30.
 *
 * itmp[0..4] are not extra registers: they are UD-typed views of tmp[0],
 * tmp[1], x0, x1 and t.  All temporaries allocated here are handed back
 * with release_tmps( c, mark ) at the end.
 *
 * \param c  compile state (instructions are emitted into c->func)
 */
1397 static void noise1_sub( struct brw_wm_compile
*c
) {
1399 struct brw_compile
*p
= &c
->func
;
1400 struct brw_reg param
,
1401 x0
, x1
, /* gradients at each end */
1402 t
, tmp
[ 2 ], /* float temporaries */
1403 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1405 int mark
= mark_tmps( c
);
1407 x0
= alloc_tmp( c
);
1408 x1
= alloc_tmp( c
);
/* NOTE(review): t is read below, but no allocation of t is visible in this
   listing (embedded line 1409 is absent); the declaration of i is not
   visible either - confirm against the complete file. */
1410 tmp
[ 0 ] = alloc_tmp( c
);
1411 tmp
[ 1 ] = alloc_tmp( c
);
/* Integer views of the float temporaries; no new registers are used. */
1412 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1413 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1414 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1415 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1416 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
/* The argument lives two temporary slots below this function's mark. */
1418 param
= lookup_tmp( c
, mark
- 2 );
1420 brw_set_access_mode( p
, BRW_ALIGN_1
);
1422 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1424 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1425 be hashed. Also compute the remainder (offset within the unit
1426 length), interleaved to reduce register dependency penalties. */
1427 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1428 brw_FRC( p
, param
, param
);
1429 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1430 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1431 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1433 /* We're now ready to perform the hashing. The two hashes are
1434 interleaved for performance. The hash function used is
1435 designed to rapidly achieve avalanche and require only 32x16
1436 bit multiplication, and 16-bit swizzles (which we get for
1437 free). We can't use immediate operands in the multiplies,
1438 because immediates are permitted only in src1 and the 16-bit
1439 factor is permitted only in src0. */
1440 for( i
= 0; i
< 2; i
++ )
1441 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1442 for( i
= 0; i
< 2; i
++ )
1443 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1444 high_words( itmp
[ i
] ) );
1445 for( i
= 0; i
< 2; i
++ )
1446 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1447 for( i
= 0; i
< 2; i
++ )
1448 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1449 high_words( itmp
[ i
] ) );
1450 for( i
= 0; i
< 2; i
++ )
1451 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1452 for( i
= 0; i
< 2; i
++ )
1453 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1454 high_words( itmp
[ i
] ) );
1456 /* Now we want to initialise the two gradients based on the
1457 hashes. Format conversion from signed integer to float leaves
1458 everything scaled too high by a factor of pow( 2, 31 ), but
1459 we correct for that right at the end. */
1460 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1461 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1462 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1464 brw_MUL( p
, x0
, x0
, param
);
1465 brw_MUL( p
, x1
, x1
, t
);
1467 /* We interpolate between the gradients using the polynomial
1468 6t^5 - 15t^4 + 10t^3 (Perlin). */
1469 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1470 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1471 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1472 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1473 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1474 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1476 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1477 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1478 brw_MUL( p
, x1
, x1
, param
);
1479 brw_ADD( p
, x0
, x0
, x1
);
1480 /* scale by pow( 2, -30 ), to compensate for the format conversion
1481 above and an extra factor of 2 so that a single gradient covers
1483 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1485 release_tmps( c
, mark
);
/**
 * Emit code for a one-dimensional noise instruction.
 *
 * Copies the value fetched with get_src_reg( c, inst, 0, 0 ) into a
 * freshly allocated temporary (param), invokes the SUB_NOISE1 subroutine
 * (whose body is generated by noise1_sub), and then copies param - which
 * noise1_sub overwrites with its result - into every destination component
 * enabled in the write mask.  Saturation is enabled for those copies only
 * when inst->SaturateMode is SATURATE_ZERO_ONE.
 *
 * The assert requires mark_tmps() to return 0 on entry; presumably this
 * means no other temporaries may be live, since noise1_sub locates its
 * argument at a fixed offset from its own mark.
 *
 * \param c     compile state (instructions are emitted into c->func)
 * \param inst  instruction being translated
 *
 * NOTE(review): the declaration of i is not visible in this listing.
 */
1488 static void emit_noise1( struct brw_wm_compile
*c
,
1489 struct prog_instruction
*inst
)
1491 struct brw_compile
*p
= &c
->func
;
1492 struct brw_reg src
, param
, dst
;
1493 GLuint mask
= inst
->DstReg
.WriteMask
;
1495 int mark
= mark_tmps( c
);
1497 assert( mark
== 0 );
1499 src
= get_src_reg( c
, inst
, 0, 0 );
/* Argument passing: the subroutine reads and overwrites this temporary. */
1501 param
= alloc_tmp( c
);
1503 brw_MOV( p
, param
, src
);
1505 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1507 /* Fill in the result: */
1508 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
/* The same scalar result is replicated to every enabled component. */
1509 for (i
= 0 ; i
< 4; i
++) {
1510 if (mask
& (1<<i
)) {
1511 dst
= get_dst_reg(c
, inst
, i
);
1512 brw_MOV( p
, dst
, param
);
1515 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1516 brw_set_saturate( p
, 0 );
1518 release_tmps( c
, mark
);
/**
 * Emit the body of the two-dimensional noise routine.
 *
 * The two arguments are fetched with lookup_tmp( c, mark - 3 ) (param0)
 * and lookup_tmp( c, mark - 2 ) (param1), i.e. from temporary slots that
 * were allocated before this function took its mark.  The final MUL
 * writes the scaled result into param0.
 *
 * Emitted sequence, in order:
 *  - RNDD of both arguments into itmp[0] and itmp[1], FRC of both
 *    arguments in place;
 *  - the rounded param1 is added into the high words of itmp[0], and
 *    itmp[1..3] are formed from itmp[0] by adding 0x10000, 0x1 and
 *    0x10001 (the other three corners);
 *  - three rounds of "multiply by a constant, XOR low words with high
 *    words" on itmp[0..3], using 0xBA97, 0x79D9 and 0xD5B1 held in
 *    itmp[4..6];
 *  - the low and high words of each hash are converted to float and
 *    combined with the fractional offsets to give the four corner
 *    values x0y0, x0y1, x1y0, x1y1;
 *  - interpolation in y, then in x, then a final scale by 2^-15.
 *
 * itmp[0..3] are UD-typed views of tmp[0..3]; itmp[4..6] are UD-typed
 * views of x0y0, x0y1 and x1y0.  All temporaries allocated here are handed
 * back with release_tmps( c, mark ) at the end.
 *
 * \param c  compile state (instructions are emitted into c->func)
 */
1521 static void noise2_sub( struct brw_wm_compile
*c
) {
1523 struct brw_compile
*p
= &c
->func
;
1524 struct brw_reg param0
, param1
,
1525 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1526 t
, tmp
[ 4 ], /* float temporaries */
1527 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1529 int mark
= mark_tmps( c
);
1531 x0y0
= alloc_tmp( c
);
1532 x0y1
= alloc_tmp( c
);
1533 x1y0
= alloc_tmp( c
);
1534 x1y1
= alloc_tmp( c
);
/* NOTE(review): t is written and read below, but no allocation of t is
   visible in this listing (embedded line 1535 is absent); the declaration
   of i is not visible either - confirm against the complete file. */
1536 for( i
= 0; i
< 4; i
++ ) {
1537 tmp
[ i
] = alloc_tmp( c
);
1538 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
/* Integer views of three of the corner registers; they hold the hash
   multipliers until the corners themselves are initialised. */
1540 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1541 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1542 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
/* The arguments live three and two temporary slots below the mark. */
1544 param0
= lookup_tmp( c
, mark
- 3 );
1545 param1
= lookup_tmp( c
, mark
- 2 );
1547 brw_set_access_mode( p
, BRW_ALIGN_1
);
1549 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1550 be hashed. Also compute the remainders (offsets within the unit
1551 square), interleaved to reduce register dependency penalties. */
1552 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1553 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1554 brw_FRC( p
, param0
, param0
);
1555 brw_FRC( p
, param1
, param1
);
1556 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1557 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1558 low_words( itmp
[ 1 ] ) );
1559 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1560 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1561 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1562 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1563 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1565 /* We're now ready to perform the hashing. The four hashes are
1566 interleaved for performance. The hash function used is
1567 designed to rapidly achieve avalanche and require only 32x16
1568 bit multiplication, and 16-bit swizzles (which we get for
1569 free). We can't use immediate operands in the multiplies,
1570 because immediates are permitted only in src1 and the 16-bit
1571 factor is permitted only in src0. */
1572 for( i
= 0; i
< 4; i
++ )
1573 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1574 for( i
= 0; i
< 4; i
++ )
1575 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1576 high_words( itmp
[ i
] ) );
1577 for( i
= 0; i
< 4; i
++ )
1578 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1579 for( i
= 0; i
< 4; i
++ )
1580 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1581 high_words( itmp
[ i
] ) );
1582 for( i
= 0; i
< 4; i
++ )
1583 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1584 for( i
= 0; i
< 4; i
++ )
1585 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1586 high_words( itmp
[ i
] ) );
1588 /* Now we want to initialise the four gradients based on the
1589 hashes. Format conversion from signed integer to float leaves
1590 everything scaled too high by a factor of pow( 2, 15 ), but
1591 we correct for that right at the end. */
1592 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1593 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1594 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1595 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1596 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1598 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1599 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1600 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1601 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
/* x contribution: the x1 corners use param0 - 1 (held in t), the x0
   corners use param0.  t is then reloaded with param1 - 1. */
1603 brw_MUL( p
, x1y0
, x1y0
, t
);
1604 brw_MUL( p
, x1y1
, x1y1
, t
);
1605 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1606 brw_MUL( p
, x0y0
, x0y0
, param0
);
1607 brw_MUL( p
, x0y1
, x0y1
, param0
);
/* y contribution: tmp[0]/tmp[2] are scaled by param1, tmp[1]/tmp[3] by
   param1 - 1. */
1609 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1610 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1611 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1612 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1614 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1615 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1616 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1617 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1619 /* We interpolate between the gradients using the polynomial
1620 6t^5 - 15t^4 + 10t^3 (Perlin). */
1621 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1622 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1623 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1624 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1625 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1626 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1627 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1629 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1630 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1631 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1632 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1633 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1635 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1636 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1637 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1638 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1640 /* Here we interpolate in the y dimension... */
1641 brw_MUL( p
, x0y1
, x0y1
, param1
);
1642 brw_MUL( p
, x1y1
, x1y1
, param1
);
1643 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1644 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1646 /* And now in x. There are horrible register dependencies here,
1647 but we have nothing else to do. */
1648 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1649 brw_MUL( p
, x1y0
, x1y0
, param0
);
1650 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1652 /* scale by pow( 2, -15 ), as described above */
1653 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1655 release_tmps( c
, mark
);
/**
 * Emit code for a two-dimensional noise instruction.
 *
 * Copies the values fetched with get_src_reg( c, inst, 0, 0 ) and
 * get_src_reg( c, inst, 0, 1 ) into two freshly allocated temporaries
 * (param0, param1), invokes the SUB_NOISE2 subroutine (whose body is
 * generated by noise2_sub), and then copies param0 - which noise2_sub
 * overwrites with its result - into every destination component enabled in
 * the write mask.  Saturation is enabled for those copies only when
 * inst->SaturateMode is SATURATE_ZERO_ONE.
 *
 * The assert requires mark_tmps() to return 0 on entry; presumably this
 * means no other temporaries may be live, since noise2_sub locates its
 * arguments at fixed offsets from its own mark.
 *
 * \param c     compile state (instructions are emitted into c->func)
 * \param inst  instruction being translated
 *
 * NOTE(review): the declaration of i is not visible in this listing.
 */
1658 static void emit_noise2( struct brw_wm_compile
*c
,
1659 struct prog_instruction
*inst
)
1661 struct brw_compile
*p
= &c
->func
;
1662 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1663 GLuint mask
= inst
->DstReg
.WriteMask
;
1665 int mark
= mark_tmps( c
);
1667 assert( mark
== 0 );
1669 src0
= get_src_reg( c
, inst
, 0, 0 );
1670 src1
= get_src_reg( c
, inst
, 0, 1 );
/* Argument passing: the subroutine reads and overwrites these temporaries;
   the allocation order fixes which slot is which. */
1672 param0
= alloc_tmp( c
);
1673 param1
= alloc_tmp( c
);
1675 brw_MOV( p
, param0
, src0
);
1676 brw_MOV( p
, param1
, src1
);
1678 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1680 /* Fill in the result: */
1681 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
/* The same scalar result is replicated to every enabled component. */
1682 for (i
= 0 ; i
< 4; i
++) {
1683 if (mask
& (1<<i
)) {
1684 dst
= get_dst_reg(c
, inst
, i
);
1685 brw_MOV( p
, dst
, param0
);
1688 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1689 brw_set_saturate( p
, 0 );
1691 release_tmps( c
, mark
);
1695 * The three-dimensional case is much like the one- and two- versions above,
1696 * but since the number of corners is rapidly growing we now pack 16 16-bit
1697 * hashes into each register to extract more parallelism from the EUs.
/**
 * Emit the body of the three-dimensional noise routine.
 *
 * The three arguments are fetched with lookup_tmp( c, mark - 4 ),
 * lookup_tmp( c, mark - 3 ) and lookup_tmp( c, mark - 2 ) (param0..2),
 * i.e. from temporary slots allocated before this function took its mark.
 * The final MUL writes the scaled result into param0.
 *
 * Emitted sequence, in order:
 *  - RNDD of the arguments into itmp[0..2], FRC of the arguments in place;
 *  - the three rounded coordinates are mixed into the low words of itmp[0]
 *    with a MUL/MAC/MAC chain, and the high words get that value plus
 *    0xBC8F; wtmp[1..3] are then derived from wtmp[0] by word-wise adds;
 *  - two rounds of "multiply by a constant, XOR even bytes with odd bytes"
 *    on wtmp[0..3], executed with the execution mask disabled;
 *  - the hashes in tmp[0]/tmp[1] are turned into four corner values and
 *    interpolated in y and x, leaving one face in tmp[0];
 *  - the same is done with the hashes in tmp[2]/tmp[3], leaving the other
 *    face in tmp[1];
 *  - final interpolation in z and a scale by 2^-15.
 *
 * itmp[i] is tmp[i] retyped to UD and wtmp[i] is a 16-wide unsigned-word
 * register at the same GRF number as tmp[i], so the three arrays alias the
 * same eight registers.  Between successive extractions each hash register
 * is shifted left by 5 bits so that different bits feed each term.  All
 * temporaries allocated here are handed back with release_tmps( c, mark ).
 *
 * \param c  compile state (instructions are emitted into c->func)
 */
1699 static void noise3_sub( struct brw_wm_compile
*c
) {
1701 struct brw_compile
*p
= &c
->func
;
1702 struct brw_reg param0
, param1
, param2
,
1703 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1704 xi
, yi
, zi
, /* interpolation coefficients */
1705 t
, tmp
[ 8 ], /* float temporaries */
1706 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1707 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1709 int mark
= mark_tmps( c
);
1711 x0y0
= alloc_tmp( c
);
1712 x0y1
= alloc_tmp( c
);
1713 x1y0
= alloc_tmp( c
);
1714 x1y1
= alloc_tmp( c
);
1715 xi
= alloc_tmp( c
);
1716 yi
= alloc_tmp( c
);
1717 zi
= alloc_tmp( c
);
/* NOTE(review): t is written and read below, but no allocation of t is
   visible in this listing (embedded line 1718 is absent); the declaration
   of i is not visible either - confirm against the complete file. */
1719 for( i
= 0; i
< 8; i
++ ) {
1720 tmp
[ i
] = alloc_tmp( c
);
1721 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1722 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
/* The arguments live four, three and two temporary slots below the mark. */
1725 param0
= lookup_tmp( c
, mark
- 4 );
1726 param1
= lookup_tmp( c
, mark
- 3 );
1727 param2
= lookup_tmp( c
, mark
- 2 );
1729 brw_set_access_mode( p
, BRW_ALIGN_1
);
1731 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1732 be hashed. Also compute the remainders (offsets within the unit
1733 cube), interleaved to reduce register dependency penalties. */
1734 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1735 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1736 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1737 brw_FRC( p
, param0
, param0
);
1738 brw_FRC( p
, param1
, param1
);
1739 brw_FRC( p
, param2
, param2
);
1740 /* Since we now have only 16 bits of precision in the hash, we must
1741 be more careful about thorough mixing to maintain entropy as we
1742 squash the input vector into a small scalar. */
1743 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1744 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1745 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1746 brw_imm_uw( 0x9B93 ) );
1747 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1748 brw_imm_uw( 0xBC8F ) );
1750 /* Temporarily disable the execution mask while we work with ExecSize=16
1751 channels (the mask is set for ExecSize=8 and is probably incorrect).
1752 Although this might cause execution of unwanted channels, the code
1753 writes only to temporary registers and has no side effects, so
1754 disabling the mask is harmless. */
1755 brw_push_insn_state( p
);
1756 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1757 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1758 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1759 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1761 /* We're now ready to perform the hashing. The eight hashes are
1762 interleaved for performance. The hash function used is
1763 designed to rapidly achieve avalanche and require only 16x16
1764 bit multiplication, and 8-bit swizzles (which we get for
1766 for( i
= 0; i
< 4; i
++ )
1767 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1768 for( i
= 0; i
< 4; i
++ )
1769 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1770 odd_bytes( wtmp
[ i
] ) );
1771 for( i
= 0; i
< 4; i
++ )
1772 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1773 for( i
= 0; i
< 4; i
++ )
1774 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1775 odd_bytes( wtmp
[ i
] ) );
1776 brw_pop_insn_state( p
);
1778 /* Now we want to initialise the four rear gradients based on the
1779 hashes. Format conversion from signed integer to float leaves
1780 everything scaled too high by a factor of pow( 2, 15 ), but
1781 we correct for that right at the end. */
1783 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1784 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1785 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1786 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1787 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
/* Shift the hash words so the next extraction sees different bits. */
1789 brw_push_insn_state( p
);
1790 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1791 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1792 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1793 brw_pop_insn_state( p
);
/* x terms: x1 corners weighted by param0 - 1 (in t), x0 corners by param0.
   t is then reloaded with param1 - 1 for the y terms. */
1795 brw_MUL( p
, x1y0
, x1y0
, t
);
1796 brw_MUL( p
, x1y1
, x1y1
, t
);
1797 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1798 brw_MUL( p
, x0y0
, x0y0
, param0
);
1799 brw_MUL( p
, x0y1
, x0y1
, param0
);
1802 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1803 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1804 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1805 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1807 brw_push_insn_state( p
);
1808 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1809 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1810 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1811 brw_pop_insn_state( p
);
/* y terms.  t is reloaded with param0 - 1 here, ahead of its next use in
   the second (tmp[2]/tmp[3]) pass further down. */
1813 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1814 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1815 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1816 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1817 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1819 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1820 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1821 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1822 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
/* z terms for this face: all four corners are weighted by param2. */
1825 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1826 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1827 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1828 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1830 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1831 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1832 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1833 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1835 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1836 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1837 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1838 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1840 /* We interpolate between the gradients using the polynomial
1841 6t^5 - 15t^4 + 10t^3 (Perlin). */
1842 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1843 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1844 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1845 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1846 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1847 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1848 brw_MUL( p
, xi
, xi
, param0
);
1849 brw_MUL( p
, yi
, yi
, param1
);
1850 brw_MUL( p
, zi
, zi
, param2
);
1851 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1852 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1853 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1854 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1855 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1856 brw_MUL( p
, xi
, xi
, param0
);
1857 brw_MUL( p
, yi
, yi
, param1
);
1858 brw_MUL( p
, zi
, zi
, param2
);
1859 brw_MUL( p
, xi
, xi
, param0
);
1860 brw_MUL( p
, yi
, yi
, param1
);
1861 brw_MUL( p
, zi
, zi
, param2
);
1862 brw_MUL( p
, xi
, xi
, param0
);
1863 brw_MUL( p
, yi
, yi
, param1
);
1864 brw_MUL( p
, zi
, zi
, param2
);
1866 /* Here we interpolate in the y dimension... */
1867 brw_MUL( p
, x0y1
, x0y1
, yi
);
1868 brw_MUL( p
, x1y1
, x1y1
, yi
);
1869 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1870 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1872 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1873 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1874 brw_MUL( p
, x1y0
, x1y0
, xi
);
1875 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1877 /* Now do the same thing for the front four gradients... */
1879 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1880 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1881 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1882 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1884 brw_push_insn_state( p
);
1885 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1886 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1887 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1888 brw_pop_insn_state( p
);
/* t still holds param0 - 1 from the reload in the first pass. */
1890 brw_MUL( p
, x1y0
, x1y0
, t
);
1891 brw_MUL( p
, x1y1
, x1y1
, t
);
1892 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1893 brw_MUL( p
, x0y0
, x0y0
, param0
);
1894 brw_MUL( p
, x0y1
, x0y1
, param0
);
1897 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1898 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1899 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1900 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1902 brw_push_insn_state( p
);
1903 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1904 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1905 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1906 brw_pop_insn_state( p
);
/* y terms; t is then reloaded with param2 - 1 for this face's z terms. */
1908 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1909 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1910 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1911 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1912 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1914 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1915 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1916 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1917 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
/* z terms for this face: all four corners are weighted by param2 - 1. */
1920 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1921 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1922 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1923 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1925 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1926 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1927 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1928 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1930 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1931 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1932 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1933 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1935 /* The interpolation coefficients are still around from last time, so
1936 again interpolate in the y dimension... */
1937 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1938 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1939 brw_MUL( p
, x0y1
, x0y1
, yi
);
1940 brw_MUL( p
, x1y1
, x1y1
, yi
);
1941 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1942 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1944 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1945 time put the front face in tmp[ 1 ] and we're nearly there... */
1946 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1947 brw_MUL( p
, x1y0
, x1y0
, xi
);
1948 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1950 /* The final interpolation, in the z dimension: */
1951 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1952 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1953 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1955 /* scale by pow( 2, -15 ), as described above */
1956 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1958 release_tmps( c
, mark
);
/**
 * Emit code for a three-dimensional noise instruction.
 *
 * Copies the values fetched with get_src_reg( c, inst, 0, 0 ),
 * get_src_reg( c, inst, 0, 1 ) and get_src_reg( c, inst, 0, 2 ) into three
 * freshly allocated temporaries (param0..2), invokes the SUB_NOISE3
 * subroutine (whose body is generated by noise3_sub), and then copies
 * param0 - which noise3_sub overwrites with its result - into every
 * destination component enabled in the write mask.  Saturation is enabled
 * for those copies only when inst->SaturateMode is SATURATE_ZERO_ONE.
 *
 * The assert requires mark_tmps() to return 0 on entry; presumably this
 * means no other temporaries may be live, since noise3_sub locates its
 * arguments at fixed offsets from its own mark.
 *
 * \param c     compile state (instructions are emitted into c->func)
 * \param inst  instruction being translated
 *
 * NOTE(review): the declaration of i is not visible in this listing.
 */
1961 static void emit_noise3( struct brw_wm_compile
*c
,
1962 struct prog_instruction
*inst
)
1964 struct brw_compile
*p
= &c
->func
;
1965 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1966 GLuint mask
= inst
->DstReg
.WriteMask
;
1968 int mark
= mark_tmps( c
);
1970 assert( mark
== 0 );
1972 src0
= get_src_reg( c
, inst
, 0, 0 );
1973 src1
= get_src_reg( c
, inst
, 0, 1 );
1974 src2
= get_src_reg( c
, inst
, 0, 2 );
/* Argument passing: the subroutine reads and overwrites these temporaries;
   the allocation order fixes which slot is which. */
1976 param0
= alloc_tmp( c
);
1977 param1
= alloc_tmp( c
);
1978 param2
= alloc_tmp( c
);
1980 brw_MOV( p
, param0
, src0
);
1981 brw_MOV( p
, param1
, src1
);
1982 brw_MOV( p
, param2
, src2
);
1984 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1986 /* Fill in the result: */
1987 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
/* The same scalar result is replicated to every enabled component. */
1988 for (i
= 0 ; i
< 4; i
++) {
1989 if (mask
& (1<<i
)) {
1990 dst
= get_dst_reg(c
, inst
, i
);
1991 brw_MOV( p
, dst
, param0
);
1994 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1995 brw_set_saturate( p
, 0 );
1997 release_tmps( c
, mark
);
2001 * For the four-dimensional case, the little micro-optimisation benefits
2002 * we obtain by unrolling all the loops aren't worth the massive bloat it
2003 * now causes. Instead, we loop twice around performing a similar operation
2004 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2005 * code to glue it all together.
2007 static void noise4_sub( struct brw_wm_compile
*c
)
2009 struct brw_compile
*p
= &c
->func
;
2010 struct brw_reg param
[ 4 ],
2011 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2012 w0
, /* noise for the w=0 cube */
2013 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2014 interp
[ 4 ], /* interpolation coefficients */
2015 t
, tmp
[ 8 ], /* float temporaries */
2016 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2017 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2019 int mark
= mark_tmps( c
);
2020 GLuint loop
, origin
;
2022 x0y0
= alloc_tmp( c
);
2023 x0y1
= alloc_tmp( c
);
2024 x1y0
= alloc_tmp( c
);
2025 x1y1
= alloc_tmp( c
);
2027 w0
= alloc_tmp( c
);
2028 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2029 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2031 for( i
= 0; i
< 4; i
++ ) {
2032 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2033 interp
[ i
] = alloc_tmp( c
);
2036 for( i
= 0; i
< 8; i
++ ) {
2037 tmp
[ i
] = alloc_tmp( c
);
2038 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2039 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2042 brw_set_access_mode( p
, BRW_ALIGN_1
);
2044 /* We only want 16 bits of precision from the integral part of each
2045 co-ordinate, but unfortunately the RNDD semantics would saturate
2046 at 16 bits if we performed the operation directly to a 16-bit
2047 destination. Therefore, we round to 32-bit temporaries where
2048 appropriate, and then store only the lower 16 bits. */
2049 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2050 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2051 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2052 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2053 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2054 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2056 /* Modify the flag register here, because the side effect is useful
2057 later (see below). We know for certain that all flags will be
2058 cleared, since the FRC instruction cannot possibly generate
2059 negative results. Even for exceptional inputs (infinities, denormals,
2060 NaNs), the architecture guarantees that the L conditional is false. */
2061 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2062 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2063 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2064 for( i
= 1; i
< 4; i
++ )
2065 brw_FRC( p
, param
[ i
], param
[ i
] );
2067 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2069 for( i
= 0; i
< 4; i
++ )
2070 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2071 for( i
= 0; i
< 4; i
++ )
2072 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2073 for( i
= 0; i
< 4; i
++ )
2074 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2075 for( i
= 0; i
< 4; i
++ )
2076 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2077 for( j
= 0; j
< 3; j
++ )
2078 for( i
= 0; i
< 4; i
++ )
2079 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2081 /* Mark the current address, as it will be a jump destination. The
2082 following code will be executed twice: first, with the flag
2083 register clear indicating the w=0 case, and second with flags
2087 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2088 be hashed. Since we have only 16 bits of precision in the hash, we
2089 must be careful about thorough mixing to maintain entropy as we
2090 squash the input vector into a small scalar. */
2091 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2092 brw_imm_uw( 0xBC8F ) );
2093 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2094 brw_imm_uw( 0xD0BD ) );
2095 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2096 brw_imm_uw( 0x9B93 ) );
2097 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2098 brw_imm_uw( 0xA359 ) );
2099 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2100 brw_imm_uw( 0xBC8F ) );
2102 /* Temporarily disable the execution mask while we work with ExecSize=16
2103 channels (the mask is set for ExecSize=8 and is probably incorrect).
2104 Although this might cause execution of unwanted channels, the code
2105 writes only to temporary registers and has no side effects, so
2106 disabling the mask is harmless. */
2107 brw_push_insn_state( p
);
2108 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2109 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2110 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2111 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2113 /* We're now ready to perform the hashing. The eight hashes are
2114 interleaved for performance. The hash function used is
2115 designed to rapidly achieve avalanche and require only 16x16
2116 bit multiplication, and 8-bit swizzles (which we get for
2118 for( i
= 0; i
< 4; i
++ )
2119 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2120 for( i
= 0; i
< 4; i
++ )
2121 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2122 odd_bytes( wtmp
[ i
] ) );
2123 for( i
= 0; i
< 4; i
++ )
2124 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2125 for( i
= 0; i
< 4; i
++ )
2126 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2127 odd_bytes( wtmp
[ i
] ) );
2128 brw_pop_insn_state( p
);
2130 /* Now we want to initialise the four rear gradients based on the
2131 hashes. Format conversion from signed integer to float leaves
2132 everything scaled too high by a factor of pow( 2, 15 ), but
2133 we correct for that right at the end. */
2135 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2136 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2137 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2138 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2139 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2141 brw_push_insn_state( p
);
2142 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2143 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2144 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2145 brw_pop_insn_state( p
);
2147 brw_MUL( p
, x1y0
, x1y0
, t
);
2148 brw_MUL( p
, x1y1
, x1y1
, t
);
2149 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2150 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2151 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2154 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2155 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2156 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2157 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2159 brw_push_insn_state( p
);
2160 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2161 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2162 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2163 brw_pop_insn_state( p
);
2165 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2166 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2167 /* prepare t for the w component (used below): w the first time through
2168 the loop; w - 1 the second time) */
2169 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2170 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2171 p
->current
->header
.predicate_inverse
= 1;
2172 brw_MOV( p
, t
, param
[ 3 ] );
2173 p
->current
->header
.predicate_inverse
= 0;
2174 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2175 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2176 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2178 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2179 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2180 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2181 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2184 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2185 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2186 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2187 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2189 brw_push_insn_state( p
);
2190 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2191 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2192 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2193 brw_pop_insn_state( p
);
2195 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2196 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2197 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2198 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2200 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2201 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2202 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2203 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2206 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2207 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2208 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2209 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2211 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2212 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2213 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2214 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2215 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2217 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2218 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2219 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2220 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2222 /* Here we interpolate in the y dimension... */
2223 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2224 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2225 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2226 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2227 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2228 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2230 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2231 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2232 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2233 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2235 /* Now do the same thing for the front four gradients... */
2237 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2238 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2239 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2240 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2242 brw_push_insn_state( p
);
2243 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2244 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2245 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2246 brw_pop_insn_state( p
);
2248 brw_MUL( p
, x1y0
, x1y0
, t
);
2249 brw_MUL( p
, x1y1
, x1y1
, t
);
2250 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2251 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2252 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2255 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2256 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2257 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2258 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2260 brw_push_insn_state( p
);
2261 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2262 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2263 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2264 brw_pop_insn_state( p
);
2266 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2267 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2268 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2269 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2270 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2272 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2273 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2274 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2275 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2278 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2279 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2280 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2281 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2283 brw_push_insn_state( p
);
2284 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2285 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2286 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2287 brw_pop_insn_state( p
);
2289 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2290 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2291 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2292 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2293 /* prepare t for the w component (used below): w the first time through
2294 the loop; w - 1 the second time) */
2295 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2296 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2297 p
->current
->header
.predicate_inverse
= 1;
2298 brw_MOV( p
, t
, param
[ 3 ] );
2299 p
->current
->header
.predicate_inverse
= 0;
2300 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2302 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2303 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2304 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2305 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2308 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2309 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2310 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2311 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2313 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2314 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2315 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2316 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2318 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2319 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2320 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2321 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2323 /* Interpolate in the y dimension: */
2324 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2325 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2326 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2327 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2328 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2329 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2331 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2332 time put the front face in tmp[ 1 ] and we're nearly there... */
2333 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2334 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2335 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2337 /* Another interpolation, in the z dimension: */
2338 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2339 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2340 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2342 /* Exit the loop if we've computed both cubes... */
2343 origin
= p
->nr_insn
;
2344 brw_push_insn_state( p
);
2345 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2346 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2347 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2348 brw_pop_insn_state( p
);
2350 /* Save the result for the w=0 case, and increment the w coordinate: */
2351 brw_MOV( p
, w0
, tmp
[ 0 ] );
2352 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2355 /* Loop around for the other cube. Explicitly set the flag register
2356 (unfortunately we must spend an extra instruction to do this: we
2357 can't rely on a side effect of the previous MOV or ADD because
2358 conditional modifiers which are normally true might be false in
2359 exceptional circumstances, e.g. given a NaN input; the add to
2360 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2361 brw_push_insn_state( p
);
2362 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2363 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2364 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2365 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2366 brw_pop_insn_state( p
);
2368 /* Patch the previous conditional branch now that we know the
2369 destination address. */
2370 brw_set_src1( p
->store
+ origin
,
2371 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2373 /* The very last interpolation. */
2374 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2375 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2376 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2378 /* scale by pow( 2, -15 ), as described above */
2379 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2381 release_tmps( c
, mark
);
2384 static void emit_noise4( struct brw_wm_compile
*c
,
2385 struct prog_instruction
*inst
)
2387 struct brw_compile
*p
= &c
->func
;
2388 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2389 GLuint mask
= inst
->DstReg
.WriteMask
;
2391 int mark
= mark_tmps( c
);
2393 assert( mark
== 0 );
2395 src0
= get_src_reg( c
, inst
, 0, 0 );
2396 src1
= get_src_reg( c
, inst
, 0, 1 );
2397 src2
= get_src_reg( c
, inst
, 0, 2 );
2398 src3
= get_src_reg( c
, inst
, 0, 3 );
2400 param0
= alloc_tmp( c
);
2401 param1
= alloc_tmp( c
);
2402 param2
= alloc_tmp( c
);
2403 param3
= alloc_tmp( c
);
2405 brw_MOV( p
, param0
, src0
);
2406 brw_MOV( p
, param1
, src1
);
2407 brw_MOV( p
, param2
, src2
);
2408 brw_MOV( p
, param3
, src3
);
2410 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2412 /* Fill in the result: */
2413 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2414 for (i
= 0 ; i
< 4; i
++) {
2415 if (mask
& (1<<i
)) {
2416 dst
= get_dst_reg(c
, inst
, i
);
2417 brw_MOV( p
, dst
, param0
);
2420 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2421 brw_set_saturate( p
, 0 );
2423 release_tmps( c
, mark
);
/**
 * Emit code for the window-position XY instruction.
 *
 * Fetches the destination registers for components 0 and 1 and the X and Y
 * components of source operand 0.  For each of X / Y enabled in the
 * destination write mask, the source (retyped to BRW_REGISTER_TYPE_W) is
 * combined with an immediate derived from the drawable:
 *   X: source and the immediate (0 - origin_x)
 *   Y: negated source and the immediate (origin_y + drawable_height - 1)
 *
 * NOTE(review): the opening lines of the two emitting calls (the opcode and
 * the destination operand) are not present in this listing; presumably they
 * are brw_ADD into dst[0] / dst[1] -- confirm against the complete source.
 */
2426 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2427 struct prog_instruction
*inst
)
2429 struct brw_compile
*p
= &c
->func
;
2430 GLuint mask
= inst
->DstReg
.WriteMask
;
2431 struct brw_reg src0
[2], dst
[2];
/* Destination registers for the X (0) and Y (1) components. */
2433 dst
[0] = get_dst_reg(c
, inst
, 0);
2434 dst
[1] = get_dst_reg(c
, inst
, 1);
/* X and Y components of source operand 0. */
2436 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2437 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2439 /* Calculate the pixel offset from window bottom left into destination
2442 if (mask
& WRITEMASK_X
) {
2443 /* X' = X - origin_x */
/* NOTE(review): the start of the emitting call is missing from this listing. */
2446 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2447 brw_imm_d(0 - c
->key
.origin_x
));
2450 if (mask
& WRITEMASK_Y
) {
2451 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
/* NOTE(review): the start of the emitting call is missing from this listing. */
2454 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2455 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
/* BIAS on SIMD8 not working yet... */
/**
 * Emit a texture sample with bias.
 *
 * Message registers are loaded as follows:
 *   m2..m4  s, t, r coordinates from source operand 0; coordinates the
 *           texture target does not use are set to 0.0
 *           (1D: t and r are zero; 2D / RECT: r is zero)
 *   m5      bias, taken from component 3 of source operand 0
 *   m6      0.0 (labelled "ref", possibly unused)
 *
 * The trailing argument list passes dst[0] as destination, the
 * PAYLOAD_DEPTH payload register as src0, the surface index of the sampler
 * unit, the instruction's write mask,
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS as message type and a response
 * length of 4.
 *
 * NOTE(review): the declaration of `i`, the `break` / `default` lines of the
 * switch, and the opening of the call that sends the sampler message (plus
 * some of its arguments) are not present in this listing.
 */
2462 static void emit_txb(struct brw_wm_compile
*c
,
2463 struct prog_instruction
*inst
)
2465 struct brw_compile
*p
= &c
->func
;
2466 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Sampler unit bound to the instruction's texture source unit. */
2467 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2470 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination and source component registers. */
2472 for (i
= 0; i
< 4; i
++)
2473 dst
[i
] = get_dst_reg(c
, inst
, i
);
2474 for (i
= 0; i
< 4; i
++)
2475 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load the texture coordinates according to the texture target. */
2477 switch (inst
->TexSrcTarget
) {
2478 case TEXTURE_1D_INDEX
:
2479 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2480 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2481 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
/* NOTE(review): a line is missing here in this listing (presumably `break;`). */
2483 case TEXTURE_2D_INDEX
:
2484 case TEXTURE_RECT_INDEX
:
2485 brw_MOV(p
, brw_message_reg(2), src
[0]);
2486 brw_MOV(p
, brw_message_reg(3), src
[1]);
2487 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* All three coordinates supplied; presumably the default case (its label is
   missing from this listing). */
2490 brw_MOV(p
, brw_message_reg(2), src
[0]);
2491 brw_MOV(p
, brw_message_reg(3), src
[1]);
2492 brw_MOV(p
, brw_message_reg(4), src
[2]);
2495 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2496 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
/* Arguments of the sampler-message call; its opening line is missing from
   this listing. */
2498 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2500 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2501 SURF_INDEX_TEXTURE(unit
),
2503 inst
->DstReg
.WriteMask
, /* writemask */
2504 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2505 4, /* response_length */
/**
 * Emit a texture sample.
 *
 * What the visible code establishes:
 *   - `shadow` is 1 when the sampler unit's bit is set in
 *     c->key.shadowtex_mask, otherwise 0.
 *   - The texture target selects which coordinates are emitted: `emit` is
 *     WRITEMASK_XY for 2D / RECT targets and WRITEMASK_XYZ in another branch.
 *   - In the coordinate loop each message register brw_message_reg(msg_len+1)
 *     receives either the source component chosen through swz[] or 0.0.
 *   - Message register 5 is set to 0.0 (lod / bias) and message register 6 to
 *     source component 2 (ref value / R coord).
 *   - The sampler message uses BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, a response
 *     length of 4, and a message length of 6 when `shadow` is set, else 4.
 *   - Finally 1.0 is written to destination component 3.
 *
 * NOTE(review): many lines are absent from this listing -- the declarations
 * of i, nr, emit and msg_len, the body of the 1D case, the conditions that
 * select between the two MOVs inside the loop, whatever guards the m5 / m6
 * setup and the final write to dst[3] (presumably `shadow`), and the opening
 * of the sampler-message call.  Confirm against the complete source.
 */
2511 static void emit_tex(struct brw_wm_compile
*c
,
2512 struct prog_instruction
*inst
)
2514 struct brw_compile
*p
= &c
->func
;
2515 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Sampler unit bound to the instruction's texture source unit. */
2516 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when this unit is flagged in the key's shadow-texture mask. */
2520 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2522 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination and source component registers. */
2524 for (i
= 0; i
< 4; i
++)
2525 dst
[i
] = get_dst_reg(c
, inst
, i
);
2526 for (i
= 0; i
< 4; i
++)
2527 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Choose which coordinates to emit for the texture target. */
2529 switch (inst
->TexSrcTarget
) {
2530 case TEXTURE_1D_INDEX
:
/* NOTE(review): the body of the 1D case is missing from this listing. */
2534 case TEXTURE_2D_INDEX
:
2535 case TEXTURE_RECT_INDEX
:
2536 emit
= WRITEMASK_XY
;
/* Presumably the default case (its label is missing from this listing). */
2540 emit
= WRITEMASK_XYZ
;
2546 /* move/load S, T, R coords */
2547 for (i
= 0; i
< nr
; i
++) {
/* Maps loop index to source component; index 3 reuses component 2. */
2548 static const GLuint swz
[4] = {0,1,2,2};
/* NOTE(review): the condition choosing between the next two MOVs is missing
   from this listing (presumably a test of `emit`). */
2550 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2552 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2557 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2558 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
/* Arguments of the sampler-message call; its opening line is missing from
   this listing. */
2562 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2564 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2565 SURF_INDEX_TEXTURE(unit
),
2567 inst
->DstReg
.WriteMask
, /* writemask */
2568 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2569 4, /* response_length */
2570 shadow
? 6 : 4, /* msg_length */
/* Force destination component 3 to 1.0 (guarding condition not visible). */
2574 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2579 * Resolve subroutine calls after code emit is done.
2581 static void post_wm_emit( struct brw_wm_compile
*c
)
2583 brw_resolve_cals(&c
->func
);
/**
 * Main code-generation loop for fragment programs handled by this file.
 *
 * Walks c->prog_instructions in order and, for each instruction:
 *   - fetches constants first when c->use_const_buffer is set;
 *   - sets the conditional modifier from inst->CondUpdate;
 *   - dispatches on inst->Opcode to the matching emit_* helper, or handles
 *     flow control inline:
 *       IF / ELSE / ENDIF   tracked on the if_inst[] stack;
 *       subroutine call     stores a return address through stack_index,
 *                           advances the stack address by 4, records the call
 *                           with brw_save_call() and adds to the IP;
 *       subroutine return   moves the stack address back by 4 and loads the
 *                           IP from the stored slot;
 *       BGNLOOP / ENDLOOP   tracked on the loop_inst[] stack; at ENDLOOP the
 *                           jump_count / pop_count of BREAK and CONTINUE
 *                           instructions emitted since the loop start are
 *                           patched;
 *   - sets predicate control from inst->CondUpdate afterwards.
 * After the loop a problem is reported if c->reg_index has reached
 * BRW_WM_MAX_GRF.
 *
 * NOTE(review): this listing is missing many lines of the function (most
 * case labels and `break`s, several `else` lines, closing braces, and some
 * statements), so the comments here describe only what is visible.
 *
 * \param brw  driver context (no use of it is visible in this listing)
 * \param c    compile state; code is emitted into c->func
 */
2586 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2589 #define MAX_LOOP_DEPTH 32
/* Stacks of open IF and loop instructions, indexed by if_insn / loop_insn. */
2590 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2591 struct brw_instruction
*inst0
, *inst1
;
2592 int i
, if_insn
= 0, loop_insn
= 0;
2593 struct brw_compile
*p
= &c
->func
;
/* Address register used as the pointer into the subroutine call stack. */
2594 struct brw_indirect stack_index
= brw_indirect(0, 0);
2598 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the stack address register at the start of c->stack. */
2599 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
/* Translate each program instruction in order. */
2601 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2602 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
/* Debug dump of the instruction (the guarding condition is not visible in
   this listing). */
2605 _mesa_printf("Inst %d: ", i
);
2606 _mesa_print_instruction(inst
);
2609 /* fetch any constants that this instruction needs */
2610 if (c
->use_const_buffer
)
2611 fetch_constants(c
, inst
);
/* Conditional modifier follows CondUpdate.  NOTE(review): the `else` line
   between the next two statements appears to be missing from this listing. */
2613 if (inst
->CondUpdate
)
2614 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2616 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
/* Per-opcode dispatch; most case labels are missing from this listing. */
2618 switch (inst
->Opcode
) {
2620 emit_pixel_xy(c
, inst
);
2623 emit_delta_xy(c
, inst
);
2626 emit_pixel_w(c
, inst
);
2629 emit_linterp(c
, inst
);
2632 emit_pinterp(c
, inst
);
2635 emit_cinterp(c
, inst
);
2638 emit_wpos_xy(c
, inst
);
2641 emit_fb_write(c
, inst
);
2643 case WM_FRONTFACING
:
2644 emit_frontfacing(c
, inst
);
2668 emit_trunc(c
, inst
);
2705 emit_min_max(c
, inst
);
2741 emit_noise1(c
, inst
);
2744 emit_noise2(c
, inst
);
2747 emit_noise3(c
, inst
);
2750 emit_noise4(c
, inst
);
/* IF: push the new IF instruction onto the if_inst stack. */
2762 assert(if_insn
< MAX_IFSN
);
2763 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
/* ELSE: replace the top-of-stack IF with the ELSE instruction.
   NOTE(review): no check that if_insn > 0 is visible before this access. */
2766 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
/* ENDIF: pop the if_inst stack and close the IF/ELSE. */
2769 assert(if_insn
> 0);
2770 brw_ENDIF(p
, if_inst
[--if_insn
]);
/* Record the current instruction index under the label in inst->Comment. */
2773 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Subroutine call: store IP + 3*16 (presumably the address just past the
   jump below) in the current stack slot, advance the stack address by 4,
   record the call for later resolution, then add to the IP. */
2779 brw_push_insn_state(p
);
2780 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2781 brw_set_access_mode(p
, BRW_ALIGN_1
);
2782 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2783 brw_set_access_mode(p
, BRW_ALIGN_16
);
2784 brw_ADD(p
, get_addr_reg(stack_index
),
2785 get_addr_reg(stack_index
), brw_imm_d(4));
2786 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2787 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2788 brw_pop_insn_state(p
);
/* Subroutine return: move the stack address back by 4 and load the IP from
   that slot. */
2792 brw_push_insn_state(p
);
2793 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2794 brw_ADD(p
, get_addr_reg(stack_index
),
2795 get_addr_reg(stack_index
), brw_imm_d(-4));
2796 brw_set_access_mode(p
, BRW_ALIGN_1
);
2797 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2798 brw_set_access_mode(p
, BRW_ALIGN_16
);
2799 brw_pop_insn_state(p
);
/* Loop start: push the DO instruction onto the loop_inst stack.
   NOTE(review): no bounds check against MAX_LOOP_DEPTH is visible here,
   unlike the assert that guards if_inst in the IF case. */
2802 case OPCODE_BGNLOOP
:
2803 /* XXX may need to invalidate the current_constant regs */
2804 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2808 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2812 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
/* Loop end: emit WHILE, then walk back to the loop start patching BREAK and
   CONTINUE jump counts.  NOTE(review): a line preceding the WHILE is missing
   from this listing (presumably the decrement of loop_insn). */
2814 case OPCODE_ENDLOOP
:
2816 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2817 /* patch all the BREAK instructions from
2819 while (inst0
> loop_inst
[loop_insn
]) {
2821 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2822 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2823 inst0
->bits3
.if_else
.pop_count
= 0;
2824 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2825 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2826 inst0
->bits3
.if_else
.pop_count
= 0;
2831 _mesa_printf("unsupported IR in fragment shader %d\n",
2834 if (inst
->CondUpdate
)
2835 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2837 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2841 if (c
->reg_index
>= BRW_WM_MAX_GRF
) {
2842 _mesa_problem(NULL
, "Ran out of registers in brw_wm_emit_glsl()");
2843 /* XXX we need to do some proper error recovery here */
2849 * Do GPU code generation for shaders that use GLSL features such as
2850 * flow control. Other shaders will be compiled with the
2852 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2854 if (INTEL_DEBUG
& DEBUG_WM
) {
2855 _mesa_printf("brw_wm_glsl_emit:\n");
2858 /* initial instruction translation/simplification */
2861 /* actual code generation */
2862 brw_wm_emit_glsl(brw
, c
);
2864 if (INTEL_DEBUG
& DEBUG_WM
) {
2865 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2868 c
->prog_data
.total_grf
= c
->reg_index
;
2869 c
->prog_data
.total_scratch
= 0;