1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
46 * Record the mapping of a Mesa register to a hardware register.
48 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
49 int component
, struct brw_reg reg
)
51 c
->wm_regs
[file
][index
][component
].reg
= reg
;
52 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
56 * Examine instruction's write mask to find index of first component
57 * enabled for writing.
59 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
62 for (i
= 0; i
< 4; i
++)
63 if (inst
->DstReg
.WriteMask
& (1<<i
))
68 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
71 if(c
->tmp_index
== c
->tmp_max
)
72 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
74 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
79 * Save current temp register info.
80 * There must be a matching call to release_tmps().
82 static int mark_tmps(struct brw_wm_compile
*c
)
87 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
89 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
92 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
98 * Convert Mesa src register to brw register.
100 * Since we're running in SOA mode each Mesa register corresponds to four
101 * hardware registers. We allocate the hardware registers as needed here.
103 * \param file register file, one of PROGRAM_x
104 * \param index register number
105 * \param component src component (X=0, Y=1, Z=2, W=3)
106 * \param nr not used?!?
107 * \param neg negate value?
108 * \param abs take absolute value?
110 static struct brw_reg
111 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
112 int nr
, GLuint neg
, GLuint abs
)
116 case PROGRAM_STATE_VAR
:
117 case PROGRAM_CONSTANT
:
118 case PROGRAM_UNIFORM
:
119 file
= PROGRAM_STATE_VAR
;
121 case PROGRAM_UNDEFINED
:
122 return brw_null_reg();
123 case PROGRAM_TEMPORARY
:
126 case PROGRAM_PAYLOAD
:
129 _mesa_problem(NULL
, "Unexpected file in get_reg()");
130 return brw_null_reg();
133 /* see if we've already allocated a HW register for this Mesa register */
134 if (c
->wm_regs
[file
][index
][component
].inited
) {
136 reg
= c
->wm_regs
[file
][index
][component
].reg
;
139 /* no, allocate new register */
140 reg
= brw_vec8_grf(c
->reg_index
, 0);
143 /* if this is a new register allocation, record it in the table */
144 if (!c
->wm_regs
[file
][index
][component
].inited
) {
145 set_reg(c
, file
, index
, component
, reg
);
149 if (c
->reg_index
>= BRW_WM_MAX_GRF
- 12) {
150 /* ran out of temporary registers! */
152 /* This is a big hack for now.
153 * Return bad register index, just don't hang the GPU.
155 _mesa_fprintf(stderr
, "out of regs %d\n", c
->reg_index
);
156 c
->reg_index
= BRW_WM_MAX_GRF
- 13;
158 return brw_null_reg();
162 if (neg
& (1 << component
)) {
172 * Preallocate registers. This sets up the Mesa to hardware register
173 * mapping for certain registers, such as constants (uniforms/state vars)
176 static void prealloc_reg(struct brw_wm_compile
*c
)
180 int nr_interp_regs
= 0;
181 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
183 for (i
= 0; i
< 4; i
++) {
184 if (i
< c
->key
.nr_depth_regs
)
185 reg
= brw_vec8_grf(i
* 2, 0);
187 reg
= brw_vec8_grf(0, 0);
188 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
190 c
->reg_index
+= 2 * c
->key
.nr_depth_regs
;
194 const int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
196 /* use a real constant buffer, or just use a section of the GRF? */
197 c
->fp
->use_const_buffer
= GL_FALSE
; /* (nr_params > 8);*/
199 if (c
->fp
->use_const_buffer
) {
200 /* We'll use a real constant buffer and fetch constants from
201 * it with a dataport read message.
204 /* number of float constants in CURBE */
205 c
->prog_data
.nr_params
= 0;
208 const struct gl_program_parameter_list
*plist
=
209 c
->fp
->program
.Base
.Parameters
;
212 /* number of float constants in CURBE */
213 c
->prog_data
.nr_params
= 4 * nr_params
;
215 /* loop over program constants (float[4]) */
216 for (i
= 0; i
< nr_params
; i
++) {
217 /* loop over XYZW channels */
218 for (j
= 0; j
< 4; j
++, index
++) {
219 reg
= brw_vec1_grf(c
->reg_index
+ index
/ 8, index
% 8);
220 /* Save pointer to parameter/constant value.
221 * Constants will be copied in prepare_constant_buffer()
223 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
224 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
227 /* number of constant regs used (each reg is float[8]) */
228 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
229 c
->reg_index
+= c
->nr_creg
;
233 /* fragment shader inputs */
234 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
235 if (inputs
& (1<<i
)) {
237 reg
= brw_vec8_grf(c
->reg_index
, 0);
238 for (j
= 0; j
< 4; j
++)
239 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
244 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
245 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
246 c
->prog_data
.curb_read_length
= c
->nr_creg
;
247 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
249 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
252 /* An instruction may reference up to three constants.
253 * They'll be found in these registers.
254 * XXX alloc these on demand!
256 if (c
->fp
->use_const_buffer
) {
257 for (i
= 0; i
< 3; i
++) {
258 c
->current_const
[i
].index
= -1;
259 c
->current_const
[i
].reg
= alloc_tmp(c
);
263 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
264 printf("AFTER PRE_ALLOC, reg_index = %d\n", c
->reg_index
);
270 * Check if any of the instruction's src registers are constants, uniforms,
271 * or statevars. If so, fetch any constants that we don't already have in
272 * the three GRF slots.
274 static void fetch_constants(struct brw_wm_compile
*c
,
275 const struct prog_instruction
*inst
)
277 struct brw_compile
*p
= &c
->func
;
280 /* loop over instruction src regs */
281 for (i
= 0; i
< 3; i
++) {
282 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
283 if (src
->File
== PROGRAM_STATE_VAR
||
284 src
->File
== PROGRAM_CONSTANT
||
285 src
->File
== PROGRAM_UNIFORM
) {
286 if (c
->current_const
[i
].index
!= src
->Index
) {
287 c
->current_const
[i
].index
= src
->Index
;
290 printf(" fetch const[%d] for arg %d into reg %d\n",
291 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
294 /* need to fetch the constant now */
296 c
->current_const
[i
].reg
, /* writeback dest */
298 src
->RelAddr
, /* relative indexing? */
299 16 * src
->Index
, /* byte offset */
300 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
309 * Convert Mesa dst register to brw register.
311 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
312 const struct prog_instruction
*inst
,
316 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
321 static struct brw_reg
322 get_src_reg_const(struct brw_wm_compile
*c
,
323 const struct prog_instruction
*inst
,
324 GLuint srcRegIndex
, GLuint component
)
326 /* We should have already fetched the constant from the constant
327 * buffer in fetch_constants(). Now we just have to return a
328 * register description that extracts the needed component and
329 * smears it across all eight vector components.
331 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
332 struct brw_reg const_reg
;
334 assert(component
< 4);
335 assert(srcRegIndex
< 3);
336 assert(c
->current_const
[srcRegIndex
].index
!= -1);
337 const_reg
= c
->current_const
[srcRegIndex
].reg
;
339 /* extract desired float from the const_reg, and smear */
340 const_reg
= stride(const_reg
, 0, 1, 0);
341 const_reg
.subnr
= component
* 4;
343 if (src
->Negate
& (1 << component
))
344 const_reg
= negate(const_reg
);
346 const_reg
= brw_abs(const_reg
);
349 printf(" form const[%d].%d for arg %d, reg %d\n",
350 c
->current_const
[srcRegIndex
].index
,
361 * Convert Mesa src register to brw register.
363 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
364 const struct prog_instruction
*inst
,
365 GLuint srcRegIndex
, GLuint channel
)
367 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
369 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
371 if (c
->fp
->use_const_buffer
&&
372 (src
->File
== PROGRAM_STATE_VAR
||
373 src
->File
== PROGRAM_CONSTANT
||
374 src
->File
== PROGRAM_UNIFORM
)) {
375 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
378 /* other type of source register */
379 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
380 src
->Negate
, src
->Abs
);
386 * Same as \sa get_src_reg() but if the register is a literal, emit
387 * a brw_reg encoding the literal.
388 * Note that a brw instruction only allows one src operand to be a literal.
389 * For instructions with more than one operand, only the second can be a
390 * literal. This means that we treat some literals as constants/uniforms
391 * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
394 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
395 const struct prog_instruction
*inst
,
396 GLuint srcRegIndex
, GLuint channel
)
398 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
399 if (src
->File
== PROGRAM_CONSTANT
) {
401 const int component
= GET_SWZ(src
->Swizzle
, channel
);
402 const GLfloat
*param
=
403 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
404 GLfloat value
= param
[component
];
405 if (src
->Negate
& (1 << channel
))
408 value
= FABSF(value
);
410 printf(" form immed value %f for chan %d\n", value
, channel
);
412 return brw_imm_f(value
);
415 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
421 * Subroutines are minimal support for resusable instruction sequences.
422 * They are implemented as simply as possible to minimise overhead: there
423 * is no explicit support for communication between the caller and callee
424 * other than saving the return address in a temporary register, nor is
425 * there any automatic local storage. This implies that great care is
426 * required before attempting reentrancy or any kind of nested
427 * subroutine invocations.
429 static void invoke_subroutine( struct brw_wm_compile
*c
,
430 enum _subroutine subroutine
,
431 void (*emit
)( struct brw_wm_compile
* ) )
433 struct brw_compile
*p
= &c
->func
;
435 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
437 if( c
->subroutines
[ subroutine
] ) {
438 /* subroutine previously emitted: reuse existing instructions */
440 int mark
= mark_tmps( c
);
441 struct brw_reg return_address
= retype( alloc_tmp( c
),
442 BRW_REGISTER_TYPE_UD
);
443 int here
= p
->nr_insn
;
445 brw_push_insn_state(p
);
446 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
447 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
449 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
450 brw_imm_d( ( c
->subroutines
[ subroutine
] -
452 brw_pop_insn_state(p
);
454 release_tmps( c
, mark
);
456 /* previously unused subroutine: emit, and mark for later reuse */
458 int mark
= mark_tmps( c
);
459 struct brw_reg return_address
= retype( alloc_tmp( c
),
460 BRW_REGISTER_TYPE_UD
);
461 struct brw_instruction
*calc
;
462 int base
= p
->nr_insn
;
464 brw_push_insn_state(p
);
465 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
466 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
467 brw_pop_insn_state(p
);
469 c
->subroutines
[ subroutine
] = p
->nr_insn
;
473 brw_push_insn_state(p
);
474 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
475 brw_MOV( p
, brw_ip_reg(), return_address
);
476 brw_pop_insn_state(p
);
478 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
480 release_tmps( c
, mark
);
484 static void emit_abs( struct brw_wm_compile
*c
,
485 const struct prog_instruction
*inst
)
488 struct brw_compile
*p
= &c
->func
;
489 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
490 for (i
= 0; i
< 4; i
++) {
491 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
492 struct brw_reg src
, dst
;
493 dst
= get_dst_reg(c
, inst
, i
);
494 src
= get_src_reg(c
, inst
, 0, i
);
495 brw_MOV(p
, dst
, brw_abs(src
));
498 brw_set_saturate(p
, 0);
501 static void emit_trunc( struct brw_wm_compile
*c
,
502 const struct prog_instruction
*inst
)
505 struct brw_compile
*p
= &c
->func
;
506 GLuint mask
= inst
->DstReg
.WriteMask
;
507 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
508 for (i
= 0; i
< 4; i
++) {
510 struct brw_reg src
, dst
;
511 dst
= get_dst_reg(c
, inst
, i
);
512 src
= get_src_reg(c
, inst
, 0, i
);
513 brw_RNDZ(p
, dst
, src
);
516 brw_set_saturate(p
, 0);
519 static void emit_mov( struct brw_wm_compile
*c
,
520 const struct prog_instruction
*inst
)
523 struct brw_compile
*p
= &c
->func
;
524 GLuint mask
= inst
->DstReg
.WriteMask
;
525 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
526 for (i
= 0; i
< 4; i
++) {
528 struct brw_reg src
, dst
;
529 dst
= get_dst_reg(c
, inst
, i
);
530 /* XXX some moves from immediate value don't work reliably!!! */
531 /*src = get_src_reg_imm(c, inst, 0, i);*/
532 src
= get_src_reg(c
, inst
, 0, i
);
533 brw_MOV(p
, dst
, src
);
536 brw_set_saturate(p
, 0);
539 static void emit_pixel_xy(struct brw_wm_compile
*c
,
540 const struct prog_instruction
*inst
)
542 struct brw_reg r1
= brw_vec1_grf(1, 0);
543 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
545 struct brw_reg dst0
, dst1
;
546 struct brw_compile
*p
= &c
->func
;
547 GLuint mask
= inst
->DstReg
.WriteMask
;
549 dst0
= get_dst_reg(c
, inst
, 0);
550 dst1
= get_dst_reg(c
, inst
, 1);
551 /* Calculate pixel centers by adding 1 or 0 to each of the
552 * micro-tile coordinates passed in r1.
554 if (mask
& WRITEMASK_X
) {
556 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
557 stride(suboffset(r1_uw
, 4), 2, 4, 0),
558 brw_imm_v(0x10101010));
561 if (mask
& WRITEMASK_Y
) {
563 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
564 stride(suboffset(r1_uw
, 5), 2, 4, 0),
565 brw_imm_v(0x11001100));
569 static void emit_delta_xy(struct brw_wm_compile
*c
,
570 const struct prog_instruction
*inst
)
572 struct brw_reg r1
= brw_vec1_grf(1, 0);
573 struct brw_reg dst0
, dst1
, src0
, src1
;
574 struct brw_compile
*p
= &c
->func
;
575 GLuint mask
= inst
->DstReg
.WriteMask
;
577 dst0
= get_dst_reg(c
, inst
, 0);
578 dst1
= get_dst_reg(c
, inst
, 1);
579 src0
= get_src_reg(c
, inst
, 0, 0);
580 src1
= get_src_reg(c
, inst
, 0, 1);
581 /* Calc delta X,Y by subtracting origin in r1 from the pixel
584 if (mask
& WRITEMASK_X
) {
587 retype(src0
, BRW_REGISTER_TYPE_UW
),
591 if (mask
& WRITEMASK_Y
) {
594 retype(src1
, BRW_REGISTER_TYPE_UW
),
595 negate(suboffset(r1
,1)));
600 static void fire_fb_write( struct brw_wm_compile
*c
,
606 struct brw_compile
*p
= &c
->func
;
607 /* Pass through control information:
609 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
611 brw_push_insn_state(p
);
612 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
614 brw_message_reg(base_reg
+ 1),
616 brw_pop_insn_state(p
);
618 /* Send framebuffer write message: */
620 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
622 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
629 static void emit_fb_write(struct brw_wm_compile
*c
,
630 const struct prog_instruction
*inst
)
632 struct brw_compile
*p
= &c
->func
;
638 /* Reserve a space for AA - may not be needed:
640 if (c
->key
.aa_dest_stencil_reg
)
643 brw_push_insn_state(p
);
644 for (channel
= 0; channel
< 4; channel
++) {
645 src0
= get_src_reg(c
, inst
, 0, channel
);
646 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
647 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
648 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
650 /* skip over the regs populated above: */
652 brw_pop_insn_state(p
);
654 if (c
->key
.source_depth_to_render_target
) {
655 if (c
->key
.computes_depth
) {
656 src0
= get_src_reg(c
, inst
, 2, 2);
657 brw_MOV(p
, brw_message_reg(nr
), src0
);
660 src0
= get_src_reg(c
, inst
, 1, 1);
661 brw_MOV(p
, brw_message_reg(nr
), src0
);
667 if (c
->key
.dest_depth_reg
) {
668 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
669 GLuint off
= c
->key
.dest_depth_reg
% 2;
674 /* XXX do we need this code? comp always 1, off always 0, it seems */
676 brw_push_insn_state(p
);
677 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
679 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
681 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
682 brw_pop_insn_state(p
);
687 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
688 brw_MOV(p
, brw_message_reg(nr
), src
);
693 target
= inst
->Aux
>> 1;
695 fire_fb_write(c
, 0, nr
, target
, eot
);
698 static void emit_pixel_w( struct brw_wm_compile
*c
,
699 const struct prog_instruction
*inst
)
701 struct brw_compile
*p
= &c
->func
;
702 GLuint mask
= inst
->DstReg
.WriteMask
;
703 if (mask
& WRITEMASK_W
) {
704 struct brw_reg dst
, src0
, delta0
, delta1
;
705 struct brw_reg interp3
;
707 dst
= get_dst_reg(c
, inst
, 3);
708 src0
= get_src_reg(c
, inst
, 0, 0);
709 delta0
= get_src_reg(c
, inst
, 1, 0);
710 delta1
= get_src_reg(c
, inst
, 1, 1);
712 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
713 /* Calc 1/w - just linterp wpos[3] optimized by putting the
714 * result straight into a message reg.
716 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
717 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
721 BRW_MATH_FUNCTION_INV
,
722 BRW_MATH_SATURATE_NONE
,
724 BRW_MATH_PRECISION_FULL
);
728 static void emit_linterp(struct brw_wm_compile
*c
,
729 const struct prog_instruction
*inst
)
731 struct brw_compile
*p
= &c
->func
;
732 GLuint mask
= inst
->DstReg
.WriteMask
;
733 struct brw_reg interp
[4];
734 struct brw_reg dst
, delta0
, delta1
;
738 src0
= get_src_reg(c
, inst
, 0, 0);
739 delta0
= get_src_reg(c
, inst
, 1, 0);
740 delta1
= get_src_reg(c
, inst
, 1, 1);
743 interp
[0] = brw_vec1_grf(nr
, 0);
744 interp
[1] = brw_vec1_grf(nr
, 4);
745 interp
[2] = brw_vec1_grf(nr
+1, 0);
746 interp
[3] = brw_vec1_grf(nr
+1, 4);
748 for(i
= 0; i
< 4; i
++ ) {
750 dst
= get_dst_reg(c
, inst
, i
);
751 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
752 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
757 static void emit_cinterp(struct brw_wm_compile
*c
,
758 const struct prog_instruction
*inst
)
760 struct brw_compile
*p
= &c
->func
;
761 GLuint mask
= inst
->DstReg
.WriteMask
;
763 struct brw_reg interp
[4];
764 struct brw_reg dst
, src0
;
767 src0
= get_src_reg(c
, inst
, 0, 0);
770 interp
[0] = brw_vec1_grf(nr
, 0);
771 interp
[1] = brw_vec1_grf(nr
, 4);
772 interp
[2] = brw_vec1_grf(nr
+1, 0);
773 interp
[3] = brw_vec1_grf(nr
+1, 4);
775 for(i
= 0; i
< 4; i
++ ) {
777 dst
= get_dst_reg(c
, inst
, i
);
778 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
783 static void emit_pinterp(struct brw_wm_compile
*c
,
784 const struct prog_instruction
*inst
)
786 struct brw_compile
*p
= &c
->func
;
787 GLuint mask
= inst
->DstReg
.WriteMask
;
789 struct brw_reg interp
[4];
790 struct brw_reg dst
, delta0
, delta1
;
791 struct brw_reg src0
, w
;
794 src0
= get_src_reg(c
, inst
, 0, 0);
795 delta0
= get_src_reg(c
, inst
, 1, 0);
796 delta1
= get_src_reg(c
, inst
, 1, 1);
797 w
= get_src_reg(c
, inst
, 2, 3);
800 interp
[0] = brw_vec1_grf(nr
, 0);
801 interp
[1] = brw_vec1_grf(nr
, 4);
802 interp
[2] = brw_vec1_grf(nr
+1, 0);
803 interp
[3] = brw_vec1_grf(nr
+1, 4);
805 for(i
= 0; i
< 4; i
++ ) {
807 dst
= get_dst_reg(c
, inst
, i
);
808 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
809 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
811 brw_MUL(p
, dst
, dst
, w
);
816 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
817 static void emit_frontfacing(struct brw_wm_compile
*c
,
818 const struct prog_instruction
*inst
)
820 struct brw_compile
*p
= &c
->func
;
821 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
823 GLuint mask
= inst
->DstReg
.WriteMask
;
826 for (i
= 0; i
< 4; i
++) {
828 dst
= get_dst_reg(c
, inst
, i
);
829 brw_MOV(p
, dst
, brw_imm_f(0.0));
833 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
836 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
837 for (i
= 0; i
< 4; i
++) {
839 dst
= get_dst_reg(c
, inst
, i
);
840 brw_MOV(p
, dst
, brw_imm_f(1.0));
843 brw_set_predicate_control_flag_value(p
, 0xff);
846 static void emit_xpd(struct brw_wm_compile
*c
,
847 const struct prog_instruction
*inst
)
850 struct brw_compile
*p
= &c
->func
;
851 GLuint mask
= inst
->DstReg
.WriteMask
;
852 for (i
= 0; i
< 4; i
++) {
856 struct brw_reg src0
, src1
, dst
;
857 dst
= get_dst_reg(c
, inst
, i
);
858 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
859 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
860 brw_MUL(p
, brw_null_reg(), src0
, src1
);
861 src0
= get_src_reg(c
, inst
, 0, i1
);
862 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
863 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
864 brw_MAC(p
, dst
, src0
, src1
);
865 brw_set_saturate(p
, 0);
868 brw_set_saturate(p
, 0);
871 static void emit_dp3(struct brw_wm_compile
*c
,
872 const struct prog_instruction
*inst
)
874 struct brw_reg src0
[3], src1
[3], dst
;
876 struct brw_compile
*p
= &c
->func
;
877 for (i
= 0; i
< 3; i
++) {
878 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
879 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
882 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
883 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
884 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
885 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
886 brw_MAC(p
, dst
, src0
[2], src1
[2]);
887 brw_set_saturate(p
, 0);
890 static void emit_dp4(struct brw_wm_compile
*c
,
891 const struct prog_instruction
*inst
)
893 struct brw_reg src0
[4], src1
[4], dst
;
895 struct brw_compile
*p
= &c
->func
;
896 for (i
= 0; i
< 4; i
++) {
897 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
898 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
900 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
901 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
902 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
903 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
904 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
905 brw_MAC(p
, dst
, src0
[3], src1
[3]);
906 brw_set_saturate(p
, 0);
909 static void emit_dph(struct brw_wm_compile
*c
,
910 const struct prog_instruction
*inst
)
912 struct brw_reg src0
[4], src1
[4], dst
;
914 struct brw_compile
*p
= &c
->func
;
915 for (i
= 0; i
< 4; i
++) {
916 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
917 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
919 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
920 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
921 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
922 brw_MAC(p
, dst
, src0
[2], src1
[2]);
923 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
924 brw_ADD(p
, dst
, dst
, src1
[3]);
925 brw_set_saturate(p
, 0);
929 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
930 * Note that the result of the function is smeared across the dest
931 * register's X, Y, Z and W channels (subject to writemasking of course).
933 static void emit_math1(struct brw_wm_compile
*c
,
934 const struct prog_instruction
*inst
, GLuint func
)
936 struct brw_compile
*p
= &c
->func
;
937 struct brw_reg src0
, dst
, tmp
;
938 const int mark
= mark_tmps( c
);
943 /* Get first component of source register */
944 src0
= get_src_reg(c
, inst
, 0, 0);
946 /* tmp = func(src0) */
947 brw_MOV(p
, brw_message_reg(2), src0
);
951 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
954 BRW_MATH_DATA_VECTOR
,
955 BRW_MATH_PRECISION_FULL
);
957 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
959 /* replicate tmp value across enabled dest channels */
960 for (i
= 0; i
< 4; i
++) {
961 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
962 dst
= get_dst_reg(c
, inst
, i
);
963 brw_MOV(p
, dst
, tmp
);
967 release_tmps(c
, mark
);
970 static void emit_rcp(struct brw_wm_compile
*c
,
971 const struct prog_instruction
*inst
)
973 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
976 static void emit_rsq(struct brw_wm_compile
*c
,
977 const struct prog_instruction
*inst
)
979 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
982 static void emit_sin(struct brw_wm_compile
*c
,
983 const struct prog_instruction
*inst
)
985 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
988 static void emit_cos(struct brw_wm_compile
*c
,
989 const struct prog_instruction
*inst
)
991 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
994 static void emit_ex2(struct brw_wm_compile
*c
,
995 const struct prog_instruction
*inst
)
997 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1000 static void emit_lg2(struct brw_wm_compile
*c
,
1001 const struct prog_instruction
*inst
)
1003 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1006 static void emit_add(struct brw_wm_compile
*c
,
1007 const struct prog_instruction
*inst
)
1009 struct brw_compile
*p
= &c
->func
;
1010 struct brw_reg src0
, src1
, dst
;
1011 GLuint mask
= inst
->DstReg
.WriteMask
;
1013 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1014 for (i
= 0 ; i
< 4; i
++) {
1015 if (mask
& (1<<i
)) {
1016 dst
= get_dst_reg(c
, inst
, i
);
1017 src0
= get_src_reg(c
, inst
, 0, i
);
1018 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1019 brw_ADD(p
, dst
, src0
, src1
);
1022 brw_set_saturate(p
, 0);
1025 static void emit_arl(struct brw_wm_compile
*c
,
1026 const struct prog_instruction
*inst
)
1028 struct brw_compile
*p
= &c
->func
;
1029 struct brw_reg src0
, addr_reg
;
1030 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1031 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1032 BRW_ARF_ADDRESS
, 0);
1033 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1034 brw_MOV(p
, addr_reg
, src0
);
1035 brw_set_saturate(p
, 0);
1038 static void emit_sub(struct brw_wm_compile
*c
,
1039 const struct prog_instruction
*inst
)
1041 struct brw_compile
*p
= &c
->func
;
1042 struct brw_reg src0
, src1
, dst
;
1043 GLuint mask
= inst
->DstReg
.WriteMask
;
1045 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1046 for (i
= 0 ; i
< 4; i
++) {
1047 if (mask
& (1<<i
)) {
1048 dst
= get_dst_reg(c
, inst
, i
);
1049 src0
= get_src_reg(c
, inst
, 0, i
);
1050 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1051 brw_ADD(p
, dst
, src0
, negate(src1
));
1054 brw_set_saturate(p
, 0);
1057 static void emit_mul(struct brw_wm_compile
*c
,
1058 const struct prog_instruction
*inst
)
1060 struct brw_compile
*p
= &c
->func
;
1061 struct brw_reg src0
, src1
, dst
;
1062 GLuint mask
= inst
->DstReg
.WriteMask
;
1064 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1065 for (i
= 0 ; i
< 4; i
++) {
1066 if (mask
& (1<<i
)) {
1067 dst
= get_dst_reg(c
, inst
, i
);
1068 src0
= get_src_reg(c
, inst
, 0, i
);
1069 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1070 brw_MUL(p
, dst
, src0
, src1
);
1073 brw_set_saturate(p
, 0);
1076 static void emit_frc(struct brw_wm_compile
*c
,
1077 const struct prog_instruction
*inst
)
1079 struct brw_compile
*p
= &c
->func
;
1080 struct brw_reg src0
, dst
;
1081 GLuint mask
= inst
->DstReg
.WriteMask
;
1083 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1084 for (i
= 0 ; i
< 4; i
++) {
1085 if (mask
& (1<<i
)) {
1086 dst
= get_dst_reg(c
, inst
, i
);
1087 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1088 brw_FRC(p
, dst
, src0
);
1091 if (inst
->SaturateMode
!= SATURATE_OFF
)
1092 brw_set_saturate(p
, 0);
1095 static void emit_flr(struct brw_wm_compile
*c
,
1096 const struct prog_instruction
*inst
)
1098 struct brw_compile
*p
= &c
->func
;
1099 struct brw_reg src0
, dst
;
1100 GLuint mask
= inst
->DstReg
.WriteMask
;
1102 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1103 for (i
= 0 ; i
< 4; i
++) {
1104 if (mask
& (1<<i
)) {
1105 dst
= get_dst_reg(c
, inst
, i
);
1106 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1107 brw_RNDD(p
, dst
, src0
);
1110 brw_set_saturate(p
, 0);
1114 static void emit_min_max(struct brw_wm_compile
*c
,
1115 const struct prog_instruction
*inst
)
1117 struct brw_compile
*p
= &c
->func
;
1118 const GLuint mask
= inst
->DstReg
.WriteMask
;
1119 const int mark
= mark_tmps(c
);
1121 brw_push_insn_state(p
);
1122 for (i
= 0; i
< 4; i
++) {
1123 if (mask
& (1<<i
)) {
1124 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1125 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1126 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1128 /* if dst==src0 or dst==src1 we need to use a temp reg */
1129 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1130 brw_same_reg(dst
, src1
);
1137 printf(" Min/max: dst %d src0 %d src1 %d\n",
1138 dst.nr, src0.nr, src1.nr);
1140 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1141 brw_MOV(p
, dst
, src0
);
1142 brw_set_saturate(p
, 0);
1144 if (inst
->Opcode
== OPCODE_MIN
)
1145 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1147 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1149 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1150 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1151 brw_MOV(p
, dst
, src1
);
1152 brw_set_saturate(p
, 0);
1153 brw_set_predicate_control_flag_value(p
, 0xff);
1155 brw_MOV(p
, real_dst
, dst
);
1158 brw_pop_insn_state(p
);
1159 release_tmps(c
, mark
);
1162 static void emit_pow(struct brw_wm_compile
*c
,
1163 const struct prog_instruction
*inst
)
1165 struct brw_compile
*p
= &c
->func
;
1166 struct brw_reg dst
, src0
, src1
;
1167 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1168 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1169 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1171 brw_MOV(p
, brw_message_reg(2), src0
);
1172 brw_MOV(p
, brw_message_reg(3), src1
);
1176 BRW_MATH_FUNCTION_POW
,
1177 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1180 BRW_MATH_DATA_VECTOR
,
1181 BRW_MATH_PRECISION_FULL
);
1184 static void emit_lrp(struct brw_wm_compile
*c
,
1185 const struct prog_instruction
*inst
)
1187 struct brw_compile
*p
= &c
->func
;
1188 GLuint mask
= inst
->DstReg
.WriteMask
;
1189 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1191 int mark
= mark_tmps(c
);
1192 for (i
= 0; i
< 4; i
++) {
1193 if (mask
& (1<<i
)) {
1194 dst
= get_dst_reg(c
, inst
, i
);
1195 src0
= get_src_reg(c
, inst
, 0, i
);
1197 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1199 if (src1
.nr
== dst
.nr
) {
1200 tmp1
= alloc_tmp(c
);
1201 brw_MOV(p
, tmp1
, src1
);
1205 src2
= get_src_reg(c
, inst
, 2, i
);
1206 if (src2
.nr
== dst
.nr
) {
1207 tmp2
= alloc_tmp(c
);
1208 brw_MOV(p
, tmp2
, src2
);
1212 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1213 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1214 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1215 brw_MAC(p
, dst
, src0
, tmp1
);
1216 brw_set_saturate(p
, 0);
1218 release_tmps(c
, mark
);
1223 * For GLSL shaders, this KIL will be unconditional.
1224 * It may be contained inside an IF/ENDIF structure of course.
1226 static void emit_kil(struct brw_wm_compile
*c
)
1228 struct brw_compile
*p
= &c
->func
;
1229 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1230 brw_push_insn_state(p
);
1231 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1232 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1233 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1234 brw_pop_insn_state(p
);
1237 static void emit_mad(struct brw_wm_compile
*c
,
1238 const struct prog_instruction
*inst
)
1240 struct brw_compile
*p
= &c
->func
;
1241 GLuint mask
= inst
->DstReg
.WriteMask
;
1242 struct brw_reg dst
, src0
, src1
, src2
;
1245 for (i
= 0; i
< 4; i
++) {
1246 if (mask
& (1<<i
)) {
1247 dst
= get_dst_reg(c
, inst
, i
);
1248 src0
= get_src_reg(c
, inst
, 0, i
);
1249 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1250 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1251 brw_MUL(p
, dst
, src0
, src1
);
1253 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1254 brw_ADD(p
, dst
, dst
, src2
);
1255 brw_set_saturate(p
, 0);
1260 static void emit_sop(struct brw_wm_compile
*c
,
1261 const struct prog_instruction
*inst
, GLuint cond
)
1263 struct brw_compile
*p
= &c
->func
;
1264 GLuint mask
= inst
->DstReg
.WriteMask
;
1265 struct brw_reg dst
, src0
, src1
;
1268 for (i
= 0; i
< 4; i
++) {
1269 if (mask
& (1<<i
)) {
1270 dst
= get_dst_reg(c
, inst
, i
);
1271 src0
= get_src_reg(c
, inst
, 0, i
);
1272 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1273 brw_push_insn_state(p
);
1274 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1275 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1276 brw_MOV(p
, dst
, brw_imm_f(0.0));
1277 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1278 brw_MOV(p
, dst
, brw_imm_f(1.0));
1279 brw_pop_insn_state(p
);
1284 static void emit_slt(struct brw_wm_compile
*c
,
1285 const struct prog_instruction
*inst
)
1287 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1290 static void emit_sle(struct brw_wm_compile
*c
,
1291 const struct prog_instruction
*inst
)
1293 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1296 static void emit_sgt(struct brw_wm_compile
*c
,
1297 const struct prog_instruction
*inst
)
1299 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1302 static void emit_sge(struct brw_wm_compile
*c
,
1303 const struct prog_instruction
*inst
)
1305 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1308 static void emit_seq(struct brw_wm_compile
*c
,
1309 const struct prog_instruction
*inst
)
1311 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1314 static void emit_sne(struct brw_wm_compile
*c
,
1315 const struct prog_instruction
*inst
)
1317 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1320 static void emit_ddx(struct brw_wm_compile
*c
,
1321 const struct prog_instruction
*inst
)
1323 struct brw_compile
*p
= &c
->func
;
1324 GLuint mask
= inst
->DstReg
.WriteMask
;
1325 struct brw_reg interp
[4];
1327 struct brw_reg src0
, w
;
1329 src0
= get_src_reg(c
, inst
, 0, 0);
1330 w
= get_src_reg(c
, inst
, 1, 3);
1332 interp
[0] = brw_vec1_grf(nr
, 0);
1333 interp
[1] = brw_vec1_grf(nr
, 4);
1334 interp
[2] = brw_vec1_grf(nr
+1, 0);
1335 interp
[3] = brw_vec1_grf(nr
+1, 4);
1336 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1337 for(i
= 0; i
< 4; i
++ ) {
1338 if (mask
& (1<<i
)) {
1339 dst
= get_dst_reg(c
, inst
, i
);
1340 brw_MOV(p
, dst
, interp
[i
]);
1341 brw_MUL(p
, dst
, dst
, w
);
1344 brw_set_saturate(p
, 0);
1347 static void emit_ddy(struct brw_wm_compile
*c
,
1348 const struct prog_instruction
*inst
)
1350 struct brw_compile
*p
= &c
->func
;
1351 GLuint mask
= inst
->DstReg
.WriteMask
;
1352 struct brw_reg interp
[4];
1354 struct brw_reg src0
, w
;
1357 src0
= get_src_reg(c
, inst
, 0, 0);
1359 w
= get_src_reg(c
, inst
, 1, 3);
1360 interp
[0] = brw_vec1_grf(nr
, 0);
1361 interp
[1] = brw_vec1_grf(nr
, 4);
1362 interp
[2] = brw_vec1_grf(nr
+1, 0);
1363 interp
[3] = brw_vec1_grf(nr
+1, 4);
1364 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1365 for(i
= 0; i
< 4; i
++ ) {
1366 if (mask
& (1<<i
)) {
1367 dst
= get_dst_reg(c
, inst
, i
);
1368 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1369 brw_MUL(p
, dst
, dst
, w
);
1372 brw_set_saturate(p
, 0);
1375 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1377 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1381 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1383 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1386 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1388 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1391 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1393 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1397 /* One-, two- and three-dimensional Perlin noise, similar to the description
1398 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1399 static void noise1_sub( struct brw_wm_compile
*c
) {
1401 struct brw_compile
*p
= &c
->func
;
1402 struct brw_reg param
,
1403 x0
, x1
, /* gradients at each end */
1404 t
, tmp
[ 2 ], /* float temporaries */
1405 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1407 int mark
= mark_tmps( c
);
1409 x0
= alloc_tmp( c
);
1410 x1
= alloc_tmp( c
);
1412 tmp
[ 0 ] = alloc_tmp( c
);
1413 tmp
[ 1 ] = alloc_tmp( c
);
1414 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1415 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1416 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1417 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1418 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1420 param
= lookup_tmp( c
, mark
- 2 );
1422 brw_set_access_mode( p
, BRW_ALIGN_1
);
1424 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1426 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1427 be hashed. Also compute the remainder (offset within the unit
1428 length), interleaved to reduce register dependency penalties. */
1429 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1430 brw_FRC( p
, param
, param
);
1431 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1432 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1433 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1435 /* We're now ready to perform the hashing. The two hashes are
1436 interleaved for performance. The hash function used is
1437 designed to rapidly achieve avalanche and require only 32x16
1438 bit multiplication, and 16-bit swizzles (which we get for
1439 free). We can't use immediate operands in the multiplies,
1440 because immediates are permitted only in src1 and the 16-bit
1441 factor is permitted only in src0. */
1442 for( i
= 0; i
< 2; i
++ )
1443 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1444 for( i
= 0; i
< 2; i
++ )
1445 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1446 high_words( itmp
[ i
] ) );
1447 for( i
= 0; i
< 2; i
++ )
1448 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1449 for( i
= 0; i
< 2; i
++ )
1450 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1451 high_words( itmp
[ i
] ) );
1452 for( i
= 0; i
< 2; i
++ )
1453 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1454 for( i
= 0; i
< 2; i
++ )
1455 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1456 high_words( itmp
[ i
] ) );
1458 /* Now we want to initialise the two gradients based on the
1459 hashes. Format conversion from signed integer to float leaves
1460 everything scaled too high by a factor of pow( 2, 31 ), but
1461 we correct for that right at the end. */
1462 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1463 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1464 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1466 brw_MUL( p
, x0
, x0
, param
);
1467 brw_MUL( p
, x1
, x1
, t
);
1469 /* We interpolate between the gradients using the polynomial
1470 6t^5 - 15t^4 + 10t^3 (Perlin). */
1471 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1472 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1473 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1474 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1475 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1476 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1478 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1479 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1480 brw_MUL( p
, x1
, x1
, param
);
1481 brw_ADD( p
, x0
, x0
, x1
);
1482 /* scale by pow( 2, -30 ), to compensate for the format conversion
1483 above and an extra factor of 2 so that a single gradient covers
1485 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1487 release_tmps( c
, mark
);
1490 static void emit_noise1( struct brw_wm_compile
*c
,
1491 const struct prog_instruction
*inst
)
1493 struct brw_compile
*p
= &c
->func
;
1494 struct brw_reg src
, param
, dst
;
1495 GLuint mask
= inst
->DstReg
.WriteMask
;
1497 int mark
= mark_tmps( c
);
1499 assert( mark
== 0 );
1501 src
= get_src_reg( c
, inst
, 0, 0 );
1503 param
= alloc_tmp( c
);
1505 brw_MOV( p
, param
, src
);
1507 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1509 /* Fill in the result: */
1510 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1511 for (i
= 0 ; i
< 4; i
++) {
1512 if (mask
& (1<<i
)) {
1513 dst
= get_dst_reg(c
, inst
, i
);
1514 brw_MOV( p
, dst
, param
);
1517 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1518 brw_set_saturate( p
, 0 );
1520 release_tmps( c
, mark
);
1523 static void noise2_sub( struct brw_wm_compile
*c
) {
1525 struct brw_compile
*p
= &c
->func
;
1526 struct brw_reg param0
, param1
,
1527 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1528 t
, tmp
[ 4 ], /* float temporaries */
1529 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1531 int mark
= mark_tmps( c
);
1533 x0y0
= alloc_tmp( c
);
1534 x0y1
= alloc_tmp( c
);
1535 x1y0
= alloc_tmp( c
);
1536 x1y1
= alloc_tmp( c
);
1538 for( i
= 0; i
< 4; i
++ ) {
1539 tmp
[ i
] = alloc_tmp( c
);
1540 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1542 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1543 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1544 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1546 param0
= lookup_tmp( c
, mark
- 3 );
1547 param1
= lookup_tmp( c
, mark
- 2 );
1549 brw_set_access_mode( p
, BRW_ALIGN_1
);
1551 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1552 be hashed. Also compute the remainders (offsets within the unit
1553 square), interleaved to reduce register dependency penalties. */
1554 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1555 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1556 brw_FRC( p
, param0
, param0
);
1557 brw_FRC( p
, param1
, param1
);
1558 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1559 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1560 low_words( itmp
[ 1 ] ) );
1561 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1562 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1563 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1564 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1565 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1567 /* We're now ready to perform the hashing. The four hashes are
1568 interleaved for performance. The hash function used is
1569 designed to rapidly achieve avalanche and require only 32x16
1570 bit multiplication, and 16-bit swizzles (which we get for
1571 free). We can't use immediate operands in the multiplies,
1572 because immediates are permitted only in src1 and the 16-bit
1573 factor is permitted only in src0. */
1574 for( i
= 0; i
< 4; i
++ )
1575 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1576 for( i
= 0; i
< 4; i
++ )
1577 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1578 high_words( itmp
[ i
] ) );
1579 for( i
= 0; i
< 4; i
++ )
1580 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1581 for( i
= 0; i
< 4; i
++ )
1582 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1583 high_words( itmp
[ i
] ) );
1584 for( i
= 0; i
< 4; i
++ )
1585 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1586 for( i
= 0; i
< 4; i
++ )
1587 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1588 high_words( itmp
[ i
] ) );
1590 /* Now we want to initialise the four gradients based on the
1591 hashes. Format conversion from signed integer to float leaves
1592 everything scaled too high by a factor of pow( 2, 15 ), but
1593 we correct for that right at the end. */
1594 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1595 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1596 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1597 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1598 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1600 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1601 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1602 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1603 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1605 brw_MUL( p
, x1y0
, x1y0
, t
);
1606 brw_MUL( p
, x1y1
, x1y1
, t
);
1607 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1608 brw_MUL( p
, x0y0
, x0y0
, param0
);
1609 brw_MUL( p
, x0y1
, x0y1
, param0
);
1611 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1612 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1613 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1614 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1616 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1617 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1618 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1619 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1621 /* We interpolate between the gradients using the polynomial
1622 6t^5 - 15t^4 + 10t^3 (Perlin). */
1623 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1624 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1625 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1626 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1627 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1628 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1629 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1631 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1632 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1633 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1634 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1635 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1637 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1638 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1639 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1640 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1642 /* Here we interpolate in the y dimension... */
1643 brw_MUL( p
, x0y1
, x0y1
, param1
);
1644 brw_MUL( p
, x1y1
, x1y1
, param1
);
1645 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1646 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1648 /* And now in x. There are horrible register dependencies here,
1649 but we have nothing else to do. */
1650 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1651 brw_MUL( p
, x1y0
, x1y0
, param0
);
1652 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1654 /* scale by pow( 2, -15 ), as described above */
1655 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1657 release_tmps( c
, mark
);
1660 static void emit_noise2( struct brw_wm_compile
*c
,
1661 const struct prog_instruction
*inst
)
1663 struct brw_compile
*p
= &c
->func
;
1664 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1665 GLuint mask
= inst
->DstReg
.WriteMask
;
1667 int mark
= mark_tmps( c
);
1669 assert( mark
== 0 );
1671 src0
= get_src_reg( c
, inst
, 0, 0 );
1672 src1
= get_src_reg( c
, inst
, 0, 1 );
1674 param0
= alloc_tmp( c
);
1675 param1
= alloc_tmp( c
);
1677 brw_MOV( p
, param0
, src0
);
1678 brw_MOV( p
, param1
, src1
);
1680 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1682 /* Fill in the result: */
1683 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1684 for (i
= 0 ; i
< 4; i
++) {
1685 if (mask
& (1<<i
)) {
1686 dst
= get_dst_reg(c
, inst
, i
);
1687 brw_MOV( p
, dst
, param0
);
1690 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1691 brw_set_saturate( p
, 0 );
1693 release_tmps( c
, mark
);
1697 * The three-dimensional case is much like the one- and two- versions above,
1698 * but since the number of corners is rapidly growing we now pack 16 16-bit
1699 * hashes into each register to extract more parallelism from the EUs.
1701 static void noise3_sub( struct brw_wm_compile
*c
) {
1703 struct brw_compile
*p
= &c
->func
;
1704 struct brw_reg param0
, param1
, param2
,
1705 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1706 xi
, yi
, zi
, /* interpolation coefficients */
1707 t
, tmp
[ 8 ], /* float temporaries */
1708 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1709 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1711 int mark
= mark_tmps( c
);
1713 x0y0
= alloc_tmp( c
);
1714 x0y1
= alloc_tmp( c
);
1715 x1y0
= alloc_tmp( c
);
1716 x1y1
= alloc_tmp( c
);
1717 xi
= alloc_tmp( c
);
1718 yi
= alloc_tmp( c
);
1719 zi
= alloc_tmp( c
);
1721 for( i
= 0; i
< 8; i
++ ) {
1722 tmp
[ i
] = alloc_tmp( c
);
1723 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1724 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1727 param0
= lookup_tmp( c
, mark
- 4 );
1728 param1
= lookup_tmp( c
, mark
- 3 );
1729 param2
= lookup_tmp( c
, mark
- 2 );
1731 brw_set_access_mode( p
, BRW_ALIGN_1
);
1733 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1734 be hashed. Also compute the remainders (offsets within the unit
1735 cube), interleaved to reduce register dependency penalties. */
1736 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1737 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1738 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1739 brw_FRC( p
, param0
, param0
);
1740 brw_FRC( p
, param1
, param1
);
1741 brw_FRC( p
, param2
, param2
);
1742 /* Since we now have only 16 bits of precision in the hash, we must
1743 be more careful about thorough mixing to maintain entropy as we
1744 squash the input vector into a small scalar. */
1745 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1746 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1747 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1748 brw_imm_uw( 0x9B93 ) );
1749 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1750 brw_imm_uw( 0xBC8F ) );
1752 /* Temporarily disable the execution mask while we work with ExecSize=16
1753 channels (the mask is set for ExecSize=8 and is probably incorrect).
1754 Although this might cause execution of unwanted channels, the code
1755 writes only to temporary registers and has no side effects, so
1756 disabling the mask is harmless. */
1757 brw_push_insn_state( p
);
1758 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1759 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1760 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1761 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1763 /* We're now ready to perform the hashing. The eight hashes are
1764 interleaved for performance. The hash function used is
1765 designed to rapidly achieve avalanche and require only 16x16
1766 bit multiplication, and 8-bit swizzles (which we get for
1768 for( i
= 0; i
< 4; i
++ )
1769 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1770 for( i
= 0; i
< 4; i
++ )
1771 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1772 odd_bytes( wtmp
[ i
] ) );
1773 for( i
= 0; i
< 4; i
++ )
1774 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1775 for( i
= 0; i
< 4; i
++ )
1776 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1777 odd_bytes( wtmp
[ i
] ) );
1778 brw_pop_insn_state( p
);
1780 /* Now we want to initialise the four rear gradients based on the
1781 hashes. Format conversion from signed integer to float leaves
1782 everything scaled too high by a factor of pow( 2, 15 ), but
1783 we correct for that right at the end. */
1785 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1786 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1787 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1788 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1789 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1791 brw_push_insn_state( p
);
1792 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1793 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1794 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1795 brw_pop_insn_state( p
);
1797 brw_MUL( p
, x1y0
, x1y0
, t
);
1798 brw_MUL( p
, x1y1
, x1y1
, t
);
1799 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1800 brw_MUL( p
, x0y0
, x0y0
, param0
);
1801 brw_MUL( p
, x0y1
, x0y1
, param0
);
1804 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1805 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1806 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1807 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1809 brw_push_insn_state( p
);
1810 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1811 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1812 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1813 brw_pop_insn_state( p
);
1815 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1816 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1817 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1818 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1819 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1821 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1822 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1823 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1824 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1827 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1828 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1829 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1830 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1832 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1833 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1834 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1835 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1837 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1838 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1839 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1840 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1842 /* We interpolate between the gradients using the polynomial
1843 6t^5 - 15t^4 + 10t^3 (Perlin). */
1844 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1845 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1846 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1847 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1848 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1849 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1850 brw_MUL( p
, xi
, xi
, param0
);
1851 brw_MUL( p
, yi
, yi
, param1
);
1852 brw_MUL( p
, zi
, zi
, param2
);
1853 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1854 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1855 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1856 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1857 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1858 brw_MUL( p
, xi
, xi
, param0
);
1859 brw_MUL( p
, yi
, yi
, param1
);
1860 brw_MUL( p
, zi
, zi
, param2
);
1861 brw_MUL( p
, xi
, xi
, param0
);
1862 brw_MUL( p
, yi
, yi
, param1
);
1863 brw_MUL( p
, zi
, zi
, param2
);
1864 brw_MUL( p
, xi
, xi
, param0
);
1865 brw_MUL( p
, yi
, yi
, param1
);
1866 brw_MUL( p
, zi
, zi
, param2
);
1868 /* Here we interpolate in the y dimension... */
1869 brw_MUL( p
, x0y1
, x0y1
, yi
);
1870 brw_MUL( p
, x1y1
, x1y1
, yi
);
1871 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1872 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1874 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1875 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1876 brw_MUL( p
, x1y0
, x1y0
, xi
);
1877 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1879 /* Now do the same thing for the front four gradients... */
1881 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1882 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1883 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1884 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1886 brw_push_insn_state( p
);
1887 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1888 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1889 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1890 brw_pop_insn_state( p
);
1892 brw_MUL( p
, x1y0
, x1y0
, t
);
1893 brw_MUL( p
, x1y1
, x1y1
, t
);
1894 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1895 brw_MUL( p
, x0y0
, x0y0
, param0
);
1896 brw_MUL( p
, x0y1
, x0y1
, param0
);
1899 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1900 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1901 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1902 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1904 brw_push_insn_state( p
);
1905 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1906 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1907 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1908 brw_pop_insn_state( p
);
1910 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1911 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1912 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1913 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1914 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1916 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1917 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1918 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1919 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1922 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1923 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1924 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1925 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1927 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1928 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1929 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1930 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1932 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1933 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1934 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1935 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1937 /* The interpolation coefficients are still around from last time, so
1938 again interpolate in the y dimension... */
1939 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1940 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1941 brw_MUL( p
, x0y1
, x0y1
, yi
);
1942 brw_MUL( p
, x1y1
, x1y1
, yi
);
1943 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1944 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1946 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1947 time put the front face in tmp[ 1 ] and we're nearly there... */
1948 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1949 brw_MUL( p
, x1y0
, x1y0
, xi
);
1950 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1952 /* The final interpolation, in the z dimension: */
1953 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1954 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1955 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1957 /* scale by pow( 2, -15 ), as described above */
1958 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1960 release_tmps( c
, mark
);
1963 static void emit_noise3( struct brw_wm_compile
*c
,
1964 const struct prog_instruction
*inst
)
1966 struct brw_compile
*p
= &c
->func
;
1967 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1968 GLuint mask
= inst
->DstReg
.WriteMask
;
1970 int mark
= mark_tmps( c
);
1972 assert( mark
== 0 );
1974 src0
= get_src_reg( c
, inst
, 0, 0 );
1975 src1
= get_src_reg( c
, inst
, 0, 1 );
1976 src2
= get_src_reg( c
, inst
, 0, 2 );
1978 param0
= alloc_tmp( c
);
1979 param1
= alloc_tmp( c
);
1980 param2
= alloc_tmp( c
);
1982 brw_MOV( p
, param0
, src0
);
1983 brw_MOV( p
, param1
, src1
);
1984 brw_MOV( p
, param2
, src2
);
1986 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1988 /* Fill in the result: */
1989 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1990 for (i
= 0 ; i
< 4; i
++) {
1991 if (mask
& (1<<i
)) {
1992 dst
= get_dst_reg(c
, inst
, i
);
1993 brw_MOV( p
, dst
, param0
);
1996 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1997 brw_set_saturate( p
, 0 );
1999 release_tmps( c
, mark
);
/**
 * For the four-dimensional case, the little micro-optimisation benefits
 * we obtain by unrolling all the loops aren't worth the massive bloat it
 * now causes.  Instead, we loop twice around performing a similar operation
 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
 * code to glue it all together.
 */
2009 static void noise4_sub( struct brw_wm_compile
*c
)
2011 struct brw_compile
*p
= &c
->func
;
2012 struct brw_reg param
[ 4 ],
2013 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2014 w0
, /* noise for the w=0 cube */
2015 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2016 interp
[ 4 ], /* interpolation coefficients */
2017 t
, tmp
[ 8 ], /* float temporaries */
2018 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2019 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2021 int mark
= mark_tmps( c
);
2022 GLuint loop
, origin
;
2024 x0y0
= alloc_tmp( c
);
2025 x0y1
= alloc_tmp( c
);
2026 x1y0
= alloc_tmp( c
);
2027 x1y1
= alloc_tmp( c
);
2029 w0
= alloc_tmp( c
);
2030 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2031 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2033 for( i
= 0; i
< 4; i
++ ) {
2034 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2035 interp
[ i
] = alloc_tmp( c
);
2038 for( i
= 0; i
< 8; i
++ ) {
2039 tmp
[ i
] = alloc_tmp( c
);
2040 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2041 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2044 brw_set_access_mode( p
, BRW_ALIGN_1
);
2046 /* We only want 16 bits of precision from the integral part of each
2047 co-ordinate, but unfortunately the RNDD semantics would saturate
2048 at 16 bits if we performed the operation directly to a 16-bit
2049 destination. Therefore, we round to 32-bit temporaries where
2050 appropriate, and then store only the lower 16 bits. */
2051 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2052 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2053 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2054 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2055 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2056 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2058 /* Modify the flag register here, because the side effect is useful
2059 later (see below). We know for certain that all flags will be
2060 cleared, since the FRC instruction cannot possibly generate
2061 negative results. Even for exceptional inputs (infinities, denormals,
2062 NaNs), the architecture guarantees that the L conditional is false. */
2063 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2064 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2065 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2066 for( i
= 1; i
< 4; i
++ )
2067 brw_FRC( p
, param
[ i
], param
[ i
] );
2069 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2071 for( i
= 0; i
< 4; i
++ )
2072 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2073 for( i
= 0; i
< 4; i
++ )
2074 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2075 for( i
= 0; i
< 4; i
++ )
2076 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2077 for( i
= 0; i
< 4; i
++ )
2078 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2079 for( j
= 0; j
< 3; j
++ )
2080 for( i
= 0; i
< 4; i
++ )
2081 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2083 /* Mark the current address, as it will be a jump destination. The
2084 following code will be executed twice: first, with the flag
2085 register clear indicating the w=0 case, and second with flags
2089 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2090 be hashed. Since we have only 16 bits of precision in the hash, we
2091 must be careful about thorough mixing to maintain entropy as we
2092 squash the input vector into a small scalar. */
2093 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2094 brw_imm_uw( 0xBC8F ) );
2095 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2096 brw_imm_uw( 0xD0BD ) );
2097 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2098 brw_imm_uw( 0x9B93 ) );
2099 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2100 brw_imm_uw( 0xA359 ) );
2101 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2102 brw_imm_uw( 0xBC8F ) );
2104 /* Temporarily disable the execution mask while we work with ExecSize=16
2105 channels (the mask is set for ExecSize=8 and is probably incorrect).
2106 Although this might cause execution of unwanted channels, the code
2107 writes only to temporary registers and has no side effects, so
2108 disabling the mask is harmless. */
2109 brw_push_insn_state( p
);
2110 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2111 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2112 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2113 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2115 /* We're now ready to perform the hashing. The eight hashes are
2116 interleaved for performance. The hash function used is
2117 designed to rapidly achieve avalanche and require only 16x16
2118 bit multiplication, and 8-bit swizzles (which we get for
2120 for( i
= 0; i
< 4; i
++ )
2121 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2122 for( i
= 0; i
< 4; i
++ )
2123 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2124 odd_bytes( wtmp
[ i
] ) );
2125 for( i
= 0; i
< 4; i
++ )
2126 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2127 for( i
= 0; i
< 4; i
++ )
2128 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2129 odd_bytes( wtmp
[ i
] ) );
2130 brw_pop_insn_state( p
);
2132 /* Now we want to initialise the four rear gradients based on the
2133 hashes. Format conversion from signed integer to float leaves
2134 everything scaled too high by a factor of pow( 2, 15 ), but
2135 we correct for that right at the end. */
2137 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2138 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2139 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2140 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2141 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2143 brw_push_insn_state( p
);
2144 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2145 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2146 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2147 brw_pop_insn_state( p
);
2149 brw_MUL( p
, x1y0
, x1y0
, t
);
2150 brw_MUL( p
, x1y1
, x1y1
, t
);
2151 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2152 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2153 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2156 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2157 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2158 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2159 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2161 brw_push_insn_state( p
);
2162 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2163 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2164 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2165 brw_pop_insn_state( p
);
2167 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2168 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2169 /* prepare t for the w component (used below): w the first time through
2170 the loop; w - 1 the second time) */
2171 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2172 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2173 p
->current
->header
.predicate_inverse
= 1;
2174 brw_MOV( p
, t
, param
[ 3 ] );
2175 p
->current
->header
.predicate_inverse
= 0;
2176 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2177 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2178 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2180 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2181 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2182 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2183 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2186 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2187 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2188 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2189 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2191 brw_push_insn_state( p
);
2192 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2193 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2194 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2195 brw_pop_insn_state( p
);
2197 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2198 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2199 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2200 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2202 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2203 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2204 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2205 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2208 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2209 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2210 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2211 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2213 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2214 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2215 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2216 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2217 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2219 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2220 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2221 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2222 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2224 /* Here we interpolate in the y dimension... */
2225 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2226 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2227 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2228 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2229 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2230 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2232 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2233 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2234 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2235 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2237 /* Now do the same thing for the front four gradients... */
2239 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2240 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2241 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2242 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2244 brw_push_insn_state( p
);
2245 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2246 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2247 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2248 brw_pop_insn_state( p
);
2250 brw_MUL( p
, x1y0
, x1y0
, t
);
2251 brw_MUL( p
, x1y1
, x1y1
, t
);
2252 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2253 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2254 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2257 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2258 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2259 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2260 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2262 brw_push_insn_state( p
);
2263 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2264 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2265 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2266 brw_pop_insn_state( p
);
2268 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2269 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2270 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2271 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2272 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2274 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2275 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2276 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2277 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2280 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2281 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2282 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2283 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2285 brw_push_insn_state( p
);
2286 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2287 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2288 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2289 brw_pop_insn_state( p
);
2291 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2292 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2293 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2294 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2295 /* prepare t for the w component (used below): w the first time through
2296 the loop; w - 1 the second time */
2297 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2298 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2299 p
->current
->header
.predicate_inverse
= 1;
2300 brw_MOV( p
, t
, param
[ 3 ] );
2301 p
->current
->header
.predicate_inverse
= 0;
2302 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2304 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2305 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2306 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2307 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2310 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2311 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2312 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2313 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2315 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2316 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2317 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2318 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2320 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2321 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2322 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2323 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2325 /* Interpolate in the y dimension: */
2326 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2327 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2328 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2329 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2330 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2331 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2333 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2334 time put the front face in tmp[ 1 ] and we're nearly there... */
2335 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2336 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2337 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2339 /* Another interpolation, in the z dimension: */
2340 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2341 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2342 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2344 /* Exit the loop if we've computed both cubes... */
2345 origin
= p
->nr_insn
;
2346 brw_push_insn_state( p
);
2347 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2348 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2349 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2350 brw_pop_insn_state( p
);
2352 /* Save the result for the w=0 case, and increment the w coordinate: */
2353 brw_MOV( p
, w0
, tmp
[ 0 ] );
2354 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2357 /* Loop around for the other cube. Explicitly set the flag register
2358 (unfortunately we must spend an extra instruction to do this: we
2359 can't rely on a side effect of the previous MOV or ADD because
2360 conditional modifiers which are normally true might be false in
2361 exceptional circumstances, e.g. given a NaN input; the add to
2362 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2363 brw_push_insn_state( p
);
2364 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2365 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2366 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2367 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2368 brw_pop_insn_state( p
);
2370 /* Patch the previous conditional branch now that we know the
2371 destination address. */
2372 brw_set_src1( p
->store
+ origin
,
2373 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2375 /* The very last interpolation. */
2376 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2377 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2378 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2380 /* scale by pow( 2, -15 ), as described above */
2381 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2383 release_tmps( c
, mark
);
/**
 * Emit code for the four-dimensional noise instruction.
 *
 * Copies four values fetched from source operand 0 into four freshly
 * allocated temporaries (param0..param3), invokes the shared noise4
 * subroutine, and then moves param0 into every destination component
 * that is enabled in the instruction write mask.
 *
 * \param c     compile state; temporaries are allocated from it and
 *              released again before returning
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the four get_src_reg() calls differ only in their last
 * argument (0..3), presumably the source component index -- confirm
 * against get_src_reg().
 * NOTE(review): the declaration of the loop index i is not visible in
 * this excerpt.
 */
2386 static void emit_noise4( struct brw_wm_compile
*c
,
2387 const struct prog_instruction
*inst
)
2389 struct brw_compile
*p
= &c
->func
;
2390 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2391 GLuint mask
= inst
->DstReg
.WriteMask
;
2393 int mark
= mark_tmps( c
);
/* The temporary pool must be empty on entry.  Presumably the subroutine
   expects its parameters in the first temporaries -- TODO confirm. */
2395 assert( mark
== 0 );
/* Fetch the four inputs from source operand 0. */
2397 src0
= get_src_reg( c
, inst
, 0, 0 );
2398 src1
= get_src_reg( c
, inst
, 0, 1 );
2399 src2
= get_src_reg( c
, inst
, 0, 2 );
2400 src3
= get_src_reg( c
, inst
, 0, 3 );
/* Allocate one temporary per input; the allocation order is significant
   because of the assert on mark above. */
2402 param0
= alloc_tmp( c
);
2403 param1
= alloc_tmp( c
);
2404 param2
= alloc_tmp( c
);
2405 param3
= alloc_tmp( c
);
/* Copy the inputs into the parameter temporaries. */
2407 brw_MOV( p
, param0
, src0
);
2408 brw_MOV( p
, param1
, src1
);
2409 brw_MOV( p
, param2
, src2
);
2410 brw_MOV( p
, param3
, src3
);
/* Call the shared noise4 subroutine; the code below reads its result
   from param0. */
2412 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2414 /* Fill in the result: */
2415 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2416 for (i
= 0 ; i
< 4; i
++) {
2417 if (mask
& (1<<i
)) {
2418 dst
= get_dst_reg(c
, inst
, i
);
2419 brw_MOV( p
, dst
, param0
);
/* Switch saturation back off if it was enabled for the result moves. */
2422 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2423 brw_set_saturate( p
, 0 );
2425 release_tmps( c
, mark
);
/**
 * Emit code for the window-position XY instruction.
 *
 * Looks up destination components 0 and 1 and two values of source
 * operand 0, then handles X and Y independently, each only when the
 * matching bit of the write mask is set:
 *   - X: the operands are the X source retyped to BRW_REGISTER_TYPE_W
 *     and the immediate (0 - origin_x).
 *   - Y: the operands are the negated Y source (retyped the same way)
 *     and the immediate (origin_y + drawable_height - 1).
 *
 * \param c     compile state; origin and drawable height come from c->key
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the instruction that consumes these operands is not
 * visible in this excerpt -- presumably an add into dst[0] and dst[1];
 * confirm against the full source.
 * NOTE(review): the Y immediate includes a minus one term that the
 * inline formula comment does not mention.
 */
2428 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2429 const struct prog_instruction
*inst
)
2431 struct brw_compile
*p
= &c
->func
;
2432 GLuint mask
= inst
->DstReg
.WriteMask
;
2433 struct brw_reg src0
[2], dst
[2];
/* Only the first two destination and source components are used. */
2435 dst
[0] = get_dst_reg(c
, inst
, 0);
2436 dst
[1] = get_dst_reg(c
, inst
, 1);
2438 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2439 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2441 /* Calculate the pixel offset from window bottom left into destination
2444 if (mask
& WRITEMASK_X
) {
2445 /* X' = X - origin_x */
2448 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2449 brw_imm_d(0 - c
->key
.origin_x
));
2452 if (mask
& WRITEMASK_Y
) {
2453 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2456 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2457 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2462 BIAS on SIMD8 not working yet...
/**
 * Emit code for a texture lookup with LOD bias.
 *
 * Builds a sampler message in message registers 2..6:
 *   m2..m4  s, t, r texture coordinates (unused ones are set to 0,
 *           depending on the texture target)
 *   m5      bias, taken from src[3]
 *   m6      reference value, always 0 here
 * and then supplies the operands of the sampler request: destination
 * dst[0], payload register as src0, the surface index of the texture
 * unit, the instruction write mask, message type
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS and a response length of 4.
 *
 * \param c     compile state
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the call that issues the sampler request, the remaining
 * case labels of the switch and the declaration of i are not visible in
 * this excerpt.
 * NOTE(review): the message type is a SIMD16 one although the rest of
 * this file works on 8-wide registers; the stray text just above this
 * function says bias on SIMD8 is not working yet -- confirm whether this
 * function is actually compiled in.
 */
2464 static void emit_txb(struct brw_wm_compile
*c
,
2465 const struct prog_instruction
*inst
)
2467 struct brw_compile
*p
= &c
->func
;
2468 struct brw_reg dst
[4], src
[4], payload_reg
;
2469 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2472 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination and source components up front. */
2474 for (i
= 0; i
< 4; i
++)
2475 dst
[i
] = get_dst_reg(c
, inst
, i
);
2476 for (i
= 0; i
< 4; i
++)
2477 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load the coordinates; how many are meaningful depends on the target. */
2479 switch (inst
->TexSrcTarget
) {
2480 case TEXTURE_1D_INDEX
:
2481 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2482 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2483 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2485 case TEXTURE_2D_INDEX
:
2486 case TEXTURE_RECT_INDEX
:
2487 brw_MOV(p
, brw_message_reg(2), src
[0]);
2488 brw_MOV(p
, brw_message_reg(3), src
[1]);
2489 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2492 brw_MOV(p
, brw_message_reg(2), src
[0]);
2493 brw_MOV(p
, brw_message_reg(3), src
[1]);
2494 brw_MOV(p
, brw_message_reg(4), src
[2]);
2497 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2498 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
2500 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2502 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2503 SURF_INDEX_TEXTURE(unit
),
2505 inst
->DstReg
.WriteMask
, /* writemask */
2506 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2507 4, /* response_length */
/**
 * Emit code for a plain texture lookup, optionally with shadow compare.
 *
 * The shadow flag is taken from the bit for this texture unit in
 * c->key.shadowtex_mask.  The texture target selects which coordinates
 * are sent (emit is WRITEMASK_XY for 2D and RECT targets, WRITEMASK_XYZ
 * in another case).  Coordinates are written to consecutive message
 * registers, with 0 substituted for coordinates that are not sent.
 * Message registers 5 and 6 receive a zero lod/bias and src[2] as the
 * reference value.  The sampler request uses message type
 * BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, a response length of 4 and a message
 * length of 6 with shadow compare, 4 without.
 *
 * \param c     compile state
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the declarations of i, nr, emit and msg_len, the
 * conditions guarding the two coordinate moves, the lod/ref moves and
 * the final move of 1.0 into dst[3], and the call that issues the
 * sampler request are not visible in this excerpt.  Presumably the
 * lod/ref moves and the dst[3] write apply only to the shadow case --
 * confirm against the full source.
 */
2513 static void emit_tex(struct brw_wm_compile
*c
,
2514 const struct prog_instruction
*inst
)
2516 struct brw_compile
*p
= &c
->func
;
2517 struct brw_reg dst
[4], src
[4], payload_reg
;
2518 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Nonzero when this texture unit is flagged in the shadow texture mask. */
2522 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2524 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2526 for (i
= 0; i
< 4; i
++)
2527 dst
[i
] = get_dst_reg(c
, inst
, i
);
2528 for (i
= 0; i
< 4; i
++)
2529 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Pick the set of coordinates to send based on the texture target. */
2531 switch (inst
->TexSrcTarget
) {
2532 case TEXTURE_1D_INDEX
:
2536 case TEXTURE_2D_INDEX
:
2537 case TEXTURE_RECT_INDEX
:
2538 emit
= WRITEMASK_XY
;
2542 emit
= WRITEMASK_XYZ
;
2548 /* move/load S, T, R coords */
2549 for (i
= 0; i
< nr
; i
++) {
2550 static const GLuint swz
[4] = {0,1,2,2};
2552 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2554 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2559 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2560 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
2564 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2566 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2567 SURF_INDEX_TEXTURE(unit
),
2569 inst
->DstReg
.WriteMask
, /* writemask */
2570 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2571 4, /* response_length */
2572 shadow
? 6 : 4, /* msg_length */
2576 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2581 * Resolve subroutine calls after code emit is done.
/**
 * Post-emit fixup step: hands the instruction store of this compile
 * (c->func) to brw_resolve_cals().
 *
 * \param c  compile state whose emitted code is to be fixed up
 */
2583 static void post_wm_emit( struct brw_wm_compile
*c
)
2585 brw_resolve_cals(&c
->func
);
/**
 * Main code generation loop for fragment programs on the GLSL path.
 *
 * Walks c->prog_instructions (c->nr_fp_insns entries) and, for each
 * instruction, optionally fetches constants, sets the conditional
 * modifier from inst->CondUpdate, and dispatches on the opcode to the
 * matching emit_* helper.  Flow control is handled inline:
 *   - IF / ELSE / ENDIF use the if_inst stack (at most MAX_IFSN deep);
 *   - loops use the loop_inst stack (MAX_LOOP_DEPTH entries); at the
 *     end of a loop the BREAK and CONTINUE instructions emitted inside
 *     it are patched with their jump counts;
 *   - subroutine call and return sequences keep return addresses on a
 *     software stack addressed through stack_index, which is initialised
 *     from c->stack.
 * After the opcode switch the predicate control is set from
 * inst->CondUpdate, and a problem is reported when c->reg_index reaches
 * BRW_WM_MAX_GRF.
 *
 * \param brw  driver context (not referenced by the visible lines)
 * \param c    compile state; code is emitted into c->func
 *
 * NOTE(review): most case labels, break statements and closing braces
 * are not visible in this excerpt, so the exact nesting of the
 * statements below cannot be confirmed from here.
 */
2588 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2591 #define MAX_LOOP_DEPTH 32
2592 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2593 struct brw_instruction
*inst0
, *inst1
;
2594 int i
, if_insn
= 0, loop_insn
= 0;
2595 struct brw_compile
*p
= &c
->func
;
2596 struct brw_indirect stack_index
= brw_indirect(0, 0);
2600 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the address register used as call stack pointer at c->stack. */
2601 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2603 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2604 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2607 _mesa_printf("Inst %d: ", i
);
2608 _mesa_print_instruction(inst
);
2611 /* fetch any constants that this instruction needs */
2612 if (c
->fp
->use_const_buffer
)
2613 fetch_constants(c
, inst
);
2615 if (inst
->CondUpdate
)
2616 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2618 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2620 switch (inst
->Opcode
) {
2622 emit_pixel_xy(c
, inst
);
2625 emit_delta_xy(c
, inst
);
2628 emit_pixel_w(c
, inst
);
2631 emit_linterp(c
, inst
);
2634 emit_pinterp(c
, inst
);
2637 emit_cinterp(c
, inst
);
2640 emit_wpos_xy(c
, inst
);
2643 emit_fb_write(c
, inst
);
2645 case WM_FRONTFACING
:
2646 emit_frontfacing(c
, inst
);
2670 emit_trunc(c
, inst
);
2707 emit_min_max(c
, inst
);
2743 emit_noise1(c
, inst
);
2746 emit_noise2(c
, inst
);
2749 emit_noise3(c
, inst
);
2752 emit_noise4(c
, inst
);
/* Conditional blocks: push on IF, replace the top entry on ELSE, pop on
   ENDIF. */
2764 assert(if_insn
< MAX_IFSN
);
2765 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2768 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2771 assert(if_insn
> 0);
2772 brw_ENDIF(p
, if_inst
[--if_insn
]);
2775 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Presumably the subroutine call sequence: store IP plus 3*16 at the
   stack slot, advance the stack pointer by 4, record the call site under
   inst->Comment, then add to IP. */
2781 brw_push_insn_state(p
);
2782 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2783 brw_set_access_mode(p
, BRW_ALIGN_1
);
2784 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2785 brw_set_access_mode(p
, BRW_ALIGN_16
);
2786 brw_ADD(p
, get_addr_reg(stack_index
),
2787 get_addr_reg(stack_index
), brw_imm_d(4));
2788 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2789 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2790 brw_pop_insn_state(p
);
/* Presumably the subroutine return sequence: move the stack pointer back
   by 4 and load IP from the stack slot. */
2794 brw_push_insn_state(p
);
2795 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2796 brw_ADD(p
, get_addr_reg(stack_index
),
2797 get_addr_reg(stack_index
), brw_imm_d(-4));
2798 brw_set_access_mode(p
, BRW_ALIGN_1
);
2799 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2800 brw_set_access_mode(p
, BRW_ALIGN_16
);
2801 brw_pop_insn_state(p
);
2804 case OPCODE_BGNLOOP
:
2805 /* XXX may need to invalidate the current_constant regs */
/* NOTE(review): unlike the IF stack, no bound check of loop_insn against
   MAX_LOOP_DEPTH is visible before this push. */
2806 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2810 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2814 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2816 case OPCODE_ENDLOOP
:
2818 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2819 /* patch all the BREAK instructions from
2821 while (inst0
> loop_inst
[loop_insn
]) {
2823 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2824 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2825 inst0
->bits3
.if_else
.pop_count
= 0;
2826 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2827 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2828 inst0
->bits3
.if_else
.pop_count
= 0;
2833 _mesa_printf("unsupported IR in fragment shader %d\n",
2836 if (inst
->CondUpdate
)
2837 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2839 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2843 if (c
->reg_index
>= BRW_WM_MAX_GRF
) {
2844 _mesa_problem(NULL
, "Ran out of registers in brw_wm_emit_glsl()");
2845 /* XXX we need to do some proper error recovery here */
2851 * Do GPU code generation for shaders that use GLSL features such as
2852 * flow control. Other shaders will be compiled with the
/**
 * Entry point for code generation on the GLSL path.
 *
 * Prints a banner when the DEBUG_WM bit of INTEL_DEBUG is set, runs
 * brw_wm_emit_glsl() to generate the code, prints the resulting program
 * under the same debug flag, and finally records the register usage:
 * total_grf is set to c->reg_index and total_scratch to 0.
 *
 * \param brw  driver context, passed through to brw_wm_emit_glsl()
 * \param c    compile state; c->prog_data is updated on return
 *
 * NOTE(review): the statement belonging to the initial translation
 * comment below and the end of the function are not visible in this
 * excerpt.
 */
2854 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2856 if (INTEL_DEBUG
& DEBUG_WM
) {
2857 _mesa_printf("brw_wm_glsl_emit:\n");
2860 /* initial instruction translation/simplification */
2863 /* actual code generation */
2864 brw_wm_emit_glsl(brw
, c
);
2866 if (INTEL_DEBUG
& DEBUG_WM
) {
2867 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
2870 c
->prog_data
.total_grf
= c
->reg_index
;
2871 c
->prog_data
.total_scratch
= 0;