1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "brw_context.h"
8 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
13 * Determine if the given fragment program uses GLSL features such
14 * as flow conditionals, loops, subroutines.
15 * Some GLSL shaders may use these features, others might not.
17 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
20 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
21 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
22 switch (inst
->Opcode
) {
46 * Record the mapping of a Mesa register to a hardware register.
48 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
49 int component
, struct brw_reg reg
)
51 c
->wm_regs
[file
][index
][component
].reg
= reg
;
52 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
56 * Examine instruction's write mask to find index of first component
57 * enabled for writing.
59 static int get_scalar_dst_index(struct prog_instruction
*inst
)
62 for (i
= 0; i
< 4; i
++)
63 if (inst
->DstReg
.WriteMask
& (1<<i
))
68 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
71 if(c
->tmp_index
== c
->tmp_max
)
72 c
->tmp_regs
[ c
->tmp_max
++ ] = c
->reg_index
++;
74 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
79 * Save current temp register info.
80 * There must be a matching call to release_tmps().
82 static int mark_tmps(struct brw_wm_compile
*c
)
87 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
89 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
92 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
98 * Convert Mesa src register to brw register.
100 * Since we're running in SOA mode each Mesa register corresponds to four
101 * hardware registers. We allocate the hardware registers as needed here.
103 * \param file register file, one of PROGRAM_x
104 * \param index register number
105 * \param component src component (X=0, Y=1, Z=2, W=3)
106 * \param nr not used?!?
107 * \param neg negate value?
108 * \param abs take absolute value?
110 static struct brw_reg
111 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
112 int nr
, GLuint neg
, GLuint abs
)
116 case PROGRAM_STATE_VAR
:
117 case PROGRAM_CONSTANT
:
118 case PROGRAM_UNIFORM
:
119 file
= PROGRAM_STATE_VAR
;
121 case PROGRAM_UNDEFINED
:
122 return brw_null_reg();
123 case PROGRAM_TEMPORARY
:
126 case PROGRAM_PAYLOAD
:
129 _mesa_problem(NULL
, "Unexpected file in get_reg()");
130 return brw_null_reg();
133 /* see if we've already allocated a HW register for this Mesa register */
134 if (c
->wm_regs
[file
][index
][component
].inited
) {
136 reg
= c
->wm_regs
[file
][index
][component
].reg
;
139 /* no, allocate new register */
140 reg
= brw_vec8_grf(c
->reg_index
, 0);
143 /* if this is a new register allocation, record it in the table */
144 if (!c
->wm_regs
[file
][index
][component
].inited
) {
145 set_reg(c
, file
, index
, component
, reg
);
149 if (c
->reg_index
>= BRW_WM_MAX_GRF
- 12) {
150 /* ran out of temporary registers! */
152 /* This is a big hack for now.
153 * Return bad register index, just don't hang the GPU.
155 _mesa_fprintf(stderr
, "out of regs %d\n", c
->reg_index
);
156 c
->reg_index
= BRW_WM_MAX_GRF
- 13;
158 return brw_null_reg();
162 if (neg
& (1 << component
)) {
172 * Preallocate registers. This sets up the Mesa to hardware register
173 * mapping for certain registers, such as constants (uniforms/state vars)
176 static void prealloc_reg(struct brw_wm_compile
*c
)
180 int nr_interp_regs
= 0;
181 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
183 for (i
= 0; i
< 4; i
++) {
184 if (i
< c
->key
.nr_depth_regs
)
185 reg
= brw_vec8_grf(i
* 2, 0);
187 reg
= brw_vec8_grf(0, 0);
188 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
190 c
->reg_index
+= 2 * c
->key
.nr_depth_regs
;
194 const int nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
195 const struct gl_program_parameter_list
*plist
=
196 c
->fp
->program
.Base
.Parameters
;
199 /* number of float constants */
200 c
->prog_data
.nr_params
= 4 * nr_params
;
202 /* loop over program constants (float[4]) */
203 for (i
= 0; i
< nr_params
; i
++) {
204 /* loop over XYZW channels */
205 for (j
= 0; j
< 4; j
++, index
++) {
206 reg
= brw_vec1_grf(c
->reg_index
+ index
/ 8, index
% 8);
207 /* Save pointer to parameter/constant value.
208 * Constants will be copied in prepare_constant_buffer()
210 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
211 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
214 /* number of constant regs used (each reg is float[8]) */
215 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
216 c
->reg_index
+= c
->nr_creg
;
219 /* fragment shader inputs */
220 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
221 if (inputs
& (1<<i
)) {
223 reg
= brw_vec8_grf(c
->reg_index
, 0);
224 for (j
= 0; j
< 4; j
++)
225 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
230 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
231 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
232 c
->prog_data
.curb_read_length
= c
->nr_creg
;
233 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
235 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, c
->reg_index
, 0);
241 * Convert Mesa dst register to brw register.
243 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
244 struct prog_instruction
*inst
, int component
)
247 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
253 * Convert Mesa src register to brw register.
255 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
256 struct prog_src_register
*src
, int index
)
259 int component
= GET_SWZ(src
->Swizzle
, index
);
260 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
261 src
->NegateBase
, src
->Abs
);
265 * Subroutines are minimal support for reusable instruction sequences.
266 * They are implemented as simply as possible to minimise overhead: there
267 * is no explicit support for communication between the caller and callee
268 * other than saving the return address in a temporary register, nor is
269 * there any automatic local storage. This implies that great care is
270 * required before attempting reentrancy or any kind of nested
271 * subroutine invocations.
273 static void invoke_subroutine( struct brw_wm_compile
*c
,
274 enum _subroutine subroutine
,
275 void (*emit
)( struct brw_wm_compile
* ) )
277 struct brw_compile
*p
= &c
->func
;
279 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
281 if( c
->subroutines
[ subroutine
] ) {
282 /* subroutine previously emitted: reuse existing instructions */
284 int mark
= mark_tmps( c
);
285 struct brw_reg return_address
= retype( alloc_tmp( c
),
286 BRW_REGISTER_TYPE_UD
);
287 int here
= p
->nr_insn
;
289 brw_push_insn_state(p
);
290 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
291 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
293 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
294 brw_imm_d( ( c
->subroutines
[ subroutine
] -
296 brw_pop_insn_state(p
);
298 release_tmps( c
, mark
);
300 /* previously unused subroutine: emit, and mark for later reuse */
302 int mark
= mark_tmps( c
);
303 struct brw_reg return_address
= retype( alloc_tmp( c
),
304 BRW_REGISTER_TYPE_UD
);
305 struct brw_instruction
*calc
;
306 int base
= p
->nr_insn
;
308 brw_push_insn_state(p
);
309 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
310 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
311 brw_pop_insn_state(p
);
313 c
->subroutines
[ subroutine
] = p
->nr_insn
;
317 brw_push_insn_state(p
);
318 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
319 brw_MOV( p
, brw_ip_reg(), return_address
);
320 brw_pop_insn_state(p
);
322 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
324 release_tmps( c
, mark
);
328 static void emit_abs( struct brw_wm_compile
*c
,
329 struct prog_instruction
*inst
)
332 struct brw_compile
*p
= &c
->func
;
333 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
334 for (i
= 0; i
< 4; i
++) {
335 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
336 struct brw_reg src
, dst
;
337 dst
= get_dst_reg(c
, inst
, i
);
338 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
339 brw_MOV(p
, dst
, brw_abs(src
));
342 brw_set_saturate(p
, 0);
345 static void emit_trunc( struct brw_wm_compile
*c
,
346 struct prog_instruction
*inst
)
349 struct brw_compile
*p
= &c
->func
;
350 GLuint mask
= inst
->DstReg
.WriteMask
;
351 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
352 for (i
= 0; i
< 4; i
++) {
354 struct brw_reg src
, dst
;
355 dst
= get_dst_reg(c
, inst
, i
);
356 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
357 brw_RNDZ(p
, dst
, src
);
360 brw_set_saturate(p
, 0);
363 static void emit_mov( struct brw_wm_compile
*c
,
364 struct prog_instruction
*inst
)
367 struct brw_compile
*p
= &c
->func
;
368 GLuint mask
= inst
->DstReg
.WriteMask
;
369 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
370 for (i
= 0; i
< 4; i
++) {
372 struct brw_reg src
, dst
;
373 dst
= get_dst_reg(c
, inst
, i
);
374 src
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
375 brw_MOV(p
, dst
, src
);
378 brw_set_saturate(p
, 0);
381 static void emit_pixel_xy(struct brw_wm_compile
*c
,
382 struct prog_instruction
*inst
)
384 struct brw_reg r1
= brw_vec1_grf(1, 0);
385 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
387 struct brw_reg dst0
, dst1
;
388 struct brw_compile
*p
= &c
->func
;
389 GLuint mask
= inst
->DstReg
.WriteMask
;
391 dst0
= get_dst_reg(c
, inst
, 0);
392 dst1
= get_dst_reg(c
, inst
, 1);
393 /* Calculate pixel centers by adding 1 or 0 to each of the
394 * micro-tile coordinates passed in r1.
396 if (mask
& WRITEMASK_X
) {
398 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
399 stride(suboffset(r1_uw
, 4), 2, 4, 0),
400 brw_imm_v(0x10101010));
403 if (mask
& WRITEMASK_Y
) {
405 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
406 stride(suboffset(r1_uw
, 5), 2, 4, 0),
407 brw_imm_v(0x11001100));
411 static void emit_delta_xy(struct brw_wm_compile
*c
,
412 struct prog_instruction
*inst
)
414 struct brw_reg r1
= brw_vec1_grf(1, 0);
415 struct brw_reg dst0
, dst1
, src0
, src1
;
416 struct brw_compile
*p
= &c
->func
;
417 GLuint mask
= inst
->DstReg
.WriteMask
;
419 dst0
= get_dst_reg(c
, inst
, 0);
420 dst1
= get_dst_reg(c
, inst
, 1);
421 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
422 src1
= get_src_reg(c
, &inst
->SrcReg
[0], 1);
423 /* Calc delta X,Y by subtracting origin in r1 from the pixel
426 if (mask
& WRITEMASK_X
) {
429 retype(src0
, BRW_REGISTER_TYPE_UW
),
433 if (mask
& WRITEMASK_Y
) {
436 retype(src1
, BRW_REGISTER_TYPE_UW
),
437 negate(suboffset(r1
,1)));
442 static void fire_fb_write( struct brw_wm_compile
*c
,
448 struct brw_compile
*p
= &c
->func
;
449 /* Pass through control information:
451 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
453 brw_push_insn_state(p
);
454 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
456 brw_message_reg(base_reg
+ 1),
458 brw_pop_insn_state(p
);
460 /* Send framebuffer write message: */
462 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
464 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
471 static void emit_fb_write(struct brw_wm_compile
*c
,
472 struct prog_instruction
*inst
)
474 struct brw_compile
*p
= &c
->func
;
480 /* Reserve a space for AA - may not be needed:
482 if (c
->key
.aa_dest_stencil_reg
)
485 brw_push_insn_state(p
);
486 for (channel
= 0; channel
< 4; channel
++) {
487 src0
= get_src_reg(c
, &inst
->SrcReg
[0], channel
);
488 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
489 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
490 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
492 /* skip over the regs populated above: */
494 brw_pop_insn_state(p
);
496 if (c
->key
.source_depth_to_render_target
) {
497 if (c
->key
.computes_depth
) {
498 src0
= get_src_reg(c
, &inst
->SrcReg
[2], 2);
499 brw_MOV(p
, brw_message_reg(nr
), src0
);
502 src0
= get_src_reg(c
, &inst
->SrcReg
[1], 1);
503 brw_MOV(p
, brw_message_reg(nr
), src0
);
509 if (c
->key
.dest_depth_reg
) {
510 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
511 GLuint off
= c
->key
.dest_depth_reg
% 2;
516 /* XXX do we need this code? comp always 1, off always 0, it seems */
518 brw_push_insn_state(p
);
519 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
521 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
523 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
524 brw_pop_insn_state(p
);
529 struct brw_reg src
= get_src_reg(c
, &inst
->SrcReg
[1], 1);
530 brw_MOV(p
, brw_message_reg(nr
), src
);
535 target
= inst
->Aux
>> 1;
537 fire_fb_write(c
, 0, nr
, target
, eot
);
540 static void emit_pixel_w( struct brw_wm_compile
*c
,
541 struct prog_instruction
*inst
)
543 struct brw_compile
*p
= &c
->func
;
544 GLuint mask
= inst
->DstReg
.WriteMask
;
545 if (mask
& WRITEMASK_W
) {
546 struct brw_reg dst
, src0
, delta0
, delta1
;
547 struct brw_reg interp3
;
549 dst
= get_dst_reg(c
, inst
, 3);
550 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
551 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0);
552 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1);
554 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
555 /* Calc 1/w - just linterp wpos[3] optimized by putting the
556 * result straight into a message reg.
558 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
559 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
563 BRW_MATH_FUNCTION_INV
,
564 BRW_MATH_SATURATE_NONE
,
566 BRW_MATH_PRECISION_FULL
);
570 static void emit_linterp(struct brw_wm_compile
*c
,
571 struct prog_instruction
*inst
)
573 struct brw_compile
*p
= &c
->func
;
574 GLuint mask
= inst
->DstReg
.WriteMask
;
575 struct brw_reg interp
[4];
576 struct brw_reg dst
, delta0
, delta1
;
580 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
581 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0);
582 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1);
585 interp
[0] = brw_vec1_grf(nr
, 0);
586 interp
[1] = brw_vec1_grf(nr
, 4);
587 interp
[2] = brw_vec1_grf(nr
+1, 0);
588 interp
[3] = brw_vec1_grf(nr
+1, 4);
590 for(i
= 0; i
< 4; i
++ ) {
592 dst
= get_dst_reg(c
, inst
, i
);
593 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
594 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
599 static void emit_cinterp(struct brw_wm_compile
*c
,
600 struct prog_instruction
*inst
)
602 struct brw_compile
*p
= &c
->func
;
603 GLuint mask
= inst
->DstReg
.WriteMask
;
605 struct brw_reg interp
[4];
606 struct brw_reg dst
, src0
;
609 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
612 interp
[0] = brw_vec1_grf(nr
, 0);
613 interp
[1] = brw_vec1_grf(nr
, 4);
614 interp
[2] = brw_vec1_grf(nr
+1, 0);
615 interp
[3] = brw_vec1_grf(nr
+1, 4);
617 for(i
= 0; i
< 4; i
++ ) {
619 dst
= get_dst_reg(c
, inst
, i
);
620 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
625 static void emit_pinterp(struct brw_wm_compile
*c
,
626 struct prog_instruction
*inst
)
628 struct brw_compile
*p
= &c
->func
;
629 GLuint mask
= inst
->DstReg
.WriteMask
;
631 struct brw_reg interp
[4];
632 struct brw_reg dst
, delta0
, delta1
;
633 struct brw_reg src0
, w
;
636 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
637 delta0
= get_src_reg(c
, &inst
->SrcReg
[1], 0);
638 delta1
= get_src_reg(c
, &inst
->SrcReg
[1], 1);
639 w
= get_src_reg(c
, &inst
->SrcReg
[2], 3);
642 interp
[0] = brw_vec1_grf(nr
, 0);
643 interp
[1] = brw_vec1_grf(nr
, 4);
644 interp
[2] = brw_vec1_grf(nr
+1, 0);
645 interp
[3] = brw_vec1_grf(nr
+1, 4);
647 for(i
= 0; i
< 4; i
++ ) {
649 dst
= get_dst_reg(c
, inst
, i
);
650 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
651 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
653 brw_MUL(p
, dst
, dst
, w
);
658 /* Sets the destination channels to 1.0 or 0.0 according to gl_FrontFacing. */
659 static void emit_frontfacing(struct brw_wm_compile
*c
,
660 struct prog_instruction
*inst
)
662 struct brw_compile
*p
= &c
->func
;
663 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
665 GLuint mask
= inst
->DstReg
.WriteMask
;
668 for (i
= 0; i
< 4; i
++) {
670 dst
= get_dst_reg(c
, inst
, i
);
671 brw_MOV(p
, dst
, brw_imm_f(0.0));
675 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
678 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
679 for (i
= 0; i
< 4; i
++) {
681 dst
= get_dst_reg(c
, inst
, i
);
682 brw_MOV(p
, dst
, brw_imm_f(1.0));
685 brw_set_predicate_control_flag_value(p
, 0xff);
688 static void emit_xpd(struct brw_wm_compile
*c
,
689 struct prog_instruction
*inst
)
692 struct brw_compile
*p
= &c
->func
;
693 GLuint mask
= inst
->DstReg
.WriteMask
;
694 for (i
= 0; i
< 4; i
++) {
698 struct brw_reg src0
, src1
, dst
;
699 dst
= get_dst_reg(c
, inst
, i
);
700 src0
= negate(get_src_reg(c
, &inst
->SrcReg
[0], i2
));
701 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i1
);
702 brw_MUL(p
, brw_null_reg(), src0
, src1
);
703 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i1
);
704 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i2
);
705 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
706 brw_MAC(p
, dst
, src0
, src1
);
707 brw_set_saturate(p
, 0);
710 brw_set_saturate(p
, 0);
713 static void emit_dp3(struct brw_wm_compile
*c
,
714 struct prog_instruction
*inst
)
716 struct brw_reg src0
[3], src1
[3], dst
;
718 struct brw_compile
*p
= &c
->func
;
719 for (i
= 0; i
< 3; i
++) {
720 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
);
721 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
);
724 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
725 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
726 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
727 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
728 brw_MAC(p
, dst
, src0
[2], src1
[2]);
729 brw_set_saturate(p
, 0);
732 static void emit_dp4(struct brw_wm_compile
*c
,
733 struct prog_instruction
*inst
)
735 struct brw_reg src0
[4], src1
[4], dst
;
737 struct brw_compile
*p
= &c
->func
;
738 for (i
= 0; i
< 4; i
++) {
739 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
);
740 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
);
742 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
743 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
744 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
745 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
746 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
747 brw_MAC(p
, dst
, src0
[3], src1
[3]);
748 brw_set_saturate(p
, 0);
751 static void emit_dph(struct brw_wm_compile
*c
,
752 struct prog_instruction
*inst
)
754 struct brw_reg src0
[4], src1
[4], dst
;
756 struct brw_compile
*p
= &c
->func
;
757 for (i
= 0; i
< 4; i
++) {
758 src0
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
);
759 src1
[i
] = get_src_reg(c
, &inst
->SrcReg
[1], i
);
761 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
762 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
763 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
764 brw_MAC(p
, dst
, src0
[2], src1
[2]);
765 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
766 brw_ADD(p
, dst
, dst
, src1
[3]);
767 brw_set_saturate(p
, 0);
771 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
772 * Note that the result of the function is smeared across the dest
773 * register's X, Y, Z and W channels (subject to writemasking of course).
775 static void emit_math1(struct brw_wm_compile
*c
,
776 struct prog_instruction
*inst
, GLuint func
)
778 struct brw_compile
*p
= &c
->func
;
779 struct brw_reg src0
, dst
, tmp
;
780 const int mark
= mark_tmps( c
);
785 /* Get first component of source register */
786 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
788 /* tmp = func(src0) */
789 brw_MOV(p
, brw_message_reg(2), src0
);
793 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
796 BRW_MATH_DATA_VECTOR
,
797 BRW_MATH_PRECISION_FULL
);
799 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
801 /* replicate tmp value across enabled dest channels */
802 for (i
= 0; i
< 4; i
++) {
803 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
804 dst
= get_dst_reg(c
, inst
, i
);
805 brw_MOV(p
, dst
, tmp
);
809 release_tmps(c
, mark
);
812 static void emit_rcp(struct brw_wm_compile
*c
,
813 struct prog_instruction
*inst
)
815 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
818 static void emit_rsq(struct brw_wm_compile
*c
,
819 struct prog_instruction
*inst
)
821 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
824 static void emit_sin(struct brw_wm_compile
*c
,
825 struct prog_instruction
*inst
)
827 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
830 static void emit_cos(struct brw_wm_compile
*c
,
831 struct prog_instruction
*inst
)
833 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
836 static void emit_ex2(struct brw_wm_compile
*c
,
837 struct prog_instruction
*inst
)
839 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
842 static void emit_lg2(struct brw_wm_compile
*c
,
843 struct prog_instruction
*inst
)
845 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
848 static void emit_add(struct brw_wm_compile
*c
,
849 struct prog_instruction
*inst
)
851 struct brw_compile
*p
= &c
->func
;
852 struct brw_reg src0
, src1
, dst
;
853 GLuint mask
= inst
->DstReg
.WriteMask
;
855 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
856 for (i
= 0 ; i
< 4; i
++) {
858 dst
= get_dst_reg(c
, inst
, i
);
859 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
860 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
861 brw_ADD(p
, dst
, src0
, src1
);
864 brw_set_saturate(p
, 0);
867 static void emit_sub(struct brw_wm_compile
*c
,
868 struct prog_instruction
*inst
)
870 struct brw_compile
*p
= &c
->func
;
871 struct brw_reg src0
, src1
, dst
;
872 GLuint mask
= inst
->DstReg
.WriteMask
;
874 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
875 for (i
= 0 ; i
< 4; i
++) {
877 dst
= get_dst_reg(c
, inst
, i
);
878 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
879 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
880 brw_ADD(p
, dst
, src0
, negate(src1
));
883 brw_set_saturate(p
, 0);
886 static void emit_mul(struct brw_wm_compile
*c
,
887 struct prog_instruction
*inst
)
889 struct brw_compile
*p
= &c
->func
;
890 struct brw_reg src0
, src1
, dst
;
891 GLuint mask
= inst
->DstReg
.WriteMask
;
893 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
894 for (i
= 0 ; i
< 4; i
++) {
896 dst
= get_dst_reg(c
, inst
, i
);
897 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
898 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
899 brw_MUL(p
, dst
, src0
, src1
);
902 brw_set_saturate(p
, 0);
905 static void emit_frc(struct brw_wm_compile
*c
,
906 struct prog_instruction
*inst
)
908 struct brw_compile
*p
= &c
->func
;
909 struct brw_reg src0
, dst
;
910 GLuint mask
= inst
->DstReg
.WriteMask
;
912 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
913 for (i
= 0 ; i
< 4; i
++) {
915 dst
= get_dst_reg(c
, inst
, i
);
916 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
917 brw_FRC(p
, dst
, src0
);
920 if (inst
->SaturateMode
!= SATURATE_OFF
)
921 brw_set_saturate(p
, 0);
924 static void emit_flr(struct brw_wm_compile
*c
,
925 struct prog_instruction
*inst
)
927 struct brw_compile
*p
= &c
->func
;
928 struct brw_reg src0
, dst
;
929 GLuint mask
= inst
->DstReg
.WriteMask
;
931 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
932 for (i
= 0 ; i
< 4; i
++) {
934 dst
= get_dst_reg(c
, inst
, i
);
935 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
936 brw_RNDD(p
, dst
, src0
);
939 brw_set_saturate(p
, 0);
942 static void emit_max(struct brw_wm_compile
*c
,
943 struct prog_instruction
*inst
)
945 struct brw_compile
*p
= &c
->func
;
946 GLuint mask
= inst
->DstReg
.WriteMask
;
947 struct brw_reg src0
, src1
, dst
;
949 brw_push_insn_state(p
);
950 for (i
= 0; i
< 4; i
++) {
952 dst
= get_dst_reg(c
, inst
, i
);
953 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
954 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
955 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
956 brw_MOV(p
, dst
, src0
);
957 brw_set_saturate(p
, 0);
959 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src0
, src1
);
960 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
961 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
962 brw_MOV(p
, dst
, src1
);
963 brw_set_saturate(p
, 0);
964 brw_set_predicate_control_flag_value(p
, 0xff);
967 brw_pop_insn_state(p
);
970 static void emit_min(struct brw_wm_compile
*c
,
971 struct prog_instruction
*inst
)
973 struct brw_compile
*p
= &c
->func
;
974 GLuint mask
= inst
->DstReg
.WriteMask
;
975 struct brw_reg src0
, src1
, dst
;
977 brw_push_insn_state(p
);
978 for (i
= 0; i
< 4; i
++) {
980 dst
= get_dst_reg(c
, inst
, i
);
981 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
982 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
983 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
984 brw_MOV(p
, dst
, src0
);
985 brw_set_saturate(p
, 0);
987 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
988 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
989 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
990 brw_MOV(p
, dst
, src1
);
991 brw_set_saturate(p
, 0);
992 brw_set_predicate_control_flag_value(p
, 0xff);
995 brw_pop_insn_state(p
);
998 static void emit_pow(struct brw_wm_compile
*c
,
999 struct prog_instruction
*inst
)
1001 struct brw_compile
*p
= &c
->func
;
1002 struct brw_reg dst
, src0
, src1
;
1003 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1004 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
1005 src1
= get_src_reg(c
, &inst
->SrcReg
[1], 0);
1007 brw_MOV(p
, brw_message_reg(2), src0
);
1008 brw_MOV(p
, brw_message_reg(3), src1
);
1012 BRW_MATH_FUNCTION_POW
,
1013 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1016 BRW_MATH_DATA_VECTOR
,
1017 BRW_MATH_PRECISION_FULL
);
1020 static void emit_lrp(struct brw_wm_compile
*c
,
1021 struct prog_instruction
*inst
)
1023 struct brw_compile
*p
= &c
->func
;
1024 GLuint mask
= inst
->DstReg
.WriteMask
;
1025 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1027 int mark
= mark_tmps(c
);
1028 for (i
= 0; i
< 4; i
++) {
1029 if (mask
& (1<<i
)) {
1030 dst
= get_dst_reg(c
, inst
, i
);
1031 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
1033 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
1035 if (src1
.nr
== dst
.nr
) {
1036 tmp1
= alloc_tmp(c
);
1037 brw_MOV(p
, tmp1
, src1
);
1041 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
);
1042 if (src2
.nr
== dst
.nr
) {
1043 tmp2
= alloc_tmp(c
);
1044 brw_MOV(p
, tmp2
, src2
);
1048 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1049 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1050 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1051 brw_MAC(p
, dst
, src0
, tmp1
);
1052 brw_set_saturate(p
, 0);
1054 release_tmps(c
, mark
);
1059 * For GLSL shaders, this KIL will be unconditional.
1060 * It may be contained inside an IF/ENDIF structure of course.
1062 static void emit_kil(struct brw_wm_compile
*c
)
1064 struct brw_compile
*p
= &c
->func
;
1065 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1066 brw_push_insn_state(p
);
1067 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1068 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1069 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1070 brw_pop_insn_state(p
);
1073 static void emit_mad(struct brw_wm_compile
*c
,
1074 struct prog_instruction
*inst
)
1076 struct brw_compile
*p
= &c
->func
;
1077 GLuint mask
= inst
->DstReg
.WriteMask
;
1078 struct brw_reg dst
, src0
, src1
, src2
;
1081 for (i
= 0; i
< 4; i
++) {
1082 if (mask
& (1<<i
)) {
1083 dst
= get_dst_reg(c
, inst
, i
);
1084 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
1085 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
1086 src2
= get_src_reg(c
, &inst
->SrcReg
[2], i
);
1087 brw_MUL(p
, dst
, src0
, src1
);
1089 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1090 brw_ADD(p
, dst
, dst
, src2
);
1091 brw_set_saturate(p
, 0);
1096 static void emit_sop(struct brw_wm_compile
*c
,
1097 struct prog_instruction
*inst
, GLuint cond
)
1099 struct brw_compile
*p
= &c
->func
;
1100 GLuint mask
= inst
->DstReg
.WriteMask
;
1101 struct brw_reg dst
, src0
, src1
;
1104 for (i
= 0; i
< 4; i
++) {
1105 if (mask
& (1<<i
)) {
1106 dst
= get_dst_reg(c
, inst
, i
);
1107 src0
= get_src_reg(c
, &inst
->SrcReg
[0], i
);
1108 src1
= get_src_reg(c
, &inst
->SrcReg
[1], i
);
1109 brw_push_insn_state(p
);
1110 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1111 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1112 brw_MOV(p
, dst
, brw_imm_f(0.0));
1113 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1114 brw_MOV(p
, dst
, brw_imm_f(1.0));
1115 brw_pop_insn_state(p
);
1120 static void emit_slt(struct brw_wm_compile
*c
,
1121 struct prog_instruction
*inst
)
1123 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1126 static void emit_sle(struct brw_wm_compile
*c
,
1127 struct prog_instruction
*inst
)
1129 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1132 static void emit_sgt(struct brw_wm_compile
*c
,
1133 struct prog_instruction
*inst
)
1135 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1138 static void emit_sge(struct brw_wm_compile
*c
,
1139 struct prog_instruction
*inst
)
1141 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1144 static void emit_seq(struct brw_wm_compile
*c
,
1145 struct prog_instruction
*inst
)
1147 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1150 static void emit_sne(struct brw_wm_compile
*c
,
1151 struct prog_instruction
*inst
)
1153 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1156 static void emit_ddx(struct brw_wm_compile
*c
,
1157 struct prog_instruction
*inst
)
1159 struct brw_compile
*p
= &c
->func
;
1160 GLuint mask
= inst
->DstReg
.WriteMask
;
1161 struct brw_reg interp
[4];
1163 struct brw_reg src0
, w
;
1165 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
1166 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3);
1168 interp
[0] = brw_vec1_grf(nr
, 0);
1169 interp
[1] = brw_vec1_grf(nr
, 4);
1170 interp
[2] = brw_vec1_grf(nr
+1, 0);
1171 interp
[3] = brw_vec1_grf(nr
+1, 4);
1172 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1173 for(i
= 0; i
< 4; i
++ ) {
1174 if (mask
& (1<<i
)) {
1175 dst
= get_dst_reg(c
, inst
, i
);
1176 brw_MOV(p
, dst
, interp
[i
]);
1177 brw_MUL(p
, dst
, dst
, w
);
1180 brw_set_saturate(p
, 0);
1183 static void emit_ddy(struct brw_wm_compile
*c
,
1184 struct prog_instruction
*inst
)
1186 struct brw_compile
*p
= &c
->func
;
1187 GLuint mask
= inst
->DstReg
.WriteMask
;
1188 struct brw_reg interp
[4];
1190 struct brw_reg src0
, w
;
1193 src0
= get_src_reg(c
, &inst
->SrcReg
[0], 0);
1195 w
= get_src_reg(c
, &inst
->SrcReg
[1], 3);
1196 interp
[0] = brw_vec1_grf(nr
, 0);
1197 interp
[1] = brw_vec1_grf(nr
, 4);
1198 interp
[2] = brw_vec1_grf(nr
+1, 0);
1199 interp
[3] = brw_vec1_grf(nr
+1, 4);
1200 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1201 for(i
= 0; i
< 4; i
++ ) {
1202 if (mask
& (1<<i
)) {
1203 dst
= get_dst_reg(c
, inst
, i
);
1204 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1205 brw_MUL(p
, dst
, dst
, w
);
1208 brw_set_saturate(p
, 0);
1211 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1213 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1217 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1219 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1222 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1224 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1227 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1229 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1233 /* One-, two- and three-dimensional Perlin noise, similar to the description
1234 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1235 static void noise1_sub( struct brw_wm_compile
*c
) {
1237 struct brw_compile
*p
= &c
->func
;
1238 struct brw_reg param
,
1239 x0
, x1
, /* gradients at each end */
1240 t
, tmp
[ 2 ], /* float temporaries */
1241 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1243 int mark
= mark_tmps( c
);
1245 x0
= alloc_tmp( c
);
1246 x1
= alloc_tmp( c
);
1248 tmp
[ 0 ] = alloc_tmp( c
);
1249 tmp
[ 1 ] = alloc_tmp( c
);
1250 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1251 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1252 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1253 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1254 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1256 param
= lookup_tmp( c
, mark
- 2 );
1258 brw_set_access_mode( p
, BRW_ALIGN_1
);
1260 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1262 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1263 be hashed. Also compute the remainder (offset within the unit
1264 length), interleaved to reduce register dependency penalties. */
1265 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1266 brw_FRC( p
, param
, param
);
1267 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1268 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1269 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1271 /* We're now ready to perform the hashing. The two hashes are
1272 interleaved for performance. The hash function used is
1273 designed to rapidly achieve avalanche and require only 32x16
1274 bit multiplication, and 16-bit swizzles (which we get for
1275 free). We can't use immediate operands in the multiplies,
1276 because immediates are permitted only in src1 and the 16-bit
1277 factor is permitted only in src0. */
1278 for( i
= 0; i
< 2; i
++ )
1279 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1280 for( i
= 0; i
< 2; i
++ )
1281 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1282 high_words( itmp
[ i
] ) );
1283 for( i
= 0; i
< 2; i
++ )
1284 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1285 for( i
= 0; i
< 2; i
++ )
1286 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1287 high_words( itmp
[ i
] ) );
1288 for( i
= 0; i
< 2; i
++ )
1289 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1290 for( i
= 0; i
< 2; i
++ )
1291 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1292 high_words( itmp
[ i
] ) );
1294 /* Now we want to initialise the two gradients based on the
1295 hashes. Format conversion from signed integer to float leaves
1296 everything scaled too high by a factor of pow( 2, 31 ), but
1297 we correct for that right at the end. */
1298 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1299 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1300 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1302 brw_MUL( p
, x0
, x0
, param
);
1303 brw_MUL( p
, x1
, x1
, t
);
1305 /* We interpolate between the gradients using the polynomial
1306 6t^5 - 15t^4 + 10t^3 (Perlin). */
1307 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1308 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1309 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1310 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1311 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1312 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1314 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1315 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1316 brw_MUL( p
, x1
, x1
, param
);
1317 brw_ADD( p
, x0
, x0
, x1
);
1318 /* scale by pow( 2, -30 ), to compensate for the format conversion
1319 above and an extra factor of 2 so that a single gradient covers
1321 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1323 release_tmps( c
, mark
);
1326 static void emit_noise1( struct brw_wm_compile
*c
,
1327 struct prog_instruction
*inst
)
1329 struct brw_compile
*p
= &c
->func
;
1330 struct brw_reg src
, param
, dst
;
1331 GLuint mask
= inst
->DstReg
.WriteMask
;
1333 int mark
= mark_tmps( c
);
1335 assert( mark
== 0 );
1337 src
= get_src_reg( c
, inst
->SrcReg
, 0 );
1339 param
= alloc_tmp( c
);
1341 brw_MOV( p
, param
, src
);
1343 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1345 /* Fill in the result: */
1346 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1347 for (i
= 0 ; i
< 4; i
++) {
1348 if (mask
& (1<<i
)) {
1349 dst
= get_dst_reg(c
, inst
, i
);
1350 brw_MOV( p
, dst
, param
);
1353 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1354 brw_set_saturate( p
, 0 );
1356 release_tmps( c
, mark
);
1359 static void noise2_sub( struct brw_wm_compile
*c
) {
1361 struct brw_compile
*p
= &c
->func
;
1362 struct brw_reg param0
, param1
,
1363 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1364 t
, tmp
[ 4 ], /* float temporaries */
1365 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1367 int mark
= mark_tmps( c
);
1369 x0y0
= alloc_tmp( c
);
1370 x0y1
= alloc_tmp( c
);
1371 x1y0
= alloc_tmp( c
);
1372 x1y1
= alloc_tmp( c
);
1374 for( i
= 0; i
< 4; i
++ ) {
1375 tmp
[ i
] = alloc_tmp( c
);
1376 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1378 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1379 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1380 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1382 param0
= lookup_tmp( c
, mark
- 3 );
1383 param1
= lookup_tmp( c
, mark
- 2 );
1385 brw_set_access_mode( p
, BRW_ALIGN_1
);
1387 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1388 be hashed. Also compute the remainders (offsets within the unit
1389 square), interleaved to reduce register dependency penalties. */
1390 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1391 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1392 brw_FRC( p
, param0
, param0
);
1393 brw_FRC( p
, param1
, param1
);
1394 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1395 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1396 low_words( itmp
[ 1 ] ) );
1397 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1398 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1399 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1400 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1401 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1403 /* We're now ready to perform the hashing. The four hashes are
1404 interleaved for performance. The hash function used is
1405 designed to rapidly achieve avalanche and require only 32x16
1406 bit multiplication, and 16-bit swizzles (which we get for
1407 free). We can't use immediate operands in the multiplies,
1408 because immediates are permitted only in src1 and the 16-bit
1409 factor is permitted only in src0. */
1410 for( i
= 0; i
< 4; i
++ )
1411 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1412 for( i
= 0; i
< 4; i
++ )
1413 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1414 high_words( itmp
[ i
] ) );
1415 for( i
= 0; i
< 4; i
++ )
1416 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1417 for( i
= 0; i
< 4; i
++ )
1418 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1419 high_words( itmp
[ i
] ) );
1420 for( i
= 0; i
< 4; i
++ )
1421 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1422 for( i
= 0; i
< 4; i
++ )
1423 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1424 high_words( itmp
[ i
] ) );
1426 /* Now we want to initialise the four gradients based on the
1427 hashes. Format conversion from signed integer to float leaves
1428 everything scaled too high by a factor of pow( 2, 15 ), but
1429 we correct for that right at the end. */
1430 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1431 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1432 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1433 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1434 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1436 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1437 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1438 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1439 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1441 brw_MUL( p
, x1y0
, x1y0
, t
);
1442 brw_MUL( p
, x1y1
, x1y1
, t
);
1443 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1444 brw_MUL( p
, x0y0
, x0y0
, param0
);
1445 brw_MUL( p
, x0y1
, x0y1
, param0
);
1447 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1448 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1449 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1450 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1452 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1453 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1454 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1455 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1457 /* We interpolate between the gradients using the polynomial
1458 6t^5 - 15t^4 + 10t^3 (Perlin). */
1459 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1460 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1461 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1462 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1463 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1464 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1465 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1467 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1468 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1469 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1470 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1471 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1473 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1474 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1475 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1476 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1478 /* Here we interpolate in the y dimension... */
1479 brw_MUL( p
, x0y1
, x0y1
, param1
);
1480 brw_MUL( p
, x1y1
, x1y1
, param1
);
1481 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1482 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1484 /* And now in x. There are horrible register dependencies here,
1485 but we have nothing else to do. */
1486 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1487 brw_MUL( p
, x1y0
, x1y0
, param0
);
1488 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1490 /* scale by pow( 2, -15 ), as described above */
1491 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1493 release_tmps( c
, mark
);
1496 static void emit_noise2( struct brw_wm_compile
*c
,
1497 struct prog_instruction
*inst
)
1499 struct brw_compile
*p
= &c
->func
;
1500 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1501 GLuint mask
= inst
->DstReg
.WriteMask
;
1503 int mark
= mark_tmps( c
);
1505 assert( mark
== 0 );
1507 src0
= get_src_reg( c
, inst
->SrcReg
, 0 );
1508 src1
= get_src_reg( c
, inst
->SrcReg
, 1 );
1510 param0
= alloc_tmp( c
);
1511 param1
= alloc_tmp( c
);
1513 brw_MOV( p
, param0
, src0
);
1514 brw_MOV( p
, param1
, src1
);
1516 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1518 /* Fill in the result: */
1519 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1520 for (i
= 0 ; i
< 4; i
++) {
1521 if (mask
& (1<<i
)) {
1522 dst
= get_dst_reg(c
, inst
, i
);
1523 brw_MOV( p
, dst
, param0
);
1526 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1527 brw_set_saturate( p
, 0 );
1529 release_tmps( c
, mark
);
1533 * The three-dimensional case is much like the one- and two- versions above,
1534 * but since the number of corners is rapidly growing we now pack 16 16-bit
1535 * hashes into each register to extract more parallelism from the EUs.
1537 static void noise3_sub( struct brw_wm_compile
*c
) {
1539 struct brw_compile
*p
= &c
->func
;
1540 struct brw_reg param0
, param1
, param2
,
1541 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1542 xi
, yi
, zi
, /* interpolation coefficients */
1543 t
, tmp
[ 8 ], /* float temporaries */
1544 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1545 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1547 int mark
= mark_tmps( c
);
1549 x0y0
= alloc_tmp( c
);
1550 x0y1
= alloc_tmp( c
);
1551 x1y0
= alloc_tmp( c
);
1552 x1y1
= alloc_tmp( c
);
1553 xi
= alloc_tmp( c
);
1554 yi
= alloc_tmp( c
);
1555 zi
= alloc_tmp( c
);
1557 for( i
= 0; i
< 8; i
++ ) {
1558 tmp
[ i
] = alloc_tmp( c
);
1559 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1560 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1563 param0
= lookup_tmp( c
, mark
- 4 );
1564 param1
= lookup_tmp( c
, mark
- 3 );
1565 param2
= lookup_tmp( c
, mark
- 2 );
1567 brw_set_access_mode( p
, BRW_ALIGN_1
);
1569 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1570 be hashed. Also compute the remainders (offsets within the unit
1571 cube), interleaved to reduce register dependency penalties. */
1572 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1573 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1574 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1575 brw_FRC( p
, param0
, param0
);
1576 brw_FRC( p
, param1
, param1
);
1577 brw_FRC( p
, param2
, param2
);
1578 /* Since we now have only 16 bits of precision in the hash, we must
1579 be more careful about thorough mixing to maintain entropy as we
1580 squash the input vector into a small scalar. */
1581 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1582 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1583 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1584 brw_imm_uw( 0x9B93 ) );
1585 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1586 brw_imm_uw( 0xBC8F ) );
1588 /* Temporarily disable the execution mask while we work with ExecSize=16
1589 channels (the mask is set for ExecSize=8 and is probably incorrect).
1590 Although this might cause execution of unwanted channels, the code
1591 writes only to temporary registers and has no side effects, so
1592 disabling the mask is harmless. */
1593 brw_push_insn_state( p
);
1594 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1595 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1596 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1597 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1599 /* We're now ready to perform the hashing. The eight hashes are
1600 interleaved for performance. The hash function used is
1601 designed to rapidly achieve avalanche and require only 16x16
1602 bit multiplication, and 8-bit swizzles (which we get for
1604 for( i
= 0; i
< 4; i
++ )
1605 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1606 for( i
= 0; i
< 4; i
++ )
1607 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1608 odd_bytes( wtmp
[ i
] ) );
1609 for( i
= 0; i
< 4; i
++ )
1610 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1611 for( i
= 0; i
< 4; i
++ )
1612 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1613 odd_bytes( wtmp
[ i
] ) );
1614 brw_pop_insn_state( p
);
1616 /* Now we want to initialise the four rear gradients based on the
1617 hashes. Format conversion from signed integer to float leaves
1618 everything scaled too high by a factor of pow( 2, 15 ), but
1619 we correct for that right at the end. */
1621 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1622 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1623 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1624 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1625 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1627 brw_push_insn_state( p
);
1628 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1629 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1630 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1631 brw_pop_insn_state( p
);
1633 brw_MUL( p
, x1y0
, x1y0
, t
);
1634 brw_MUL( p
, x1y1
, x1y1
, t
);
1635 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1636 brw_MUL( p
, x0y0
, x0y0
, param0
);
1637 brw_MUL( p
, x0y1
, x0y1
, param0
);
1640 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1641 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1642 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1643 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1645 brw_push_insn_state( p
);
1646 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1647 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1648 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1649 brw_pop_insn_state( p
);
1651 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1652 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1653 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1654 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1655 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1657 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1658 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1659 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1660 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1663 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1664 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1665 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1666 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1668 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1669 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1670 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1671 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1673 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1674 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1675 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1676 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1678 /* We interpolate between the gradients using the polynomial
1679 6t^5 - 15t^4 + 10t^3 (Perlin). */
1680 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1681 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1682 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1683 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1684 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1685 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1686 brw_MUL( p
, xi
, xi
, param0
);
1687 brw_MUL( p
, yi
, yi
, param1
);
1688 brw_MUL( p
, zi
, zi
, param2
);
1689 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1690 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1691 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1692 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1693 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1694 brw_MUL( p
, xi
, xi
, param0
);
1695 brw_MUL( p
, yi
, yi
, param1
);
1696 brw_MUL( p
, zi
, zi
, param2
);
1697 brw_MUL( p
, xi
, xi
, param0
);
1698 brw_MUL( p
, yi
, yi
, param1
);
1699 brw_MUL( p
, zi
, zi
, param2
);
1700 brw_MUL( p
, xi
, xi
, param0
);
1701 brw_MUL( p
, yi
, yi
, param1
);
1702 brw_MUL( p
, zi
, zi
, param2
);
1704 /* Here we interpolate in the y dimension... */
1705 brw_MUL( p
, x0y1
, x0y1
, yi
);
1706 brw_MUL( p
, x1y1
, x1y1
, yi
);
1707 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1708 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1710 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1711 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1712 brw_MUL( p
, x1y0
, x1y0
, xi
);
1713 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
1715 /* Now do the same thing for the front four gradients... */
1717 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
1718 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
1719 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
1720 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
1722 brw_push_insn_state( p
);
1723 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1724 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1725 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1726 brw_pop_insn_state( p
);
1728 brw_MUL( p
, x1y0
, x1y0
, t
);
1729 brw_MUL( p
, x1y1
, x1y1
, t
);
1730 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1731 brw_MUL( p
, x0y0
, x0y0
, param0
);
1732 brw_MUL( p
, x0y1
, x0y1
, param0
);
1735 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1736 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1737 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1738 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1740 brw_push_insn_state( p
);
1741 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1742 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
1743 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
1744 brw_pop_insn_state( p
);
1746 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1747 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1748 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
1749 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1750 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1752 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1753 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1754 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1755 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1758 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
1759 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
1760 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
1761 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
1763 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
1764 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1765 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
1766 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1768 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1769 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1770 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1771 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1773 /* The interpolation coefficients are still around from last time, so
1774 again interpolate in the y dimension... */
1775 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
1776 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
1777 brw_MUL( p
, x0y1
, x0y1
, yi
);
1778 brw_MUL( p
, x1y1
, x1y1
, yi
);
1779 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1780 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1782 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
1783 time put the front face in tmp[ 1 ] and we're nearly there... */
1784 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1785 brw_MUL( p
, x1y0
, x1y0
, xi
);
1786 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
1788 /* The final interpolation, in the z dimension: */
1789 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
1790 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
1791 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
1793 /* scale by pow( 2, -15 ), as described above */
1794 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
1796 release_tmps( c
, mark
);
1799 static void emit_noise3( struct brw_wm_compile
*c
,
1800 struct prog_instruction
*inst
)
1802 struct brw_compile
*p
= &c
->func
;
1803 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
1804 GLuint mask
= inst
->DstReg
.WriteMask
;
1806 int mark
= mark_tmps( c
);
1808 assert( mark
== 0 );
1810 src0
= get_src_reg( c
, inst
->SrcReg
, 0 );
1811 src1
= get_src_reg( c
, inst
->SrcReg
, 1 );
1812 src2
= get_src_reg( c
, inst
->SrcReg
, 2 );
1814 param0
= alloc_tmp( c
);
1815 param1
= alloc_tmp( c
);
1816 param2
= alloc_tmp( c
);
1818 brw_MOV( p
, param0
, src0
);
1819 brw_MOV( p
, param1
, src1
);
1820 brw_MOV( p
, param2
, src2
);
1822 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
1824 /* Fill in the result: */
1825 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1826 for (i
= 0 ; i
< 4; i
++) {
1827 if (mask
& (1<<i
)) {
1828 dst
= get_dst_reg(c
, inst
, i
);
1829 brw_MOV( p
, dst
, param0
);
1832 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1833 brw_set_saturate( p
, 0 );
1835 release_tmps( c
, mark
);
1839 * For the four-dimensional case, the little micro-optimisation benefits
1840 * we obtain by unrolling all the loops aren't worth the massive bloat it
1841 * now causes. Instead, we loop twice around performing a similar operation
1842 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
1843 * code to glue it all together.
1845 static void noise4_sub( struct brw_wm_compile
*c
)
1847 struct brw_compile
*p
= &c
->func
;
1848 struct brw_reg param
[ 4 ],
1849 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1850 w0
, /* noise for the w=0 cube */
1851 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
1852 interp
[ 4 ], /* interpolation coefficients */
1853 t
, tmp
[ 8 ], /* float temporaries */
1854 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1855 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1857 int mark
= mark_tmps( c
);
1858 GLuint loop
, origin
;
1860 x0y0
= alloc_tmp( c
);
1861 x0y1
= alloc_tmp( c
);
1862 x1y0
= alloc_tmp( c
);
1863 x1y1
= alloc_tmp( c
);
1865 w0
= alloc_tmp( c
);
1866 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1867 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
1869 for( i
= 0; i
< 4; i
++ ) {
1870 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
1871 interp
[ i
] = alloc_tmp( c
);
1874 for( i
= 0; i
< 8; i
++ ) {
1875 tmp
[ i
] = alloc_tmp( c
);
1876 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1877 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1880 brw_set_access_mode( p
, BRW_ALIGN_1
);
1882 /* We only want 16 bits of precision from the integral part of each
1883 co-ordinate, but unfortunately the RNDD semantics would saturate
1884 at 16 bits if we performed the operation directly to a 16-bit
1885 destination. Therefore, we round to 32-bit temporaries where
1886 appropriate, and then store only the lower 16 bits. */
1887 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
1888 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
1889 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
1890 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
1891 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
1892 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
1894 /* Modify the flag register here, because the side effect is useful
1895 later (see below). We know for certain that all flags will be
1896 cleared, since the FRC instruction cannot possibly generate
1897 negative results. Even for exceptional inputs (infinities, denormals,
1898 NaNs), the architecture guarantees that the L conditional is false. */
1899 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
1900 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
1901 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
1902 for( i
= 1; i
< 4; i
++ )
1903 brw_FRC( p
, param
[ i
], param
[ i
] );
1905 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
1907 for( i
= 0; i
< 4; i
++ )
1908 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
1909 for( i
= 0; i
< 4; i
++ )
1910 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
1911 for( i
= 0; i
< 4; i
++ )
1912 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1913 for( i
= 0; i
< 4; i
++ )
1914 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
1915 for( j
= 0; j
< 3; j
++ )
1916 for( i
= 0; i
< 4; i
++ )
1917 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
1919 /* Mark the current address, as it will be a jump destination. The
1920 following code will be executed twice: first, with the flag
1921 register clear indicating the w=0 case, and second with flags
1925 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1926 be hashed. Since we have only 16 bits of precision in the hash, we
1927 must be careful about thorough mixing to maintain entropy as we
1928 squash the input vector into a small scalar. */
1929 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
1930 brw_imm_uw( 0xBC8F ) );
1931 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
1932 brw_imm_uw( 0xD0BD ) );
1933 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
1934 brw_imm_uw( 0x9B93 ) );
1935 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
1936 brw_imm_uw( 0xA359 ) );
1937 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1938 brw_imm_uw( 0xBC8F ) );
1940 /* Temporarily disable the execution mask while we work with ExecSize=16
1941 channels (the mask is set for ExecSize=8 and is probably incorrect).
1942 Although this might cause execution of unwanted channels, the code
1943 writes only to temporary registers and has no side effects, so
1944 disabling the mask is harmless. */
1945 brw_push_insn_state( p
);
1946 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1947 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1948 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1949 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1951 /* We're now ready to perform the hashing. The eight hashes are
1952 interleaved for performance. The hash function used is
1953 designed to rapidly achieve avalanche and require only 16x16
1954 bit multiplication, and 8-bit swizzles (which we get for
1956 for( i
= 0; i
< 4; i
++ )
1957 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1958 for( i
= 0; i
< 4; i
++ )
1959 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1960 odd_bytes( wtmp
[ i
] ) );
1961 for( i
= 0; i
< 4; i
++ )
1962 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1963 for( i
= 0; i
< 4; i
++ )
1964 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1965 odd_bytes( wtmp
[ i
] ) );
1966 brw_pop_insn_state( p
);
1968 /* Now we want to initialise the four rear gradients based on the
1969 hashes. Format conversion from signed integer to float leaves
1970 everything scaled too high by a factor of pow( 2, 15 ), but
1971 we correct for that right at the end. */
1973 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
1974 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1975 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1976 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1977 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1979 brw_push_insn_state( p
);
1980 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1981 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
1982 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
1983 brw_pop_insn_state( p
);
1985 brw_MUL( p
, x1y0
, x1y0
, t
);
1986 brw_MUL( p
, x1y1
, x1y1
, t
);
1987 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
1988 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
1989 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
1992 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1993 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1994 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1995 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1997 brw_push_insn_state( p
);
1998 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1999 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2000 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2001 brw_pop_insn_state( p
);
2003 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2004 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2005 /* prepare t for the w component (used below): w the first time through
2006 the loop; w - 1 the second time) */
2007 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2008 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2009 p
->current
->header
.predicate_inverse
= 1;
2010 brw_MOV( p
, t
, param
[ 3 ] );
2011 p
->current
->header
.predicate_inverse
= 0;
2012 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2013 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2014 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2016 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2017 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2018 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2019 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2022 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2023 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2024 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2025 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2027 brw_push_insn_state( p
);
2028 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2029 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2030 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2031 brw_pop_insn_state( p
);
2033 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2034 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2035 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2036 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2038 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2039 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2040 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2041 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2044 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2045 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2046 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2047 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2049 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2050 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2051 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2052 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2053 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2055 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2056 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2057 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2058 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2060 /* Here we interpolate in the y dimension... */
2061 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2062 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2063 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2064 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2065 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2066 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2068 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2069 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2070 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2071 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2073 /* Now do the same thing for the front four gradients... */
2075 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2076 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2077 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2078 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2080 brw_push_insn_state( p
);
2081 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2082 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2083 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2084 brw_pop_insn_state( p
);
2086 brw_MUL( p
, x1y0
, x1y0
, t
);
2087 brw_MUL( p
, x1y1
, x1y1
, t
);
2088 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2089 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2090 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2093 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2094 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2095 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2096 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2098 brw_push_insn_state( p
);
2099 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2100 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2101 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2102 brw_pop_insn_state( p
);
2104 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2105 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2106 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2107 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2108 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2110 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2111 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2112 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2113 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2116 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2117 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2118 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2119 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2121 brw_push_insn_state( p
);
2122 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2123 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2124 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2125 brw_pop_insn_state( p
);
2127 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2128 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2129 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2130 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2131 /* prepare t for the w component (used below): w the first time through
2132 the loop; w - 1 the second time) */
2133 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2134 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2135 p
->current
->header
.predicate_inverse
= 1;
2136 brw_MOV( p
, t
, param
[ 3 ] );
2137 p
->current
->header
.predicate_inverse
= 0;
2138 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2140 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2141 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2142 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2143 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2146 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2147 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2148 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2149 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2151 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2152 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2153 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2154 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2156 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2157 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2158 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2159 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2161 /* Interpolate in the y dimension: */
2162 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2163 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2164 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2165 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2166 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2167 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2169 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2170 time put the front face in tmp[ 1 ] and we're nearly there... */
2171 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2172 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2173 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2175 /* Another interpolation, in the z dimension: */
2176 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2177 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2178 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2180 /* Exit the loop if we've computed both cubes... */
2181 origin
= p
->nr_insn
;
2182 brw_push_insn_state( p
);
2183 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2184 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2185 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2186 brw_pop_insn_state( p
);
2188 /* Save the result for the w=0 case, and increment the w coordinate: */
2189 brw_MOV( p
, w0
, tmp
[ 0 ] );
2190 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2193 /* Loop around for the other cube. Explicitly set the flag register
2194 (unfortunately we must spend an extra instruction to do this: we
2195 can't rely on a side effect of the previous MOV or ADD because
2196 conditional modifiers which are normally true might be false in
2197 exceptional circumstances, e.g. given a NaN input; the add to
2198 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2199 brw_push_insn_state( p
);
2200 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2201 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2202 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2203 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2204 brw_pop_insn_state( p
);
2206 /* Patch the previous conditional branch now that we know the
2207 destination address. */
2208 brw_set_src1( p
->store
+ origin
,
2209 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2211 /* The very last interpolation. */
2212 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2213 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2214 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2216 /* scale by pow( 2, -15 ), as described above */
2217 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2219 release_tmps( c
, mark
);
/**
 * Emit code for a four-input noise instruction.
 *
 * Components 0..3 of inst->SrcReg are copied into four freshly allocated
 * temporaries (param0..param3), the shared SUB_NOISE4 subroutine is invoked
 * through invoke_subroutine(), and param0 is then copied into every
 * destination component enabled in inst->DstReg.WriteMask.
 *
 * \param c     compile state; instructions are emitted into c->func
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the assert on mark == 0 suggests the subroutine expects
 * its parameters in the very first temporaries; confirm against the
 * subroutine body (noise4_sub).
 */
2222 static void emit_noise4( struct brw_wm_compile
*c
,
2223 struct prog_instruction
*inst
)
2225 struct brw_compile
*p
= &c
->func
;
2226 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2227 GLuint mask
= inst
->DstReg
.WriteMask
;
/* Remember the temporary allocation level so it can be restored at the
   end by release_tmps(). */
2229 int mark
= mark_tmps( c
);
2231 assert( mark
== 0 );
/* Fetch the four input coordinates (source components 0..3). */
2233 src0
= get_src_reg( c
, inst
->SrcReg
, 0 );
2234 src1
= get_src_reg( c
, inst
->SrcReg
, 1 );
2235 src2
= get_src_reg( c
, inst
->SrcReg
, 2 );
2236 src3
= get_src_reg( c
, inst
->SrcReg
, 3 );
/* Allocate the parameter temporaries and load the inputs into them. */
2238 param0
= alloc_tmp( c
);
2239 param1
= alloc_tmp( c
);
2240 param2
= alloc_tmp( c
);
2241 param3
= alloc_tmp( c
);
2243 brw_MOV( p
, param0
, src0
);
2244 brw_MOV( p
, param1
, src1
);
2245 brw_MOV( p
, param2
, src2
);
2246 brw_MOV( p
, param3
, src3
);
/* Run the shared noise routine; the result is read back from param0
   below. */
2248 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2250 /* Fill in the result: */
2251 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2252 for (i
= 0 ; i
< 4; i
++) {
2253 if (mask
& (1<<i
)) {
2254 dst
= get_dst_reg(c
, inst
, i
);
2255 brw_MOV( p
, dst
, param0
);
/* Switch saturation back off if it was enabled for the result moves. */
2258 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2259 brw_set_saturate( p
, 0 );
/* Give back the temporaries allocated since mark_tmps(). */
2261 release_tmps( c
, mark
);
/**
 * Emit code computing the X and Y window position components.
 *
 * For each of X and Y enabled in inst->DstReg.WriteMask the operands
 * visible here are:
 *   X: src0[0] retyped to BRW_REGISTER_TYPE_W and the immediate
 *      (0 - c->key.origin_x)
 *   Y: the negated src0[1] retyped to BRW_REGISTER_TYPE_W and the
 *      immediate (c->key.origin_y + c->key.drawable_height - 1)
 *
 * \param c     compile state; supplies key.origin_x, key.origin_y and
 *              key.drawable_height
 * \param inst  the Mesa instruction being translated
 *
 * NOTE(review): the lines naming the emitted instruction and its
 * destination are not visible in this copy of the file; presumably an
 * add into dst[0] / dst[1], matching the formulas in the comments
 * below. Confirm against the complete source.
 */
2264 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2265 struct prog_instruction
*inst
)
2267 struct brw_compile
*p
= &c
->func
;
2268 GLuint mask
= inst
->DstReg
.WriteMask
;
2269 struct brw_reg src0
[2], dst
[2];
/* Only components 0 and 1 of the destination and of the first source
   operand are looked up. */
2271 dst
[0] = get_dst_reg(c
, inst
, 0);
2272 dst
[1] = get_dst_reg(c
, inst
, 1);
2274 src0
[0] = get_src_reg(c
, &inst
->SrcReg
[0], 0);
2275 src0
[1] = get_src_reg(c
, &inst
->SrcReg
[0], 1);
2277 /* Calculate the pixel offset from window bottom left into destination
2280 if (mask
& WRITEMASK_X
) {
2281 /* X' = X - origin_x */
2284 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2285 brw_imm_d(0 - c
->key
.origin_x
));
2288 if (mask
& WRITEMASK_Y
) {
2289 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2292 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2293 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2298 BIAS on SIMD8 not working yet...
/**
 * Emit a texture sample that uses the
 * BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS message type.
 *
 * The message payload is built in message registers 2..6:
 *   m2..m4  texture coordinates; components the texture target does not
 *           use are filled with 0.0
 *   m5      source component 3
 *   m6      0.0
 *
 * The surface index passed to the sampler is unit + MAX_DRAW_BUFFERS,
 * where unit is looked up in SamplerUnits[] by inst->TexSrcUnit.
 *
 * \param c     compile state; instructions are emitted into c->func
 * \param inst  the Mesa texture instruction being translated
 *
 * NOTE(review): source component 3 is presumably the LOD bias; the name
 * of the sampler call and the label of the third switch arm are not
 * visible in this copy of the file. Confirm against the complete source.
 */
2300 static void emit_txb(struct brw_wm_compile
*c
,
2301 struct prog_instruction
*inst
)
2303 struct brw_compile
*p
= &c
->func
;
2304 struct brw_reg dst
[4], src
[4], payload_reg
;
2305 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Look up the payload register, then all four destination and source
   component registers. */
2308 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2309 for (i
= 0; i
< 4; i
++)
2310 dst
[i
] = get_dst_reg(c
, inst
, i
);
2311 for (i
= 0; i
< 4; i
++)
2312 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
);
/* Load the coordinates into m2..m4 according to the texture target. */
2314 switch (inst
->TexSrcTarget
) {
2315 case TEXTURE_1D_INDEX
:
2316 brw_MOV(p
, brw_message_reg(2), src
[0]);
2317 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0));
2318 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2320 case TEXTURE_2D_INDEX
:
2321 case TEXTURE_RECT_INDEX
:
2322 brw_MOV(p
, brw_message_reg(2), src
[0]);
2323 brw_MOV(p
, brw_message_reg(3), src
[1]);
2324 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2327 brw_MOV(p
, brw_message_reg(2), src
[0]);
2328 brw_MOV(p
, brw_message_reg(3), src
[1]);
2329 brw_MOV(p
, brw_message_reg(4), src
[2]);
/* Remaining payload: source component 3 in m5 and a zero in m6. */
2332 brw_MOV(p
, brw_message_reg(5), src
[3]);
2333 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0));
2335 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2337 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2338 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2340 inst
->DstReg
.WriteMask
,
2341 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
,
/**
 * Emit a texture sample that uses the BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
 * message type.
 *
 * The texture target selects which coordinate components are sent
 * (WRITEMASK_XY for 2D and RECT targets, WRITEMASK_XYZ in a later arm).
 * Coordinates are written to consecutive message registers through the
 * swizzle table {0,1,2,2}. The surface index passed to the sampler is
 * unit + MAX_DRAW_BUFFERS.
 *
 * \param c     compile state; c->key.shadowtex_mask marks the units
 *              that are treated as shadow samplers
 * \param inst  the Mesa texture instruction being translated
 *
 * NOTE(review): the declarations of emit, nr, msg_len and i, the
 * conditions guarding the m5/m6 moves and the final write of 1.0 to
 * dst[3], and the name of the sampler call are not visible in this copy
 * of the file. Presumably the m5/m6 moves and the dst[3] write belong to
 * the shadow path; confirm against the complete source.
 */
2347 static void emit_tex(struct brw_wm_compile
*c
,
2348 struct prog_instruction
*inst
)
2350 struct brw_compile
*p
= &c
->func
;
2351 struct brw_reg dst
[4], src
[4], payload_reg
;
2352 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Nonzero when the bit for this unit is set in shadowtex_mask. */
2357 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2359 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2361 for (i
= 0; i
< 4; i
++)
2362 dst
[i
] = get_dst_reg(c
, inst
, i
);
2363 for (i
= 0; i
< 4; i
++)
2364 src
[i
] = get_src_reg(c
, &inst
->SrcReg
[0], i
);
/* Pick the set of coordinate components to send for this target. */
2367 switch (inst
->TexSrcTarget
) {
2368 case TEXTURE_1D_INDEX
:
2372 case TEXTURE_2D_INDEX
:
2373 case TEXTURE_RECT_INDEX
:
2374 emit
= WRITEMASK_XY
;
2378 emit
= WRITEMASK_XYZ
;
/* Fill the coordinate message registers: either the swizzled source
   component or 0.0. */
2384 for (i
= 0; i
< nr
; i
++) {
2385 static const GLuint swz
[4] = {0,1,2,2};
2387 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2389 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
/* Extra payload: a zero in m5 and source component 2 in m6. */
2394 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0));
2395 brw_MOV(p
, brw_message_reg(6), src
[2]);
2399 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
),
2401 retype(payload_reg
, BRW_REGISTER_TYPE_UW
),
2402 unit
+ MAX_DRAW_BUFFERS
, /* surface */
2404 inst
->DstReg
.WriteMask
,
2405 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
,
2411 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2415 * Resolve subroutine calls after code emit is done.
/**
 * Post-emit fixup: hands the generated program (c->func) to
 * brw_resolve_cals().
 *
 * \param c  compile state whose instruction store is patched
 *
 * NOTE(review): presumably this patches the call sites recorded with
 * brw_save_call() using the labels recorded with brw_save_label();
 * confirm against brw_resolve_cals().
 */
2417 static void post_wm_emit( struct brw_wm_compile
*c
)
2419 brw_resolve_cals(&c
->func
);
/**
 * Main code generation loop for the GLSL fragment shader path.
 *
 * Walks c->prog_instructions[0 .. c->nr_fp_insns) and emits hardware
 * instructions into c->func for each one:
 *  - Before each instruction the conditional modifier is set to
 *    BRW_CONDITIONAL_NZ when inst->CondUpdate is set, and to
 *    BRW_CONDITIONAL_NONE otherwise.
 *  - Most opcodes are forwarded to an emit_*() helper.
 *  - IF / ELSE / ENDIF keep their open instructions on the if_inst[]
 *    stack (at most MAX_IFSN entries, checked with assert).
 *  - Subroutine call and return sequences push and pop a return address
 *    through the address register initialised from c->stack.
 *  - BGNLOOP pushes the brw_DO() instruction on loop_inst[]; ENDLOOP emits
 *    the matching brw_WHILE() and then walks back over the loop body to
 *    patch the jump_count / pop_count fields of BREAK and CONTINUE
 *    instructions.
 *  - After each instruction predication is set to BRW_PREDICATE_NORMAL
 *    when inst->CondUpdate is set, and to BRW_PREDICATE_NONE otherwise.
 *
 * If c->reg_index reaches BRW_WM_MAX_GRF the problem is reported through
 * _mesa_problem(); there is no error recovery yet.
 *
 * \param brw  driver context
 * \param c    compile state; instructions are emitted into c->func
 *
 * NOTE(review): many case labels and break statements are not visible in
 * this copy of the file, so the mapping of opcodes to the bodies below
 * should be confirmed against the complete source.
 */
2422 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2425 #define MAX_LOOP_DEPTH 32
2426 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2427 struct brw_instruction
*inst0
, *inst1
;
2428 int i
, if_insn
= 0, loop_insn
= 0;
2429 struct brw_compile
*p
= &c
->func
;
2430 struct brw_indirect stack_index
= brw_indirect(0, 0);
/* Emit uncompressed instructions and point the address register at the
   start of the call stack (c->stack). */
2434 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2435 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2437 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2438 struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2440 if (inst
->CondUpdate
)
2441 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2443 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2445 switch (inst
->Opcode
) {
2447 emit_pixel_xy(c
, inst
);
2450 emit_delta_xy(c
, inst
);
2453 emit_pixel_w(c
, inst
);
2456 emit_linterp(c
, inst
);
2459 emit_pinterp(c
, inst
);
2462 emit_cinterp(c
, inst
);
2465 emit_wpos_xy(c
, inst
);
2468 emit_fb_write(c
, inst
);
2470 case WM_FRONTFACING
:
2471 emit_frontfacing(c
, inst
);
2492 emit_trunc(c
, inst
);
2567 emit_noise1(c
, inst
);
2570 emit_noise2(c
, inst
);
2573 emit_noise3(c
, inst
);
2576 emit_noise4(c
, inst
);
2588 assert(if_insn
< MAX_IFSN
);
2589 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2592 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2595 assert(if_insn
> 0);
2596 brw_ENDIF(p
, if_inst
[--if_insn
]);
2599 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Call sequence: store IP + 3*16 at the current stack slot, advance the
   stack pointer by 4, record the call site under inst->Comment, then
   emit an add to IP (written here with a placeholder offset of 1*16). */
2605 brw_push_insn_state(p
);
2606 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2607 brw_set_access_mode(p
, BRW_ALIGN_1
);
2608 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2609 brw_set_access_mode(p
, BRW_ALIGN_16
);
2610 brw_ADD(p
, get_addr_reg(stack_index
),
2611 get_addr_reg(stack_index
), brw_imm_d(4));
2612 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2613 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2614 brw_pop_insn_state(p
);
/* Return sequence: step the stack pointer back by 4 and load IP from
   the slot it now points at. */
2618 brw_push_insn_state(p
);
2619 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2620 brw_ADD(p
, get_addr_reg(stack_index
),
2621 get_addr_reg(stack_index
), brw_imm_d(-4));
2622 brw_set_access_mode(p
, BRW_ALIGN_1
);
2623 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2624 brw_set_access_mode(p
, BRW_ALIGN_16
);
2625 brw_pop_insn_state(p
);
/* NOTE(review): no bound check of loop_insn against MAX_LOOP_DEPTH is
   visible here, unlike the assert guarding if_inst[]; confirm. */
2628 case OPCODE_BGNLOOP
:
2629 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2633 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2637 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2639 case OPCODE_ENDLOOP
:
2641 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2642 /* patch all the BREAK instructions from
2644 while (inst0
> loop_inst
[loop_insn
]) {
2646 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2647 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2648 inst0
->bits3
.if_else
.pop_count
= 0;
2649 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2650 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2651 inst0
->bits3
.if_else
.pop_count
= 0;
2656 _mesa_printf("unsupported IR in fragment shader %d\n",
2659 if (inst
->CondUpdate
)
2660 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2662 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2666 if (c
->reg_index
>= BRW_WM_MAX_GRF
) {
2667 _mesa_problem(NULL
, "Ran out of registers in brw_wm_emit_glsl()");
2668 /* XXX we need to do some proper error recovery here */
2674 * Do GPU code generation for shaders that use GLSL features such as
2675 * flow control. Other shaders will be compiled with the
/**
 * Entry point for compiling a fragment program through the GLSL code
 * generator.
 *
 * Runs brw_wm_emit_glsl() to generate the code and then records the
 * register usage in c->prog_data. When DEBUG_WM is set in INTEL_DEBUG,
 * a banner is printed on entry and the program is printed once emission
 * is done.
 *
 * \param brw  driver context, passed through to brw_wm_emit_glsl()
 * \param c    compile state; c->prog_data.total_grf and
 *             c->prog_data.total_scratch are written on return
 *
 * NOTE(review): the call implementing the translation/simplification
 * step announced by the first comment below is not visible in this copy
 * of the file.
 */
2677 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2679 if (INTEL_DEBUG
& DEBUG_WM
) {
2680 _mesa_printf("brw_wm_glsl_emit:\n");
2683 /* initial instruction translation/simplification */
2686 /* actual code generation */
2687 brw_wm_emit_glsl(brw
, c
);
2689 if (INTEL_DEBUG
& DEBUG_WM
) {
2690 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* Report the number of GRF registers used (c->reg_index) and request no
   scratch space. */
2693 c
->prog_data
.total_grf
= c
->reg_index
;
2694 c
->prog_data
.total_scratch
= 0;