1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
15 * Determine if the given fragment program uses GLSL features such
16 * as flow conditionals, loops, subroutines.
17 * Some GLSL shaders may use these features, others might not.
19 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
23 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
24 switch (inst
->Opcode
) {
49 reclaim_temps(struct brw_wm_compile
*c
);
52 /** Mark GRF register as used. */
54 prealloc_grf(struct brw_wm_compile
*c
, int r
)
56 c
->used_grf
[r
] = GL_TRUE
;
60 /** Mark given GRF register as not in use. */
62 release_grf(struct brw_wm_compile
*c
, int r
)
64 /*assert(c->used_grf[r]);*/
65 c
->used_grf
[r
] = GL_FALSE
;
66 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
70 /** Return index of a free GRF, mark it as used. */
72 alloc_grf(struct brw_wm_compile
*c
)
75 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
76 if (!c
->used_grf
[r
]) {
77 c
->used_grf
[r
] = GL_TRUE
;
78 c
->first_free_grf
= r
+ 1; /* a guess */
83 /* no free temps, try to reclaim some */
85 c
->first_free_grf
= 0;
88 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
89 if (!c
->used_grf
[r
]) {
90 c
->used_grf
[r
] = GL_TRUE
;
91 c
->first_free_grf
= r
+ 1; /* a guess */
96 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
97 assert(c
->used_grf
[r
]);
100 /* really, no free GRF regs found */
101 if (!c
->out_of_regs
) {
102 /* print warning once per compilation */
103 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
104 c
->out_of_regs
= GL_TRUE
;
111 /** Return number of GRF registers used */
113 num_grf_used(const struct brw_wm_compile
*c
)
116 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
125 * Record the mapping of a Mesa register to a hardware register.
127 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
128 int component
, struct brw_reg reg
)
130 c
->wm_regs
[file
][index
][component
].reg
= reg
;
131 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
135 * Examine instruction's write mask to find index of first component
136 * enabled for writing.
138 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
141 for (i
= 0; i
< 4; i
++)
142 if (inst
->DstReg
.WriteMask
& (1<<i
))
147 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
151 /* if we need to allocate another temp, grow the tmp_regs[] array */
152 if (c
->tmp_index
== c
->tmp_max
) {
153 int r
= alloc_grf(c
);
155 /*printf("Out of temps in %s\n", __FUNCTION__);*/
156 r
= 50; /* XXX random register! */
158 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
161 /* form the GRF register */
162 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
163 /*printf("alloc_temp %d\n", reg.nr);*/
164 assert(reg
.nr
< BRW_WM_MAX_GRF
);
170 * Save current temp register info.
171 * There must be a matching call to release_tmps().
173 static int mark_tmps(struct brw_wm_compile
*c
)
178 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
180 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
183 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
189 * Convert Mesa src register to brw register.
191 * Since we're running in SOA mode each Mesa register corresponds to four
192 * hardware registers. We allocate the hardware registers as needed here.
194 * \param file register file, one of PROGRAM_x
195 * \param index register number
196 * \param component src component (X=0, Y=1, Z=2, W=3)
197 * \param nr not used?!?
198 * \param neg negate value?
199 * \param abs take absolute value?
201 static struct brw_reg
202 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
203 int nr
, GLuint neg
, GLuint abs
)
207 case PROGRAM_STATE_VAR
:
208 case PROGRAM_CONSTANT
:
209 case PROGRAM_UNIFORM
:
210 file
= PROGRAM_STATE_VAR
;
212 case PROGRAM_UNDEFINED
:
213 return brw_null_reg();
214 case PROGRAM_TEMPORARY
:
217 case PROGRAM_PAYLOAD
:
220 _mesa_problem(NULL
, "Unexpected file in get_reg()");
221 return brw_null_reg();
225 assert(component
< 4);
227 /* see if we've already allocated a HW register for this Mesa register */
228 if (c
->wm_regs
[file
][index
][component
].inited
) {
230 reg
= c
->wm_regs
[file
][index
][component
].reg
;
233 /* no, allocate new register */
234 int grf
= alloc_grf(c
);
235 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
237 /* totally out of temps */
238 grf
= 51; /* XXX random register! */
241 reg
= brw_vec8_grf(grf
, 0);
242 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
244 set_reg(c
, file
, index
, component
, reg
);
247 if (neg
& (1 << component
)) {
258 * This is called if we run out of GRF registers. Examine the live intervals
259 * of temp regs in the program and free those which won't be used again.
262 reclaim_temps(struct brw_wm_compile
*c
)
264 GLint intBegin
[MAX_PROGRAM_TEMPS
];
265 GLint intEnd
[MAX_PROGRAM_TEMPS
];
268 /*printf("Reclaim temps:\n");*/
270 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
273 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
274 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
275 /* program temp[i] can be freed */
277 /*printf(" temp[%d] is dead\n", index);*/
278 for (component
= 0; component
< 4; component
++) {
279 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
280 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
283 printf(" Reclaim temp %d, reg %d at inst %d\n",
284 index, r, c->cur_inst);
286 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
297 * Preallocate registers. This sets up the Mesa to hardware register
298 * mapping for certain registers, such as constants (uniforms/state vars)
301 static void prealloc_reg(struct brw_wm_compile
*c
)
305 int urb_read_length
= 0;
306 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
307 GLuint reg_index
= 0;
309 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
310 c
->first_free_grf
= 0;
312 for (i
= 0; i
< 4; i
++) {
313 if (i
< c
->key
.nr_depth_regs
)
314 reg
= brw_vec8_grf(i
* 2, 0);
316 reg
= brw_vec8_grf(0, 0);
317 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
319 reg_index
+= 2 * c
->key
.nr_depth_regs
;
323 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
324 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
326 /* use a real constant buffer, or just use a section of the GRF? */
327 /* XXX this heuristic may need adjustment... */
328 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
329 c
->fp
->use_const_buffer
= GL_TRUE
;
331 c
->fp
->use_const_buffer
= GL_FALSE
;
332 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
334 if (c
->fp
->use_const_buffer
) {
335 /* We'll use a real constant buffer and fetch constants from
336 * it with a dataport read message.
339 /* number of float constants in CURBE */
340 c
->prog_data
.nr_params
= 0;
343 const struct gl_program_parameter_list
*plist
=
344 c
->fp
->program
.Base
.Parameters
;
347 /* number of float constants in CURBE */
348 c
->prog_data
.nr_params
= 4 * nr_params
;
350 /* loop over program constants (float[4]) */
351 for (i
= 0; i
< nr_params
; i
++) {
352 /* loop over XYZW channels */
353 for (j
= 0; j
< 4; j
++, index
++) {
354 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
355 /* Save pointer to parameter/constant value.
356 * Constants will be copied in prepare_constant_buffer()
358 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
359 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
362 /* number of constant regs used (each reg is float[8]) */
363 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
364 reg_index
+= c
->nr_creg
;
368 /* fragment shader inputs */
369 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
372 if (i
>= VERT_RESULT_VAR0
)
373 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
374 else if (i
<= VERT_RESULT_TEX7
)
379 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
380 urb_read_length
= reg_index
;
381 reg
= brw_vec8_grf(reg_index
, 0);
382 for (j
= 0; j
< 4; j
++)
383 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
385 if (c
->key
.vp_outputs_written
& (1 << i
)) {
390 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
391 c
->prog_data
.urb_read_length
= urb_read_length
;
392 c
->prog_data
.curb_read_length
= c
->nr_creg
;
393 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
395 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
398 /* mark GRF regs [0..reg_index-1] as in-use */
399 for (i
= 0; i
< reg_index
; i
++)
402 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
403 prealloc_grf(c
, 126);
404 prealloc_grf(c
, 127);
406 /* An instruction may reference up to three constants.
407 * They'll be found in these registers.
408 * XXX alloc these on demand!
410 if (c
->fp
->use_const_buffer
) {
411 for (i
= 0; i
< 3; i
++) {
412 c
->current_const
[i
].index
= -1;
413 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
417 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
418 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
424 * Check if any of the instruction's src registers are constants, uniforms,
425 * or statevars. If so, fetch any constants that we don't already have in
426 * the three GRF slots.
428 static void fetch_constants(struct brw_wm_compile
*c
,
429 const struct prog_instruction
*inst
)
431 struct brw_compile
*p
= &c
->func
;
434 /* loop over instruction src regs */
435 for (i
= 0; i
< 3; i
++) {
436 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
437 if (src
->File
== PROGRAM_STATE_VAR
||
438 src
->File
== PROGRAM_CONSTANT
||
439 src
->File
== PROGRAM_UNIFORM
) {
440 c
->current_const
[i
].index
= src
->Index
;
443 printf(" fetch const[%d] for arg %d into reg %d\n",
444 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
447 /* need to fetch the constant now */
449 c
->current_const
[i
].reg
, /* writeback dest */
450 src
->RelAddr
, /* relative indexing? */
451 16 * src
->Index
, /* byte offset */
452 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
460 * Convert Mesa dst register to brw register.
462 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
463 const struct prog_instruction
*inst
,
467 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
472 static struct brw_reg
473 get_src_reg_const(struct brw_wm_compile
*c
,
474 const struct prog_instruction
*inst
,
475 GLuint srcRegIndex
, GLuint component
)
477 /* We should have already fetched the constant from the constant
478 * buffer in fetch_constants(). Now we just have to return a
479 * register description that extracts the needed component and
480 * smears it across all eight vector components.
482 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
483 struct brw_reg const_reg
;
485 assert(component
< 4);
486 assert(srcRegIndex
< 3);
487 assert(c
->current_const
[srcRegIndex
].index
!= -1);
488 const_reg
= c
->current_const
[srcRegIndex
].reg
;
490 /* extract desired float from the const_reg, and smear */
491 const_reg
= stride(const_reg
, 0, 1, 0);
492 const_reg
.subnr
= component
* 4;
494 if (src
->Negate
& (1 << component
))
495 const_reg
= negate(const_reg
);
497 const_reg
= brw_abs(const_reg
);
500 printf(" form const[%d].%d for arg %d, reg %d\n",
501 c
->current_const
[srcRegIndex
].index
,
512 * Convert Mesa src register to brw register.
514 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
515 const struct prog_instruction
*inst
,
516 GLuint srcRegIndex
, GLuint channel
)
518 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
520 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
522 /* Extended swizzle terms */
523 if (component
== SWIZZLE_ZERO
) {
524 return brw_imm_f(0.0F
);
526 else if (component
== SWIZZLE_ONE
) {
527 return brw_imm_f(1.0F
);
530 if (c
->fp
->use_const_buffer
&&
531 (src
->File
== PROGRAM_STATE_VAR
||
532 src
->File
== PROGRAM_CONSTANT
||
533 src
->File
== PROGRAM_UNIFORM
)) {
534 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
537 /* other type of source register */
538 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
539 src
->Negate
, src
->Abs
);
545 * Same as \sa get_src_reg() but if the register is a literal, emit
546 * a brw_reg encoding the literal.
547 * Note that a brw instruction only allows one src operand to be a literal.
548 * For instructions with more than one operand, only the second can be a
549 * literal. This means that we treat some literals as constants/uniforms
550 * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
553 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
554 const struct prog_instruction
*inst
,
555 GLuint srcRegIndex
, GLuint channel
)
557 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
558 if (src
->File
== PROGRAM_CONSTANT
) {
560 const int component
= GET_SWZ(src
->Swizzle
, channel
);
561 const GLfloat
*param
=
562 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
563 GLfloat value
= param
[component
];
564 if (src
->Negate
& (1 << channel
))
567 value
= FABSF(value
);
569 printf(" form immed value %f for chan %d\n", value
, channel
);
571 return brw_imm_f(value
);
574 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
580 * Subroutines are minimal support for reusable instruction sequences.
581 * They are implemented as simply as possible to minimise overhead: there
582 * is no explicit support for communication between the caller and callee
583 * other than saving the return address in a temporary register, nor is
584 * there any automatic local storage. This implies that great care is
585 * required before attempting reentrancy or any kind of nested
586 * subroutine invocations.
588 static void invoke_subroutine( struct brw_wm_compile
*c
,
589 enum _subroutine subroutine
,
590 void (*emit
)( struct brw_wm_compile
* ) )
592 struct brw_compile
*p
= &c
->func
;
594 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
596 if( c
->subroutines
[ subroutine
] ) {
597 /* subroutine previously emitted: reuse existing instructions */
599 int mark
= mark_tmps( c
);
600 struct brw_reg return_address
= retype( alloc_tmp( c
),
601 BRW_REGISTER_TYPE_UD
);
602 int here
= p
->nr_insn
;
604 brw_push_insn_state(p
);
605 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
606 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
608 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
609 brw_imm_d( ( c
->subroutines
[ subroutine
] -
611 brw_pop_insn_state(p
);
613 release_tmps( c
, mark
);
615 /* previously unused subroutine: emit, and mark for later reuse */
617 int mark
= mark_tmps( c
);
618 struct brw_reg return_address
= retype( alloc_tmp( c
),
619 BRW_REGISTER_TYPE_UD
);
620 struct brw_instruction
*calc
;
621 int base
= p
->nr_insn
;
623 brw_push_insn_state(p
);
624 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
625 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
626 brw_pop_insn_state(p
);
628 c
->subroutines
[ subroutine
] = p
->nr_insn
;
632 brw_push_insn_state(p
);
633 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
634 brw_MOV( p
, brw_ip_reg(), return_address
);
635 brw_pop_insn_state(p
);
637 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
639 release_tmps( c
, mark
);
643 static void emit_trunc( struct brw_wm_compile
*c
,
644 const struct prog_instruction
*inst
)
647 struct brw_compile
*p
= &c
->func
;
648 GLuint mask
= inst
->DstReg
.WriteMask
;
649 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
650 for (i
= 0; i
< 4; i
++) {
652 struct brw_reg src
, dst
;
653 dst
= get_dst_reg(c
, inst
, i
);
654 src
= get_src_reg(c
, inst
, 0, i
);
655 brw_RNDZ(p
, dst
, src
);
658 brw_set_saturate(p
, 0);
661 static void emit_mov( struct brw_wm_compile
*c
,
662 const struct prog_instruction
*inst
)
665 struct brw_compile
*p
= &c
->func
;
666 GLuint mask
= inst
->DstReg
.WriteMask
;
667 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
668 for (i
= 0; i
< 4; i
++) {
670 struct brw_reg src
, dst
;
671 dst
= get_dst_reg(c
, inst
, i
);
672 /* XXX some moves from immediate value don't work reliably!!! */
673 /*src = get_src_reg_imm(c, inst, 0, i);*/
674 src
= get_src_reg(c
, inst
, 0, i
);
675 brw_MOV(p
, dst
, src
);
678 brw_set_saturate(p
, 0);
681 static void emit_pixel_xy(struct brw_wm_compile
*c
,
682 const struct prog_instruction
*inst
)
684 struct brw_reg r1
= brw_vec1_grf(1, 0);
685 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
687 struct brw_reg dst0
, dst1
;
688 struct brw_compile
*p
= &c
->func
;
689 GLuint mask
= inst
->DstReg
.WriteMask
;
691 dst0
= get_dst_reg(c
, inst
, 0);
692 dst1
= get_dst_reg(c
, inst
, 1);
693 /* Calculate pixel centers by adding 1 or 0 to each of the
694 * micro-tile coordinates passed in r1.
696 if (mask
& WRITEMASK_X
) {
698 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
699 stride(suboffset(r1_uw
, 4), 2, 4, 0),
700 brw_imm_v(0x10101010));
703 if (mask
& WRITEMASK_Y
) {
705 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
706 stride(suboffset(r1_uw
, 5), 2, 4, 0),
707 brw_imm_v(0x11001100));
711 static void emit_delta_xy(struct brw_wm_compile
*c
,
712 const struct prog_instruction
*inst
)
714 struct brw_reg r1
= brw_vec1_grf(1, 0);
715 struct brw_reg dst0
, dst1
, src0
, src1
;
716 struct brw_compile
*p
= &c
->func
;
717 GLuint mask
= inst
->DstReg
.WriteMask
;
719 dst0
= get_dst_reg(c
, inst
, 0);
720 dst1
= get_dst_reg(c
, inst
, 1);
721 src0
= get_src_reg(c
, inst
, 0, 0);
722 src1
= get_src_reg(c
, inst
, 0, 1);
723 /* Calc delta X,Y by subtracting origin in r1 from the pixel
726 if (mask
& WRITEMASK_X
) {
729 retype(src0
, BRW_REGISTER_TYPE_UW
),
733 if (mask
& WRITEMASK_Y
) {
736 retype(src1
, BRW_REGISTER_TYPE_UW
),
737 negate(suboffset(r1
,1)));
742 static void fire_fb_write( struct brw_wm_compile
*c
,
748 struct brw_compile
*p
= &c
->func
;
749 /* Pass through control information:
751 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
753 brw_push_insn_state(p
);
754 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
756 brw_message_reg(base_reg
+ 1),
758 brw_pop_insn_state(p
);
760 /* Send framebuffer write message: */
762 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
764 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
771 static void emit_fb_write(struct brw_wm_compile
*c
,
772 const struct prog_instruction
*inst
)
774 struct brw_compile
*p
= &c
->func
;
780 /* Reserve a space for AA - may not be needed:
782 if (c
->key
.aa_dest_stencil_reg
)
785 brw_push_insn_state(p
);
786 for (channel
= 0; channel
< 4; channel
++) {
787 src0
= get_src_reg(c
, inst
, 0, channel
);
788 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
789 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
790 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
792 /* skip over the regs populated above: */
794 brw_pop_insn_state(p
);
796 if (c
->key
.source_depth_to_render_target
) {
797 if (c
->key
.computes_depth
) {
798 src0
= get_src_reg(c
, inst
, 2, 2);
799 brw_MOV(p
, brw_message_reg(nr
), src0
);
802 src0
= get_src_reg(c
, inst
, 1, 1);
803 brw_MOV(p
, brw_message_reg(nr
), src0
);
809 if (c
->key
.dest_depth_reg
) {
810 const GLuint comp
= c
->key
.dest_depth_reg
/ 2;
811 const GLuint off
= c
->key
.dest_depth_reg
% 2;
814 /* XXX this code needs review/testing */
815 struct brw_reg arg1_0
= get_src_reg(c
, inst
, 1, comp
);
816 struct brw_reg arg1_1
= get_src_reg(c
, inst
, 1, comp
+1);
818 brw_push_insn_state(p
);
819 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
821 brw_MOV(p
, brw_message_reg(nr
), offset(arg1_0
, 1));
823 brw_MOV(p
, brw_message_reg(nr
+1), arg1_1
);
824 brw_pop_insn_state(p
);
828 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
829 brw_MOV(p
, brw_message_reg(nr
), src
);
834 target
= inst
->Aux
>> 1;
836 fire_fb_write(c
, 0, nr
, target
, eot
);
839 static void emit_pixel_w( struct brw_wm_compile
*c
,
840 const struct prog_instruction
*inst
)
842 struct brw_compile
*p
= &c
->func
;
843 GLuint mask
= inst
->DstReg
.WriteMask
;
844 if (mask
& WRITEMASK_W
) {
845 struct brw_reg dst
, src0
, delta0
, delta1
;
846 struct brw_reg interp3
;
848 dst
= get_dst_reg(c
, inst
, 3);
849 src0
= get_src_reg(c
, inst
, 0, 0);
850 delta0
= get_src_reg(c
, inst
, 1, 0);
851 delta1
= get_src_reg(c
, inst
, 1, 1);
853 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
854 /* Calc 1/w - just linterp wpos[3] optimized by putting the
855 * result straight into a message reg.
857 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
858 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
862 BRW_MATH_FUNCTION_INV
,
863 BRW_MATH_SATURATE_NONE
,
865 BRW_MATH_PRECISION_FULL
);
869 static void emit_linterp(struct brw_wm_compile
*c
,
870 const struct prog_instruction
*inst
)
872 struct brw_compile
*p
= &c
->func
;
873 GLuint mask
= inst
->DstReg
.WriteMask
;
874 struct brw_reg interp
[4];
875 struct brw_reg dst
, delta0
, delta1
;
879 src0
= get_src_reg(c
, inst
, 0, 0);
880 delta0
= get_src_reg(c
, inst
, 1, 0);
881 delta1
= get_src_reg(c
, inst
, 1, 1);
884 interp
[0] = brw_vec1_grf(nr
, 0);
885 interp
[1] = brw_vec1_grf(nr
, 4);
886 interp
[2] = brw_vec1_grf(nr
+1, 0);
887 interp
[3] = brw_vec1_grf(nr
+1, 4);
889 for(i
= 0; i
< 4; i
++ ) {
891 dst
= get_dst_reg(c
, inst
, i
);
892 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
893 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
898 static void emit_cinterp(struct brw_wm_compile
*c
,
899 const struct prog_instruction
*inst
)
901 struct brw_compile
*p
= &c
->func
;
902 GLuint mask
= inst
->DstReg
.WriteMask
;
904 struct brw_reg interp
[4];
905 struct brw_reg dst
, src0
;
908 src0
= get_src_reg(c
, inst
, 0, 0);
911 interp
[0] = brw_vec1_grf(nr
, 0);
912 interp
[1] = brw_vec1_grf(nr
, 4);
913 interp
[2] = brw_vec1_grf(nr
+1, 0);
914 interp
[3] = brw_vec1_grf(nr
+1, 4);
916 for(i
= 0; i
< 4; i
++ ) {
918 dst
= get_dst_reg(c
, inst
, i
);
919 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
924 static void emit_pinterp(struct brw_wm_compile
*c
,
925 const struct prog_instruction
*inst
)
927 struct brw_compile
*p
= &c
->func
;
928 GLuint mask
= inst
->DstReg
.WriteMask
;
930 struct brw_reg interp
[4];
931 struct brw_reg dst
, delta0
, delta1
;
932 struct brw_reg src0
, w
;
935 src0
= get_src_reg(c
, inst
, 0, 0);
936 delta0
= get_src_reg(c
, inst
, 1, 0);
937 delta1
= get_src_reg(c
, inst
, 1, 1);
938 w
= get_src_reg(c
, inst
, 2, 3);
941 interp
[0] = brw_vec1_grf(nr
, 0);
942 interp
[1] = brw_vec1_grf(nr
, 4);
943 interp
[2] = brw_vec1_grf(nr
+1, 0);
944 interp
[3] = brw_vec1_grf(nr
+1, 4);
946 for(i
= 0; i
< 4; i
++ ) {
948 dst
= get_dst_reg(c
, inst
, i
);
949 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
950 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
952 brw_MUL(p
, dst
, dst
, w
);
957 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
958 static void emit_frontfacing(struct brw_wm_compile
*c
,
959 const struct prog_instruction
*inst
)
961 struct brw_compile
*p
= &c
->func
;
962 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
964 GLuint mask
= inst
->DstReg
.WriteMask
;
967 for (i
= 0; i
< 4; i
++) {
969 dst
= get_dst_reg(c
, inst
, i
);
970 brw_MOV(p
, dst
, brw_imm_f(0.0));
974 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
977 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
978 for (i
= 0; i
< 4; i
++) {
980 dst
= get_dst_reg(c
, inst
, i
);
981 brw_MOV(p
, dst
, brw_imm_f(1.0));
984 brw_set_predicate_control_flag_value(p
, 0xff);
987 static void emit_xpd(struct brw_wm_compile
*c
,
988 const struct prog_instruction
*inst
)
991 struct brw_compile
*p
= &c
->func
;
992 GLuint mask
= inst
->DstReg
.WriteMask
;
993 for (i
= 0; i
< 4; i
++) {
997 struct brw_reg src0
, src1
, dst
;
998 dst
= get_dst_reg(c
, inst
, i
);
999 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
1000 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
1001 brw_MUL(p
, brw_null_reg(), src0
, src1
);
1002 src0
= get_src_reg(c
, inst
, 0, i1
);
1003 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
1004 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1005 brw_MAC(p
, dst
, src0
, src1
);
1006 brw_set_saturate(p
, 0);
1009 brw_set_saturate(p
, 0);
1012 static void emit_dp3(struct brw_wm_compile
*c
,
1013 const struct prog_instruction
*inst
)
1015 struct brw_reg src0
[3], src1
[3], dst
;
1017 struct brw_compile
*p
= &c
->func
;
1018 GLuint mask
= inst
->DstReg
.WriteMask
;
1019 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
1021 if (!(mask
& WRITEMASK_XYZW
))
1024 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1026 for (i
= 0; i
< 3; i
++) {
1027 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1028 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1031 dst
= get_dst_reg(c
, inst
, dst_chan
);
1032 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1033 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1034 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1035 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1036 brw_set_saturate(p
, 0);
1039 static void emit_dp4(struct brw_wm_compile
*c
,
1040 const struct prog_instruction
*inst
)
1042 struct brw_reg src0
[4], src1
[4], dst
;
1044 struct brw_compile
*p
= &c
->func
;
1045 GLuint mask
= inst
->DstReg
.WriteMask
;
1046 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
1048 if (!(mask
& WRITEMASK_XYZW
))
1051 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1053 for (i
= 0; i
< 4; i
++) {
1054 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1055 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1057 dst
= get_dst_reg(c
, inst
, dst_chan
);
1058 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1059 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1060 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1061 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1062 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1063 brw_set_saturate(p
, 0);
1066 static void emit_dph(struct brw_wm_compile
*c
,
1067 const struct prog_instruction
*inst
)
1069 struct brw_reg src0
[4], src1
[4], dst
;
1071 struct brw_compile
*p
= &c
->func
;
1072 GLuint mask
= inst
->DstReg
.WriteMask
;
1073 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
1075 if (!(mask
& WRITEMASK_XYZW
))
1078 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1080 for (i
= 0; i
< 4; i
++) {
1081 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1082 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1084 dst
= get_dst_reg(c
, inst
, dst_chan
);
1085 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1086 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1087 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1088 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1089 brw_ADD(p
, dst
, dst
, src1
[3]);
1090 brw_set_saturate(p
, 0);
1094 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1095 * Note that the result of the function is smeared across the dest
1096 * register's X, Y, Z and W channels (subject to writemasking of course).
1098 static void emit_math1(struct brw_wm_compile
*c
,
1099 const struct prog_instruction
*inst
, GLuint func
)
1101 struct brw_compile
*p
= &c
->func
;
1102 struct brw_reg src0
, dst
;
1103 GLuint mask
= inst
->DstReg
.WriteMask
;
1104 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
1106 if (!(mask
& WRITEMASK_XYZW
))
1109 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1111 /* Get first component of source register */
1112 dst
= get_dst_reg(c
, inst
, dst_chan
);
1113 src0
= get_src_reg(c
, inst
, 0, 0);
1115 brw_MOV(p
, brw_message_reg(2), src0
);
1119 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1122 BRW_MATH_DATA_VECTOR
,
1123 BRW_MATH_PRECISION_FULL
);
1126 static void emit_rcp(struct brw_wm_compile
*c
,
1127 const struct prog_instruction
*inst
)
1129 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
1132 static void emit_rsq(struct brw_wm_compile
*c
,
1133 const struct prog_instruction
*inst
)
1135 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
1138 static void emit_sin(struct brw_wm_compile
*c
,
1139 const struct prog_instruction
*inst
)
1141 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
1144 static void emit_cos(struct brw_wm_compile
*c
,
1145 const struct prog_instruction
*inst
)
1147 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1150 static void emit_ex2(struct brw_wm_compile
*c
,
1151 const struct prog_instruction
*inst
)
1153 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1156 static void emit_lg2(struct brw_wm_compile
*c
,
1157 const struct prog_instruction
*inst
)
1159 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1162 static void emit_add(struct brw_wm_compile
*c
,
1163 const struct prog_instruction
*inst
)
1165 struct brw_compile
*p
= &c
->func
;
1166 struct brw_reg src0
, src1
, dst
;
1167 GLuint mask
= inst
->DstReg
.WriteMask
;
1169 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1170 for (i
= 0 ; i
< 4; i
++) {
1171 if (mask
& (1<<i
)) {
1172 dst
= get_dst_reg(c
, inst
, i
);
1173 src0
= get_src_reg(c
, inst
, 0, i
);
1174 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1175 brw_ADD(p
, dst
, src0
, src1
);
1178 brw_set_saturate(p
, 0);
1181 static void emit_arl(struct brw_wm_compile
*c
,
1182 const struct prog_instruction
*inst
)
1184 struct brw_compile
*p
= &c
->func
;
1185 struct brw_reg src0
, addr_reg
;
1186 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1187 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1188 BRW_ARF_ADDRESS
, 0);
1189 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1190 brw_MOV(p
, addr_reg
, src0
);
1191 brw_set_saturate(p
, 0);
1195 static void emit_mul(struct brw_wm_compile
*c
,
1196 const struct prog_instruction
*inst
)
1198 struct brw_compile
*p
= &c
->func
;
1199 struct brw_reg src0
, src1
, dst
;
1200 GLuint mask
= inst
->DstReg
.WriteMask
;
1202 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1203 for (i
= 0 ; i
< 4; i
++) {
1204 if (mask
& (1<<i
)) {
1205 dst
= get_dst_reg(c
, inst
, i
);
1206 src0
= get_src_reg(c
, inst
, 0, i
);
1207 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1208 brw_MUL(p
, dst
, src0
, src1
);
1211 brw_set_saturate(p
, 0);
1214 static void emit_frc(struct brw_wm_compile
*c
,
1215 const struct prog_instruction
*inst
)
1217 struct brw_compile
*p
= &c
->func
;
1218 struct brw_reg src0
, dst
;
1219 GLuint mask
= inst
->DstReg
.WriteMask
;
1221 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1222 for (i
= 0 ; i
< 4; i
++) {
1223 if (mask
& (1<<i
)) {
1224 dst
= get_dst_reg(c
, inst
, i
);
1225 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1226 brw_FRC(p
, dst
, src0
);
1229 if (inst
->SaturateMode
!= SATURATE_OFF
)
1230 brw_set_saturate(p
, 0);
1233 static void emit_flr(struct brw_wm_compile
*c
,
1234 const struct prog_instruction
*inst
)
1236 struct brw_compile
*p
= &c
->func
;
1237 struct brw_reg src0
, dst
;
1238 GLuint mask
= inst
->DstReg
.WriteMask
;
1240 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1241 for (i
= 0 ; i
< 4; i
++) {
1242 if (mask
& (1<<i
)) {
1243 dst
= get_dst_reg(c
, inst
, i
);
1244 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1245 brw_RNDD(p
, dst
, src0
);
1248 brw_set_saturate(p
, 0);
1252 static void emit_min_max(struct brw_wm_compile
*c
,
1253 const struct prog_instruction
*inst
)
1255 struct brw_compile
*p
= &c
->func
;
1256 const GLuint mask
= inst
->DstReg
.WriteMask
;
1257 const int mark
= mark_tmps(c
);
1259 brw_push_insn_state(p
);
1260 for (i
= 0; i
< 4; i
++) {
1261 if (mask
& (1<<i
)) {
1262 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1263 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1264 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1266 /* if dst==src0 or dst==src1 we need to use a temp reg */
1267 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1268 brw_same_reg(dst
, src1
);
1275 printf(" Min/max: dst %d src0 %d src1 %d\n",
1276 dst.nr, src0.nr, src1.nr);
1278 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1279 brw_MOV(p
, dst
, src0
);
1280 brw_set_saturate(p
, 0);
1282 if (inst
->Opcode
== OPCODE_MIN
)
1283 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1285 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1287 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1288 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1289 brw_MOV(p
, dst
, src1
);
1290 brw_set_saturate(p
, 0);
1291 brw_set_predicate_control_flag_value(p
, 0xff);
1293 brw_MOV(p
, real_dst
, dst
);
1296 brw_pop_insn_state(p
);
1297 release_tmps(c
, mark
);
1300 static void emit_pow(struct brw_wm_compile
*c
,
1301 const struct prog_instruction
*inst
)
1303 struct brw_compile
*p
= &c
->func
;
1304 struct brw_reg dst
, src0
, src1
;
1305 GLuint mask
= inst
->DstReg
.WriteMask
;
1306 int dst_chan
= _mesa_ffs(mask
& WRITEMASK_XYZW
) - 1;
1308 if (!(mask
& WRITEMASK_XYZW
))
1311 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1313 dst
= get_dst_reg(c
, inst
, dst_chan
);
1314 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1315 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1317 brw_MOV(p
, brw_message_reg(2), src0
);
1318 brw_MOV(p
, brw_message_reg(3), src1
);
1322 BRW_MATH_FUNCTION_POW
,
1323 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1326 BRW_MATH_DATA_VECTOR
,
1327 BRW_MATH_PRECISION_FULL
);
1330 static void emit_lrp(struct brw_wm_compile
*c
,
1331 const struct prog_instruction
*inst
)
1333 struct brw_compile
*p
= &c
->func
;
1334 GLuint mask
= inst
->DstReg
.WriteMask
;
1335 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1337 int mark
= mark_tmps(c
);
1338 for (i
= 0; i
< 4; i
++) {
1339 if (mask
& (1<<i
)) {
1340 dst
= get_dst_reg(c
, inst
, i
);
1341 src0
= get_src_reg(c
, inst
, 0, i
);
1343 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1345 if (src1
.nr
== dst
.nr
) {
1346 tmp1
= alloc_tmp(c
);
1347 brw_MOV(p
, tmp1
, src1
);
1351 src2
= get_src_reg(c
, inst
, 2, i
);
1352 if (src2
.nr
== dst
.nr
) {
1353 tmp2
= alloc_tmp(c
);
1354 brw_MOV(p
, tmp2
, src2
);
1358 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1359 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1360 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1361 brw_MAC(p
, dst
, src0
, tmp1
);
1362 brw_set_saturate(p
, 0);
1364 release_tmps(c
, mark
);
1369 * For GLSL shaders, this KIL will be unconditional.
1370 * It may be contained inside an IF/ENDIF structure of course.
1372 static void emit_kil(struct brw_wm_compile
*c
)
1374 struct brw_compile
*p
= &c
->func
;
1375 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1376 brw_push_insn_state(p
);
1377 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1378 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1379 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1380 brw_pop_insn_state(p
);
1383 static void emit_mad(struct brw_wm_compile
*c
,
1384 const struct prog_instruction
*inst
)
1386 struct brw_compile
*p
= &c
->func
;
1387 GLuint mask
= inst
->DstReg
.WriteMask
;
1388 struct brw_reg dst
, src0
, src1
, src2
;
1391 for (i
= 0; i
< 4; i
++) {
1392 if (mask
& (1<<i
)) {
1393 dst
= get_dst_reg(c
, inst
, i
);
1394 src0
= get_src_reg(c
, inst
, 0, i
);
1395 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1396 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1397 brw_MUL(p
, dst
, src0
, src1
);
1399 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1400 brw_ADD(p
, dst
, dst
, src2
);
1401 brw_set_saturate(p
, 0);
1406 static void emit_sop(struct brw_wm_compile
*c
,
1407 const struct prog_instruction
*inst
, GLuint cond
)
1409 struct brw_compile
*p
= &c
->func
;
1410 GLuint mask
= inst
->DstReg
.WriteMask
;
1411 struct brw_reg dst
, src0
, src1
;
1414 for (i
= 0; i
< 4; i
++) {
1415 if (mask
& (1<<i
)) {
1416 dst
= get_dst_reg(c
, inst
, i
);
1417 src0
= get_src_reg(c
, inst
, 0, i
);
1418 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1419 brw_push_insn_state(p
);
1420 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1421 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1422 brw_MOV(p
, dst
, brw_imm_f(0.0));
1423 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1424 brw_MOV(p
, dst
, brw_imm_f(1.0));
1425 brw_pop_insn_state(p
);
1430 static void emit_slt(struct brw_wm_compile
*c
,
1431 const struct prog_instruction
*inst
)
1433 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1436 static void emit_sle(struct brw_wm_compile
*c
,
1437 const struct prog_instruction
*inst
)
1439 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1442 static void emit_sgt(struct brw_wm_compile
*c
,
1443 const struct prog_instruction
*inst
)
1445 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1448 static void emit_sge(struct brw_wm_compile
*c
,
1449 const struct prog_instruction
*inst
)
1451 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1454 static void emit_seq(struct brw_wm_compile
*c
,
1455 const struct prog_instruction
*inst
)
1457 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1460 static void emit_sne(struct brw_wm_compile
*c
,
1461 const struct prog_instruction
*inst
)
1463 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1466 static void emit_ddx(struct brw_wm_compile
*c
,
1467 const struct prog_instruction
*inst
)
1469 struct brw_compile
*p
= &c
->func
;
1470 GLuint mask
= inst
->DstReg
.WriteMask
;
1471 struct brw_reg interp
[4];
1473 struct brw_reg src0
, w
;
1475 src0
= get_src_reg(c
, inst
, 0, 0);
1476 w
= get_src_reg(c
, inst
, 1, 3);
1478 interp
[0] = brw_vec1_grf(nr
, 0);
1479 interp
[1] = brw_vec1_grf(nr
, 4);
1480 interp
[2] = brw_vec1_grf(nr
+1, 0);
1481 interp
[3] = brw_vec1_grf(nr
+1, 4);
1482 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1483 for(i
= 0; i
< 4; i
++ ) {
1484 if (mask
& (1<<i
)) {
1485 dst
= get_dst_reg(c
, inst
, i
);
1486 brw_MOV(p
, dst
, interp
[i
]);
1487 brw_MUL(p
, dst
, dst
, w
);
1490 brw_set_saturate(p
, 0);
1493 static void emit_ddy(struct brw_wm_compile
*c
,
1494 const struct prog_instruction
*inst
)
1496 struct brw_compile
*p
= &c
->func
;
1497 GLuint mask
= inst
->DstReg
.WriteMask
;
1498 struct brw_reg interp
[4];
1500 struct brw_reg src0
, w
;
1503 src0
= get_src_reg(c
, inst
, 0, 0);
1505 w
= get_src_reg(c
, inst
, 1, 3);
1506 interp
[0] = brw_vec1_grf(nr
, 0);
1507 interp
[1] = brw_vec1_grf(nr
, 4);
1508 interp
[2] = brw_vec1_grf(nr
+1, 0);
1509 interp
[3] = brw_vec1_grf(nr
+1, 4);
1510 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1511 for(i
= 0; i
< 4; i
++ ) {
1512 if (mask
& (1<<i
)) {
1513 dst
= get_dst_reg(c
, inst
, i
);
1514 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1515 brw_MUL(p
, dst
, dst
, w
);
1518 brw_set_saturate(p
, 0);
1521 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1523 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1527 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1529 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1532 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1534 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1537 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1539 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1543 /* One-, two- and three-dimensional Perlin noise, similar to the description
1544 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1545 static void noise1_sub( struct brw_wm_compile
*c
) {
1547 struct brw_compile
*p
= &c
->func
;
1548 struct brw_reg param
,
1549 x0
, x1
, /* gradients at each end */
1550 t
, tmp
[ 2 ], /* float temporaries */
1551 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1553 int mark
= mark_tmps( c
);
1555 x0
= alloc_tmp( c
);
1556 x1
= alloc_tmp( c
);
1558 tmp
[ 0 ] = alloc_tmp( c
);
1559 tmp
[ 1 ] = alloc_tmp( c
);
1560 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1561 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1562 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1563 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1564 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1566 param
= lookup_tmp( c
, mark
- 2 );
1568 brw_set_access_mode( p
, BRW_ALIGN_1
);
1570 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1572 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1573 be hashed. Also compute the remainder (offset within the unit
1574 length), interleaved to reduce register dependency penalties. */
1575 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1576 brw_FRC( p
, param
, param
);
1577 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1578 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1579 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1581 /* We're now ready to perform the hashing. The two hashes are
1582 interleaved for performance. The hash function used is
1583 designed to rapidly achieve avalanche and require only 32x16
1584 bit multiplication, and 16-bit swizzles (which we get for
1585 free). We can't use immediate operands in the multiplies,
1586 because immediates are permitted only in src1 and the 16-bit
1587 factor is permitted only in src0. */
1588 for( i
= 0; i
< 2; i
++ )
1589 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1590 for( i
= 0; i
< 2; i
++ )
1591 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1592 high_words( itmp
[ i
] ) );
1593 for( i
= 0; i
< 2; i
++ )
1594 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1595 for( i
= 0; i
< 2; i
++ )
1596 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1597 high_words( itmp
[ i
] ) );
1598 for( i
= 0; i
< 2; i
++ )
1599 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1600 for( i
= 0; i
< 2; i
++ )
1601 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1602 high_words( itmp
[ i
] ) );
1604 /* Now we want to initialise the two gradients based on the
1605 hashes. Format conversion from signed integer to float leaves
1606 everything scaled too high by a factor of pow( 2, 31 ), but
1607 we correct for that right at the end. */
1608 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1609 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1610 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1612 brw_MUL( p
, x0
, x0
, param
);
1613 brw_MUL( p
, x1
, x1
, t
);
1615 /* We interpolate between the gradients using the polynomial
1616 6t^5 - 15t^4 + 10t^3 (Perlin). */
1617 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1618 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1619 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1620 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1621 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1622 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1624 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1625 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1626 brw_MUL( p
, x1
, x1
, param
);
1627 brw_ADD( p
, x0
, x0
, x1
);
1628 /* scale by pow( 2, -30 ), to compensate for the format conversion
1629 above and an extra factor of 2 so that a single gradient covers
1631 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1633 release_tmps( c
, mark
);
1636 static void emit_noise1( struct brw_wm_compile
*c
,
1637 const struct prog_instruction
*inst
)
1639 struct brw_compile
*p
= &c
->func
;
1640 struct brw_reg src
, param
, dst
;
1641 GLuint mask
= inst
->DstReg
.WriteMask
;
1643 int mark
= mark_tmps( c
);
1645 assert( mark
== 0 );
1647 src
= get_src_reg( c
, inst
, 0, 0 );
1649 param
= alloc_tmp( c
);
1651 brw_MOV( p
, param
, src
);
1653 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1655 /* Fill in the result: */
1656 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1657 for (i
= 0 ; i
< 4; i
++) {
1658 if (mask
& (1<<i
)) {
1659 dst
= get_dst_reg(c
, inst
, i
);
1660 brw_MOV( p
, dst
, param
);
1663 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1664 brw_set_saturate( p
, 0 );
1666 release_tmps( c
, mark
);
1669 static void noise2_sub( struct brw_wm_compile
*c
) {
1671 struct brw_compile
*p
= &c
->func
;
1672 struct brw_reg param0
, param1
,
1673 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1674 t
, tmp
[ 4 ], /* float temporaries */
1675 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1677 int mark
= mark_tmps( c
);
1679 x0y0
= alloc_tmp( c
);
1680 x0y1
= alloc_tmp( c
);
1681 x1y0
= alloc_tmp( c
);
1682 x1y1
= alloc_tmp( c
);
1684 for( i
= 0; i
< 4; i
++ ) {
1685 tmp
[ i
] = alloc_tmp( c
);
1686 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1688 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1689 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1690 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1692 param0
= lookup_tmp( c
, mark
- 3 );
1693 param1
= lookup_tmp( c
, mark
- 2 );
1695 brw_set_access_mode( p
, BRW_ALIGN_1
);
1697 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1698 be hashed. Also compute the remainders (offsets within the unit
1699 square), interleaved to reduce register dependency penalties. */
1700 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1701 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1702 brw_FRC( p
, param0
, param0
);
1703 brw_FRC( p
, param1
, param1
);
1704 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1705 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1706 low_words( itmp
[ 1 ] ) );
1707 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1708 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1709 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1710 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1711 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1713 /* We're now ready to perform the hashing. The four hashes are
1714 interleaved for performance. The hash function used is
1715 designed to rapidly achieve avalanche and require only 32x16
1716 bit multiplication, and 16-bit swizzles (which we get for
1717 free). We can't use immediate operands in the multiplies,
1718 because immediates are permitted only in src1 and the 16-bit
1719 factor is permitted only in src0. */
1720 for( i
= 0; i
< 4; i
++ )
1721 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1722 for( i
= 0; i
< 4; i
++ )
1723 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1724 high_words( itmp
[ i
] ) );
1725 for( i
= 0; i
< 4; i
++ )
1726 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1727 for( i
= 0; i
< 4; i
++ )
1728 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1729 high_words( itmp
[ i
] ) );
1730 for( i
= 0; i
< 4; i
++ )
1731 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1732 for( i
= 0; i
< 4; i
++ )
1733 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1734 high_words( itmp
[ i
] ) );
1736 /* Now we want to initialise the four gradients based on the
1737 hashes. Format conversion from signed integer to float leaves
1738 everything scaled too high by a factor of pow( 2, 15 ), but
1739 we correct for that right at the end. */
1740 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1741 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1742 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1743 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1744 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1746 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1747 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1748 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1749 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1751 brw_MUL( p
, x1y0
, x1y0
, t
);
1752 brw_MUL( p
, x1y1
, x1y1
, t
);
1753 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1754 brw_MUL( p
, x0y0
, x0y0
, param0
);
1755 brw_MUL( p
, x0y1
, x0y1
, param0
);
1757 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1758 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1759 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1760 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1762 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1763 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1764 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1765 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1767 /* We interpolate between the gradients using the polynomial
1768 6t^5 - 15t^4 + 10t^3 (Perlin). */
1769 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1770 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1771 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1772 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1773 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1774 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1775 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1777 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1778 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1779 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1780 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1781 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1783 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1784 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1785 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1786 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1788 /* Here we interpolate in the y dimension... */
1789 brw_MUL( p
, x0y1
, x0y1
, param1
);
1790 brw_MUL( p
, x1y1
, x1y1
, param1
);
1791 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1792 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1794 /* And now in x. There are horrible register dependencies here,
1795 but we have nothing else to do. */
1796 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1797 brw_MUL( p
, x1y0
, x1y0
, param0
);
1798 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1800 /* scale by pow( 2, -15 ), as described above */
1801 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1803 release_tmps( c
, mark
);
1806 static void emit_noise2( struct brw_wm_compile
*c
,
1807 const struct prog_instruction
*inst
)
1809 struct brw_compile
*p
= &c
->func
;
1810 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1811 GLuint mask
= inst
->DstReg
.WriteMask
;
1813 int mark
= mark_tmps( c
);
1815 assert( mark
== 0 );
1817 src0
= get_src_reg( c
, inst
, 0, 0 );
1818 src1
= get_src_reg( c
, inst
, 0, 1 );
1820 param0
= alloc_tmp( c
);
1821 param1
= alloc_tmp( c
);
1823 brw_MOV( p
, param0
, src0
);
1824 brw_MOV( p
, param1
, src1
);
1826 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1828 /* Fill in the result: */
1829 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1830 for (i
= 0 ; i
< 4; i
++) {
1831 if (mask
& (1<<i
)) {
1832 dst
= get_dst_reg(c
, inst
, i
);
1833 brw_MOV( p
, dst
, param0
);
1836 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1837 brw_set_saturate( p
, 0 );
1839 release_tmps( c
, mark
);
1843 * The three-dimensional case is much like the one- and two- versions above,
1844 * but since the number of corners is rapidly growing we now pack 16 16-bit
1845 * hashes into each register to extract more parallelism from the EUs.
1847 static void noise3_sub( struct brw_wm_compile
*c
) {
1849 struct brw_compile
*p
= &c
->func
;
1850 struct brw_reg param0
, param1
, param2
,
1851 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1852 xi
, yi
, zi
, /* interpolation coefficients */
1853 t
, tmp
[ 8 ], /* float temporaries */
1854 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1855 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1857 int mark
= mark_tmps( c
);
1859 x0y0
= alloc_tmp( c
);
1860 x0y1
= alloc_tmp( c
);
1861 x1y0
= alloc_tmp( c
);
1862 x1y1
= alloc_tmp( c
);
1863 xi
= alloc_tmp( c
);
1864 yi
= alloc_tmp( c
);
1865 zi
= alloc_tmp( c
);
1867 for( i
= 0; i
< 8; i
++ ) {
1868 tmp
[ i
] = alloc_tmp( c
);
1869 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1870 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1873 param0
= lookup_tmp( c
, mark
- 4 );
1874 param1
= lookup_tmp( c
, mark
- 3 );
1875 param2
= lookup_tmp( c
, mark
- 2 );
1877 brw_set_access_mode( p
, BRW_ALIGN_1
);
1879 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1880 be hashed. Also compute the remainders (offsets within the unit
1881 cube), interleaved to reduce register dependency penalties. */
1882 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1883 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1884 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1885 brw_FRC( p
, param0
, param0
);
1886 brw_FRC( p
, param1
, param1
);
1887 brw_FRC( p
, param2
, param2
);
1888 /* Since we now have only 16 bits of precision in the hash, we must
1889 be more careful about thorough mixing to maintain entropy as we
1890 squash the input vector into a small scalar. */
1891 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1892 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1893 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1894 brw_imm_uw( 0x9B93 ) );
1895 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1896 brw_imm_uw( 0xBC8F ) );
1898 /* Temporarily disable the execution mask while we work with ExecSize=16
1899 channels (the mask is set for ExecSize=8 and is probably incorrect).
1900 Although this might cause execution of unwanted channels, the code
1901 writes only to temporary registers and has no side effects, so
1902 disabling the mask is harmless. */
1903 brw_push_insn_state( p
);
1904 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1905 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1906 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1907 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1909 /* We're now ready to perform the hashing. The eight hashes are
1910 interleaved for performance. The hash function used is
1911 designed to rapidly achieve avalanche and require only 16x16
1912 bit multiplication, and 8-bit swizzles (which we get for
1914 for( i
= 0; i
< 4; i
++ )
1915 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1916 for( i
= 0; i
< 4; i
++ )
1917 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1918 odd_bytes( wtmp
[ i
] ) );
1919 for( i
= 0; i
< 4; i
++ )
1920 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1921 for( i
= 0; i
< 4; i
++ )
1922 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1923 odd_bytes( wtmp
[ i
] ) );
1924 brw_pop_insn_state( p
);
1926 /* Now we want to initialise the four rear gradients based on the
1927 hashes. Format conversion from signed integer to float leaves
1928 everything scaled too high by a factor of pow( 2, 15 ), but
1929 we correct for that right at the end. */
1931 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1932 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1933 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1934 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1935 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1937 brw_push_insn_state( p
);
1938 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1939 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1940 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1941 brw_pop_insn_state( p
);
1943 brw_MUL( p
, x1y0
, x1y0
, t
);
1944 brw_MUL( p
, x1y1
, x1y1
, t
);
1945 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1946 brw_MUL( p
, x0y0
, x0y0
, param0
);
1947 brw_MUL( p
, x0y1
, x0y1
, param0
);
1950 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1951 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1952 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1953 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1955 brw_push_insn_state( p
);
1956 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1957 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1958 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1959 brw_pop_insn_state( p
);
1961 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1962 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1963 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1964 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1965 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1967 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1968 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1969 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1970 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1973 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1974 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1975 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1976 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1978 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1979 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1980 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1981 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1983 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1984 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1985 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1986 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1988 /* We interpolate between the gradients using the polynomial
1989 6t^5 - 15t^4 + 10t^3 (Perlin). */
1990 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1991 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1992 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1993 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1994 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1995 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1996 brw_MUL( p
, xi
, xi
, param0
);
1997 brw_MUL( p
, yi
, yi
, param1
);
1998 brw_MUL( p
, zi
, zi
, param2
);
1999 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
2000 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
2001 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
2002 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
2003 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
2004 brw_MUL( p
, xi
, xi
, param0
);
2005 brw_MUL( p
, yi
, yi
, param1
);
2006 brw_MUL( p
, zi
, zi
, param2
);
2007 brw_MUL( p
, xi
, xi
, param0
);
2008 brw_MUL( p
, yi
, yi
, param1
);
2009 brw_MUL( p
, zi
, zi
, param2
);
2010 brw_MUL( p
, xi
, xi
, param0
);
2011 brw_MUL( p
, yi
, yi
, param1
);
2012 brw_MUL( p
, zi
, zi
, param2
);
2014 /* Here we interpolate in the y dimension... */
2015 brw_MUL( p
, x0y1
, x0y1
, yi
);
2016 brw_MUL( p
, x1y1
, x1y1
, yi
);
2017 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2018 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2020 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2021 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2022 brw_MUL( p
, x1y0
, x1y0
, xi
);
2023 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2025 /* Now do the same thing for the front four gradients... */
2027 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2028 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2029 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2030 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2032 brw_push_insn_state( p
);
2033 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2034 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2035 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2036 brw_pop_insn_state( p
);
2038 brw_MUL( p
, x1y0
, x1y0
, t
);
2039 brw_MUL( p
, x1y1
, x1y1
, t
);
2040 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
2041 brw_MUL( p
, x0y0
, x0y0
, param0
);
2042 brw_MUL( p
, x0y1
, x0y1
, param0
);
2045 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2046 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2047 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2048 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2050 brw_push_insn_state( p
);
2051 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2052 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2053 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2054 brw_pop_insn_state( p
);
2056 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2057 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2058 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
2059 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
2060 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
2062 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2063 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2064 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2065 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2068 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2069 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2070 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2071 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2073 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2074 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2075 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2076 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2078 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2079 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2080 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2081 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2083 /* The interpolation coefficients are still around from last time, so
2084 again interpolate in the y dimension... */
2085 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2086 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2087 brw_MUL( p
, x0y1
, x0y1
, yi
);
2088 brw_MUL( p
, x1y1
, x1y1
, yi
);
2089 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2090 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2092 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2093 time put the front face in tmp[ 1 ] and we're nearly there... */
2094 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2095 brw_MUL( p
, x1y0
, x1y0
, xi
);
2096 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2098 /* The final interpolation, in the z dimension: */
2099 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2100 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
2101 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2103 /* scale by pow( 2, -15 ), as described above */
2104 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2106 release_tmps( c
, mark
);
2109 static void emit_noise3( struct brw_wm_compile
*c
,
2110 const struct prog_instruction
*inst
)
2112 struct brw_compile
*p
= &c
->func
;
2113 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
2114 GLuint mask
= inst
->DstReg
.WriteMask
;
2116 int mark
= mark_tmps( c
);
2118 assert( mark
== 0 );
2120 src0
= get_src_reg( c
, inst
, 0, 0 );
2121 src1
= get_src_reg( c
, inst
, 0, 1 );
2122 src2
= get_src_reg( c
, inst
, 0, 2 );
2124 param0
= alloc_tmp( c
);
2125 param1
= alloc_tmp( c
);
2126 param2
= alloc_tmp( c
);
2128 brw_MOV( p
, param0
, src0
);
2129 brw_MOV( p
, param1
, src1
);
2130 brw_MOV( p
, param2
, src2
);
2132 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
2134 /* Fill in the result: */
2135 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2136 for (i
= 0 ; i
< 4; i
++) {
2137 if (mask
& (1<<i
)) {
2138 dst
= get_dst_reg(c
, inst
, i
);
2139 brw_MOV( p
, dst
, param0
);
2142 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2143 brw_set_saturate( p
, 0 );
2145 release_tmps( c
, mark
);
2149 * For the four-dimensional case, the little micro-optimisation benefits
2150 * we obtain by unrolling all the loops aren't worth the massive bloat it
2151 * now causes. Instead, we loop twice around performing a similar operation
2152 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2153 * code to glue it all together.
2155 static void noise4_sub( struct brw_wm_compile
*c
)
2157 struct brw_compile
*p
= &c
->func
;
2158 struct brw_reg param
[ 4 ],
2159 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2160 w0
, /* noise for the w=0 cube */
2161 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2162 interp
[ 4 ], /* interpolation coefficients */
2163 t
, tmp
[ 8 ], /* float temporaries */
2164 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2165 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2167 int mark
= mark_tmps( c
);
2168 GLuint loop
, origin
;
2170 x0y0
= alloc_tmp( c
);
2171 x0y1
= alloc_tmp( c
);
2172 x1y0
= alloc_tmp( c
);
2173 x1y1
= alloc_tmp( c
);
2175 w0
= alloc_tmp( c
);
2176 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2177 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2179 for( i
= 0; i
< 4; i
++ ) {
2180 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2181 interp
[ i
] = alloc_tmp( c
);
2184 for( i
= 0; i
< 8; i
++ ) {
2185 tmp
[ i
] = alloc_tmp( c
);
2186 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2187 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2190 brw_set_access_mode( p
, BRW_ALIGN_1
);
2192 /* We only want 16 bits of precision from the integral part of each
2193 co-ordinate, but unfortunately the RNDD semantics would saturate
2194 at 16 bits if we performed the operation directly to a 16-bit
2195 destination. Therefore, we round to 32-bit temporaries where
2196 appropriate, and then store only the lower 16 bits. */
2197 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2198 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2199 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2200 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2201 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2202 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2204 /* Modify the flag register here, because the side effect is useful
2205 later (see below). We know for certain that all flags will be
2206 cleared, since the FRC instruction cannot possibly generate
2207 negative results. Even for exceptional inputs (infinities, denormals,
2208 NaNs), the architecture guarantees that the L conditional is false. */
2209 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2210 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2211 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2212 for( i
= 1; i
< 4; i
++ )
2213 brw_FRC( p
, param
[ i
], param
[ i
] );
2215 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2217 for( i
= 0; i
< 4; i
++ )
2218 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2219 for( i
= 0; i
< 4; i
++ )
2220 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2221 for( i
= 0; i
< 4; i
++ )
2222 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2223 for( i
= 0; i
< 4; i
++ )
2224 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2225 for( j
= 0; j
< 3; j
++ )
2226 for( i
= 0; i
< 4; i
++ )
2227 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2229 /* Mark the current address, as it will be a jump destination. The
2230 following code will be executed twice: first, with the flag
2231 register clear indicating the w=0 case, and second with flags
2235 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2236 be hashed. Since we have only 16 bits of precision in the hash, we
2237 must be careful about thorough mixing to maintain entropy as we
2238 squash the input vector into a small scalar. */
2239 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2240 brw_imm_uw( 0xBC8F ) );
2241 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2242 brw_imm_uw( 0xD0BD ) );
2243 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2244 brw_imm_uw( 0x9B93 ) );
2245 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2246 brw_imm_uw( 0xA359 ) );
2247 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2248 brw_imm_uw( 0xBC8F ) );
2250 /* Temporarily disable the execution mask while we work with ExecSize=16
2251 channels (the mask is set for ExecSize=8 and is probably incorrect).
2252 Although this might cause execution of unwanted channels, the code
2253 writes only to temporary registers and has no side effects, so
2254 disabling the mask is harmless. */
2255 brw_push_insn_state( p
);
2256 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2257 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2258 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2259 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2261 /* We're now ready to perform the hashing. The eight hashes are
2262 interleaved for performance. The hash function used is
2263 designed to rapidly achieve avalanche and require only 16x16
2264 bit multiplication, and 8-bit swizzles (which we get for
2266 for( i
= 0; i
< 4; i
++ )
2267 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2268 for( i
= 0; i
< 4; i
++ )
2269 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2270 odd_bytes( wtmp
[ i
] ) );
2271 for( i
= 0; i
< 4; i
++ )
2272 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2273 for( i
= 0; i
< 4; i
++ )
2274 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2275 odd_bytes( wtmp
[ i
] ) );
2276 brw_pop_insn_state( p
);
2278 /* Now we want to initialise the four rear gradients based on the
2279 hashes. Format conversion from signed integer to float leaves
2280 everything scaled too high by a factor of pow( 2, 15 ), but
2281 we correct for that right at the end. */
2283 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2284 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2285 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2286 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2287 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2289 brw_push_insn_state( p
);
2290 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2291 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2292 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2293 brw_pop_insn_state( p
);
2295 brw_MUL( p
, x1y0
, x1y0
, t
);
2296 brw_MUL( p
, x1y1
, x1y1
, t
);
2297 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2298 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2299 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2302 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2303 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2304 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2305 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2307 brw_push_insn_state( p
);
2308 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2309 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2310 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2311 brw_pop_insn_state( p
);
2313 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2314 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2315 /* prepare t for the w component (used below): w the first time through
2316 the loop; w - 1 the second time) */
2317 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2318 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2319 p
->current
->header
.predicate_inverse
= 1;
2320 brw_MOV( p
, t
, param
[ 3 ] );
2321 p
->current
->header
.predicate_inverse
= 0;
2322 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2323 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2324 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2326 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2327 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2328 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2329 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2332 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2333 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2334 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2335 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2337 brw_push_insn_state( p
);
2338 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2339 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2340 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2341 brw_pop_insn_state( p
);
2343 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2344 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2345 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2346 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2348 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2349 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2350 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2351 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2354 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2355 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2356 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2357 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2359 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2360 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2361 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2362 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2363 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2365 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2366 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2367 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2368 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2370 /* Here we interpolate in the y dimension... */
2371 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2372 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2373 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2374 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2375 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2376 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2378 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2379 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2380 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2381 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2383 /* Now do the same thing for the front four gradients... */
2385 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2386 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2387 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2388 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2390 brw_push_insn_state( p
);
2391 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2392 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2393 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2394 brw_pop_insn_state( p
);
2396 brw_MUL( p
, x1y0
, x1y0
, t
);
2397 brw_MUL( p
, x1y1
, x1y1
, t
);
2398 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2399 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2400 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2403 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2404 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2405 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2406 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2408 brw_push_insn_state( p
);
2409 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2410 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2411 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2412 brw_pop_insn_state( p
);
2414 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2415 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2416 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2417 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2418 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2420 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2421 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2422 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2423 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2426 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2427 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2428 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2429 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2431 brw_push_insn_state( p
);
2432 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2433 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2434 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2435 brw_pop_insn_state( p
);
2437 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2438 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2439 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2440 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2441 /* prepare t for the w component (used below): w the first time through
2442 the loop; w - 1 the second time) */
2443 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2444 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2445 p
->current
->header
.predicate_inverse
= 1;
2446 brw_MOV( p
, t
, param
[ 3 ] );
2447 p
->current
->header
.predicate_inverse
= 0;
2448 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2450 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2451 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2452 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2453 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2456 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2457 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2458 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2459 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2461 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2462 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2463 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2464 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2466 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2467 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2468 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2469 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2471 /* Interpolate in the y dimension: */
2472 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2473 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2474 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2475 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2476 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2477 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2479 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2480 time put the front face in tmp[ 1 ] and we're nearly there... */
2481 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2482 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2483 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2485 /* Another interpolation, in the z dimension: */
2486 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2487 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2488 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2490 /* Exit the loop if we've computed both cubes... */
2491 origin
= p
->nr_insn
;
2492 brw_push_insn_state( p
);
2493 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2494 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2495 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2496 brw_pop_insn_state( p
);
2498 /* Save the result for the w=0 case, and increment the w coordinate: */
2499 brw_MOV( p
, w0
, tmp
[ 0 ] );
2500 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2503 /* Loop around for the other cube. Explicitly set the flag register
2504 (unfortunately we must spend an extra instruction to do this: we
2505 can't rely on a side effect of the previous MOV or ADD because
2506 conditional modifiers which are normally true might be false in
2507 exceptional circumstances, e.g. given a NaN input; the add to
2508 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2509 brw_push_insn_state( p
);
2510 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2511 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2512 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2513 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2514 brw_pop_insn_state( p
);
2516 /* Patch the previous conditional branch now that we know the
2517 destination address. */
2518 brw_set_src1( p
->store
+ origin
,
2519 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2521 /* The very last interpolation. */
2522 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2523 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2524 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2526 /* scale by pow( 2, -15 ), as described above */
2527 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2529 release_tmps( c
, mark
);
2532 static void emit_noise4( struct brw_wm_compile
*c
,
2533 const struct prog_instruction
*inst
)
2535 struct brw_compile
*p
= &c
->func
;
2536 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2537 GLuint mask
= inst
->DstReg
.WriteMask
;
2539 int mark
= mark_tmps( c
);
2541 assert( mark
== 0 );
2543 src0
= get_src_reg( c
, inst
, 0, 0 );
2544 src1
= get_src_reg( c
, inst
, 0, 1 );
2545 src2
= get_src_reg( c
, inst
, 0, 2 );
2546 src3
= get_src_reg( c
, inst
, 0, 3 );
2548 param0
= alloc_tmp( c
);
2549 param1
= alloc_tmp( c
);
2550 param2
= alloc_tmp( c
);
2551 param3
= alloc_tmp( c
);
2553 brw_MOV( p
, param0
, src0
);
2554 brw_MOV( p
, param1
, src1
);
2555 brw_MOV( p
, param2
, src2
);
2556 brw_MOV( p
, param3
, src3
);
2558 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2560 /* Fill in the result: */
2561 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2562 for (i
= 0 ; i
< 4; i
++) {
2563 if (mask
& (1<<i
)) {
2564 dst
= get_dst_reg(c
, inst
, i
);
2565 brw_MOV( p
, dst
, param0
);
2568 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2569 brw_set_saturate( p
, 0 );
2571 release_tmps( c
, mark
);
2574 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2575 const struct prog_instruction
*inst
)
2577 struct brw_compile
*p
= &c
->func
;
2578 GLuint mask
= inst
->DstReg
.WriteMask
;
2579 struct brw_reg src0
[2], dst
[2];
2581 dst
[0] = get_dst_reg(c
, inst
, 0);
2582 dst
[1] = get_dst_reg(c
, inst
, 1);
2584 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2585 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2587 /* Calculate the pixel offset from window bottom left into destination
2590 if (mask
& WRITEMASK_X
) {
2591 /* X' = X - origin_x */
2594 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2595 brw_imm_d(0 - c
->key
.origin_x
));
2598 if (mask
& WRITEMASK_Y
) {
2599 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2602 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2603 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
/* BIAS on SIMD8 not working yet... */
2610 static void emit_txb(struct brw_wm_compile
*c
,
2611 const struct prog_instruction
*inst
)
2613 struct brw_compile
*p
= &c
->func
;
2614 struct brw_reg dst
[4], src
[4], payload_reg
;
2615 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2619 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2621 for (i
= 0; i
< 4; i
++)
2622 dst
[i
] = get_dst_reg(c
, inst
, i
);
2623 for (i
= 0; i
< 4; i
++)
2624 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2626 switch (inst
->TexSrcTarget
) {
2627 case TEXTURE_1D_INDEX
:
2628 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2629 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2630 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2632 case TEXTURE_2D_INDEX
:
2633 case TEXTURE_RECT_INDEX
:
2634 brw_MOV(p
, brw_message_reg(2), src
[0]);
2635 brw_MOV(p
, brw_message_reg(3), src
[1]);
2636 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2639 brw_MOV(p
, brw_message_reg(2), src
[0]);
2640 brw_MOV(p
, brw_message_reg(3), src
[1]);
2641 brw_MOV(p
, brw_message_reg(4), src
[2]);
2644 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2645 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
2647 if (BRW_IS_IGDNG(p
->brw
)) {
2648 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG
;
2650 /* Does it work well on SIMD8? */
2651 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
2655 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2657 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2658 SURF_INDEX_TEXTURE(unit
),
2660 inst
->DstReg
.WriteMask
, /* writemask */
2661 msg_type
, /* msg_type */
2662 4, /* response_length */
2666 BRW_SAMPLER_SIMD_MODE_SIMD8
);
2670 static void emit_tex(struct brw_wm_compile
*c
,
2671 const struct prog_instruction
*inst
)
2673 struct brw_compile
*p
= &c
->func
;
2674 struct brw_reg dst
[4], src
[4], payload_reg
;
2675 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2679 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2682 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2684 for (i
= 0; i
< 4; i
++)
2685 dst
[i
] = get_dst_reg(c
, inst
, i
);
2686 for (i
= 0; i
< 4; i
++)
2687 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2689 switch (inst
->TexSrcTarget
) {
2690 case TEXTURE_1D_INDEX
:
2694 case TEXTURE_2D_INDEX
:
2695 case TEXTURE_RECT_INDEX
:
2696 emit
= WRITEMASK_XY
;
2700 emit
= WRITEMASK_XYZ
;
2706 /* move/load S, T, R coords */
2707 for (i
= 0; i
< nr
; i
++) {
2708 static const GLuint swz
[4] = {0,1,2,2};
2710 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2712 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2717 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2718 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
2721 if (BRW_IS_IGDNG(p
->brw
)) {
2723 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG
;
2725 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG
;
2727 /* Does it work for shadow on SIMD8 ? */
2728 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
;
2732 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2734 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2735 SURF_INDEX_TEXTURE(unit
),
2737 inst
->DstReg
.WriteMask
, /* writemask */
2738 msg_type
, /* msg_type */
2739 4, /* response_length */
2740 shadow
? 6 : 4, /* msg_length */
2743 BRW_SAMPLER_SIMD_MODE_SIMD8
);
2746 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2751 * Resolve subroutine calls after code emit is done.
2753 static void post_wm_emit( struct brw_wm_compile
*c
)
2755 brw_resolve_cals(&c
->func
);
2758 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2760 #define MAX_IF_DEPTH 32
2761 #define MAX_LOOP_DEPTH 32
2762 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
2763 GLuint i
, if_depth
= 0, loop_depth
= 0;
2764 struct brw_compile
*p
= &c
->func
;
2765 struct brw_indirect stack_index
= brw_indirect(0, 0);
2767 c
->out_of_regs
= GL_FALSE
;
2770 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2771 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2773 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2774 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2779 _mesa_printf("Inst %d: ", i
);
2780 _mesa_print_instruction(inst
);
2783 /* fetch any constants that this instruction needs */
2784 if (c
->fp
->use_const_buffer
)
2785 fetch_constants(c
, inst
);
2787 if (inst
->CondUpdate
)
2788 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2790 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2792 switch (inst
->Opcode
) {
2794 emit_pixel_xy(c
, inst
);
2797 emit_delta_xy(c
, inst
);
2800 emit_pixel_w(c
, inst
);
2803 emit_linterp(c
, inst
);
2806 emit_pinterp(c
, inst
);
2809 emit_cinterp(c
, inst
);
2812 emit_wpos_xy(c
, inst
);
2815 emit_fb_write(c
, inst
);
2817 case WM_FRONTFACING
:
2818 emit_frontfacing(c
, inst
);
2836 emit_trunc(c
, inst
);
2874 emit_min_max(c
, inst
);
2910 emit_noise1(c
, inst
);
2913 emit_noise2(c
, inst
);
2916 emit_noise3(c
, inst
);
2919 emit_noise4(c
, inst
);
2931 assert(if_depth
< MAX_IF_DEPTH
);
2932 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
2935 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2938 assert(if_depth
> 0);
2939 brw_ENDIF(p
, if_inst
[--if_depth
]);
2942 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2948 brw_push_insn_state(p
);
2949 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2950 brw_set_access_mode(p
, BRW_ALIGN_1
);
2951 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2952 brw_set_access_mode(p
, BRW_ALIGN_16
);
2953 brw_ADD(p
, get_addr_reg(stack_index
),
2954 get_addr_reg(stack_index
), brw_imm_d(4));
2955 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2956 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2957 brw_pop_insn_state(p
);
2961 brw_push_insn_state(p
);
2962 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2963 brw_ADD(p
, get_addr_reg(stack_index
),
2964 get_addr_reg(stack_index
), brw_imm_d(-4));
2965 brw_set_access_mode(p
, BRW_ALIGN_1
);
2966 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2967 brw_set_access_mode(p
, BRW_ALIGN_16
);
2968 brw_pop_insn_state(p
);
2971 case OPCODE_BGNLOOP
:
2972 /* XXX may need to invalidate the current_constant regs */
2973 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2977 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2981 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2983 case OPCODE_ENDLOOP
:
2985 struct brw_instruction
*inst0
, *inst1
;
2988 if (BRW_IS_IGDNG(brw
))
2992 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2993 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2994 while (inst0
> loop_inst
[loop_depth
]) {
2996 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2997 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2998 inst0
->bits3
.if_else
.pop_count
= 0;
3000 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
3001 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
3002 inst0
->bits3
.if_else
.pop_count
= 0;
3008 _mesa_printf("unsupported IR in fragment shader %d\n",
3012 if (inst
->CondUpdate
)
3013 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
3015 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
3019 if (INTEL_DEBUG
& DEBUG_WM
) {
3020 _mesa_printf("wm-native:\n");
3021 for (i
= 0; i
< p
->nr_insn
; i
++)
3022 brw_disasm(stderr
, &p
->store
[i
]);
3028 * Do GPU code generation for shaders that use GLSL features such as
3029 * flow control. Other shaders will be compiled with the
3031 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
3033 if (INTEL_DEBUG
& DEBUG_WM
) {
3034 _mesa_printf("brw_wm_glsl_emit:\n");
3037 /* initial instruction translation/simplification */
3040 /* actual code generation */
3041 brw_wm_emit_glsl(brw
, c
);
3043 if (INTEL_DEBUG
& DEBUG_WM
) {
3044 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
3047 c
->prog_data
.total_grf
= num_grf_used(c
);
3048 c
->prog_data
.total_scratch
= 0;