1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
15 * Determine if the given fragment program uses GLSL features such
16 * as flow conditionals, loops, subroutines.
17 * Some GLSL shaders may use these features, others might not.
19 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
23 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
24 switch (inst
->Opcode
) {
49 reclaim_temps(struct brw_wm_compile
*c
);
52 /** Mark GRF register as used. */
54 prealloc_grf(struct brw_wm_compile
*c
, int r
)
56 c
->used_grf
[r
] = GL_TRUE
;
60 /** Mark given GRF register as not in use. */
62 release_grf(struct brw_wm_compile
*c
, int r
)
64 /*assert(c->used_grf[r]);*/
65 c
->used_grf
[r
] = GL_FALSE
;
66 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
70 /** Return index of a free GRF, mark it as used. */
72 alloc_grf(struct brw_wm_compile
*c
)
75 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
76 if (!c
->used_grf
[r
]) {
77 c
->used_grf
[r
] = GL_TRUE
;
78 c
->first_free_grf
= r
+ 1; /* a guess */
83 /* no free temps, try to reclaim some */
85 c
->first_free_grf
= 0;
88 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
89 if (!c
->used_grf
[r
]) {
90 c
->used_grf
[r
] = GL_TRUE
;
91 c
->first_free_grf
= r
+ 1; /* a guess */
96 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
97 assert(c
->used_grf
[r
]);
100 /* really, no free GRF regs found */
101 if (!c
->out_of_regs
) {
102 /* print warning once per compilation */
103 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
104 c
->out_of_regs
= GL_TRUE
;
111 /** Return number of GRF registers used */
113 num_grf_used(const struct brw_wm_compile
*c
)
116 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
125 * Record the mapping of a Mesa register to a hardware register.
127 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
128 int component
, struct brw_reg reg
)
130 c
->wm_regs
[file
][index
][component
].reg
= reg
;
131 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
135 * Examine instruction's write mask to find index of first component
136 * enabled for writing.
138 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
141 for (i
= 0; i
< 4; i
++)
142 if (inst
->DstReg
.WriteMask
& (1<<i
))
147 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
151 /* if we need to allocate another temp, grow the tmp_regs[] array */
152 if (c
->tmp_index
== c
->tmp_max
) {
153 int r
= alloc_grf(c
);
155 /*printf("Out of temps in %s\n", __FUNCTION__);*/
156 r
= 50; /* XXX random register! */
158 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
161 /* form the GRF register */
162 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
163 /*printf("alloc_temp %d\n", reg.nr);*/
164 assert(reg
.nr
< BRW_WM_MAX_GRF
);
170 * Save current temp register info.
171 * There must be a matching call to release_tmps().
173 static int mark_tmps(struct brw_wm_compile
*c
)
178 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
180 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
183 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
189 * Convert Mesa src register to brw register.
191 * Since we're running in SOA mode each Mesa register corresponds to four
192 * hardware registers. We allocate the hardware registers as needed here.
194 * \param file register file, one of PROGRAM_x
195 * \param index register number
196 * \param component src component (X=0, Y=1, Z=2, W=3)
197 * \param nr not used?!?
198 * \param neg negate value?
199 * \param abs take absolute value?
201 static struct brw_reg
202 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
203 int nr
, GLuint neg
, GLuint abs
)
207 case PROGRAM_STATE_VAR
:
208 case PROGRAM_CONSTANT
:
209 case PROGRAM_UNIFORM
:
210 file
= PROGRAM_STATE_VAR
;
212 case PROGRAM_UNDEFINED
:
213 return brw_null_reg();
214 case PROGRAM_TEMPORARY
:
217 case PROGRAM_PAYLOAD
:
220 _mesa_problem(NULL
, "Unexpected file in get_reg()");
221 return brw_null_reg();
225 assert(component
< 4);
227 /* see if we've already allocated a HW register for this Mesa register */
228 if (c
->wm_regs
[file
][index
][component
].inited
) {
230 reg
= c
->wm_regs
[file
][index
][component
].reg
;
233 /* no, allocate new register */
234 int grf
= alloc_grf(c
);
235 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
237 /* totally out of temps */
238 grf
= 51; /* XXX random register! */
241 reg
= brw_vec8_grf(grf
, 0);
242 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
244 set_reg(c
, file
, index
, component
, reg
);
247 if (neg
& (1 << component
)) {
258 * This is called if we run out of GRF registers. Examine the live intervals
259 * of temp regs in the program and free those which won't be used again.
262 reclaim_temps(struct brw_wm_compile
*c
)
264 GLint intBegin
[MAX_PROGRAM_TEMPS
];
265 GLint intEnd
[MAX_PROGRAM_TEMPS
];
268 /*printf("Reclaim temps:\n");*/
270 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
273 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
274 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
275 /* program temp[i] can be freed */
277 /*printf(" temp[%d] is dead\n", index);*/
278 for (component
= 0; component
< 4; component
++) {
279 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
280 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
283 printf(" Reclaim temp %d, reg %d at inst %d\n",
284 index, r, c->cur_inst);
286 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
297 * Preallocate registers. This sets up the Mesa to hardware register
298 * mapping for certain registers, such as constants (uniforms/state vars)
301 static void prealloc_reg(struct brw_wm_compile
*c
)
305 int urb_read_length
= 0;
306 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
307 GLuint reg_index
= 0;
309 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
310 c
->first_free_grf
= 0;
312 for (i
= 0; i
< 4; i
++) {
313 if (i
< c
->key
.nr_depth_regs
)
314 reg
= brw_vec8_grf(i
* 2, 0);
316 reg
= brw_vec8_grf(0, 0);
317 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
319 reg_index
+= 2 * c
->key
.nr_depth_regs
;
323 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
324 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
326 /* use a real constant buffer, or just use a section of the GRF? */
327 /* XXX this heuristic may need adjustment... */
328 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
329 c
->fp
->use_const_buffer
= GL_TRUE
;
331 c
->fp
->use_const_buffer
= GL_FALSE
;
332 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
334 if (c
->fp
->use_const_buffer
) {
335 /* We'll use a real constant buffer and fetch constants from
336 * it with a dataport read message.
339 /* number of float constants in CURBE */
340 c
->prog_data
.nr_params
= 0;
343 const struct gl_program_parameter_list
*plist
=
344 c
->fp
->program
.Base
.Parameters
;
347 /* number of float constants in CURBE */
348 c
->prog_data
.nr_params
= 4 * nr_params
;
350 /* loop over program constants (float[4]) */
351 for (i
= 0; i
< nr_params
; i
++) {
352 /* loop over XYZW channels */
353 for (j
= 0; j
< 4; j
++, index
++) {
354 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
355 /* Save pointer to parameter/constant value.
356 * Constants will be copied in prepare_constant_buffer()
358 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
359 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
362 /* number of constant regs used (each reg is float[8]) */
363 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
364 reg_index
+= c
->nr_creg
;
368 /* fragment shader inputs */
369 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
372 if (i
>= VERT_RESULT_VAR0
)
373 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
374 else if (i
<= VERT_RESULT_TEX7
)
379 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
380 urb_read_length
= reg_index
;
381 reg
= brw_vec8_grf(reg_index
, 0);
382 for (j
= 0; j
< 4; j
++)
383 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
385 if (c
->key
.vp_outputs_written
& (1 << i
)) {
390 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
391 c
->prog_data
.urb_read_length
= urb_read_length
;
392 c
->prog_data
.curb_read_length
= c
->nr_creg
;
393 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
395 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
398 /* mark GRF regs [0..reg_index-1] as in-use */
399 for (i
= 0; i
< reg_index
; i
++)
402 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
403 prealloc_grf(c
, 126);
404 prealloc_grf(c
, 127);
406 /* An instruction may reference up to three constants.
407 * They'll be found in these registers.
408 * XXX alloc these on demand!
410 if (c
->fp
->use_const_buffer
) {
411 for (i
= 0; i
< 3; i
++) {
412 c
->current_const
[i
].index
= -1;
413 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
417 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
418 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
424 * Check if any of the instruction's src registers are constants, uniforms,
425 * or statevars. If so, fetch any constants that we don't already have in
426 * the three GRF slots.
428 static void fetch_constants(struct brw_wm_compile
*c
,
429 const struct prog_instruction
*inst
)
431 struct brw_compile
*p
= &c
->func
;
434 /* loop over instruction src regs */
435 for (i
= 0; i
< 3; i
++) {
436 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
437 if (src
->File
== PROGRAM_STATE_VAR
||
438 src
->File
== PROGRAM_CONSTANT
||
439 src
->File
== PROGRAM_UNIFORM
) {
440 c
->current_const
[i
].index
= src
->Index
;
443 printf(" fetch const[%d] for arg %d into reg %d\n",
444 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
447 /* need to fetch the constant now */
449 c
->current_const
[i
].reg
, /* writeback dest */
450 src
->RelAddr
, /* relative indexing? */
451 16 * src
->Index
, /* byte offset */
452 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
460 * Convert Mesa dst register to brw register.
462 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
463 const struct prog_instruction
*inst
,
467 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
472 static struct brw_reg
473 get_src_reg_const(struct brw_wm_compile
*c
,
474 const struct prog_instruction
*inst
,
475 GLuint srcRegIndex
, GLuint component
)
477 /* We should have already fetched the constant from the constant
478 * buffer in fetch_constants(). Now we just have to return a
479 * register description that extracts the needed component and
480 * smears it across all eight vector components.
482 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
483 struct brw_reg const_reg
;
485 assert(component
< 4);
486 assert(srcRegIndex
< 3);
487 assert(c
->current_const
[srcRegIndex
].index
!= -1);
488 const_reg
= c
->current_const
[srcRegIndex
].reg
;
490 /* extract desired float from the const_reg, and smear */
491 const_reg
= stride(const_reg
, 0, 1, 0);
492 const_reg
.subnr
= component
* 4;
494 if (src
->Negate
& (1 << component
))
495 const_reg
= negate(const_reg
);
497 const_reg
= brw_abs(const_reg
);
500 printf(" form const[%d].%d for arg %d, reg %d\n",
501 c
->current_const
[srcRegIndex
].index
,
512 * Convert Mesa src register to brw register.
514 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
515 const struct prog_instruction
*inst
,
516 GLuint srcRegIndex
, GLuint channel
)
518 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
520 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
522 /* Extended swizzle terms */
523 if (component
== SWIZZLE_ZERO
) {
524 return brw_imm_f(0.0F
);
526 else if (component
== SWIZZLE_ONE
) {
527 return brw_imm_f(1.0F
);
530 if (c
->fp
->use_const_buffer
&&
531 (src
->File
== PROGRAM_STATE_VAR
||
532 src
->File
== PROGRAM_CONSTANT
||
533 src
->File
== PROGRAM_UNIFORM
)) {
534 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
537 /* other type of source register */
538 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
539 src
->Negate
, src
->Abs
);
545 * Same as \sa get_src_reg() but if the register is a literal, emit
546 * a brw_reg encoding the literal.
547 * Note that a brw instruction only allows one src operand to be a literal.
548 * For instructions with more than one operand, only the second can be a
549 * literal. This means that we treat some literals as constants/uniforms
550 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
553 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
554 const struct prog_instruction
*inst
,
555 GLuint srcRegIndex
, GLuint channel
)
557 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
558 if (src
->File
== PROGRAM_CONSTANT
) {
560 const int component
= GET_SWZ(src
->Swizzle
, channel
);
561 const GLfloat
*param
=
562 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
563 GLfloat value
= param
[component
];
564 if (src
->Negate
& (1 << channel
))
567 value
= FABSF(value
);
569 printf(" form immed value %f for chan %d\n", value
, channel
);
571 return brw_imm_f(value
);
574 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
580 * Subroutines are minimal support for reusable instruction sequences.
581 * They are implemented as simply as possible to minimise overhead: there
582 * is no explicit support for communication between the caller and callee
583 * other than saving the return address in a temporary register, nor is
584 * there any automatic local storage. This implies that great care is
585 * required before attempting reentrancy or any kind of nested
586 * subroutine invocations.
588 static void invoke_subroutine( struct brw_wm_compile
*c
,
589 enum _subroutine subroutine
,
590 void (*emit
)( struct brw_wm_compile
* ) )
592 struct brw_compile
*p
= &c
->func
;
594 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
596 if( c
->subroutines
[ subroutine
] ) {
597 /* subroutine previously emitted: reuse existing instructions */
599 int mark
= mark_tmps( c
);
600 struct brw_reg return_address
= retype( alloc_tmp( c
),
601 BRW_REGISTER_TYPE_UD
);
602 int here
= p
->nr_insn
;
604 brw_push_insn_state(p
);
605 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
606 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
608 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
609 brw_imm_d( ( c
->subroutines
[ subroutine
] -
611 brw_pop_insn_state(p
);
613 release_tmps( c
, mark
);
615 /* previously unused subroutine: emit, and mark for later reuse */
617 int mark
= mark_tmps( c
);
618 struct brw_reg return_address
= retype( alloc_tmp( c
),
619 BRW_REGISTER_TYPE_UD
);
620 struct brw_instruction
*calc
;
621 int base
= p
->nr_insn
;
623 brw_push_insn_state(p
);
624 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
625 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
626 brw_pop_insn_state(p
);
628 c
->subroutines
[ subroutine
] = p
->nr_insn
;
632 brw_push_insn_state(p
);
633 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
634 brw_MOV( p
, brw_ip_reg(), return_address
);
635 brw_pop_insn_state(p
);
637 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
639 release_tmps( c
, mark
);
643 static void emit_abs( struct brw_wm_compile
*c
,
644 const struct prog_instruction
*inst
)
647 struct brw_compile
*p
= &c
->func
;
648 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
649 for (i
= 0; i
< 4; i
++) {
650 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
651 struct brw_reg src
, dst
;
652 dst
= get_dst_reg(c
, inst
, i
);
653 src
= get_src_reg(c
, inst
, 0, i
);
654 brw_MOV(p
, dst
, brw_abs(src
));
657 brw_set_saturate(p
, 0);
660 static void emit_trunc( struct brw_wm_compile
*c
,
661 const struct prog_instruction
*inst
)
664 struct brw_compile
*p
= &c
->func
;
665 GLuint mask
= inst
->DstReg
.WriteMask
;
666 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
667 for (i
= 0; i
< 4; i
++) {
669 struct brw_reg src
, dst
;
670 dst
= get_dst_reg(c
, inst
, i
);
671 src
= get_src_reg(c
, inst
, 0, i
);
672 brw_RNDZ(p
, dst
, src
);
675 brw_set_saturate(p
, 0);
678 static void emit_mov( struct brw_wm_compile
*c
,
679 const struct prog_instruction
*inst
)
682 struct brw_compile
*p
= &c
->func
;
683 GLuint mask
= inst
->DstReg
.WriteMask
;
684 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
685 for (i
= 0; i
< 4; i
++) {
687 struct brw_reg src
, dst
;
688 dst
= get_dst_reg(c
, inst
, i
);
689 /* XXX some moves from immediate value don't work reliably!!! */
690 /*src = get_src_reg_imm(c, inst, 0, i);*/
691 src
= get_src_reg(c
, inst
, 0, i
);
692 brw_MOV(p
, dst
, src
);
695 brw_set_saturate(p
, 0);
698 static void emit_pixel_xy(struct brw_wm_compile
*c
,
699 const struct prog_instruction
*inst
)
701 struct brw_reg r1
= brw_vec1_grf(1, 0);
702 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
704 struct brw_reg dst0
, dst1
;
705 struct brw_compile
*p
= &c
->func
;
706 GLuint mask
= inst
->DstReg
.WriteMask
;
708 dst0
= get_dst_reg(c
, inst
, 0);
709 dst1
= get_dst_reg(c
, inst
, 1);
710 /* Calculate pixel centers by adding 1 or 0 to each of the
711 * micro-tile coordinates passed in r1.
713 if (mask
& WRITEMASK_X
) {
715 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
716 stride(suboffset(r1_uw
, 4), 2, 4, 0),
717 brw_imm_v(0x10101010));
720 if (mask
& WRITEMASK_Y
) {
722 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
723 stride(suboffset(r1_uw
, 5), 2, 4, 0),
724 brw_imm_v(0x11001100));
728 static void emit_delta_xy(struct brw_wm_compile
*c
,
729 const struct prog_instruction
*inst
)
731 struct brw_reg r1
= brw_vec1_grf(1, 0);
732 struct brw_reg dst0
, dst1
, src0
, src1
;
733 struct brw_compile
*p
= &c
->func
;
734 GLuint mask
= inst
->DstReg
.WriteMask
;
736 dst0
= get_dst_reg(c
, inst
, 0);
737 dst1
= get_dst_reg(c
, inst
, 1);
738 src0
= get_src_reg(c
, inst
, 0, 0);
739 src1
= get_src_reg(c
, inst
, 0, 1);
740 /* Calc delta X,Y by subtracting origin in r1 from the pixel
743 if (mask
& WRITEMASK_X
) {
746 retype(src0
, BRW_REGISTER_TYPE_UW
),
750 if (mask
& WRITEMASK_Y
) {
753 retype(src1
, BRW_REGISTER_TYPE_UW
),
754 negate(suboffset(r1
,1)));
759 static void fire_fb_write( struct brw_wm_compile
*c
,
765 struct brw_compile
*p
= &c
->func
;
766 /* Pass through control information:
768 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
770 brw_push_insn_state(p
);
771 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
773 brw_message_reg(base_reg
+ 1),
775 brw_pop_insn_state(p
);
777 /* Send framebuffer write message: */
779 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
781 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
788 static void emit_fb_write(struct brw_wm_compile
*c
,
789 const struct prog_instruction
*inst
)
791 struct brw_compile
*p
= &c
->func
;
797 /* Reserve a space for AA - may not be needed:
799 if (c
->key
.aa_dest_stencil_reg
)
802 brw_push_insn_state(p
);
803 for (channel
= 0; channel
< 4; channel
++) {
804 src0
= get_src_reg(c
, inst
, 0, channel
);
805 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
806 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
807 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
809 /* skip over the regs populated above: */
811 brw_pop_insn_state(p
);
813 if (c
->key
.source_depth_to_render_target
) {
814 if (c
->key
.computes_depth
) {
815 src0
= get_src_reg(c
, inst
, 2, 2);
816 brw_MOV(p
, brw_message_reg(nr
), src0
);
819 src0
= get_src_reg(c
, inst
, 1, 1);
820 brw_MOV(p
, brw_message_reg(nr
), src0
);
826 if (c
->key
.dest_depth_reg
) {
827 const GLuint comp
= c
->key
.dest_depth_reg
/ 2;
828 const GLuint off
= c
->key
.dest_depth_reg
% 2;
831 /* XXX this code needs review/testing */
832 struct brw_reg arg1_0
= get_src_reg(c
, inst
, 1, comp
);
833 struct brw_reg arg1_1
= get_src_reg(c
, inst
, 1, comp
+1);
835 brw_push_insn_state(p
);
836 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
838 brw_MOV(p
, brw_message_reg(nr
), offset(arg1_0
, 1));
840 brw_MOV(p
, brw_message_reg(nr
+1), arg1_1
);
841 brw_pop_insn_state(p
);
845 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
846 brw_MOV(p
, brw_message_reg(nr
), src
);
851 target
= inst
->Aux
>> 1;
853 fire_fb_write(c
, 0, nr
, target
, eot
);
856 static void emit_pixel_w( struct brw_wm_compile
*c
,
857 const struct prog_instruction
*inst
)
859 struct brw_compile
*p
= &c
->func
;
860 GLuint mask
= inst
->DstReg
.WriteMask
;
861 if (mask
& WRITEMASK_W
) {
862 struct brw_reg dst
, src0
, delta0
, delta1
;
863 struct brw_reg interp3
;
865 dst
= get_dst_reg(c
, inst
, 3);
866 src0
= get_src_reg(c
, inst
, 0, 0);
867 delta0
= get_src_reg(c
, inst
, 1, 0);
868 delta1
= get_src_reg(c
, inst
, 1, 1);
870 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
871 /* Calc 1/w - just linterp wpos[3] optimized by putting the
872 * result straight into a message reg.
874 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
875 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
879 BRW_MATH_FUNCTION_INV
,
880 BRW_MATH_SATURATE_NONE
,
882 BRW_MATH_PRECISION_FULL
);
886 static void emit_linterp(struct brw_wm_compile
*c
,
887 const struct prog_instruction
*inst
)
889 struct brw_compile
*p
= &c
->func
;
890 GLuint mask
= inst
->DstReg
.WriteMask
;
891 struct brw_reg interp
[4];
892 struct brw_reg dst
, delta0
, delta1
;
896 src0
= get_src_reg(c
, inst
, 0, 0);
897 delta0
= get_src_reg(c
, inst
, 1, 0);
898 delta1
= get_src_reg(c
, inst
, 1, 1);
901 interp
[0] = brw_vec1_grf(nr
, 0);
902 interp
[1] = brw_vec1_grf(nr
, 4);
903 interp
[2] = brw_vec1_grf(nr
+1, 0);
904 interp
[3] = brw_vec1_grf(nr
+1, 4);
906 for(i
= 0; i
< 4; i
++ ) {
908 dst
= get_dst_reg(c
, inst
, i
);
909 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
910 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
915 static void emit_cinterp(struct brw_wm_compile
*c
,
916 const struct prog_instruction
*inst
)
918 struct brw_compile
*p
= &c
->func
;
919 GLuint mask
= inst
->DstReg
.WriteMask
;
921 struct brw_reg interp
[4];
922 struct brw_reg dst
, src0
;
925 src0
= get_src_reg(c
, inst
, 0, 0);
928 interp
[0] = brw_vec1_grf(nr
, 0);
929 interp
[1] = brw_vec1_grf(nr
, 4);
930 interp
[2] = brw_vec1_grf(nr
+1, 0);
931 interp
[3] = brw_vec1_grf(nr
+1, 4);
933 for(i
= 0; i
< 4; i
++ ) {
935 dst
= get_dst_reg(c
, inst
, i
);
936 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
941 static void emit_pinterp(struct brw_wm_compile
*c
,
942 const struct prog_instruction
*inst
)
944 struct brw_compile
*p
= &c
->func
;
945 GLuint mask
= inst
->DstReg
.WriteMask
;
947 struct brw_reg interp
[4];
948 struct brw_reg dst
, delta0
, delta1
;
949 struct brw_reg src0
, w
;
952 src0
= get_src_reg(c
, inst
, 0, 0);
953 delta0
= get_src_reg(c
, inst
, 1, 0);
954 delta1
= get_src_reg(c
, inst
, 1, 1);
955 w
= get_src_reg(c
, inst
, 2, 3);
958 interp
[0] = brw_vec1_grf(nr
, 0);
959 interp
[1] = brw_vec1_grf(nr
, 4);
960 interp
[2] = brw_vec1_grf(nr
+1, 0);
961 interp
[3] = brw_vec1_grf(nr
+1, 4);
963 for(i
= 0; i
< 4; i
++ ) {
965 dst
= get_dst_reg(c
, inst
, i
);
966 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
967 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
969 brw_MUL(p
, dst
, dst
, w
);
974 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
975 static void emit_frontfacing(struct brw_wm_compile
*c
,
976 const struct prog_instruction
*inst
)
978 struct brw_compile
*p
= &c
->func
;
979 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
981 GLuint mask
= inst
->DstReg
.WriteMask
;
984 for (i
= 0; i
< 4; i
++) {
986 dst
= get_dst_reg(c
, inst
, i
);
987 brw_MOV(p
, dst
, brw_imm_f(0.0));
991 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
994 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
995 for (i
= 0; i
< 4; i
++) {
997 dst
= get_dst_reg(c
, inst
, i
);
998 brw_MOV(p
, dst
, brw_imm_f(1.0));
1001 brw_set_predicate_control_flag_value(p
, 0xff);
1004 static void emit_xpd(struct brw_wm_compile
*c
,
1005 const struct prog_instruction
*inst
)
1008 struct brw_compile
*p
= &c
->func
;
1009 GLuint mask
= inst
->DstReg
.WriteMask
;
1010 for (i
= 0; i
< 4; i
++) {
1011 GLuint i2
= (i
+2)%3;
1012 GLuint i1
= (i
+1)%3;
1013 if (mask
& (1<<i
)) {
1014 struct brw_reg src0
, src1
, dst
;
1015 dst
= get_dst_reg(c
, inst
, i
);
1016 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
1017 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
1018 brw_MUL(p
, brw_null_reg(), src0
, src1
);
1019 src0
= get_src_reg(c
, inst
, 0, i1
);
1020 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
1021 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1022 brw_MAC(p
, dst
, src0
, src1
);
1023 brw_set_saturate(p
, 0);
1026 brw_set_saturate(p
, 0);
1029 static void emit_dp3(struct brw_wm_compile
*c
,
1030 const struct prog_instruction
*inst
)
1032 struct brw_reg src0
[3], src1
[3], dst
;
1034 struct brw_compile
*p
= &c
->func
;
1035 for (i
= 0; i
< 3; i
++) {
1036 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1037 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1040 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1041 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1042 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1043 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1044 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1045 brw_set_saturate(p
, 0);
1048 static void emit_dp4(struct brw_wm_compile
*c
,
1049 const struct prog_instruction
*inst
)
1051 struct brw_reg src0
[4], src1
[4], dst
;
1053 struct brw_compile
*p
= &c
->func
;
1054 for (i
= 0; i
< 4; i
++) {
1055 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1056 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1058 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1059 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1060 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1061 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1062 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1063 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1064 brw_set_saturate(p
, 0);
1067 static void emit_dph(struct brw_wm_compile
*c
,
1068 const struct prog_instruction
*inst
)
1070 struct brw_reg src0
[4], src1
[4], dst
;
1072 struct brw_compile
*p
= &c
->func
;
1073 for (i
= 0; i
< 4; i
++) {
1074 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1075 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1077 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1078 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1079 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1080 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1081 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1082 brw_ADD(p
, dst
, dst
, src1
[3]);
1083 brw_set_saturate(p
, 0);
1087 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1088 * Note that the result of the function is smeared across the dest
1089 * register's X, Y, Z and W channels (subject to writemasking of course).
1091 static void emit_math1(struct brw_wm_compile
*c
,
1092 const struct prog_instruction
*inst
, GLuint func
)
1094 struct brw_compile
*p
= &c
->func
;
1095 struct brw_reg src0
, dst
, tmp
;
1096 const int mark
= mark_tmps( c
);
1101 /* Get first component of source register */
1102 src0
= get_src_reg(c
, inst
, 0, 0);
1104 /* tmp = func(src0) */
1105 brw_MOV(p
, brw_message_reg(2), src0
);
1109 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1112 BRW_MATH_DATA_VECTOR
,
1113 BRW_MATH_PRECISION_FULL
);
1115 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
1117 /* replicate tmp value across enabled dest channels */
1118 for (i
= 0; i
< 4; i
++) {
1119 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
1120 dst
= get_dst_reg(c
, inst
, i
);
1121 brw_MOV(p
, dst
, tmp
);
1125 release_tmps(c
, mark
);
1128 static void emit_rcp(struct brw_wm_compile
*c
,
1129 const struct prog_instruction
*inst
)
1131 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
1134 static void emit_rsq(struct brw_wm_compile
*c
,
1135 const struct prog_instruction
*inst
)
1137 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
1140 static void emit_sin(struct brw_wm_compile
*c
,
1141 const struct prog_instruction
*inst
)
1143 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
1146 static void emit_cos(struct brw_wm_compile
*c
,
1147 const struct prog_instruction
*inst
)
1149 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1152 static void emit_ex2(struct brw_wm_compile
*c
,
1153 const struct prog_instruction
*inst
)
1155 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1158 static void emit_lg2(struct brw_wm_compile
*c
,
1159 const struct prog_instruction
*inst
)
1161 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1164 static void emit_add(struct brw_wm_compile
*c
,
1165 const struct prog_instruction
*inst
)
1167 struct brw_compile
*p
= &c
->func
;
1168 struct brw_reg src0
, src1
, dst
;
1169 GLuint mask
= inst
->DstReg
.WriteMask
;
1171 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1172 for (i
= 0 ; i
< 4; i
++) {
1173 if (mask
& (1<<i
)) {
1174 dst
= get_dst_reg(c
, inst
, i
);
1175 src0
= get_src_reg(c
, inst
, 0, i
);
1176 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1177 brw_ADD(p
, dst
, src0
, src1
);
1180 brw_set_saturate(p
, 0);
1183 static void emit_arl(struct brw_wm_compile
*c
,
1184 const struct prog_instruction
*inst
)
1186 struct brw_compile
*p
= &c
->func
;
1187 struct brw_reg src0
, addr_reg
;
1188 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1189 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1190 BRW_ARF_ADDRESS
, 0);
1191 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1192 brw_MOV(p
, addr_reg
, src0
);
1193 brw_set_saturate(p
, 0);
1196 static void emit_sub(struct brw_wm_compile
*c
,
1197 const struct prog_instruction
*inst
)
1199 struct brw_compile
*p
= &c
->func
;
1200 struct brw_reg src0
, src1
, dst
;
1201 GLuint mask
= inst
->DstReg
.WriteMask
;
1203 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1204 for (i
= 0 ; i
< 4; i
++) {
1205 if (mask
& (1<<i
)) {
1206 dst
= get_dst_reg(c
, inst
, i
);
1207 src0
= get_src_reg(c
, inst
, 0, i
);
1208 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1209 brw_ADD(p
, dst
, src0
, negate(src1
));
1212 brw_set_saturate(p
, 0);
1215 static void emit_mul(struct brw_wm_compile
*c
,
1216 const struct prog_instruction
*inst
)
1218 struct brw_compile
*p
= &c
->func
;
1219 struct brw_reg src0
, src1
, dst
;
1220 GLuint mask
= inst
->DstReg
.WriteMask
;
1222 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1223 for (i
= 0 ; i
< 4; i
++) {
1224 if (mask
& (1<<i
)) {
1225 dst
= get_dst_reg(c
, inst
, i
);
1226 src0
= get_src_reg(c
, inst
, 0, i
);
1227 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1228 brw_MUL(p
, dst
, src0
, src1
);
1231 brw_set_saturate(p
, 0);
1234 static void emit_frc(struct brw_wm_compile
*c
,
1235 const struct prog_instruction
*inst
)
1237 struct brw_compile
*p
= &c
->func
;
1238 struct brw_reg src0
, dst
;
1239 GLuint mask
= inst
->DstReg
.WriteMask
;
1241 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1242 for (i
= 0 ; i
< 4; i
++) {
1243 if (mask
& (1<<i
)) {
1244 dst
= get_dst_reg(c
, inst
, i
);
1245 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1246 brw_FRC(p
, dst
, src0
);
1249 if (inst
->SaturateMode
!= SATURATE_OFF
)
1250 brw_set_saturate(p
, 0);
1253 static void emit_flr(struct brw_wm_compile
*c
,
1254 const struct prog_instruction
*inst
)
1256 struct brw_compile
*p
= &c
->func
;
1257 struct brw_reg src0
, dst
;
1258 GLuint mask
= inst
->DstReg
.WriteMask
;
1260 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1261 for (i
= 0 ; i
< 4; i
++) {
1262 if (mask
& (1<<i
)) {
1263 dst
= get_dst_reg(c
, inst
, i
);
1264 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1265 brw_RNDD(p
, dst
, src0
);
1268 brw_set_saturate(p
, 0);
1272 static void emit_min_max(struct brw_wm_compile
*c
,
1273 const struct prog_instruction
*inst
)
1275 struct brw_compile
*p
= &c
->func
;
1276 const GLuint mask
= inst
->DstReg
.WriteMask
;
1277 const int mark
= mark_tmps(c
);
1279 brw_push_insn_state(p
);
1280 for (i
= 0; i
< 4; i
++) {
1281 if (mask
& (1<<i
)) {
1282 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1283 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1284 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1286 /* if dst==src0 or dst==src1 we need to use a temp reg */
1287 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1288 brw_same_reg(dst
, src1
);
1295 printf(" Min/max: dst %d src0 %d src1 %d\n",
1296 dst.nr, src0.nr, src1.nr);
1298 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1299 brw_MOV(p
, dst
, src0
);
1300 brw_set_saturate(p
, 0);
1302 if (inst
->Opcode
== OPCODE_MIN
)
1303 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1305 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1307 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1308 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1309 brw_MOV(p
, dst
, src1
);
1310 brw_set_saturate(p
, 0);
1311 brw_set_predicate_control_flag_value(p
, 0xff);
1313 brw_MOV(p
, real_dst
, dst
);
1316 brw_pop_insn_state(p
);
1317 release_tmps(c
, mark
);
1320 static void emit_pow(struct brw_wm_compile
*c
,
1321 const struct prog_instruction
*inst
)
1323 struct brw_compile
*p
= &c
->func
;
1324 struct brw_reg dst
, src0
, src1
;
1325 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1326 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1327 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1329 brw_MOV(p
, brw_message_reg(2), src0
);
1330 brw_MOV(p
, brw_message_reg(3), src1
);
1334 BRW_MATH_FUNCTION_POW
,
1335 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1338 BRW_MATH_DATA_VECTOR
,
1339 BRW_MATH_PRECISION_FULL
);
1342 static void emit_lrp(struct brw_wm_compile
*c
,
1343 const struct prog_instruction
*inst
)
1345 struct brw_compile
*p
= &c
->func
;
1346 GLuint mask
= inst
->DstReg
.WriteMask
;
1347 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1349 int mark
= mark_tmps(c
);
1350 for (i
= 0; i
< 4; i
++) {
1351 if (mask
& (1<<i
)) {
1352 dst
= get_dst_reg(c
, inst
, i
);
1353 src0
= get_src_reg(c
, inst
, 0, i
);
1355 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1357 if (src1
.nr
== dst
.nr
) {
1358 tmp1
= alloc_tmp(c
);
1359 brw_MOV(p
, tmp1
, src1
);
1363 src2
= get_src_reg(c
, inst
, 2, i
);
1364 if (src2
.nr
== dst
.nr
) {
1365 tmp2
= alloc_tmp(c
);
1366 brw_MOV(p
, tmp2
, src2
);
1370 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1371 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1372 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1373 brw_MAC(p
, dst
, src0
, tmp1
);
1374 brw_set_saturate(p
, 0);
1376 release_tmps(c
, mark
);
1381 * For GLSL shaders, this KIL will be unconditional.
1382 * It may be contained inside an IF/ENDIF structure of course.
1384 static void emit_kil(struct brw_wm_compile
*c
)
1386 struct brw_compile
*p
= &c
->func
;
1387 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1388 brw_push_insn_state(p
);
1389 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1390 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1391 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1392 brw_pop_insn_state(p
);
1395 static void emit_mad(struct brw_wm_compile
*c
,
1396 const struct prog_instruction
*inst
)
1398 struct brw_compile
*p
= &c
->func
;
1399 GLuint mask
= inst
->DstReg
.WriteMask
;
1400 struct brw_reg dst
, src0
, src1
, src2
;
1403 for (i
= 0; i
< 4; i
++) {
1404 if (mask
& (1<<i
)) {
1405 dst
= get_dst_reg(c
, inst
, i
);
1406 src0
= get_src_reg(c
, inst
, 0, i
);
1407 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1408 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1409 brw_MUL(p
, dst
, src0
, src1
);
1411 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1412 brw_ADD(p
, dst
, dst
, src2
);
1413 brw_set_saturate(p
, 0);
1418 static void emit_sop(struct brw_wm_compile
*c
,
1419 const struct prog_instruction
*inst
, GLuint cond
)
1421 struct brw_compile
*p
= &c
->func
;
1422 GLuint mask
= inst
->DstReg
.WriteMask
;
1423 struct brw_reg dst
, src0
, src1
;
1426 for (i
= 0; i
< 4; i
++) {
1427 if (mask
& (1<<i
)) {
1428 dst
= get_dst_reg(c
, inst
, i
);
1429 src0
= get_src_reg(c
, inst
, 0, i
);
1430 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1431 brw_push_insn_state(p
);
1432 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1433 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1434 brw_MOV(p
, dst
, brw_imm_f(0.0));
1435 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1436 brw_MOV(p
, dst
, brw_imm_f(1.0));
1437 brw_pop_insn_state(p
);
1442 static void emit_slt(struct brw_wm_compile
*c
,
1443 const struct prog_instruction
*inst
)
1445 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1448 static void emit_sle(struct brw_wm_compile
*c
,
1449 const struct prog_instruction
*inst
)
1451 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1454 static void emit_sgt(struct brw_wm_compile
*c
,
1455 const struct prog_instruction
*inst
)
1457 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1460 static void emit_sge(struct brw_wm_compile
*c
,
1461 const struct prog_instruction
*inst
)
1463 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1466 static void emit_seq(struct brw_wm_compile
*c
,
1467 const struct prog_instruction
*inst
)
1469 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1472 static void emit_sne(struct brw_wm_compile
*c
,
1473 const struct prog_instruction
*inst
)
1475 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1478 static void emit_ddx(struct brw_wm_compile
*c
,
1479 const struct prog_instruction
*inst
)
1481 struct brw_compile
*p
= &c
->func
;
1482 GLuint mask
= inst
->DstReg
.WriteMask
;
1483 struct brw_reg interp
[4];
1485 struct brw_reg src0
, w
;
1487 src0
= get_src_reg(c
, inst
, 0, 0);
1488 w
= get_src_reg(c
, inst
, 1, 3);
1490 interp
[0] = brw_vec1_grf(nr
, 0);
1491 interp
[1] = brw_vec1_grf(nr
, 4);
1492 interp
[2] = brw_vec1_grf(nr
+1, 0);
1493 interp
[3] = brw_vec1_grf(nr
+1, 4);
1494 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1495 for(i
= 0; i
< 4; i
++ ) {
1496 if (mask
& (1<<i
)) {
1497 dst
= get_dst_reg(c
, inst
, i
);
1498 brw_MOV(p
, dst
, interp
[i
]);
1499 brw_MUL(p
, dst
, dst
, w
);
1502 brw_set_saturate(p
, 0);
1505 static void emit_ddy(struct brw_wm_compile
*c
,
1506 const struct prog_instruction
*inst
)
1508 struct brw_compile
*p
= &c
->func
;
1509 GLuint mask
= inst
->DstReg
.WriteMask
;
1510 struct brw_reg interp
[4];
1512 struct brw_reg src0
, w
;
1515 src0
= get_src_reg(c
, inst
, 0, 0);
1517 w
= get_src_reg(c
, inst
, 1, 3);
1518 interp
[0] = brw_vec1_grf(nr
, 0);
1519 interp
[1] = brw_vec1_grf(nr
, 4);
1520 interp
[2] = brw_vec1_grf(nr
+1, 0);
1521 interp
[3] = brw_vec1_grf(nr
+1, 4);
1522 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1523 for(i
= 0; i
< 4; i
++ ) {
1524 if (mask
& (1<<i
)) {
1525 dst
= get_dst_reg(c
, inst
, i
);
1526 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1527 brw_MUL(p
, dst
, dst
, w
);
1530 brw_set_saturate(p
, 0);
1533 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1535 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1539 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1541 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1544 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1546 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1549 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1551 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1555 /* One-, two- and three-dimensional Perlin noise, similar to the description
1556 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1557 static void noise1_sub( struct brw_wm_compile
*c
) {
1559 struct brw_compile
*p
= &c
->func
;
1560 struct brw_reg param
,
1561 x0
, x1
, /* gradients at each end */
1562 t
, tmp
[ 2 ], /* float temporaries */
1563 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1565 int mark
= mark_tmps( c
);
1567 x0
= alloc_tmp( c
);
1568 x1
= alloc_tmp( c
);
1570 tmp
[ 0 ] = alloc_tmp( c
);
1571 tmp
[ 1 ] = alloc_tmp( c
);
1572 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1573 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1574 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1575 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1576 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1578 param
= lookup_tmp( c
, mark
- 2 );
1580 brw_set_access_mode( p
, BRW_ALIGN_1
);
1582 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1584 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1585 be hashed. Also compute the remainder (offset within the unit
1586 length), interleaved to reduce register dependency penalties. */
1587 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1588 brw_FRC( p
, param
, param
);
1589 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1590 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1591 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1593 /* We're now ready to perform the hashing. The two hashes are
1594 interleaved for performance. The hash function used is
1595 designed to rapidly achieve avalanche and require only 32x16
1596 bit multiplication, and 16-bit swizzles (which we get for
1597 free). We can't use immediate operands in the multiplies,
1598 because immediates are permitted only in src1 and the 16-bit
1599 factor is permitted only in src0. */
1600 for( i
= 0; i
< 2; i
++ )
1601 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1602 for( i
= 0; i
< 2; i
++ )
1603 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1604 high_words( itmp
[ i
] ) );
1605 for( i
= 0; i
< 2; i
++ )
1606 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1607 for( i
= 0; i
< 2; i
++ )
1608 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1609 high_words( itmp
[ i
] ) );
1610 for( i
= 0; i
< 2; i
++ )
1611 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1612 for( i
= 0; i
< 2; i
++ )
1613 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1614 high_words( itmp
[ i
] ) );
1616 /* Now we want to initialise the two gradients based on the
1617 hashes. Format conversion from signed integer to float leaves
1618 everything scaled too high by a factor of pow( 2, 31 ), but
1619 we correct for that right at the end. */
1620 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1621 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1622 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1624 brw_MUL( p
, x0
, x0
, param
);
1625 brw_MUL( p
, x1
, x1
, t
);
1627 /* We interpolate between the gradients using the polynomial
1628 6t^5 - 15t^4 + 10t^3 (Perlin). */
1629 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1630 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1631 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1632 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1633 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1634 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1636 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1637 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1638 brw_MUL( p
, x1
, x1
, param
);
1639 brw_ADD( p
, x0
, x0
, x1
);
1640 /* scale by pow( 2, -30 ), to compensate for the format conversion
1641 above and an extra factor of 2 so that a single gradient covers
1643 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1645 release_tmps( c
, mark
);
1648 static void emit_noise1( struct brw_wm_compile
*c
,
1649 const struct prog_instruction
*inst
)
1651 struct brw_compile
*p
= &c
->func
;
1652 struct brw_reg src
, param
, dst
;
1653 GLuint mask
= inst
->DstReg
.WriteMask
;
1655 int mark
= mark_tmps( c
);
1657 assert( mark
== 0 );
1659 src
= get_src_reg( c
, inst
, 0, 0 );
1661 param
= alloc_tmp( c
);
1663 brw_MOV( p
, param
, src
);
1665 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1667 /* Fill in the result: */
1668 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1669 for (i
= 0 ; i
< 4; i
++) {
1670 if (mask
& (1<<i
)) {
1671 dst
= get_dst_reg(c
, inst
, i
);
1672 brw_MOV( p
, dst
, param
);
1675 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1676 brw_set_saturate( p
, 0 );
1678 release_tmps( c
, mark
);
1681 static void noise2_sub( struct brw_wm_compile
*c
) {
1683 struct brw_compile
*p
= &c
->func
;
1684 struct brw_reg param0
, param1
,
1685 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1686 t
, tmp
[ 4 ], /* float temporaries */
1687 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1689 int mark
= mark_tmps( c
);
1691 x0y0
= alloc_tmp( c
);
1692 x0y1
= alloc_tmp( c
);
1693 x1y0
= alloc_tmp( c
);
1694 x1y1
= alloc_tmp( c
);
1696 for( i
= 0; i
< 4; i
++ ) {
1697 tmp
[ i
] = alloc_tmp( c
);
1698 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1700 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1701 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1702 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1704 param0
= lookup_tmp( c
, mark
- 3 );
1705 param1
= lookup_tmp( c
, mark
- 2 );
1707 brw_set_access_mode( p
, BRW_ALIGN_1
);
1709 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1710 be hashed. Also compute the remainders (offsets within the unit
1711 square), interleaved to reduce register dependency penalties. */
1712 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1713 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1714 brw_FRC( p
, param0
, param0
);
1715 brw_FRC( p
, param1
, param1
);
1716 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1717 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1718 low_words( itmp
[ 1 ] ) );
1719 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1720 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1721 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1722 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1723 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1725 /* We're now ready to perform the hashing. The four hashes are
1726 interleaved for performance. The hash function used is
1727 designed to rapidly achieve avalanche and require only 32x16
1728 bit multiplication, and 16-bit swizzles (which we get for
1729 free). We can't use immediate operands in the multiplies,
1730 because immediates are permitted only in src1 and the 16-bit
1731 factor is permitted only in src0. */
1732 for( i
= 0; i
< 4; i
++ )
1733 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1734 for( i
= 0; i
< 4; i
++ )
1735 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1736 high_words( itmp
[ i
] ) );
1737 for( i
= 0; i
< 4; i
++ )
1738 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1739 for( i
= 0; i
< 4; i
++ )
1740 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1741 high_words( itmp
[ i
] ) );
1742 for( i
= 0; i
< 4; i
++ )
1743 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1744 for( i
= 0; i
< 4; i
++ )
1745 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1746 high_words( itmp
[ i
] ) );
1748 /* Now we want to initialise the four gradients based on the
1749 hashes. Format conversion from signed integer to float leaves
1750 everything scaled too high by a factor of pow( 2, 15 ), but
1751 we correct for that right at the end. */
1752 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1753 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1754 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1755 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1756 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1758 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1759 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1760 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1761 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1763 brw_MUL( p
, x1y0
, x1y0
, t
);
1764 brw_MUL( p
, x1y1
, x1y1
, t
);
1765 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1766 brw_MUL( p
, x0y0
, x0y0
, param0
);
1767 brw_MUL( p
, x0y1
, x0y1
, param0
);
1769 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1770 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1771 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1772 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1774 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1775 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1776 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1777 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1779 /* We interpolate between the gradients using the polynomial
1780 6t^5 - 15t^4 + 10t^3 (Perlin). */
1781 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1782 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1783 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1784 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1785 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1786 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1787 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1789 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1790 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1791 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1792 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1793 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1795 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1796 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1797 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1798 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1800 /* Here we interpolate in the y dimension... */
1801 brw_MUL( p
, x0y1
, x0y1
, param1
);
1802 brw_MUL( p
, x1y1
, x1y1
, param1
);
1803 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1804 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1806 /* And now in x. There are horrible register dependencies here,
1807 but we have nothing else to do. */
1808 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1809 brw_MUL( p
, x1y0
, x1y0
, param0
);
1810 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1812 /* scale by pow( 2, -15 ), as described above */
1813 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1815 release_tmps( c
, mark
);
1818 static void emit_noise2( struct brw_wm_compile
*c
,
1819 const struct prog_instruction
*inst
)
1821 struct brw_compile
*p
= &c
->func
;
1822 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1823 GLuint mask
= inst
->DstReg
.WriteMask
;
1825 int mark
= mark_tmps( c
);
1827 assert( mark
== 0 );
1829 src0
= get_src_reg( c
, inst
, 0, 0 );
1830 src1
= get_src_reg( c
, inst
, 0, 1 );
1832 param0
= alloc_tmp( c
);
1833 param1
= alloc_tmp( c
);
1835 brw_MOV( p
, param0
, src0
);
1836 brw_MOV( p
, param1
, src1
);
1838 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1840 /* Fill in the result: */
1841 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1842 for (i
= 0 ; i
< 4; i
++) {
1843 if (mask
& (1<<i
)) {
1844 dst
= get_dst_reg(c
, inst
, i
);
1845 brw_MOV( p
, dst
, param0
);
1848 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1849 brw_set_saturate( p
, 0 );
1851 release_tmps( c
, mark
);
1855 * The three-dimensional case is much like the one- and two- versions above,
1856 * but since the number of corners is rapidly growing we now pack 16 16-bit
1857 * hashes into each register to extract more parallelism from the EUs.
1859 static void noise3_sub( struct brw_wm_compile
*c
) {
1861 struct brw_compile
*p
= &c
->func
;
1862 struct brw_reg param0
, param1
, param2
,
1863 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1864 xi
, yi
, zi
, /* interpolation coefficients */
1865 t
, tmp
[ 8 ], /* float temporaries */
1866 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1867 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1869 int mark
= mark_tmps( c
);
1871 x0y0
= alloc_tmp( c
);
1872 x0y1
= alloc_tmp( c
);
1873 x1y0
= alloc_tmp( c
);
1874 x1y1
= alloc_tmp( c
);
1875 xi
= alloc_tmp( c
);
1876 yi
= alloc_tmp( c
);
1877 zi
= alloc_tmp( c
);
1879 for( i
= 0; i
< 8; i
++ ) {
1880 tmp
[ i
] = alloc_tmp( c
);
1881 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1882 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1885 param0
= lookup_tmp( c
, mark
- 4 );
1886 param1
= lookup_tmp( c
, mark
- 3 );
1887 param2
= lookup_tmp( c
, mark
- 2 );
1889 brw_set_access_mode( p
, BRW_ALIGN_1
);
1891 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1892 be hashed. Also compute the remainders (offsets within the unit
1893 cube), interleaved to reduce register dependency penalties. */
1894 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1895 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1896 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1897 brw_FRC( p
, param0
, param0
);
1898 brw_FRC( p
, param1
, param1
);
1899 brw_FRC( p
, param2
, param2
);
1900 /* Since we now have only 16 bits of precision in the hash, we must
1901 be more careful about thorough mixing to maintain entropy as we
1902 squash the input vector into a small scalar. */
1903 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1904 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1905 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1906 brw_imm_uw( 0x9B93 ) );
1907 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1908 brw_imm_uw( 0xBC8F ) );
1910 /* Temporarily disable the execution mask while we work with ExecSize=16
1911 channels (the mask is set for ExecSize=8 and is probably incorrect).
1912 Although this might cause execution of unwanted channels, the code
1913 writes only to temporary registers and has no side effects, so
1914 disabling the mask is harmless. */
1915 brw_push_insn_state( p
);
1916 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1917 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1918 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1919 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1921 /* We're now ready to perform the hashing. The eight hashes are
1922 interleaved for performance. The hash function used is
1923 designed to rapidly achieve avalanche and require only 16x16
1924 bit multiplication, and 8-bit swizzles (which we get for
1926 for( i
= 0; i
< 4; i
++ )
1927 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1928 for( i
= 0; i
< 4; i
++ )
1929 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1930 odd_bytes( wtmp
[ i
] ) );
1931 for( i
= 0; i
< 4; i
++ )
1932 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1933 for( i
= 0; i
< 4; i
++ )
1934 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1935 odd_bytes( wtmp
[ i
] ) );
1936 brw_pop_insn_state( p
);
1938 /* Now we want to initialise the four rear gradients based on the
1939 hashes. Format conversion from signed integer to float leaves
1940 everything scaled too high by a factor of pow( 2, 15 ), but
1941 we correct for that right at the end. */
1943 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1944 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1945 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1946 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1947 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1949 brw_push_insn_state( p
);
1950 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1951 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1952 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1953 brw_pop_insn_state( p
);
1955 brw_MUL( p
, x1y0
, x1y0
, t
);
1956 brw_MUL( p
, x1y1
, x1y1
, t
);
1957 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1958 brw_MUL( p
, x0y0
, x0y0
, param0
);
1959 brw_MUL( p
, x0y1
, x0y1
, param0
);
1962 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1963 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1964 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1965 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1967 brw_push_insn_state( p
);
1968 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1969 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1970 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1971 brw_pop_insn_state( p
);
1973 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1974 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1975 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1976 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1977 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1979 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1980 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1981 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1982 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1985 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1986 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1987 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1988 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1990 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1991 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1992 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1993 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1995 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1996 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1997 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1998 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2000 /* We interpolate between the gradients using the polynomial
2001 6t^5 - 15t^4 + 10t^3 (Perlin). */
2002 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
2003 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
2004 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
2005 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
2006 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
2007 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
2008 brw_MUL( p
, xi
, xi
, param0
);
2009 brw_MUL( p
, yi
, yi
, param1
);
2010 brw_MUL( p
, zi
, zi
, param2
);
2011 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
2012 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
2013 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
2014 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
2015 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
2016 brw_MUL( p
, xi
, xi
, param0
);
2017 brw_MUL( p
, yi
, yi
, param1
);
2018 brw_MUL( p
, zi
, zi
, param2
);
2019 brw_MUL( p
, xi
, xi
, param0
);
2020 brw_MUL( p
, yi
, yi
, param1
);
2021 brw_MUL( p
, zi
, zi
, param2
);
2022 brw_MUL( p
, xi
, xi
, param0
);
2023 brw_MUL( p
, yi
, yi
, param1
);
2024 brw_MUL( p
, zi
, zi
, param2
);
2026 /* Here we interpolate in the y dimension... */
2027 brw_MUL( p
, x0y1
, x0y1
, yi
);
2028 brw_MUL( p
, x1y1
, x1y1
, yi
);
2029 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2030 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2032 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2033 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2034 brw_MUL( p
, x1y0
, x1y0
, xi
);
2035 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2037 /* Now do the same thing for the front four gradients... */
2039 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2040 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2041 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2042 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2044 brw_push_insn_state( p
);
2045 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2046 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2047 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2048 brw_pop_insn_state( p
);
2050 brw_MUL( p
, x1y0
, x1y0
, t
);
2051 brw_MUL( p
, x1y1
, x1y1
, t
);
2052 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
2053 brw_MUL( p
, x0y0
, x0y0
, param0
);
2054 brw_MUL( p
, x0y1
, x0y1
, param0
);
2057 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2058 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2059 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2060 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2062 brw_push_insn_state( p
);
2063 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2064 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2065 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2066 brw_pop_insn_state( p
);
2068 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2069 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2070 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
2071 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
2072 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
2074 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2075 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2076 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2077 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2080 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2081 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2082 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2083 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2085 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2086 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2087 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2088 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2090 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2091 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2092 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2093 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2095 /* The interpolation coefficients are still around from last time, so
2096 again interpolate in the y dimension... */
2097 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2098 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2099 brw_MUL( p
, x0y1
, x0y1
, yi
);
2100 brw_MUL( p
, x1y1
, x1y1
, yi
);
2101 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2102 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2104 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2105 time put the front face in tmp[ 1 ] and we're nearly there... */
2106 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2107 brw_MUL( p
, x1y0
, x1y0
, xi
);
2108 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2110 /* The final interpolation, in the z dimension: */
2111 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2112 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
2113 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2115 /* scale by pow( 2, -15 ), as described above */
2116 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2118 release_tmps( c
, mark
);
2121 static void emit_noise3( struct brw_wm_compile
*c
,
2122 const struct prog_instruction
*inst
)
2124 struct brw_compile
*p
= &c
->func
;
2125 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
2126 GLuint mask
= inst
->DstReg
.WriteMask
;
2128 int mark
= mark_tmps( c
);
2130 assert( mark
== 0 );
2132 src0
= get_src_reg( c
, inst
, 0, 0 );
2133 src1
= get_src_reg( c
, inst
, 0, 1 );
2134 src2
= get_src_reg( c
, inst
, 0, 2 );
2136 param0
= alloc_tmp( c
);
2137 param1
= alloc_tmp( c
);
2138 param2
= alloc_tmp( c
);
2140 brw_MOV( p
, param0
, src0
);
2141 brw_MOV( p
, param1
, src1
);
2142 brw_MOV( p
, param2
, src2
);
2144 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
2146 /* Fill in the result: */
2147 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2148 for (i
= 0 ; i
< 4; i
++) {
2149 if (mask
& (1<<i
)) {
2150 dst
= get_dst_reg(c
, inst
, i
);
2151 brw_MOV( p
, dst
, param0
);
2154 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2155 brw_set_saturate( p
, 0 );
2157 release_tmps( c
, mark
);
2161 * For the four-dimensional case, the little micro-optimisation benefits
2162 * we obtain by unrolling all the loops aren't worth the massive bloat it
2163 * now causes. Instead, we loop twice around performing a similar operation
2164 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2165 * code to glue it all together.
2167 static void noise4_sub( struct brw_wm_compile
*c
)
2169 struct brw_compile
*p
= &c
->func
;
2170 struct brw_reg param
[ 4 ],
2171 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2172 w0
, /* noise for the w=0 cube */
2173 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2174 interp
[ 4 ], /* interpolation coefficients */
2175 t
, tmp
[ 8 ], /* float temporaries */
2176 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2177 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2179 int mark
= mark_tmps( c
);
2180 GLuint loop
, origin
;
2182 x0y0
= alloc_tmp( c
);
2183 x0y1
= alloc_tmp( c
);
2184 x1y0
= alloc_tmp( c
);
2185 x1y1
= alloc_tmp( c
);
2187 w0
= alloc_tmp( c
);
2188 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2189 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2191 for( i
= 0; i
< 4; i
++ ) {
2192 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2193 interp
[ i
] = alloc_tmp( c
);
2196 for( i
= 0; i
< 8; i
++ ) {
2197 tmp
[ i
] = alloc_tmp( c
);
2198 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2199 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2202 brw_set_access_mode( p
, BRW_ALIGN_1
);
2204 /* We only want 16 bits of precision from the integral part of each
2205 co-ordinate, but unfortunately the RNDD semantics would saturate
2206 at 16 bits if we performed the operation directly to a 16-bit
2207 destination. Therefore, we round to 32-bit temporaries where
2208 appropriate, and then store only the lower 16 bits. */
2209 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2210 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2211 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2212 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2213 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2214 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2216 /* Modify the flag register here, because the side effect is useful
2217 later (see below). We know for certain that all flags will be
2218 cleared, since the FRC instruction cannot possibly generate
2219 negative results. Even for exceptional inputs (infinities, denormals,
2220 NaNs), the architecture guarantees that the L conditional is false. */
2221 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2222 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2223 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2224 for( i
= 1; i
< 4; i
++ )
2225 brw_FRC( p
, param
[ i
], param
[ i
] );
2227 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2229 for( i
= 0; i
< 4; i
++ )
2230 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2231 for( i
= 0; i
< 4; i
++ )
2232 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2233 for( i
= 0; i
< 4; i
++ )
2234 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2235 for( i
= 0; i
< 4; i
++ )
2236 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2237 for( j
= 0; j
< 3; j
++ )
2238 for( i
= 0; i
< 4; i
++ )
2239 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2241 /* Mark the current address, as it will be a jump destination. The
2242 following code will be executed twice: first, with the flag
2243 register clear indicating the w=0 case, and second with flags
2247 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2248 be hashed. Since we have only 16 bits of precision in the hash, we
2249 must be careful about thorough mixing to maintain entropy as we
2250 squash the input vector into a small scalar. */
2251 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2252 brw_imm_uw( 0xBC8F ) );
2253 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2254 brw_imm_uw( 0xD0BD ) );
2255 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2256 brw_imm_uw( 0x9B93 ) );
2257 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2258 brw_imm_uw( 0xA359 ) );
2259 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2260 brw_imm_uw( 0xBC8F ) );
2262 /* Temporarily disable the execution mask while we work with ExecSize=16
2263 channels (the mask is set for ExecSize=8 and is probably incorrect).
2264 Although this might cause execution of unwanted channels, the code
2265 writes only to temporary registers and has no side effects, so
2266 disabling the mask is harmless. */
2267 brw_push_insn_state( p
);
2268 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2269 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2270 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2271 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2273 /* We're now ready to perform the hashing. The eight hashes are
2274 interleaved for performance. The hash function used is
2275 designed to rapidly achieve avalanche and require only 16x16
2276 bit multiplication, and 8-bit swizzles (which we get for
2278 for( i
= 0; i
< 4; i
++ )
2279 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2280 for( i
= 0; i
< 4; i
++ )
2281 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2282 odd_bytes( wtmp
[ i
] ) );
2283 for( i
= 0; i
< 4; i
++ )
2284 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2285 for( i
= 0; i
< 4; i
++ )
2286 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2287 odd_bytes( wtmp
[ i
] ) );
2288 brw_pop_insn_state( p
);
2290 /* Now we want to initialise the four rear gradients based on the
2291 hashes. Format conversion from signed integer to float leaves
2292 everything scaled too high by a factor of pow( 2, 15 ), but
2293 we correct for that right at the end. */
2295 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2296 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2297 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2298 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2299 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2301 brw_push_insn_state( p
);
2302 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2303 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2304 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2305 brw_pop_insn_state( p
);
2307 brw_MUL( p
, x1y0
, x1y0
, t
);
2308 brw_MUL( p
, x1y1
, x1y1
, t
);
2309 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2310 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2311 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2314 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2315 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2316 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2317 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2319 brw_push_insn_state( p
);
2320 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2321 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2322 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2323 brw_pop_insn_state( p
);
2325 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2326 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2327 /* prepare t for the w component (used below): w the first time through
2328 the loop; w - 1 the second time) */
2329 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2330 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2331 p
->current
->header
.predicate_inverse
= 1;
2332 brw_MOV( p
, t
, param
[ 3 ] );
2333 p
->current
->header
.predicate_inverse
= 0;
2334 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2335 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2336 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2338 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2339 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2340 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2341 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2344 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2345 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2346 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2347 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2349 brw_push_insn_state( p
);
2350 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2351 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2352 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2353 brw_pop_insn_state( p
);
2355 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2356 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2357 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2358 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2360 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2361 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2362 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2363 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2366 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2367 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2368 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2369 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2371 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2372 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2373 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2374 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2375 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2377 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2378 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2379 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2380 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2382 /* Here we interpolate in the y dimension... */
2383 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2384 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2385 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2386 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2387 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2388 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2390 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2391 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2392 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2393 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2395 /* Now do the same thing for the front four gradients... */
2397 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2398 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2399 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2400 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2402 brw_push_insn_state( p
);
2403 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2404 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2405 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2406 brw_pop_insn_state( p
);
2408 brw_MUL( p
, x1y0
, x1y0
, t
);
2409 brw_MUL( p
, x1y1
, x1y1
, t
);
2410 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2411 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2412 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2415 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2416 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2417 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2418 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2420 brw_push_insn_state( p
);
2421 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2422 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2423 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2424 brw_pop_insn_state( p
);
2426 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2427 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2428 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2429 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2430 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2432 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2433 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2434 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2435 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2438 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2439 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2440 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2441 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2443 brw_push_insn_state( p
);
2444 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2445 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2446 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2447 brw_pop_insn_state( p
);
2449 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2450 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2451 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2452 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2453 /* prepare t for the w component (used below): w the first time through
2454 the loop; w - 1 the second time) */
2455 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2456 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2457 p
->current
->header
.predicate_inverse
= 1;
2458 brw_MOV( p
, t
, param
[ 3 ] );
2459 p
->current
->header
.predicate_inverse
= 0;
2460 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2462 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2463 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2464 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2465 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2468 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2469 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2470 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2471 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2473 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2474 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2475 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2476 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2478 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2479 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2480 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2481 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2483 /* Interpolate in the y dimension: */
2484 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2485 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2486 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2487 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2488 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2489 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2491 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2492 time put the front face in tmp[ 1 ] and we're nearly there... */
2493 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2494 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2495 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2497 /* Another interpolation, in the z dimension: */
2498 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2499 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2500 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2502 /* Exit the loop if we've computed both cubes... */
2503 origin
= p
->nr_insn
;
2504 brw_push_insn_state( p
);
2505 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2506 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2507 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2508 brw_pop_insn_state( p
);
2510 /* Save the result for the w=0 case, and increment the w coordinate: */
2511 brw_MOV( p
, w0
, tmp
[ 0 ] );
2512 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2515 /* Loop around for the other cube. Explicitly set the flag register
2516 (unfortunately we must spend an extra instruction to do this: we
2517 can't rely on a side effect of the previous MOV or ADD because
2518 conditional modifiers which are normally true might be false in
2519 exceptional circumstances, e.g. given a NaN input; the add to
2520 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2521 brw_push_insn_state( p
);
2522 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2523 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2524 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2525 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2526 brw_pop_insn_state( p
);
2528 /* Patch the previous conditional branch now that we know the
2529 destination address. */
2530 brw_set_src1( p
->store
+ origin
,
2531 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2533 /* The very last interpolation. */
2534 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2535 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2536 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2538 /* scale by pow( 2, -15 ), as described above */
2539 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2541 release_tmps( c
, mark
);
2544 static void emit_noise4( struct brw_wm_compile
*c
,
2545 const struct prog_instruction
*inst
)
2547 struct brw_compile
*p
= &c
->func
;
2548 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2549 GLuint mask
= inst
->DstReg
.WriteMask
;
2551 int mark
= mark_tmps( c
);
2553 assert( mark
== 0 );
2555 src0
= get_src_reg( c
, inst
, 0, 0 );
2556 src1
= get_src_reg( c
, inst
, 0, 1 );
2557 src2
= get_src_reg( c
, inst
, 0, 2 );
2558 src3
= get_src_reg( c
, inst
, 0, 3 );
2560 param0
= alloc_tmp( c
);
2561 param1
= alloc_tmp( c
);
2562 param2
= alloc_tmp( c
);
2563 param3
= alloc_tmp( c
);
2565 brw_MOV( p
, param0
, src0
);
2566 brw_MOV( p
, param1
, src1
);
2567 brw_MOV( p
, param2
, src2
);
2568 brw_MOV( p
, param3
, src3
);
2570 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2572 /* Fill in the result: */
2573 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2574 for (i
= 0 ; i
< 4; i
++) {
2575 if (mask
& (1<<i
)) {
2576 dst
= get_dst_reg(c
, inst
, i
);
2577 brw_MOV( p
, dst
, param0
);
2580 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2581 brw_set_saturate( p
, 0 );
2583 release_tmps( c
, mark
);
2586 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2587 const struct prog_instruction
*inst
)
2589 struct brw_compile
*p
= &c
->func
;
2590 GLuint mask
= inst
->DstReg
.WriteMask
;
2591 struct brw_reg src0
[2], dst
[2];
2593 dst
[0] = get_dst_reg(c
, inst
, 0);
2594 dst
[1] = get_dst_reg(c
, inst
, 1);
2596 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2597 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2599 /* Calculate the pixel offset from window bottom left into destination
2602 if (mask
& WRITEMASK_X
) {
2603 /* X' = X - origin_x */
2606 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2607 brw_imm_d(0 - c
->key
.origin_x
));
2610 if (mask
& WRITEMASK_Y
) {
2611 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2614 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2615 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
/* BIAS on SIMD8 not working yet... */
/*
 * emit_txb: emit a texture sample with LOD bias.
 *
 * Visible behaviour: the four destination and four source channel
 * registers are fetched, the texture coordinates are moved into message
 * registers 2..4 according to inst->TexSrcTarget, src[3] is moved into
 * message register 5 as the bias and 0.0 into message register 6.  The
 * sampler message type is then chosen depending on BRW_IS_IGDNG().
 *
 * NOTE(review): this text is an incomplete extract -- the declarations of
 * `i` and `msg_type`, some case labels / breaks, the head of the final call
 * and several of its arguments are not visible here.  Confirm against the
 * complete source before changing any code.
 */
2622 static void emit_txb(struct brw_wm_compile
*c
,
2623 const struct prog_instruction
*inst
)
2625 struct brw_compile
*p
= &c
->func
;
2626 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Texture unit bound to the sampler this instruction references. */
2627 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2631 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2633 for (i
= 0; i
< 4; i
++)
2634 dst
[i
] = get_dst_reg(c
, inst
, i
);
2635 for (i
= 0; i
< 4; i
++)
2636 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load the coordinates; components the target does not use are zeroed. */
2638 switch (inst
->TexSrcTarget
) {
2639 case TEXTURE_1D_INDEX
:
2640 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2641 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2642 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2644 case TEXTURE_2D_INDEX
:
2645 case TEXTURE_RECT_INDEX
:
2646 brw_MOV(p
, brw_message_reg(2), src
[0]);
2647 brw_MOV(p
, brw_message_reg(3), src
[1]);
2648 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* NOTE(review): the case label(s) for the three-coordinate arm below are
   not visible in this extract. */
2651 brw_MOV(p
, brw_message_reg(2), src
[0]);
2652 brw_MOV(p
, brw_message_reg(3), src
[1]);
2653 brw_MOV(p
, brw_message_reg(4), src
[2]);
2656 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2657 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
/* Pick the sample-with-bias message type for the hardware generation. */
2659 if (BRW_IS_IGDNG(p
->brw
)) {
2660 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG
;
2662 /* Does it work well on SIMD8? */
2663 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
/* Argument list of the sampler call (the call head itself is not visible). */
2667 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2669 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2670 SURF_INDEX_TEXTURE(unit
),
2672 inst
->DstReg
.WriteMask
, /* writemask */
2673 msg_type
, /* msg_type */
2674 4, /* response_length */
2678 BRW_SAMPLER_SIMD_MODE_SIMD8
);
/*
 * emit_tex: emit a plain (or shadow-compare) texture sample.
 *
 * Visible behaviour: `shadow` is set when the bit for this texture unit is
 * set in c->key.shadowtex_mask.  A per-target mask `emit` selects which
 * coordinate channels are real; in the coordinate loop each message
 * register receives either src[swz[i]] or 0.0.  For shadow lookups message
 * register 5 is zeroed (lod / bias) and src[2] is placed in message
 * register 6 as the reference value, and the message length passed to the
 * sampler is 6 instead of 4.
 *
 * NOTE(review): this text is an incomplete extract -- the declarations of
 * `i`, `nr`, `emit`, `msg_len` and `msg_type`, several case labels, the
 * conditions guarding some statements and parts of the final call are not
 * visible here.  Confirm against the complete source before changing code.
 */
2682 static void emit_tex(struct brw_wm_compile
*c
,
2683 const struct prog_instruction
*inst
)
2685 struct brw_compile
*p
= &c
->func
;
2686 struct brw_reg dst
[4], src
[4], payload_reg
;
2687 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* 1 when this unit is flagged for shadow comparison in the program key. */
2691 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2694 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2696 for (i
= 0; i
< 4; i
++)
2697 dst
[i
] = get_dst_reg(c
, inst
, i
);
2698 for (i
= 0; i
< 4; i
++)
2699 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Choose which coordinate channels are meaningful for this target. */
2701 switch (inst
->TexSrcTarget
) {
2702 case TEXTURE_1D_INDEX
:
2706 case TEXTURE_2D_INDEX
:
2707 case TEXTURE_RECT_INDEX
:
2708 emit
= WRITEMASK_XY
;
2712 emit
= WRITEMASK_XYZ
;
2718 /* move/load S, T, R coords */
2719 for (i
= 0; i
< nr
; i
++) {
2720 static const GLuint swz
[4] = {0,1,2,2};
2722 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2724 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2729 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2730 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
/* Pick the sampler message type for the hardware generation. */
2733 if (BRW_IS_IGDNG(p
->brw
)) {
2735 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG
;
2737 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG
;
2739 /* Does it work for shadow on SIMD8 ? */
2740 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
;
/* Argument list of the sampler call (the call head itself is not visible). */
2744 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2746 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2747 SURF_INDEX_TEXTURE(unit
),
2749 inst
->DstReg
.WriteMask
, /* writemask */
2750 msg_type
, /* msg_type */
2751 4, /* response_length */
2752 shadow
? 6 : 4, /* msg_length */
2755 BRW_SAMPLER_SIMD_MODE_SIMD8
);
/* NOTE(review): the condition guarding this write of 1.0 to the fourth
   destination channel is not visible in this extract. */
2758 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2763 * Resolve subroutine calls after code emit is done.
2765 static void post_wm_emit( struct brw_wm_compile
*c
)
2767 brw_resolve_cals(&c
->func
);
/*
 * brw_wm_emit_glsl: main code-generation loop for GLSL fragment programs.
 *
 * Visible behaviour: the stack address register is initialised from
 * c->stack, then every instruction in c->prog_instructions is translated
 * in order.  Per instruction, constants are fetched when the program uses
 * a constant buffer, the conditional modifier is set from inst->CondUpdate,
 * and a switch on inst->Opcode dispatches to the matching emit_* helper or
 * handles flow control inline (IF/ELSE/ENDIF nesting, subroutine call and
 * return through the software stack, DO/WHILE loops with BREAK/CONTINUE
 * patching).  With DEBUG_WM set, the generated instructions are
 * disassembled to stderr afterwards.
 *
 * NOTE(review): this text is an incomplete extract -- most case labels,
 * `break` statements and many whole switch arms are not visible here.
 * Confirm against the complete source before changing any code.
 */
2770 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2772 #define MAX_IF_DEPTH 32
2773 #define MAX_LOOP_DEPTH 32
/* Stacks of open IF and DO instructions, indexed by nesting depth. */
2774 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
2775 GLuint i
, if_depth
= 0, loop_depth
= 0;
2776 struct brw_compile
*p
= &c
->func
;
2777 struct brw_indirect stack_index
= brw_indirect(0, 0);
2779 c
->out_of_regs
= GL_FALSE
;
2782 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the stack address register at the base of the call stack. */
2783 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2785 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2786 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2791 _mesa_printf("Inst %d: ", i
);
2792 _mesa_print_instruction(inst
);
2795 /* fetch any constants that this instruction needs */
2796 if (c
->fp
->use_const_buffer
)
2797 fetch_constants(c
, inst
);
2799 if (inst
->CondUpdate
)
2800 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2802 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2804 switch (inst
->Opcode
) {
2806 emit_pixel_xy(c
, inst
);
2809 emit_delta_xy(c
, inst
);
2812 emit_pixel_w(c
, inst
);
2815 emit_linterp(c
, inst
);
2818 emit_pinterp(c
, inst
);
2821 emit_cinterp(c
, inst
);
2824 emit_wpos_xy(c
, inst
);
2827 emit_fb_write(c
, inst
);
2829 case WM_FRONTFACING
:
2830 emit_frontfacing(c
, inst
);
2854 emit_trunc(c
, inst
);
2892 emit_min_max(c
, inst
);
2928 emit_noise1(c
, inst
);
2931 emit_noise2(c
, inst
);
2934 emit_noise3(c
, inst
);
2937 emit_noise4(c
, inst
);
/* Open an IF block; the instruction returned by brw_IF is remembered so
   it can be handed to brw_ELSE / brw_ENDIF later. */
2949 assert(if_depth
< MAX_IF_DEPTH
);
2950 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
/* NOTE(review): no check that if_depth > 0 is visible before this
   if_depth-1 index -- confirm against the complete source. */
2953 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2956 assert(if_depth
> 0);
2957 brw_ENDIF(p
, if_inst
[--if_depth
]);
2960 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Subroutine call: store IP + 3*16 in the current stack slot as the
   return address, advance the stack pointer by 4, record the call site
   with brw_save_call() and emit the jump instruction (its target is
   presumably filled in later by brw_resolve_cals() -- confirm). */
2966 brw_push_insn_state(p
);
2967 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2968 brw_set_access_mode(p
, BRW_ALIGN_1
);
2969 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2970 brw_set_access_mode(p
, BRW_ALIGN_16
);
2971 brw_ADD(p
, get_addr_reg(stack_index
),
2972 get_addr_reg(stack_index
), brw_imm_d(4));
2973 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2974 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2975 brw_pop_insn_state(p
);
/* Subroutine return: step the stack pointer back by 4 and load the
   instruction pointer from the stack slot. */
2979 brw_push_insn_state(p
);
2980 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2981 brw_ADD(p
, get_addr_reg(stack_index
),
2982 get_addr_reg(stack_index
), brw_imm_d(-4));
2983 brw_set_access_mode(p
, BRW_ALIGN_1
);
2984 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2985 brw_set_access_mode(p
, BRW_ALIGN_16
);
2986 brw_pop_insn_state(p
);
/* NOTE(review): unlike the IF case, no bound check of loop_depth against
   MAX_LOOP_DEPTH is visible here -- confirm against the complete source. */
2989 case OPCODE_BGNLOOP
:
2990 /* XXX may need to invalidate the current_constant regs */
2991 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2995 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2999 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
3001 case OPCODE_ENDLOOP
:
3003 struct brw_instruction
*inst0
, *inst1
;
3006 if (BRW_IS_IGDNG(brw
))
3010 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
3011 /* patch all the BREAK/CONT instructions from last BGNLOOP */
3012 while (inst0
> loop_inst
[loop_depth
]) {
3014 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
3015 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
3016 inst0
->bits3
.if_else
.pop_count
= 0;
3018 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
3019 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
3020 inst0
->bits3
.if_else
.pop_count
= 0;
3026 _mesa_printf("unsupported IR in fragment shader %d\n",
/* Predicate the following instruction when this one updated the
   condition codes; otherwise switch predication off. */
3030 if (inst
->CondUpdate
)
3031 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
3033 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
/* Optional debug dump of the generated native instructions. */
3037 if (INTEL_DEBUG
& DEBUG_WM
) {
3038 _mesa_printf("wm-native:\n");
3039 for (i
= 0; i
< p
->nr_insn
; i
++)
3040 brw_disasm(stderr
, &p
->store
[i
]);
/**
 * Do GPU code generation for shaders that use GLSL features such as
 * flow control.  Other shaders will be compiled with the non-GLSL
 * code path.
 */
/*
 * brw_wm_glsl_emit: public entry point for GLSL fragment program code
 * generation.
 *
 * Visible behaviour: optionally logs a banner when DEBUG_WM is set, runs
 * brw_wm_emit_glsl() to generate the native code, optionally prints the
 * program, then records the number of GRF registers used (num_grf_used)
 * and a scratch size of 0 in c->prog_data.
 *
 * NOTE(review): the statement belonging to the "initial instruction
 * translation/simplification" comment is not visible in this extract --
 * confirm against the complete source.
 */
3050 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
3052 if (INTEL_DEBUG
& DEBUG_WM
) {
3053 _mesa_printf("brw_wm_glsl_emit:\n");
3056 /* initial instruction translation/simplification */
3059 /* actual code generation */
3060 brw_wm_emit_glsl(brw
, c
);
3062 if (INTEL_DEBUG
& DEBUG_WM
) {
3063 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* Publish the register and scratch requirements of the compiled program. */
3066 c
->prog_data
.total_grf
= num_grf_used(c
);
3067 c
->prog_data
.total_scratch
= 0;