1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
15 * Determine if the given fragment program uses GLSL features such
16 * as flow conditionals, loops, subroutines.
17 * Some GLSL shaders may use these features, others might not.
19 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
23 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
24 switch (inst
->Opcode
) {
49 reclaim_temps(struct brw_wm_compile
*c
);
52 /** Mark GRF register as used. */
54 prealloc_grf(struct brw_wm_compile
*c
, int r
)
56 c
->used_grf
[r
] = GL_TRUE
;
60 /** Mark given GRF register as not in use. */
62 release_grf(struct brw_wm_compile
*c
, int r
)
64 /*assert(c->used_grf[r]);*/
65 c
->used_grf
[r
] = GL_FALSE
;
66 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
70 /** Return index of a free GRF, mark it as used. */
72 alloc_grf(struct brw_wm_compile
*c
)
75 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
76 if (!c
->used_grf
[r
]) {
77 c
->used_grf
[r
] = GL_TRUE
;
78 c
->first_free_grf
= r
+ 1; /* a guess */
83 /* no free temps, try to reclaim some */
85 c
->first_free_grf
= 0;
88 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
89 if (!c
->used_grf
[r
]) {
90 c
->used_grf
[r
] = GL_TRUE
;
91 c
->first_free_grf
= r
+ 1; /* a guess */
96 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
97 assert(c
->used_grf
[r
]);
100 /* really, no free GRF regs found */
101 if (!c
->out_of_regs
) {
102 /* print warning once per compilation */
103 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
104 c
->out_of_regs
= GL_TRUE
;
111 /** Return number of GRF registers used */
113 num_grf_used(const struct brw_wm_compile
*c
)
116 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
125 * Record the mapping of a Mesa register to a hardware register.
127 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
128 int component
, struct brw_reg reg
)
130 c
->wm_regs
[file
][index
][component
].reg
= reg
;
131 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
135 * Examine instruction's write mask to find index of first component
136 * enabled for writing.
138 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
141 for (i
= 0; i
< 4; i
++)
142 if (inst
->DstReg
.WriteMask
& (1<<i
))
147 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
151 /* if we need to allocate another temp, grow the tmp_regs[] array */
152 if (c
->tmp_index
== c
->tmp_max
) {
153 int r
= alloc_grf(c
);
155 /*printf("Out of temps in %s\n", __FUNCTION__);*/
156 r
= 50; /* XXX random register! */
158 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
161 /* form the GRF register */
162 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
163 /*printf("alloc_temp %d\n", reg.nr);*/
164 assert(reg
.nr
< BRW_WM_MAX_GRF
);
170 * Save current temp register info.
171 * There must be a matching call to release_tmps().
173 static int mark_tmps(struct brw_wm_compile
*c
)
178 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
180 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
183 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
189 * Convert Mesa src register to brw register.
191 * Since we're running in SOA mode each Mesa register corresponds to four
192 * hardware registers. We allocate the hardware registers as needed here.
194 * \param file register file, one of PROGRAM_x
195 * \param index register number
196 * \param component src component (X=0, Y=1, Z=2, W=3)
197 * \param nr not used?!?
198 * \param neg negate value?
199 * \param abs take absolute value?
201 static struct brw_reg
202 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
203 int nr
, GLuint neg
, GLuint abs
)
207 case PROGRAM_STATE_VAR
:
208 case PROGRAM_CONSTANT
:
209 case PROGRAM_UNIFORM
:
210 file
= PROGRAM_STATE_VAR
;
212 case PROGRAM_UNDEFINED
:
213 return brw_null_reg();
214 case PROGRAM_TEMPORARY
:
217 case PROGRAM_PAYLOAD
:
220 _mesa_problem(NULL
, "Unexpected file in get_reg()");
221 return brw_null_reg();
225 assert(component
< 4);
227 /* see if we've already allocated a HW register for this Mesa register */
228 if (c
->wm_regs
[file
][index
][component
].inited
) {
230 reg
= c
->wm_regs
[file
][index
][component
].reg
;
233 /* no, allocate new register */
234 int grf
= alloc_grf(c
);
235 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
237 /* totally out of temps */
238 grf
= 51; /* XXX random register! */
241 reg
= brw_vec8_grf(grf
, 0);
242 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
244 set_reg(c
, file
, index
, component
, reg
);
247 if (neg
& (1 << component
)) {
258 * This is called if we run out of GRF registers. Examine the live intervals
259 * of temp regs in the program and free those which won't be used again.
262 reclaim_temps(struct brw_wm_compile
*c
)
264 GLint intBegin
[MAX_PROGRAM_TEMPS
];
265 GLint intEnd
[MAX_PROGRAM_TEMPS
];
268 /*printf("Reclaim temps:\n");*/
270 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
273 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
274 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
275 /* program temp[i] can be freed */
277 /*printf(" temp[%d] is dead\n", index);*/
278 for (component
= 0; component
< 4; component
++) {
279 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
280 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
283 printf(" Reclaim temp %d, reg %d at inst %d\n",
284 index, r, c->cur_inst);
286 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
297 * Preallocate registers. This sets up the Mesa to hardware register
298 * mapping for certain registers, such as constants (uniforms/state vars)
301 static void prealloc_reg(struct brw_wm_compile
*c
)
305 int urb_read_length
= 0;
306 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
307 GLuint reg_index
= 0;
309 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
310 c
->first_free_grf
= 0;
312 for (i
= 0; i
< 4; i
++) {
313 if (i
< c
->key
.nr_depth_regs
)
314 reg
= brw_vec8_grf(i
* 2, 0);
316 reg
= brw_vec8_grf(0, 0);
317 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
319 reg_index
+= 2 * c
->key
.nr_depth_regs
;
323 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
324 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
326 /* use a real constant buffer, or just use a section of the GRF? */
327 /* XXX this heuristic may need adjustment... */
328 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
329 c
->fp
->use_const_buffer
= GL_TRUE
;
331 c
->fp
->use_const_buffer
= GL_FALSE
;
332 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
334 if (c
->fp
->use_const_buffer
) {
335 /* We'll use a real constant buffer and fetch constants from
336 * it with a dataport read message.
339 /* number of float constants in CURBE */
340 c
->prog_data
.nr_params
= 0;
343 const struct gl_program_parameter_list
*plist
=
344 c
->fp
->program
.Base
.Parameters
;
347 /* number of float constants in CURBE */
348 c
->prog_data
.nr_params
= 4 * nr_params
;
350 /* loop over program constants (float[4]) */
351 for (i
= 0; i
< nr_params
; i
++) {
352 /* loop over XYZW channels */
353 for (j
= 0; j
< 4; j
++, index
++) {
354 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
355 /* Save pointer to parameter/constant value.
356 * Constants will be copied in prepare_constant_buffer()
358 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
359 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
362 /* number of constant regs used (each reg is float[8]) */
363 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
364 reg_index
+= c
->nr_creg
;
368 /* fragment shader inputs */
369 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
372 if (i
>= VERT_RESULT_VAR0
)
373 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
374 else if (i
<= VERT_RESULT_TEX7
)
379 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
380 urb_read_length
= reg_index
;
381 reg
= brw_vec8_grf(reg_index
, 0);
382 for (j
= 0; j
< 4; j
++)
383 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
385 if (c
->key
.vp_outputs_written
& (1 << i
)) {
390 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
391 c
->prog_data
.urb_read_length
= urb_read_length
;
392 c
->prog_data
.curb_read_length
= c
->nr_creg
;
393 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
395 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
398 /* mark GRF regs [0..reg_index-1] as in-use */
399 for (i
= 0; i
< reg_index
; i
++)
402 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
403 prealloc_grf(c
, 126);
404 prealloc_grf(c
, 127);
406 /* An instruction may reference up to three constants.
407 * They'll be found in these registers.
408 * XXX alloc these on demand!
410 if (c
->fp
->use_const_buffer
) {
411 for (i
= 0; i
< 3; i
++) {
412 c
->current_const
[i
].index
= -1;
413 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
417 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
418 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
424 * Check if any of the instruction's src registers are constants, uniforms,
425 * or statevars. If so, fetch any constants that we don't already have in
426 * the three GRF slots.
428 static void fetch_constants(struct brw_wm_compile
*c
,
429 const struct prog_instruction
*inst
)
431 struct brw_compile
*p
= &c
->func
;
434 /* loop over instruction src regs */
435 for (i
= 0; i
< 3; i
++) {
436 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
437 if (src
->File
== PROGRAM_STATE_VAR
||
438 src
->File
== PROGRAM_CONSTANT
||
439 src
->File
== PROGRAM_UNIFORM
) {
440 c
->current_const
[i
].index
= src
->Index
;
443 printf(" fetch const[%d] for arg %d into reg %d\n",
444 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
447 /* need to fetch the constant now */
449 c
->current_const
[i
].reg
, /* writeback dest */
451 src
->RelAddr
, /* relative indexing? */
452 16 * src
->Index
, /* byte offset */
453 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
461 * Convert Mesa dst register to brw register.
463 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
464 const struct prog_instruction
*inst
,
468 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
473 static struct brw_reg
474 get_src_reg_const(struct brw_wm_compile
*c
,
475 const struct prog_instruction
*inst
,
476 GLuint srcRegIndex
, GLuint component
)
478 /* We should have already fetched the constant from the constant
479 * buffer in fetch_constants(). Now we just have to return a
480 * register description that extracts the needed component and
481 * smears it across all eight vector components.
483 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
484 struct brw_reg const_reg
;
486 assert(component
< 4);
487 assert(srcRegIndex
< 3);
488 assert(c
->current_const
[srcRegIndex
].index
!= -1);
489 const_reg
= c
->current_const
[srcRegIndex
].reg
;
491 /* extract desired float from the const_reg, and smear */
492 const_reg
= stride(const_reg
, 0, 1, 0);
493 const_reg
.subnr
= component
* 4;
495 if (src
->Negate
& (1 << component
))
496 const_reg
= negate(const_reg
);
498 const_reg
= brw_abs(const_reg
);
501 printf(" form const[%d].%d for arg %d, reg %d\n",
502 c
->current_const
[srcRegIndex
].index
,
513 * Convert Mesa src register to brw register.
515 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
516 const struct prog_instruction
*inst
,
517 GLuint srcRegIndex
, GLuint channel
)
519 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
521 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
523 /* Extended swizzle terms */
524 if (component
== SWIZZLE_ZERO
) {
525 return brw_imm_f(0.0F
);
527 else if (component
== SWIZZLE_ONE
) {
528 return brw_imm_f(1.0F
);
531 if (c
->fp
->use_const_buffer
&&
532 (src
->File
== PROGRAM_STATE_VAR
||
533 src
->File
== PROGRAM_CONSTANT
||
534 src
->File
== PROGRAM_UNIFORM
)) {
535 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
538 /* other type of source register */
539 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
540 src
->Negate
, src
->Abs
);
546 * Same as \sa get_src_reg() but if the register is a literal, emit
547 * a brw_reg encoding the literal.
548 * Note that a brw instruction only allows one src operand to be a literal.
549 * For instructions with more than one operand, only the second can be a
550 * literal. This means that we treat some literals as constants/uniforms
551 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
554 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
555 const struct prog_instruction
*inst
,
556 GLuint srcRegIndex
, GLuint channel
)
558 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
559 if (src
->File
== PROGRAM_CONSTANT
) {
561 const int component
= GET_SWZ(src
->Swizzle
, channel
);
562 const GLfloat
*param
=
563 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
564 GLfloat value
= param
[component
];
565 if (src
->Negate
& (1 << channel
))
568 value
= FABSF(value
);
570 printf(" form immed value %f for chan %d\n", value
, channel
);
572 return brw_imm_f(value
);
575 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
581 * Subroutines are minimal support for reusable instruction sequences.
582 * They are implemented as simply as possible to minimise overhead: there
583 * is no explicit support for communication between the caller and callee
584 * other than saving the return address in a temporary register, nor is
585 * there any automatic local storage. This implies that great care is
586 * required before attempting reentrancy or any kind of nested
587 * subroutine invocations.
589 static void invoke_subroutine( struct brw_wm_compile
*c
,
590 enum _subroutine subroutine
,
591 void (*emit
)( struct brw_wm_compile
* ) )
593 struct brw_compile
*p
= &c
->func
;
595 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
597 if( c
->subroutines
[ subroutine
] ) {
598 /* subroutine previously emitted: reuse existing instructions */
600 int mark
= mark_tmps( c
);
601 struct brw_reg return_address
= retype( alloc_tmp( c
),
602 BRW_REGISTER_TYPE_UD
);
603 int here
= p
->nr_insn
;
605 brw_push_insn_state(p
);
606 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
607 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
609 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
610 brw_imm_d( ( c
->subroutines
[ subroutine
] -
612 brw_pop_insn_state(p
);
614 release_tmps( c
, mark
);
616 /* previously unused subroutine: emit, and mark for later reuse */
618 int mark
= mark_tmps( c
);
619 struct brw_reg return_address
= retype( alloc_tmp( c
),
620 BRW_REGISTER_TYPE_UD
);
621 struct brw_instruction
*calc
;
622 int base
= p
->nr_insn
;
624 brw_push_insn_state(p
);
625 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
626 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
627 brw_pop_insn_state(p
);
629 c
->subroutines
[ subroutine
] = p
->nr_insn
;
633 brw_push_insn_state(p
);
634 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
635 brw_MOV( p
, brw_ip_reg(), return_address
);
636 brw_pop_insn_state(p
);
638 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
640 release_tmps( c
, mark
);
644 static void emit_abs( struct brw_wm_compile
*c
,
645 const struct prog_instruction
*inst
)
648 struct brw_compile
*p
= &c
->func
;
649 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
650 for (i
= 0; i
< 4; i
++) {
651 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
652 struct brw_reg src
, dst
;
653 dst
= get_dst_reg(c
, inst
, i
);
654 src
= get_src_reg(c
, inst
, 0, i
);
655 brw_MOV(p
, dst
, brw_abs(src
));
658 brw_set_saturate(p
, 0);
661 static void emit_trunc( struct brw_wm_compile
*c
,
662 const struct prog_instruction
*inst
)
665 struct brw_compile
*p
= &c
->func
;
666 GLuint mask
= inst
->DstReg
.WriteMask
;
667 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
668 for (i
= 0; i
< 4; i
++) {
670 struct brw_reg src
, dst
;
671 dst
= get_dst_reg(c
, inst
, i
);
672 src
= get_src_reg(c
, inst
, 0, i
);
673 brw_RNDZ(p
, dst
, src
);
676 brw_set_saturate(p
, 0);
679 static void emit_mov( struct brw_wm_compile
*c
,
680 const struct prog_instruction
*inst
)
683 struct brw_compile
*p
= &c
->func
;
684 GLuint mask
= inst
->DstReg
.WriteMask
;
685 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
686 for (i
= 0; i
< 4; i
++) {
688 struct brw_reg src
, dst
;
689 dst
= get_dst_reg(c
, inst
, i
);
690 /* XXX some moves from immediate value don't work reliably!!! */
691 /*src = get_src_reg_imm(c, inst, 0, i);*/
692 src
= get_src_reg(c
, inst
, 0, i
);
693 brw_MOV(p
, dst
, src
);
696 brw_set_saturate(p
, 0);
699 static void emit_pixel_xy(struct brw_wm_compile
*c
,
700 const struct prog_instruction
*inst
)
702 struct brw_reg r1
= brw_vec1_grf(1, 0);
703 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
705 struct brw_reg dst0
, dst1
;
706 struct brw_compile
*p
= &c
->func
;
707 GLuint mask
= inst
->DstReg
.WriteMask
;
709 dst0
= get_dst_reg(c
, inst
, 0);
710 dst1
= get_dst_reg(c
, inst
, 1);
711 /* Calculate pixel centers by adding 1 or 0 to each of the
712 * micro-tile coordinates passed in r1.
714 if (mask
& WRITEMASK_X
) {
716 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
717 stride(suboffset(r1_uw
, 4), 2, 4, 0),
718 brw_imm_v(0x10101010));
721 if (mask
& WRITEMASK_Y
) {
723 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
724 stride(suboffset(r1_uw
, 5), 2, 4, 0),
725 brw_imm_v(0x11001100));
729 static void emit_delta_xy(struct brw_wm_compile
*c
,
730 const struct prog_instruction
*inst
)
732 struct brw_reg r1
= brw_vec1_grf(1, 0);
733 struct brw_reg dst0
, dst1
, src0
, src1
;
734 struct brw_compile
*p
= &c
->func
;
735 GLuint mask
= inst
->DstReg
.WriteMask
;
737 dst0
= get_dst_reg(c
, inst
, 0);
738 dst1
= get_dst_reg(c
, inst
, 1);
739 src0
= get_src_reg(c
, inst
, 0, 0);
740 src1
= get_src_reg(c
, inst
, 0, 1);
741 /* Calc delta X,Y by subtracting origin in r1 from the pixel
744 if (mask
& WRITEMASK_X
) {
747 retype(src0
, BRW_REGISTER_TYPE_UW
),
751 if (mask
& WRITEMASK_Y
) {
754 retype(src1
, BRW_REGISTER_TYPE_UW
),
755 negate(suboffset(r1
,1)));
760 static void fire_fb_write( struct brw_wm_compile
*c
,
766 struct brw_compile
*p
= &c
->func
;
767 /* Pass through control information:
769 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
771 brw_push_insn_state(p
);
772 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
774 brw_message_reg(base_reg
+ 1),
776 brw_pop_insn_state(p
);
778 /* Send framebuffer write message: */
780 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
782 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
789 static void emit_fb_write(struct brw_wm_compile
*c
,
790 const struct prog_instruction
*inst
)
792 struct brw_compile
*p
= &c
->func
;
798 /* Reserve a space for AA - may not be needed:
800 if (c
->key
.aa_dest_stencil_reg
)
803 brw_push_insn_state(p
);
804 for (channel
= 0; channel
< 4; channel
++) {
805 src0
= get_src_reg(c
, inst
, 0, channel
);
806 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
807 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
808 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
810 /* skip over the regs populated above: */
812 brw_pop_insn_state(p
);
814 if (c
->key
.source_depth_to_render_target
) {
815 if (c
->key
.computes_depth
) {
816 src0
= get_src_reg(c
, inst
, 2, 2);
817 brw_MOV(p
, brw_message_reg(nr
), src0
);
820 src0
= get_src_reg(c
, inst
, 1, 1);
821 brw_MOV(p
, brw_message_reg(nr
), src0
);
827 if (c
->key
.dest_depth_reg
) {
828 const GLuint comp
= c
->key
.dest_depth_reg
/ 2;
829 const GLuint off
= c
->key
.dest_depth_reg
% 2;
832 /* XXX this code needs review/testing */
833 struct brw_reg arg1_0
= get_src_reg(c
, inst
, 1, comp
);
834 struct brw_reg arg1_1
= get_src_reg(c
, inst
, 1, comp
+1);
836 brw_push_insn_state(p
);
837 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
839 brw_MOV(p
, brw_message_reg(nr
), offset(arg1_0
, 1));
841 brw_MOV(p
, brw_message_reg(nr
+1), arg1_1
);
842 brw_pop_insn_state(p
);
846 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
847 brw_MOV(p
, brw_message_reg(nr
), src
);
852 target
= inst
->Aux
>> 1;
854 fire_fb_write(c
, 0, nr
, target
, eot
);
857 static void emit_pixel_w( struct brw_wm_compile
*c
,
858 const struct prog_instruction
*inst
)
860 struct brw_compile
*p
= &c
->func
;
861 GLuint mask
= inst
->DstReg
.WriteMask
;
862 if (mask
& WRITEMASK_W
) {
863 struct brw_reg dst
, src0
, delta0
, delta1
;
864 struct brw_reg interp3
;
866 dst
= get_dst_reg(c
, inst
, 3);
867 src0
= get_src_reg(c
, inst
, 0, 0);
868 delta0
= get_src_reg(c
, inst
, 1, 0);
869 delta1
= get_src_reg(c
, inst
, 1, 1);
871 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
872 /* Calc 1/w - just linterp wpos[3] optimized by putting the
873 * result straight into a message reg.
875 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
876 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
880 BRW_MATH_FUNCTION_INV
,
881 BRW_MATH_SATURATE_NONE
,
883 BRW_MATH_PRECISION_FULL
);
887 static void emit_linterp(struct brw_wm_compile
*c
,
888 const struct prog_instruction
*inst
)
890 struct brw_compile
*p
= &c
->func
;
891 GLuint mask
= inst
->DstReg
.WriteMask
;
892 struct brw_reg interp
[4];
893 struct brw_reg dst
, delta0
, delta1
;
897 src0
= get_src_reg(c
, inst
, 0, 0);
898 delta0
= get_src_reg(c
, inst
, 1, 0);
899 delta1
= get_src_reg(c
, inst
, 1, 1);
902 interp
[0] = brw_vec1_grf(nr
, 0);
903 interp
[1] = brw_vec1_grf(nr
, 4);
904 interp
[2] = brw_vec1_grf(nr
+1, 0);
905 interp
[3] = brw_vec1_grf(nr
+1, 4);
907 for(i
= 0; i
< 4; i
++ ) {
909 dst
= get_dst_reg(c
, inst
, i
);
910 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
911 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
916 static void emit_cinterp(struct brw_wm_compile
*c
,
917 const struct prog_instruction
*inst
)
919 struct brw_compile
*p
= &c
->func
;
920 GLuint mask
= inst
->DstReg
.WriteMask
;
922 struct brw_reg interp
[4];
923 struct brw_reg dst
, src0
;
926 src0
= get_src_reg(c
, inst
, 0, 0);
929 interp
[0] = brw_vec1_grf(nr
, 0);
930 interp
[1] = brw_vec1_grf(nr
, 4);
931 interp
[2] = brw_vec1_grf(nr
+1, 0);
932 interp
[3] = brw_vec1_grf(nr
+1, 4);
934 for(i
= 0; i
< 4; i
++ ) {
936 dst
= get_dst_reg(c
, inst
, i
);
937 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
942 static void emit_pinterp(struct brw_wm_compile
*c
,
943 const struct prog_instruction
*inst
)
945 struct brw_compile
*p
= &c
->func
;
946 GLuint mask
= inst
->DstReg
.WriteMask
;
948 struct brw_reg interp
[4];
949 struct brw_reg dst
, delta0
, delta1
;
950 struct brw_reg src0
, w
;
953 src0
= get_src_reg(c
, inst
, 0, 0);
954 delta0
= get_src_reg(c
, inst
, 1, 0);
955 delta1
= get_src_reg(c
, inst
, 1, 1);
956 w
= get_src_reg(c
, inst
, 2, 3);
959 interp
[0] = brw_vec1_grf(nr
, 0);
960 interp
[1] = brw_vec1_grf(nr
, 4);
961 interp
[2] = brw_vec1_grf(nr
+1, 0);
962 interp
[3] = brw_vec1_grf(nr
+1, 4);
964 for(i
= 0; i
< 4; i
++ ) {
966 dst
= get_dst_reg(c
, inst
, i
);
967 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
968 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
970 brw_MUL(p
, dst
, dst
, w
);
975 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
976 static void emit_frontfacing(struct brw_wm_compile
*c
,
977 const struct prog_instruction
*inst
)
979 struct brw_compile
*p
= &c
->func
;
980 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
982 GLuint mask
= inst
->DstReg
.WriteMask
;
985 for (i
= 0; i
< 4; i
++) {
987 dst
= get_dst_reg(c
, inst
, i
);
988 brw_MOV(p
, dst
, brw_imm_f(0.0));
992 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
995 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
996 for (i
= 0; i
< 4; i
++) {
998 dst
= get_dst_reg(c
, inst
, i
);
999 brw_MOV(p
, dst
, brw_imm_f(1.0));
1002 brw_set_predicate_control_flag_value(p
, 0xff);
1005 static void emit_xpd(struct brw_wm_compile
*c
,
1006 const struct prog_instruction
*inst
)
1009 struct brw_compile
*p
= &c
->func
;
1010 GLuint mask
= inst
->DstReg
.WriteMask
;
1011 for (i
= 0; i
< 4; i
++) {
1012 GLuint i2
= (i
+2)%3;
1013 GLuint i1
= (i
+1)%3;
1014 if (mask
& (1<<i
)) {
1015 struct brw_reg src0
, src1
, dst
;
1016 dst
= get_dst_reg(c
, inst
, i
);
1017 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
1018 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
1019 brw_MUL(p
, brw_null_reg(), src0
, src1
);
1020 src0
= get_src_reg(c
, inst
, 0, i1
);
1021 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
1022 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1023 brw_MAC(p
, dst
, src0
, src1
);
1024 brw_set_saturate(p
, 0);
1027 brw_set_saturate(p
, 0);
1030 static void emit_dp3(struct brw_wm_compile
*c
,
1031 const struct prog_instruction
*inst
)
1033 struct brw_reg src0
[3], src1
[3], dst
;
1035 struct brw_compile
*p
= &c
->func
;
1036 for (i
= 0; i
< 3; i
++) {
1037 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1038 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1041 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1042 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1043 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1044 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1045 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1046 brw_set_saturate(p
, 0);
1049 static void emit_dp4(struct brw_wm_compile
*c
,
1050 const struct prog_instruction
*inst
)
1052 struct brw_reg src0
[4], src1
[4], dst
;
1054 struct brw_compile
*p
= &c
->func
;
1055 for (i
= 0; i
< 4; i
++) {
1056 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1057 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1059 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1060 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1061 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1062 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1063 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1064 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1065 brw_set_saturate(p
, 0);
1068 static void emit_dph(struct brw_wm_compile
*c
,
1069 const struct prog_instruction
*inst
)
1071 struct brw_reg src0
[4], src1
[4], dst
;
1073 struct brw_compile
*p
= &c
->func
;
1074 for (i
= 0; i
< 4; i
++) {
1075 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1076 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1078 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1079 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1080 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1081 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1082 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1083 brw_ADD(p
, dst
, dst
, src1
[3]);
1084 brw_set_saturate(p
, 0);
1088 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1089 * Note that the result of the function is smeared across the dest
1090 * register's X, Y, Z and W channels (subject to writemasking of course).
1092 static void emit_math1(struct brw_wm_compile
*c
,
1093 const struct prog_instruction
*inst
, GLuint func
)
1095 struct brw_compile
*p
= &c
->func
;
1096 struct brw_reg src0
, dst
, tmp
;
1097 const int mark
= mark_tmps( c
);
1102 /* Get first component of source register */
1103 src0
= get_src_reg(c
, inst
, 0, 0);
1105 /* tmp = func(src0) */
1106 brw_MOV(p
, brw_message_reg(2), src0
);
1110 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1113 BRW_MATH_DATA_VECTOR
,
1114 BRW_MATH_PRECISION_FULL
);
1116 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
1118 /* replicate tmp value across enabled dest channels */
1119 for (i
= 0; i
< 4; i
++) {
1120 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
1121 dst
= get_dst_reg(c
, inst
, i
);
1122 brw_MOV(p
, dst
, tmp
);
1126 release_tmps(c
, mark
);
1129 static void emit_rcp(struct brw_wm_compile
*c
,
1130 const struct prog_instruction
*inst
)
1132 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
1135 static void emit_rsq(struct brw_wm_compile
*c
,
1136 const struct prog_instruction
*inst
)
1138 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
1141 static void emit_sin(struct brw_wm_compile
*c
,
1142 const struct prog_instruction
*inst
)
1144 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
1147 static void emit_cos(struct brw_wm_compile
*c
,
1148 const struct prog_instruction
*inst
)
1150 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1153 static void emit_ex2(struct brw_wm_compile
*c
,
1154 const struct prog_instruction
*inst
)
1156 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1159 static void emit_lg2(struct brw_wm_compile
*c
,
1160 const struct prog_instruction
*inst
)
1162 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1165 static void emit_add(struct brw_wm_compile
*c
,
1166 const struct prog_instruction
*inst
)
1168 struct brw_compile
*p
= &c
->func
;
1169 struct brw_reg src0
, src1
, dst
;
1170 GLuint mask
= inst
->DstReg
.WriteMask
;
1172 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1173 for (i
= 0 ; i
< 4; i
++) {
1174 if (mask
& (1<<i
)) {
1175 dst
= get_dst_reg(c
, inst
, i
);
1176 src0
= get_src_reg(c
, inst
, 0, i
);
1177 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1178 brw_ADD(p
, dst
, src0
, src1
);
1181 brw_set_saturate(p
, 0);
1184 static void emit_arl(struct brw_wm_compile
*c
,
1185 const struct prog_instruction
*inst
)
1187 struct brw_compile
*p
= &c
->func
;
1188 struct brw_reg src0
, addr_reg
;
1189 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1190 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1191 BRW_ARF_ADDRESS
, 0);
1192 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1193 brw_MOV(p
, addr_reg
, src0
);
1194 brw_set_saturate(p
, 0);
1197 static void emit_sub(struct brw_wm_compile
*c
,
1198 const struct prog_instruction
*inst
)
1200 struct brw_compile
*p
= &c
->func
;
1201 struct brw_reg src0
, src1
, dst
;
1202 GLuint mask
= inst
->DstReg
.WriteMask
;
1204 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1205 for (i
= 0 ; i
< 4; i
++) {
1206 if (mask
& (1<<i
)) {
1207 dst
= get_dst_reg(c
, inst
, i
);
1208 src0
= get_src_reg(c
, inst
, 0, i
);
1209 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1210 brw_ADD(p
, dst
, src0
, negate(src1
));
1213 brw_set_saturate(p
, 0);
1216 static void emit_mul(struct brw_wm_compile
*c
,
1217 const struct prog_instruction
*inst
)
1219 struct brw_compile
*p
= &c
->func
;
1220 struct brw_reg src0
, src1
, dst
;
1221 GLuint mask
= inst
->DstReg
.WriteMask
;
1223 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1224 for (i
= 0 ; i
< 4; i
++) {
1225 if (mask
& (1<<i
)) {
1226 dst
= get_dst_reg(c
, inst
, i
);
1227 src0
= get_src_reg(c
, inst
, 0, i
);
1228 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1229 brw_MUL(p
, dst
, src0
, src1
);
1232 brw_set_saturate(p
, 0);
1235 static void emit_frc(struct brw_wm_compile
*c
,
1236 const struct prog_instruction
*inst
)
1238 struct brw_compile
*p
= &c
->func
;
1239 struct brw_reg src0
, dst
;
1240 GLuint mask
= inst
->DstReg
.WriteMask
;
1242 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1243 for (i
= 0 ; i
< 4; i
++) {
1244 if (mask
& (1<<i
)) {
1245 dst
= get_dst_reg(c
, inst
, i
);
1246 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1247 brw_FRC(p
, dst
, src0
);
1250 if (inst
->SaturateMode
!= SATURATE_OFF
)
1251 brw_set_saturate(p
, 0);
1254 static void emit_flr(struct brw_wm_compile
*c
,
1255 const struct prog_instruction
*inst
)
1257 struct brw_compile
*p
= &c
->func
;
1258 struct brw_reg src0
, dst
;
1259 GLuint mask
= inst
->DstReg
.WriteMask
;
1261 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1262 for (i
= 0 ; i
< 4; i
++) {
1263 if (mask
& (1<<i
)) {
1264 dst
= get_dst_reg(c
, inst
, i
);
1265 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1266 brw_RNDD(p
, dst
, src0
);
1269 brw_set_saturate(p
, 0);
1273 static void emit_min_max(struct brw_wm_compile
*c
,
1274 const struct prog_instruction
*inst
)
1276 struct brw_compile
*p
= &c
->func
;
1277 const GLuint mask
= inst
->DstReg
.WriteMask
;
1278 const int mark
= mark_tmps(c
);
1280 brw_push_insn_state(p
);
1281 for (i
= 0; i
< 4; i
++) {
1282 if (mask
& (1<<i
)) {
1283 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1284 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1285 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1287 /* if dst==src0 or dst==src1 we need to use a temp reg */
1288 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1289 brw_same_reg(dst
, src1
);
1296 printf(" Min/max: dst %d src0 %d src1 %d\n",
1297 dst.nr, src0.nr, src1.nr);
1299 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1300 brw_MOV(p
, dst
, src0
);
1301 brw_set_saturate(p
, 0);
1303 if (inst
->Opcode
== OPCODE_MIN
)
1304 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1306 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1308 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1309 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1310 brw_MOV(p
, dst
, src1
);
1311 brw_set_saturate(p
, 0);
1312 brw_set_predicate_control_flag_value(p
, 0xff);
1314 brw_MOV(p
, real_dst
, dst
);
1317 brw_pop_insn_state(p
);
1318 release_tmps(c
, mark
);
1321 static void emit_pow(struct brw_wm_compile
*c
,
1322 const struct prog_instruction
*inst
)
1324 struct brw_compile
*p
= &c
->func
;
1325 struct brw_reg dst
, src0
, src1
;
1326 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1327 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1328 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1330 brw_MOV(p
, brw_message_reg(2), src0
);
1331 brw_MOV(p
, brw_message_reg(3), src1
);
1335 BRW_MATH_FUNCTION_POW
,
1336 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1339 BRW_MATH_DATA_VECTOR
,
1340 BRW_MATH_PRECISION_FULL
);
1343 static void emit_lrp(struct brw_wm_compile
*c
,
1344 const struct prog_instruction
*inst
)
1346 struct brw_compile
*p
= &c
->func
;
1347 GLuint mask
= inst
->DstReg
.WriteMask
;
1348 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1350 int mark
= mark_tmps(c
);
1351 for (i
= 0; i
< 4; i
++) {
1352 if (mask
& (1<<i
)) {
1353 dst
= get_dst_reg(c
, inst
, i
);
1354 src0
= get_src_reg(c
, inst
, 0, i
);
1356 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1358 if (src1
.nr
== dst
.nr
) {
1359 tmp1
= alloc_tmp(c
);
1360 brw_MOV(p
, tmp1
, src1
);
1364 src2
= get_src_reg(c
, inst
, 2, i
);
1365 if (src2
.nr
== dst
.nr
) {
1366 tmp2
= alloc_tmp(c
);
1367 brw_MOV(p
, tmp2
, src2
);
1371 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1372 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1373 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1374 brw_MAC(p
, dst
, src0
, tmp1
);
1375 brw_set_saturate(p
, 0);
1377 release_tmps(c
, mark
);
1382 * For GLSL shaders, this KIL will be unconditional.
1383 * It may be contained inside an IF/ENDIF structure of course.
1385 static void emit_kil(struct brw_wm_compile
*c
)
1387 struct brw_compile
*p
= &c
->func
;
1388 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1389 brw_push_insn_state(p
);
1390 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1391 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1392 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1393 brw_pop_insn_state(p
);
1396 static void emit_mad(struct brw_wm_compile
*c
,
1397 const struct prog_instruction
*inst
)
1399 struct brw_compile
*p
= &c
->func
;
1400 GLuint mask
= inst
->DstReg
.WriteMask
;
1401 struct brw_reg dst
, src0
, src1
, src2
;
1404 for (i
= 0; i
< 4; i
++) {
1405 if (mask
& (1<<i
)) {
1406 dst
= get_dst_reg(c
, inst
, i
);
1407 src0
= get_src_reg(c
, inst
, 0, i
);
1408 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1409 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1410 brw_MUL(p
, dst
, src0
, src1
);
1412 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1413 brw_ADD(p
, dst
, dst
, src2
);
1414 brw_set_saturate(p
, 0);
1419 static void emit_sop(struct brw_wm_compile
*c
,
1420 const struct prog_instruction
*inst
, GLuint cond
)
1422 struct brw_compile
*p
= &c
->func
;
1423 GLuint mask
= inst
->DstReg
.WriteMask
;
1424 struct brw_reg dst
, src0
, src1
;
1427 for (i
= 0; i
< 4; i
++) {
1428 if (mask
& (1<<i
)) {
1429 dst
= get_dst_reg(c
, inst
, i
);
1430 src0
= get_src_reg(c
, inst
, 0, i
);
1431 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1432 brw_push_insn_state(p
);
1433 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1434 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1435 brw_MOV(p
, dst
, brw_imm_f(0.0));
1436 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1437 brw_MOV(p
, dst
, brw_imm_f(1.0));
1438 brw_pop_insn_state(p
);
1443 static void emit_slt(struct brw_wm_compile
*c
,
1444 const struct prog_instruction
*inst
)
1446 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1449 static void emit_sle(struct brw_wm_compile
*c
,
1450 const struct prog_instruction
*inst
)
1452 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1455 static void emit_sgt(struct brw_wm_compile
*c
,
1456 const struct prog_instruction
*inst
)
1458 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1461 static void emit_sge(struct brw_wm_compile
*c
,
1462 const struct prog_instruction
*inst
)
1464 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1467 static void emit_seq(struct brw_wm_compile
*c
,
1468 const struct prog_instruction
*inst
)
1470 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1473 static void emit_sne(struct brw_wm_compile
*c
,
1474 const struct prog_instruction
*inst
)
1476 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1479 static void emit_ddx(struct brw_wm_compile
*c
,
1480 const struct prog_instruction
*inst
)
1482 struct brw_compile
*p
= &c
->func
;
1483 GLuint mask
= inst
->DstReg
.WriteMask
;
1484 struct brw_reg interp
[4];
1486 struct brw_reg src0
, w
;
1488 src0
= get_src_reg(c
, inst
, 0, 0);
1489 w
= get_src_reg(c
, inst
, 1, 3);
1491 interp
[0] = brw_vec1_grf(nr
, 0);
1492 interp
[1] = brw_vec1_grf(nr
, 4);
1493 interp
[2] = brw_vec1_grf(nr
+1, 0);
1494 interp
[3] = brw_vec1_grf(nr
+1, 4);
1495 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1496 for(i
= 0; i
< 4; i
++ ) {
1497 if (mask
& (1<<i
)) {
1498 dst
= get_dst_reg(c
, inst
, i
);
1499 brw_MOV(p
, dst
, interp
[i
]);
1500 brw_MUL(p
, dst
, dst
, w
);
1503 brw_set_saturate(p
, 0);
1506 static void emit_ddy(struct brw_wm_compile
*c
,
1507 const struct prog_instruction
*inst
)
1509 struct brw_compile
*p
= &c
->func
;
1510 GLuint mask
= inst
->DstReg
.WriteMask
;
1511 struct brw_reg interp
[4];
1513 struct brw_reg src0
, w
;
1516 src0
= get_src_reg(c
, inst
, 0, 0);
1518 w
= get_src_reg(c
, inst
, 1, 3);
1519 interp
[0] = brw_vec1_grf(nr
, 0);
1520 interp
[1] = brw_vec1_grf(nr
, 4);
1521 interp
[2] = brw_vec1_grf(nr
+1, 0);
1522 interp
[3] = brw_vec1_grf(nr
+1, 4);
1523 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1524 for(i
= 0; i
< 4; i
++ ) {
1525 if (mask
& (1<<i
)) {
1526 dst
= get_dst_reg(c
, inst
, i
);
1527 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1528 brw_MUL(p
, dst
, dst
, w
);
1531 brw_set_saturate(p
, 0);
1534 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1536 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1540 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1542 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1545 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1547 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1550 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1552 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
/* One-, two- and three-dimensional Perlin noise, similar to the description
   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1558 static void noise1_sub( struct brw_wm_compile
*c
) {
1560 struct brw_compile
*p
= &c
->func
;
1561 struct brw_reg param
,
1562 x0
, x1
, /* gradients at each end */
1563 t
, tmp
[ 2 ], /* float temporaries */
1564 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1566 int mark
= mark_tmps( c
);
1568 x0
= alloc_tmp( c
);
1569 x1
= alloc_tmp( c
);
1571 tmp
[ 0 ] = alloc_tmp( c
);
1572 tmp
[ 1 ] = alloc_tmp( c
);
1573 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1574 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1575 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1576 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1577 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1579 param
= lookup_tmp( c
, mark
- 2 );
1581 brw_set_access_mode( p
, BRW_ALIGN_1
);
1583 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1585 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1586 be hashed. Also compute the remainder (offset within the unit
1587 length), interleaved to reduce register dependency penalties. */
1588 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1589 brw_FRC( p
, param
, param
);
1590 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1591 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1592 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1594 /* We're now ready to perform the hashing. The two hashes are
1595 interleaved for performance. The hash function used is
1596 designed to rapidly achieve avalanche and require only 32x16
1597 bit multiplication, and 16-bit swizzles (which we get for
1598 free). We can't use immediate operands in the multiplies,
1599 because immediates are permitted only in src1 and the 16-bit
1600 factor is permitted only in src0. */
1601 for( i
= 0; i
< 2; i
++ )
1602 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1603 for( i
= 0; i
< 2; i
++ )
1604 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1605 high_words( itmp
[ i
] ) );
1606 for( i
= 0; i
< 2; i
++ )
1607 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1608 for( i
= 0; i
< 2; i
++ )
1609 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1610 high_words( itmp
[ i
] ) );
1611 for( i
= 0; i
< 2; i
++ )
1612 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1613 for( i
= 0; i
< 2; i
++ )
1614 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1615 high_words( itmp
[ i
] ) );
1617 /* Now we want to initialise the two gradients based on the
1618 hashes. Format conversion from signed integer to float leaves
1619 everything scaled too high by a factor of pow( 2, 31 ), but
1620 we correct for that right at the end. */
1621 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1622 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1623 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1625 brw_MUL( p
, x0
, x0
, param
);
1626 brw_MUL( p
, x1
, x1
, t
);
1628 /* We interpolate between the gradients using the polynomial
1629 6t^5 - 15t^4 + 10t^3 (Perlin). */
1630 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1631 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1632 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1633 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1634 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1635 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1637 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1638 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1639 brw_MUL( p
, x1
, x1
, param
);
1640 brw_ADD( p
, x0
, x0
, x1
);
1641 /* scale by pow( 2, -30 ), to compensate for the format conversion
1642 above and an extra factor of 2 so that a single gradient covers
1644 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1646 release_tmps( c
, mark
);
1649 static void emit_noise1( struct brw_wm_compile
*c
,
1650 const struct prog_instruction
*inst
)
1652 struct brw_compile
*p
= &c
->func
;
1653 struct brw_reg src
, param
, dst
;
1654 GLuint mask
= inst
->DstReg
.WriteMask
;
1656 int mark
= mark_tmps( c
);
1658 assert( mark
== 0 );
1660 src
= get_src_reg( c
, inst
, 0, 0 );
1662 param
= alloc_tmp( c
);
1664 brw_MOV( p
, param
, src
);
1666 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1668 /* Fill in the result: */
1669 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1670 for (i
= 0 ; i
< 4; i
++) {
1671 if (mask
& (1<<i
)) {
1672 dst
= get_dst_reg(c
, inst
, i
);
1673 brw_MOV( p
, dst
, param
);
1676 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1677 brw_set_saturate( p
, 0 );
1679 release_tmps( c
, mark
);
1682 static void noise2_sub( struct brw_wm_compile
*c
) {
1684 struct brw_compile
*p
= &c
->func
;
1685 struct brw_reg param0
, param1
,
1686 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1687 t
, tmp
[ 4 ], /* float temporaries */
1688 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1690 int mark
= mark_tmps( c
);
1692 x0y0
= alloc_tmp( c
);
1693 x0y1
= alloc_tmp( c
);
1694 x1y0
= alloc_tmp( c
);
1695 x1y1
= alloc_tmp( c
);
1697 for( i
= 0; i
< 4; i
++ ) {
1698 tmp
[ i
] = alloc_tmp( c
);
1699 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1701 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1702 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1703 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1705 param0
= lookup_tmp( c
, mark
- 3 );
1706 param1
= lookup_tmp( c
, mark
- 2 );
1708 brw_set_access_mode( p
, BRW_ALIGN_1
);
1710 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1711 be hashed. Also compute the remainders (offsets within the unit
1712 square), interleaved to reduce register dependency penalties. */
1713 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1714 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1715 brw_FRC( p
, param0
, param0
);
1716 brw_FRC( p
, param1
, param1
);
1717 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1718 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1719 low_words( itmp
[ 1 ] ) );
1720 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1721 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1722 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1723 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1724 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1726 /* We're now ready to perform the hashing. The four hashes are
1727 interleaved for performance. The hash function used is
1728 designed to rapidly achieve avalanche and require only 32x16
1729 bit multiplication, and 16-bit swizzles (which we get for
1730 free). We can't use immediate operands in the multiplies,
1731 because immediates are permitted only in src1 and the 16-bit
1732 factor is permitted only in src0. */
1733 for( i
= 0; i
< 4; i
++ )
1734 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1735 for( i
= 0; i
< 4; i
++ )
1736 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1737 high_words( itmp
[ i
] ) );
1738 for( i
= 0; i
< 4; i
++ )
1739 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1740 for( i
= 0; i
< 4; i
++ )
1741 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1742 high_words( itmp
[ i
] ) );
1743 for( i
= 0; i
< 4; i
++ )
1744 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1745 for( i
= 0; i
< 4; i
++ )
1746 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1747 high_words( itmp
[ i
] ) );
1749 /* Now we want to initialise the four gradients based on the
1750 hashes. Format conversion from signed integer to float leaves
1751 everything scaled too high by a factor of pow( 2, 15 ), but
1752 we correct for that right at the end. */
1753 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1754 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1755 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1756 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1757 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1759 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1760 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1761 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1762 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1764 brw_MUL( p
, x1y0
, x1y0
, t
);
1765 brw_MUL( p
, x1y1
, x1y1
, t
);
1766 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1767 brw_MUL( p
, x0y0
, x0y0
, param0
);
1768 brw_MUL( p
, x0y1
, x0y1
, param0
);
1770 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1771 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1772 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1773 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1775 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1776 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1777 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1778 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1780 /* We interpolate between the gradients using the polynomial
1781 6t^5 - 15t^4 + 10t^3 (Perlin). */
1782 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1783 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1784 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1785 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1786 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1787 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1788 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1790 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1791 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1792 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1793 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1794 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1796 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1797 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1798 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1799 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1801 /* Here we interpolate in the y dimension... */
1802 brw_MUL( p
, x0y1
, x0y1
, param1
);
1803 brw_MUL( p
, x1y1
, x1y1
, param1
);
1804 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1805 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1807 /* And now in x. There are horrible register dependencies here,
1808 but we have nothing else to do. */
1809 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1810 brw_MUL( p
, x1y0
, x1y0
, param0
);
1811 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1813 /* scale by pow( 2, -15 ), as described above */
1814 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1816 release_tmps( c
, mark
);
1819 static void emit_noise2( struct brw_wm_compile
*c
,
1820 const struct prog_instruction
*inst
)
1822 struct brw_compile
*p
= &c
->func
;
1823 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1824 GLuint mask
= inst
->DstReg
.WriteMask
;
1826 int mark
= mark_tmps( c
);
1828 assert( mark
== 0 );
1830 src0
= get_src_reg( c
, inst
, 0, 0 );
1831 src1
= get_src_reg( c
, inst
, 0, 1 );
1833 param0
= alloc_tmp( c
);
1834 param1
= alloc_tmp( c
);
1836 brw_MOV( p
, param0
, src0
);
1837 brw_MOV( p
, param1
, src1
);
1839 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1841 /* Fill in the result: */
1842 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1843 for (i
= 0 ; i
< 4; i
++) {
1844 if (mask
& (1<<i
)) {
1845 dst
= get_dst_reg(c
, inst
, i
);
1846 brw_MOV( p
, dst
, param0
);
1849 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1850 brw_set_saturate( p
, 0 );
1852 release_tmps( c
, mark
);
1856 * The three-dimensional case is much like the one- and two- versions above,
1857 * but since the number of corners is rapidly growing we now pack 16 16-bit
1858 * hashes into each register to extract more parallelism from the EUs.
1860 static void noise3_sub( struct brw_wm_compile
*c
) {
1862 struct brw_compile
*p
= &c
->func
;
1863 struct brw_reg param0
, param1
, param2
,
1864 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1865 xi
, yi
, zi
, /* interpolation coefficients */
1866 t
, tmp
[ 8 ], /* float temporaries */
1867 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1868 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1870 int mark
= mark_tmps( c
);
1872 x0y0
= alloc_tmp( c
);
1873 x0y1
= alloc_tmp( c
);
1874 x1y0
= alloc_tmp( c
);
1875 x1y1
= alloc_tmp( c
);
1876 xi
= alloc_tmp( c
);
1877 yi
= alloc_tmp( c
);
1878 zi
= alloc_tmp( c
);
1880 for( i
= 0; i
< 8; i
++ ) {
1881 tmp
[ i
] = alloc_tmp( c
);
1882 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1883 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1886 param0
= lookup_tmp( c
, mark
- 4 );
1887 param1
= lookup_tmp( c
, mark
- 3 );
1888 param2
= lookup_tmp( c
, mark
- 2 );
1890 brw_set_access_mode( p
, BRW_ALIGN_1
);
1892 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1893 be hashed. Also compute the remainders (offsets within the unit
1894 cube), interleaved to reduce register dependency penalties. */
1895 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1896 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1897 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1898 brw_FRC( p
, param0
, param0
);
1899 brw_FRC( p
, param1
, param1
);
1900 brw_FRC( p
, param2
, param2
);
1901 /* Since we now have only 16 bits of precision in the hash, we must
1902 be more careful about thorough mixing to maintain entropy as we
1903 squash the input vector into a small scalar. */
1904 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1905 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1906 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1907 brw_imm_uw( 0x9B93 ) );
1908 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1909 brw_imm_uw( 0xBC8F ) );
1911 /* Temporarily disable the execution mask while we work with ExecSize=16
1912 channels (the mask is set for ExecSize=8 and is probably incorrect).
1913 Although this might cause execution of unwanted channels, the code
1914 writes only to temporary registers and has no side effects, so
1915 disabling the mask is harmless. */
1916 brw_push_insn_state( p
);
1917 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1918 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1919 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1920 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1922 /* We're now ready to perform the hashing. The eight hashes are
1923 interleaved for performance. The hash function used is
1924 designed to rapidly achieve avalanche and require only 16x16
1925 bit multiplication, and 8-bit swizzles (which we get for
1927 for( i
= 0; i
< 4; i
++ )
1928 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1929 for( i
= 0; i
< 4; i
++ )
1930 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1931 odd_bytes( wtmp
[ i
] ) );
1932 for( i
= 0; i
< 4; i
++ )
1933 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1934 for( i
= 0; i
< 4; i
++ )
1935 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1936 odd_bytes( wtmp
[ i
] ) );
1937 brw_pop_insn_state( p
);
1939 /* Now we want to initialise the four rear gradients based on the
1940 hashes. Format conversion from signed integer to float leaves
1941 everything scaled too high by a factor of pow( 2, 15 ), but
1942 we correct for that right at the end. */
1944 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1945 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1946 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1947 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1948 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1950 brw_push_insn_state( p
);
1951 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1952 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1953 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1954 brw_pop_insn_state( p
);
1956 brw_MUL( p
, x1y0
, x1y0
, t
);
1957 brw_MUL( p
, x1y1
, x1y1
, t
);
1958 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1959 brw_MUL( p
, x0y0
, x0y0
, param0
);
1960 brw_MUL( p
, x0y1
, x0y1
, param0
);
1963 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1964 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1965 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1966 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1968 brw_push_insn_state( p
);
1969 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1970 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1971 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1972 brw_pop_insn_state( p
);
1974 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1975 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1976 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1977 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1978 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1980 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1981 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1982 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1983 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1986 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1987 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1988 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1989 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1991 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1992 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1993 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1994 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1996 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1997 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1998 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1999 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2001 /* We interpolate between the gradients using the polynomial
2002 6t^5 - 15t^4 + 10t^3 (Perlin). */
2003 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
2004 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
2005 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
2006 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
2007 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
2008 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
2009 brw_MUL( p
, xi
, xi
, param0
);
2010 brw_MUL( p
, yi
, yi
, param1
);
2011 brw_MUL( p
, zi
, zi
, param2
);
2012 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
2013 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
2014 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
2015 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
2016 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
2017 brw_MUL( p
, xi
, xi
, param0
);
2018 brw_MUL( p
, yi
, yi
, param1
);
2019 brw_MUL( p
, zi
, zi
, param2
);
2020 brw_MUL( p
, xi
, xi
, param0
);
2021 brw_MUL( p
, yi
, yi
, param1
);
2022 brw_MUL( p
, zi
, zi
, param2
);
2023 brw_MUL( p
, xi
, xi
, param0
);
2024 brw_MUL( p
, yi
, yi
, param1
);
2025 brw_MUL( p
, zi
, zi
, param2
);
2027 /* Here we interpolate in the y dimension... */
2028 brw_MUL( p
, x0y1
, x0y1
, yi
);
2029 brw_MUL( p
, x1y1
, x1y1
, yi
);
2030 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2031 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2033 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2034 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2035 brw_MUL( p
, x1y0
, x1y0
, xi
);
2036 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2038 /* Now do the same thing for the front four gradients... */
2040 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2041 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2042 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2043 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2045 brw_push_insn_state( p
);
2046 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2047 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2048 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2049 brw_pop_insn_state( p
);
2051 brw_MUL( p
, x1y0
, x1y0
, t
);
2052 brw_MUL( p
, x1y1
, x1y1
, t
);
2053 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
2054 brw_MUL( p
, x0y0
, x0y0
, param0
);
2055 brw_MUL( p
, x0y1
, x0y1
, param0
);
2058 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2059 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2060 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2061 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2063 brw_push_insn_state( p
);
2064 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2065 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2066 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2067 brw_pop_insn_state( p
);
2069 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2070 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2071 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
2072 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
2073 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
2075 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2076 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2077 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2078 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2081 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2082 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2083 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2084 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2086 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2087 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2088 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2089 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2091 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2092 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2093 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2094 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2096 /* The interpolation coefficients are still around from last time, so
2097 again interpolate in the y dimension... */
2098 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2099 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2100 brw_MUL( p
, x0y1
, x0y1
, yi
);
2101 brw_MUL( p
, x1y1
, x1y1
, yi
);
2102 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2103 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2105 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2106 time put the front face in tmp[ 1 ] and we're nearly there... */
2107 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2108 brw_MUL( p
, x1y0
, x1y0
, xi
);
2109 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2111 /* The final interpolation, in the z dimension: */
2112 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2113 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
2114 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2116 /* scale by pow( 2, -15 ), as described above */
2117 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2119 release_tmps( c
, mark
);
2122 static void emit_noise3( struct brw_wm_compile
*c
,
2123 const struct prog_instruction
*inst
)
2125 struct brw_compile
*p
= &c
->func
;
2126 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
2127 GLuint mask
= inst
->DstReg
.WriteMask
;
2129 int mark
= mark_tmps( c
);
2131 assert( mark
== 0 );
2133 src0
= get_src_reg( c
, inst
, 0, 0 );
2134 src1
= get_src_reg( c
, inst
, 0, 1 );
2135 src2
= get_src_reg( c
, inst
, 0, 2 );
2137 param0
= alloc_tmp( c
);
2138 param1
= alloc_tmp( c
);
2139 param2
= alloc_tmp( c
);
2141 brw_MOV( p
, param0
, src0
);
2142 brw_MOV( p
, param1
, src1
);
2143 brw_MOV( p
, param2
, src2
);
2145 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
2147 /* Fill in the result: */
2148 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2149 for (i
= 0 ; i
< 4; i
++) {
2150 if (mask
& (1<<i
)) {
2151 dst
= get_dst_reg(c
, inst
, i
);
2152 brw_MOV( p
, dst
, param0
);
2155 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2156 brw_set_saturate( p
, 0 );
2158 release_tmps( c
, mark
);
/**
 * For the four-dimensional case, the little micro-optimisation benefits
 * we obtain by unrolling all the loops aren't worth the massive bloat it
 * now causes.  Instead, we loop twice around performing a similar operation
 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
 * code to glue it all together.
 */
2168 static void noise4_sub( struct brw_wm_compile
*c
)
2170 struct brw_compile
*p
= &c
->func
;
2171 struct brw_reg param
[ 4 ],
2172 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2173 w0
, /* noise for the w=0 cube */
2174 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2175 interp
[ 4 ], /* interpolation coefficients */
2176 t
, tmp
[ 8 ], /* float temporaries */
2177 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2178 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2180 int mark
= mark_tmps( c
);
2181 GLuint loop
, origin
;
2183 x0y0
= alloc_tmp( c
);
2184 x0y1
= alloc_tmp( c
);
2185 x1y0
= alloc_tmp( c
);
2186 x1y1
= alloc_tmp( c
);
2188 w0
= alloc_tmp( c
);
2189 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2190 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2192 for( i
= 0; i
< 4; i
++ ) {
2193 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2194 interp
[ i
] = alloc_tmp( c
);
2197 for( i
= 0; i
< 8; i
++ ) {
2198 tmp
[ i
] = alloc_tmp( c
);
2199 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2200 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2203 brw_set_access_mode( p
, BRW_ALIGN_1
);
2205 /* We only want 16 bits of precision from the integral part of each
2206 co-ordinate, but unfortunately the RNDD semantics would saturate
2207 at 16 bits if we performed the operation directly to a 16-bit
2208 destination. Therefore, we round to 32-bit temporaries where
2209 appropriate, and then store only the lower 16 bits. */
2210 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2211 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2212 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2213 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2214 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2215 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2217 /* Modify the flag register here, because the side effect is useful
2218 later (see below). We know for certain that all flags will be
2219 cleared, since the FRC instruction cannot possibly generate
2220 negative results. Even for exceptional inputs (infinities, denormals,
2221 NaNs), the architecture guarantees that the L conditional is false. */
2222 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2223 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2224 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2225 for( i
= 1; i
< 4; i
++ )
2226 brw_FRC( p
, param
[ i
], param
[ i
] );
2228 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2230 for( i
= 0; i
< 4; i
++ )
2231 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2232 for( i
= 0; i
< 4; i
++ )
2233 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2234 for( i
= 0; i
< 4; i
++ )
2235 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2236 for( i
= 0; i
< 4; i
++ )
2237 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2238 for( j
= 0; j
< 3; j
++ )
2239 for( i
= 0; i
< 4; i
++ )
2240 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2242 /* Mark the current address, as it will be a jump destination. The
2243 following code will be executed twice: first, with the flag
2244 register clear indicating the w=0 case, and second with flags
2248 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2249 be hashed. Since we have only 16 bits of precision in the hash, we
2250 must be careful about thorough mixing to maintain entropy as we
2251 squash the input vector into a small scalar. */
2252 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2253 brw_imm_uw( 0xBC8F ) );
2254 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2255 brw_imm_uw( 0xD0BD ) );
2256 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2257 brw_imm_uw( 0x9B93 ) );
2258 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2259 brw_imm_uw( 0xA359 ) );
2260 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2261 brw_imm_uw( 0xBC8F ) );
2263 /* Temporarily disable the execution mask while we work with ExecSize=16
2264 channels (the mask is set for ExecSize=8 and is probably incorrect).
2265 Although this might cause execution of unwanted channels, the code
2266 writes only to temporary registers and has no side effects, so
2267 disabling the mask is harmless. */
2268 brw_push_insn_state( p
);
2269 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2270 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2271 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2272 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2274 /* We're now ready to perform the hashing. The eight hashes are
2275 interleaved for performance. The hash function used is
2276 designed to rapidly achieve avalanche and require only 16x16
2277 bit multiplication, and 8-bit swizzles (which we get for
2279 for( i
= 0; i
< 4; i
++ )
2280 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2281 for( i
= 0; i
< 4; i
++ )
2282 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2283 odd_bytes( wtmp
[ i
] ) );
2284 for( i
= 0; i
< 4; i
++ )
2285 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2286 for( i
= 0; i
< 4; i
++ )
2287 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2288 odd_bytes( wtmp
[ i
] ) );
2289 brw_pop_insn_state( p
);
2291 /* Now we want to initialise the four rear gradients based on the
2292 hashes. Format conversion from signed integer to float leaves
2293 everything scaled too high by a factor of pow( 2, 15 ), but
2294 we correct for that right at the end. */
2296 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2297 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2298 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2299 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2300 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2302 brw_push_insn_state( p
);
2303 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2304 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2305 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2306 brw_pop_insn_state( p
);
2308 brw_MUL( p
, x1y0
, x1y0
, t
);
2309 brw_MUL( p
, x1y1
, x1y1
, t
);
2310 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2311 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2312 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2315 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2316 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2317 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2318 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2320 brw_push_insn_state( p
);
2321 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2322 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2323 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2324 brw_pop_insn_state( p
);
2326 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2327 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2328 /* prepare t for the w component (used below): w the first time through
2329 the loop; w - 1 the second time) */
2330 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2331 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2332 p
->current
->header
.predicate_inverse
= 1;
2333 brw_MOV( p
, t
, param
[ 3 ] );
2334 p
->current
->header
.predicate_inverse
= 0;
2335 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2336 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2337 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2339 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2340 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2341 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2342 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2345 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2346 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2347 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2348 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2350 brw_push_insn_state( p
);
2351 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2352 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2353 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2354 brw_pop_insn_state( p
);
2356 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2357 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2358 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2359 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2361 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2362 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2363 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2364 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2367 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2368 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2369 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2370 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2372 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2373 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2374 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2375 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2376 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2378 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2379 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2380 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2381 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2383 /* Here we interpolate in the y dimension... */
2384 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2385 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2386 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2387 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2388 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2389 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2391 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2392 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2393 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2394 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2396 /* Now do the same thing for the front four gradients... */
2398 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2399 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2400 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2401 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2403 brw_push_insn_state( p
);
2404 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2405 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2406 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2407 brw_pop_insn_state( p
);
2409 brw_MUL( p
, x1y0
, x1y0
, t
);
2410 brw_MUL( p
, x1y1
, x1y1
, t
);
2411 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2412 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2413 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2416 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2417 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2418 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2419 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2421 brw_push_insn_state( p
);
2422 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2423 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2424 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2425 brw_pop_insn_state( p
);
2427 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2428 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2429 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2430 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2431 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2433 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2434 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2435 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2436 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2439 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2440 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2441 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2442 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2444 brw_push_insn_state( p
);
2445 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2446 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2447 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2448 brw_pop_insn_state( p
);
2450 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2451 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2452 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2453 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2454 /* prepare t for the w component (used below): w the first time through
2455 the loop; w - 1 the second time) */
2456 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2457 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2458 p
->current
->header
.predicate_inverse
= 1;
2459 brw_MOV( p
, t
, param
[ 3 ] );
2460 p
->current
->header
.predicate_inverse
= 0;
2461 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2463 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2464 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2465 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2466 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2469 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2470 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2471 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2472 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2474 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2475 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2476 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2477 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2479 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2480 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2481 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2482 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2484 /* Interpolate in the y dimension: */
2485 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2486 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2487 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2488 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2489 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2490 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2492 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2493 time put the front face in tmp[ 1 ] and we're nearly there... */
2494 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2495 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2496 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2498 /* Another interpolation, in the z dimension: */
2499 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2500 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2501 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2503 /* Exit the loop if we've computed both cubes... */
2504 origin
= p
->nr_insn
;
2505 brw_push_insn_state( p
);
2506 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2507 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2508 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2509 brw_pop_insn_state( p
);
2511 /* Save the result for the w=0 case, and increment the w coordinate: */
2512 brw_MOV( p
, w0
, tmp
[ 0 ] );
2513 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2516 /* Loop around for the other cube. Explicitly set the flag register
2517 (unfortunately we must spend an extra instruction to do this: we
2518 can't rely on a side effect of the previous MOV or ADD because
2519 conditional modifiers which are normally true might be false in
2520 exceptional circumstances, e.g. given a NaN input; the add to
2521 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2522 brw_push_insn_state( p
);
2523 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2524 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2525 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2526 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2527 brw_pop_insn_state( p
);
2529 /* Patch the previous conditional branch now that we know the
2530 destination address. */
2531 brw_set_src1( p
->store
+ origin
,
2532 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2534 /* The very last interpolation. */
2535 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2536 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2537 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2539 /* scale by pow( 2, -15 ), as described above */
2540 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2542 release_tmps( c
, mark
);
2545 static void emit_noise4( struct brw_wm_compile
*c
,
2546 const struct prog_instruction
*inst
)
2548 struct brw_compile
*p
= &c
->func
;
2549 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2550 GLuint mask
= inst
->DstReg
.WriteMask
;
2552 int mark
= mark_tmps( c
);
2554 assert( mark
== 0 );
2556 src0
= get_src_reg( c
, inst
, 0, 0 );
2557 src1
= get_src_reg( c
, inst
, 0, 1 );
2558 src2
= get_src_reg( c
, inst
, 0, 2 );
2559 src3
= get_src_reg( c
, inst
, 0, 3 );
2561 param0
= alloc_tmp( c
);
2562 param1
= alloc_tmp( c
);
2563 param2
= alloc_tmp( c
);
2564 param3
= alloc_tmp( c
);
2566 brw_MOV( p
, param0
, src0
);
2567 brw_MOV( p
, param1
, src1
);
2568 brw_MOV( p
, param2
, src2
);
2569 brw_MOV( p
, param3
, src3
);
2571 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2573 /* Fill in the result: */
2574 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2575 for (i
= 0 ; i
< 4; i
++) {
2576 if (mask
& (1<<i
)) {
2577 dst
= get_dst_reg(c
, inst
, i
);
2578 brw_MOV( p
, dst
, param0
);
2581 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2582 brw_set_saturate( p
, 0 );
2584 release_tmps( c
, mark
);
2587 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2588 const struct prog_instruction
*inst
)
2590 struct brw_compile
*p
= &c
->func
;
2591 GLuint mask
= inst
->DstReg
.WriteMask
;
2592 struct brw_reg src0
[2], dst
[2];
2594 dst
[0] = get_dst_reg(c
, inst
, 0);
2595 dst
[1] = get_dst_reg(c
, inst
, 1);
2597 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2598 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2600 /* Calculate the pixel offset from window bottom left into destination
2603 if (mask
& WRITEMASK_X
) {
2604 /* X' = X - origin_x */
2607 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2608 brw_imm_d(0 - c
->key
.origin_x
));
2611 if (mask
& WRITEMASK_Y
) {
2612 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2615 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2616 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2621 BIAS on SIMD8 not working yet...
/*
 * emit_txb: emit a texture lookup that carries a bias value.
 *
 * Fetches the four destination and four source channel registers, copies
 * the texture coordinates into message registers 2..4 (coordinates a target
 * does not use are filled with 0.0), places src[3] in message register 5 as
 * the bias and 0.0 in message register 6, and then supplies the operands of
 * a sampler message of type BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS with a
 * response length of 4.
 *
 * NOTE(review): the opening of the sampler call and several of its
 * arguments are not visible in this copy of the file -- confirm them
 * against the complete source before changing anything here.
 */
2623 static void emit_txb(struct brw_wm_compile
*c
,
2624 const struct prog_instruction
*inst
)
2626 struct brw_compile
*p
= &c
->func
;
2627 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Map the instruction's texture source unit to its sampler unit. */
2628 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2631 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2633 for (i
= 0; i
< 4; i
++)
2634 dst
[i
] = get_dst_reg(c
, inst
, i
);
2635 for (i
= 0; i
< 4; i
++)
2636 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load s/t/r into message registers 2..4 according to the texture target. */
2638 switch (inst
->TexSrcTarget
) {
2639 case TEXTURE_1D_INDEX
:
2640 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2641 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2642 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2644 case TEXTURE_2D_INDEX
:
2645 case TEXTURE_RECT_INDEX
:
2646 brw_MOV(p
, brw_message_reg(2), src
[0]);
2647 brw_MOV(p
, brw_message_reg(3), src
[1]);
2648 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
/* Remaining targets: all three coordinates come from the source. */
2651 brw_MOV(p
, brw_message_reg(2), src
[0]);
2652 brw_MOV(p
, brw_message_reg(3), src
[1]);
2653 brw_MOV(p
, brw_message_reg(4), src
[2]);
2656 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2657 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
/* Operands of the sampler message follow. */
2659 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2661 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2662 SURF_INDEX_TEXTURE(unit
),
2664 inst
->DstReg
.WriteMask
, /* writemask */
2665 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2666 4, /* response_length */
/*
 * emit_tex: emit an ordinary texture lookup.
 *
 * Fetches the four destination and four source channel registers, derives
 * a per-unit "shadow" flag from the program key's shadowtex_mask, selects
 * which coordinates to send based on the texture target, moves them into
 * consecutive message registers, and supplies the operands of a sampler
 * message of type BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE.  The message length is
 * 6 for shadow lookups (which also send a zero lod/bias in message
 * register 5 and src[2] as the reference value in message register 6)
 * and 4 otherwise.
 *
 * NOTE(review): the declarations and updates of `emit`, `nr` and `msg_len`,
 * the opening of the sampler call, and the condition guarding the final
 * write of 1.0 to dst[3] are not visible in this copy of the file --
 * confirm them against the complete source.
 */
2672 static void emit_tex(struct brw_wm_compile
*c
,
2673 const struct prog_instruction
*inst
)
2675 struct brw_compile
*p
= &c
->func
;
2676 struct brw_reg dst
[4], src
[4], payload_reg
;
/* Map the instruction's texture source unit to its sampler unit. */
2677 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
/* Non-zero when this unit's bit is set in the key's shadow-texture mask. */
2681 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2683 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2685 for (i
= 0; i
< 4; i
++)
2686 dst
[i
] = get_dst_reg(c
, inst
, i
);
2687 for (i
= 0; i
< 4; i
++)
2688 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Choose the set of coordinates to send for this texture target. */
2690 switch (inst
->TexSrcTarget
) {
2691 case TEXTURE_1D_INDEX
:
2695 case TEXTURE_2D_INDEX
:
2696 case TEXTURE_RECT_INDEX
:
2697 emit
= WRITEMASK_XY
;
2701 emit
= WRITEMASK_XYZ
;
2707 /* move/load S, T, R coords */
2708 for (i
= 0; i
< nr
; i
++) {
/* swz maps coordinate slot i to the source channel that supplies it. */
2709 static const GLuint swz
[4] = {0,1,2,2};
2711 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2713 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2718 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2719 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
/* Operands of the sampler message follow. */
2723 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2725 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2726 SURF_INDEX_TEXTURE(unit
),
2728 inst
->DstReg
.WriteMask
, /* writemask */
2729 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2730 4, /* response_length */
2731 shadow
? 6 : 4, /* msg_length */
/* Writes 1.0 to the fourth destination channel (guard not visible here). */
2735 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2740 * Resolve subroutine calls after code emit is done.
2742 static void post_wm_emit( struct brw_wm_compile
*c
)
2744 brw_resolve_cals(&c
->func
);
/*
 * brw_wm_emit_glsl: main code-generation loop for GLSL-style fragment
 * programs.
 *
 * Walks c->prog_instructions, and for each instruction: optionally fetches
 * the constants it needs, selects the conditional modifier from the
 * instruction's CondUpdate flag, dispatches on the opcode to the matching
 * emit helper (or emits flow-control / subroutine sequences inline), and
 * finally selects the predicate control from CondUpdate.
 *
 * Open IF blocks and open loops are tracked in the if_inst[] and
 * loop_inst[] stacks so that ELSE/ENDIF/ENDLOOP can find their partner
 * instruction.
 *
 * NOTE(review): many case labels and statements of the opcode switch are
 * not visible in this copy of the file -- consult the complete source
 * before modifying the dispatch.
 */
2747 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2750 #define MAX_LOOP_DEPTH 32
2751 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2752 struct brw_instruction
*inst0
, *inst1
;
2753 int i
, if_insn
= 0, loop_insn
= 0;
2754 struct brw_compile
*p
= &c
->func
;
2755 struct brw_indirect stack_index
= brw_indirect(0, 0);
/* Reset the per-compilation out-of-registers flag. */
2757 c
->out_of_regs
= GL_FALSE
;
2760 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the subroutine stack index at the start of c->stack. */
2761 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2763 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2764 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2769 _mesa_printf("Inst %d: ", i
);
2770 _mesa_print_instruction(inst
);
2773 /* fetch any constants that this instruction needs */
2774 if (c
->fp
->use_const_buffer
)
2775 fetch_constants(c
, inst
);
/* Two conditional-modifier settings are visible: NZ and NONE, chosen by
   CondUpdate (the else keyword itself is not visible in this copy). */
2777 if (inst
->CondUpdate
)
2778 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2780 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2782 switch (inst
->Opcode
) {
2784 emit_pixel_xy(c
, inst
);
2787 emit_delta_xy(c
, inst
);
2790 emit_pixel_w(c
, inst
);
2793 emit_linterp(c
, inst
);
2796 emit_pinterp(c
, inst
);
2799 emit_cinterp(c
, inst
);
2802 emit_wpos_xy(c
, inst
);
2805 emit_fb_write(c
, inst
);
2807 case WM_FRONTFACING
:
2808 emit_frontfacing(c
, inst
);
2832 emit_trunc(c
, inst
);
2869 emit_min_max(c
, inst
);
2905 emit_noise1(c
, inst
);
2908 emit_noise2(c
, inst
);
2911 emit_noise3(c
, inst
);
2914 emit_noise4(c
, inst
);
/* Push a new IF onto the if_inst stack; the assert guards its capacity. */
2926 assert(if_insn
< MAX_IFSN
);
2927 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
/* Replace the innermost open IF entry with its ELSE instruction. */
2930 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
/* Pop the innermost IF/ELSE and close it. */
2933 assert(if_insn
> 0);
2934 brw_ENDIF(p
, if_inst
[--if_insn
]);
2937 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Subroutine call sequence: store a return address (current IP plus
   3*16) at the stack slot, advance the stack index by 4, record the call
   site for later resolution, then emit the jump as an add to the IP. */
2943 brw_push_insn_state(p
);
2944 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2945 brw_set_access_mode(p
, BRW_ALIGN_1
);
2946 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2947 brw_set_access_mode(p
, BRW_ALIGN_16
);
2948 brw_ADD(p
, get_addr_reg(stack_index
),
2949 get_addr_reg(stack_index
), brw_imm_d(4));
2950 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2951 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2952 brw_pop_insn_state(p
);
/* Subroutine return sequence: step the stack index back by 4 and load
   the IP from the stack slot. */
2956 brw_push_insn_state(p
);
2957 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2958 brw_ADD(p
, get_addr_reg(stack_index
),
2959 get_addr_reg(stack_index
), brw_imm_d(-4));
2960 brw_set_access_mode(p
, BRW_ALIGN_1
);
2961 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2962 brw_set_access_mode(p
, BRW_ALIGN_16
);
2963 brw_pop_insn_state(p
);
/* NOTE(review): unlike the IF push above, this push onto loop_inst[] is
   not guarded by an assert against MAX_LOOP_DEPTH. */
2966 case OPCODE_BGNLOOP
:
2967 /* XXX may need to invalidate the current_constant regs */
2968 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2972 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2976 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2978 case OPCODE_ENDLOOP
:
2980 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2981 /* patch all the BREAK instructions from
2983 while (inst0
> loop_inst
[loop_insn
]) {
2985 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2986 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2987 inst0
->bits3
.if_else
.pop_count
= 0;
2988 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2989 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2990 inst0
->bits3
.if_else
.pop_count
= 0;
2995 _mesa_printf("unsupported IR in fragment shader %d\n",
2999 if (inst
->CondUpdate
)
3000 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
3002 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
3009 * Do GPU code generation for shaders that use GLSL features such as
3010 * flow control. Other shaders will be compiled with the
/*
 * brw_wm_glsl_emit: entry point for GLSL-path fragment program code
 * generation.
 *
 * Prints a banner when DEBUG_WM is enabled, runs the code generator
 * (brw_wm_emit_glsl), optionally dumps the resulting program, and records
 * the number of GRF registers used and a scratch size of 0 in
 * c->prog_data.
 *
 * NOTE(review): the statement that belongs under the "initial instruction
 * translation/simplification" comment is not visible in this copy of the
 * file -- confirm against the complete source.
 */
3012 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
3014 if (INTEL_DEBUG
& DEBUG_WM
) {
3015 _mesa_printf("brw_wm_glsl_emit:\n");
3018 /* initial instruction translation/simplification */
3021 /* actual code generation */
3022 brw_wm_emit_glsl(brw
, c
);
3024 if (INTEL_DEBUG
& DEBUG_WM
) {
3025 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* Publish the register and scratch requirements of the compiled program. */
3028 c
->prog_data
.total_grf
= num_grf_used(c
);
3029 c
->prog_data
.total_scratch
= 0;