1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
/* Identifiers SUB_NOISE1 .. SUB_NOISE4.
 * NOTE(review): presumably the enumerators of `enum _subroutine` (the type
 * of invoke_subroutine()'s selector argument); the enclosing declaration is
 * not visible in this excerpt -- confirm. */
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
/* brw_wm_is_glsl: iterates over fp->Base.Instructions[0 .. NumInstructions-1]
 * and switches on each instruction's Opcode.
 * NOTE(review): the embedded numbering jumps from 24 to 48, so the switch
 * cases and the return statements are not visible in this excerpt. */
15 * Determine if the given fragment program uses GLSL features such
16 * as flow conditionals, loops, subroutines.
17 * Some GLSL shaders may use these features, others might not.
19 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
23 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
24 switch (inst
->Opcode
) {
/* Prototype for reclaim_temps(); the definition appears later in this file.
 * NOTE(review): the storage-class / return-type line is not visible here. */
48 reclaim_temps(struct brw_wm_compile
*c
);
/* prealloc_grf: sets c->used_grf[r] to GL_TRUE.  No range check on r is
 * performed in the visible code. */
51 /** Mark GRF register as used. */
53 prealloc_grf(struct brw_wm_compile
*c
, int r
)
55 c
->used_grf
[r
] = GL_TRUE
;
/* release_grf: clears c->used_grf[r] and lowers c->first_free_grf to r when
 * r is smaller (MIN2), so a later search starts no higher than the register
 * just released.  The assert that r was in use is commented out. */
59 /** Mark given GRF register as not in use. */
61 release_grf(struct brw_wm_compile
*c
, int r
)
63 /*assert(c->used_grf[r]);*/
64 c
->used_grf
[r
] = GL_FALSE
;
65 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
/* alloc_grf: scans c->used_grf[] from c->first_free_grf up to
 * BRW_WM_MAX_GRF for an unused entry, marks it used and moves
 * first_free_grf just past it.  first_free_grf is then reset to 0 and the
 * same scan is repeated.  After that every entry is asserted to be in use,
 * and a warning is issued once per compilation (guarded by c->out_of_regs).
 * NOTE(review): the return statements, and whatever call implements the
 * "try to reclaim some" step, fall in gaps of the embedded numbering and are
 * not visible in this excerpt. */
69 /** Return index of a free GRF, mark it as used. */
71 alloc_grf(struct brw_wm_compile
*c
)
74 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
75 if (!c
->used_grf
[r
]) {
76 c
->used_grf
[r
] = GL_TRUE
;
77 c
->first_free_grf
= r
+ 1; /* a guess */
82 /* no free temps, try to reclaim some */
84 c
->first_free_grf
= 0;
87 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
88 if (!c
->used_grf
[r
]) {
89 c
->used_grf
[r
] = GL_TRUE
;
90 c
->first_free_grf
= r
+ 1; /* a guess */
95 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
96 assert(c
->used_grf
[r
]);
99 /* really, no free GRF regs found */
100 if (!c
->out_of_regs
) {
101 /* print warning once per compilation */
102 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
103 c
->out_of_regs
= GL_TRUE
;
/* num_grf_used: the visible loop walks r from BRW_WM_MAX_GRF - 1 down to 0.
 * NOTE(review): the loop body and the return value are not visible in this
 * excerpt. */
110 /** Return number of GRF registers used */
112 num_grf_used(const struct brw_wm_compile
*c
)
115 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
124 * Record the mapping of a Mesa register to a hardware register.
126 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
127 int component
, struct brw_reg reg
)
129 c
->wm_regs
[file
][index
][component
].reg
= reg
;
130 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
/* get_scalar_dst_index: tests bits 0..3 of inst->DstReg.WriteMask in
 * ascending order.
 * NOTE(review): the values returned (on a hit, and when no bit is set) are
 * not visible in this excerpt. */
134 * Examine instruction's write mask to find index of first component
135 * enabled for writing.
137 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
140 for (i
= 0; i
< 4; i
++)
141 if (inst
->DstReg
.WriteMask
& (1<<i
))
/* alloc_tmp: when c->tmp_index has caught up with c->tmp_max, a GRF is
 * requested from alloc_grf() and appended to c->tmp_regs[] (tmp_max grows).
 * A vec8 GRF is then formed from tmp_regs[tmp_index++] and its number is
 * asserted to be below BRW_WM_MAX_GRF.
 * NOTE(review): the condition guarding the hard-coded `r = 50` fallback and
 * the return statement are not visible in this excerpt. */
146 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
150 /* if we need to allocate another temp, grow the tmp_regs[] array */
151 if (c
->tmp_index
== c
->tmp_max
) {
152 int r
= alloc_grf(c
);
154 /*printf("Out of temps in %s\n", __FUNCTION__);*/
155 r
= 50; /* XXX random register! */
157 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
160 /* form the GRF register */
161 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
162 /*printf("alloc_temp %d\n", reg.nr);*/
163 assert(reg
.nr
< BRW_WM_MAX_GRF
);
/* mark_tmps: callers store the returned int and later pass it back to
 * release_tmps().
 * NOTE(review): the function body is not visible in this excerpt. */
169 * Save current temp register info.
170 * There must be a matching call to release_tmps().
172 static int mark_tmps(struct brw_wm_compile
*c
)
177 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
179 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
/* release_tmps: counterpart of mark_tmps(); `mark` is the value that call
 * returned.
 * NOTE(review): the function body is not visible in this excerpt. */
182 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
/* get_reg: resolves (file, index, component) to a hardware register.
 *  - STATE_VAR / CONSTANT / UNIFORM are all folded into PROGRAM_STATE_VAR.
 *  - PROGRAM_UNDEFINED, and the path that reports "Unexpected file", return
 *    brw_null_reg().
 *  - If wm_regs[file][index][component] is already inited, that register is
 *    reused; otherwise a GRF is taken from alloc_grf(), wrapped with
 *    brw_vec8_grf() and recorded through set_reg().
 *  - `neg` is tested per component bit.
 * NOTE(review): the switch header, the else keywords, the guard around the
 * hard-coded `grf = 51` fallback, the negate/abs application and the final
 * return are not visible in this excerpt. */
188 * Convert Mesa src register to brw register.
190 * Since we're running in SOA mode each Mesa register corresponds to four
191 * hardware registers. We allocate the hardware registers as needed here.
193 * \param file register file, one of PROGRAM_x
194 * \param index register number
195 * \param component src component (X=0, Y=1, Z=2, W=3)
196 * \param nr not used?!?
197 * \param neg negate value?
198 * \param abs take absolute value?
200 static struct brw_reg
201 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
202 int nr
, GLuint neg
, GLuint abs
)
206 case PROGRAM_STATE_VAR
:
207 case PROGRAM_CONSTANT
:
208 case PROGRAM_UNIFORM
:
209 file
= PROGRAM_STATE_VAR
;
211 case PROGRAM_UNDEFINED
:
212 return brw_null_reg();
213 case PROGRAM_TEMPORARY
:
216 case PROGRAM_PAYLOAD
:
219 _mesa_problem(NULL
, "Unexpected file in get_reg()");
220 return brw_null_reg();
224 assert(component
< 4);
226 /* see if we've already allocated a HW register for this Mesa register */
227 if (c
->wm_regs
[file
][index
][component
].inited
) {
229 reg
= c
->wm_regs
[file
][index
][component
].reg
;
232 /* no, allocate new register */
233 int grf
= alloc_grf(c
);
234 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
236 /* totally out of temps */
237 grf
= 51; /* XXX random register! */
240 reg
= brw_vec8_grf(grf
, 0);
241 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
243 set_reg(c
, file
, index
, component
, reg
);
246 if (neg
& (1 << component
)) {
/* reclaim_temps: obtains live intervals for the program temporaries via
 * _mesa_find_temp_intervals(), then for every temp whose interval end is not
 * -1 and lies before c->cur_inst, visits its four components and clears the
 * `inited` flag of each one that had a hardware register mapped.
 * NOTE(review): the statement that hands GRF `r` back to the allocator and
 * any preprocessor guard around the printf are not visible in this excerpt. */
257 * This is called if we run out of GRF registers. Examine the live intervals
258 * of temp regs in the program and free those which won't be used again.
261 reclaim_temps(struct brw_wm_compile
*c
)
263 GLint intBegin
[MAX_PROGRAM_TEMPS
];
264 GLint intEnd
[MAX_PROGRAM_TEMPS
];
267 /*printf("Reclaim temps:\n");*/
269 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
272 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
273 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
274 /* program temp[i] can be freed */
276 /*printf(" temp[%d] is dead\n", index);*/
277 for (component
= 0; component
< 4; component
++) {
278 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
279 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
282 printf(" Reclaim temp %d, reg %d at inst %d\n",
283 index, r, c->cur_inst);
285 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
/* prealloc_reg: builds the initial Mesa-to-hardware register layout.
 * Visible steps, in order:
 *  - clear c->used_grf[] and reset first_free_grf;
 *  - map the four PAYLOAD_DEPTH components (GRF i*2 when
 *    i < key.nr_depth_regs, GRF 0 otherwise) and advance reg_index by
 *    2 * nr_depth_regs;
 *  - choose use_const_buffer when (nr_params + nr_temps) * 4 + reg_index > 80;
 *  - with a constant buffer, prog_data.nr_params is 0; the parameter loop
 *    maps each float of each parameter to a vec1 GRF starting at reg_index
 *    and records a pointer to its value in prog_data.param[];
 *  - map fragment-shader inputs selected by `inputs` to vec8 GRFs;
 *  - record first_curbe_grf, urb_read_length, curb_read_length, the emit
 *    mask register and the stack register;
 *  - reserve GRF 126 and 127;
 *  - with a constant buffer, allocate three GRFs for current_const[].
 * NOTE(review): several lines fall in gaps of the embedded numbering (else
 * keywords, local declarations, reg_index increments in the input loop, the
 * body of the "mark GRF regs as in-use" loop, and any preprocessor guard
 * around the trailing printf calls); confirm against the full file. */
296 * Preallocate registers. This sets up the Mesa to hardware register
297 * mapping for certain registers, such as constants (uniforms/state vars)
300 static void prealloc_reg(struct brw_wm_compile
*c
)
304 int urb_read_length
= 0;
305 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
306 GLuint reg_index
= 0;
308 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
309 c
->first_free_grf
= 0;
311 for (i
= 0; i
< 4; i
++) {
312 if (i
< c
->key
.nr_depth_regs
)
313 reg
= brw_vec8_grf(i
* 2, 0);
315 reg
= brw_vec8_grf(0, 0);
316 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
318 reg_index
+= 2 * c
->key
.nr_depth_regs
;
322 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
323 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
325 /* use a real constant buffer, or just use a section of the GRF? */
326 /* XXX this heuristic may need adjustment... */
327 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
328 c
->fp
->use_const_buffer
= GL_TRUE
;
330 c
->fp
->use_const_buffer
= GL_FALSE
;
331 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
333 if (c
->fp
->use_const_buffer
) {
334 /* We'll use a real constant buffer and fetch constants from
335 * it with a dataport read message.
338 /* number of float constants in CURBE */
339 c
->prog_data
.nr_params
= 0;
342 const struct gl_program_parameter_list
*plist
=
343 c
->fp
->program
.Base
.Parameters
;
346 /* number of float constants in CURBE */
347 c
->prog_data
.nr_params
= 4 * nr_params
;
349 /* loop over program constants (float[4]) */
350 for (i
= 0; i
< nr_params
; i
++) {
351 /* loop over XYZW channels */
352 for (j
= 0; j
< 4; j
++, index
++) {
353 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
354 /* Save pointer to parameter/constant value.
355 * Constants will be copied in prepare_constant_buffer()
357 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
358 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
361 /* number of constant regs used (each reg is float[8]) */
362 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
363 reg_index
+= c
->nr_creg
;
367 /* fragment shader inputs */
368 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
371 if (i
>= VERT_RESULT_VAR0
)
372 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
373 else if (i
<= VERT_RESULT_TEX7
)
378 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
379 urb_read_length
= reg_index
;
380 reg
= brw_vec8_grf(reg_index
, 0);
381 for (j
= 0; j
< 4; j
++)
382 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
384 if (c
->key
.vp_outputs_written
& (1 << i
)) {
389 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
390 c
->prog_data
.urb_read_length
= urb_read_length
;
391 c
->prog_data
.curb_read_length
= c
->nr_creg
;
392 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
394 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
397 /* mark GRF regs [0..reg_index-1] as in-use */
398 for (i
= 0; i
< reg_index
; i
++)
401 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
402 prealloc_grf(c
, 126);
403 prealloc_grf(c
, 127);
405 /* An instruction may reference up to three constants.
406 * They'll be found in these registers.
407 * XXX alloc these on demand!
409 if (c
->fp
->use_const_buffer
) {
410 for (i
= 0; i
< 3; i
++) {
411 c
->current_const
[i
].index
= -1;
412 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
416 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
417 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
/* fetch_constants: for each of the instruction's three source slots whose
 * File is STATE_VAR, CONSTANT or UNIFORM, records src->Index in
 * c->current_const[i].index and issues a read whose visible arguments are
 * the slot's GRF (writeback destination), src->RelAddr, a byte offset of
 * 16 * src->Index and SURF_INDEX_FRAG_CONST_BUFFER.
 * NOTE(review): the name of the read helper, its remaining arguments, any
 * "already fetched" test and any preprocessor guard around the printf are
 * not visible in this excerpt. */
423 * Check if any of the instruction's src registers are constants, uniforms,
424 * or statevars. If so, fetch any constants that we don't already have in
425 * the three GRF slots.
427 static void fetch_constants(struct brw_wm_compile
*c
,
428 const struct prog_instruction
*inst
)
430 struct brw_compile
*p
= &c
->func
;
433 /* loop over instruction src regs */
434 for (i
= 0; i
< 3; i
++) {
435 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
436 if (src
->File
== PROGRAM_STATE_VAR
||
437 src
->File
== PROGRAM_CONSTANT
||
438 src
->File
== PROGRAM_UNIFORM
) {
439 c
->current_const
[i
].index
= src
->Index
;
442 printf(" fetch const[%d] for arg %d into reg %d\n",
443 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
446 /* need to fetch the constant now */
448 c
->current_const
[i
].reg
, /* writeback dest */
449 src
->RelAddr
, /* relative indexing? */
450 16 * src
->Index
, /* byte offset */
451 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
/* get_dst_reg: forwards inst->DstReg.File, inst->DstReg.Index and the
 * requested component to get_reg().
 * NOTE(review): the rest of the parameter list, the definition of `nr` and
 * the trailing get_reg() arguments are not visible in this excerpt. */
459 * Convert Mesa dst register to brw register.
461 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
462 const struct prog_instruction
*inst
,
466 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
/* get_src_reg_const: starts from c->current_const[srcRegIndex].reg, applies
 * stride(…, 0, 1, 0) so one float is replicated, and selects the float by
 * setting subnr to component * 4 (byte offset).  negate() is applied when
 * the component's bit is set in src->Negate.
 * Asserts: component < 4, srcRegIndex < 3, and the slot's index != -1.
 * NOTE(review): the condition in front of brw_abs(), any guard around the
 * printf and the return statement are not visible in this excerpt. */
471 static struct brw_reg
472 get_src_reg_const(struct brw_wm_compile
*c
,
473 const struct prog_instruction
*inst
,
474 GLuint srcRegIndex
, GLuint component
)
476 /* We should have already fetched the constant from the constant
477 * buffer in fetch_constants(). Now we just have to return a
478 * register description that extracts the needed component and
479 * smears it across all eight vector components.
481 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
482 struct brw_reg const_reg
;
484 assert(component
< 4);
485 assert(srcRegIndex
< 3);
486 assert(c
->current_const
[srcRegIndex
].index
!= -1);
487 const_reg
= c
->current_const
[srcRegIndex
].reg
;
489 /* extract desired float from the const_reg, and smear */
490 const_reg
= stride(const_reg
, 0, 1, 0);
491 const_reg
.subnr
= component
* 4;
493 if (src
->Negate
& (1 << component
))
494 const_reg
= negate(const_reg
);
496 const_reg
= brw_abs(const_reg
);
499 printf(" form const[%d].%d for arg %d, reg %d\n",
500 c
->current_const
[srcRegIndex
].index
,
/* get_src_reg: resolves one channel of a source operand.
 *  - The channel is first mapped through the operand's swizzle (GET_SWZ).
 *  - SWIZZLE_ZERO and SWIZZLE_ONE yield float immediates 0.0 and 1.0.
 *  - With a constant buffer in use, STATE_VAR / CONSTANT / UNIFORM operands
 *    are served by get_src_reg_const().
 *  - Everything else goes through get_reg() with the operand's Negate and
 *    Abs fields.
 * NOTE(review): the definition of `nr` passed to get_reg() is not visible in
 * this excerpt. */
511 * Convert Mesa src register to brw register.
513 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
514 const struct prog_instruction
*inst
,
515 GLuint srcRegIndex
, GLuint channel
)
517 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
519 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
521 /* Extended swizzle terms */
522 if (component
== SWIZZLE_ZERO
) {
523 return brw_imm_f(0.0F
);
525 else if (component
== SWIZZLE_ONE
) {
526 return brw_imm_f(1.0F
);
529 if (c
->fp
->use_const_buffer
&&
530 (src
->File
== PROGRAM_STATE_VAR
||
531 src
->File
== PROGRAM_CONSTANT
||
532 src
->File
== PROGRAM_UNIFORM
)) {
533 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
536 /* other type of source register */
537 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
538 src
->Negate
, src
->Abs
);
544 * Same as \sa get_src_reg() but if the register is a literal, emit
545 * a brw_reg encoding the literal.
546 * Note that a brw instruction only allows one src operand to be a literal.
547 * For instructions with more than one operand, only the second can be a
548 * literal. This means that we treat some literals as constants/uniforms
549 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
/* get_src_reg_imm: for a PROGRAM_CONSTANT operand, reads the swizzled
 * component straight from the program's ParameterValues[src->Index] and
 * returns it as a float immediate; any other operand is delegated to
 * get_src_reg().
 * Note that the Negate test uses the unswizzled `channel` bit, whereas
 * get_src_reg_const() tests the swizzled component bit.
 * NOTE(review): the statement executed when the Negate bit is set, the
 * condition in front of FABSF() and any guard around the printf are not
 * visible in this excerpt. */
552 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
553 const struct prog_instruction
*inst
,
554 GLuint srcRegIndex
, GLuint channel
)
556 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
557 if (src
->File
== PROGRAM_CONSTANT
) {
559 const int component
= GET_SWZ(src
->Swizzle
, channel
);
560 const GLfloat
*param
=
561 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
562 GLfloat value
= param
[component
];
563 if (src
->Negate
& (1 << channel
))
566 value
= FABSF(value
);
568 printf(" form immed value %f for chan %d\n", value
, channel
);
570 return brw_imm_f(value
);
573 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
579 * Subroutines are minimal support for reusable instruction sequences.
580 * They are implemented as simply as possible to minimise overhead: there
581 * is no explicit support for communication between the caller and callee
582 * other than saving the return address in a temporary register, nor is
583 * there any automatic local storage. This implies that great care is
584 * required before attempting reentrancy or any kind of nested
585 * subroutine invocations.
/* invoke_subroutine: c->subroutines[subroutine] holds the instruction index
 * at which the subroutine body was emitted (non-zero once emitted).
 *  - Already emitted: a temp receives IP + (2 << 4) as the return address
 *    and IP is then adjusted by an immediate derived from the stored index.
 *  - First use: a placeholder ADD (immediate 0) computes the return address,
 *    the body's start index is recorded, a MOV of the return address back
 *    into IP follows the body, and the placeholder's src1 is patched to
 *    (p->nr_insn - base) << 4.
 * Both paths run their IP arithmetic with mask control disabled and release
 * the temp afterwards.
 * NOTE(review): the else keyword, the call through `emit`, and the tail of
 * the jump-offset expression are not visible in this excerpt. */
587 static void invoke_subroutine( struct brw_wm_compile
*c
,
588 enum _subroutine subroutine
,
589 void (*emit
)( struct brw_wm_compile
* ) )
591 struct brw_compile
*p
= &c
->func
;
593 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
595 if( c
->subroutines
[ subroutine
] ) {
596 /* subroutine previously emitted: reuse existing instructions */
598 int mark
= mark_tmps( c
);
599 struct brw_reg return_address
= retype( alloc_tmp( c
),
600 BRW_REGISTER_TYPE_UD
);
601 int here
= p
->nr_insn
;
603 brw_push_insn_state(p
);
604 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
605 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
607 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
608 brw_imm_d( ( c
->subroutines
[ subroutine
] -
610 brw_pop_insn_state(p
);
612 release_tmps( c
, mark
);
614 /* previously unused subroutine: emit, and mark for later reuse */
616 int mark
= mark_tmps( c
);
617 struct brw_reg return_address
= retype( alloc_tmp( c
),
618 BRW_REGISTER_TYPE_UD
);
619 struct brw_instruction
*calc
;
620 int base
= p
->nr_insn
;
622 brw_push_insn_state(p
);
623 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
624 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
625 brw_pop_insn_state(p
);
627 c
->subroutines
[ subroutine
] = p
->nr_insn
;
631 brw_push_insn_state(p
);
632 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
633 brw_MOV( p
, brw_ip_reg(), return_address
);
634 brw_pop_insn_state(p
);
636 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
638 release_tmps( c
, mark
);
642 static void emit_abs( struct brw_wm_compile
*c
,
643 const struct prog_instruction
*inst
)
646 struct brw_compile
*p
= &c
->func
;
647 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
648 for (i
= 0; i
< 4; i
++) {
649 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
650 struct brw_reg src
, dst
;
651 dst
= get_dst_reg(c
, inst
, i
);
652 src
= get_src_reg(c
, inst
, 0, i
);
653 brw_MOV(p
, dst
, brw_abs(src
));
656 brw_set_saturate(p
, 0);
/* emit_trunc: per channel, emits brw_RNDZ from src0 into the destination,
 * with saturation set from inst->SaturateMode and cleared afterwards.
 * NOTE(review): `mask` is loaded from the write mask but the line that
 * would test it per channel falls in a numbering gap (667) and is not
 * visible in this excerpt. */
659 static void emit_trunc( struct brw_wm_compile
*c
,
660 const struct prog_instruction
*inst
)
663 struct brw_compile
*p
= &c
->func
;
664 GLuint mask
= inst
->DstReg
.WriteMask
;
665 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
666 for (i
= 0; i
< 4; i
++) {
668 struct brw_reg src
, dst
;
669 dst
= get_dst_reg(c
, inst
, i
);
670 src
= get_src_reg(c
, inst
, 0, i
);
671 brw_RNDZ(p
, dst
, src
);
674 brw_set_saturate(p
, 0);
/* emit_mov: per channel, emits brw_MOV from src0 into the destination, with
 * saturation set from inst->SaturateMode and cleared afterwards.  The
 * immediate-operand variant is deliberately commented out (see the XXX).
 * NOTE(review): `mask` is loaded from the write mask but the line that
 * would test it per channel falls in a numbering gap (685) and is not
 * visible in this excerpt. */
677 static void emit_mov( struct brw_wm_compile
*c
,
678 const struct prog_instruction
*inst
)
681 struct brw_compile
*p
= &c
->func
;
682 GLuint mask
= inst
->DstReg
.WriteMask
;
683 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
684 for (i
= 0; i
< 4; i
++) {
686 struct brw_reg src
, dst
;
687 dst
= get_dst_reg(c
, inst
, i
);
688 /* XXX some moves from immediate value don't work reliably!!! */
689 /*src = get_src_reg_imm(c, inst, 0, i);*/
690 src
= get_src_reg(c
, inst
, 0, i
);
691 brw_MOV(p
, dst
, src
);
694 brw_set_saturate(p
, 0);
/* emit_pixel_xy: for the X and Y write-mask bits, emits one operation each
 * whose visible operands are the UW view of the destination, a strided UW
 * view of r1 (suboffset 4 for X, 5 for Y) and a packed immediate vector
 * (0x10101010 for X, 0x11001100 for Y).
 * NOTE(review): the opcode lines (713, 720) are not visible in this
 * excerpt. */
697 static void emit_pixel_xy(struct brw_wm_compile
*c
,
698 const struct prog_instruction
*inst
)
700 struct brw_reg r1
= brw_vec1_grf(1, 0);
701 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
703 struct brw_reg dst0
, dst1
;
704 struct brw_compile
*p
= &c
->func
;
705 GLuint mask
= inst
->DstReg
.WriteMask
;
707 dst0
= get_dst_reg(c
, inst
, 0);
708 dst1
= get_dst_reg(c
, inst
, 1);
709 /* Calculate pixel centers by adding 1 or 0 to each of the
710 * micro-tile coordinates passed in r1.
712 if (mask
& WRITEMASK_X
) {
714 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
715 stride(suboffset(r1_uw
, 4), 2, 4, 0),
716 brw_imm_v(0x10101010));
719 if (mask
& WRITEMASK_Y
) {
721 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
722 stride(suboffset(r1_uw
, 5), 2, 4, 0),
723 brw_imm_v(0x11001100));
/* emit_delta_xy: fetches destination channels 0/1 and source-0 channels 0/1,
 * then for the X and Y write-mask bits emits one operation each using the
 * UW view of the matching source; the Y case visibly uses
 * negate(suboffset(r1, 1)) as its other operand.
 * NOTE(review): the opcode, the destination operand and the X-case origin
 * operand fall in numbering gaps and are not visible in this excerpt. */
727 static void emit_delta_xy(struct brw_wm_compile
*c
,
728 const struct prog_instruction
*inst
)
730 struct brw_reg r1
= brw_vec1_grf(1, 0);
731 struct brw_reg dst0
, dst1
, src0
, src1
;
732 struct brw_compile
*p
= &c
->func
;
733 GLuint mask
= inst
->DstReg
.WriteMask
;
735 dst0
= get_dst_reg(c
, inst
, 0);
736 dst1
= get_dst_reg(c
, inst
, 1);
737 src0
= get_src_reg(c
, inst
, 0, 0);
738 src1
= get_src_reg(c
, inst
, 0, 1);
739 /* Calc delta X,Y by subtracting origin in r1 from the pixel
742 if (mask
& WRITEMASK_X
) {
745 retype(src0
, BRW_REGISTER_TYPE_UW
),
749 if (mask
& WRITEMASK_Y
) {
752 retype(src1
, BRW_REGISTER_TYPE_UW
),
753 negate(suboffset(r1
,1)));
/* fire_fb_write: with mask control disabled, emits an operation targeting
 * message register base_reg + 1, then issues the framebuffer-write message
 * whose visible operands are a null UW vec8 destination and a UW view of
 * GRF 0.
 * NOTE(review): most of the parameter list and both call names/argument
 * lists fall in numbering gaps and are not visible in this excerpt. */
758 static void fire_fb_write( struct brw_wm_compile
*c
,
764 struct brw_compile
*p
= &c
->func
;
765 /* Pass through control information:
767 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
769 brw_push_insn_state(p
);
770 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
772 brw_message_reg(base_reg
+ 1),
774 brw_pop_insn_state(p
);
776 /* Send framebuffer write message: */
778 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
780 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
/* emit_fb_write: assembles the framebuffer-write payload in message
 * registers starting at `nr` and hands off to fire_fb_write().
 * Visible steps:
 *  - the four channels of source 0 are MOVed to message regs nr + channel;
 *  - when key.source_depth_to_render_target is set, a depth value is MOVed
 *    from source 2 channel 2 (key.computes_depth) or source 1 channel 1;
 *  - when key.dest_depth_reg is set, it is split into comp (/2) and off (%2)
 *    and values from source 1 are MOVed with compression disabled;
 *  - target is inst->Aux >> 1.
 * NOTE(review): the initial value and updates of `nr`, the definition of
 * `eot`, the else keywords and the test on `off` are not visible in this
 * excerpt. */
787 static void emit_fb_write(struct brw_wm_compile
*c
,
788 const struct prog_instruction
*inst
)
790 struct brw_compile
*p
= &c
->func
;
796 /* Reserve a space for AA - may not be needed:
798 if (c
->key
.aa_dest_stencil_reg
)
801 brw_push_insn_state(p
);
802 for (channel
= 0; channel
< 4; channel
++) {
803 src0
= get_src_reg(c
, inst
, 0, channel
);
804 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
805 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
806 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
808 /* skip over the regs populated above: */
810 brw_pop_insn_state(p
);
812 if (c
->key
.source_depth_to_render_target
) {
813 if (c
->key
.computes_depth
) {
814 src0
= get_src_reg(c
, inst
, 2, 2);
815 brw_MOV(p
, brw_message_reg(nr
), src0
);
818 src0
= get_src_reg(c
, inst
, 1, 1);
819 brw_MOV(p
, brw_message_reg(nr
), src0
);
825 if (c
->key
.dest_depth_reg
) {
826 const GLuint comp
= c
->key
.dest_depth_reg
/ 2;
827 const GLuint off
= c
->key
.dest_depth_reg
% 2;
830 /* XXX this code needs review/testing */
831 struct brw_reg arg1_0
= get_src_reg(c
, inst
, 1, comp
);
832 struct brw_reg arg1_1
= get_src_reg(c
, inst
, 1, comp
+1);
834 brw_push_insn_state(p
);
835 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
837 brw_MOV(p
, brw_message_reg(nr
), offset(arg1_0
, 1));
839 brw_MOV(p
, brw_message_reg(nr
+1), arg1_1
);
840 brw_pop_insn_state(p
);
844 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
845 brw_MOV(p
, brw_message_reg(nr
), src
);
850 target
= inst
->Aux
>> 1;
852 fire_fb_write(c
, 0, nr
, target
, eot
);
/* emit_pixel_w: only acts when the W write-mask bit is set.  interp3 is a
 * vec1 view of GRF (src0.nr + 1) at sub-register 4; a LINE with delta0 into
 * the null register is followed by a MAC with delta1 into message
 * register 2, and then a math operation using BRW_MATH_FUNCTION_INV with no
 * saturation and full precision.
 * NOTE(review): the math call's name and its remaining operands are not
 * visible in this excerpt. */
855 static void emit_pixel_w( struct brw_wm_compile
*c
,
856 const struct prog_instruction
*inst
)
858 struct brw_compile
*p
= &c
->func
;
859 GLuint mask
= inst
->DstReg
.WriteMask
;
860 if (mask
& WRITEMASK_W
) {
861 struct brw_reg dst
, src0
, delta0
, delta1
;
862 struct brw_reg interp3
;
864 dst
= get_dst_reg(c
, inst
, 3);
865 src0
= get_src_reg(c
, inst
, 0, 0);
866 delta0
= get_src_reg(c
, inst
, 1, 0);
867 delta1
= get_src_reg(c
, inst
, 1, 1);
869 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
870 /* Calc 1/w - just linterp wpos[3] optimized by putting the
871 * result straight into a message reg.
873 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
874 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
878 BRW_MATH_FUNCTION_INV
,
879 BRW_MATH_SATURATE_NONE
,
881 BRW_MATH_PRECISION_FULL
);
/* emit_linterp: interp[0..3] are vec1 views of GRF nr (sub-registers 0 and
 * 4) and GRF nr + 1 (sub-registers 0 and 4).  Per channel, a LINE with
 * delta0 into the null register is followed by a MAC of
 * suboffset(interp[i], 1) with delta1 into the destination.
 * NOTE(review): the declaration and assignment of `nr` and `src0`, and any
 * per-channel write-mask test, are not visible in this excerpt. */
885 static void emit_linterp(struct brw_wm_compile
*c
,
886 const struct prog_instruction
*inst
)
888 struct brw_compile
*p
= &c
->func
;
889 GLuint mask
= inst
->DstReg
.WriteMask
;
890 struct brw_reg interp
[4];
891 struct brw_reg dst
, delta0
, delta1
;
895 src0
= get_src_reg(c
, inst
, 0, 0);
896 delta0
= get_src_reg(c
, inst
, 1, 0);
897 delta1
= get_src_reg(c
, inst
, 1, 1);
900 interp
[0] = brw_vec1_grf(nr
, 0);
901 interp
[1] = brw_vec1_grf(nr
, 4);
902 interp
[2] = brw_vec1_grf(nr
+1, 0);
903 interp
[3] = brw_vec1_grf(nr
+1, 4);
905 for(i
= 0; i
< 4; i
++ ) {
907 dst
= get_dst_reg(c
, inst
, i
);
908 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
909 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
/* emit_cinterp: interp[0..3] are vec1 views of GRF nr / nr + 1 at
 * sub-registers 0 and 4.  Per channel, the destination receives a MOV from
 * suboffset(interp[i], 3).
 * NOTE(review): the assignment of `nr` and any per-channel write-mask test
 * are not visible in this excerpt. */
914 static void emit_cinterp(struct brw_wm_compile
*c
,
915 const struct prog_instruction
*inst
)
917 struct brw_compile
*p
= &c
->func
;
918 GLuint mask
= inst
->DstReg
.WriteMask
;
920 struct brw_reg interp
[4];
921 struct brw_reg dst
, src0
;
924 src0
= get_src_reg(c
, inst
, 0, 0);
927 interp
[0] = brw_vec1_grf(nr
, 0);
928 interp
[1] = brw_vec1_grf(nr
, 4);
929 interp
[2] = brw_vec1_grf(nr
+1, 0);
930 interp
[3] = brw_vec1_grf(nr
+1, 4);
932 for(i
= 0; i
< 4; i
++ ) {
934 dst
= get_dst_reg(c
, inst
, i
);
935 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
/* emit_pinterp: same LINE + MAC sequence as emit_linterp, followed by a MUL
 * of the destination by `w` (source 2, channel 3).
 * NOTE(review): the assignment of `nr`, the MAC's last operand and any
 * per-channel write-mask test are not visible in this excerpt. */
940 static void emit_pinterp(struct brw_wm_compile
*c
,
941 const struct prog_instruction
*inst
)
943 struct brw_compile
*p
= &c
->func
;
944 GLuint mask
= inst
->DstReg
.WriteMask
;
946 struct brw_reg interp
[4];
947 struct brw_reg dst
, delta0
, delta1
;
948 struct brw_reg src0
, w
;
951 src0
= get_src_reg(c
, inst
, 0, 0);
952 delta0
= get_src_reg(c
, inst
, 1, 0);
953 delta1
= get_src_reg(c
, inst
, 1, 1);
954 w
= get_src_reg(c
, inst
, 2, 3);
957 interp
[0] = brw_vec1_grf(nr
, 0);
958 interp
[1] = brw_vec1_grf(nr
, 4);
959 interp
[2] = brw_vec1_grf(nr
+1, 0);
960 interp
[3] = brw_vec1_grf(nr
+1, 4);
962 for(i
= 0; i
< 4; i
++ ) {
964 dst
= get_dst_reg(c
, inst
, i
);
965 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
966 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
968 brw_MUL(p
, dst
, dst
, w
);
/* emit_frontfacing: first MOVs 0.0 into the destination channels, then
 * compares the UD view of r1.6 against 1 << 31 with BRW_CONDITIONAL_L, MOVs
 * 1.0 into the destination channels, and finally resets the predicate flag
 * value to 0xff.
 * NOTE(review): `1 << 31` shifts into the sign bit of a signed int;
 * `1u << 31` would avoid relying on that.  The per-channel write-mask tests
 * and the declaration of `dst` are not visible in this excerpt. */
973 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
974 static void emit_frontfacing(struct brw_wm_compile
*c
,
975 const struct prog_instruction
*inst
)
977 struct brw_compile
*p
= &c
->func
;
978 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
980 GLuint mask
= inst
->DstReg
.WriteMask
;
983 for (i
= 0; i
< 4; i
++) {
985 dst
= get_dst_reg(c
, inst
, i
);
986 brw_MOV(p
, dst
, brw_imm_f(0.0));
990 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
993 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
994 for (i
= 0; i
< 4; i
++) {
996 dst
= get_dst_reg(c
, inst
, i
);
997 brw_MOV(p
, dst
, brw_imm_f(1.0));
1000 brw_set_predicate_control_flag_value(p
, 0xff);
1003 static void emit_xpd(struct brw_wm_compile
*c
,
1004 const struct prog_instruction
*inst
)
1007 struct brw_compile
*p
= &c
->func
;
1008 GLuint mask
= inst
->DstReg
.WriteMask
;
1009 for (i
= 0; i
< 4; i
++) {
1010 GLuint i2
= (i
+2)%3;
1011 GLuint i1
= (i
+1)%3;
1012 if (mask
& (1<<i
)) {
1013 struct brw_reg src0
, src1
, dst
;
1014 dst
= get_dst_reg(c
, inst
, i
);
1015 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
1016 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
1017 brw_MUL(p
, brw_null_reg(), src0
, src1
);
1018 src0
= get_src_reg(c
, inst
, 0, i1
);
1019 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
1020 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1021 brw_MAC(p
, dst
, src0
, src1
);
1022 brw_set_saturate(p
, 0);
1025 brw_set_saturate(p
, 0);
1028 static void emit_dp3(struct brw_wm_compile
*c
,
1029 const struct prog_instruction
*inst
)
1031 struct brw_reg src0
[3], src1
[3], dst
;
1033 struct brw_compile
*p
= &c
->func
;
1034 for (i
= 0; i
< 3; i
++) {
1035 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1036 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1039 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1040 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1041 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1042 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1043 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1044 brw_set_saturate(p
, 0);
1047 static void emit_dp4(struct brw_wm_compile
*c
,
1048 const struct prog_instruction
*inst
)
1050 struct brw_reg src0
[4], src1
[4], dst
;
1052 struct brw_compile
*p
= &c
->func
;
1053 for (i
= 0; i
< 4; i
++) {
1054 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1055 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1057 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1058 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1059 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1060 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1061 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1062 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1063 brw_set_saturate(p
, 0);
1066 static void emit_dph(struct brw_wm_compile
*c
,
1067 const struct prog_instruction
*inst
)
1069 struct brw_reg src0
[4], src1
[4], dst
;
1071 struct brw_compile
*p
= &c
->func
;
1072 for (i
= 0; i
< 4; i
++) {
1073 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1074 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1076 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1077 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1078 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1079 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1080 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1081 brw_ADD(p
, dst
, dst
, src1
[3]);
1082 brw_set_saturate(p
, 0);
/* emit_math1: fetches channel 0 of source 0, MOVs it into message
 * register 2, and issues a math operation whose visible arguments are a
 * saturate selector derived from inst->SaturateMode, BRW_MATH_DATA_VECTOR
 * and BRW_MATH_PRECISION_FULL.  `tmp` is then MOVed into every destination
 * channel enabled in the write mask, and temporaries taken since
 * mark_tmps() are released.
 * NOTE(review): the assignment of `tmp`, the math call's name and its
 * leading arguments (including `func`) are not visible in this excerpt. */
1086 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1087 * Note that the result of the function is smeared across the dest
1088 * register's X, Y, Z and W channels (subject to writemasking of course).
1090 static void emit_math1(struct brw_wm_compile
*c
,
1091 const struct prog_instruction
*inst
, GLuint func
)
1093 struct brw_compile
*p
= &c
->func
;
1094 struct brw_reg src0
, dst
, tmp
;
1095 const int mark
= mark_tmps( c
);
1100 /* Get first component of source register */
1101 src0
= get_src_reg(c
, inst
, 0, 0);
1103 /* tmp = func(src0) */
1104 brw_MOV(p
, brw_message_reg(2), src0
);
1108 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1111 BRW_MATH_DATA_VECTOR
,
1112 BRW_MATH_PRECISION_FULL
);
1114 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
1116 /* replicate tmp value across enabled dest channels */
1117 for (i
= 0; i
< 4; i
++) {
1118 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
1119 dst
= get_dst_reg(c
, inst
, i
);
1120 brw_MOV(p
, dst
, tmp
);
1124 release_tmps(c
, mark
);
1127 static void emit_rcp(struct brw_wm_compile
*c
,
1128 const struct prog_instruction
*inst
)
1130 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
1133 static void emit_rsq(struct brw_wm_compile
*c
,
1134 const struct prog_instruction
*inst
)
1136 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
1139 static void emit_sin(struct brw_wm_compile
*c
,
1140 const struct prog_instruction
*inst
)
1142 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
1145 static void emit_cos(struct brw_wm_compile
*c
,
1146 const struct prog_instruction
*inst
)
1148 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1151 static void emit_ex2(struct brw_wm_compile
*c
,
1152 const struct prog_instruction
*inst
)
1154 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1157 static void emit_lg2(struct brw_wm_compile
*c
,
1158 const struct prog_instruction
*inst
)
1160 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1163 static void emit_add(struct brw_wm_compile
*c
,
1164 const struct prog_instruction
*inst
)
1166 struct brw_compile
*p
= &c
->func
;
1167 struct brw_reg src0
, src1
, dst
;
1168 GLuint mask
= inst
->DstReg
.WriteMask
;
1170 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1171 for (i
= 0 ; i
< 4; i
++) {
1172 if (mask
& (1<<i
)) {
1173 dst
= get_dst_reg(c
, inst
, i
);
1174 src0
= get_src_reg(c
, inst
, 0, i
);
1175 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1176 brw_ADD(p
, dst
, src0
, src1
);
1179 brw_set_saturate(p
, 0);
1182 static void emit_arl(struct brw_wm_compile
*c
,
1183 const struct prog_instruction
*inst
)
1185 struct brw_compile
*p
= &c
->func
;
1186 struct brw_reg src0
, addr_reg
;
1187 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1188 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1189 BRW_ARF_ADDRESS
, 0);
1190 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1191 brw_MOV(p
, addr_reg
, src0
);
1192 brw_set_saturate(p
, 0);
1195 static void emit_sub(struct brw_wm_compile
*c
,
1196 const struct prog_instruction
*inst
)
1198 struct brw_compile
*p
= &c
->func
;
1199 struct brw_reg src0
, src1
, dst
;
1200 GLuint mask
= inst
->DstReg
.WriteMask
;
1202 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1203 for (i
= 0 ; i
< 4; i
++) {
1204 if (mask
& (1<<i
)) {
1205 dst
= get_dst_reg(c
, inst
, i
);
1206 src0
= get_src_reg(c
, inst
, 0, i
);
1207 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1208 brw_ADD(p
, dst
, src0
, negate(src1
));
1211 brw_set_saturate(p
, 0);
1214 static void emit_mul(struct brw_wm_compile
*c
,
1215 const struct prog_instruction
*inst
)
1217 struct brw_compile
*p
= &c
->func
;
1218 struct brw_reg src0
, src1
, dst
;
1219 GLuint mask
= inst
->DstReg
.WriteMask
;
1221 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1222 for (i
= 0 ; i
< 4; i
++) {
1223 if (mask
& (1<<i
)) {
1224 dst
= get_dst_reg(c
, inst
, i
);
1225 src0
= get_src_reg(c
, inst
, 0, i
);
1226 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1227 brw_MUL(p
, dst
, src0
, src1
);
1230 brw_set_saturate(p
, 0);
1233 static void emit_frc(struct brw_wm_compile
*c
,
1234 const struct prog_instruction
*inst
)
1236 struct brw_compile
*p
= &c
->func
;
1237 struct brw_reg src0
, dst
;
1238 GLuint mask
= inst
->DstReg
.WriteMask
;
1240 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1241 for (i
= 0 ; i
< 4; i
++) {
1242 if (mask
& (1<<i
)) {
1243 dst
= get_dst_reg(c
, inst
, i
);
1244 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1245 brw_FRC(p
, dst
, src0
);
1248 if (inst
->SaturateMode
!= SATURATE_OFF
)
1249 brw_set_saturate(p
, 0);
1252 static void emit_flr(struct brw_wm_compile
*c
,
1253 const struct prog_instruction
*inst
)
1255 struct brw_compile
*p
= &c
->func
;
1256 struct brw_reg src0
, dst
;
1257 GLuint mask
= inst
->DstReg
.WriteMask
;
1259 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1260 for (i
= 0 ; i
< 4; i
++) {
1261 if (mask
& (1<<i
)) {
1262 dst
= get_dst_reg(c
, inst
, i
);
1263 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1264 brw_RNDD(p
, dst
, src0
);
1267 brw_set_saturate(p
, 0);
1271 static void emit_min_max(struct brw_wm_compile
*c
,
1272 const struct prog_instruction
*inst
)
1274 struct brw_compile
*p
= &c
->func
;
1275 const GLuint mask
= inst
->DstReg
.WriteMask
;
1276 const int mark
= mark_tmps(c
);
1278 brw_push_insn_state(p
);
1279 for (i
= 0; i
< 4; i
++) {
1280 if (mask
& (1<<i
)) {
1281 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1282 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1283 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1285 /* if dst==src0 or dst==src1 we need to use a temp reg */
1286 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1287 brw_same_reg(dst
, src1
);
1294 printf(" Min/max: dst %d src0 %d src1 %d\n",
1295 dst.nr, src0.nr, src1.nr);
1297 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1298 brw_MOV(p
, dst
, src0
);
1299 brw_set_saturate(p
, 0);
1301 if (inst
->Opcode
== OPCODE_MIN
)
1302 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1304 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1306 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1307 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1308 brw_MOV(p
, dst
, src1
);
1309 brw_set_saturate(p
, 0);
1310 brw_set_predicate_control_flag_value(p
, 0xff);
1312 brw_MOV(p
, real_dst
, dst
);
1315 brw_pop_insn_state(p
);
1316 release_tmps(c
, mark
);
1319 static void emit_pow(struct brw_wm_compile
*c
,
1320 const struct prog_instruction
*inst
)
1322 struct brw_compile
*p
= &c
->func
;
1323 struct brw_reg dst
, src0
, src1
;
1324 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1325 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1326 src1
= get_src_reg_imm(c
, inst
, 1, 0);
1328 brw_MOV(p
, brw_message_reg(2), src0
);
1329 brw_MOV(p
, brw_message_reg(3), src1
);
1333 BRW_MATH_FUNCTION_POW
,
1334 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1337 BRW_MATH_DATA_VECTOR
,
1338 BRW_MATH_PRECISION_FULL
);
1341 static void emit_lrp(struct brw_wm_compile
*c
,
1342 const struct prog_instruction
*inst
)
1344 struct brw_compile
*p
= &c
->func
;
1345 GLuint mask
= inst
->DstReg
.WriteMask
;
1346 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1348 int mark
= mark_tmps(c
);
1349 for (i
= 0; i
< 4; i
++) {
1350 if (mask
& (1<<i
)) {
1351 dst
= get_dst_reg(c
, inst
, i
);
1352 src0
= get_src_reg(c
, inst
, 0, i
);
1354 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1356 if (src1
.nr
== dst
.nr
) {
1357 tmp1
= alloc_tmp(c
);
1358 brw_MOV(p
, tmp1
, src1
);
1362 src2
= get_src_reg(c
, inst
, 2, i
);
1363 if (src2
.nr
== dst
.nr
) {
1364 tmp2
= alloc_tmp(c
);
1365 brw_MOV(p
, tmp2
, src2
);
1369 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1370 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1371 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1372 brw_MAC(p
, dst
, src0
, tmp1
);
1373 brw_set_saturate(p
, 0);
1375 release_tmps(c
, mark
);
1380 * For GLSL shaders, this KIL will be unconditional.
1381 * It may be contained inside an IF/ENDIF structure of course.
1383 static void emit_kil(struct brw_wm_compile
*c
)
1385 struct brw_compile
*p
= &c
->func
;
1386 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1387 brw_push_insn_state(p
);
1388 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1389 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1390 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1391 brw_pop_insn_state(p
);
1394 static void emit_mad(struct brw_wm_compile
*c
,
1395 const struct prog_instruction
*inst
)
1397 struct brw_compile
*p
= &c
->func
;
1398 GLuint mask
= inst
->DstReg
.WriteMask
;
1399 struct brw_reg dst
, src0
, src1
, src2
;
1402 for (i
= 0; i
< 4; i
++) {
1403 if (mask
& (1<<i
)) {
1404 dst
= get_dst_reg(c
, inst
, i
);
1405 src0
= get_src_reg(c
, inst
, 0, i
);
1406 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1407 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1408 brw_MUL(p
, dst
, src0
, src1
);
1410 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1411 brw_ADD(p
, dst
, dst
, src2
);
1412 brw_set_saturate(p
, 0);
1417 static void emit_sop(struct brw_wm_compile
*c
,
1418 const struct prog_instruction
*inst
, GLuint cond
)
1420 struct brw_compile
*p
= &c
->func
;
1421 GLuint mask
= inst
->DstReg
.WriteMask
;
1422 struct brw_reg dst
, src0
, src1
;
1425 for (i
= 0; i
< 4; i
++) {
1426 if (mask
& (1<<i
)) {
1427 dst
= get_dst_reg(c
, inst
, i
);
1428 src0
= get_src_reg(c
, inst
, 0, i
);
1429 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1430 brw_push_insn_state(p
);
1431 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1432 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1433 brw_MOV(p
, dst
, brw_imm_f(0.0));
1434 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1435 brw_MOV(p
, dst
, brw_imm_f(1.0));
1436 brw_pop_insn_state(p
);
1441 static void emit_slt(struct brw_wm_compile
*c
,
1442 const struct prog_instruction
*inst
)
1444 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1447 static void emit_sle(struct brw_wm_compile
*c
,
1448 const struct prog_instruction
*inst
)
1450 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1453 static void emit_sgt(struct brw_wm_compile
*c
,
1454 const struct prog_instruction
*inst
)
1456 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1459 static void emit_sge(struct brw_wm_compile
*c
,
1460 const struct prog_instruction
*inst
)
1462 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1465 static void emit_seq(struct brw_wm_compile
*c
,
1466 const struct prog_instruction
*inst
)
1468 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1471 static void emit_sne(struct brw_wm_compile
*c
,
1472 const struct prog_instruction
*inst
)
1474 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
1477 static void emit_ddx(struct brw_wm_compile
*c
,
1478 const struct prog_instruction
*inst
)
1480 struct brw_compile
*p
= &c
->func
;
1481 GLuint mask
= inst
->DstReg
.WriteMask
;
1482 struct brw_reg interp
[4];
1484 struct brw_reg src0
, w
;
1486 src0
= get_src_reg(c
, inst
, 0, 0);
1487 w
= get_src_reg(c
, inst
, 1, 3);
1489 interp
[0] = brw_vec1_grf(nr
, 0);
1490 interp
[1] = brw_vec1_grf(nr
, 4);
1491 interp
[2] = brw_vec1_grf(nr
+1, 0);
1492 interp
[3] = brw_vec1_grf(nr
+1, 4);
1493 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1494 for(i
= 0; i
< 4; i
++ ) {
1495 if (mask
& (1<<i
)) {
1496 dst
= get_dst_reg(c
, inst
, i
);
1497 brw_MOV(p
, dst
, interp
[i
]);
1498 brw_MUL(p
, dst
, dst
, w
);
1501 brw_set_saturate(p
, 0);
1504 static void emit_ddy(struct brw_wm_compile
*c
,
1505 const struct prog_instruction
*inst
)
1507 struct brw_compile
*p
= &c
->func
;
1508 GLuint mask
= inst
->DstReg
.WriteMask
;
1509 struct brw_reg interp
[4];
1511 struct brw_reg src0
, w
;
1514 src0
= get_src_reg(c
, inst
, 0, 0);
1516 w
= get_src_reg(c
, inst
, 1, 3);
1517 interp
[0] = brw_vec1_grf(nr
, 0);
1518 interp
[1] = brw_vec1_grf(nr
, 4);
1519 interp
[2] = brw_vec1_grf(nr
+1, 0);
1520 interp
[3] = brw_vec1_grf(nr
+1, 4);
1521 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1522 for(i
= 0; i
< 4; i
++ ) {
1523 if (mask
& (1<<i
)) {
1524 dst
= get_dst_reg(c
, inst
, i
);
1525 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1526 brw_MUL(p
, dst
, dst
, w
);
1529 brw_set_saturate(p
, 0);
1532 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1534 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1538 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1540 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1543 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1545 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1548 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1550 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1554 /* One-, two- and three-dimensional Perlin noise, similar to the description
1555 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1556 static void noise1_sub( struct brw_wm_compile
*c
) {
1558 struct brw_compile
*p
= &c
->func
;
1559 struct brw_reg param
,
1560 x0
, x1
, /* gradients at each end */
1561 t
, tmp
[ 2 ], /* float temporaries */
1562 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1564 int mark
= mark_tmps( c
);
1566 x0
= alloc_tmp( c
);
1567 x1
= alloc_tmp( c
);
1569 tmp
[ 0 ] = alloc_tmp( c
);
1570 tmp
[ 1 ] = alloc_tmp( c
);
1571 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1572 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1573 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1574 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1575 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1577 param
= lookup_tmp( c
, mark
- 2 );
1579 brw_set_access_mode( p
, BRW_ALIGN_1
);
1581 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1583 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1584 be hashed. Also compute the remainder (offset within the unit
1585 length), interleaved to reduce register dependency penalties. */
1586 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1587 brw_FRC( p
, param
, param
);
1588 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1589 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1590 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1592 /* We're now ready to perform the hashing. The two hashes are
1593 interleaved for performance. The hash function used is
1594 designed to rapidly achieve avalanche and require only 32x16
1595 bit multiplication, and 16-bit swizzles (which we get for
1596 free). We can't use immediate operands in the multiplies,
1597 because immediates are permitted only in src1 and the 16-bit
1598 factor is permitted only in src0. */
1599 for( i
= 0; i
< 2; i
++ )
1600 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1601 for( i
= 0; i
< 2; i
++ )
1602 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1603 high_words( itmp
[ i
] ) );
1604 for( i
= 0; i
< 2; i
++ )
1605 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1606 for( i
= 0; i
< 2; i
++ )
1607 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1608 high_words( itmp
[ i
] ) );
1609 for( i
= 0; i
< 2; i
++ )
1610 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1611 for( i
= 0; i
< 2; i
++ )
1612 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1613 high_words( itmp
[ i
] ) );
1615 /* Now we want to initialise the two gradients based on the
1616 hashes. Format conversion from signed integer to float leaves
1617 everything scaled too high by a factor of pow( 2, 31 ), but
1618 we correct for that right at the end. */
1619 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1620 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1621 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1623 brw_MUL( p
, x0
, x0
, param
);
1624 brw_MUL( p
, x1
, x1
, t
);
1626 /* We interpolate between the gradients using the polynomial
1627 6t^5 - 15t^4 + 10t^3 (Perlin). */
1628 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1629 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1630 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1631 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1632 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1633 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1635 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1636 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1637 brw_MUL( p
, x1
, x1
, param
);
1638 brw_ADD( p
, x0
, x0
, x1
);
1639 /* scale by pow( 2, -30 ), to compensate for the format conversion
1640 above and an extra factor of 2 so that a single gradient covers
1642 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1644 release_tmps( c
, mark
);
1647 static void emit_noise1( struct brw_wm_compile
*c
,
1648 const struct prog_instruction
*inst
)
1650 struct brw_compile
*p
= &c
->func
;
1651 struct brw_reg src
, param
, dst
;
1652 GLuint mask
= inst
->DstReg
.WriteMask
;
1654 int mark
= mark_tmps( c
);
1656 assert( mark
== 0 );
1658 src
= get_src_reg( c
, inst
, 0, 0 );
1660 param
= alloc_tmp( c
);
1662 brw_MOV( p
, param
, src
);
1664 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1666 /* Fill in the result: */
1667 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1668 for (i
= 0 ; i
< 4; i
++) {
1669 if (mask
& (1<<i
)) {
1670 dst
= get_dst_reg(c
, inst
, i
);
1671 brw_MOV( p
, dst
, param
);
1674 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1675 brw_set_saturate( p
, 0 );
1677 release_tmps( c
, mark
);
1680 static void noise2_sub( struct brw_wm_compile
*c
) {
1682 struct brw_compile
*p
= &c
->func
;
1683 struct brw_reg param0
, param1
,
1684 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1685 t
, tmp
[ 4 ], /* float temporaries */
1686 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1688 int mark
= mark_tmps( c
);
1690 x0y0
= alloc_tmp( c
);
1691 x0y1
= alloc_tmp( c
);
1692 x1y0
= alloc_tmp( c
);
1693 x1y1
= alloc_tmp( c
);
1695 for( i
= 0; i
< 4; i
++ ) {
1696 tmp
[ i
] = alloc_tmp( c
);
1697 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1699 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1700 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1701 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1703 param0
= lookup_tmp( c
, mark
- 3 );
1704 param1
= lookup_tmp( c
, mark
- 2 );
1706 brw_set_access_mode( p
, BRW_ALIGN_1
);
1708 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1709 be hashed. Also compute the remainders (offsets within the unit
1710 square), interleaved to reduce register dependency penalties. */
1711 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1712 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1713 brw_FRC( p
, param0
, param0
);
1714 brw_FRC( p
, param1
, param1
);
1715 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1716 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1717 low_words( itmp
[ 1 ] ) );
1718 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1719 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1720 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1721 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1722 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1724 /* We're now ready to perform the hashing. The four hashes are
1725 interleaved for performance. The hash function used is
1726 designed to rapidly achieve avalanche and require only 32x16
1727 bit multiplication, and 16-bit swizzles (which we get for
1728 free). We can't use immediate operands in the multiplies,
1729 because immediates are permitted only in src1 and the 16-bit
1730 factor is permitted only in src0. */
1731 for( i
= 0; i
< 4; i
++ )
1732 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1733 for( i
= 0; i
< 4; i
++ )
1734 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1735 high_words( itmp
[ i
] ) );
1736 for( i
= 0; i
< 4; i
++ )
1737 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1738 for( i
= 0; i
< 4; i
++ )
1739 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1740 high_words( itmp
[ i
] ) );
1741 for( i
= 0; i
< 4; i
++ )
1742 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1743 for( i
= 0; i
< 4; i
++ )
1744 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1745 high_words( itmp
[ i
] ) );
1747 /* Now we want to initialise the four gradients based on the
1748 hashes. Format conversion from signed integer to float leaves
1749 everything scaled too high by a factor of pow( 2, 15 ), but
1750 we correct for that right at the end. */
1751 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1752 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1753 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1754 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1755 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1757 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1758 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1759 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1760 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1762 brw_MUL( p
, x1y0
, x1y0
, t
);
1763 brw_MUL( p
, x1y1
, x1y1
, t
);
1764 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1765 brw_MUL( p
, x0y0
, x0y0
, param0
);
1766 brw_MUL( p
, x0y1
, x0y1
, param0
);
1768 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1769 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1770 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1771 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1773 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1774 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1775 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1776 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1778 /* We interpolate between the gradients using the polynomial
1779 6t^5 - 15t^4 + 10t^3 (Perlin). */
1780 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1781 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1782 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1783 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1784 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1785 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1786 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1788 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1789 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1790 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1791 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1792 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1794 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1795 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1796 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1797 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1799 /* Here we interpolate in the y dimension... */
1800 brw_MUL( p
, x0y1
, x0y1
, param1
);
1801 brw_MUL( p
, x1y1
, x1y1
, param1
);
1802 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1803 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1805 /* And now in x. There are horrible register dependencies here,
1806 but we have nothing else to do. */
1807 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1808 brw_MUL( p
, x1y0
, x1y0
, param0
);
1809 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1811 /* scale by pow( 2, -15 ), as described above */
1812 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1814 release_tmps( c
, mark
);
1817 static void emit_noise2( struct brw_wm_compile
*c
,
1818 const struct prog_instruction
*inst
)
1820 struct brw_compile
*p
= &c
->func
;
1821 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1822 GLuint mask
= inst
->DstReg
.WriteMask
;
1824 int mark
= mark_tmps( c
);
1826 assert( mark
== 0 );
1828 src0
= get_src_reg( c
, inst
, 0, 0 );
1829 src1
= get_src_reg( c
, inst
, 0, 1 );
1831 param0
= alloc_tmp( c
);
1832 param1
= alloc_tmp( c
);
1834 brw_MOV( p
, param0
, src0
);
1835 brw_MOV( p
, param1
, src1
);
1837 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1839 /* Fill in the result: */
1840 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1841 for (i
= 0 ; i
< 4; i
++) {
1842 if (mask
& (1<<i
)) {
1843 dst
= get_dst_reg(c
, inst
, i
);
1844 brw_MOV( p
, dst
, param0
);
1847 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1848 brw_set_saturate( p
, 0 );
1850 release_tmps( c
, mark
);
1854 * The three-dimensional case is much like the one- and two- versions above,
1855 * but since the number of corners is rapidly growing we now pack 16 16-bit
1856 * hashes into each register to extract more parallelism from the EUs.
1858 static void noise3_sub( struct brw_wm_compile
*c
) {
1860 struct brw_compile
*p
= &c
->func
;
1861 struct brw_reg param0
, param1
, param2
,
1862 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1863 xi
, yi
, zi
, /* interpolation coefficients */
1864 t
, tmp
[ 8 ], /* float temporaries */
1865 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1866 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1868 int mark
= mark_tmps( c
);
1870 x0y0
= alloc_tmp( c
);
1871 x0y1
= alloc_tmp( c
);
1872 x1y0
= alloc_tmp( c
);
1873 x1y1
= alloc_tmp( c
);
1874 xi
= alloc_tmp( c
);
1875 yi
= alloc_tmp( c
);
1876 zi
= alloc_tmp( c
);
1878 for( i
= 0; i
< 8; i
++ ) {
1879 tmp
[ i
] = alloc_tmp( c
);
1880 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1881 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1884 param0
= lookup_tmp( c
, mark
- 4 );
1885 param1
= lookup_tmp( c
, mark
- 3 );
1886 param2
= lookup_tmp( c
, mark
- 2 );
1888 brw_set_access_mode( p
, BRW_ALIGN_1
);
1890 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1891 be hashed. Also compute the remainders (offsets within the unit
1892 cube), interleaved to reduce register dependency penalties. */
1893 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1894 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1895 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1896 brw_FRC( p
, param0
, param0
);
1897 brw_FRC( p
, param1
, param1
);
1898 brw_FRC( p
, param2
, param2
);
1899 /* Since we now have only 16 bits of precision in the hash, we must
1900 be more careful about thorough mixing to maintain entropy as we
1901 squash the input vector into a small scalar. */
1902 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1903 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1904 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1905 brw_imm_uw( 0x9B93 ) );
1906 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1907 brw_imm_uw( 0xBC8F ) );
1909 /* Temporarily disable the execution mask while we work with ExecSize=16
1910 channels (the mask is set for ExecSize=8 and is probably incorrect).
1911 Although this might cause execution of unwanted channels, the code
1912 writes only to temporary registers and has no side effects, so
1913 disabling the mask is harmless. */
1914 brw_push_insn_state( p
);
1915 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1916 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1917 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1918 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1920 /* We're now ready to perform the hashing. The eight hashes are
1921 interleaved for performance. The hash function used is
1922 designed to rapidly achieve avalanche and require only 16x16
1923 bit multiplication, and 8-bit swizzles (which we get for
1925 for( i
= 0; i
< 4; i
++ )
1926 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1927 for( i
= 0; i
< 4; i
++ )
1928 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1929 odd_bytes( wtmp
[ i
] ) );
1930 for( i
= 0; i
< 4; i
++ )
1931 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1932 for( i
= 0; i
< 4; i
++ )
1933 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1934 odd_bytes( wtmp
[ i
] ) );
1935 brw_pop_insn_state( p
);
1937 /* Now we want to initialise the four rear gradients based on the
1938 hashes. Format conversion from signed integer to float leaves
1939 everything scaled too high by a factor of pow( 2, 15 ), but
1940 we correct for that right at the end. */
1942 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1943 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1944 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1945 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1946 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1948 brw_push_insn_state( p
);
1949 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1950 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1951 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1952 brw_pop_insn_state( p
);
1954 brw_MUL( p
, x1y0
, x1y0
, t
);
1955 brw_MUL( p
, x1y1
, x1y1
, t
);
1956 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1957 brw_MUL( p
, x0y0
, x0y0
, param0
);
1958 brw_MUL( p
, x0y1
, x0y1
, param0
);
1961 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1962 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1963 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1964 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1966 brw_push_insn_state( p
);
1967 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1968 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1969 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1970 brw_pop_insn_state( p
);
1972 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1973 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1974 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1975 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1976 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1978 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1979 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1980 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1981 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1984 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1985 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1986 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1987 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1989 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1990 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1991 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1992 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1994 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1995 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1996 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1997 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1999 /* We interpolate between the gradients using the polynomial
2000 6t^5 - 15t^4 + 10t^3 (Perlin). */
2001 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
2002 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
2003 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
2004 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
2005 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
2006 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
2007 brw_MUL( p
, xi
, xi
, param0
);
2008 brw_MUL( p
, yi
, yi
, param1
);
2009 brw_MUL( p
, zi
, zi
, param2
);
2010 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
2011 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
2012 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
2013 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
2014 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
2015 brw_MUL( p
, xi
, xi
, param0
);
2016 brw_MUL( p
, yi
, yi
, param1
);
2017 brw_MUL( p
, zi
, zi
, param2
);
2018 brw_MUL( p
, xi
, xi
, param0
);
2019 brw_MUL( p
, yi
, yi
, param1
);
2020 brw_MUL( p
, zi
, zi
, param2
);
2021 brw_MUL( p
, xi
, xi
, param0
);
2022 brw_MUL( p
, yi
, yi
, param1
);
2023 brw_MUL( p
, zi
, zi
, param2
);
2025 /* Here we interpolate in the y dimension... */
2026 brw_MUL( p
, x0y1
, x0y1
, yi
);
2027 brw_MUL( p
, x1y1
, x1y1
, yi
);
2028 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2029 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2031 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2032 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2033 brw_MUL( p
, x1y0
, x1y0
, xi
);
2034 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2036 /* Now do the same thing for the front four gradients... */
2038 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2039 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2040 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2041 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2043 brw_push_insn_state( p
);
2044 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2045 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2046 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2047 brw_pop_insn_state( p
);
2049 brw_MUL( p
, x1y0
, x1y0
, t
);
2050 brw_MUL( p
, x1y1
, x1y1
, t
);
2051 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
2052 brw_MUL( p
, x0y0
, x0y0
, param0
);
2053 brw_MUL( p
, x0y1
, x0y1
, param0
);
2056 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2057 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2058 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2059 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2061 brw_push_insn_state( p
);
2062 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2063 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2064 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2065 brw_pop_insn_state( p
);
2067 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2068 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2069 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
2070 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
2071 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
2073 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2074 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2075 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2076 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2079 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2080 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2081 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2082 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2084 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2085 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2086 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2087 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2089 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2090 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2091 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2092 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2094 /* The interpolation coefficients are still around from last time, so
2095 again interpolate in the y dimension... */
2096 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2097 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2098 brw_MUL( p
, x0y1
, x0y1
, yi
);
2099 brw_MUL( p
, x1y1
, x1y1
, yi
);
2100 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2101 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2103 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2104 time put the front face in tmp[ 1 ] and we're nearly there... */
2105 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2106 brw_MUL( p
, x1y0
, x1y0
, xi
);
2107 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2109 /* The final interpolation, in the z dimension: */
2110 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2111 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
2112 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2114 /* scale by pow( 2, -15 ), as described above */
2115 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2117 release_tmps( c
, mark
);
2120 static void emit_noise3( struct brw_wm_compile
*c
,
2121 const struct prog_instruction
*inst
)
2123 struct brw_compile
*p
= &c
->func
;
2124 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
2125 GLuint mask
= inst
->DstReg
.WriteMask
;
2127 int mark
= mark_tmps( c
);
2129 assert( mark
== 0 );
2131 src0
= get_src_reg( c
, inst
, 0, 0 );
2132 src1
= get_src_reg( c
, inst
, 0, 1 );
2133 src2
= get_src_reg( c
, inst
, 0, 2 );
2135 param0
= alloc_tmp( c
);
2136 param1
= alloc_tmp( c
);
2137 param2
= alloc_tmp( c
);
2139 brw_MOV( p
, param0
, src0
);
2140 brw_MOV( p
, param1
, src1
);
2141 brw_MOV( p
, param2
, src2
);
2143 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
2145 /* Fill in the result: */
2146 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2147 for (i
= 0 ; i
< 4; i
++) {
2148 if (mask
& (1<<i
)) {
2149 dst
= get_dst_reg(c
, inst
, i
);
2150 brw_MOV( p
, dst
, param0
);
2153 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2154 brw_set_saturate( p
, 0 );
2156 release_tmps( c
, mark
);
2160 * For the four-dimensional case, the little micro-optimisation benefits
2161 * we obtain by unrolling all the loops aren't worth the massive bloat it
2162 * now causes. Instead, we loop twice around performing a similar operation
2163 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2164 * code to glue it all together.
2166 static void noise4_sub( struct brw_wm_compile
*c
)
2168 struct brw_compile
*p
= &c
->func
;
2169 struct brw_reg param
[ 4 ],
2170 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2171 w0
, /* noise for the w=0 cube */
2172 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2173 interp
[ 4 ], /* interpolation coefficients */
2174 t
, tmp
[ 8 ], /* float temporaries */
2175 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2176 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2178 int mark
= mark_tmps( c
);
2179 GLuint loop
, origin
;
2181 x0y0
= alloc_tmp( c
);
2182 x0y1
= alloc_tmp( c
);
2183 x1y0
= alloc_tmp( c
);
2184 x1y1
= alloc_tmp( c
);
2186 w0
= alloc_tmp( c
);
2187 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2188 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2190 for( i
= 0; i
< 4; i
++ ) {
2191 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2192 interp
[ i
] = alloc_tmp( c
);
2195 for( i
= 0; i
< 8; i
++ ) {
2196 tmp
[ i
] = alloc_tmp( c
);
2197 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2198 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2201 brw_set_access_mode( p
, BRW_ALIGN_1
);
2203 /* We only want 16 bits of precision from the integral part of each
2204 co-ordinate, but unfortunately the RNDD semantics would saturate
2205 at 16 bits if we performed the operation directly to a 16-bit
2206 destination. Therefore, we round to 32-bit temporaries where
2207 appropriate, and then store only the lower 16 bits. */
2208 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2209 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2210 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2211 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2212 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2213 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2215 /* Modify the flag register here, because the side effect is useful
2216 later (see below). We know for certain that all flags will be
2217 cleared, since the FRC instruction cannot possibly generate
2218 negative results. Even for exceptional inputs (infinities, denormals,
2219 NaNs), the architecture guarantees that the L conditional is false. */
2220 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2221 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2222 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2223 for( i
= 1; i
< 4; i
++ )
2224 brw_FRC( p
, param
[ i
], param
[ i
] );
2226 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2228 for( i
= 0; i
< 4; i
++ )
2229 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2230 for( i
= 0; i
< 4; i
++ )
2231 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2232 for( i
= 0; i
< 4; i
++ )
2233 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2234 for( i
= 0; i
< 4; i
++ )
2235 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2236 for( j
= 0; j
< 3; j
++ )
2237 for( i
= 0; i
< 4; i
++ )
2238 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2240 /* Mark the current address, as it will be a jump destination. The
2241 following code will be executed twice: first, with the flag
2242 register clear indicating the w=0 case, and second with flags
2246 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2247 be hashed. Since we have only 16 bits of precision in the hash, we
2248 must be careful about thorough mixing to maintain entropy as we
2249 squash the input vector into a small scalar. */
2250 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2251 brw_imm_uw( 0xBC8F ) );
2252 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2253 brw_imm_uw( 0xD0BD ) );
2254 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2255 brw_imm_uw( 0x9B93 ) );
2256 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2257 brw_imm_uw( 0xA359 ) );
2258 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2259 brw_imm_uw( 0xBC8F ) );
2261 /* Temporarily disable the execution mask while we work with ExecSize=16
2262 channels (the mask is set for ExecSize=8 and is probably incorrect).
2263 Although this might cause execution of unwanted channels, the code
2264 writes only to temporary registers and has no side effects, so
2265 disabling the mask is harmless. */
2266 brw_push_insn_state( p
);
2267 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2268 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2269 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2270 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2272 /* We're now ready to perform the hashing. The eight hashes are
2273 interleaved for performance. The hash function used is
2274 designed to rapidly achieve avalanche and require only 16x16
2275 bit multiplication, and 8-bit swizzles (which we get for
2277 for( i
= 0; i
< 4; i
++ )
2278 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2279 for( i
= 0; i
< 4; i
++ )
2280 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2281 odd_bytes( wtmp
[ i
] ) );
2282 for( i
= 0; i
< 4; i
++ )
2283 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2284 for( i
= 0; i
< 4; i
++ )
2285 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2286 odd_bytes( wtmp
[ i
] ) );
2287 brw_pop_insn_state( p
);
2289 /* Now we want to initialise the four rear gradients based on the
2290 hashes. Format conversion from signed integer to float leaves
2291 everything scaled too high by a factor of pow( 2, 15 ), but
2292 we correct for that right at the end. */
2294 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2295 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2296 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2297 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2298 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2300 brw_push_insn_state( p
);
2301 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2302 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2303 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2304 brw_pop_insn_state( p
);
2306 brw_MUL( p
, x1y0
, x1y0
, t
);
2307 brw_MUL( p
, x1y1
, x1y1
, t
);
2308 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2309 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2310 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2313 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2314 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2315 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2316 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2318 brw_push_insn_state( p
);
2319 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2320 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2321 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2322 brw_pop_insn_state( p
);
2324 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2325 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2326 /* prepare t for the w component (used below): w the first time through
2327 the loop; w - 1 the second time) */
2328 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2329 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2330 p
->current
->header
.predicate_inverse
= 1;
2331 brw_MOV( p
, t
, param
[ 3 ] );
2332 p
->current
->header
.predicate_inverse
= 0;
2333 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2334 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2335 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2337 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2338 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2339 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2340 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2343 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2344 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2345 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2346 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2348 brw_push_insn_state( p
);
2349 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2350 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2351 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2352 brw_pop_insn_state( p
);
2354 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2355 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2356 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2357 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2359 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2360 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2361 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2362 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2365 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2366 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2367 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2368 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2370 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2371 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2372 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2373 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2374 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2376 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2377 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2378 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2379 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2381 /* Here we interpolate in the y dimension... */
2382 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2383 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2384 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2385 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2386 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2387 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2389 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2390 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2391 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2392 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2394 /* Now do the same thing for the front four gradients... */
2396 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2397 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2398 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2399 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2401 brw_push_insn_state( p
);
2402 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2403 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2404 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2405 brw_pop_insn_state( p
);
2407 brw_MUL( p
, x1y0
, x1y0
, t
);
2408 brw_MUL( p
, x1y1
, x1y1
, t
);
2409 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2410 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2411 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2414 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2415 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2416 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2417 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2419 brw_push_insn_state( p
);
2420 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2421 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2422 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2423 brw_pop_insn_state( p
);
2425 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2426 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2427 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2428 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2429 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2431 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2432 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2433 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2434 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2437 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2438 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2439 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2440 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2442 brw_push_insn_state( p
);
2443 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2444 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2445 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2446 brw_pop_insn_state( p
);
2448 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2449 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2450 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2451 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2452 /* prepare t for the w component (used below): w the first time through
2453 the loop; w - 1 the second time) */
2454 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2455 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2456 p
->current
->header
.predicate_inverse
= 1;
2457 brw_MOV( p
, t
, param
[ 3 ] );
2458 p
->current
->header
.predicate_inverse
= 0;
2459 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2461 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2462 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2463 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2464 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2467 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2468 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2469 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2470 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2472 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2473 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2474 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2475 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2477 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2478 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2479 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2480 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2482 /* Interpolate in the y dimension: */
2483 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2484 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2485 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2486 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2487 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2488 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2490 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2491 time put the front face in tmp[ 1 ] and we're nearly there... */
2492 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2493 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2494 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2496 /* Another interpolation, in the z dimension: */
2497 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2498 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2499 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2501 /* Exit the loop if we've computed both cubes... */
2502 origin
= p
->nr_insn
;
2503 brw_push_insn_state( p
);
2504 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2505 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2506 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2507 brw_pop_insn_state( p
);
2509 /* Save the result for the w=0 case, and increment the w coordinate: */
2510 brw_MOV( p
, w0
, tmp
[ 0 ] );
2511 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2514 /* Loop around for the other cube. Explicitly set the flag register
2515 (unfortunately we must spend an extra instruction to do this: we
2516 can't rely on a side effect of the previous MOV or ADD because
2517 conditional modifiers which are normally true might be false in
2518 exceptional circumstances, e.g. given a NaN input; the add to
2519 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2520 brw_push_insn_state( p
);
2521 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2522 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2523 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2524 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2525 brw_pop_insn_state( p
);
2527 /* Patch the previous conditional branch now that we know the
2528 destination address. */
2529 brw_set_src1( p
->store
+ origin
,
2530 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2532 /* The very last interpolation. */
2533 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2534 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2535 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2537 /* scale by pow( 2, -15 ), as described above */
2538 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2540 release_tmps( c
, mark
);
2543 static void emit_noise4( struct brw_wm_compile
*c
,
2544 const struct prog_instruction
*inst
)
2546 struct brw_compile
*p
= &c
->func
;
2547 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2548 GLuint mask
= inst
->DstReg
.WriteMask
;
2550 int mark
= mark_tmps( c
);
2552 assert( mark
== 0 );
2554 src0
= get_src_reg( c
, inst
, 0, 0 );
2555 src1
= get_src_reg( c
, inst
, 0, 1 );
2556 src2
= get_src_reg( c
, inst
, 0, 2 );
2557 src3
= get_src_reg( c
, inst
, 0, 3 );
2559 param0
= alloc_tmp( c
);
2560 param1
= alloc_tmp( c
);
2561 param2
= alloc_tmp( c
);
2562 param3
= alloc_tmp( c
);
2564 brw_MOV( p
, param0
, src0
);
2565 brw_MOV( p
, param1
, src1
);
2566 brw_MOV( p
, param2
, src2
);
2567 brw_MOV( p
, param3
, src3
);
2569 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2571 /* Fill in the result: */
2572 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2573 for (i
= 0 ; i
< 4; i
++) {
2574 if (mask
& (1<<i
)) {
2575 dst
= get_dst_reg(c
, inst
, i
);
2576 brw_MOV( p
, dst
, param0
);
2579 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2580 brw_set_saturate( p
, 0 );
2582 release_tmps( c
, mark
);
2585 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2586 const struct prog_instruction
*inst
)
2588 struct brw_compile
*p
= &c
->func
;
2589 GLuint mask
= inst
->DstReg
.WriteMask
;
2590 struct brw_reg src0
[2], dst
[2];
2592 dst
[0] = get_dst_reg(c
, inst
, 0);
2593 dst
[1] = get_dst_reg(c
, inst
, 1);
2595 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2596 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2598 /* Calculate the pixel offset from window bottom left into destination
2601 if (mask
& WRITEMASK_X
) {
2602 /* X' = X - origin_x */
2605 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2606 brw_imm_d(0 - c
->key
.origin_x
));
2609 if (mask
& WRITEMASK_Y
) {
2610 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2613 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2614 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
2619 BIAS on SIMD8 not working yet...
2621 static void emit_txb(struct brw_wm_compile
*c
,
2622 const struct prog_instruction
*inst
)
2624 struct brw_compile
*p
= &c
->func
;
2625 struct brw_reg dst
[4], src
[4], payload_reg
;
2626 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2629 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2631 for (i
= 0; i
< 4; i
++)
2632 dst
[i
] = get_dst_reg(c
, inst
, i
);
2633 for (i
= 0; i
< 4; i
++)
2634 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2636 switch (inst
->TexSrcTarget
) {
2637 case TEXTURE_1D_INDEX
:
2638 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2639 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2640 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2642 case TEXTURE_2D_INDEX
:
2643 case TEXTURE_RECT_INDEX
:
2644 brw_MOV(p
, brw_message_reg(2), src
[0]);
2645 brw_MOV(p
, brw_message_reg(3), src
[1]);
2646 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2649 brw_MOV(p
, brw_message_reg(2), src
[0]);
2650 brw_MOV(p
, brw_message_reg(3), src
[1]);
2651 brw_MOV(p
, brw_message_reg(4), src
[2]);
2654 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2655 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
2657 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2659 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2660 SURF_INDEX_TEXTURE(unit
),
2662 inst
->DstReg
.WriteMask
, /* writemask */
2663 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2664 4, /* response_length */
2670 static void emit_tex(struct brw_wm_compile
*c
,
2671 const struct prog_instruction
*inst
)
2673 struct brw_compile
*p
= &c
->func
;
2674 struct brw_reg dst
[4], src
[4], payload_reg
;
2675 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2679 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2681 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2683 for (i
= 0; i
< 4; i
++)
2684 dst
[i
] = get_dst_reg(c
, inst
, i
);
2685 for (i
= 0; i
< 4; i
++)
2686 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2688 switch (inst
->TexSrcTarget
) {
2689 case TEXTURE_1D_INDEX
:
2693 case TEXTURE_2D_INDEX
:
2694 case TEXTURE_RECT_INDEX
:
2695 emit
= WRITEMASK_XY
;
2699 emit
= WRITEMASK_XYZ
;
2705 /* move/load S, T, R coords */
2706 for (i
= 0; i
< nr
; i
++) {
2707 static const GLuint swz
[4] = {0,1,2,2};
2709 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2711 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2716 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2717 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
2721 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2723 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2724 SURF_INDEX_TEXTURE(unit
),
2726 inst
->DstReg
.WriteMask
, /* writemask */
2727 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2728 4, /* response_length */
2729 shadow
? 6 : 4, /* msg_length */
2733 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2738 * Resolve subroutine calls after code emit is done.
2740 static void post_wm_emit( struct brw_wm_compile
*c
)
2742 brw_resolve_cals(&c
->func
);
2745 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2747 #define MAX_IF_DEPTH 32
2748 #define MAX_LOOP_DEPTH 32
2749 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
2750 GLuint i
, if_depth
= 0, loop_depth
= 0;
2751 struct brw_compile
*p
= &c
->func
;
2752 struct brw_indirect stack_index
= brw_indirect(0, 0);
2754 c
->out_of_regs
= GL_FALSE
;
2757 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2758 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2760 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2761 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2766 _mesa_printf("Inst %d: ", i
);
2767 _mesa_print_instruction(inst
);
2770 /* fetch any constants that this instruction needs */
2771 if (c
->fp
->use_const_buffer
)
2772 fetch_constants(c
, inst
);
2774 if (inst
->CondUpdate
)
2775 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2777 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2779 switch (inst
->Opcode
) {
2781 emit_pixel_xy(c
, inst
);
2784 emit_delta_xy(c
, inst
);
2787 emit_pixel_w(c
, inst
);
2790 emit_linterp(c
, inst
);
2793 emit_pinterp(c
, inst
);
2796 emit_cinterp(c
, inst
);
2799 emit_wpos_xy(c
, inst
);
2802 emit_fb_write(c
, inst
);
2804 case WM_FRONTFACING
:
2805 emit_frontfacing(c
, inst
);
2829 emit_trunc(c
, inst
);
2867 emit_min_max(c
, inst
);
2903 emit_noise1(c
, inst
);
2906 emit_noise2(c
, inst
);
2909 emit_noise3(c
, inst
);
2912 emit_noise4(c
, inst
);
2924 assert(if_depth
< MAX_IF_DEPTH
);
2925 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
2928 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2931 assert(if_depth
> 0);
2932 brw_ENDIF(p
, if_inst
[--if_depth
]);
2935 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2941 brw_push_insn_state(p
);
2942 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2943 brw_set_access_mode(p
, BRW_ALIGN_1
);
2944 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2945 brw_set_access_mode(p
, BRW_ALIGN_16
);
2946 brw_ADD(p
, get_addr_reg(stack_index
),
2947 get_addr_reg(stack_index
), brw_imm_d(4));
2948 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2949 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2950 brw_pop_insn_state(p
);
2954 brw_push_insn_state(p
);
2955 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2956 brw_ADD(p
, get_addr_reg(stack_index
),
2957 get_addr_reg(stack_index
), brw_imm_d(-4));
2958 brw_set_access_mode(p
, BRW_ALIGN_1
);
2959 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2960 brw_set_access_mode(p
, BRW_ALIGN_16
);
2961 brw_pop_insn_state(p
);
2964 case OPCODE_BGNLOOP
:
2965 /* XXX may need to invalidate the current_constant regs */
2966 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2970 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2974 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2976 case OPCODE_ENDLOOP
:
2978 struct brw_instruction
*inst0
, *inst1
;
2980 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2981 /* patch all the BREAK/CONT instructions from last BEGINLOOP */
2982 while (inst0
> loop_inst
[loop_depth
]) {
2984 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2985 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2986 inst0
->bits3
.if_else
.pop_count
= 0;
2988 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2989 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2990 inst0
->bits3
.if_else
.pop_count
= 0;
2996 _mesa_printf("unsupported IR in fragment shader %d\n",
3000 if (inst
->CondUpdate
)
3001 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
3003 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
3010 * Do GPU code generation for shaders that use GLSL features such as
3011 * flow control. Other shaders will be compiled with the
3013 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
3015 if (INTEL_DEBUG
& DEBUG_WM
) {
3016 _mesa_printf("brw_wm_glsl_emit:\n");
3019 /* initial instruction translation/simplification */
3022 /* actual code generation */
3023 brw_wm_emit_glsl(brw
, c
);
3025 if (INTEL_DEBUG
& DEBUG_WM
) {
3026 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
3029 c
->prog_data
.total_grf
= num_grf_used(c
);
3030 c
->prog_data
.total_scratch
= 0;