1 #include "main/macros.h"
2 #include "shader/prog_parameter.h"
3 #include "shader/prog_print.h"
4 #include "shader/prog_optimize.h"
5 #include "brw_context.h"
10 SUB_NOISE1
, SUB_NOISE2
, SUB_NOISE3
, SUB_NOISE4
15 * Determine if the given fragment program uses GLSL features such
16 * as flow conditionals, loops, subroutines.
17 * Some GLSL shaders may use these features, others might not.
19 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
23 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
24 switch (inst
->Opcode
) {
49 reclaim_temps(struct brw_wm_compile
*c
);
52 /** Mark GRF register as used. */
54 prealloc_grf(struct brw_wm_compile
*c
, int r
)
56 c
->used_grf
[r
] = GL_TRUE
;
60 /** Mark given GRF register as not in use. */
62 release_grf(struct brw_wm_compile
*c
, int r
)
64 /*assert(c->used_grf[r]);*/
65 c
->used_grf
[r
] = GL_FALSE
;
66 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
70 /** Return index of a free GRF, mark it as used. */
72 alloc_grf(struct brw_wm_compile
*c
)
75 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
76 if (!c
->used_grf
[r
]) {
77 c
->used_grf
[r
] = GL_TRUE
;
78 c
->first_free_grf
= r
+ 1; /* a guess */
83 /* no free temps, try to reclaim some */
85 c
->first_free_grf
= 0;
88 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
89 if (!c
->used_grf
[r
]) {
90 c
->used_grf
[r
] = GL_TRUE
;
91 c
->first_free_grf
= r
+ 1; /* a guess */
96 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
97 assert(c
->used_grf
[r
]);
99 /*printf("Really out of temp regs!\n");*/
104 /** Return number of GRF registers used */
106 num_grf_used(const struct brw_wm_compile
*c
)
109 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
118 * Record the mapping of a Mesa register to a hardware register.
120 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
121 int component
, struct brw_reg reg
)
123 c
->wm_regs
[file
][index
][component
].reg
= reg
;
124 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
128 * Examine instruction's write mask to find index of first component
129 * enabled for writing.
131 static int get_scalar_dst_index(const struct prog_instruction
*inst
)
134 for (i
= 0; i
< 4; i
++)
135 if (inst
->DstReg
.WriteMask
& (1<<i
))
140 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
144 /* if we need to allocate another temp, grow the tmp_regs[] array */
145 if (c
->tmp_index
== c
->tmp_max
) {
146 c
->tmp_regs
[ c
->tmp_max
++ ] = alloc_grf(c
);
149 /* form the GRF register */
150 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
151 /*printf("alloc_temp %d\n", reg.nr);*/
152 assert(reg
.nr
< BRW_WM_MAX_GRF
);
158 * Save current temp register info.
159 * There must be a matching call to release_tmps().
161 static int mark_tmps(struct brw_wm_compile
*c
)
166 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
168 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
171 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
177 * Convert Mesa src register to brw register.
179 * Since we're running in SOA mode each Mesa register corresponds to four
180 * hardware registers. We allocate the hardware registers as needed here.
182 * \param file register file, one of PROGRAM_x
183 * \param index register number
184 * \param component src component (X=0, Y=1, Z=2, W=3)
185 * \param nr not used?!?
186 * \param neg negate value?
187 * \param abs take absolute value?
189 static struct brw_reg
190 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
191 int nr
, GLuint neg
, GLuint abs
)
195 case PROGRAM_STATE_VAR
:
196 case PROGRAM_CONSTANT
:
197 case PROGRAM_UNIFORM
:
198 file
= PROGRAM_STATE_VAR
;
200 case PROGRAM_UNDEFINED
:
201 return brw_null_reg();
202 case PROGRAM_TEMPORARY
:
205 case PROGRAM_PAYLOAD
:
208 _mesa_problem(NULL
, "Unexpected file in get_reg()");
209 return brw_null_reg();
213 /* see if we've already allocated a HW register for this Mesa register */
214 if (c
->wm_regs
[file
][index
][component
].inited
) {
216 reg
= c
->wm_regs
[file
][index
][component
].reg
;
219 /* no, allocate new register */
220 int grf
= alloc_grf(c
);
222 /* totally out of temps */
223 grf
= 70; /* XXX !!!! */
226 reg
= brw_vec8_grf(grf
, 0);
227 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
229 set_reg(c
, file
, index
, component
, reg
);
232 if (neg
& (1 << component
)) {
243 * This is called if we run out of GRF registers. Examine the live intervals
244 * of temp regs in the program and free those which won't be used again.
247 reclaim_temps(struct brw_wm_compile
*c
)
249 GLint intBegin
[MAX_PROGRAM_TEMPS
];
250 GLint intEnd
[MAX_PROGRAM_TEMPS
];
253 /*printf("Reclaim temps:\n");*/
255 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
258 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
259 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
260 /* program temp[i] can be freed */
262 /*printf(" temp[%d] is dead\n", index);*/
263 for (component
= 0; component
< 4; component
++) {
264 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
265 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
268 printf(" Reclaim temp %d, reg %d at inst %d\n",
269 index, r, c->cur_inst);
271 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
282 * Preallocate registers. This sets up the Mesa to hardware register
283 * mapping for certain registers, such as constants (uniforms/state vars)
286 static void prealloc_reg(struct brw_wm_compile
*c
)
290 int nr_interp_regs
= 0;
291 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
| c
->fp_deriv_emitted
;
292 GLuint reg_index
= 0;
294 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
295 c
->first_free_grf
= 0;
297 for (i
= 0; i
< 4; i
++) {
298 if (i
< c
->key
.nr_depth_regs
)
299 reg
= brw_vec8_grf(i
* 2, 0);
301 reg
= brw_vec8_grf(0, 0);
302 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
304 reg_index
+= 2 * c
->key
.nr_depth_regs
;
308 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
309 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
311 /* use a real constant buffer, or just use a section of the GRF? */
312 /* XXX this heuristic may need adjustment... */
313 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
314 c
->fp
->use_const_buffer
= GL_TRUE
;
316 c
->fp
->use_const_buffer
= GL_FALSE
;
317 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
319 if (c
->fp
->use_const_buffer
) {
320 /* We'll use a real constant buffer and fetch constants from
321 * it with a dataport read message.
324 /* number of float constants in CURBE */
325 c
->prog_data
.nr_params
= 0;
328 const struct gl_program_parameter_list
*plist
=
329 c
->fp
->program
.Base
.Parameters
;
332 /* number of float constants in CURBE */
333 c
->prog_data
.nr_params
= 4 * nr_params
;
335 /* loop over program constants (float[4]) */
336 for (i
= 0; i
< nr_params
; i
++) {
337 /* loop over XYZW channels */
338 for (j
= 0; j
< 4; j
++, index
++) {
339 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
340 /* Save pointer to parameter/constant value.
341 * Constants will be copied in prepare_constant_buffer()
343 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
344 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
347 /* number of constant regs used (each reg is float[8]) */
348 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
349 reg_index
+= c
->nr_creg
;
353 /* fragment shader inputs */
354 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
355 if (inputs
& (1<<i
)) {
357 reg
= brw_vec8_grf(reg_index
, 0);
358 for (j
= 0; j
< 4; j
++)
359 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
364 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
365 c
->prog_data
.urb_read_length
= nr_interp_regs
* 2;
366 c
->prog_data
.curb_read_length
= c
->nr_creg
;
367 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
369 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
372 /* mark GRF regs [0..reg_index-1] as in-use */
373 for (i
= 0; i
< reg_index
; i
++)
376 /* An instruction may reference up to three constants.
377 * They'll be found in these registers.
378 * XXX alloc these on demand!
380 if (c
->fp
->use_const_buffer
) {
381 for (i
= 0; i
< 3; i
++) {
382 c
->current_const
[i
].index
= -1;
383 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
387 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
388 printf("AFTER PRE_ALLOC, reg_index = %d\n", c
->reg_index
);
394 * Check if any of the instruction's src registers are constants, uniforms,
395 * or statevars. If so, fetch any constants that we don't already have in
396 * the three GRF slots.
398 static void fetch_constants(struct brw_wm_compile
*c
,
399 const struct prog_instruction
*inst
)
401 struct brw_compile
*p
= &c
->func
;
404 /* loop over instruction src regs */
405 for (i
= 0; i
< 3; i
++) {
406 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
407 if (src
->File
== PROGRAM_STATE_VAR
||
408 src
->File
== PROGRAM_CONSTANT
||
409 src
->File
== PROGRAM_UNIFORM
) {
410 if (c
->current_const
[i
].index
!= src
->Index
) {
411 c
->current_const
[i
].index
= src
->Index
;
414 printf(" fetch const[%d] for arg %d into reg %d\n",
415 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
418 /* need to fetch the constant now */
420 c
->current_const
[i
].reg
, /* writeback dest */
422 src
->RelAddr
, /* relative indexing? */
423 16 * src
->Index
, /* byte offset */
424 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
433 * Convert Mesa dst register to brw register.
435 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
436 const struct prog_instruction
*inst
,
440 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
445 static struct brw_reg
446 get_src_reg_const(struct brw_wm_compile
*c
,
447 const struct prog_instruction
*inst
,
448 GLuint srcRegIndex
, GLuint component
)
450 /* We should have already fetched the constant from the constant
451 * buffer in fetch_constants(). Now we just have to return a
452 * register description that extracts the needed component and
453 * smears it across all eight vector components.
455 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
456 struct brw_reg const_reg
;
458 assert(component
< 4);
459 assert(srcRegIndex
< 3);
460 assert(c
->current_const
[srcRegIndex
].index
!= -1);
461 const_reg
= c
->current_const
[srcRegIndex
].reg
;
463 /* extract desired float from the const_reg, and smear */
464 const_reg
= stride(const_reg
, 0, 1, 0);
465 const_reg
.subnr
= component
* 4;
467 if (src
->Negate
& (1 << component
))
468 const_reg
= negate(const_reg
);
470 const_reg
= brw_abs(const_reg
);
473 printf(" form const[%d].%d for arg %d, reg %d\n",
474 c
->current_const
[srcRegIndex
].index
,
485 * Convert Mesa src register to brw register.
487 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
488 const struct prog_instruction
*inst
,
489 GLuint srcRegIndex
, GLuint channel
)
491 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
493 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
495 if (c
->fp
->use_const_buffer
&&
496 (src
->File
== PROGRAM_STATE_VAR
||
497 src
->File
== PROGRAM_CONSTANT
||
498 src
->File
== PROGRAM_UNIFORM
)) {
499 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
502 /* other type of source register */
503 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
504 src
->Negate
, src
->Abs
);
510 * Same as \sa get_src_reg() but if the register is a literal, emit
511 * a brw_reg encoding the literal.
512 * Note that a brw instruction only allows one src operand to be a literal.
513 * For instructions with more than one operand, only the second can be a
514 * literal. This means that we treat some literals as constants/uniforms
515 * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
518 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
519 const struct prog_instruction
*inst
,
520 GLuint srcRegIndex
, GLuint channel
)
522 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
523 if (src
->File
== PROGRAM_CONSTANT
) {
525 const int component
= GET_SWZ(src
->Swizzle
, channel
);
526 const GLfloat
*param
=
527 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
528 GLfloat value
= param
[component
];
529 if (src
->Negate
& (1 << channel
))
532 value
= FABSF(value
);
534 printf(" form immed value %f for chan %d\n", value
, channel
);
536 return brw_imm_f(value
);
539 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
545 * Subroutines are minimal support for reusable instruction sequences.
546 * They are implemented as simply as possible to minimise overhead: there
547 * is no explicit support for communication between the caller and callee
548 * other than saving the return address in a temporary register, nor is
549 * there any automatic local storage. This implies that great care is
550 * required before attempting reentrancy or any kind of nested
551 * subroutine invocations.
553 static void invoke_subroutine( struct brw_wm_compile
*c
,
554 enum _subroutine subroutine
,
555 void (*emit
)( struct brw_wm_compile
* ) )
557 struct brw_compile
*p
= &c
->func
;
559 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
561 if( c
->subroutines
[ subroutine
] ) {
562 /* subroutine previously emitted: reuse existing instructions */
564 int mark
= mark_tmps( c
);
565 struct brw_reg return_address
= retype( alloc_tmp( c
),
566 BRW_REGISTER_TYPE_UD
);
567 int here
= p
->nr_insn
;
569 brw_push_insn_state(p
);
570 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
571 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
573 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
574 brw_imm_d( ( c
->subroutines
[ subroutine
] -
576 brw_pop_insn_state(p
);
578 release_tmps( c
, mark
);
580 /* previously unused subroutine: emit, and mark for later reuse */
582 int mark
= mark_tmps( c
);
583 struct brw_reg return_address
= retype( alloc_tmp( c
),
584 BRW_REGISTER_TYPE_UD
);
585 struct brw_instruction
*calc
;
586 int base
= p
->nr_insn
;
588 brw_push_insn_state(p
);
589 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
590 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
591 brw_pop_insn_state(p
);
593 c
->subroutines
[ subroutine
] = p
->nr_insn
;
597 brw_push_insn_state(p
);
598 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
599 brw_MOV( p
, brw_ip_reg(), return_address
);
600 brw_pop_insn_state(p
);
602 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
604 release_tmps( c
, mark
);
608 static void emit_abs( struct brw_wm_compile
*c
,
609 const struct prog_instruction
*inst
)
612 struct brw_compile
*p
= &c
->func
;
613 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
614 for (i
= 0; i
< 4; i
++) {
615 if (inst
->DstReg
.WriteMask
& (1<<i
)) {
616 struct brw_reg src
, dst
;
617 dst
= get_dst_reg(c
, inst
, i
);
618 src
= get_src_reg(c
, inst
, 0, i
);
619 brw_MOV(p
, dst
, brw_abs(src
));
622 brw_set_saturate(p
, 0);
625 static void emit_trunc( struct brw_wm_compile
*c
,
626 const struct prog_instruction
*inst
)
629 struct brw_compile
*p
= &c
->func
;
630 GLuint mask
= inst
->DstReg
.WriteMask
;
631 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
632 for (i
= 0; i
< 4; i
++) {
634 struct brw_reg src
, dst
;
635 dst
= get_dst_reg(c
, inst
, i
);
636 src
= get_src_reg(c
, inst
, 0, i
);
637 brw_RNDZ(p
, dst
, src
);
640 brw_set_saturate(p
, 0);
643 static void emit_mov( struct brw_wm_compile
*c
,
644 const struct prog_instruction
*inst
)
647 struct brw_compile
*p
= &c
->func
;
648 GLuint mask
= inst
->DstReg
.WriteMask
;
649 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
650 for (i
= 0; i
< 4; i
++) {
652 struct brw_reg src
, dst
;
653 dst
= get_dst_reg(c
, inst
, i
);
654 /* XXX some moves from immediate value don't work reliably!!! */
655 /*src = get_src_reg_imm(c, inst, 0, i);*/
656 src
= get_src_reg(c
, inst
, 0, i
);
657 brw_MOV(p
, dst
, src
);
660 brw_set_saturate(p
, 0);
663 static void emit_pixel_xy(struct brw_wm_compile
*c
,
664 const struct prog_instruction
*inst
)
666 struct brw_reg r1
= brw_vec1_grf(1, 0);
667 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
669 struct brw_reg dst0
, dst1
;
670 struct brw_compile
*p
= &c
->func
;
671 GLuint mask
= inst
->DstReg
.WriteMask
;
673 dst0
= get_dst_reg(c
, inst
, 0);
674 dst1
= get_dst_reg(c
, inst
, 1);
675 /* Calculate pixel centers by adding 1 or 0 to each of the
676 * micro-tile coordinates passed in r1.
678 if (mask
& WRITEMASK_X
) {
680 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
681 stride(suboffset(r1_uw
, 4), 2, 4, 0),
682 brw_imm_v(0x10101010));
685 if (mask
& WRITEMASK_Y
) {
687 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
688 stride(suboffset(r1_uw
, 5), 2, 4, 0),
689 brw_imm_v(0x11001100));
693 static void emit_delta_xy(struct brw_wm_compile
*c
,
694 const struct prog_instruction
*inst
)
696 struct brw_reg r1
= brw_vec1_grf(1, 0);
697 struct brw_reg dst0
, dst1
, src0
, src1
;
698 struct brw_compile
*p
= &c
->func
;
699 GLuint mask
= inst
->DstReg
.WriteMask
;
701 dst0
= get_dst_reg(c
, inst
, 0);
702 dst1
= get_dst_reg(c
, inst
, 1);
703 src0
= get_src_reg(c
, inst
, 0, 0);
704 src1
= get_src_reg(c
, inst
, 0, 1);
705 /* Calc delta X,Y by subtracting origin in r1 from the pixel
708 if (mask
& WRITEMASK_X
) {
711 retype(src0
, BRW_REGISTER_TYPE_UW
),
715 if (mask
& WRITEMASK_Y
) {
718 retype(src1
, BRW_REGISTER_TYPE_UW
),
719 negate(suboffset(r1
,1)));
724 static void fire_fb_write( struct brw_wm_compile
*c
,
730 struct brw_compile
*p
= &c
->func
;
731 /* Pass through control information:
733 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
735 brw_push_insn_state(p
);
736 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
738 brw_message_reg(base_reg
+ 1),
740 brw_pop_insn_state(p
);
742 /* Send framebuffer write message: */
744 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
746 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
753 static void emit_fb_write(struct brw_wm_compile
*c
,
754 const struct prog_instruction
*inst
)
756 struct brw_compile
*p
= &c
->func
;
762 /* Reserve a space for AA - may not be needed:
764 if (c
->key
.aa_dest_stencil_reg
)
767 brw_push_insn_state(p
);
768 for (channel
= 0; channel
< 4; channel
++) {
769 src0
= get_src_reg(c
, inst
, 0, channel
);
770 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
771 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
772 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
774 /* skip over the regs populated above: */
776 brw_pop_insn_state(p
);
778 if (c
->key
.source_depth_to_render_target
) {
779 if (c
->key
.computes_depth
) {
780 src0
= get_src_reg(c
, inst
, 2, 2);
781 brw_MOV(p
, brw_message_reg(nr
), src0
);
784 src0
= get_src_reg(c
, inst
, 1, 1);
785 brw_MOV(p
, brw_message_reg(nr
), src0
);
791 if (c
->key
.dest_depth_reg
) {
792 GLuint comp
= c
->key
.dest_depth_reg
/ 2;
793 GLuint off
= c
->key
.dest_depth_reg
% 2;
798 /* XXX do we need this code? comp always 1, off always 0, it seems */
800 brw_push_insn_state(p
);
801 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
803 brw_MOV(p
, brw_message_reg(nr
), offset(arg1
[comp
],1));
805 brw_MOV(p
, brw_message_reg(nr
+1), arg1
[comp
+1]);
806 brw_pop_insn_state(p
);
811 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
812 brw_MOV(p
, brw_message_reg(nr
), src
);
817 target
= inst
->Aux
>> 1;
819 fire_fb_write(c
, 0, nr
, target
, eot
);
822 static void emit_pixel_w( struct brw_wm_compile
*c
,
823 const struct prog_instruction
*inst
)
825 struct brw_compile
*p
= &c
->func
;
826 GLuint mask
= inst
->DstReg
.WriteMask
;
827 if (mask
& WRITEMASK_W
) {
828 struct brw_reg dst
, src0
, delta0
, delta1
;
829 struct brw_reg interp3
;
831 dst
= get_dst_reg(c
, inst
, 3);
832 src0
= get_src_reg(c
, inst
, 0, 0);
833 delta0
= get_src_reg(c
, inst
, 1, 0);
834 delta1
= get_src_reg(c
, inst
, 1, 1);
836 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
837 /* Calc 1/w - just linterp wpos[3] optimized by putting the
838 * result straight into a message reg.
840 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
841 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
845 BRW_MATH_FUNCTION_INV
,
846 BRW_MATH_SATURATE_NONE
,
848 BRW_MATH_PRECISION_FULL
);
852 static void emit_linterp(struct brw_wm_compile
*c
,
853 const struct prog_instruction
*inst
)
855 struct brw_compile
*p
= &c
->func
;
856 GLuint mask
= inst
->DstReg
.WriteMask
;
857 struct brw_reg interp
[4];
858 struct brw_reg dst
, delta0
, delta1
;
862 src0
= get_src_reg(c
, inst
, 0, 0);
863 delta0
= get_src_reg(c
, inst
, 1, 0);
864 delta1
= get_src_reg(c
, inst
, 1, 1);
867 interp
[0] = brw_vec1_grf(nr
, 0);
868 interp
[1] = brw_vec1_grf(nr
, 4);
869 interp
[2] = brw_vec1_grf(nr
+1, 0);
870 interp
[3] = brw_vec1_grf(nr
+1, 4);
872 for(i
= 0; i
< 4; i
++ ) {
874 dst
= get_dst_reg(c
, inst
, i
);
875 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
876 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
881 static void emit_cinterp(struct brw_wm_compile
*c
,
882 const struct prog_instruction
*inst
)
884 struct brw_compile
*p
= &c
->func
;
885 GLuint mask
= inst
->DstReg
.WriteMask
;
887 struct brw_reg interp
[4];
888 struct brw_reg dst
, src0
;
891 src0
= get_src_reg(c
, inst
, 0, 0);
894 interp
[0] = brw_vec1_grf(nr
, 0);
895 interp
[1] = brw_vec1_grf(nr
, 4);
896 interp
[2] = brw_vec1_grf(nr
+1, 0);
897 interp
[3] = brw_vec1_grf(nr
+1, 4);
899 for(i
= 0; i
< 4; i
++ ) {
901 dst
= get_dst_reg(c
, inst
, i
);
902 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
907 static void emit_pinterp(struct brw_wm_compile
*c
,
908 const struct prog_instruction
*inst
)
910 struct brw_compile
*p
= &c
->func
;
911 GLuint mask
= inst
->DstReg
.WriteMask
;
913 struct brw_reg interp
[4];
914 struct brw_reg dst
, delta0
, delta1
;
915 struct brw_reg src0
, w
;
918 src0
= get_src_reg(c
, inst
, 0, 0);
919 delta0
= get_src_reg(c
, inst
, 1, 0);
920 delta1
= get_src_reg(c
, inst
, 1, 1);
921 w
= get_src_reg(c
, inst
, 2, 3);
924 interp
[0] = brw_vec1_grf(nr
, 0);
925 interp
[1] = brw_vec1_grf(nr
, 4);
926 interp
[2] = brw_vec1_grf(nr
+1, 0);
927 interp
[3] = brw_vec1_grf(nr
+1, 4);
929 for(i
= 0; i
< 4; i
++ ) {
931 dst
= get_dst_reg(c
, inst
, i
);
932 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
933 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
935 brw_MUL(p
, dst
, dst
, w
);
940 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
941 static void emit_frontfacing(struct brw_wm_compile
*c
,
942 const struct prog_instruction
*inst
)
944 struct brw_compile
*p
= &c
->func
;
945 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
947 GLuint mask
= inst
->DstReg
.WriteMask
;
950 for (i
= 0; i
< 4; i
++) {
952 dst
= get_dst_reg(c
, inst
, i
);
953 brw_MOV(p
, dst
, brw_imm_f(0.0));
957 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
960 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
961 for (i
= 0; i
< 4; i
++) {
963 dst
= get_dst_reg(c
, inst
, i
);
964 brw_MOV(p
, dst
, brw_imm_f(1.0));
967 brw_set_predicate_control_flag_value(p
, 0xff);
970 static void emit_xpd(struct brw_wm_compile
*c
,
971 const struct prog_instruction
*inst
)
974 struct brw_compile
*p
= &c
->func
;
975 GLuint mask
= inst
->DstReg
.WriteMask
;
976 for (i
= 0; i
< 4; i
++) {
980 struct brw_reg src0
, src1
, dst
;
981 dst
= get_dst_reg(c
, inst
, i
);
982 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
983 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
984 brw_MUL(p
, brw_null_reg(), src0
, src1
);
985 src0
= get_src_reg(c
, inst
, 0, i1
);
986 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
987 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
988 brw_MAC(p
, dst
, src0
, src1
);
989 brw_set_saturate(p
, 0);
992 brw_set_saturate(p
, 0);
995 static void emit_dp3(struct brw_wm_compile
*c
,
996 const struct prog_instruction
*inst
)
998 struct brw_reg src0
[3], src1
[3], dst
;
1000 struct brw_compile
*p
= &c
->func
;
1001 for (i
= 0; i
< 3; i
++) {
1002 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1003 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1006 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1007 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1008 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1009 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1010 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1011 brw_set_saturate(p
, 0);
1014 static void emit_dp4(struct brw_wm_compile
*c
,
1015 const struct prog_instruction
*inst
)
1017 struct brw_reg src0
[4], src1
[4], dst
;
1019 struct brw_compile
*p
= &c
->func
;
1020 for (i
= 0; i
< 4; i
++) {
1021 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1022 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1024 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1025 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1026 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1027 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1028 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1029 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1030 brw_set_saturate(p
, 0);
1033 static void emit_dph(struct brw_wm_compile
*c
,
1034 const struct prog_instruction
*inst
)
1036 struct brw_reg src0
[4], src1
[4], dst
;
1038 struct brw_compile
*p
= &c
->func
;
1039 for (i
= 0; i
< 4; i
++) {
1040 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1041 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1043 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1044 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1045 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1046 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1047 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1048 brw_ADD(p
, dst
, dst
, src1
[3]);
1049 brw_set_saturate(p
, 0);
1053 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1054 * Note that the result of the function is smeared across the dest
1055 * register's X, Y, Z and W channels (subject to writemasking of course).
1057 static void emit_math1(struct brw_wm_compile
*c
,
1058 const struct prog_instruction
*inst
, GLuint func
)
1060 struct brw_compile
*p
= &c
->func
;
1061 struct brw_reg src0
, dst
, tmp
;
1062 const int mark
= mark_tmps( c
);
1067 /* Get first component of source register */
1068 src0
= get_src_reg(c
, inst
, 0, 0);
1070 /* tmp = func(src0) */
1071 brw_MOV(p
, brw_message_reg(2), src0
);
1075 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1078 BRW_MATH_DATA_VECTOR
,
1079 BRW_MATH_PRECISION_FULL
);
1081 /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
1083 /* replicate tmp value across enabled dest channels */
1084 for (i
= 0; i
< 4; i
++) {
1085 if (inst
->DstReg
.WriteMask
& (1 << i
)) {
1086 dst
= get_dst_reg(c
, inst
, i
);
1087 brw_MOV(p
, dst
, tmp
);
1091 release_tmps(c
, mark
);
1094 static void emit_rcp(struct brw_wm_compile
*c
,
1095 const struct prog_instruction
*inst
)
1097 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
1100 static void emit_rsq(struct brw_wm_compile
*c
,
1101 const struct prog_instruction
*inst
)
1103 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
1106 static void emit_sin(struct brw_wm_compile
*c
,
1107 const struct prog_instruction
*inst
)
1109 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
1112 static void emit_cos(struct brw_wm_compile
*c
,
1113 const struct prog_instruction
*inst
)
1115 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
1118 static void emit_ex2(struct brw_wm_compile
*c
,
1119 const struct prog_instruction
*inst
)
1121 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
1124 static void emit_lg2(struct brw_wm_compile
*c
,
1125 const struct prog_instruction
*inst
)
1127 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
1130 static void emit_add(struct brw_wm_compile
*c
,
1131 const struct prog_instruction
*inst
)
1133 struct brw_compile
*p
= &c
->func
;
1134 struct brw_reg src0
, src1
, dst
;
1135 GLuint mask
= inst
->DstReg
.WriteMask
;
1137 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1138 for (i
= 0 ; i
< 4; i
++) {
1139 if (mask
& (1<<i
)) {
1140 dst
= get_dst_reg(c
, inst
, i
);
1141 src0
= get_src_reg(c
, inst
, 0, i
);
1142 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1143 brw_ADD(p
, dst
, src0
, src1
);
1146 brw_set_saturate(p
, 0);
1149 static void emit_arl(struct brw_wm_compile
*c
,
1150 const struct prog_instruction
*inst
)
1152 struct brw_compile
*p
= &c
->func
;
1153 struct brw_reg src0
, addr_reg
;
1154 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1155 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1156 BRW_ARF_ADDRESS
, 0);
1157 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1158 brw_MOV(p
, addr_reg
, src0
);
1159 brw_set_saturate(p
, 0);
1162 static void emit_sub(struct brw_wm_compile
*c
,
1163 const struct prog_instruction
*inst
)
1165 struct brw_compile
*p
= &c
->func
;
1166 struct brw_reg src0
, src1
, dst
;
1167 GLuint mask
= inst
->DstReg
.WriteMask
;
1169 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1170 for (i
= 0 ; i
< 4; i
++) {
1171 if (mask
& (1<<i
)) {
1172 dst
= get_dst_reg(c
, inst
, i
);
1173 src0
= get_src_reg(c
, inst
, 0, i
);
1174 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1175 brw_ADD(p
, dst
, src0
, negate(src1
));
1178 brw_set_saturate(p
, 0);
1181 static void emit_mul(struct brw_wm_compile
*c
,
1182 const struct prog_instruction
*inst
)
1184 struct brw_compile
*p
= &c
->func
;
1185 struct brw_reg src0
, src1
, dst
;
1186 GLuint mask
= inst
->DstReg
.WriteMask
;
1188 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1189 for (i
= 0 ; i
< 4; i
++) {
1190 if (mask
& (1<<i
)) {
1191 dst
= get_dst_reg(c
, inst
, i
);
1192 src0
= get_src_reg(c
, inst
, 0, i
);
1193 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1194 brw_MUL(p
, dst
, src0
, src1
);
1197 brw_set_saturate(p
, 0);
1200 static void emit_frc(struct brw_wm_compile
*c
,
1201 const struct prog_instruction
*inst
)
1203 struct brw_compile
*p
= &c
->func
;
1204 struct brw_reg src0
, dst
;
1205 GLuint mask
= inst
->DstReg
.WriteMask
;
1207 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1208 for (i
= 0 ; i
< 4; i
++) {
1209 if (mask
& (1<<i
)) {
1210 dst
= get_dst_reg(c
, inst
, i
);
1211 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1212 brw_FRC(p
, dst
, src0
);
1215 if (inst
->SaturateMode
!= SATURATE_OFF
)
1216 brw_set_saturate(p
, 0);
1219 static void emit_flr(struct brw_wm_compile
*c
,
1220 const struct prog_instruction
*inst
)
1222 struct brw_compile
*p
= &c
->func
;
1223 struct brw_reg src0
, dst
;
1224 GLuint mask
= inst
->DstReg
.WriteMask
;
1226 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1227 for (i
= 0 ; i
< 4; i
++) {
1228 if (mask
& (1<<i
)) {
1229 dst
= get_dst_reg(c
, inst
, i
);
1230 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1231 brw_RNDD(p
, dst
, src0
);
1234 brw_set_saturate(p
, 0);
1238 static void emit_min_max(struct brw_wm_compile
*c
,
1239 const struct prog_instruction
*inst
)
1241 struct brw_compile
*p
= &c
->func
;
1242 const GLuint mask
= inst
->DstReg
.WriteMask
;
1243 const int mark
= mark_tmps(c
);
1245 brw_push_insn_state(p
);
1246 for (i
= 0; i
< 4; i
++) {
1247 if (mask
& (1<<i
)) {
1248 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1249 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1250 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1252 /* if dst==src0 or dst==src1 we need to use a temp reg */
1253 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1254 brw_same_reg(dst
, src1
);
1261 printf(" Min/max: dst %d src0 %d src1 %d\n",
1262 dst.nr, src0.nr, src1.nr);
1264 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1265 brw_MOV(p
, dst
, src0
);
1266 brw_set_saturate(p
, 0);
1268 if (inst
->Opcode
== OPCODE_MIN
)
1269 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1271 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1273 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1274 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1275 brw_MOV(p
, dst
, src1
);
1276 brw_set_saturate(p
, 0);
1277 brw_set_predicate_control_flag_value(p
, 0xff);
1279 brw_MOV(p
, real_dst
, dst
);
1282 brw_pop_insn_state(p
);
1283 release_tmps(c
, mark
);
/* Emit code for a scalar power operation (BRW_MATH_FUNCTION_POW). */
/* The destination channel is chosen by get_scalar_dst_index(inst); both */
/* operands are read from component 0 of source 0 and source 1. */
1286 static void emit_pow(struct brw_wm_compile
*c
,
1287 const struct prog_instruction
*inst
)
1289 struct brw_compile
*p
= &c
->func
;
1290 struct brw_reg dst
, src0
, src1
;
1291 dst
= get_dst_reg(c
, inst
, get_scalar_dst_index(inst
));
1292 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1293 src1
= get_src_reg_imm(c
, inst
, 1, 0);
/* Load the two operands into message registers 2 and 3. */
1295 brw_MOV(p
, brw_message_reg(2), src0
);
1296 brw_MOV(p
, brw_message_reg(3), src1
);
/* NOTE(review): the opening of the call that takes the arguments below is */
/* not present in these lines; presumably a math-unit send -- confirm. */
/* Saturation is requested through the math saturate argument rather than */
/* through brw_set_saturate(). */
1300 BRW_MATH_FUNCTION_POW
,
1301 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1304 BRW_MATH_DATA_VECTOR
,
1305 BRW_MATH_PRECISION_FULL
);
1308 static void emit_lrp(struct brw_wm_compile
*c
,
1309 const struct prog_instruction
*inst
)
1311 struct brw_compile
*p
= &c
->func
;
1312 GLuint mask
= inst
->DstReg
.WriteMask
;
1313 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1315 int mark
= mark_tmps(c
);
1316 for (i
= 0; i
< 4; i
++) {
1317 if (mask
& (1<<i
)) {
1318 dst
= get_dst_reg(c
, inst
, i
);
1319 src0
= get_src_reg(c
, inst
, 0, i
);
1321 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1323 if (src1
.nr
== dst
.nr
) {
1324 tmp1
= alloc_tmp(c
);
1325 brw_MOV(p
, tmp1
, src1
);
1329 src2
= get_src_reg(c
, inst
, 2, i
);
1330 if (src2
.nr
== dst
.nr
) {
1331 tmp2
= alloc_tmp(c
);
1332 brw_MOV(p
, tmp2
, src2
);
1336 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1337 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1338 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1339 brw_MAC(p
, dst
, src0
, tmp1
);
1340 brw_set_saturate(p
, 0);
1342 release_tmps(c
, mark
);
1347 * For GLSL shaders, this KIL will be unconditional.
1348 * It may be contained inside an IF/ENDIF structure of course.
1350 static void emit_kil(struct brw_wm_compile
*c
)
1352 struct brw_compile
*p
= &c
->func
;
1353 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1354 brw_push_insn_state(p
);
1355 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1356 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1357 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1358 brw_pop_insn_state(p
);
1361 static void emit_mad(struct brw_wm_compile
*c
,
1362 const struct prog_instruction
*inst
)
1364 struct brw_compile
*p
= &c
->func
;
1365 GLuint mask
= inst
->DstReg
.WriteMask
;
1366 struct brw_reg dst
, src0
, src1
, src2
;
1369 for (i
= 0; i
< 4; i
++) {
1370 if (mask
& (1<<i
)) {
1371 dst
= get_dst_reg(c
, inst
, i
);
1372 src0
= get_src_reg(c
, inst
, 0, i
);
1373 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1374 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1375 brw_MUL(p
, dst
, src0
, src1
);
1377 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1378 brw_ADD(p
, dst
, dst
, src2
);
1379 brw_set_saturate(p
, 0);
1384 static void emit_sop(struct brw_wm_compile
*c
,
1385 const struct prog_instruction
*inst
, GLuint cond
)
1387 struct brw_compile
*p
= &c
->func
;
1388 GLuint mask
= inst
->DstReg
.WriteMask
;
1389 struct brw_reg dst
, src0
, src1
;
1392 for (i
= 0; i
< 4; i
++) {
1393 if (mask
& (1<<i
)) {
1394 dst
= get_dst_reg(c
, inst
, i
);
1395 src0
= get_src_reg(c
, inst
, 0, i
);
1396 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1397 brw_push_insn_state(p
);
1398 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1399 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1400 brw_MOV(p
, dst
, brw_imm_f(0.0));
1401 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1402 brw_MOV(p
, dst
, brw_imm_f(1.0));
1403 brw_pop_insn_state(p
);
1408 static void emit_slt(struct brw_wm_compile
*c
,
1409 const struct prog_instruction
*inst
)
1411 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1414 static void emit_sle(struct brw_wm_compile
*c
,
1415 const struct prog_instruction
*inst
)
1417 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1420 static void emit_sgt(struct brw_wm_compile
*c
,
1421 const struct prog_instruction
*inst
)
1423 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1426 static void emit_sge(struct brw_wm_compile
*c
,
1427 const struct prog_instruction
*inst
)
1429 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1432 static void emit_seq(struct brw_wm_compile
*c
,
1433 const struct prog_instruction
*inst
)
1435 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1438 static void emit_sne(struct brw_wm_compile
*c
,
1439 const struct prog_instruction
*inst
)
1441 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
/* Emit code for the X derivative instruction. */
/* For every enabled channel i, dst = interp[i] * w, where interp[] are */
/* scalar GRF locations built below and w is component 3 of source 1. */
1444 static void emit_ddx(struct brw_wm_compile
*c
,
1445 const struct prog_instruction
*inst
)
1447 struct brw_compile
*p
= &c
->func
;
1448 GLuint mask
= inst
->DstReg
.WriteMask
;
1449 struct brw_reg interp
[4];
1451 struct brw_reg src0
, w
;
1453 src0
= get_src_reg(c
, inst
, 0, 0);
1454 w
= get_src_reg(c
, inst
, 1, 3);
/* NOTE(review): nr is not assigned in the lines visible here; presumably */
/* it is the GRF number of src0 -- confirm. */
/* The four channels are taken from sub-registers 0 and 4 of GRF nr and */
/* of GRF nr+1. */
1456 interp
[0] = brw_vec1_grf(nr
, 0);
1457 interp
[1] = brw_vec1_grf(nr
, 4);
1458 interp
[2] = brw_vec1_grf(nr
+1, 0);
1459 interp
[3] = brw_vec1_grf(nr
+1, 4);
/* Saturation applies to both the MOV and the MUL of every channel. */
1460 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1461 for(i
= 0; i
< 4; i
++ ) {
1462 if (mask
& (1<<i
)) {
1463 dst
= get_dst_reg(c
, inst
, i
);
1464 brw_MOV(p
, dst
, interp
[i
]);
1465 brw_MUL(p
, dst
, dst
, w
);
1468 brw_set_saturate(p
, 0);
/* Emit code for the Y derivative instruction. */
/* Same shape as emit_ddx, except that each channel reads the element one */
/* sub-register past interp[i] (suboffset(interp[i], 1)) before scaling by w. */
1471 static void emit_ddy(struct brw_wm_compile
*c
,
1472 const struct prog_instruction
*inst
)
1474 struct brw_compile
*p
= &c
->func
;
1475 GLuint mask
= inst
->DstReg
.WriteMask
;
1476 struct brw_reg interp
[4];
1478 struct brw_reg src0
, w
;
1481 src0
= get_src_reg(c
, inst
, 0, 0);
1483 w
= get_src_reg(c
, inst
, 1, 3);
/* NOTE(review): nr is not assigned in the lines visible here; presumably */
/* it is the GRF number of src0 -- confirm. */
1484 interp
[0] = brw_vec1_grf(nr
, 0);
1485 interp
[1] = brw_vec1_grf(nr
, 4);
1486 interp
[2] = brw_vec1_grf(nr
+1, 0);
1487 interp
[3] = brw_vec1_grf(nr
+1, 4);
/* Saturation applies to both the MOV and the MUL of every channel. */
1488 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1489 for(i
= 0; i
< 4; i
++ ) {
1490 if (mask
& (1<<i
)) {
1491 dst
= get_dst_reg(c
, inst
, i
);
1492 brw_MOV(p
, dst
, suboffset(interp
[i
], 1));
1493 brw_MUL(p
, dst
, dst
, w
);
1496 brw_set_saturate(p
, 0);
1499 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1501 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1505 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1507 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1510 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1512 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
1515 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1517 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
1521 /* One-, two- and three-dimensional Perlin noise, similar to the description
1522 in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
1523 static void noise1_sub( struct brw_wm_compile
*c
) {
1525 struct brw_compile
*p
= &c
->func
;
1526 struct brw_reg param
,
1527 x0
, x1
, /* gradients at each end */
1528 t
, tmp
[ 2 ], /* float temporaries */
1529 itmp
[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
1531 int mark
= mark_tmps( c
);
1533 x0
= alloc_tmp( c
);
1534 x1
= alloc_tmp( c
);
1536 tmp
[ 0 ] = alloc_tmp( c
);
1537 tmp
[ 1 ] = alloc_tmp( c
);
1538 itmp
[ 0 ] = retype( tmp
[ 0 ], BRW_REGISTER_TYPE_UD
);
1539 itmp
[ 1 ] = retype( tmp
[ 1 ], BRW_REGISTER_TYPE_UD
);
1540 itmp
[ 2 ] = retype( x0
, BRW_REGISTER_TYPE_UD
);
1541 itmp
[ 3 ] = retype( x1
, BRW_REGISTER_TYPE_UD
);
1542 itmp
[ 4 ] = retype( t
, BRW_REGISTER_TYPE_UD
);
1544 param
= lookup_tmp( c
, mark
- 2 );
1546 brw_set_access_mode( p
, BRW_ALIGN_1
);
1548 brw_MOV( p
, itmp
[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1550 /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
1551 be hashed. Also compute the remainder (offset within the unit
1552 length), interleaved to reduce register dependency penalties. */
1553 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
);
1554 brw_FRC( p
, param
, param
);
1555 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 1 ) );
1556 brw_MOV( p
, itmp
[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1557 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1559 /* We're now ready to perform the hashing. The two hashes are
1560 interleaved for performance. The hash function used is
1561 designed to rapidly achieve avalanche and require only 32x16
1562 bit multiplication, and 16-bit swizzles (which we get for
1563 free). We can't use immediate operands in the multiplies,
1564 because immediates are permitted only in src1 and the 16-bit
1565 factor is permitted only in src0. */
1566 for( i
= 0; i
< 2; i
++ )
1567 brw_MUL( p
, itmp
[ i
], itmp
[ 2 ], itmp
[ i
] );
1568 for( i
= 0; i
< 2; i
++ )
1569 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1570 high_words( itmp
[ i
] ) );
1571 for( i
= 0; i
< 2; i
++ )
1572 brw_MUL( p
, itmp
[ i
], itmp
[ 3 ], itmp
[ i
] );
1573 for( i
= 0; i
< 2; i
++ )
1574 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1575 high_words( itmp
[ i
] ) );
1576 for( i
= 0; i
< 2; i
++ )
1577 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1578 for( i
= 0; i
< 2; i
++ )
1579 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1580 high_words( itmp
[ i
] ) );
1582 /* Now we want to initialise the two gradients based on the
1583 hashes. Format conversion from signed integer to float leaves
1584 everything scaled too high by a factor of pow( 2, 31 ), but
1585 we correct for that right at the end. */
1586 brw_ADD( p
, t
, param
, brw_imm_f( -1.0 ) );
1587 brw_MOV( p
, x0
, retype( tmp
[ 0 ], BRW_REGISTER_TYPE_D
) );
1588 brw_MOV( p
, x1
, retype( tmp
[ 1 ], BRW_REGISTER_TYPE_D
) );
1590 brw_MUL( p
, x0
, x0
, param
);
1591 brw_MUL( p
, x1
, x1
, t
);
1593 /* We interpolate between the gradients using the polynomial
1594 6t^5 - 15t^4 + 10t^3 (Perlin). */
1595 brw_MUL( p
, tmp
[ 0 ], param
, brw_imm_f( 6.0 ) );
1596 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1597 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1598 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1599 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1600 brw_ADD( p
, x1
, x1
, negate( x0
) ); /* unrelated work to fill the
1602 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param
);
1603 brw_MUL( p
, param
, tmp
[ 0 ], param
);
1604 brw_MUL( p
, x1
, x1
, param
);
1605 brw_ADD( p
, x0
, x0
, x1
);
1606 /* scale by pow( 2, -30 ), to compensate for the format conversion
1607 above and an extra factor of 2 so that a single gradient covers
1609 brw_MUL( p
, param
, x0
, brw_imm_f( 0.000000000931322574615478515625 ) );
1611 release_tmps( c
, mark
);
1614 static void emit_noise1( struct brw_wm_compile
*c
,
1615 const struct prog_instruction
*inst
)
1617 struct brw_compile
*p
= &c
->func
;
1618 struct brw_reg src
, param
, dst
;
1619 GLuint mask
= inst
->DstReg
.WriteMask
;
1621 int mark
= mark_tmps( c
);
1623 assert( mark
== 0 );
1625 src
= get_src_reg( c
, inst
, 0, 0 );
1627 param
= alloc_tmp( c
);
1629 brw_MOV( p
, param
, src
);
1631 invoke_subroutine( c
, SUB_NOISE1
, noise1_sub
);
1633 /* Fill in the result: */
1634 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1635 for (i
= 0 ; i
< 4; i
++) {
1636 if (mask
& (1<<i
)) {
1637 dst
= get_dst_reg(c
, inst
, i
);
1638 brw_MOV( p
, dst
, param
);
1641 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1642 brw_set_saturate( p
, 0 );
1644 release_tmps( c
, mark
);
1647 static void noise2_sub( struct brw_wm_compile
*c
) {
1649 struct brw_compile
*p
= &c
->func
;
1650 struct brw_reg param0
, param1
,
1651 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at each corner */
1652 t
, tmp
[ 4 ], /* float temporaries */
1653 itmp
[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
1655 int mark
= mark_tmps( c
);
1657 x0y0
= alloc_tmp( c
);
1658 x0y1
= alloc_tmp( c
);
1659 x1y0
= alloc_tmp( c
);
1660 x1y1
= alloc_tmp( c
);
1662 for( i
= 0; i
< 4; i
++ ) {
1663 tmp
[ i
] = alloc_tmp( c
);
1664 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1666 itmp
[ 4 ] = retype( x0y0
, BRW_REGISTER_TYPE_UD
);
1667 itmp
[ 5 ] = retype( x0y1
, BRW_REGISTER_TYPE_UD
);
1668 itmp
[ 6 ] = retype( x1y0
, BRW_REGISTER_TYPE_UD
);
1670 param0
= lookup_tmp( c
, mark
- 3 );
1671 param1
= lookup_tmp( c
, mark
- 2 );
1673 brw_set_access_mode( p
, BRW_ALIGN_1
);
1675 /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
1676 be hashed. Also compute the remainders (offsets within the unit
1677 square), interleaved to reduce register dependency penalties. */
1678 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1679 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1680 brw_FRC( p
, param0
, param0
);
1681 brw_FRC( p
, param1
, param1
);
1682 brw_MOV( p
, itmp
[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
1683 brw_ADD( p
, high_words( itmp
[ 0 ] ), high_words( itmp
[ 0 ] ),
1684 low_words( itmp
[ 1 ] ) );
1685 brw_MOV( p
, itmp
[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
1686 brw_MOV( p
, itmp
[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
1687 brw_ADD( p
, itmp
[ 1 ], itmp
[ 0 ], brw_imm_ud( 0x10000 ) );
1688 brw_ADD( p
, itmp
[ 2 ], itmp
[ 0 ], brw_imm_ud( 0x1 ) );
1689 brw_ADD( p
, itmp
[ 3 ], itmp
[ 0 ], brw_imm_ud( 0x10001 ) );
1691 /* We're now ready to perform the hashing. The four hashes are
1692 interleaved for performance. The hash function used is
1693 designed to rapidly achieve avalanche and require only 32x16
1694 bit multiplication, and 16-bit swizzles (which we get for
1695 free). We can't use immediate operands in the multiplies,
1696 because immediates are permitted only in src1 and the 16-bit
1697 factor is permitted only in src0. */
1698 for( i
= 0; i
< 4; i
++ )
1699 brw_MUL( p
, itmp
[ i
], itmp
[ 4 ], itmp
[ i
] );
1700 for( i
= 0; i
< 4; i
++ )
1701 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1702 high_words( itmp
[ i
] ) );
1703 for( i
= 0; i
< 4; i
++ )
1704 brw_MUL( p
, itmp
[ i
], itmp
[ 5 ], itmp
[ i
] );
1705 for( i
= 0; i
< 4; i
++ )
1706 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1707 high_words( itmp
[ i
] ) );
1708 for( i
= 0; i
< 4; i
++ )
1709 brw_MUL( p
, itmp
[ i
], itmp
[ 6 ], itmp
[ i
] );
1710 for( i
= 0; i
< 4; i
++ )
1711 brw_XOR( p
, low_words( itmp
[ i
] ), low_words( itmp
[ i
] ),
1712 high_words( itmp
[ i
] ) );
1714 /* Now we want to initialise the four gradients based on the
1715 hashes. Format conversion from signed integer to float leaves
1716 everything scaled too high by a factor of pow( 2, 15 ), but
1717 we correct for that right at the end. */
1718 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1719 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1720 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1721 brw_MOV( p
, x1y0
, low_words( tmp
[ 2 ] ) );
1722 brw_MOV( p
, x1y1
, low_words( tmp
[ 3 ] ) );
1724 brw_MOV( p
, tmp
[ 0 ], high_words( tmp
[ 0 ] ) );
1725 brw_MOV( p
, tmp
[ 1 ], high_words( tmp
[ 1 ] ) );
1726 brw_MOV( p
, tmp
[ 2 ], high_words( tmp
[ 2 ] ) );
1727 brw_MOV( p
, tmp
[ 3 ], high_words( tmp
[ 3 ] ) );
1729 brw_MUL( p
, x1y0
, x1y0
, t
);
1730 brw_MUL( p
, x1y1
, x1y1
, t
);
1731 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1732 brw_MUL( p
, x0y0
, x0y0
, param0
);
1733 brw_MUL( p
, x0y1
, x0y1
, param0
);
1735 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param1
);
1736 brw_MUL( p
, tmp
[ 2 ], tmp
[ 2 ], param1
);
1737 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], t
);
1738 brw_MUL( p
, tmp
[ 3 ], tmp
[ 3 ], t
);
1740 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 0 ] );
1741 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 2 ] );
1742 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 1 ] );
1743 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 3 ] );
1745 /* We interpolate between the gradients using the polynomial
1746 6t^5 - 15t^4 + 10t^3 (Perlin). */
1747 brw_MUL( p
, tmp
[ 0 ], param0
, brw_imm_f( 6.0 ) );
1748 brw_MUL( p
, tmp
[ 1 ], param1
, brw_imm_f( 6.0 ) );
1749 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( -15.0 ) );
1750 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( -15.0 ) );
1751 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1752 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1753 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work to fill the
1755 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], brw_imm_f( 10.0 ) );
1756 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], brw_imm_f( 10.0 ) );
1757 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1758 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1759 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work to fill the
1761 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], param0
);
1762 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], param1
);
1763 brw_MUL( p
, param0
, tmp
[ 0 ], param0
);
1764 brw_MUL( p
, param1
, tmp
[ 1 ], param1
);
1766 /* Here we interpolate in the y dimension... */
1767 brw_MUL( p
, x0y1
, x0y1
, param1
);
1768 brw_MUL( p
, x1y1
, x1y1
, param1
);
1769 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1770 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1772 /* And now in x. There are horrible register dependencies here,
1773 but we have nothing else to do. */
1774 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
1775 brw_MUL( p
, x1y0
, x1y0
, param0
);
1776 brw_ADD( p
, x0y0
, x0y0
, x1y0
);
1778 /* scale by pow( 2, -15 ), as described above */
1779 brw_MUL( p
, param0
, x0y0
, brw_imm_f( 0.000030517578125 ) );
1781 release_tmps( c
, mark
);
1784 static void emit_noise2( struct brw_wm_compile
*c
,
1785 const struct prog_instruction
*inst
)
1787 struct brw_compile
*p
= &c
->func
;
1788 struct brw_reg src0
, src1
, param0
, param1
, dst
;
1789 GLuint mask
= inst
->DstReg
.WriteMask
;
1791 int mark
= mark_tmps( c
);
1793 assert( mark
== 0 );
1795 src0
= get_src_reg( c
, inst
, 0, 0 );
1796 src1
= get_src_reg( c
, inst
, 0, 1 );
1798 param0
= alloc_tmp( c
);
1799 param1
= alloc_tmp( c
);
1801 brw_MOV( p
, param0
, src0
);
1802 brw_MOV( p
, param1
, src1
);
1804 invoke_subroutine( c
, SUB_NOISE2
, noise2_sub
);
1806 /* Fill in the result: */
1807 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
1808 for (i
= 0 ; i
< 4; i
++) {
1809 if (mask
& (1<<i
)) {
1810 dst
= get_dst_reg(c
, inst
, i
);
1811 brw_MOV( p
, dst
, param0
);
1814 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1815 brw_set_saturate( p
, 0 );
1817 release_tmps( c
, mark
);
1821 * The three-dimensional case is much like the one- and two- versions above,
1822 * but since the number of corners is rapidly growing we now pack 16 16-bit
1823 * hashes into each register to extract more parallelism from the EUs.
1825 static void noise3_sub( struct brw_wm_compile
*c
) {
1827 struct brw_compile
*p
= &c
->func
;
1828 struct brw_reg param0
, param1
, param2
,
1829 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
1830 xi
, yi
, zi
, /* interpolation coefficients */
1831 t
, tmp
[ 8 ], /* float temporaries */
1832 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
1833 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
1835 int mark
= mark_tmps( c
);
1837 x0y0
= alloc_tmp( c
);
1838 x0y1
= alloc_tmp( c
);
1839 x1y0
= alloc_tmp( c
);
1840 x1y1
= alloc_tmp( c
);
1841 xi
= alloc_tmp( c
);
1842 yi
= alloc_tmp( c
);
1843 zi
= alloc_tmp( c
);
1845 for( i
= 0; i
< 8; i
++ ) {
1846 tmp
[ i
] = alloc_tmp( c
);
1847 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
1848 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
1851 param0
= lookup_tmp( c
, mark
- 4 );
1852 param1
= lookup_tmp( c
, mark
- 3 );
1853 param2
= lookup_tmp( c
, mark
- 2 );
1855 brw_set_access_mode( p
, BRW_ALIGN_1
);
1857 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
1858 be hashed. Also compute the remainders (offsets within the unit
1859 cube), interleaved to reduce register dependency penalties. */
1860 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param0
);
1861 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param1
);
1862 brw_RNDD( p
, retype( itmp
[ 2 ], BRW_REGISTER_TYPE_D
), param2
);
1863 brw_FRC( p
, param0
, param0
);
1864 brw_FRC( p
, param1
, param1
);
1865 brw_FRC( p
, param2
, param2
);
1866 /* Since we now have only 16 bits of precision in the hash, we must
1867 be more careful about thorough mixing to maintain entropy as we
1868 squash the input vector into a small scalar. */
1869 brw_MUL( p
, brw_null_reg(), low_words( itmp
[ 0 ] ), brw_imm_uw( 0xBC8F ) );
1870 brw_MAC( p
, brw_null_reg(), low_words( itmp
[ 1 ] ), brw_imm_uw( 0xD0BD ) );
1871 brw_MAC( p
, low_words( itmp
[ 0 ] ), low_words( itmp
[ 2 ] ),
1872 brw_imm_uw( 0x9B93 ) );
1873 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
1874 brw_imm_uw( 0xBC8F ) );
1876 /* Temporarily disable the execution mask while we work with ExecSize=16
1877 channels (the mask is set for ExecSize=8 and is probably incorrect).
1878 Although this might cause execution of unwanted channels, the code
1879 writes only to temporary registers and has no side effects, so
1880 disabling the mask is harmless. */
1881 brw_push_insn_state( p
);
1882 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1883 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
1884 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
1885 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
1887 /* We're now ready to perform the hashing. The eight hashes are
1888 interleaved for performance. The hash function used is
1889 designed to rapidly achieve avalanche and require only 16x16
1890 bit multiplication, and 8-bit swizzles (which we get for
1892 for( i
= 0; i
< 4; i
++ )
1893 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
1894 for( i
= 0; i
< 4; i
++ )
1895 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1896 odd_bytes( wtmp
[ i
] ) );
1897 for( i
= 0; i
< 4; i
++ )
1898 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
1899 for( i
= 0; i
< 4; i
++ )
1900 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
1901 odd_bytes( wtmp
[ i
] ) );
1902 brw_pop_insn_state( p
);
1904 /* Now we want to initialise the four rear gradients based on the
1905 hashes. Format conversion from signed integer to float leaves
1906 everything scaled too high by a factor of pow( 2, 15 ), but
1907 we correct for that right at the end. */
1909 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1910 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
1911 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
1912 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
1913 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
1915 brw_push_insn_state( p
);
1916 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1917 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1918 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1919 brw_pop_insn_state( p
);
1921 brw_MUL( p
, x1y0
, x1y0
, t
);
1922 brw_MUL( p
, x1y1
, x1y1
, t
);
1923 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
1924 brw_MUL( p
, x0y0
, x0y0
, param0
);
1925 brw_MUL( p
, x0y1
, x0y1
, param0
);
1928 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1929 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1930 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1931 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1933 brw_push_insn_state( p
);
1934 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1935 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 5 ) );
1936 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 5 ) );
1937 brw_pop_insn_state( p
);
1939 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
1940 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
1941 brw_ADD( p
, t
, param0
, brw_imm_f( -1.0 ) );
1942 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
1943 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
1945 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1946 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1947 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1948 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1951 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
1952 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
1953 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
1954 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
1956 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param2
);
1957 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param2
);
1958 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param2
);
1959 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param2
);
1961 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
1962 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
1963 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
1964 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
1966 /* We interpolate between the gradients using the polynomial
1967 6t^5 - 15t^4 + 10t^3 (Perlin). */
1968 brw_MUL( p
, xi
, param0
, brw_imm_f( 6.0 ) );
1969 brw_MUL( p
, yi
, param1
, brw_imm_f( 6.0 ) );
1970 brw_MUL( p
, zi
, param2
, brw_imm_f( 6.0 ) );
1971 brw_ADD( p
, xi
, xi
, brw_imm_f( -15.0 ) );
1972 brw_ADD( p
, yi
, yi
, brw_imm_f( -15.0 ) );
1973 brw_ADD( p
, zi
, zi
, brw_imm_f( -15.0 ) );
1974 brw_MUL( p
, xi
, xi
, param0
);
1975 brw_MUL( p
, yi
, yi
, param1
);
1976 brw_MUL( p
, zi
, zi
, param2
);
1977 brw_ADD( p
, xi
, xi
, brw_imm_f( 10.0 ) );
1978 brw_ADD( p
, yi
, yi
, brw_imm_f( 10.0 ) );
1979 brw_ADD( p
, zi
, zi
, brw_imm_f( 10.0 ) );
1980 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) ); /* unrelated work */
1981 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) ); /* unrelated work */
1982 brw_MUL( p
, xi
, xi
, param0
);
1983 brw_MUL( p
, yi
, yi
, param1
);
1984 brw_MUL( p
, zi
, zi
, param2
);
1985 brw_MUL( p
, xi
, xi
, param0
);
1986 brw_MUL( p
, yi
, yi
, param1
);
1987 brw_MUL( p
, zi
, zi
, param2
);
1988 brw_MUL( p
, xi
, xi
, param0
);
1989 brw_MUL( p
, yi
, yi
, param1
);
1990 brw_MUL( p
, zi
, zi
, param2
);
1992 /* Here we interpolate in the y dimension... */
1993 brw_MUL( p
, x0y1
, x0y1
, yi
);
1994 brw_MUL( p
, x1y1
, x1y1
, yi
);
1995 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
1996 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
1998 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
1999 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2000 brw_MUL( p
, x1y0
, x1y0
, xi
);
2001 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2003 /* Now do the same thing for the front four gradients... */
2005 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2006 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2007 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2008 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2010 brw_push_insn_state( p
);
2011 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2012 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2013 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2014 brw_pop_insn_state( p
);
2016 brw_MUL( p
, x1y0
, x1y0
, t
);
2017 brw_MUL( p
, x1y1
, x1y1
, t
);
2018 brw_ADD( p
, t
, param1
, brw_imm_f( -1.0 ) );
2019 brw_MUL( p
, x0y0
, x0y0
, param0
);
2020 brw_MUL( p
, x0y1
, x0y1
, param0
);
2023 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2024 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2025 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2026 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2028 brw_push_insn_state( p
);
2029 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2030 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 5 ) );
2031 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 5 ) );
2032 brw_pop_insn_state( p
);
2034 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2035 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2036 brw_ADD( p
, t
, param2
, brw_imm_f( -1.0 ) );
2037 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param1
);
2038 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param1
);
2040 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2041 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2042 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2043 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2046 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2047 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2048 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2049 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2051 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2052 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2053 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2054 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2056 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2057 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2058 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2059 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2061 /* The interpolation coefficients are still around from last time, so
2062 again interpolate in the y dimension... */
2063 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2064 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2065 brw_MUL( p
, x0y1
, x0y1
, yi
);
2066 brw_MUL( p
, x1y1
, x1y1
, yi
);
2067 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2068 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2070 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2071 time put the front face in tmp[ 1 ] and we're nearly there... */
2072 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2073 brw_MUL( p
, x1y0
, x1y0
, xi
);
2074 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2076 /* The final interpolation, in the z dimension: */
2077 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2078 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], zi
);
2079 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2081 /* scale by pow( 2, -15 ), as described above */
2082 brw_MUL( p
, param0
, tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2084 release_tmps( c
, mark
);
2087 static void emit_noise3( struct brw_wm_compile
*c
,
2088 const struct prog_instruction
*inst
)
2090 struct brw_compile
*p
= &c
->func
;
2091 struct brw_reg src0
, src1
, src2
, param0
, param1
, param2
, dst
;
2092 GLuint mask
= inst
->DstReg
.WriteMask
;
2094 int mark
= mark_tmps( c
);
2096 assert( mark
== 0 );
2098 src0
= get_src_reg( c
, inst
, 0, 0 );
2099 src1
= get_src_reg( c
, inst
, 0, 1 );
2100 src2
= get_src_reg( c
, inst
, 0, 2 );
2102 param0
= alloc_tmp( c
);
2103 param1
= alloc_tmp( c
);
2104 param2
= alloc_tmp( c
);
2106 brw_MOV( p
, param0
, src0
);
2107 brw_MOV( p
, param1
, src1
);
2108 brw_MOV( p
, param2
, src2
);
2110 invoke_subroutine( c
, SUB_NOISE3
, noise3_sub
);
2112 /* Fill in the result: */
2113 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2114 for (i
= 0 ; i
< 4; i
++) {
2115 if (mask
& (1<<i
)) {
2116 dst
= get_dst_reg(c
, inst
, i
);
2117 brw_MOV( p
, dst
, param0
);
2120 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2121 brw_set_saturate( p
, 0 );
2123 release_tmps( c
, mark
);
2127 * For the four-dimensional case, the little micro-optimisation benefits
2128 * we obtain by unrolling all the loops aren't worth the massive bloat it
2129 * now causes. Instead, we loop twice around performing a similar operation
2130 * to noise3, once for the w=0 cube and once for the w=1, with a bit more
2131 * code to glue it all together.
/*
 * Emit the body of the four-dimensional noise subroutine.
 *
 * Inputs: four coordinates, read from the temporaries found via
 * lookup_tmp( c, mark - 5 + i ) for i = 0..3.
 * Output: the final value is written to param[ 0 ], after scaling by
 * 2^-15 (0.000030517578125).
 *
 * The hashing/gradient section is emitted once and then branched back to
 * through an IP-relative add, using the flag register to tell the two
 * passes (w=0 cube, w=1 cube) apart.  release_tmps( c, mark ) is called
 * at the end for the temporaries taken here.
 */
2133 static void noise4_sub( struct brw_wm_compile
*c
)
2135 struct brw_compile
*p
= &c
->func
;
2136 struct brw_reg param
[ 4 ],
2137 x0y0
, x0y1
, x1y0
, x1y1
, /* gradients at four of the corners */
2138 w0
, /* noise for the w=0 cube */
2139 floors
[ 2 ], /* integer coordinates of base corner of hypercube */
2140 interp
[ 4 ], /* interpolation coefficients */
2141 t
, tmp
[ 8 ], /* float temporaries */
2142 itmp
[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
2143 wtmp
[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
2145 int mark
= mark_tmps( c
);
2146 GLuint loop
, origin
;
2148 x0y0
= alloc_tmp( c
);
2149 x0y1
= alloc_tmp( c
);
2150 x1y0
= alloc_tmp( c
);
2151 x1y1
= alloc_tmp( c
);
2153 w0
= alloc_tmp( c
);
2154 floors
[ 0 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
2155 floors
[ 1 ] = retype( alloc_tmp( c
), BRW_REGISTER_TYPE_UD
);
/* param[] is looked up from temporary slots mark-5 .. mark-2, i.e. slots
   taken before this function recorded its mark (presumably the four
   coordinates set up by the caller -- confirm); interp[] is freshly
   allocated. */
2157 for( i
= 0; i
< 4; i
++ ) {
2158 param
[ i
] = lookup_tmp( c
, mark
- 5 + i
);
2159 interp
[ i
] = alloc_tmp( c
);
/* tmp[], itmp[] and wtmp[] name the same eight registers: itmp is tmp
   retyped to UD, wtmp is a uw16 view built from tmp[ i ].nr. */
2162 for( i
= 0; i
< 8; i
++ ) {
2163 tmp
[ i
] = alloc_tmp( c
);
2164 itmp
[ i
] = retype( tmp
[ i
], BRW_REGISTER_TYPE_UD
);
2165 wtmp
[ i
] = brw_uw16_grf( tmp
[ i
].nr
, 0 );
2168 brw_set_access_mode( p
, BRW_ALIGN_1
);
2170 /* We only want 16 bits of precision from the integral part of each
2171 co-ordinate, but unfortunately the RNDD semantics would saturate
2172 at 16 bits if we performed the operation directly to a 16-bit
2173 destination. Therefore, we round to 32-bit temporaries where
2174 appropriate, and then store only the lower 16 bits. */
2175 brw_RNDD( p
, retype( floors
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 0 ] );
2176 brw_RNDD( p
, retype( itmp
[ 0 ], BRW_REGISTER_TYPE_D
), param
[ 1 ] );
2177 brw_RNDD( p
, retype( floors
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 2 ] );
2178 brw_RNDD( p
, retype( itmp
[ 1 ], BRW_REGISTER_TYPE_D
), param
[ 3 ] );
2179 brw_MOV( p
, high_words( floors
[ 0 ] ), low_words( itmp
[ 0 ] ) );
2180 brw_MOV( p
, high_words( floors
[ 1 ] ), low_words( itmp
[ 1 ] ) );
2182 /* Modify the flag register here, because the side effect is useful
2183 later (see below). We know for certain that all flags will be
2184 cleared, since the FRC instruction cannot possibly generate
2185 negative results. Even for exceptional inputs (infinities, denormals,
2186 NaNs), the architecture guarantees that the L conditional is false. */
2187 brw_set_conditionalmod( p
, BRW_CONDITIONAL_L
);
2188 brw_FRC( p
, param
[ 0 ], param
[ 0 ] );
2189 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2190 for( i
= 1; i
< 4; i
++ )
2191 brw_FRC( p
, param
[ i
], param
[ i
] );
2193 /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
2195 for( i
= 0; i
< 4; i
++ )
2196 brw_MUL( p
, interp
[ i
], param
[ i
], brw_imm_f( 6.0 ) );
2197 for( i
= 0; i
< 4; i
++ )
2198 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( -15.0 ) );
2199 for( i
= 0; i
< 4; i
++ )
2200 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2201 for( i
= 0; i
< 4; i
++ )
2202 brw_ADD( p
, interp
[ i
], interp
[ i
], brw_imm_f( 10.0 ) );
2203 for( j
= 0; j
< 3; j
++ )
2204 for( i
= 0; i
< 4; i
++ )
2205 brw_MUL( p
, interp
[ i
], interp
[ i
], param
[ i
] );
2207 /* Mark the current address, as it will be a jump destination. The
2208 following code will be executed twice: first, with the flag
2209 register clear indicating the w=0 case, and second with flags
2213 /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
2214 be hashed. Since we have only 16 bits of precision in the hash, we
2215 must be careful about thorough mixing to maintain entropy as we
2216 squash the input vector into a small scalar. */
2217 brw_MUL( p
, brw_null_reg(), low_words( floors
[ 0 ] ),
2218 brw_imm_uw( 0xBC8F ) );
2219 brw_MAC( p
, brw_null_reg(), high_words( floors
[ 0 ] ),
2220 brw_imm_uw( 0xD0BD ) );
2221 brw_MAC( p
, brw_null_reg(), low_words( floors
[ 1 ] ),
2222 brw_imm_uw( 0x9B93 ) );
2223 brw_MAC( p
, low_words( itmp
[ 0 ] ), high_words( floors
[ 1 ] ),
2224 brw_imm_uw( 0xA359 ) );
2225 brw_ADD( p
, high_words( itmp
[ 0 ] ), low_words( itmp
[ 0 ] ),
2226 brw_imm_uw( 0xBC8F ) );
2228 /* Temporarily disable the execution mask while we work with ExecSize=16
2229 channels (the mask is set for ExecSize=8 and is probably incorrect).
2230 Although this might cause execution of unwanted channels, the code
2231 writes only to temporary registers and has no side effects, so
2232 disabling the mask is harmless. */
2233 brw_push_insn_state( p
);
2234 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2235 brw_ADD( p
, wtmp
[ 1 ], wtmp
[ 0 ], brw_imm_uw( 0xD0BD ) );
2236 brw_ADD( p
, wtmp
[ 2 ], wtmp
[ 0 ], brw_imm_uw( 0x9B93 ) );
2237 brw_ADD( p
, wtmp
[ 3 ], wtmp
[ 1 ], brw_imm_uw( 0x9B93 ) );
2239 /* We're now ready to perform the hashing. The eight hashes are
2240 interleaved for performance. The hash function used is
2241 designed to rapidly achieve avalanche and require only 16x16
2242 bit multiplication, and 8-bit swizzles (which we get for
2244 for( i
= 0; i
< 4; i
++ )
2245 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0x28D9 ) );
2246 for( i
= 0; i
< 4; i
++ )
2247 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2248 odd_bytes( wtmp
[ i
] ) );
2249 for( i
= 0; i
< 4; i
++ )
2250 brw_MUL( p
, wtmp
[ i
], wtmp
[ i
], brw_imm_uw( 0xC6D5 ) );
2251 for( i
= 0; i
< 4; i
++ )
2252 brw_XOR( p
, even_bytes( wtmp
[ i
] ), even_bytes( wtmp
[ i
] ),
2253 odd_bytes( wtmp
[ i
] ) );
2254 brw_pop_insn_state( p
);
2256 /* Now we want to initialise the four rear gradients based on the
2257 hashes. Format conversion from signed integer to float leaves
2258 everything scaled too high by a factor of pow( 2, 15 ), but
2259 we correct for that right at the end. */
/* t = param[ 0 ] - 1; it scales the x1y0/x1y1 gradients below, while
   x0y0/x0y1 are scaled by param[ 0 ] itself. */
2261 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2262 brw_MOV( p
, x0y0
, low_words( tmp
[ 0 ] ) );
2263 brw_MOV( p
, x0y1
, low_words( tmp
[ 1 ] ) );
2264 brw_MOV( p
, x1y0
, high_words( tmp
[ 0 ] ) );
2265 brw_MOV( p
, x1y1
, high_words( tmp
[ 1 ] ) );
2267 brw_push_insn_state( p
);
2268 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2269 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2270 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2271 brw_pop_insn_state( p
);
2273 brw_MUL( p
, x1y0
, x1y0
, t
);
2274 brw_MUL( p
, x1y1
, x1y1
, t
);
2275 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2276 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2277 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2280 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2281 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2282 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2283 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2285 brw_push_insn_state( p
);
2286 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2287 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2288 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2289 brw_pop_insn_state( p
);
2291 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2292 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2293 /* prepare t for the w component (used below): w the first time through
2294 the loop; w - 1 the second time) */
2295 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2296 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2297 p
->current
->header
.predicate_inverse
= 1;
2298 brw_MOV( p
, t
, param
[ 3 ] );
2299 p
->current
->header
.predicate_inverse
= 0;
2300 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2301 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2302 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2304 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2305 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2306 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2307 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2310 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2311 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2312 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2313 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2315 brw_push_insn_state( p
);
2316 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2317 brw_SHL( p
, wtmp
[ 0 ], wtmp
[ 0 ], brw_imm_uw( 4 ) );
2318 brw_SHL( p
, wtmp
[ 1 ], wtmp
[ 1 ], brw_imm_uw( 4 ) );
2319 brw_pop_insn_state( p
);
2321 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 2 ] );
2322 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], param
[ 2 ] );
2323 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 2 ] );
2324 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], param
[ 2 ] );
2326 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2327 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2328 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2329 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2332 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 0 ] ) );
2333 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 1 ] ) );
2334 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 0 ] ) );
2335 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 1 ] ) );
2337 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2338 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2339 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2340 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2341 brw_ADD( p
, t
, param
[ 0 ], brw_imm_f( -1.0 ) );
2343 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2344 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2345 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2346 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2348 /* Here we interpolate in the y dimension... */
2349 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2350 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2351 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2352 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2353 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2354 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2356 /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
2357 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2358 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2359 brw_ADD( p
, tmp
[ 0 ], x0y0
, x1y0
);
2361 /* Now do the same thing for the front four gradients... */
2363 brw_MOV( p
, x0y0
, low_words( tmp
[ 2 ] ) );
2364 brw_MOV( p
, x0y1
, low_words( tmp
[ 3 ] ) );
2365 brw_MOV( p
, x1y0
, high_words( tmp
[ 2 ] ) );
2366 brw_MOV( p
, x1y1
, high_words( tmp
[ 3 ] ) );
2368 brw_push_insn_state( p
);
2369 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2370 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2371 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2372 brw_pop_insn_state( p
);
2374 brw_MUL( p
, x1y0
, x1y0
, t
);
2375 brw_MUL( p
, x1y1
, x1y1
, t
);
2376 brw_ADD( p
, t
, param
[ 1 ], brw_imm_f( -1.0 ) );
2377 brw_MUL( p
, x0y0
, x0y0
, param
[ 0 ] );
2378 brw_MUL( p
, x0y1
, x0y1
, param
[ 0 ] );
2381 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2382 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2383 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2384 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2386 brw_push_insn_state( p
);
2387 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2388 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2389 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2390 brw_pop_insn_state( p
);
2392 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2393 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2394 brw_ADD( p
, t
, param
[ 2 ], brw_imm_f( -1.0 ) );
2395 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], param
[ 1 ] );
2396 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], param
[ 1 ] );
2398 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2399 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2400 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2401 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2404 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2405 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2406 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2407 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2409 brw_push_insn_state( p
);
2410 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2411 brw_SHL( p
, wtmp
[ 2 ], wtmp
[ 2 ], brw_imm_uw( 4 ) );
2412 brw_SHL( p
, wtmp
[ 3 ], wtmp
[ 3 ], brw_imm_uw( 4 ) );
2413 brw_pop_insn_state( p
);
2415 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2416 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2417 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2418 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2419 /* prepare t for the w component (used below): w the first time through
2420 the loop; w - 1 the second time) */
2421 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2422 brw_ADD( p
, t
, param
[ 3 ], brw_imm_f( -1.0 ) );
2423 p
->current
->header
.predicate_inverse
= 1;
2424 brw_MOV( p
, t
, param
[ 3 ] );
2425 p
->current
->header
.predicate_inverse
= 0;
2426 brw_set_predicate_control( p
, BRW_PREDICATE_NONE
);
2428 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2429 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2430 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2431 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2434 brw_MOV( p
, tmp
[ 4 ], low_words( tmp
[ 2 ] ) );
2435 brw_MOV( p
, tmp
[ 5 ], low_words( tmp
[ 3 ] ) );
2436 brw_MOV( p
, tmp
[ 6 ], high_words( tmp
[ 2 ] ) );
2437 brw_MOV( p
, tmp
[ 7 ], high_words( tmp
[ 3 ] ) );
2439 brw_MUL( p
, tmp
[ 4 ], tmp
[ 4 ], t
);
2440 brw_MUL( p
, tmp
[ 5 ], tmp
[ 5 ], t
);
2441 brw_MUL( p
, tmp
[ 6 ], tmp
[ 6 ], t
);
2442 brw_MUL( p
, tmp
[ 7 ], tmp
[ 7 ], t
);
2444 brw_ADD( p
, x0y0
, x0y0
, tmp
[ 4 ] );
2445 brw_ADD( p
, x0y1
, x0y1
, tmp
[ 5 ] );
2446 brw_ADD( p
, x1y0
, x1y0
, tmp
[ 6 ] );
2447 brw_ADD( p
, x1y1
, x1y1
, tmp
[ 7 ] );
2449 /* Interpolate in the y dimension: */
2450 brw_ADD( p
, x0y1
, x0y1
, negate( x0y0
) );
2451 brw_ADD( p
, x1y1
, x1y1
, negate( x1y0
) );
2452 brw_MUL( p
, x0y1
, x0y1
, interp
[ 1 ] );
2453 brw_MUL( p
, x1y1
, x1y1
, interp
[ 1 ] );
2454 brw_ADD( p
, x0y0
, x0y0
, x0y1
);
2455 brw_ADD( p
, x1y0
, x1y0
, x1y1
);
2457 /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
2458 time put the front face in tmp[ 1 ] and we're nearly there... */
2459 brw_ADD( p
, x1y0
, x1y0
, negate( x0y0
) );
2460 brw_MUL( p
, x1y0
, x1y0
, interp
[ 0 ] );
2461 brw_ADD( p
, tmp
[ 1 ], x0y0
, x1y0
);
2463 /* Another interpolation, in the z dimension: */
2464 brw_ADD( p
, tmp
[ 1 ], tmp
[ 1 ], negate( tmp
[ 0 ] ) );
2465 brw_MUL( p
, tmp
[ 1 ], tmp
[ 1 ], interp
[ 2 ] );
2466 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], tmp
[ 1 ] );
2468 /* Exit the loop if we've computed both cubes... */
/* origin records the index of the predicated IP add emitted next.  Its
   immediate is a placeholder (0) that brw_set_src1() fills in further
   down, once the exit address is known. */
2469 origin
= p
->nr_insn
;
2470 brw_push_insn_state( p
);
2471 brw_set_predicate_control( p
, BRW_PREDICATE_NORMAL
);
2472 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2473 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
2474 brw_pop_insn_state( p
);
2476 /* Save the result for the w=0 case, and increment the w coordinate: */
2477 brw_MOV( p
, w0
, tmp
[ 0 ] );
2478 brw_ADD( p
, high_words( floors
[ 1 ] ), high_words( floors
[ 1 ] ),
2481 /* Loop around for the other cube. Explicitly set the flag register
2482 (unfortunately we must spend an extra instruction to do this: we
2483 can't rely on a side effect of the previous MOV or ADD because
2484 conditional modifiers which are normally true might be false in
2485 exceptional circumstances, e.g. given a NaN input; the add to
2486 brw_ip_reg() is not suitable because the IP is not an 8-vector). */
2487 brw_push_insn_state( p
);
2488 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2489 brw_MOV( p
, brw_flag_reg(), brw_imm_uw( 0xFF ) );
2490 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
2491 brw_imm_d( ( loop
- p
->nr_insn
) << 4 ) );
2492 brw_pop_insn_state( p
);
2494 /* Patch the previous conditional branch now that we know the
2495 destination address. */
2496 brw_set_src1( p
->store
+ origin
,
2497 brw_imm_d( ( p
->nr_insn
- origin
) << 4 ) );
2499 /* The very last interpolation. */
2500 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], negate( w0
) );
2501 brw_MUL( p
, tmp
[ 0 ], tmp
[ 0 ], interp
[ 3 ] );
2502 brw_ADD( p
, tmp
[ 0 ], tmp
[ 0 ], w0
);
2504 /* scale by pow( 2, -15 ), as described above */
2505 brw_MUL( p
, param
[ 0 ], tmp
[ 0 ], brw_imm_f( 0.000030517578125 ) );
2507 release_tmps( c
, mark
);
2510 static void emit_noise4( struct brw_wm_compile
*c
,
2511 const struct prog_instruction
*inst
)
2513 struct brw_compile
*p
= &c
->func
;
2514 struct brw_reg src0
, src1
, src2
, src3
, param0
, param1
, param2
, param3
, dst
;
2515 GLuint mask
= inst
->DstReg
.WriteMask
;
2517 int mark
= mark_tmps( c
);
2519 assert( mark
== 0 );
2521 src0
= get_src_reg( c
, inst
, 0, 0 );
2522 src1
= get_src_reg( c
, inst
, 0, 1 );
2523 src2
= get_src_reg( c
, inst
, 0, 2 );
2524 src3
= get_src_reg( c
, inst
, 0, 3 );
2526 param0
= alloc_tmp( c
);
2527 param1
= alloc_tmp( c
);
2528 param2
= alloc_tmp( c
);
2529 param3
= alloc_tmp( c
);
2531 brw_MOV( p
, param0
, src0
);
2532 brw_MOV( p
, param1
, src1
);
2533 brw_MOV( p
, param2
, src2
);
2534 brw_MOV( p
, param3
, src3
);
2536 invoke_subroutine( c
, SUB_NOISE4
, noise4_sub
);
2538 /* Fill in the result: */
2539 brw_set_saturate( p
, inst
->SaturateMode
== SATURATE_ZERO_ONE
);
2540 for (i
= 0 ; i
< 4; i
++) {
2541 if (mask
& (1<<i
)) {
2542 dst
= get_dst_reg(c
, inst
, i
);
2543 brw_MOV( p
, dst
, param0
);
2546 if( inst
->SaturateMode
== SATURATE_ZERO_ONE
)
2547 brw_set_saturate( p
, 0 );
2549 release_tmps( c
, mark
);
/*
 * Emit the window-position X/Y computation.
 *
 * Fetches destination channels 0 and 1 and components 0 and 1 of source
 * operand 0.  For each of X and Y that is enabled in the write mask, an
 * instruction is emitted whose operands are the source viewed as words
 * (negated for Y) and an immediate built from c->key: -origin_x for X,
 * origin_y + drawable_height - 1 for Y.
 */
2552 static void emit_wpos_xy(struct brw_wm_compile
*c
,
2553 const struct prog_instruction
*inst
)
2555 struct brw_compile
*p
= &c
->func
;
2556 GLuint mask
= inst
->DstReg
.WriteMask
;
2557 struct brw_reg src0
[2], dst
[2];
2559 dst
[0] = get_dst_reg(c
, inst
, 0);
2560 dst
[1] = get_dst_reg(c
, inst
, 1);
2562 src0
[0] = get_src_reg(c
, inst
, 0, 0);
2563 src0
[1] = get_src_reg(c
, inst
, 0, 1);
2565 /* Calculate the pixel offset from window bottom left into destination
2568 if (mask
& WRITEMASK_X
) {
2569 /* X' = X - origin_x */
2572 retype(src0
[0], BRW_REGISTER_TYPE_W
),
2573 brw_imm_d(0 - c
->key
.origin_x
));
2576 if (mask
& WRITEMASK_Y
) {
2577 /* Y' = height - (Y - origin_y) = height + origin_y - Y */
2580 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
2581 brw_imm_d(c
->key
.origin_y
+ c
->key
.drawable_height
- 1));
/*
 * BIAS on SIMD8 not working yet...
 */
/*
 * Emit a texture sample with bias.
 *
 * The sampler unit is looked up through SamplerUnits[inst->TexSrcUnit].
 * Coordinates are moved into message registers 2..4, with the unused ones
 * zero-filled depending on inst->TexSrcTarget; source component 3 (the
 * bias) goes to message register 5 and a zero to message register 6.
 * The sample message uses the SIMD16 sample-bias message type and a
 * response length of 4.
 */
2588 static void emit_txb(struct brw_wm_compile
*c
,
2589 const struct prog_instruction
*inst
)
2591 struct brw_compile
*p
= &c
->func
;
2592 struct brw_reg dst
[4], src
[4], payload_reg
;
2593 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2596 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2598 for (i
= 0; i
< 4; i
++)
2599 dst
[i
] = get_dst_reg(c
, inst
, i
);
2600 for (i
= 0; i
< 4; i
++)
2601 src
[i
] = get_src_reg(c
, inst
, 0, i
);
/* Load the coordinate message registers; which sources are real and
   which are zero depends on the texture target. */
2603 switch (inst
->TexSrcTarget
) {
2604 case TEXTURE_1D_INDEX
:
2605 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
2606 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
2607 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
2609 case TEXTURE_2D_INDEX
:
2610 case TEXTURE_RECT_INDEX
:
2611 brw_MOV(p
, brw_message_reg(2), src
[0]);
2612 brw_MOV(p
, brw_message_reg(3), src
[1]);
2613 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
2616 brw_MOV(p
, brw_message_reg(2), src
[0]);
2617 brw_MOV(p
, brw_message_reg(3), src
[1]);
2618 brw_MOV(p
, brw_message_reg(4), src
[2]);
2621 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
2622 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
2624 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2626 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2627 SURF_INDEX_TEXTURE(unit
),
2629 inst
->DstReg
.WriteMask
, /* writemask */
2630 BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
, /* msg_type */
2631 4, /* response_length */
/*
 * Emit a plain texture sample.
 *
 * The sampler unit is looked up through SamplerUnits[inst->TexSrcUnit].
 * `shadow` is 1 when that unit's bit is set in c->key.shadowtex_mask and
 * selects a message length of 6 instead of 4.  The set of coordinates
 * written (`emit`) depends on inst->TexSrcTarget.  The sample message
 * uses the SIMD8 sample message type and a response length of 4.
 */
2637 static void emit_tex(struct brw_wm_compile
*c
,
2638 const struct prog_instruction
*inst
)
2640 struct brw_compile
*p
= &c
->func
;
2641 struct brw_reg dst
[4], src
[4], payload_reg
;
2642 GLuint unit
= c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
];
2646 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
2648 payload_reg
= get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
2650 for (i
= 0; i
< 4; i
++)
2651 dst
[i
] = get_dst_reg(c
, inst
, i
);
2652 for (i
= 0; i
< 4; i
++)
2653 src
[i
] = get_src_reg(c
, inst
, 0, i
);
2655 switch (inst
->TexSrcTarget
) {
2656 case TEXTURE_1D_INDEX
:
2660 case TEXTURE_2D_INDEX
:
2661 case TEXTURE_RECT_INDEX
:
2662 emit
= WRITEMASK_XY
;
2666 emit
= WRITEMASK_XYZ
;
2672 /* move/load S, T, R coords */
2673 for (i
= 0; i
< nr
; i
++) {
/* swz selects the source component for slot i: 0, 1, 2, 2. */
2674 static const GLuint swz
[4] = {0,1,2,2};
2676 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
2678 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
2683 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
2684 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
2688 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
2690 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
2691 SURF_INDEX_TEXTURE(unit
),
2693 inst
->DstReg
.WriteMask
, /* writemask */
2694 BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
, /* msg_type */
2695 4, /* response_length */
2696 shadow
? 6 : 4, /* msg_length */
2700 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
2705 * Resolve subroutine calls after code emit is done.
2707 static void post_wm_emit( struct brw_wm_compile
*c
)
2709 brw_resolve_cals(&c
->func
);
/*
 * Translate every instruction in c->prog_instructions (c->nr_fp_insns of
 * them) into hardware instructions.
 *
 * For each instruction: constants are fetched first when
 * c->fp->use_const_buffer is set, the conditional modifier is set to NZ
 * when inst->CondUpdate is set and to NONE otherwise, and the opcode is
 * then dispatched to the matching emit_*() helper or to inline
 * flow-control handling.  if_inst[] and loop_inst[] act as stacks of open
 * IF and DO instructions, indexed by if_insn and loop_insn.
 */
2712 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2715 #define MAX_LOOP_DEPTH 32
2716 struct brw_instruction
*if_inst
[MAX_IFSN
], *loop_inst
[MAX_LOOP_DEPTH
];
2717 struct brw_instruction
*inst0
, *inst1
;
2718 int i
, if_insn
= 0, loop_insn
= 0;
2719 struct brw_compile
*p
= &c
->func
;
2720 struct brw_indirect stack_index
= brw_indirect(0, 0);
2723 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Load the address register used as the subroutine stack pointer with
   the address of c->stack. */
2724 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
2726 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
2727 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
2732 _mesa_printf("Inst %d: ", i
);
2733 _mesa_print_instruction(inst
);
2736 /* fetch any constants that this instruction needs */
2737 if (c
->fp
->use_const_buffer
)
2738 fetch_constants(c
, inst
);
2740 if (inst
->CondUpdate
)
2741 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
2743 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
2745 switch (inst
->Opcode
) {
2747 emit_pixel_xy(c
, inst
);
2750 emit_delta_xy(c
, inst
);
2753 emit_pixel_w(c
, inst
);
2756 emit_linterp(c
, inst
);
2759 emit_pinterp(c
, inst
);
2762 emit_cinterp(c
, inst
);
2765 emit_wpos_xy(c
, inst
);
2768 emit_fb_write(c
, inst
);
2770 case WM_FRONTFACING
:
2771 emit_frontfacing(c
, inst
);
2795 emit_trunc(c
, inst
);
2832 emit_min_max(c
, inst
);
2868 emit_noise1(c
, inst
);
2871 emit_noise2(c
, inst
);
2874 emit_noise3(c
, inst
);
2877 emit_noise4(c
, inst
);
2889 assert(if_insn
< MAX_IFSN
);
2890 if_inst
[if_insn
++] = brw_IF(p
, BRW_EXECUTE_8
);
2893 if_inst
[if_insn
-1] = brw_ELSE(p
, if_inst
[if_insn
-1]);
2896 assert(if_insn
> 0);
2897 brw_ENDIF(p
, if_inst
[--if_insn
]);
2900 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Call sequence: store a return address (IP + 3*16) in the slot the stack
   pointer refers to, advance the stack pointer by 4, record the call via
   brw_save_call() so it can be resolved later, then emit the jump. */
2906 brw_push_insn_state(p
);
2907 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2908 brw_set_access_mode(p
, BRW_ALIGN_1
);
2909 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2910 brw_set_access_mode(p
, BRW_ALIGN_16
);
2911 brw_ADD(p
, get_addr_reg(stack_index
),
2912 get_addr_reg(stack_index
), brw_imm_d(4));
2913 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
2914 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2915 brw_pop_insn_state(p
);
/* Return sequence: move the stack pointer back by 4 and load IP from the
   slot it now refers to. */
2919 brw_push_insn_state(p
);
2920 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2921 brw_ADD(p
, get_addr_reg(stack_index
),
2922 get_addr_reg(stack_index
), brw_imm_d(-4));
2923 brw_set_access_mode(p
, BRW_ALIGN_1
);
2924 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
2925 brw_set_access_mode(p
, BRW_ALIGN_16
);
2926 brw_pop_insn_state(p
);
/* NOTE(review): the IF stack is guarded by assert(if_insn < MAX_IFSN),
   but no equivalent check against MAX_LOOP_DEPTH is visible before
   loop_inst[loop_insn++] is written below -- confirm and add one. */
2929 case OPCODE_BGNLOOP
:
2930 /* XXX may need to invalidate the current_constant regs */
2931 loop_inst
[loop_insn
++] = brw_DO(p
, BRW_EXECUTE_8
);
2935 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2939 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2941 case OPCODE_ENDLOOP
:
2943 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_insn
]);
2944 /* patch all the BREAK instructions from
2946 while (inst0
> loop_inst
[loop_insn
]) {
2948 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
2949 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
+ 1;
2950 inst0
->bits3
.if_else
.pop_count
= 0;
2951 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
2952 inst0
->bits3
.if_else
.jump_count
= inst1
- inst0
;
2953 inst0
->bits3
.if_else
.pop_count
= 0;
2958 _mesa_printf("unsupported IR in fragment shader %d\n",
2962 if (inst
->CondUpdate
)
2963 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2965 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
/**
 * Do GPU code generation for shaders that use GLSL features such as
 * flow control.  Other shaders will be compiled with the non-GLSL
 * code generation path.
 */
/*
 * Entry point for GLSL-path fragment program code generation.
 *
 * Runs brw_wm_emit_glsl() on the compile state, printing a banner before
 * and the resulting program after when DEBUG_WM is set in INTEL_DEBUG,
 * and finally records register usage in c->prog_data.
 */
2975 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2977 if (INTEL_DEBUG
& DEBUG_WM
) {
2978 _mesa_printf("brw_wm_glsl_emit:\n");
2981 /* initial instruction translation/simplification */
2984 /* actual code generation */
2985 brw_wm_emit_glsl(brw
, c
);
2987 if (INTEL_DEBUG
& DEBUG_WM
) {
2988 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
/* total_grf comes from num_grf_used(); total_scratch is set to 0. */
2991 c
->prog_data
.total_grf
= num_grf_used(c
);
2992 c
->prog_data
.total_scratch
= 0;