1 #include "main/macros.h"
2 #include "program/prog_parameter.h"
3 #include "program/prog_print.h"
4 #include "program/prog_optimize.h"
5 #include "brw_context.h"
9 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
10 const struct prog_instruction
*inst
,
14 * Determine if the given fragment program uses GLSL features such
15 * as flow conditionals, loops, subroutines.
16 * Some GLSL shaders may use these features, others might not.
18 GLboolean
brw_wm_is_glsl(const struct gl_fragment_program
*fp
)
22 if (unlikely(INTEL_DEBUG
& DEBUG_GLSL_FORCE
))
25 for (i
= 0; i
< fp
->Base
.NumInstructions
; i
++) {
26 const struct prog_instruction
*inst
= &fp
->Base
.Instructions
[i
];
27 switch (inst
->Opcode
) {
46 reclaim_temps(struct brw_wm_compile
*c
);
49 /** Mark GRF register as used. */
51 prealloc_grf(struct brw_wm_compile
*c
, int r
)
53 c
->used_grf
[r
] = GL_TRUE
;
57 /** Mark given GRF register as not in use. */
59 release_grf(struct brw_wm_compile
*c
, int r
)
61 /*assert(c->used_grf[r]);*/
62 c
->used_grf
[r
] = GL_FALSE
;
63 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
67 /** Return index of a free GRF, mark it as used. */
69 alloc_grf(struct brw_wm_compile
*c
)
72 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
73 if (!c
->used_grf
[r
]) {
74 c
->used_grf
[r
] = GL_TRUE
;
75 c
->first_free_grf
= r
+ 1; /* a guess */
80 /* no free temps, try to reclaim some */
82 c
->first_free_grf
= 0;
85 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
86 if (!c
->used_grf
[r
]) {
87 c
->used_grf
[r
] = GL_TRUE
;
88 c
->first_free_grf
= r
+ 1; /* a guess */
93 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
94 assert(c
->used_grf
[r
]);
97 /* really, no free GRF regs found */
98 if (!c
->out_of_regs
) {
99 /* print warning once per compilation */
100 _mesa_warning(NULL
, "i965: ran out of registers for fragment program");
101 c
->out_of_regs
= GL_TRUE
;
108 /** Return number of GRF registers used */
110 num_grf_used(const struct brw_wm_compile
*c
)
113 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
122 * Record the mapping of a Mesa register to a hardware register.
124 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
125 int component
, struct brw_reg reg
)
127 c
->wm_regs
[file
][index
][component
].reg
= reg
;
128 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
131 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
135 /* if we need to allocate another temp, grow the tmp_regs[] array */
136 if (c
->tmp_index
== c
->tmp_max
) {
137 int r
= alloc_grf(c
);
139 /*printf("Out of temps in %s\n", __FUNCTION__);*/
140 r
= 50; /* XXX random register! */
142 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
145 /* form the GRF register */
146 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
147 /*printf("alloc_temp %d\n", reg.nr);*/
148 assert(reg
.nr
< BRW_WM_MAX_GRF
);
154 * Save current temp register info.
155 * There must be a matching call to release_tmps().
157 static int mark_tmps(struct brw_wm_compile
*c
)
162 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
168 * Convert Mesa src register to brw register.
170 * Since we're running in SOA mode each Mesa register corresponds to four
171 * hardware registers. We allocate the hardware registers as needed here.
173 * \param file register file, one of PROGRAM_x
174 * \param index register number
175 * \param component src component (X=0, Y=1, Z=2, W=3)
176 * \param nr not used?!?
177 * \param neg negate value?
178 * \param abs take absolute value?
180 static struct brw_reg
181 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
182 int nr
, GLuint neg
, GLuint abs
)
186 case PROGRAM_STATE_VAR
:
187 case PROGRAM_CONSTANT
:
188 case PROGRAM_UNIFORM
:
189 file
= PROGRAM_STATE_VAR
;
191 case PROGRAM_UNDEFINED
:
192 return brw_null_reg();
193 case PROGRAM_TEMPORARY
:
196 case PROGRAM_PAYLOAD
:
199 _mesa_problem(NULL
, "Unexpected file in get_reg()");
200 return brw_null_reg();
204 assert(component
< 4);
206 /* see if we've already allocated a HW register for this Mesa register */
207 if (c
->wm_regs
[file
][index
][component
].inited
) {
209 reg
= c
->wm_regs
[file
][index
][component
].reg
;
212 /* no, allocate new register */
213 int grf
= alloc_grf(c
);
214 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
216 /* totally out of temps */
217 grf
= 51; /* XXX random register! */
220 reg
= brw_vec8_grf(grf
, 0);
221 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
223 set_reg(c
, file
, index
, component
, reg
);
226 if (neg
& (1 << component
)) {
237 * This is called if we run out of GRF registers. Examine the live intervals
238 * of temp regs in the program and free those which won't be used again.
241 reclaim_temps(struct brw_wm_compile
*c
)
243 GLint intBegin
[MAX_PROGRAM_TEMPS
];
244 GLint intEnd
[MAX_PROGRAM_TEMPS
];
247 /*printf("Reclaim temps:\n");*/
249 _mesa_find_temp_intervals(c
->prog_instructions
, c
->nr_fp_insns
,
252 for (index
= 0; index
< MAX_PROGRAM_TEMPS
; index
++) {
253 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
254 /* program temp[i] can be freed */
256 /*printf(" temp[%d] is dead\n", index);*/
257 for (component
= 0; component
< 4; component
++) {
258 if (c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
) {
259 int r
= c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].reg
.nr
;
262 printf(" Reclaim temp %d, reg %d at inst %d\n",
263 index, r, c->cur_inst);
265 c
->wm_regs
[PROGRAM_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
276 * Preallocate registers. This sets up the Mesa to hardware register
277 * mapping for certain registers, such as constants (uniforms/state vars)
280 static void prealloc_reg(struct brw_wm_compile
*c
)
282 struct intel_context
*intel
= &c
->func
.brw
->intel
;
285 int urb_read_length
= 0;
286 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
;
287 GLuint reg_index
= 0;
289 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
290 c
->first_free_grf
= 0;
292 for (i
= 0; i
< 4; i
++) {
293 if (i
< (c
->key
.nr_payload_regs
+ 1) / 2)
294 reg
= brw_vec8_grf(i
* 2, 0);
296 reg
= brw_vec8_grf(0, 0);
297 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
299 set_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_W
, 0,
300 brw_vec8_grf(c
->key
.source_w_reg
, 0));
301 reg_index
+= c
->key
.nr_payload_regs
;
305 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
306 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
308 /* use a real constant buffer, or just use a section of the GRF? */
309 /* XXX this heuristic may need adjustment... */
310 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80) {
311 for (i
= 0; i
< nr_params
; i
++) {
312 float *pv
= c
->fp
->program
.Base
.Parameters
->ParameterValues
[i
];
313 for (j
= 0; j
< 4; j
++) {
314 c
->prog_data
.pull_param
[c
->prog_data
.nr_pull_params
] = &pv
[j
];
315 c
->prog_data
.nr_pull_params
++;
319 c
->prog_data
.nr_params
= 0;
321 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
323 if (!c
->prog_data
.nr_pull_params
) {
324 const struct gl_program_parameter_list
*plist
=
325 c
->fp
->program
.Base
.Parameters
;
328 /* number of float constants in CURBE */
329 c
->prog_data
.nr_params
= 4 * nr_params
;
331 /* loop over program constants (float[4]) */
332 for (i
= 0; i
< nr_params
; i
++) {
333 /* loop over XYZW channels */
334 for (j
= 0; j
< 4; j
++, index
++) {
335 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
336 /* Save pointer to parameter/constant value.
337 * Constants will be copied in prepare_constant_buffer()
339 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
340 set_reg(c
, PROGRAM_STATE_VAR
, i
, j
, reg
);
343 /* number of constant regs used (each reg is float[8]) */
344 c
->nr_creg
= ALIGN(nr_params
, 2) / 2;
345 reg_index
+= c
->nr_creg
;
349 /* fragment shader inputs: One 2-reg pair of interpolation
350 * coefficients for each vec4 to be set up.
352 if (intel
->gen
>= 6) {
353 for (i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
354 if (!(c
->fp
->program
.Base
.InputsRead
& BITFIELD64_BIT(i
)))
357 reg
= brw_vec8_grf(reg_index
, 0);
358 for (j
= 0; j
< 4; j
++) {
359 set_reg(c
, PROGRAM_PAYLOAD
, i
, j
, reg
);
363 urb_read_length
= reg_index
;
365 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
368 if (i
>= VERT_RESULT_VAR0
)
369 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
370 else if (i
<= VERT_RESULT_TEX7
)
375 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
376 urb_read_length
= reg_index
;
377 reg
= brw_vec8_grf(reg_index
, 0);
378 for (j
= 0; j
< 4; j
++)
379 set_reg(c
, PROGRAM_PAYLOAD
, fp_input
, j
, reg
);
381 if (c
->key
.vp_outputs_written
& BITFIELD64_BIT(i
)) {
387 c
->prog_data
.first_curbe_grf
= c
->key
.nr_payload_regs
;
388 c
->prog_data
.urb_read_length
= urb_read_length
;
389 c
->prog_data
.curb_read_length
= c
->nr_creg
;
390 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
392 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
395 /* mark GRF regs [0..reg_index-1] as in-use */
396 for (i
= 0; i
< reg_index
; i
++)
399 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
400 prealloc_grf(c
, 126);
401 prealloc_grf(c
, 127);
403 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
404 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
405 struct brw_reg dst
[4];
407 switch (inst
->Opcode
) {
410 /* Allocate the channels of texture results contiguously,
411 * since they are written out that way by the sampler unit.
413 for (j
= 0; j
< 4; j
++) {
414 dst
[j
] = get_dst_reg(c
, inst
, j
);
416 assert(dst
[j
].nr
== dst
[j
- 1].nr
+ 1);
424 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
425 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
427 switch (inst
->Opcode
) {
429 /* Allocate WM_DELTAXY destination on G45/GM45 to an
430 * even-numbered GRF if possible so that we can use the PLN
433 if (inst
->DstReg
.WriteMask
== WRITEMASK_XY
&&
434 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][0].inited
&&
435 !c
->wm_regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
][1].inited
&&
436 (IS_G4X(intel
->intelScreen
->deviceID
) || intel
->gen
== 5)) {
439 for (grf
= c
->first_free_grf
& ~1;
440 grf
< BRW_WM_MAX_GRF
;
443 if (!c
->used_grf
[grf
] && !c
->used_grf
[grf
+ 1]) {
444 c
->used_grf
[grf
] = GL_TRUE
;
445 c
->used_grf
[grf
+ 1] = GL_TRUE
;
446 c
->first_free_grf
= grf
+ 2; /* a guess */
448 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 0,
449 brw_vec8_grf(grf
, 0));
450 set_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, 1,
451 brw_vec8_grf(grf
+ 1, 0));
461 /* An instruction may reference up to three constants.
462 * They'll be found in these registers.
463 * XXX alloc these on demand!
465 if (c
->prog_data
.nr_pull_params
) {
466 for (i
= 0; i
< 3; i
++) {
467 c
->current_const
[i
].index
= -1;
468 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
472 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
473 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
479 * Check if any of the instruction's src registers are constants, uniforms,
480 * or statevars. If so, fetch any constants that we don't already have in
481 * the three GRF slots.
483 static void fetch_constants(struct brw_wm_compile
*c
,
484 const struct prog_instruction
*inst
)
486 struct brw_compile
*p
= &c
->func
;
489 /* loop over instruction src regs */
490 for (i
= 0; i
< 3; i
++) {
491 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
492 if (src
->File
== PROGRAM_STATE_VAR
||
493 src
->File
== PROGRAM_CONSTANT
||
494 src
->File
== PROGRAM_UNIFORM
) {
495 c
->current_const
[i
].index
= src
->Index
;
498 printf(" fetch const[%d] for arg %d into reg %d\n",
499 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
502 /* need to fetch the constant now */
503 brw_oword_block_read(p
,
504 c
->current_const
[i
].reg
,
507 SURF_INDEX_FRAG_CONST_BUFFER
);
514 * Convert Mesa dst register to brw register.
516 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
517 const struct prog_instruction
*inst
,
521 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
526 static struct brw_reg
527 get_src_reg_const(struct brw_wm_compile
*c
,
528 const struct prog_instruction
*inst
,
529 GLuint srcRegIndex
, GLuint component
)
531 /* We should have already fetched the constant from the constant
532 * buffer in fetch_constants(). Now we just have to return a
533 * register description that extracts the needed component and
534 * smears it across all eight vector components.
536 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
537 struct brw_reg const_reg
;
539 assert(component
< 4);
540 assert(srcRegIndex
< 3);
541 assert(c
->current_const
[srcRegIndex
].index
!= -1);
542 const_reg
= c
->current_const
[srcRegIndex
].reg
;
544 /* extract desired float from the const_reg, and smear */
545 const_reg
= stride(const_reg
, 0, 1, 0);
546 const_reg
.subnr
= component
* 4;
548 if (src
->Negate
& (1 << component
))
549 const_reg
= negate(const_reg
);
551 const_reg
= brw_abs(const_reg
);
554 printf(" form const[%d].%d for arg %d, reg %d\n",
555 c
->current_const
[srcRegIndex
].index
,
566 * Convert Mesa src register to brw register.
568 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
569 const struct prog_instruction
*inst
,
570 GLuint srcRegIndex
, GLuint channel
)
572 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
574 const GLuint component
= GET_SWZ(src
->Swizzle
, channel
);
576 /* Only one immediate value can be used per native opcode, and it
577 * has to be in the src1 slot, so not all Mesa instructions will get
578 * to take advantage of immediate constants.
580 if (brw_wm_arg_can_be_immediate(inst
->Opcode
, srcRegIndex
)) {
581 const struct gl_program_parameter_list
*params
;
583 params
= c
->fp
->program
.Base
.Parameters
;
585 /* Extended swizzle terms */
586 if (component
== SWIZZLE_ZERO
) {
587 return brw_imm_f(0.0F
);
588 } else if (component
== SWIZZLE_ONE
) {
590 return brw_imm_f(-1.0F
);
592 return brw_imm_f(1.0F
);
595 if (src
->File
== PROGRAM_CONSTANT
) {
596 float f
= params
->ParameterValues
[src
->Index
][component
];
607 if (c
->prog_data
.nr_pull_params
&&
608 (src
->File
== PROGRAM_STATE_VAR
||
609 src
->File
== PROGRAM_CONSTANT
||
610 src
->File
== PROGRAM_UNIFORM
)) {
611 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
614 /* other type of source register */
615 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
616 src
->Negate
, src
->Abs
);
620 static void emit_arl(struct brw_wm_compile
*c
,
621 const struct prog_instruction
*inst
)
623 struct brw_compile
*p
= &c
->func
;
624 struct brw_reg src0
, addr_reg
;
625 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
626 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
628 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
629 brw_MOV(p
, addr_reg
, src0
);
630 brw_set_saturate(p
, 0);
633 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
635 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
639 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
641 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
644 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
646 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
649 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
651 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
656 * Resolve subroutine calls after code emit is done.
658 static void post_wm_emit( struct brw_wm_compile
*c
)
660 brw_resolve_cals(&c
->func
);
664 get_argument_regs(struct brw_wm_compile
*c
,
665 const struct prog_instruction
*inst
,
668 struct brw_reg
*regs
,
671 struct brw_compile
*p
= &c
->func
;
674 for (i
= 0; i
< 4; i
++) {
675 if (mask
& (1 << i
)) {
676 regs
[i
] = get_src_reg(c
, inst
, index
, i
);
678 /* Unalias destination registers from our sources. */
679 if (regs
[i
].file
== BRW_GENERAL_REGISTER_FILE
) {
680 for (j
= 0; j
< 4; j
++) {
681 if (memcmp(®s
[i
], &dst
[j
], sizeof(regs
[0])) == 0) {
682 struct brw_reg tmp
= alloc_tmp(c
);
683 brw_MOV(p
, tmp
, regs
[i
]);
693 static void brw_wm_emit_glsl(struct brw_context
*brw
, struct brw_wm_compile
*c
)
695 struct intel_context
*intel
= &brw
->intel
;
696 #define MAX_IF_DEPTH 32
697 #define MAX_LOOP_DEPTH 32
698 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
699 int if_depth_in_loop
[MAX_LOOP_DEPTH
];
700 GLuint i
, if_depth
= 0, loop_depth
= 0;
701 struct brw_compile
*p
= &c
->func
;
702 struct brw_indirect stack_index
= brw_indirect(0, 0);
704 c
->out_of_regs
= GL_FALSE
;
706 if_depth_in_loop
[loop_depth
] = 0;
709 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
710 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
713 brw_set_acc_write_control(p
, 1);
715 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
716 const struct prog_instruction
*inst
= &c
->prog_instructions
[i
];
718 struct brw_reg args
[3][4], dst
[4];
720 int mark
= mark_tmps( c
);
725 printf("Inst %d: ", i
);
726 _mesa_print_instruction(inst
);
729 /* fetch any constants that this instruction needs */
730 if (c
->prog_data
.nr_pull_params
)
731 fetch_constants(c
, inst
);
733 if (inst
->Opcode
!= OPCODE_ARL
) {
734 for (j
= 0; j
< 4; j
++) {
735 if (inst
->DstReg
.WriteMask
& (1 << j
))
736 dst
[j
] = get_dst_reg(c
, inst
, j
);
738 dst
[j
] = brw_null_reg();
741 for (j
= 0; j
< brw_wm_nr_args(inst
->Opcode
); j
++)
742 get_argument_regs(c
, inst
, j
, dst
, args
[j
], WRITEMASK_XYZW
);
744 dst_flags
= inst
->DstReg
.WriteMask
;
745 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
746 dst_flags
|= SATURATE
;
748 if (inst
->CondUpdate
)
749 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
751 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
753 switch (inst
->Opcode
) {
755 emit_pixel_xy(c
, dst
, dst_flags
);
758 emit_delta_xy(p
, dst
, dst_flags
, args
[0]);
761 emit_pixel_w(c
, dst
, dst_flags
, args
[0], args
[1]);
764 emit_linterp(p
, dst
, dst_flags
, args
[0], args
[1]);
767 emit_pinterp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
770 emit_cinterp(p
, dst
, dst_flags
, args
[0]);
773 emit_wpos_xy(c
, dst
, dst_flags
, args
[0]);
776 emit_fb_write(c
, args
[0], args
[1], args
[2],
777 INST_AUX_GET_TARGET(inst
->Aux
),
778 inst
->Aux
& INST_AUX_EOT
);
781 emit_frontfacing(p
, dst
, dst_flags
);
784 emit_alu2(p
, brw_ADD
, dst
, dst_flags
, args
[0], args
[1]);
790 emit_alu1(p
, brw_FRC
, dst
, dst_flags
, args
[0]);
793 emit_alu1(p
, brw_RNDD
, dst
, dst_flags
, args
[0]);
796 emit_lrp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
799 emit_alu1(p
, brw_RNDZ
, dst
, dst_flags
, args
[0]);
803 emit_alu1(p
, brw_MOV
, dst
, dst_flags
, args
[0]);
806 emit_dp2(p
, dst
, dst_flags
, args
[0], args
[1]);
809 emit_dp3(p
, dst
, dst_flags
, args
[0], args
[1]);
812 emit_dp4(p
, dst
, dst_flags
, args
[0], args
[1]);
815 emit_xpd(p
, dst
, dst_flags
, args
[0], args
[1]);
818 emit_dph(p
, dst
, dst_flags
, args
[0], args
[1]);
821 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, dst_flags
, args
[0]);
824 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, dst_flags
, args
[0]);
827 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, dst_flags
, args
[0]);
830 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, dst_flags
, args
[0]);
833 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, dst_flags
, args
[0]);
836 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, dst_flags
, args
[0]);
839 emit_cmp(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
842 emit_min(p
, dst
, dst_flags
, args
[0], args
[1]);
845 emit_max(p
, dst
, dst_flags
, args
[0], args
[1]);
849 emit_ddxy(p
, dst
, dst_flags
, (inst
->Opcode
== OPCODE_DDX
),
853 emit_sop(p
, dst
, dst_flags
,
854 BRW_CONDITIONAL_L
, args
[0], args
[1]);
857 emit_sop(p
, dst
, dst_flags
,
858 BRW_CONDITIONAL_LE
, args
[0], args
[1]);
861 emit_sop(p
, dst
, dst_flags
,
862 BRW_CONDITIONAL_G
, args
[0], args
[1]);
865 emit_sop(p
, dst
, dst_flags
,
866 BRW_CONDITIONAL_GE
, args
[0], args
[1]);
869 emit_sop(p
, dst
, dst_flags
,
870 BRW_CONDITIONAL_EQ
, args
[0], args
[1]);
873 emit_sop(p
, dst
, dst_flags
,
874 BRW_CONDITIONAL_NEQ
, args
[0], args
[1]);
877 emit_sign(p
, dst
, dst_flags
, args
[0]);
880 emit_alu2(p
, brw_MUL
, dst
, dst_flags
, args
[0], args
[1]);
883 emit_math2(c
, BRW_MATH_FUNCTION_POW
,
884 dst
, dst_flags
, args
[0], args
[1]);
887 emit_mad(p
, dst
, dst_flags
, args
[0], args
[1], args
[2]);
890 emit_tex(c
, dst
, dst_flags
, args
[0],
891 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
895 (c
->key
.shadowtex_mask
& (1 << inst
->TexSrcUnit
)) != 0);
898 emit_txb(c
, dst
, dst_flags
, args
[0],
899 get_reg(c
, PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
,
902 c
->fp
->program
.Base
.SamplerUnits
[inst
->TexSrcUnit
]);
908 assert(if_depth
< MAX_IF_DEPTH
);
909 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
910 if_depth_in_loop
[loop_depth
]++;
913 assert(if_depth
> 0);
914 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
917 assert(if_depth
> 0);
918 brw_ENDIF(p
, if_inst
[--if_depth
]);
919 if_depth_in_loop
[loop_depth
]--;
922 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
928 brw_push_insn_state(p
);
929 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
930 brw_set_access_mode(p
, BRW_ALIGN_1
);
931 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
932 brw_set_access_mode(p
, BRW_ALIGN_16
);
933 brw_ADD(p
, get_addr_reg(stack_index
),
934 get_addr_reg(stack_index
), brw_imm_d(4));
935 brw_save_call(&c
->func
, inst
->Comment
, p
->nr_insn
);
936 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
937 brw_pop_insn_state(p
);
941 brw_push_insn_state(p
);
942 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
943 brw_ADD(p
, get_addr_reg(stack_index
),
944 get_addr_reg(stack_index
), brw_imm_d(-4));
945 brw_set_access_mode(p
, BRW_ALIGN_1
);
946 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
947 brw_set_access_mode(p
, BRW_ALIGN_16
);
948 brw_pop_insn_state(p
);
952 /* XXX may need to invalidate the current_constant regs */
953 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
954 if_depth_in_loop
[loop_depth
] = 0;
957 brw_BREAK(p
, if_depth_in_loop
[loop_depth
]);
958 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
961 brw_CONT(p
, if_depth_in_loop
[loop_depth
]);
962 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
966 struct brw_instruction
*inst0
, *inst1
;
972 assert(loop_depth
> 0);
974 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
975 /* patch all the BREAK/CONT instructions from last BGNLOOP */
976 while (inst0
> loop_inst
[loop_depth
]) {
978 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
979 inst0
->bits3
.if_else
.jump_count
== 0) {
980 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
982 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
983 inst0
->bits3
.if_else
.jump_count
== 0) {
984 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
990 printf("unsupported opcode %d (%s) in fragment shader\n",
991 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
992 _mesa_opcode_string(inst
->Opcode
) : "unknown");
995 /* Release temporaries containing any unaliased source regs. */
996 release_tmps( c
, mark
);
998 if (inst
->CondUpdate
)
999 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1001 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1005 if (unlikely(INTEL_DEBUG
& DEBUG_WM
)) {
1006 printf("wm-native:\n");
1007 for (i
= 0; i
< p
->nr_insn
; i
++)
1008 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);
1014 * Do GPU code generation for shaders that use GLSL features such as
1015 * flow control. Other shaders will be compiled with the
1017 void brw_wm_glsl_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1019 if (unlikely(INTEL_DEBUG
& DEBUG_WM
)) {
1020 printf("brw_wm_glsl_emit:\n");
1023 /* initial instruction translation/simplification */
1026 /* actual code generation */
1027 brw_wm_emit_glsl(brw
, c
);
1029 if (unlikely(INTEL_DEBUG
& DEBUG_WM
)) {
1030 brw_wm_print_program(c
, "brw_wm_glsl_emit done");
1033 c
->prog_data
.total_grf
= num_grf_used(c
);
1034 c
->prog_data
.total_scratch
= 0;