2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "program/program.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "brw_context.h"
/* Reports whether source operand `arg` of `opcode` may be supplied as an
 * immediate.  OPCODE_MAD and OPCODE_LRP get a dedicated test on arg 1 or 2;
 * for everything else the result is `arg == opcode_array[opcode] - 1`.
 * NOTE(review): the range test below compares with '>' against
 * ARRAY_SIZE(opcode_array), so opcode == ARRAY_SIZE(opcode_array) still
 * reaches the opcode_array[opcode] lookup, one element past the table;
 * '>=' looks intended -- confirm.
 */
40 /* Return the SrcReg index of the channels that can be immediate float operands
41 * instead of usage of PROGRAM_CONSTANT values through push/pull.
44 brw_vs_arg_can_be_immediate(enum prog_opcode opcode
, int arg
)
46 int opcode_array
[] = {
66 /* These opcodes get broken down in a way that allow two
67 * args to be immediates.
69 if (opcode
== OPCODE_MAD
|| opcode
== OPCODE_LRP
) {
70 if (arg
== 1 || arg
== 2)
74 if (opcode
> ARRAY_SIZE(opcode_array
))
77 return arg
== opcode_array
[opcode
] - 1;
/* Scratch-register allocator: builds a vec8 GRF reference at index
 * c->last_tmp, then pre-increments c->last_tmp and raises
 * c->prog_data.total_grf to the new value whenever that value exceeds it.
 */
80 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
82 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
84 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
85 c
->prog_data
.total_grf
= c
->last_tmp
;
/* Tests whether `tmp` is the register just below the allocation cursor
 * (tmp.nr == c->last_tmp - 1).
 * NOTE(review): the statement guarded by this test is not visible in this
 * listing -- confirm against the full source what is done on a match.
 */
90 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
92 if (tmp
.nr
== c
->last_tmp
-1)
/* Resets the scratch allocation cursor: c->last_tmp is set back to
 * c->first_tmp.
 */
96 static void release_tmps( struct brw_vs_compile
*c
)
98 c
->last_tmp
= c
->first_tmp
;
/* Walks every instruction of the vertex program and returns the smallest
 * DstReg.Index among those whose destination is PROGRAM_OUTPUT with RelAddr
 * set.  Returns VERT_RESULT_MAX when no instruction matches.
 */
102 get_first_reladdr_output(struct gl_vertex_program
*vp
)
105 int first_reladdr_output
= VERT_RESULT_MAX
;
107 for (i
= 0; i
< vp
->Base
.NumInstructions
; i
++) {
108 struct prog_instruction
*inst
= vp
->Base
.Instructions
+ i
;
110 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
&&
111 inst
->DstReg
.RelAddr
&&
112 inst
->DstReg
.Index
< first_reladdr_output
)
113 first_reladdr_output
= inst
->DstReg
.Index
;
116 return first_reladdr_output
;
/* When c->vp->use_const_buffer is set, stores -1 in the index field of each
 * of the three c->current_const[] slots; otherwise does nothing visible
 * here.
 */
119 /* Clears the record of which vp_const_buffer elements have been
120 * loaded into our constant buffer registers, for the starts of new
121 * blocks after control flow.
124 clear_current_const(struct brw_vs_compile
*c
)
128 if (c
->vp
->use_const_buffer
) {
129 for (i
= 0; i
< 3; i
++) {
130 c
->current_const
[i
].index
= -1;
136 * Preallocate GRF register before code emit.
137 * Do things as simply as possible. Allocate and populate all regs
140 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
142 struct intel_context
*intel
= &c
->func
.brw
->intel
;
143 GLuint i
, reg
= 0, mrf
;
144 int attributes_in_vue
;
145 int first_reladdr_output
;
149 /* Determine whether to use a real constant buffer or use a block
150 * of GRF registers for constants. The latter is faster but only
151 * works if everything fits in the GRF.
152 * XXX this heuristic/check may need some fine tuning...
154 if (c
->vp
->program
.Base
.Parameters
->NumParameters
+
155 c
->vp
->program
.Base
.NumTemporaries
+ 20 > BRW_MAX_GRF
)
156 c
->vp
->use_const_buffer
= GL_TRUE
;
158 c
->vp
->use_const_buffer
= GL_FALSE
;
160 /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
162 /* r0 -- reserved as usual
164 c
->r0
= brw_vec8_grf(reg
, 0);
167 /* User clip planes from curbe:
169 if (c
->key
.nr_userclip
) {
170 if (intel
->gen
>= 6) {
171 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
172 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
173 (i
% 2) * 4), 0, 4, 1);
175 reg
+= ALIGN(c
->key
.nr_userclip
, 2) / 2;
177 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
178 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ (6 + i
) / 2,
179 (i
% 2) * 4), 0, 4, 1);
181 reg
+= (ALIGN(6 + c
->key
.nr_userclip
, 4) / 4) * 2;
186 /* Assign some (probably all) of the vertex program constants to
187 * the push constant buffer/CURBE.
189 * There's an obvious limit to the number of push constants equal to
190 * the number of registers available, and that number is smaller
191 * than the minimum maximum number of vertex program parameters, so
192 * support for pull constants is required if we overflow.
193 * Additionally, on gen6 the number of push constants is even
196 * When there's relative addressing, we don't know what range of
197 * Mesa IR registers can be accessed. And generally, when relative
198 * addressing is used we also have too many constants to load them
199 * all as push constants. So, we'll just support relative
200 * addressing out of the pull constant buffers, and try to load as
201 * many statically-accessed constants into the push constant buffer
204 if (intel
->gen
>= 6) {
205 /* We can only load 32 regs of push constants. */
206 max_constant
= 32 * 2 - c
->key
.nr_userclip
;
208 max_constant
= BRW_MAX_GRF
- 20 - c
->vp
->program
.Base
.NumTemporaries
;
211 /* constant_map maps from ParameterValues[] index to index in the
212 * push constant buffer, or -1 if it's only in the pull constant
215 memset(c
->constant_map
, -1, c
->vp
->program
.Base
.Parameters
->NumParameters
);
217 i
< c
->vp
->program
.Base
.NumInstructions
&& constant
< max_constant
;
219 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[i
];
222 for (arg
= 0; arg
< 3 && constant
< max_constant
; arg
++) {
223 if (inst
->SrcReg
[arg
].File
!= PROGRAM_STATE_VAR
&&
224 inst
->SrcReg
[arg
].File
!= PROGRAM_CONSTANT
&&
225 inst
->SrcReg
[arg
].File
!= PROGRAM_UNIFORM
&&
226 inst
->SrcReg
[arg
].File
!= PROGRAM_ENV_PARAM
&&
227 inst
->SrcReg
[arg
].File
!= PROGRAM_LOCAL_PARAM
) {
231 if (inst
->SrcReg
[arg
].RelAddr
) {
232 c
->vp
->use_const_buffer
= GL_TRUE
;
236 if (c
->constant_map
[inst
->SrcReg
[arg
].Index
] == -1) {
237 c
->constant_map
[inst
->SrcReg
[arg
].Index
] = constant
++;
242 /* If we ran out of push constant space, then we'll also upload all
243 * constants through the pull constant buffer so that they can be
244 * accessed no matter what. For relative addressing (the common
245 * case) we need them all in place anyway.
247 if (constant
== max_constant
)
248 c
->vp
->use_const_buffer
= GL_TRUE
;
250 for (i
= 0; i
< constant
; i
++) {
251 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
255 reg
+= (constant
+ 1) / 2;
256 c
->prog_data
.curb_read_length
= reg
- 1;
257 c
->prog_data
.nr_params
= constant
* 4;
258 /* XXX 0 causes a bug elsewhere... */
259 if (intel
->gen
< 6 && c
->prog_data
.nr_params
== 0)
260 c
->prog_data
.nr_params
= 4;
262 /* Allocate input regs:
265 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
266 if (c
->prog_data
.inputs_read
& (1 << i
)) {
268 c
->regs
[PROGRAM_INPUT
][i
] = brw_vec8_grf(reg
, 0);
272 /* If there are no inputs, we'll still be reading one attribute's worth
273 * because it's required -- see urb_read_length setting.
275 if (c
->nr_inputs
== 0)
278 /* Allocate outputs. The non-position outputs go straight into message regs.
281 c
->first_output
= reg
;
282 c
->first_overflow_output
= 0;
284 if (intel
->gen
>= 6) {
286 if (c
->key
.nr_userclip
)
288 } else if (intel
->gen
== 5)
293 first_reladdr_output
= get_first_reladdr_output(&c
->vp
->program
);
294 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
295 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
297 assert(i
< Elements(c
->regs
[PROGRAM_OUTPUT
]));
298 if (i
== VERT_RESULT_HPOS
) {
299 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
302 else if (i
== VERT_RESULT_PSIZ
) {
303 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
307 /* Two restrictions on our compute-to-MRF here. The
308 * message length for all SEND messages is restricted to
309 * [1,15], so we can't use mrf 15, as that means a length
312 * Additionally, URB writes are aligned to URB rows, so we
313 * need to put an even number of registers of URB data in
314 * each URB write so that the later write is aligned. A
315 * message length of 15 means 1 message header reg plus 14
318 * For attributes beyond the compute-to-MRF, we compute to
319 * GRFs and they will be written in the second URB_WRITE.
321 if (first_reladdr_output
> i
&& mrf
< 15) {
322 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_message_reg(mrf
);
326 if (mrf
>= 15 && !c
->first_overflow_output
)
327 c
->first_overflow_output
= i
;
328 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
336 /* Allocate program temporaries:
338 for (i
= 0; i
< c
->vp
->program
.Base
.NumTemporaries
; i
++) {
339 c
->regs
[PROGRAM_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
343 /* Address reg(s). Don't try to use the internal address reg until
346 for (i
= 0; i
< c
->vp
->program
.Base
.NumAddressRegs
; i
++) {
347 c
->regs
[PROGRAM_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
351 BRW_VERTICAL_STRIDE_8
,
353 BRW_HORIZONTAL_STRIDE_1
,
359 if (c
->vp
->use_const_buffer
) {
360 for (i
= 0; i
< 3; i
++) {
361 c
->current_const
[i
].reg
= brw_vec8_grf(reg
, 0);
364 clear_current_const(c
);
367 for (i
= 0; i
< 128; i
++) {
368 if (c
->output_regs
[i
].used_in_src
) {
369 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
374 if (c
->needs_stack
) {
375 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
379 /* Some opcodes need an internal temporary:
382 c
->last_tmp
= reg
; /* for allocation purposes */
384 /* Each input reg holds data from two vertices. The
385 * urb_read_length is the number of registers read from *each*
386 * vertex urb, so is half the amount:
388 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+ 1) / 2;
389 /* Setting this field to 0 leads to undefined behavior according to the
390 * VS_STATE docs. Our VUEs will always have at least one attribute
391 * sitting in them, even if it's padding.
393 if (c
->prog_data
.urb_read_length
== 0)
394 c
->prog_data
.urb_read_length
= 1;
396 /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
397 * them to fit the biggest thing they need to.
399 attributes_in_vue
= MAX2(c
->nr_outputs
, c
->nr_inputs
);
401 /* See emit_vertex_write() for where the VUE's overhead on top of the
402 * attributes comes from.
404 if (intel
->gen
>= 6) {
406 if (c
->key
.nr_userclip
)
409 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ header_regs
+ 7) / 8;
410 } else if (intel
->gen
== 5)
411 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 6 + 3) / 4;
413 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 2 + 3) / 4;
415 c
->prog_data
.total_grf
= reg
;
417 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
418 printf("%s NumAddrRegs %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumAddressRegs
);
419 printf("%s NumTemps %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumTemporaries
);
420 printf("%s reg = %d\n", __FUNCTION__
, reg
);
426 * If an instruction uses a temp reg both as a src and the dest, we
427 * sometimes need to allocate an intermediate temporary.
429 static void unalias1( struct brw_vs_compile
*c
,
432 void (*func
)( struct brw_vs_compile
*,
436 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
437 struct brw_compile
*p
= &c
->func
;
438 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
440 brw_MOV(p
, dst
, tmp
);
450 * Checks if a 2-operand instruction needs an intermediate temporary.
452 static void unalias2( struct brw_vs_compile
*c
,
456 void (*func
)( struct brw_vs_compile
*,
461 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
462 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
463 struct brw_compile
*p
= &c
->func
;
464 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
465 func(c
, tmp
, arg0
, arg1
);
466 brw_MOV(p
, dst
, tmp
);
470 func(c
, dst
, arg0
, arg1
);
476 * Checks if a 3-operand instruction needs an intermediate temporary.
478 static void unalias3( struct brw_vs_compile
*c
,
483 void (*func
)( struct brw_vs_compile
*,
489 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
490 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
) ||
491 (dst
.file
== arg2
.file
&& dst
.nr
== arg2
.nr
)) {
492 struct brw_compile
*p
= &c
->func
;
493 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
494 func(c
, tmp
, arg0
, arg1
, arg2
);
495 brw_MOV(p
, dst
, tmp
);
499 func(c
, dst
, arg0
, arg1
, arg2
);
503 static void emit_sop( struct brw_vs_compile
*c
,
509 struct brw_compile
*p
= &c
->func
;
511 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
512 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
513 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
514 brw_set_predicate_control_flag_value(p
, 0xff);
517 static void emit_seq( struct brw_vs_compile
*c
,
520 struct brw_reg arg1
)
522 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
525 static void emit_sne( struct brw_vs_compile
*c
,
528 struct brw_reg arg1
)
530 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
532 static void emit_slt( struct brw_vs_compile
*c
,
535 struct brw_reg arg1
)
537 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
540 static void emit_sle( struct brw_vs_compile
*c
,
543 struct brw_reg arg1
)
545 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
548 static void emit_sgt( struct brw_vs_compile
*c
,
551 struct brw_reg arg1
)
553 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
556 static void emit_sge( struct brw_vs_compile
*c
,
559 struct brw_reg arg1
)
561 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
564 static void emit_cmp( struct brw_compile
*p
,
568 struct brw_reg arg2
)
570 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
571 brw_SEL(p
, dst
, arg1
, arg2
);
572 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
575 static void emit_sign(struct brw_vs_compile
*c
,
579 struct brw_compile
*p
= &c
->func
;
581 brw_MOV(p
, dst
, brw_imm_f(0));
583 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
584 brw_MOV(p
, dst
, brw_imm_f(-1.0));
585 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
587 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, arg0
, brw_imm_f(0));
588 brw_MOV(p
, dst
, brw_imm_f(1.0));
589 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
592 static void emit_max( struct brw_compile
*p
,
595 struct brw_reg arg1
)
597 struct intel_context
*intel
= &p
->brw
->intel
;
599 if (intel
->gen
>= 6) {
600 brw_set_conditionalmod(p
, BRW_CONDITIONAL_GE
);
601 brw_SEL(p
, dst
, arg0
, arg1
);
602 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
603 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
605 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
, arg1
);
606 brw_SEL(p
, dst
, arg0
, arg1
);
607 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
611 static void emit_min( struct brw_compile
*p
,
614 struct brw_reg arg1
)
616 struct intel_context
*intel
= &p
->brw
->intel
;
618 if (intel
->gen
>= 6) {
619 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
620 brw_SEL(p
, dst
, arg0
, arg1
);
621 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
622 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
624 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
625 brw_SEL(p
, dst
, arg0
, arg1
);
626 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
630 static void emit_math1_gen4(struct brw_vs_compile
*c
,
636 /* There are various odd behaviours with SEND on the simulator. In
637 * addition there are documented issues with the fact that the GEN4
638 * processor doesn't do dependency control properly on SEND
639 * results. So, on balance, this kludge to get around failures
640 * with writemasked math results looks like it might be necessary
641 * whether that turns out to be a simulator bug or not:
643 struct brw_compile
*p
= &c
->func
;
644 struct brw_reg tmp
= dst
;
645 GLboolean need_tmp
= GL_FALSE
;
647 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
648 dst
.dw1
.bits
.writemask
!= 0xf)
657 BRW_MATH_SATURATE_NONE
,
660 BRW_MATH_DATA_SCALAR
,
664 brw_MOV(p
, dst
, tmp
);
670 emit_math1_gen6(struct brw_vs_compile
*c
,
676 struct brw_compile
*p
= &c
->func
;
677 struct brw_reg tmp_src
, tmp_dst
;
679 /* Something is strange on gen6 math in 16-wide mode, though the
680 * docs say it's supposed to work. Punt to using align1 mode,
681 * which doesn't do writemasking and swizzles.
683 tmp_src
= get_tmp(c
);
684 tmp_dst
= get_tmp(c
);
686 brw_MOV(p
, tmp_src
, arg0
);
688 brw_set_access_mode(p
, BRW_ALIGN_1
);
692 BRW_MATH_SATURATE_NONE
,
695 BRW_MATH_DATA_SCALAR
,
697 brw_set_access_mode(p
, BRW_ALIGN_16
);
699 brw_MOV(p
, dst
, tmp_dst
);
701 release_tmp(c
, tmp_src
);
702 release_tmp(c
, tmp_dst
);
706 emit_math1(struct brw_vs_compile
*c
,
712 struct brw_compile
*p
= &c
->func
;
713 struct intel_context
*intel
= &p
->brw
->intel
;
716 emit_math1_gen6(c
, function
, dst
, arg0
, precision
);
718 emit_math1_gen4(c
, function
, dst
, arg0
, precision
);
721 static void emit_math2_gen4( struct brw_vs_compile
*c
,
728 struct brw_compile
*p
= &c
->func
;
729 struct brw_reg tmp
= dst
;
730 GLboolean need_tmp
= GL_FALSE
;
732 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
733 dst
.dw1
.bits
.writemask
!= 0xf)
739 brw_MOV(p
, brw_message_reg(3), arg1
);
744 BRW_MATH_SATURATE_NONE
,
747 BRW_MATH_DATA_SCALAR
,
751 brw_MOV(p
, dst
, tmp
);
756 static void emit_math2_gen6( struct brw_vs_compile
*c
,
763 struct brw_compile
*p
= &c
->func
;
764 struct brw_reg tmp_src0
, tmp_src1
, tmp_dst
;
766 tmp_src0
= get_tmp(c
);
767 tmp_src1
= get_tmp(c
);
768 tmp_dst
= get_tmp(c
);
770 brw_MOV(p
, tmp_src0
, arg0
);
771 brw_MOV(p
, tmp_src1
, arg1
);
773 brw_set_access_mode(p
, BRW_ALIGN_1
);
779 brw_set_access_mode(p
, BRW_ALIGN_16
);
781 brw_MOV(p
, dst
, tmp_dst
);
783 release_tmp(c
, tmp_src0
);
784 release_tmp(c
, tmp_src1
);
785 release_tmp(c
, tmp_dst
);
788 static void emit_math2( struct brw_vs_compile
*c
,
795 struct brw_compile
*p
= &c
->func
;
796 struct intel_context
*intel
= &p
->brw
->intel
;
799 emit_math2_gen6(c
, function
, dst
, arg0
, arg1
, precision
);
801 emit_math2_gen4(c
, function
, dst
, arg0
, arg1
, precision
);
804 static void emit_exp_noalias( struct brw_vs_compile
*c
,
806 struct brw_reg arg0
)
808 struct brw_compile
*p
= &c
->func
;
811 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
) {
812 struct brw_reg tmp
= get_tmp(c
);
813 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
815 /* tmp_d = floor(arg0.x) */
816 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
818 /* result[0] = 2.0 ^ tmp */
820 /* Adjust exponent for floating point:
823 brw_ADD(p
, brw_writemask(tmp_d
, WRITEMASK_X
), tmp_d
, brw_imm_d(127));
825 /* Install exponent and sign.
826 * Excess drops off the edge:
828 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), WRITEMASK_X
),
829 tmp_d
, brw_imm_d(23));
834 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
) {
835 /* result[1] = arg0.x - floor(arg0.x) */
836 brw_FRC(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
839 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
840 /* As with the LOG instruction, we might be better off just
841 * doing a taylor expansion here, seeing as we have to do all
844 * If mathbox partial precision is too low, consider also:
845 * result[2] = result[0] * EXP(result[1])
848 BRW_MATH_FUNCTION_EXP
,
849 brw_writemask(dst
, WRITEMASK_Z
),
850 brw_swizzle1(arg0
, 0),
851 BRW_MATH_PRECISION_FULL
);
854 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
855 /* result[3] = 1.0; */
856 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), brw_imm_f(1));
861 static void emit_log_noalias( struct brw_vs_compile
*c
,
863 struct brw_reg arg0
)
865 struct brw_compile
*p
= &c
->func
;
866 struct brw_reg tmp
= dst
;
867 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
868 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
869 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
870 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
874 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
877 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
880 * These almost look like they could be joined up, but not really
883 * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
884 * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
886 if (dst
.dw1
.bits
.writemask
& WRITEMASK_XZ
) {
888 brw_writemask(tmp_ud
, WRITEMASK_X
),
889 brw_swizzle1(arg0_ud
, 0),
890 brw_imm_ud((1U<<31)-1));
893 brw_writemask(tmp_ud
, WRITEMASK_X
),
898 brw_writemask(tmp
, WRITEMASK_X
),
899 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
903 if (dst
.dw1
.bits
.writemask
& WRITEMASK_YZ
) {
905 brw_writemask(tmp_ud
, WRITEMASK_Y
),
906 brw_swizzle1(arg0_ud
, 0),
907 brw_imm_ud((1<<23)-1));
910 brw_writemask(tmp_ud
, WRITEMASK_Y
),
912 brw_imm_ud(127<<23));
915 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
916 /* result[2] = result[0] + LOG2(result[1]); */
918 /* Why bother? The above is just a hint how to do this with a
919 * taylor series. Maybe we *should* use a taylor series as by
920 * the time all the above has been done it's almost certainly
921 * quicker than calling the mathbox, even with low precision.
924 * - result[0] + mathbox.LOG2(result[1])
925 * - mathbox.LOG2(arg0.x)
926 * - result[0] + inline_taylor_approx(result[1])
929 BRW_MATH_FUNCTION_LOG
,
930 brw_writemask(tmp
, WRITEMASK_Z
),
931 brw_swizzle1(tmp
, 1),
932 BRW_MATH_PRECISION_FULL
);
935 brw_writemask(tmp
, WRITEMASK_Z
),
936 brw_swizzle1(tmp
, 2),
937 brw_swizzle1(tmp
, 0));
940 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
941 /* result[3] = 1.0; */
942 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_W
), brw_imm_f(1));
946 brw_MOV(p
, dst
, tmp
);
952 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
954 static void emit_dst_noalias( struct brw_vs_compile
*c
,
959 struct brw_compile
*p
= &c
->func
;
961 /* There must be a better way to do this:
963 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
)
964 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_X
), brw_imm_f(1.0));
965 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
)
966 brw_MUL(p
, brw_writemask(dst
, WRITEMASK_Y
), arg0
, arg1
);
967 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
)
968 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Z
), arg0
);
969 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
)
970 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), arg1
);
974 static void emit_xpd( struct brw_compile
*p
,
979 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
980 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
984 static void emit_lit_noalias( struct brw_vs_compile
*c
,
986 struct brw_reg arg0
)
988 struct brw_compile
*p
= &c
->func
;
989 struct brw_instruction
*if_insn
;
990 struct brw_reg tmp
= dst
;
991 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
996 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_YZ
), brw_imm_f(0));
997 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_XW
), brw_imm_f(1));
999 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
1000 * to get all channels active inside the IF. In the clipping code
1001 * we run with NoMask, so it's not an option and we can use
1002 * BRW_EXECUTE_1 for all comparisons.
1004 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
1005 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
1007 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
,0));
1009 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
1010 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_Z
), brw_swizzle1(arg0
,1));
1011 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1014 BRW_MATH_FUNCTION_POW
,
1015 brw_writemask(dst
, WRITEMASK_Z
),
1016 brw_swizzle1(tmp
, 2),
1017 brw_swizzle1(arg0
, 3),
1018 BRW_MATH_PRECISION_PARTIAL
);
1021 brw_ENDIF(p
, if_insn
);
1023 release_tmp(c
, tmp
);
1026 static void emit_lrp_noalias(struct brw_vs_compile
*c
,
1028 struct brw_reg arg0
,
1029 struct brw_reg arg1
,
1030 struct brw_reg arg2
)
1032 struct brw_compile
*p
= &c
->func
;
1034 brw_ADD(p
, dst
, negate(arg0
), brw_imm_f(1.0));
1035 brw_MUL(p
, brw_null_reg(), dst
, arg2
);
1036 brw_MAC(p
, dst
, arg0
, arg1
);
1039 /** 3 or 4-component vector normalization */
1040 static void emit_nrm( struct brw_vs_compile
*c
,
1042 struct brw_reg arg0
,
1045 struct brw_compile
*p
= &c
->func
;
1046 struct brw_reg tmp
= get_tmp(c
);
1048 /* tmp = dot(arg0, arg0) */
1050 brw_DP3(p
, tmp
, arg0
, arg0
);
1052 brw_DP4(p
, tmp
, arg0
, arg0
);
1054 /* tmp = 1 / sqrt(tmp) */
1055 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, tmp
, tmp
, BRW_MATH_PRECISION_FULL
);
1057 /* dst = arg0 * tmp */
1058 brw_MUL(p
, dst
, arg0
, tmp
);
1060 release_tmp(c
, tmp
);
1064 static struct brw_reg
1065 get_constant(struct brw_vs_compile
*c
,
1066 const struct prog_instruction
*inst
,
1069 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1070 struct brw_compile
*p
= &c
->func
;
1071 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1073 assert(argIndex
< 3);
1075 if (c
->current_const
[argIndex
].index
!= src
->Index
) {
1076 /* Keep track of the last constant loaded in this slot, for reuse. */
1077 c
->current_const
[argIndex
].index
= src
->Index
;
1080 printf(" fetch const[%d] for arg %d into reg %d\n",
1081 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1083 /* need to fetch the constant now */
1085 const_reg
, /* writeback dest */
1086 16 * src
->Index
, /* byte offset */
1087 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
1091 /* replicate lower four floats into upper half (to get XYZWXYZW) */
1092 const_reg
= stride(const_reg
, 0, 4, 0);
1093 const_reg
.subnr
= 0;
1098 static struct brw_reg
1099 get_reladdr_constant(struct brw_vs_compile
*c
,
1100 const struct prog_instruction
*inst
,
1103 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1104 struct brw_compile
*p
= &c
->func
;
1105 struct brw_context
*brw
= p
->brw
;
1106 struct intel_context
*intel
= &brw
->intel
;
1107 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1108 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1111 assert(argIndex
< 3);
1113 /* Can't reuse a reladdr constant load. */
1114 c
->current_const
[argIndex
].index
= -1;
1117 printf(" fetch const[a0.x+%d] for arg %d into reg %d\n",
1118 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1121 if (intel
->gen
>= 6) {
1122 offset
= src
->Index
;
1124 struct brw_reg byte_addr_reg
= retype(get_tmp(c
), BRW_REGISTER_TYPE_D
);
1125 brw_MUL(p
, byte_addr_reg
, addr_reg
, brw_imm_d(16));
1126 addr_reg
= byte_addr_reg
;
1127 offset
= 16 * src
->Index
;
1130 /* fetch the first vec4 */
1131 brw_dp_READ_4_vs_relative(p
,
1135 SURF_INDEX_VERT_CONST_BUFFER
);
1142 /* TODO: relative addressing!
1144 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
1145 gl_register_file file
,
1149 case PROGRAM_TEMPORARY
:
1151 case PROGRAM_OUTPUT
:
1152 assert(c
->regs
[file
][index
].nr
!= 0);
1153 return c
->regs
[file
][index
];
1154 case PROGRAM_STATE_VAR
:
1155 case PROGRAM_CONSTANT
:
1156 case PROGRAM_UNIFORM
:
1157 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
1158 return c
->regs
[PROGRAM_STATE_VAR
][index
];
1159 case PROGRAM_ADDRESS
:
1161 return c
->regs
[file
][index
];
1163 case PROGRAM_UNDEFINED
: /* undef values */
1164 return brw_null_reg();
1166 case PROGRAM_LOCAL_PARAM
:
1167 case PROGRAM_ENV_PARAM
:
1168 case PROGRAM_WRITE_ONLY
:
1171 return brw_null_reg();
1177 * Indirect addressing: get reg[[arg] + offset].
1179 static struct brw_reg
deref( struct brw_vs_compile
*c
,
1184 struct brw_compile
*p
= &c
->func
;
1185 struct brw_reg tmp
= get_tmp(c
);
1186 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1187 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1188 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* reg_size
;
1189 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1190 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1192 /* Set the vertical stride on the register access so that the first
1193 * 4 components come from a0.0 and the second 4 from a0.1.
1195 indirect
.vstride
= BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL
;
1198 brw_push_insn_state(p
);
1199 brw_set_access_mode(p
, BRW_ALIGN_1
);
1201 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1202 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1204 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1205 brw_ADD(p
, brw_address_reg(1), acc
, brw_imm_uw(byte_offset
));
1207 brw_MOV(p
, tmp
, indirect
);
1209 brw_pop_insn_state(p
);
1212 /* NOTE: tmp not released */
1217 move_to_reladdr_dst(struct brw_vs_compile
*c
,
1218 const struct prog_instruction
*inst
,
1221 struct brw_compile
*p
= &c
->func
;
1223 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1224 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1225 struct brw_reg base
= c
->regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
];
1226 GLuint byte_offset
= base
.nr
* 32 + base
.subnr
;
1227 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1228 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1230 /* Because destination register indirect addressing can only use
1231 * one index, we'll write each vertex's vec4 value separately.
1233 val
.width
= BRW_WIDTH_4
;
1234 val
.vstride
= BRW_VERTICAL_STRIDE_4
;
1236 brw_push_insn_state(p
);
1237 brw_set_access_mode(p
, BRW_ALIGN_1
);
1239 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1240 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1241 brw_MOV(p
, indirect
, val
);
1243 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1244 brw_ADD(p
, brw_address_reg(0), acc
,
1245 brw_imm_uw(byte_offset
+ reg_size
/ 2));
1246 brw_MOV(p
, indirect
, suboffset(val
, 4));
1248 brw_pop_insn_state(p
);
1252 * Get brw reg corresponding to the instruction's [argIndex] src reg.
1253 * TODO: relative addressing!
1255 static struct brw_reg
1256 get_src_reg( struct brw_vs_compile
*c
,
1257 const struct prog_instruction
*inst
,
1260 const GLuint file
= inst
->SrcReg
[argIndex
].File
;
1261 const GLint index
= inst
->SrcReg
[argIndex
].Index
;
1262 const GLboolean relAddr
= inst
->SrcReg
[argIndex
].RelAddr
;
1264 if (brw_vs_arg_can_be_immediate(inst
->Opcode
, argIndex
)) {
1265 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1267 if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ZERO
,
1271 return brw_imm_f(0.0f
);
1272 } else if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ONE
,
1277 return brw_imm_f(-1.0F
);
1279 return brw_imm_f(1.0F
);
1280 } else if (src
->File
== PROGRAM_CONSTANT
) {
1281 const struct gl_program_parameter_list
*params
;
1285 switch (src
->Swizzle
) {
1300 if (component
>= 0) {
1301 params
= c
->vp
->program
.Base
.Parameters
;
1302 f
= params
->ParameterValues
[src
->Index
][component
];
1308 return brw_imm_f(f
);
1314 case PROGRAM_TEMPORARY
:
1316 case PROGRAM_OUTPUT
:
1318 return deref(c
, c
->regs
[file
][0], index
, 32);
1321 assert(c
->regs
[file
][index
].nr
!= 0);
1322 return c
->regs
[file
][index
];
1325 case PROGRAM_STATE_VAR
:
1326 case PROGRAM_CONSTANT
:
1327 case PROGRAM_UNIFORM
:
1328 case PROGRAM_ENV_PARAM
:
1329 case PROGRAM_LOCAL_PARAM
:
1330 if (!relAddr
&& c
->constant_map
[index
] != -1) {
1331 /* Take from the push constant buffer if possible. */
1332 assert(c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]].nr
!= 0);
1333 return c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]];
1335 /* Must be in the pull constant buffer then .*/
1336 assert(c
->vp
->use_const_buffer
);
1338 return get_reladdr_constant(c
, inst
, argIndex
);
1340 return get_constant(c
, inst
, argIndex
);
1342 case PROGRAM_ADDRESS
:
1344 return c
->regs
[file
][index
];
1346 case PROGRAM_UNDEFINED
:
1347 /* this is a normal case since we loop over all three src args */
1348 return brw_null_reg();
1350 case PROGRAM_WRITE_ONLY
:
1353 return brw_null_reg();
1358 * Return the brw reg for the given instruction's src argument.
1359 * Will return mangled results for SWZ op. The emit_swz() function
1360 * ignores this result and recalculates taking extended swizzles into
1363 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
1364 const struct prog_instruction
*inst
,
1367 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1370 if (src
->File
== PROGRAM_UNDEFINED
)
1371 return brw_null_reg();
1373 reg
= get_src_reg(c
, inst
, argIndex
);
1375 /* Convert 3-bit swizzle to 2-bit.
1377 if (reg
.file
!= BRW_IMMEDIATE_VALUE
) {
1378 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(GET_SWZ(src
->Swizzle
, 0),
1379 GET_SWZ(src
->Swizzle
, 1),
1380 GET_SWZ(src
->Swizzle
, 2),
1381 GET_SWZ(src
->Swizzle
, 3));
1384 /* Note this is ok for non-swizzle instructions:
1386 reg
.negate
= src
->Negate
? 1 : 0;
1393 * Get brw register for the given program dest register.
1395 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
1396 struct prog_dst_register dst
)
1401 case PROGRAM_TEMPORARY
:
1402 case PROGRAM_OUTPUT
:
1403 /* register-indirect addressing is only 1x1, not VxH, for
1404 * destination regs. So, for RelAddr we'll return a temporary
1405 * for the dest and do a move of the result to the RelAddr
1406 * register after the instruction emit.
1411 assert(c
->regs
[dst
.File
][dst
.Index
].nr
!= 0);
1412 reg
= c
->regs
[dst
.File
][dst
.Index
];
1415 case PROGRAM_ADDRESS
:
1416 assert(dst
.Index
== 0);
1417 reg
= c
->regs
[dst
.File
][dst
.Index
];
1419 case PROGRAM_UNDEFINED
:
1420 /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
1421 reg
= brw_null_reg();
1425 reg
= brw_null_reg();
1428 assert(reg
.type
!= BRW_IMMEDIATE_VALUE
);
1429 reg
.dw1
.bits
.writemask
= dst
.WriteMask
;
1435 static void emit_swz( struct brw_vs_compile
*c
,
1437 const struct prog_instruction
*inst
)
1439 const GLuint argIndex
= 0;
1440 const struct prog_src_register src
= inst
->SrcReg
[argIndex
];
1441 struct brw_compile
*p
= &c
->func
;
1442 GLuint zeros_mask
= 0;
1443 GLuint ones_mask
= 0;
1444 GLuint src_mask
= 0;
1446 GLboolean need_tmp
= (src
.Negate
&&
1447 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
1448 struct brw_reg tmp
= dst
;
1454 for (i
= 0; i
< 4; i
++) {
1455 if (dst
.dw1
.bits
.writemask
& (1<<i
)) {
1456 GLubyte s
= GET_SWZ(src
.Swizzle
, i
);
1475 /* Do src first, in case dst aliases src:
1478 struct brw_reg arg0
;
1480 arg0
= get_src_reg(c
, inst
, argIndex
);
1482 arg0
= brw_swizzle(arg0
,
1483 src_swz
[0], src_swz
[1],
1484 src_swz
[2], src_swz
[3]);
1486 brw_MOV(p
, brw_writemask(tmp
, src_mask
), arg0
);
1490 brw_MOV(p
, brw_writemask(tmp
, zeros_mask
), brw_imm_f(0));
1493 brw_MOV(p
, brw_writemask(tmp
, ones_mask
), brw_imm_f(1));
1496 brw_MOV(p
, brw_writemask(tmp
, src
.Negate
), negate(tmp
));
1499 brw_MOV(p
, dst
, tmp
);
1500 release_tmp(c
, tmp
);
1506 * Post-vertex-program processing. Send the results to the URB.
1508 static void emit_vertex_write( struct brw_vs_compile
*c
)
1510 struct brw_compile
*p
= &c
->func
;
1511 struct brw_context
*brw
= p
->brw
;
1512 struct intel_context
*intel
= &brw
->intel
;
1513 struct brw_reg pos
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_HPOS
];
1516 GLuint len_vertex_header
= 2;
1519 if (c
->key
.copy_edgeflag
) {
1521 get_reg(c
, PROGRAM_OUTPUT
, VERT_RESULT_EDGE
),
1522 get_reg(c
, PROGRAM_INPUT
, VERT_ATTRIB_EDGEFLAG
));
1525 if (intel
->gen
< 6) {
1526 /* Build ndc coords */
1528 /* ndc = 1.0 / pos.w */
1529 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
1530 /* ndc.xyz = pos * ndc */
1531 brw_MUL(p
, brw_writemask(ndc
, WRITEMASK_XYZ
), pos
, ndc
);
1534 /* Update the header for point size, user clipping flags, and -ve rhw
1537 if (intel
->gen
>= 6) {
1538 struct brw_reg m1
= brw_message_reg(1);
1540 /* On gen6, m1 has each value in a separate dword, so we never
1541 * need to mess with a temporary for computing the m1 value.
1543 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1544 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1545 brw_MOV(p
, brw_writemask(m1
, WRITEMASK_W
),
1546 brw_swizzle1(c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
], 0));
1549 /* Set the user clip distances in dword 8-15. (m3-4)*/
1550 if (c
->key
.nr_userclip
) {
1551 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1554 m
= brw_message_reg(3);
1556 m
= brw_message_reg(4);
1558 brw_DP4(p
, brw_writemask(m
, (1 << (i
& 7))),pos
, c
->userplane
[i
]);
1561 } else if ((c
->prog_data
.outputs_written
&
1562 BITFIELD64_BIT(VERT_RESULT_PSIZ
)) ||
1563 c
->key
.nr_userclip
|| brw
->has_negative_rhw_bug
) {
1564 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
1567 brw_MOV(p
, header1
, brw_imm_ud(0));
1569 brw_set_access_mode(p
, BRW_ALIGN_16
);
1571 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1572 struct brw_reg psiz
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
];
1573 brw_MUL(p
, brw_writemask(header1
, WRITEMASK_W
),
1574 brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
1575 brw_AND(p
, brw_writemask(header1
, WRITEMASK_W
),
1576 header1
, brw_imm_ud(0x7ff<<8));
1579 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1580 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
1581 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
1582 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
1583 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1586 /* i965 clipping workaround:
1587 * 1) Test for -ve rhw
1589 * set ndc = (0,0,0,0)
1592 * Later, clipping will detect ucp[6] and ensure the primitive is
1593 * clipped against all fixed planes.
1595 if (brw
->has_negative_rhw_bug
) {
1597 vec8(brw_null_reg()),
1599 brw_swizzle1(ndc
, 3),
1602 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
1603 brw_MOV(p
, ndc
, brw_imm_f(0));
1604 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1607 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
1608 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
1609 brw_set_access_mode(p
, BRW_ALIGN_16
);
1611 release_tmp(c
, header1
);
1614 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1617 /* Emit the (interleaved) headers for the two vertices - an 8-reg
1618 * of zeros followed by two sets of NDC coordinates:
1620 brw_set_access_mode(p
, BRW_ALIGN_1
);
1621 brw_set_acc_write_control(p
, 0);
1623 /* The VUE layout is documented in Volume 2a. */
1624 if (intel
->gen
>= 6) {
1625 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1626 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1627 * dword 4-7 (m2) is the 4D space position
1628 * dword 8-15 (m3,m4) of the vertex header is the user clip distance if
1630 * m3 or 5 is the first vertex element data we fill, which is
1631 * the vertex position.
1633 brw_MOV(p
, brw_message_reg(2), pos
);
1634 len_vertex_header
= 1;
1635 if (c
->key
.nr_userclip
> 0)
1636 len_vertex_header
+= 2;
1637 } else if (intel
->gen
== 5) {
1638 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1639 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1640 * dword 4-7 (m2) is the ndc position (set above)
1641 * dword 8-11 (m3) of the vertex header is the 4D space position
1642 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1643 * m6 is a pad so that the vertex element data is aligned
1644 * m7 is the first vertex data we fill, which is the vertex position.
1646 brw_MOV(p
, brw_message_reg(2), ndc
);
1647 brw_MOV(p
, brw_message_reg(3), pos
);
1648 brw_MOV(p
, brw_message_reg(7), pos
);
1649 len_vertex_header
= 6;
1651 /* There are 8 dwords in VUE header pre-Ironlake:
1652 * dword 0-3 (m1) is indices, point width, clip flags.
1653 * dword 4-7 (m2) is ndc position (set above)
1655 * dword 8-11 (m3) is the first vertex data, which we always have be the
1658 brw_MOV(p
, brw_message_reg(2), ndc
);
1659 brw_MOV(p
, brw_message_reg(3), pos
);
1660 len_vertex_header
= 2;
1663 /* Move variable-addressed, non-overflow outputs to their MRFs. */
1664 next_mrf
= 2 + len_vertex_header
;
1665 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1666 if (c
->first_overflow_output
> 0 && i
>= c
->first_overflow_output
)
1668 if (!(c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)))
1670 if (i
== VERT_RESULT_PSIZ
)
1673 if (i
>= VERT_RESULT_TEX0
&&
1674 c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_GENERAL_REGISTER_FILE
) {
1675 brw_MOV(p
, brw_message_reg(next_mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
1677 } else if (c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_MESSAGE_REGISTER_FILE
) {
1678 next_mrf
= c
->regs
[PROGRAM_OUTPUT
][i
].nr
+ 1;
1682 eot
= (c
->first_overflow_output
== 0);
1685 brw_null_reg(), /* dest */
1686 0, /* starting mrf reg nr */
1690 MIN2(c
->nr_outputs
+ 1 + len_vertex_header
, (BRW_MAX_MRF
-1)), /* msg len */
1691 0, /* response len */
1693 eot
, /* writes complete */
1694 0, /* urb destination offset */
1695 BRW_URB_SWIZZLE_INTERLEAVE
);
1697 if (c
->first_overflow_output
> 0) {
1698 /* Not all of the vertex outputs/results fit into the MRF.
1699 * Move the overflowed attributes from the GRF to the MRF and
1700 * issue another brw_urb_WRITE().
1703 for (i
= c
->first_overflow_output
; i
< VERT_RESULT_MAX
; i
++) {
1704 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
1705 /* move from GRF to MRF */
1706 brw_MOV(p
, brw_message_reg(mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
1712 brw_null_reg(), /* dest */
1713 0, /* starting mrf reg nr */
1718 0, /* response len */
1720 1, /* writes complete */
1721 14 / 2, /* urb destination offset */
1722 BRW_URB_SWIZZLE_INTERLEAVE
);
1727 accumulator_contains(struct brw_vs_compile
*c
, struct brw_reg val
)
1729 struct brw_compile
*p
= &c
->func
;
1730 struct brw_instruction
*prev_insn
= &p
->store
[p
->nr_insn
- 1];
1732 if (p
->nr_insn
== 0)
1735 if (val
.address_mode
!= BRW_ADDRESS_DIRECT
)
1738 switch (prev_insn
->header
.opcode
) {
1739 case BRW_OPCODE_MOV
:
1740 case BRW_OPCODE_MAC
:
1741 case BRW_OPCODE_MUL
:
1742 if (prev_insn
->header
.access_mode
== BRW_ALIGN_16
&&
1743 prev_insn
->header
.execution_size
== val
.width
&&
1744 prev_insn
->bits1
.da1
.dest_reg_file
== val
.file
&&
1745 prev_insn
->bits1
.da1
.dest_reg_type
== val
.type
&&
1746 prev_insn
->bits1
.da1
.dest_address_mode
== val
.address_mode
&&
1747 prev_insn
->bits1
.da1
.dest_reg_nr
== val
.nr
&&
1748 prev_insn
->bits1
.da16
.dest_subreg_nr
== val
.subnr
/ 16 &&
1749 prev_insn
->bits1
.da16
.dest_writemask
== 0xf)
1759 get_predicate(const struct prog_instruction
*inst
)
1761 if (inst
->DstReg
.CondMask
== COND_TR
)
1762 return BRW_PREDICATE_NONE
;
1764 /* All of GLSL only produces predicates for COND_NE and one channel per
1765 * vector. Fail badly if someone starts doing something else, as it might
1766 * mean infinite looping or something.
1768 * We'd like to support all the condition codes, but our hardware doesn't
1769 * quite match the Mesa IR, which is modeled after the NV extensions. For
1770 * those, the instruction may update the condition codes or not, then any
1771 * later instruction may use one of those condition codes. For gen4, the
1772 * instruction may update the flags register based on one of the condition
1773 * codes output by the instruction, and then further instructions may
1774 * predicate on that. We can probably support this, but it won't
1775 * necessarily be easy.
1777 assert(inst
->DstReg
.CondMask
== COND_NE
);
1779 switch (inst
->DstReg
.CondSwizzle
) {
1781 return BRW_PREDICATE_ALIGN16_REPLICATE_X
;
1783 return BRW_PREDICATE_ALIGN16_REPLICATE_Y
;
1785 return BRW_PREDICATE_ALIGN16_REPLICATE_Z
;
1787 return BRW_PREDICATE_ALIGN16_REPLICATE_W
;
1789 _mesa_problem(NULL
, "Unexpected predicate: 0x%08x\n",
1790 inst
->DstReg
.CondMask
);
1791 return BRW_PREDICATE_NORMAL
;
1795 /* Emit the vertex program instructions here.
1797 void brw_vs_emit(struct brw_vs_compile
*c
)
1799 #define MAX_IF_DEPTH 32
1800 #define MAX_LOOP_DEPTH 32
1801 struct brw_compile
*p
= &c
->func
;
1802 struct brw_context
*brw
= p
->brw
;
1803 struct intel_context
*intel
= &brw
->intel
;
1804 const GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
1805 GLuint insn
, if_depth
= 0, loop_depth
= 0;
1806 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
] = { 0 };
1807 int if_depth_in_loop
[MAX_LOOP_DEPTH
];
1808 const struct brw_indirect stack_index
= brw_indirect(0, 0);
1812 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1813 printf("vs-mesa:\n");
1814 _mesa_fprint_program_opt(stdout
, &c
->vp
->program
.Base
, PROG_PRINT_DEBUG
,
1819 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1820 brw_set_access_mode(p
, BRW_ALIGN_16
);
1821 if_depth_in_loop
[loop_depth
] = 0;
1823 brw_set_acc_write_control(p
, 1);
1825 for (insn
= 0; insn
< nr_insns
; insn
++) {
1827 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1829 /* Message registers can't be read, so copy the output into GRF
1830 * register if they are used in source registers
1832 for (i
= 0; i
< 3; i
++) {
1833 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1834 GLuint index
= src
->Index
;
1835 GLuint file
= src
->File
;
1836 if (file
== PROGRAM_OUTPUT
&& index
!= VERT_RESULT_HPOS
)
1837 c
->output_regs
[index
].used_in_src
= GL_TRUE
;
1840 switch (inst
->Opcode
) {
1843 c
->needs_stack
= GL_TRUE
;
1850 /* Static register allocation
1852 brw_vs_alloc_regs(c
);
1855 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1857 for (insn
= 0; insn
< nr_insns
; insn
++) {
1859 const struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1860 struct brw_reg args
[3], dst
;
1864 printf("%d: ", insn
);
1865 _mesa_print_instruction(inst
);
1868 /* Get argument regs. SWZ is special and does this itself.
1870 if (inst
->Opcode
!= OPCODE_SWZ
)
1871 for (i
= 0; i
< 3; i
++) {
1872 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1875 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1876 args
[i
] = c
->output_regs
[index
].reg
;
1878 args
[i
] = get_arg(c
, inst
, i
);
1881 /* Get dest regs. Note that it is possible for a reg to be both
1882 * dst and arg, given the static allocation of registers. So
1883 * care needs to be taken emitting multi-operation instructions.
1885 index
= inst
->DstReg
.Index
;
1886 file
= inst
->DstReg
.File
;
1887 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1888 dst
= c
->output_regs
[index
].reg
;
1890 dst
= get_dst(c
, inst
->DstReg
);
1892 if (inst
->SaturateMode
!= SATURATE_OFF
) {
1893 _mesa_problem(NULL
, "Unsupported saturate %d in vertex shader",
1894 inst
->SaturateMode
);
1897 switch (inst
->Opcode
) {
1899 brw_MOV(p
, dst
, brw_abs(args
[0]));
1902 brw_ADD(p
, dst
, args
[0], args
[1]);
1905 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1908 brw_DP2(p
, dst
, args
[0], args
[1]);
1911 brw_DP3(p
, dst
, args
[0], args
[1]);
1914 brw_DP4(p
, dst
, args
[0], args
[1]);
1917 brw_DPH(p
, dst
, args
[0], args
[1]);
1920 emit_nrm(c
, dst
, args
[0], 3);
1923 emit_nrm(c
, dst
, args
[0], 4);
1926 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1929 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1932 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1935 brw_RNDD(p
, dst
, args
[0]);
1938 brw_RNDD(p
, dst
, args
[0]);
1941 brw_FRC(p
, dst
, args
[0]);
1944 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1947 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1950 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1953 unalias3(c
, dst
, args
[0], args
[1], args
[2], emit_lrp_noalias
);
1956 if (!accumulator_contains(c
, args
[2]))
1957 brw_MOV(p
, brw_acc_reg(), args
[2]);
1958 brw_MAC(p
, dst
, args
[0], args
[1]);
1961 emit_cmp(p
, dst
, args
[0], args
[1], args
[2]);
1964 emit_max(p
, dst
, args
[0], args
[1]);
1967 emit_min(p
, dst
, args
[0], args
[1]);
1970 brw_MOV(p
, dst
, args
[0]);
1973 brw_MUL(p
, dst
, args
[0], args
[1]);
1976 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1979 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1982 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, brw_abs(args
[0]), BRW_MATH_PRECISION_FULL
);
1986 unalias2(c
, dst
, args
[0], args
[1], emit_seq
);
1989 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1992 unalias2(c
, dst
, args
[0], args
[1], emit_sne
);
1995 unalias2(c
, dst
, args
[0], args
[1], emit_sge
);
1998 unalias2(c
, dst
, args
[0], args
[1], emit_sgt
);
2001 unalias2(c
, dst
, args
[0], args
[1], emit_slt
);
2004 unalias2(c
, dst
, args
[0], args
[1], emit_sle
);
2007 unalias1(c
, dst
, args
[0], emit_sign
);
2010 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
2013 /* The args[0] value can't be used here as it won't have
2014 * correctly encoded the full swizzle:
2016 emit_swz(c
, dst
, inst
);
2019 /* round toward zero */
2020 brw_RNDZ(p
, dst
, args
[0]);
2023 emit_xpd(p
, dst
, args
[0], args
[1]);
2026 assert(if_depth
< MAX_IF_DEPTH
);
2027 if_inst
[if_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
2028 /* Note that brw_IF smashes the predicate_control field. */
2029 if_inst
[if_depth
]->header
.predicate_control
= get_predicate(inst
);
2030 if_depth_in_loop
[loop_depth
]++;
2034 clear_current_const(c
);
2035 assert(if_depth
> 0);
2036 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2039 clear_current_const(c
);
2040 assert(if_depth
> 0);
2041 brw_ENDIF(p
, if_inst
[--if_depth
]);
2042 if_depth_in_loop
[loop_depth
]--;
2044 case OPCODE_BGNLOOP
:
2045 clear_current_const(c
);
2046 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2047 if_depth_in_loop
[loop_depth
] = 0;
2050 brw_set_predicate_control(p
, get_predicate(inst
));
2051 brw_BREAK(p
, if_depth_in_loop
[loop_depth
]);
2052 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2055 brw_set_predicate_control(p
, get_predicate(inst
));
2056 if (intel
->gen
>= 6) {
2057 brw_CONT_gen6(p
, loop_inst
[loop_depth
- 1]);
2059 brw_CONT(p
, if_depth_in_loop
[loop_depth
]);
2061 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2064 case OPCODE_ENDLOOP
: {
2065 clear_current_const(c
);
2066 struct brw_instruction
*inst0
, *inst1
;
2071 if (intel
->gen
== 5)
2074 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2076 if (intel
->gen
< 6) {
2077 /* patch all the BREAK/CONT instructions from last BEGINLOOP */
2078 while (inst0
> loop_inst
[loop_depth
]) {
2080 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2081 inst0
->bits3
.if_else
.jump_count
== 0) {
2082 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2083 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2084 inst0
->bits3
.if_else
.jump_count
== 0) {
2085 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2093 brw_set_predicate_control(p
, get_predicate(inst
));
2094 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2095 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2098 brw_set_access_mode(p
, BRW_ALIGN_1
);
2099 brw_ADD(p
, deref_1d(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2100 brw_set_access_mode(p
, BRW_ALIGN_16
);
2101 brw_ADD(p
, get_addr_reg(stack_index
),
2102 get_addr_reg(stack_index
), brw_imm_d(4));
2103 brw_save_call(p
, inst
->Comment
, p
->nr_insn
);
2104 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2107 brw_ADD(p
, get_addr_reg(stack_index
),
2108 get_addr_reg(stack_index
), brw_imm_d(-4));
2109 brw_set_access_mode(p
, BRW_ALIGN_1
);
2110 brw_MOV(p
, brw_ip_reg(), deref_1d(stack_index
, 0));
2111 brw_set_access_mode(p
, BRW_ALIGN_16
);
2114 emit_vertex_write(c
);
2120 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2126 _mesa_problem(NULL
, "Unsupported opcode %i (%s) in vertex shader",
2127 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
2128 _mesa_opcode_string(inst
->Opcode
) :
2132 /* Set the predication update on the last instruction of the native
2133 * instruction sequence.
2135 * This would be problematic if it was set on a math instruction,
2136 * but that shouldn't be the case with the current GLSL compiler.
2138 if (inst
->CondUpdate
) {
2139 struct brw_instruction
*hw_insn
= &p
->store
[p
->nr_insn
- 1];
2141 assert(hw_insn
->header
.destreg__conditionalmod
== 0);
2142 hw_insn
->header
.destreg__conditionalmod
= BRW_CONDITIONAL_NZ
;
2145 if ((inst
->DstReg
.File
== PROGRAM_OUTPUT
)
2146 && (inst
->DstReg
.Index
!= VERT_RESULT_HPOS
)
2147 && c
->output_regs
[inst
->DstReg
.Index
].used_in_src
) {
2148 brw_MOV(p
, get_dst(c
, inst
->DstReg
), dst
);
2151 /* Result color clamping.
2153 * When destination register is an output register and
2154 * it's primary/secondary front/back color, we have to clamp
2155 * the result to [0,1]. This is done by enabling the
2156 * saturation bit for the last instruction.
2158 * We don't use brw_set_saturate() as it modifies
2159 * p->current->header.saturate, which affects all the subsequent
2160 * instructions. Instead, we directly modify the header
2161 * of the last (already stored) instruction.
2163 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
) {
2164 if ((inst
->DstReg
.Index
== VERT_RESULT_COL0
)
2165 || (inst
->DstReg
.Index
== VERT_RESULT_COL1
)
2166 || (inst
->DstReg
.Index
== VERT_RESULT_BFC0
)
2167 || (inst
->DstReg
.Index
== VERT_RESULT_BFC1
)) {
2168 p
->store
[p
->nr_insn
-1].header
.saturate
= 1;
2172 if (inst
->DstReg
.RelAddr
) {
2173 assert(inst
->DstReg
.File
== PROGRAM_TEMPORARY
||
2174 inst
->DstReg
.File
== PROGRAM_OUTPUT
);
2175 move_to_reladdr_dst(c
, inst
, dst
);
2181 brw_resolve_cals(p
);
2186 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
2189 printf("vs-native:\n");
2190 for (i
= 0; i
< p
->nr_insn
; i
++)
2191 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);