2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "program/program.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "brw_context.h"
40 /* Return the SrcReg index of the channels that can be immediate float operands
41 * instead of usage of PROGRAM_CONSTANT values through push/pull.
/* NOTE(review): this extract is missing several original lines (the return
 * type, the opcode_array initializer values, braces and the early returns);
 * the visible tokens are reproduced unchanged below.
 */
44 brw_vs_arg_can_be_immediate(enum prog_opcode opcode
, int arg
)
/* Per-opcode table: the stored value is (index of the immediate-capable
 * source arg) + 1, as shown by the "- 1" in the final return below.
 */
46 int opcode_array
[] = {
66 /* These opcodes get broken down in a way that allow two
67 * args to be immediates.
69 if (opcode
== OPCODE_MAD
|| opcode
== OPCODE_LRP
) {
70 if (arg
== 1 || arg
== 2)
/* NOTE(review): suspected off-by-one — an opcode equal to
 * ARRAY_SIZE(opcode_array) passes this check and then indexes one past the
 * end of the table on the line below; ">=" looks intended.  TODO confirm
 * against the table size.
 */
74 if (opcode
> ARRAY_SIZE(opcode_array
))
77 return arg
== opcode_array
[opcode
] - 1;
/* Bump-allocate the next free temporary GRF (an 8-wide vec) from the
 * compile's stack-style temp allocator.  Paired with release_tmp() and
 * release_tmps() below.
 */
80 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
82 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
/* Keep prog_data.total_grf as a high-water mark of all GRFs ever used. */
84 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
85 c
->prog_data
.total_grf
= c
->last_tmp
;
/* NOTE(review): the "return tmp;" line is missing from this extract. */
/* Return a temporary obtained from get_tmp().  Only the most recently
 * allocated register can be reclaimed (LIFO discipline); releasing any
 * other register is a silent no-op.
 */
90 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
92 if (tmp
.nr
== c
->last_tmp
-1)
/* NOTE(review): the statement decrementing c->last_tmp is missing from
 * this extract.
 */
96 static void release_tmps( struct brw_vs_compile
*c
)
98 c
->last_tmp
= c
->first_tmp
;
/* Scan the whole program for the lowest-numbered output register that is
 * ever written through relative (address-register) addressing.  Returns
 * VERT_RESULT_MAX when no output write uses RelAddr.  brw_vs_alloc_regs()
 * uses the result to decide which outputs may live in message registers.
 */
/* NOTE(review): the return-type line and the loop-variable declaration are
 * missing from this extract.
 */
102 get_first_reladdr_output(struct gl_vertex_program
*vp
)
105 int first_reladdr_output
= VERT_RESULT_MAX
;
107 for (i
= 0; i
< vp
->Base
.NumInstructions
; i
++) {
108 struct prog_instruction
*inst
= vp
->Base
.Instructions
+ i
;
/* Track the minimum output index among RelAddr destination writes. */
110 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
&&
111 inst
->DstReg
.RelAddr
&&
112 inst
->DstReg
.Index
< first_reladdr_output
)
113 first_reladdr_output
= inst
->DstReg
.Index
;
116 return first_reladdr_output
;
119 /* Clears the record of which vp_const_buffer elements have been
120 * loaded into our constant buffer registers, for the starts of new
121 * blocks after control flow.
/* NOTE(review): the return-type line, braces and the loop-variable
 * declaration are missing from this extract.
 */
124 clear_current_const(struct brw_vs_compile
*c
)
128 if (c
->vp
->use_const_buffer
) {
/* Three cache slots exist, one per possible source argument. */
129 for (i
= 0; i
< 3; i
++) {
/* -1 marks the slot as "nothing cached"; get_constant() compares
 * current_const[i].index against the source index before issuing a
 * new constant-buffer read.
 */
130 c
->current_const
[i
].index
= -1;
136 * Preallocate GRF register before code emit.
137 * Do things as simply as possible. Allocate and populate all regs
140 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
142 struct intel_context
*intel
= &c
->func
.brw
->intel
;
143 GLuint i
, reg
= 0, mrf
;
144 int attributes_in_vue
;
145 int first_reladdr_output
;
149 /* Determine whether to use a real constant buffer or use a block
150 * of GRF registers for constants. The later is faster but only
151 * works if everything fits in the GRF.
152 * XXX this heuristic/check may need some fine tuning...
154 if (c
->vp
->program
.Base
.Parameters
->NumParameters
+
155 c
->vp
->program
.Base
.NumTemporaries
+ 20 > BRW_MAX_GRF
)
156 c
->vp
->use_const_buffer
= GL_TRUE
;
158 c
->vp
->use_const_buffer
= GL_FALSE
;
160 /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
162 /* r0 -- reserved as usual
164 c
->r0
= brw_vec8_grf(reg
, 0);
167 /* User clip planes from curbe:
169 if (c
->key
.nr_userclip
) {
170 if (intel
->gen
>= 6) {
171 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
172 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
173 (i
% 2) * 4), 0, 4, 1);
175 reg
+= ALIGN(c
->key
.nr_userclip
, 2) / 2;
177 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
178 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ (6 + i
) / 2,
179 (i
% 2) * 4), 0, 4, 1);
181 reg
+= (ALIGN(6 + c
->key
.nr_userclip
, 4) / 4) * 2;
186 /* Assign some (probably all) of the vertex program constants to
187 * the push constant buffer/CURBE.
189 * There's an obvious limit to the numer of push constants equal to
190 * the number of register available, and that number is smaller
191 * than the minimum maximum number of vertex program parameters, so
192 * support for pull constants is required if we overflow.
193 * Additionally, on gen6 the number of push constants is even
196 * When there's relative addressing, we don't know what range of
197 * Mesa IR registers can be accessed. And generally, when relative
198 * addressing is used we also have too many constants to load them
199 * all as push constants. So, we'll just support relative
200 * addressing out of the pull constant buffers, and try to load as
201 * many statically-accessed constants into the push constant buffer
204 if (intel
->gen
>= 6) {
205 /* We can only load 32 regs of push constants. */
206 max_constant
= 32 * 2 - c
->key
.nr_userclip
;
208 max_constant
= BRW_MAX_GRF
- 20 - c
->vp
->program
.Base
.NumTemporaries
;
211 /* constant_map maps from ParameterValues[] index to index in the
212 * push constant buffer, or -1 if it's only in the pull constant
215 memset(c
->constant_map
, -1, c
->vp
->program
.Base
.Parameters
->NumParameters
);
217 i
< c
->vp
->program
.Base
.NumInstructions
&& constant
< max_constant
;
219 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[i
];
222 for (arg
= 0; arg
< 3 && constant
< max_constant
; arg
++) {
223 if (inst
->SrcReg
[arg
].File
!= PROGRAM_STATE_VAR
&&
224 inst
->SrcReg
[arg
].File
!= PROGRAM_CONSTANT
&&
225 inst
->SrcReg
[arg
].File
!= PROGRAM_UNIFORM
&&
226 inst
->SrcReg
[arg
].File
!= PROGRAM_ENV_PARAM
&&
227 inst
->SrcReg
[arg
].File
!= PROGRAM_LOCAL_PARAM
) {
231 if (inst
->SrcReg
[arg
].RelAddr
) {
232 c
->vp
->use_const_buffer
= GL_TRUE
;
236 if (c
->constant_map
[inst
->SrcReg
[arg
].Index
] == -1) {
237 c
->constant_map
[inst
->SrcReg
[arg
].Index
] = constant
++;
242 /* If we ran out of push constant space, then we'll also upload all
243 * constants through the pull constant buffer so that they can be
244 * accessed no matter what. For relative addressing (the common
245 * case) we need them all in place anyway.
247 if (constant
== max_constant
)
248 c
->vp
->use_const_buffer
= GL_TRUE
;
250 for (i
= 0; i
< constant
; i
++) {
251 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
255 reg
+= (constant
+ 1) / 2;
256 c
->prog_data
.curb_read_length
= reg
- 1;
257 c
->prog_data
.nr_params
= constant
;
258 /* XXX 0 causes a bug elsewhere... */
259 if (intel
->gen
< 6 && c
->prog_data
.nr_params
== 0)
260 c
->prog_data
.nr_params
= 4;
262 /* Allocate input regs:
265 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
266 if (c
->prog_data
.inputs_read
& (1 << i
)) {
268 c
->regs
[PROGRAM_INPUT
][i
] = brw_vec8_grf(reg
, 0);
272 /* If there are no inputs, we'll still be reading one attribute's worth
273 * because it's required -- see urb_read_length setting.
275 if (c
->nr_inputs
== 0)
278 /* Allocate outputs. The non-position outputs go straight into message regs.
281 c
->first_output
= reg
;
282 c
->first_overflow_output
= 0;
284 if (intel
->gen
>= 6) {
286 if (c
->key
.nr_userclip
)
288 } else if (intel
->gen
== 5)
293 first_reladdr_output
= get_first_reladdr_output(&c
->vp
->program
);
294 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
295 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
297 assert(i
< Elements(c
->regs
[PROGRAM_OUTPUT
]));
298 if (i
== VERT_RESULT_HPOS
) {
299 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
302 else if (i
== VERT_RESULT_PSIZ
) {
303 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
307 /* Two restrictions on our compute-to-MRF here. The
308 * message length for all SEND messages is restricted to
309 * [1,15], so we can't use mrf 15, as that means a length
312 * Additionally, URB writes are aligned to URB rows, so we
313 * need to put an even number of registers of URB data in
314 * each URB write so that the later write is aligned. A
315 * message length of 15 means 1 message header reg plus 14
318 * For attributes beyond the compute-to-MRF, we compute to
319 * GRFs and they will be written in the second URB_WRITE.
321 if (first_reladdr_output
> i
&& mrf
< 15) {
322 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_message_reg(mrf
);
326 if (mrf
>= 15 && !c
->first_overflow_output
)
327 c
->first_overflow_output
= i
;
328 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
336 /* Allocate program temporaries:
338 for (i
= 0; i
< c
->vp
->program
.Base
.NumTemporaries
; i
++) {
339 c
->regs
[PROGRAM_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
343 /* Address reg(s). Don't try to use the internal address reg until
346 for (i
= 0; i
< c
->vp
->program
.Base
.NumAddressRegs
; i
++) {
347 c
->regs
[PROGRAM_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
351 BRW_VERTICAL_STRIDE_8
,
353 BRW_HORIZONTAL_STRIDE_1
,
359 if (c
->vp
->use_const_buffer
) {
360 for (i
= 0; i
< 3; i
++) {
361 c
->current_const
[i
].reg
= brw_vec8_grf(reg
, 0);
364 clear_current_const(c
);
367 for (i
= 0; i
< 128; i
++) {
368 if (c
->output_regs
[i
].used_in_src
) {
369 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
374 if (c
->needs_stack
) {
375 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
379 /* Some opcodes need an internal temporary:
382 c
->last_tmp
= reg
; /* for allocation purposes */
384 /* Each input reg holds data from two vertices. The
385 * urb_read_length is the number of registers read from *each*
386 * vertex urb, so is half the amount:
388 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+ 1) / 2;
389 /* Setting this field to 0 leads to undefined behavior according to the
390 * the VS_STATE docs. Our VUEs will always have at least one attribute
391 * sitting in them, even if it's padding.
393 if (c
->prog_data
.urb_read_length
== 0)
394 c
->prog_data
.urb_read_length
= 1;
396 /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
397 * them to fit the biggest thing they need to.
399 attributes_in_vue
= MAX2(c
->nr_outputs
, c
->nr_inputs
);
401 /* See emit_vertex_write() for where the VUE's overhead on top of the
402 * attributes comes from.
404 if (intel
->gen
>= 6) {
406 if (c
->key
.nr_userclip
)
409 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ header_regs
+ 7) / 8;
410 } else if (intel
->gen
== 5)
411 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 6 + 3) / 4;
413 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 2 + 3) / 4;
415 c
->prog_data
.total_grf
= reg
;
417 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
418 printf("%s NumAddrRegs %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumAddressRegs
);
419 printf("%s NumTemps %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumTemporaries
);
420 printf("%s reg = %d\n", __FUNCTION__
, reg
);
426 * If an instruction uses a temp reg both as a src and the dest, we
427 * sometimes need to allocate an intermediate temporary.
/* NOTE(review): the dst/arg0 parameter lines, the func() call into the
 * temporary, the release_tmp() call and the non-aliasing else-branch are
 * missing from this extract.
 */
429 static void unalias1( struct brw_vs_compile
*c
,
432 void (*func
)( struct brw_vs_compile
*,
/* When dst aliases the source, emit into a fresh temporary (masked to
 * dst's writemask) and copy the result back afterwards.
 */
436 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
437 struct brw_compile
*p
= &c
->func
;
438 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
440 brw_MOV(p
, dst
, tmp
);
450 * Checks whether a 2-operand instruction needs an intermediate temporary
 * because the destination aliases one of its sources.
/* NOTE(review): the dst/arg0/arg1 parameter lines, the remaining func()
 * parameter lines and the release_tmp() call are missing from this extract.
 */
452 static void unalias2( struct brw_vs_compile
*c
,
456 void (*func
)( struct brw_vs_compile
*,
/* dst overlapping either source would corrupt an input mid-instruction,
 * so emit into a writemask-limited temporary first and copy back.
 */
461 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
462 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
463 struct brw_compile
*p
= &c
->func
;
464 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
465 func(c
, tmp
, arg0
, arg1
);
466 brw_MOV(p
, dst
, tmp
);
/* No aliasing: emit straight into the destination. */
470 func(c
, dst
, arg0
, arg1
);
476 * Checks whether a 3-operand instruction needs an intermediate temporary
 * because the destination aliases one of its sources.
/* NOTE(review): the dst/arg0/arg1/arg2 parameter lines, the remaining
 * func() parameter lines and the release_tmp() call are missing from this
 * extract.
 */
478 static void unalias3( struct brw_vs_compile
*c
,
483 void (*func
)( struct brw_vs_compile
*,
/* Same scheme as unalias1/unalias2 but checking all three sources. */
489 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
490 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
) ||
491 (dst
.file
== arg2
.file
&& dst
.nr
== arg2
.nr
)) {
492 struct brw_compile
*p
= &c
->func
;
493 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
494 func(c
, tmp
, arg0
, arg1
, arg2
);
495 brw_MOV(p
, dst
, tmp
);
/* No aliasing: emit straight into the destination. */
499 func(c
, dst
, arg0
, arg1
, arg2
);
503 static void emit_sop( struct brw_vs_compile
*c
,
509 struct brw_compile
*p
= &c
->func
;
511 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
512 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
513 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
514 brw_set_predicate_control_flag_value(p
, 0xff);
517 static void emit_seq( struct brw_vs_compile
*c
,
520 struct brw_reg arg1
)
522 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
525 static void emit_sne( struct brw_vs_compile
*c
,
528 struct brw_reg arg1
)
530 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
532 static void emit_slt( struct brw_vs_compile
*c
,
535 struct brw_reg arg1
)
537 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
540 static void emit_sle( struct brw_vs_compile
*c
,
543 struct brw_reg arg1
)
545 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
548 static void emit_sgt( struct brw_vs_compile
*c
,
551 struct brw_reg arg1
)
553 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
556 static void emit_sge( struct brw_vs_compile
*c
,
559 struct brw_reg arg1
)
561 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
564 static void emit_cmp( struct brw_compile
*p
,
568 struct brw_reg arg2
)
570 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
571 brw_SEL(p
, dst
, arg1
, arg2
);
572 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
575 static void emit_sign(struct brw_vs_compile
*c
,
579 struct brw_compile
*p
= &c
->func
;
581 brw_MOV(p
, dst
, brw_imm_f(0));
583 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
584 brw_MOV(p
, dst
, brw_imm_f(-1.0));
585 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
587 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, arg0
, brw_imm_f(0));
588 brw_MOV(p
, dst
, brw_imm_f(1.0));
589 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
592 static void emit_max( struct brw_compile
*p
,
595 struct brw_reg arg1
)
597 struct intel_context
*intel
= &p
->brw
->intel
;
599 if (intel
->gen
>= 6) {
600 brw_set_conditionalmod(p
, BRW_CONDITIONAL_GE
);
601 brw_SEL(p
, dst
, arg0
, arg1
);
602 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
603 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
605 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
, arg1
);
606 brw_SEL(p
, dst
, arg0
, arg1
);
607 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
611 static void emit_min( struct brw_compile
*p
,
614 struct brw_reg arg1
)
616 struct intel_context
*intel
= &p
->brw
->intel
;
618 if (intel
->gen
>= 6) {
619 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
620 brw_SEL(p
, dst
, arg0
, arg1
);
621 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
622 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
624 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
625 brw_SEL(p
, dst
, arg0
, arg1
);
626 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
630 static void emit_math1_gen4(struct brw_vs_compile
*c
,
636 /* There are various odd behaviours with SEND on the simulator. In
637 * addition there are documented issues with the fact that the GEN4
638 * processor doesn't do dependency control properly on SEND
639 * results. So, on balance, this kludge to get around failures
640 * with writemasked math results looks like it might be necessary
641 * whether that turns out to be a simulator bug or not:
643 struct brw_compile
*p
= &c
->func
;
644 struct brw_reg tmp
= dst
;
645 GLboolean need_tmp
= GL_FALSE
;
647 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
648 dst
.dw1
.bits
.writemask
!= 0xf)
657 BRW_MATH_SATURATE_NONE
,
660 BRW_MATH_DATA_SCALAR
,
664 brw_MOV(p
, dst
, tmp
);
670 emit_math1_gen6(struct brw_vs_compile
*c
,
676 struct brw_compile
*p
= &c
->func
;
677 struct brw_reg tmp_src
, tmp_dst
;
679 /* Something is strange on gen6 math in 16-wide mode, though the
680 * docs say it's supposed to work. Punt to using align1 mode,
681 * which doesn't do writemasking and swizzles.
683 tmp_src
= get_tmp(c
);
684 tmp_dst
= get_tmp(c
);
686 brw_MOV(p
, tmp_src
, arg0
);
688 brw_set_access_mode(p
, BRW_ALIGN_1
);
692 BRW_MATH_SATURATE_NONE
,
695 BRW_MATH_DATA_SCALAR
,
697 brw_set_access_mode(p
, BRW_ALIGN_16
);
699 brw_MOV(p
, dst
, tmp_dst
);
701 release_tmp(c
, tmp_src
);
702 release_tmp(c
, tmp_dst
);
/* Dispatch a one-source math-box operation (EXP, LOG, RSQ, ...) to the
 * generation-appropriate emitter.
 */
/* NOTE(review): the return-type line, the function/dst/arg0/precision
 * parameter lines and the "if (intel->gen >= 6) ... else" lines around the
 * two calls are missing from this extract.
 */
706 emit_math1(struct brw_vs_compile
*c
,
712 struct brw_compile
*p
= &c
->func
;
713 struct intel_context
*intel
= &p
->brw
->intel
;
716 emit_math1_gen6(c
, function
, dst
, arg0
, precision
);
718 emit_math1_gen4(c
, function
, dst
, arg0
, precision
);
721 static void emit_math2_gen4( struct brw_vs_compile
*c
,
728 struct brw_compile
*p
= &c
->func
;
729 struct brw_reg tmp
= dst
;
730 GLboolean need_tmp
= GL_FALSE
;
732 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
733 dst
.dw1
.bits
.writemask
!= 0xf)
739 brw_MOV(p
, brw_message_reg(3), arg1
);
744 BRW_MATH_SATURATE_NONE
,
747 BRW_MATH_DATA_SCALAR
,
751 brw_MOV(p
, dst
, tmp
);
756 static void emit_math2_gen6( struct brw_vs_compile
*c
,
763 struct brw_compile
*p
= &c
->func
;
764 struct brw_reg tmp_src0
, tmp_src1
, tmp_dst
;
766 tmp_src0
= get_tmp(c
);
767 tmp_src1
= get_tmp(c
);
768 tmp_dst
= get_tmp(c
);
770 brw_MOV(p
, tmp_src0
, arg0
);
771 brw_MOV(p
, tmp_src1
, arg1
);
773 brw_set_access_mode(p
, BRW_ALIGN_1
);
779 brw_set_access_mode(p
, BRW_ALIGN_16
);
781 brw_MOV(p
, dst
, tmp_dst
);
783 release_tmp(c
, tmp_src0
);
784 release_tmp(c
, tmp_src1
);
785 release_tmp(c
, tmp_dst
);
/* Dispatch a two-source math-box operation (e.g. POW) to the
 * generation-appropriate emitter.
 */
/* NOTE(review): the function/dst/arg0/arg1/precision parameter lines and
 * the "if (intel->gen >= 6) ... else" lines around the two calls are
 * missing from this extract.
 */
788 static void emit_math2( struct brw_vs_compile
*c
,
795 struct brw_compile
*p
= &c
->func
;
796 struct intel_context
*intel
= &p
->brw
->intel
;
799 emit_math2_gen6(c
, function
, dst
, arg0
, arg1
, precision
);
801 emit_math2_gen4(c
, function
, dst
, arg0
, arg1
, precision
);
804 static void emit_exp_noalias( struct brw_vs_compile
*c
,
806 struct brw_reg arg0
)
808 struct brw_compile
*p
= &c
->func
;
811 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
) {
812 struct brw_reg tmp
= get_tmp(c
);
813 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
815 /* tmp_d = floor(arg0.x) */
816 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
818 /* result[0] = 2.0 ^ tmp */
820 /* Adjust exponent for floating point:
823 brw_ADD(p
, brw_writemask(tmp_d
, WRITEMASK_X
), tmp_d
, brw_imm_d(127));
825 /* Install exponent and sign.
826 * Excess drops off the edge:
828 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), WRITEMASK_X
),
829 tmp_d
, brw_imm_d(23));
834 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
) {
835 /* result[1] = arg0.x - floor(arg0.x) */
836 brw_FRC(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
839 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
840 /* As with the LOG instruction, we might be better off just
841 * doing a taylor expansion here, seeing as we have to do all
844 * If mathbox partial precision is too low, consider also:
845 * result[3] = result[0] * EXP(result[1])
848 BRW_MATH_FUNCTION_EXP
,
849 brw_writemask(dst
, WRITEMASK_Z
),
850 brw_swizzle1(arg0
, 0),
851 BRW_MATH_PRECISION_FULL
);
854 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
855 /* result[3] = 1.0; */
856 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), brw_imm_f(1));
861 static void emit_log_noalias( struct brw_vs_compile
*c
,
863 struct brw_reg arg0
)
865 struct brw_compile
*p
= &c
->func
;
866 struct brw_reg tmp
= dst
;
867 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
868 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
869 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
870 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
874 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
877 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
880 * These almost look likey they could be joined up, but not really
883 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
884 * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
886 if (dst
.dw1
.bits
.writemask
& WRITEMASK_XZ
) {
888 brw_writemask(tmp_ud
, WRITEMASK_X
),
889 brw_swizzle1(arg0_ud
, 0),
890 brw_imm_ud((1U<<31)-1));
893 brw_writemask(tmp_ud
, WRITEMASK_X
),
898 brw_writemask(tmp
, WRITEMASK_X
),
899 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
903 if (dst
.dw1
.bits
.writemask
& WRITEMASK_YZ
) {
905 brw_writemask(tmp_ud
, WRITEMASK_Y
),
906 brw_swizzle1(arg0_ud
, 0),
907 brw_imm_ud((1<<23)-1));
910 brw_writemask(tmp_ud
, WRITEMASK_Y
),
912 brw_imm_ud(127<<23));
915 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
916 /* result[2] = result[0] + LOG2(result[1]); */
918 /* Why bother? The above is just a hint how to do this with a
919 * taylor series. Maybe we *should* use a taylor series as by
920 * the time all the above has been done it's almost certainly
921 * quicker than calling the mathbox, even with low precision.
924 * - result[0] + mathbox.LOG2(result[1])
925 * - mathbox.LOG2(arg0.x)
926 * - result[0] + inline_taylor_approx(result[1])
929 BRW_MATH_FUNCTION_LOG
,
930 brw_writemask(tmp
, WRITEMASK_Z
),
931 brw_swizzle1(tmp
, 1),
932 BRW_MATH_PRECISION_FULL
);
935 brw_writemask(tmp
, WRITEMASK_Z
),
936 brw_swizzle1(tmp
, 2),
937 brw_swizzle1(tmp
, 0));
940 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
941 /* result[3] = 1.0; */
942 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_W
), brw_imm_f(1));
946 brw_MOV(p
, dst
, tmp
);
952 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
954 static void emit_dst_noalias( struct brw_vs_compile
*c
,
959 struct brw_compile
*p
= &c
->func
;
961 /* There must be a better way to do this:
963 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
)
964 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_X
), brw_imm_f(1.0));
965 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
)
966 brw_MUL(p
, brw_writemask(dst
, WRITEMASK_Y
), arg0
, arg1
);
967 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
)
968 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Z
), arg0
);
969 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
)
970 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), arg1
);
974 static void emit_xpd( struct brw_compile
*p
,
979 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
980 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
984 static void emit_lit_noalias( struct brw_vs_compile
*c
,
986 struct brw_reg arg0
)
988 struct brw_compile
*p
= &c
->func
;
989 struct brw_instruction
*if_insn
;
990 struct brw_reg tmp
= dst
;
991 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
996 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_YZ
), brw_imm_f(0));
997 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_XW
), brw_imm_f(1));
999 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
1000 * to get all channels active inside the IF. In the clipping code
1001 * we run with NoMask, so it's not an option and we can use
1002 * BRW_EXECUTE_1 for all comparisions.
1004 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
1005 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
1007 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
,0));
1009 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
1010 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_Z
), brw_swizzle1(arg0
,1));
1011 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1014 BRW_MATH_FUNCTION_POW
,
1015 brw_writemask(dst
, WRITEMASK_Z
),
1016 brw_swizzle1(tmp
, 2),
1017 brw_swizzle1(arg0
, 3),
1018 BRW_MATH_PRECISION_PARTIAL
);
1021 brw_ENDIF(p
, if_insn
);
1023 release_tmp(c
, tmp
);
1026 static void emit_lrp_noalias(struct brw_vs_compile
*c
,
1028 struct brw_reg arg0
,
1029 struct brw_reg arg1
,
1030 struct brw_reg arg2
)
1032 struct brw_compile
*p
= &c
->func
;
1034 brw_ADD(p
, dst
, negate(arg0
), brw_imm_f(1.0));
1035 brw_MUL(p
, brw_null_reg(), dst
, arg2
);
1036 brw_MAC(p
, dst
, arg0
, arg1
);
1039 /** 3 or 4-component vector normalization */
/* NOTE(review): the dst and component-count parameter lines, and the
 * conditional selecting DP3 vs DP4, are missing from this extract.
 */
1040 static void emit_nrm( struct brw_vs_compile
*c
,
1042 struct brw_reg arg0
,
1045 struct brw_compile
*p
= &c
->func
;
1046 struct brw_reg tmp
= get_tmp(c
);
1048 /* tmp = dot(arg0, arg0) -- DP3 for 3-component inputs, DP4 otherwise */
1050 brw_DP3(p
, tmp
, arg0
, arg0
);
1052 brw_DP4(p
, tmp
, arg0
, arg0
);
1054 /* tmp = 1 / sqrt(tmp), via the math box */
1055 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, tmp
, tmp
, BRW_MATH_PRECISION_FULL
);
1057 /* dst = arg0 * tmp */
1058 brw_MUL(p
, dst
, arg0
, tmp
);
1060 release_tmp(c
, tmp
);
1064 static struct brw_reg
1065 get_constant(struct brw_vs_compile
*c
,
1066 const struct prog_instruction
*inst
,
1069 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1070 struct brw_compile
*p
= &c
->func
;
1071 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1073 assert(argIndex
< 3);
1075 assert(c
->func
.brw
->intel
.gen
< 6); /* FINISHME */
1077 if (c
->current_const
[argIndex
].index
!= src
->Index
) {
1078 /* Keep track of the last constant loaded in this slot, for reuse. */
1079 c
->current_const
[argIndex
].index
= src
->Index
;
1082 printf(" fetch const[%d] for arg %d into reg %d\n",
1083 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1085 /* need to fetch the constant now */
1087 const_reg
, /* writeback dest */
1088 16 * src
->Index
, /* byte offset */
1089 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
1093 /* replicate lower four floats into upper half (to get XYZWXYZW) */
1094 const_reg
= stride(const_reg
, 0, 4, 0);
1095 const_reg
.subnr
= 0;
1100 static struct brw_reg
1101 get_reladdr_constant(struct brw_vs_compile
*c
,
1102 const struct prog_instruction
*inst
,
1105 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1106 struct brw_compile
*p
= &c
->func
;
1107 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1108 struct brw_reg addrReg
= c
->regs
[PROGRAM_ADDRESS
][0];
1109 struct brw_reg byte_addr_reg
= retype(get_tmp(c
), BRW_REGISTER_TYPE_D
);
1111 assert(argIndex
< 3);
1113 assert(c
->func
.brw
->intel
.gen
< 6); /* FINISHME */
1115 /* Can't reuse a reladdr constant load. */
1116 c
->current_const
[argIndex
].index
= -1;
1119 printf(" fetch const[a0.x+%d] for arg %d into reg %d\n",
1120 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1123 brw_MUL(p
, byte_addr_reg
, addrReg
, brw_imm_ud(16));
1125 /* fetch the first vec4 */
1126 brw_dp_READ_4_vs_relative(p
,
1127 const_reg
, /* writeback dest */
1128 byte_addr_reg
, /* address register */
1129 16 * src
->Index
, /* byte offset */
1130 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
1138 /* TODO: relative addressing!
1140 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
1141 gl_register_file file
,
1145 case PROGRAM_TEMPORARY
:
1147 case PROGRAM_OUTPUT
:
1148 assert(c
->regs
[file
][index
].nr
!= 0);
1149 return c
->regs
[file
][index
];
1150 case PROGRAM_STATE_VAR
:
1151 case PROGRAM_CONSTANT
:
1152 case PROGRAM_UNIFORM
:
1153 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
1154 return c
->regs
[PROGRAM_STATE_VAR
][index
];
1155 case PROGRAM_ADDRESS
:
1157 return c
->regs
[file
][index
];
1159 case PROGRAM_UNDEFINED
: /* undef values */
1160 return brw_null_reg();
1162 case PROGRAM_LOCAL_PARAM
:
1163 case PROGRAM_ENV_PARAM
:
1164 case PROGRAM_WRITE_ONLY
:
1167 return brw_null_reg();
1173 * Indirect addressing: get reg[[arg] + offset].
1175 static struct brw_reg
deref( struct brw_vs_compile
*c
,
1180 struct brw_compile
*p
= &c
->func
;
1181 struct brw_reg tmp
= get_tmp(c
);
1182 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1183 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1184 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* reg_size
;
1185 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1186 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1188 /* Set the vertical stride on the register access so that the first
1189 * 4 components come from a0.0 and the second 4 from a0.1.
1191 indirect
.vstride
= BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL
;
1194 brw_push_insn_state(p
);
1195 brw_set_access_mode(p
, BRW_ALIGN_1
);
1197 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1198 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1200 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1201 brw_ADD(p
, brw_address_reg(1), acc
, brw_imm_uw(byte_offset
));
1203 brw_MOV(p
, tmp
, indirect
);
1205 brw_pop_insn_state(p
);
1208 /* NOTE: tmp not released */
1213 move_to_reladdr_dst(struct brw_vs_compile
*c
,
1214 const struct prog_instruction
*inst
,
1217 struct brw_compile
*p
= &c
->func
;
1219 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1220 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1221 struct brw_reg base
= c
->regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
];
1222 GLuint byte_offset
= base
.nr
* 32 + base
.subnr
;
1223 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1224 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1226 /* Because destination register indirect addressing can only use
1227 * one index, we'll write each vertex's vec4 value separately.
1229 val
.width
= BRW_WIDTH_4
;
1230 val
.vstride
= BRW_VERTICAL_STRIDE_4
;
1232 brw_push_insn_state(p
);
1233 brw_set_access_mode(p
, BRW_ALIGN_1
);
1235 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1236 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1237 brw_MOV(p
, indirect
, val
);
1239 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1240 brw_ADD(p
, brw_address_reg(0), acc
,
1241 brw_imm_uw(byte_offset
+ reg_size
/ 2));
1242 brw_MOV(p
, indirect
, suboffset(val
, 4));
1244 brw_pop_insn_state(p
);
/*
 * NOTE(review): this region is a garbled extraction of Mesa's i965
 * brw_vs_emit.c — statements are split across lines and the embedded
 * original line numbers jump (e.g. 1253 -> 1256, 1281 -> 1296), so
 * several source lines (including the argIndex parameter declaration
 * and most of the constant-swizzle switch) are missing here. Code is
 * left byte-identical; comments only. Restore from upstream before use.
 */
1248 * Get brw reg corresponding to the instruction's [argIndex] src reg.
1249 * TODO: relative addressing!
1251 static struct brw_reg
1252 get_src_reg( struct brw_vs_compile
*c
,
1253 const struct prog_instruction
*inst
,
/* (missing orig lines 1254-1255: the GLuint argIndex parameter and the
 * opening brace — argIndex is referenced throughout below) */
1256 const GLuint file
= inst
->SrcReg
[argIndex
].File
;
1257 const GLint index
= inst
->SrcReg
[argIndex
].Index
;
1258 const GLboolean relAddr
= inst
->SrcReg
[argIndex
].RelAddr
;
/* Immediate-float shortcut: certain operand slots (per
 * brw_vs_arg_can_be_immediate, see HEAD) can be encoded as an
 * immediate instead of going through push/pull constants. */
1260 if (brw_vs_arg_can_be_immediate(inst
->Opcode
, argIndex
)) {
1261 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
/* An all-ZERO swizzle collapses to immediate 0.0f; an all-ONE
 * swizzle to +/-1.0f depending on negation (negate branch visible
 * at orig line 1273). */
1263 if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ZERO
,
1267 return brw_imm_f(0.0f
);
1268 } else if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ONE
,
1273 return brw_imm_f(-1.0F
);
1275 return brw_imm_f(1.0F
);
1276 } else if (src
->File
== PROGRAM_CONSTANT
) {
1277 const struct gl_program_parameter_list
*params
;
/* (missing orig lines 1282-1295: the per-swizzle 'component'
 * selection cases — presumably mapping XXXX/YYYY/ZZZZ/WWWW to a
 * single component index; verify against upstream) */
1281 switch (src
->Swizzle
) {
1296 if (component
>= 0) {
1297 params
= c
->vp
->program
.Base
.Parameters
;
1298 f
= params
->ParameterValues
[src
->Index
][component
];
1304 return brw_imm_f(f
);
/* Fall through to the regular register-file dispatch below. */
1310 case PROGRAM_TEMPORARY
:
1312 case PROGRAM_OUTPUT
:
/* Relative addressing path: deref() computes an indirect access
 * off the base register (guard condition lost in extraction). */
1314 return deref(c
, c
->regs
[file
][0], index
, 32);
1317 assert(c
->regs
[file
][index
].nr
!= 0);
1318 return c
->regs
[file
][index
];
1321 case PROGRAM_STATE_VAR
:
1322 case PROGRAM_CONSTANT
:
1323 case PROGRAM_UNIFORM
:
1324 case PROGRAM_ENV_PARAM
:
1325 case PROGRAM_LOCAL_PARAM
:
1326 if (!relAddr
&& c
->constant_map
[index
] != -1) {
1327 /* Take from the push constant buffer if possible. */
1328 assert(c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]].nr
!= 0);
1329 return c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]];
1331 /* Must be in the pull constant buffer then .*/
1332 assert(c
->vp
->use_const_buffer
);
/* Relative addressing uses the reladdr variant of the pull-constant
 * load (branch structure lost in extraction). */
1334 return get_reladdr_constant(c
, inst
, argIndex
);
1336 return get_constant(c
, inst
, argIndex
);
1338 case PROGRAM_ADDRESS
:
1340 return c
->regs
[file
][index
];
1342 case PROGRAM_UNDEFINED
:
1343 /* this is a normal case since we loop over all three src args */
1344 return brw_null_reg();
1346 case PROGRAM_WRITE_ONLY
:
1349 return brw_null_reg();
/*
 * NOTE(review): garbled extraction; statements split across lines and
 * some originals missing (e.g. the argIndex parameter around orig 1361
 * and the final "return reg;" around orig 1384). Code byte-identical;
 * comments only.
 */
1354 * Return the brw reg for the given instruction's src argument.
1355 * Will return mangled results for SWZ op. The emit_swz() function
1356 * ignores this result and recalculates taking extended swizzles into
1359 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
1360 const struct prog_instruction
*inst
,
1363 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
/* Unused source slots are PROGRAM_UNDEFINED; hand back the null reg. */
1366 if (src
->File
== PROGRAM_UNDEFINED
)
1367 return brw_null_reg();
1369 reg
= get_src_reg(c
, inst
, argIndex
);
1371 /* Convert 3-bit swizzle to 2-bit.
/* Immediates carry their value in dw1, so the swizzle field must not
 * be overwritten for them — hence the file check. */
1373 if (reg
.file
!= BRW_IMMEDIATE_VALUE
) {
1374 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(GET_SWZ(src
->Swizzle
, 0),
1375 GET_SWZ(src
->Swizzle
, 1),
1376 GET_SWZ(src
->Swizzle
, 2),
1377 GET_SWZ(src
->Swizzle
, 3));
1380 /* Note this is ok for non-swizzle instructions:
1382 reg
.negate
= src
->Negate
? 1 : 0;
/*
 * NOTE(review): garbled extraction; switch header, RelAddr temporary
 * path (orig ~1403-1406), break statements and the final "return reg;"
 * are missing. Code byte-identical; comments only.
 */
1389 * Get brw register for the given program dest register.
1391 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
1392 struct prog_dst_register dst
)
/* Dispatch on dst.File (switch statement itself lost in extraction). */
1397 case PROGRAM_TEMPORARY
:
1398 case PROGRAM_OUTPUT
:
1399 /* register-indirect addressing is only 1x1, not VxH, for
1400 * destination regs. So, for RelAddr we'll return a temporary
1401 * for the dest and do a move of the result to the RelAddr
1402 * register after the instruction emit.
1407 assert(c
->regs
[dst
.File
][dst
.Index
].nr
!= 0);
1408 reg
= c
->regs
[dst
.File
][dst
.Index
];
1411 case PROGRAM_ADDRESS
:
/* Only one address register is supported. */
1412 assert(dst
.Index
== 0);
1413 reg
= c
->regs
[dst
.File
][dst
.Index
];
1415 case PROGRAM_UNDEFINED
:
1416 /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
1417 reg
= brw_null_reg();
1421 reg
= brw_null_reg();
/* Immediates cannot be destinations; apply the program's writemask. */
1424 assert(reg
.type
!= BRW_IMMEDIATE_VALUE
);
1425 reg
.dw1
.bits
.writemask
= dst
.WriteMask
;
/*
 * NOTE(review): garbled extraction of the extended-swizzle (SWZ)
 * emitter. The per-channel classification body (orig 1453-1470, which
 * presumably fills src_mask/zeros_mask/ones_mask and src_swz from the
 * extended swizzle — verify upstream) is missing, as are the dst
 * parameter, need_tmp temporary allocation and several closing braces.
 * Code byte-identical; comments only.
 */
1431 static void emit_swz( struct brw_vs_compile
*c
,
1433 const struct prog_instruction
*inst
)
/* SWZ has exactly one source operand. */
1435 const GLuint argIndex
= 0;
1436 const struct prog_src_register src
= inst
->SrcReg
[argIndex
];
1437 struct brw_compile
*p
= &c
->func
;
1438 GLuint zeros_mask
= 0;
1439 GLuint ones_mask
= 0;
1440 GLuint src_mask
= 0;
/* A temporary is needed when negation must be applied but the dest
 * is not a GRF (negate(tmp) below reads the register back). */
1442 GLboolean need_tmp
= (src
.Negate
&&
1443 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
1444 struct brw_reg tmp
= dst
;
/* Classify each written channel of the extended swizzle. */
1450 for (i
= 0; i
< 4; i
++) {
1451 if (dst
.dw1
.bits
.writemask
& (1<<i
)) {
1452 GLubyte s
= GET_SWZ(src
.Swizzle
, i
);
1471 /* Do src first, in case dst aliases src:
1474 struct brw_reg arg0
;
1476 arg0
= get_src_reg(c
, inst
, argIndex
);
1478 arg0
= brw_swizzle(arg0
,
1479 src_swz
[0], src_swz
[1],
1480 src_swz
[2], src_swz
[3]);
1482 brw_MOV(p
, brw_writemask(tmp
, src_mask
), arg0
);
/* Then the constant channels: zeros, then ones. */
1486 brw_MOV(p
, brw_writemask(tmp
, zeros_mask
), brw_imm_f(0));
1489 brw_MOV(p
, brw_writemask(tmp
, ones_mask
), brw_imm_f(1));
/* Per-channel negation via the Negate bitmask used as a writemask. */
1492 brw_MOV(p
, brw_writemask(tmp
, src
.Negate
), negate(tmp
));
/* Copy back and release the temporary (need_tmp guard lost). */
1495 brw_MOV(p
, dst
, tmp
);
1496 release_tmp(c
, tmp
);
/*
 * NOTE(review): garbled extraction of the URB-write epilogue. Many
 * original lines are missing (e.g. the ndc declaration ~1510-1511, the
 * brw_MOV wrapping the copy_edgeflag pair at ~1516, loop-control
 * statements, the brw_urb_WRITE() call headers at ~1679-1680 and
 * ~1706-1707, and most closing braces). Code byte-identical; comments
 * only. Do not attempt to compile this fragment as-is.
 */
1502 * Post-vertex-program processing. Send the results to the URB.
1504 static void emit_vertex_write( struct brw_vs_compile
*c
)
1506 struct brw_compile
*p
= &c
->func
;
1507 struct brw_context
*brw
= p
->brw
;
1508 struct intel_context
*intel
= &brw
->intel
;
1509 struct brw_reg pos
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_HPOS
];
1512 GLuint len_vertex_header
= 2;
/* Optionally copy the edge flag from the input attribute to the
 * output slot (the wrapping brw_MOV at orig 1516 is missing). */
1515 if (c
->key
.copy_edgeflag
) {
1517 get_reg(c
, PROGRAM_OUTPUT
, VERT_RESULT_EDGE
),
1518 get_reg(c
, PROGRAM_INPUT
, VERT_ATTRIB_EDGEFLAG
));
/* Pre-gen6 hardware wants NDC coordinates in the vertex header. */
1521 if (intel
->gen
< 6) {
1522 /* Build ndc coords */
1524 /* ndc = 1.0 / pos.w */
1525 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
1526 /* ndc.xyz = pos * ndc */
1527 brw_MUL(p
, brw_writemask(ndc
, WRITEMASK_XYZ
), pos
, ndc
);
1530 /* Update the header for point size, user clipping flags, and -ve rhw
1533 if (intel
->gen
>= 6) {
1534 struct brw_reg m1
= brw_message_reg(1);
1536 /* On gen6, m1 has each value in a separate dword, so we never
1537 * need to mess with a temporary for computing the m1 value.
1539 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1540 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1541 brw_MOV(p
, brw_writemask(m1
, WRITEMASK_W
),
1542 brw_swizzle1(c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
], 0));
1545 /* Set the user clip distances in dword 8-15. (m3-4)*/
1546 if (c
->key
.nr_userclip
) {
1547 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
/* Planes 0-7 split across m3 and m4; selection condition on i
 * (orig ~1549/1551) lost in extraction. */
1550 m
= brw_message_reg(3);
1552 m
= brw_message_reg(4);
1554 brw_DP4(p
, brw_writemask(m
, (1 << (i
& 7))),pos
, c
->userplane
[i
]);
/* Pre-gen6 path: pack point size, clip flags and the -ve rhw
 * workaround bit into a single UD temporary (header1). */
1557 } else if ((c
->prog_data
.outputs_written
&
1558 BITFIELD64_BIT(VERT_RESULT_PSIZ
)) ||
1559 c
->key
.nr_userclip
|| brw
->has_negative_rhw_bug
) {
1560 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
1563 brw_MOV(p
, header1
, brw_imm_ud(0));
1565 brw_set_access_mode(p
, BRW_ALIGN_16
);
1567 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1568 struct brw_reg psiz
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
];
/* Scale point size into the 11-bit fixed-point header field
 * (shifted by 8, masked to 0x7ff<<8). */
1569 brw_MUL(p
, brw_writemask(header1
, WRITEMASK_W
),
1570 brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
1571 brw_AND(p
, brw_writemask(header1
, WRITEMASK_W
),
1572 header1
, brw_imm_ud(0x7ff<<8));
/* One clip-flag bit per user clip plane: DP4 against the plane,
 * then predicated OR of bit i when the dot product is negative. */
1575 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1576 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
1577 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
1578 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
1579 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1582 /* i965 clipping workaround:
1583 * 1) Test for -ve rhw
1585 * set ndc = (0,0,0,0)
1588 * Later, clipping will detect ucp[6] and ensure the primitive is
1589 * clipped against all fixed planes.
1591 if (brw
->has_negative_rhw_bug
) {
/* (missing: the brw_CMP header around orig 1592/1594 that this
 * null-dest compare against ndc.w belongs to) */
1593 vec8(brw_null_reg()),
1595 brw_swizzle1(ndc
, 3),
1598 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
1599 brw_MOV(p
, ndc
, brw_imm_f(0));
1600 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1603 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
1604 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
1605 brw_set_access_mode(p
, BRW_ALIGN_16
);
1607 release_tmp(c
, header1
);
/* Default: zero header when none of the special cases apply. */
1610 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1613 /* Emit the (interleaved) headers for the two vertices - an 8-reg
1614 * of zeros followed by two sets of NDC coordinates:
1616 brw_set_access_mode(p
, BRW_ALIGN_1
);
1617 brw_set_acc_write_control(p
, 0);
1619 /* The VUE layout is documented in Volume 2a. */
1620 if (intel
->gen
>= 6) {
1621 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1622 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1623 * dword 4-7 (m2) is the 4D space position
1624 * dword 8-15 (m3,m4) of the vertex header is the user clip distance if
1626 * m3 or 5 is the first vertex element data we fill, which is
1627 * the vertex position.
1629 brw_MOV(p
, brw_message_reg(2), pos
);
1630 len_vertex_header
= 1;
1631 if (c
->key
.nr_userclip
> 0)
1632 len_vertex_header
+= 2;
1633 } else if (intel
->gen
== 5) {
1634 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1635 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1636 * dword 4-7 (m2) is the ndc position (set above)
1637 * dword 8-11 (m3) of the vertex header is the 4D space position
1638 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1639 * m6 is a pad so that the vertex element data is aligned
1640 * m7 is the first vertex data we fill, which is the vertex position.
1642 brw_MOV(p
, brw_message_reg(2), ndc
);
1643 brw_MOV(p
, brw_message_reg(3), pos
);
1644 brw_MOV(p
, brw_message_reg(7), pos
);
1645 len_vertex_header
= 6;
1647 /* There are 8 dwords in VUE header pre-Ironlake:
1648 * dword 0-3 (m1) is indices, point width, clip flags.
1649 * dword 4-7 (m2) is ndc position (set above)
1651 * dword 8-11 (m3) is the first vertex data, which we always have be the
1654 brw_MOV(p
, brw_message_reg(2), ndc
);
1655 brw_MOV(p
, brw_message_reg(3), pos
);
1656 len_vertex_header
= 2;
1659 /* Move variable-addressed, non-overflow outputs to their MRFs. */
1660 next_mrf
= 2 + len_vertex_header
;
1661 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
/* Outputs past first_overflow_output are handled by the second
 * URB write below (break/continue statements lost in extraction). */
1662 if (c
->first_overflow_output
> 0 && i
>= c
->first_overflow_output
)
1664 if (!(c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)))
1666 if (i
== VERT_RESULT_PSIZ
)
1669 if (i
>= VERT_RESULT_TEX0
&&
1670 c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_GENERAL_REGISTER_FILE
) {
1671 brw_MOV(p
, brw_message_reg(next_mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
1673 } else if (c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_MESSAGE_REGISTER_FILE
) {
1674 next_mrf
= c
->regs
[PROGRAM_OUTPUT
][i
].nr
+ 1;
/* EOT only when everything fit in the first URB write. */
1678 eot
= (c
->first_overflow_output
== 0);
/* (missing: the brw_urb_WRITE(p, ... call header, orig ~1679-1680) */
1681 brw_null_reg(), /* dest */
1682 0, /* starting mrf reg nr */
1686 MIN2(c
->nr_outputs
+ 1 + len_vertex_header
, (BRW_MAX_MRF
-1)), /* msg len */
1687 0, /* response len */
1689 eot
, /* writes complete */
1690 0, /* urb destination offset */
1691 BRW_URB_SWIZZLE_INTERLEAVE
);
1693 if (c
->first_overflow_output
> 0) {
1694 /* Not all of the vertex outputs/results fit into the MRF.
1695 * Move the overflowed attributes from the GRF to the MRF and
1696 * issue another brw_urb_WRITE().
1699 for (i
= c
->first_overflow_output
; i
< VERT_RESULT_MAX
; i
++) {
1700 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
1701 /* move from GRF to MRF */
1702 brw_MOV(p
, brw_message_reg(mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
/* (missing: the second brw_urb_WRITE call header, orig ~1706-1707) */
1708 brw_null_reg(), /* dest */
1709 0, /* starting mrf reg nr */
1714 0, /* response len */
1716 1, /* writes complete */
1717 14 / 2, /* urb destination offset */
1718 BRW_URB_SWIZZLE_INTERLEAVE
);
/*
 * NOTE(review): garbled extraction. The return-type line (orig ~1722,
 * presumably a boolean — verify upstream) and the return statements
 * after each guard/comparison are missing. Purpose per visible code:
 * inspect the most recently emitted instruction and decide whether the
 * hardware accumulator already holds 'val' (used by the MAD path to
 * skip reloading the accumulator). Code byte-identical; comments only.
 */
1723 accumulator_contains(struct brw_vs_compile
*c
, struct brw_reg val
)
1725 struct brw_compile
*p
= &c
->func
;
1726 struct brw_instruction
*prev_insn
= &p
->store
[p
->nr_insn
- 1];
/* No previous instruction -> accumulator contents unknown. */
1728 if (p
->nr_insn
== 0)
/* Indirect-addressed values can't be matched against the encoded
 * destination fields below. */
1731 if (val
.address_mode
!= BRW_ADDRESS_DIRECT
)
/* Only MOV/MAC/MUL implicitly update the accumulator here; compare
 * the previous instruction's destination encoding against 'val'. */
1734 switch (prev_insn
->header
.opcode
) {
1735 case BRW_OPCODE_MOV
:
1736 case BRW_OPCODE_MAC
:
1737 case BRW_OPCODE_MUL
:
1738 if (prev_insn
->header
.access_mode
== BRW_ALIGN_16
&&
1739 prev_insn
->header
.execution_size
== val
.width
&&
1740 prev_insn
->bits1
.da1
.dest_reg_file
== val
.file
&&
1741 prev_insn
->bits1
.da1
.dest_reg_type
== val
.type
&&
1742 prev_insn
->bits1
.da1
.dest_address_mode
== val
.address_mode
&&
1743 prev_insn
->bits1
.da1
.dest_reg_nr
== val
.nr
&&
1744 prev_insn
->bits1
.da16
.dest_subreg_nr
== val
.subnr
/ 16 &&
1745 prev_insn
->bits1
.da16
.dest_writemask
== 0xf)
/*
 * NOTE(review): garbled extraction. The return-type line (orig ~1754)
 * and the switch 'case' labels (orig 1776/1778/1780/1782/1784 —
 * presumably the four single-channel CondSwizzle values plus default)
 * are missing. Purpose per visible code: map a Mesa IR condition-code
 * destination to a gen4 Align16 predicate-control value. Code
 * byte-identical; comments only.
 */
1755 get_predicate(const struct prog_instruction
*inst
)
/* COND_TR means "always" -> no predication needed. */
1757 if (inst
->DstReg
.CondMask
== COND_TR
)
1758 return BRW_PREDICATE_NONE
;
1760 /* All of GLSL only produces predicates for COND_NE and one channel per
1761 * vector. Fail badly if someone starts doing something else, as it might
1762 * mean infinite looping or something.
1764 * We'd like to support all the condition codes, but our hardware doesn't
1765 * quite match the Mesa IR, which is modeled after the NV extensions. For
1766 * those, the instruction may update the condition codes or not, then any
1767 * later instruction may use one of those condition codes. For gen4, the
1768 * instruction may update the flags register based on one of the condition
1769 * codes output by the instruction, and then further instructions may
1770 * predicate on that. We can probably support this, but it won't
1771 * necessarily be easy.
1773 assert(inst
->DstReg
.CondMask
== COND_NE
);
/* One replicate-channel predicate per single-channel CondSwizzle. */
1775 switch (inst
->DstReg
.CondSwizzle
) {
1777 return BRW_PREDICATE_ALIGN16_REPLICATE_X
;
1779 return BRW_PREDICATE_ALIGN16_REPLICATE_Y
;
1781 return BRW_PREDICATE_ALIGN16_REPLICATE_Z
;
1783 return BRW_PREDICATE_ALIGN16_REPLICATE_W
;
/* Unrecognized swizzle: warn and fall back to normal predication. */
1785 _mesa_problem(NULL
, "Unexpected predicate: 0x%08x\n",
1786 inst
->DstReg
.CondMask
);
1787 return BRW_PREDICATE_NORMAL
;
1791 /* Emit the vertex program instructions here.
1793 void brw_vs_emit(struct brw_vs_compile
*c
)
1795 #define MAX_IF_DEPTH 32
1796 #define MAX_LOOP_DEPTH 32
1797 struct brw_compile
*p
= &c
->func
;
1798 struct brw_context
*brw
= p
->brw
;
1799 struct intel_context
*intel
= &brw
->intel
;
1800 const GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
1801 GLuint insn
, if_depth
= 0, loop_depth
= 0;
1802 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
] = { 0 };
1803 int if_depth_in_loop
[MAX_LOOP_DEPTH
];
1804 const struct brw_indirect stack_index
= brw_indirect(0, 0);
1808 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1809 printf("vs-mesa:\n");
1810 _mesa_fprint_program_opt(stdout
, &c
->vp
->program
.Base
, PROG_PRINT_DEBUG
,
1815 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1816 brw_set_access_mode(p
, BRW_ALIGN_16
);
1817 if_depth_in_loop
[loop_depth
] = 0;
1819 brw_set_acc_write_control(p
, 1);
1821 for (insn
= 0; insn
< nr_insns
; insn
++) {
1823 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1825 /* Message registers can't be read, so copy the output into GRF
1826 * register if they are used in source registers
1828 for (i
= 0; i
< 3; i
++) {
1829 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1830 GLuint index
= src
->Index
;
1831 GLuint file
= src
->File
;
1832 if (file
== PROGRAM_OUTPUT
&& index
!= VERT_RESULT_HPOS
)
1833 c
->output_regs
[index
].used_in_src
= GL_TRUE
;
1836 switch (inst
->Opcode
) {
1839 c
->needs_stack
= GL_TRUE
;
1846 /* Static register allocation
1848 brw_vs_alloc_regs(c
);
1851 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1853 for (insn
= 0; insn
< nr_insns
; insn
++) {
1855 const struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1856 struct brw_reg args
[3], dst
;
1860 printf("%d: ", insn
);
1861 _mesa_print_instruction(inst
);
1864 /* Get argument regs. SWZ is special and does this itself.
1866 if (inst
->Opcode
!= OPCODE_SWZ
)
1867 for (i
= 0; i
< 3; i
++) {
1868 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1871 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1872 args
[i
] = c
->output_regs
[index
].reg
;
1874 args
[i
] = get_arg(c
, inst
, i
);
1877 /* Get dest regs. Note that it is possible for a reg to be both
1878 * dst and arg, given the static allocation of registers. So
1879 * care needs to be taken emitting multi-operation instructions.
1881 index
= inst
->DstReg
.Index
;
1882 file
= inst
->DstReg
.File
;
1883 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1884 dst
= c
->output_regs
[index
].reg
;
1886 dst
= get_dst(c
, inst
->DstReg
);
1888 if (inst
->SaturateMode
!= SATURATE_OFF
) {
1889 _mesa_problem(NULL
, "Unsupported saturate %d in vertex shader",
1890 inst
->SaturateMode
);
1893 switch (inst
->Opcode
) {
1895 brw_MOV(p
, dst
, brw_abs(args
[0]));
1898 brw_ADD(p
, dst
, args
[0], args
[1]);
1901 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1904 brw_DP2(p
, dst
, args
[0], args
[1]);
1907 brw_DP3(p
, dst
, args
[0], args
[1]);
1910 brw_DP4(p
, dst
, args
[0], args
[1]);
1913 brw_DPH(p
, dst
, args
[0], args
[1]);
1916 emit_nrm(c
, dst
, args
[0], 3);
1919 emit_nrm(c
, dst
, args
[0], 4);
1922 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1925 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1928 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1931 brw_RNDD(p
, dst
, args
[0]);
1934 brw_RNDD(p
, dst
, args
[0]);
1937 brw_FRC(p
, dst
, args
[0]);
1940 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1943 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1946 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1949 unalias3(c
, dst
, args
[0], args
[1], args
[2], emit_lrp_noalias
);
1952 if (!accumulator_contains(c
, args
[2]))
1953 brw_MOV(p
, brw_acc_reg(), args
[2]);
1954 brw_MAC(p
, dst
, args
[0], args
[1]);
1957 emit_cmp(p
, dst
, args
[0], args
[1], args
[2]);
1960 emit_max(p
, dst
, args
[0], args
[1]);
1963 emit_min(p
, dst
, args
[0], args
[1]);
1966 brw_MOV(p
, dst
, args
[0]);
1969 brw_MUL(p
, dst
, args
[0], args
[1]);
1972 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1975 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1978 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, brw_abs(args
[0]), BRW_MATH_PRECISION_FULL
);
1982 unalias2(c
, dst
, args
[0], args
[1], emit_seq
);
1985 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1988 unalias2(c
, dst
, args
[0], args
[1], emit_sne
);
1991 unalias2(c
, dst
, args
[0], args
[1], emit_sge
);
1994 unalias2(c
, dst
, args
[0], args
[1], emit_sgt
);
1997 unalias2(c
, dst
, args
[0], args
[1], emit_slt
);
2000 unalias2(c
, dst
, args
[0], args
[1], emit_sle
);
2003 unalias1(c
, dst
, args
[0], emit_sign
);
2006 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
2009 /* The args[0] value can't be used here as it won't have
2010 * correctly encoded the full swizzle:
2012 emit_swz(c
, dst
, inst
);
2015 /* round toward zero */
2016 brw_RNDZ(p
, dst
, args
[0]);
2019 emit_xpd(p
, dst
, args
[0], args
[1]);
2022 assert(if_depth
< MAX_IF_DEPTH
);
2023 if_inst
[if_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
2024 /* Note that brw_IF smashes the predicate_control field. */
2025 if_inst
[if_depth
]->header
.predicate_control
= get_predicate(inst
);
2026 if_depth_in_loop
[loop_depth
]++;
2030 clear_current_const(c
);
2031 assert(if_depth
> 0);
2032 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2035 clear_current_const(c
);
2036 assert(if_depth
> 0);
2037 brw_ENDIF(p
, if_inst
[--if_depth
]);
2038 if_depth_in_loop
[loop_depth
]--;
2040 case OPCODE_BGNLOOP
:
2041 clear_current_const(c
);
2042 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2043 if_depth_in_loop
[loop_depth
] = 0;
2046 brw_set_predicate_control(p
, get_predicate(inst
));
2047 brw_BREAK(p
, if_depth_in_loop
[loop_depth
]);
2048 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2051 brw_set_predicate_control(p
, get_predicate(inst
));
2052 if (intel
->gen
>= 6) {
2053 brw_CONT_gen6(p
, loop_inst
[loop_depth
- 1]);
2055 brw_CONT(p
, if_depth_in_loop
[loop_depth
]);
2057 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2060 case OPCODE_ENDLOOP
: {
2061 clear_current_const(c
);
2062 struct brw_instruction
*inst0
, *inst1
;
2067 if (intel
->gen
== 5)
2070 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2072 if (intel
->gen
< 6) {
2073 /* patch all the BREAK/CONT instructions from last BEGINLOOP */
2074 while (inst0
> loop_inst
[loop_depth
]) {
2076 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2077 inst0
->bits3
.if_else
.jump_count
== 0) {
2078 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2079 } else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2080 inst0
->bits3
.if_else
.jump_count
== 0) {
2081 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2089 brw_set_predicate_control(p
, get_predicate(inst
));
2090 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2091 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2094 brw_set_access_mode(p
, BRW_ALIGN_1
);
2095 brw_ADD(p
, deref_1d(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2096 brw_set_access_mode(p
, BRW_ALIGN_16
);
2097 brw_ADD(p
, get_addr_reg(stack_index
),
2098 get_addr_reg(stack_index
), brw_imm_d(4));
2099 brw_save_call(p
, inst
->Comment
, p
->nr_insn
);
2100 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2103 brw_ADD(p
, get_addr_reg(stack_index
),
2104 get_addr_reg(stack_index
), brw_imm_d(-4));
2105 brw_set_access_mode(p
, BRW_ALIGN_1
);
2106 brw_MOV(p
, brw_ip_reg(), deref_1d(stack_index
, 0));
2107 brw_set_access_mode(p
, BRW_ALIGN_16
);
2110 emit_vertex_write(c
);
2116 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2122 _mesa_problem(NULL
, "Unsupported opcode %i (%s) in vertex shader",
2123 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
2124 _mesa_opcode_string(inst
->Opcode
) :
2128 /* Set the predication update on the last instruction of the native
2129 * instruction sequence.
2131 * This would be problematic if it was set on a math instruction,
2132 * but that shouldn't be the case with the current GLSL compiler.
2134 if (inst
->CondUpdate
) {
2135 struct brw_instruction
*hw_insn
= &p
->store
[p
->nr_insn
- 1];
2137 assert(hw_insn
->header
.destreg__conditionalmod
== 0);
2138 hw_insn
->header
.destreg__conditionalmod
= BRW_CONDITIONAL_NZ
;
2141 if ((inst
->DstReg
.File
== PROGRAM_OUTPUT
)
2142 && (inst
->DstReg
.Index
!= VERT_RESULT_HPOS
)
2143 && c
->output_regs
[inst
->DstReg
.Index
].used_in_src
) {
2144 brw_MOV(p
, get_dst(c
, inst
->DstReg
), dst
);
2147 /* Result color clamping.
2149 * When destination register is an output register and
2150 * it's primary/secondary front/back color, we have to clamp
2151 * the result to [0,1]. This is done by enabling the
2152 * saturation bit for the last instruction.
2154 * We don't use brw_set_saturate() as it modifies
2155 * p->current->header.saturate, which affects all the subsequent
2156 * instructions. Instead, we directly modify the header
2157 * of the last (already stored) instruction.
2159 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
) {
2160 if ((inst
->DstReg
.Index
== VERT_RESULT_COL0
)
2161 || (inst
->DstReg
.Index
== VERT_RESULT_COL1
)
2162 || (inst
->DstReg
.Index
== VERT_RESULT_BFC0
)
2163 || (inst
->DstReg
.Index
== VERT_RESULT_BFC1
)) {
2164 p
->store
[p
->nr_insn
-1].header
.saturate
= 1;
2168 if (inst
->DstReg
.RelAddr
) {
2169 assert(inst
->DstReg
.File
== PROGRAM_TEMPORARY
||
2170 inst
->DstReg
.File
== PROGRAM_OUTPUT
);
2171 move_to_reladdr_dst(c
, inst
, dst
);
2177 brw_resolve_cals(p
);
2182 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
2185 printf("vs-native:\n");
2186 for (i
= 0; i
< p
->nr_insn
; i
++)
2187 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);