2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "main/macros.h"
34 #include "program/program.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "brw_context.h"
40 /* Return the SrcReg index of the channels that can be immediate float operands
41 * instead of usage of PROGRAM_CONSTANT values through push/pull.
44 brw_vs_arg_can_be_immediate(enum prog_opcode opcode
, int arg
)
46 int opcode_array
[] = {
66 /* These opcodes get broken down in a way that allows two
67 * args to be immediates.
69 if (opcode
== OPCODE_MAD
|| opcode
== OPCODE_LRP
) {
70 if (arg
== 1 || arg
== 2)
74 if (opcode
> ARRAY_SIZE(opcode_array
))
77 return arg
== opcode_array
[opcode
] - 1;
80 static struct brw_reg
get_tmp( struct brw_vs_compile
*c
)
82 struct brw_reg tmp
= brw_vec8_grf(c
->last_tmp
, 0);
84 if (++c
->last_tmp
> c
->prog_data
.total_grf
)
85 c
->prog_data
.total_grf
= c
->last_tmp
;
90 static void release_tmp( struct brw_vs_compile
*c
, struct brw_reg tmp
)
92 if (tmp
.nr
== c
->last_tmp
-1)
96 static void release_tmps( struct brw_vs_compile
*c
)
98 c
->last_tmp
= c
->first_tmp
;
102 get_first_reladdr_output(struct gl_vertex_program
*vp
)
105 int first_reladdr_output
= VERT_RESULT_MAX
;
107 for (i
= 0; i
< vp
->Base
.NumInstructions
; i
++) {
108 struct prog_instruction
*inst
= vp
->Base
.Instructions
+ i
;
110 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
&&
111 inst
->DstReg
.RelAddr
&&
112 inst
->DstReg
.Index
< first_reladdr_output
)
113 first_reladdr_output
= inst
->DstReg
.Index
;
116 return first_reladdr_output
;
119 /* Clears the record of which vp_const_buffer elements have been
120 * loaded into our constant buffer registers, for the starts of new
121 * blocks after control flow.
124 clear_current_const(struct brw_vs_compile
*c
)
128 if (c
->vp
->use_const_buffer
) {
129 for (i
= 0; i
< 3; i
++) {
130 c
->current_const
[i
].index
= -1;
136 * Preallocate GRF register before code emit.
137 * Do things as simply as possible. Allocate and populate all regs
140 static void brw_vs_alloc_regs( struct brw_vs_compile
*c
)
142 struct intel_context
*intel
= &c
->func
.brw
->intel
;
143 GLuint i
, reg
= 0, mrf
;
144 int attributes_in_vue
;
145 int first_reladdr_output
;
147 /* Determine whether to use a real constant buffer or use a block
148 * of GRF registers for constants. The latter is faster but only
149 * works if everything fits in the GRF.
150 * XXX this heuristic/check may need some fine tuning...
152 if (c
->vp
->program
.Base
.Parameters
->NumParameters
+
153 c
->vp
->program
.Base
.NumTemporaries
+ 20 > BRW_MAX_GRF
)
154 c
->vp
->use_const_buffer
= GL_TRUE
;
156 c
->vp
->use_const_buffer
= GL_FALSE
;
158 /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
160 /* r0 -- reserved as usual
162 c
->r0
= brw_vec8_grf(reg
, 0);
165 /* User clip planes from curbe:
167 if (c
->key
.nr_userclip
) {
168 if (intel
->gen
>= 6) {
169 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
170 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ i
/ 2,
171 (i
% 2) * 4), 0, 4, 1);
173 reg
+= ALIGN(c
->key
.nr_userclip
, 2) / 2;
175 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
176 c
->userplane
[i
] = stride(brw_vec4_grf(reg
+ (6 + i
) / 2,
177 (i
% 2) * 4), 0, 4, 1);
179 reg
+= (ALIGN(6 + c
->key
.nr_userclip
, 4) / 4) * 2;
184 /* Vertex program parameters from curbe:
186 if (c
->vp
->use_const_buffer
) {
187 int max_constant
= BRW_MAX_GRF
- 20 - c
->vp
->program
.Base
.NumTemporaries
;
190 /* We've got more constants than we can load with the push
191 * mechanism. This is often correlated with reladdr loads where
192 * we should probably be using a pull mechanism anyway to avoid
193 * excessive reading. However, the pull mechanism is slow in
194 * general. So, we try to allocate as many non-reladdr-loaded
195 * constants through the push buffer as we can before giving up.
197 memset(c
->constant_map
, -1, c
->vp
->program
.Base
.Parameters
->NumParameters
);
199 i
< c
->vp
->program
.Base
.NumInstructions
&& constant
< max_constant
;
201 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[i
];
204 for (arg
= 0; arg
< 3 && constant
< max_constant
; arg
++) {
205 if ((inst
->SrcReg
[arg
].File
!= PROGRAM_STATE_VAR
&&
206 inst
->SrcReg
[arg
].File
!= PROGRAM_CONSTANT
&&
207 inst
->SrcReg
[arg
].File
!= PROGRAM_UNIFORM
&&
208 inst
->SrcReg
[arg
].File
!= PROGRAM_ENV_PARAM
&&
209 inst
->SrcReg
[arg
].File
!= PROGRAM_LOCAL_PARAM
) ||
210 inst
->SrcReg
[arg
].RelAddr
)
213 if (c
->constant_map
[inst
->SrcReg
[arg
].Index
] == -1) {
214 c
->constant_map
[inst
->SrcReg
[arg
].Index
] = constant
++;
219 for (i
= 0; i
< constant
; i
++) {
220 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride( brw_vec4_grf(reg
+i
/2,
224 reg
+= (constant
+ 1) / 2;
225 c
->prog_data
.curb_read_length
= reg
- 1;
226 /* XXX 0 causes a bug elsewhere... */
227 c
->prog_data
.nr_params
= MAX2(constant
* 4, 4);
230 /* use a section of the GRF for constants */
231 GLuint nr_params
= c
->vp
->program
.Base
.Parameters
->NumParameters
;
232 for (i
= 0; i
< nr_params
; i
++) {
233 c
->regs
[PROGRAM_STATE_VAR
][i
] = stride( brw_vec4_grf(reg
+i
/2, (i
%2) * 4), 0, 4, 1);
235 reg
+= (nr_params
+ 1) / 2;
236 c
->prog_data
.curb_read_length
= reg
- 1;
238 c
->prog_data
.nr_params
= nr_params
* 4;
241 /* Allocate input regs:
244 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
245 if (c
->prog_data
.inputs_read
& (1 << i
)) {
247 c
->regs
[PROGRAM_INPUT
][i
] = brw_vec8_grf(reg
, 0);
251 /* If there are no inputs, we'll still be reading one attribute's worth
252 * because it's required -- see urb_read_length setting.
254 if (c
->nr_inputs
== 0)
257 /* Allocate outputs. The non-position outputs go straight into message regs.
260 c
->first_output
= reg
;
261 c
->first_overflow_output
= 0;
263 if (intel
->gen
>= 6) {
265 if (c
->key
.nr_userclip
)
267 } else if (intel
->gen
== 5)
272 first_reladdr_output
= get_first_reladdr_output(&c
->vp
->program
);
273 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
274 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
276 assert(i
< Elements(c
->regs
[PROGRAM_OUTPUT
]));
277 if (i
== VERT_RESULT_HPOS
) {
278 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
281 else if (i
== VERT_RESULT_PSIZ
) {
282 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
286 /* Two restrictions on our compute-to-MRF here. The
287 * message length for all SEND messages is restricted to
288 * [1,15], so we can't use mrf 15, as that means a length
291 * Additionally, URB writes are aligned to URB rows, so we
292 * need to put an even number of registers of URB data in
293 * each URB write so that the later write is aligned. A
294 * message length of 15 means 1 message header reg plus 14
297 * For attributes beyond the compute-to-MRF, we compute to
298 * GRFs and they will be written in the second URB_WRITE.
300 if (first_reladdr_output
> i
&& mrf
< 15) {
301 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_message_reg(mrf
);
305 if (mrf
>= 15 && !c
->first_overflow_output
)
306 c
->first_overflow_output
= i
;
307 c
->regs
[PROGRAM_OUTPUT
][i
] = brw_vec8_grf(reg
, 0);
315 /* Allocate program temporaries:
317 for (i
= 0; i
< c
->vp
->program
.Base
.NumTemporaries
; i
++) {
318 c
->regs
[PROGRAM_TEMPORARY
][i
] = brw_vec8_grf(reg
, 0);
322 /* Address reg(s). Don't try to use the internal address reg until
325 for (i
= 0; i
< c
->vp
->program
.Base
.NumAddressRegs
; i
++) {
326 c
->regs
[PROGRAM_ADDRESS
][i
] = brw_reg(BRW_GENERAL_REGISTER_FILE
,
330 BRW_VERTICAL_STRIDE_8
,
332 BRW_HORIZONTAL_STRIDE_1
,
338 if (c
->vp
->use_const_buffer
) {
339 for (i
= 0; i
< 3; i
++) {
340 c
->current_const
[i
].reg
= brw_vec8_grf(reg
, 0);
343 clear_current_const(c
);
346 for (i
= 0; i
< 128; i
++) {
347 if (c
->output_regs
[i
].used_in_src
) {
348 c
->output_regs
[i
].reg
= brw_vec8_grf(reg
, 0);
353 if (c
->needs_stack
) {
354 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg
, 0);
358 /* Some opcodes need an internal temporary:
361 c
->last_tmp
= reg
; /* for allocation purposes */
363 /* Each input reg holds data from two vertices. The
364 * urb_read_length is the number of registers read from *each*
365 * vertex urb, so is half the amount:
367 c
->prog_data
.urb_read_length
= (c
->nr_inputs
+ 1) / 2;
368 /* Setting this field to 0 leads to undefined behavior according to the
369 * VS_STATE docs. Our VUEs will always have at least one attribute
370 * sitting in them, even if it's padding.
372 if (c
->prog_data
.urb_read_length
== 0)
373 c
->prog_data
.urb_read_length
= 1;
375 /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
376 * them to fit the biggest thing they need to.
378 attributes_in_vue
= MAX2(c
->nr_outputs
, c
->nr_inputs
);
380 /* See emit_vertex_write() for where the VUE's overhead on top of the
381 * attributes comes from.
383 if (intel
->gen
>= 6) {
385 if (c
->key
.nr_userclip
)
388 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ header_regs
+ 7) / 8;
389 } else if (intel
->gen
== 5)
390 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 6 + 3) / 4;
392 c
->prog_data
.urb_entry_size
= (attributes_in_vue
+ 2 + 3) / 4;
394 c
->prog_data
.total_grf
= reg
;
396 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
397 printf("%s NumAddrRegs %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumAddressRegs
);
398 printf("%s NumTemps %d\n", __FUNCTION__
, c
->vp
->program
.Base
.NumTemporaries
);
399 printf("%s reg = %d\n", __FUNCTION__
, reg
);
405 * If an instruction uses a temp reg both as a src and the dest, we
406 * sometimes need to allocate an intermediate temporary.
408 static void unalias1( struct brw_vs_compile
*c
,
411 void (*func
)( struct brw_vs_compile
*,
415 if (dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) {
416 struct brw_compile
*p
= &c
->func
;
417 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
419 brw_MOV(p
, dst
, tmp
);
429 * Checks if 2-operand instruction needs an intermediate temporary.
431 static void unalias2( struct brw_vs_compile
*c
,
435 void (*func
)( struct brw_vs_compile
*,
440 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
441 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
)) {
442 struct brw_compile
*p
= &c
->func
;
443 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
444 func(c
, tmp
, arg0
, arg1
);
445 brw_MOV(p
, dst
, tmp
);
449 func(c
, dst
, arg0
, arg1
);
455 * Checks if 3-operand instruction needs an intermediate temporary.
457 static void unalias3( struct brw_vs_compile
*c
,
462 void (*func
)( struct brw_vs_compile
*,
468 if ((dst
.file
== arg0
.file
&& dst
.nr
== arg0
.nr
) ||
469 (dst
.file
== arg1
.file
&& dst
.nr
== arg1
.nr
) ||
470 (dst
.file
== arg2
.file
&& dst
.nr
== arg2
.nr
)) {
471 struct brw_compile
*p
= &c
->func
;
472 struct brw_reg tmp
= brw_writemask(get_tmp(c
), dst
.dw1
.bits
.writemask
);
473 func(c
, tmp
, arg0
, arg1
, arg2
);
474 brw_MOV(p
, dst
, tmp
);
478 func(c
, dst
, arg0
, arg1
, arg2
);
482 static void emit_sop( struct brw_vs_compile
*c
,
488 struct brw_compile
*p
= &c
->func
;
490 brw_MOV(p
, dst
, brw_imm_f(0.0f
));
491 brw_CMP(p
, brw_null_reg(), cond
, arg0
, arg1
);
492 brw_MOV(p
, dst
, brw_imm_f(1.0f
));
493 brw_set_predicate_control_flag_value(p
, 0xff);
496 static void emit_seq( struct brw_vs_compile
*c
,
499 struct brw_reg arg1
)
501 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_EQ
);
504 static void emit_sne( struct brw_vs_compile
*c
,
507 struct brw_reg arg1
)
509 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_NEQ
);
511 static void emit_slt( struct brw_vs_compile
*c
,
514 struct brw_reg arg1
)
516 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_L
);
519 static void emit_sle( struct brw_vs_compile
*c
,
522 struct brw_reg arg1
)
524 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_LE
);
527 static void emit_sgt( struct brw_vs_compile
*c
,
530 struct brw_reg arg1
)
532 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_G
);
535 static void emit_sge( struct brw_vs_compile
*c
,
538 struct brw_reg arg1
)
540 emit_sop(c
, dst
, arg0
, arg1
, BRW_CONDITIONAL_GE
);
543 static void emit_cmp( struct brw_compile
*p
,
547 struct brw_reg arg2
)
549 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
550 brw_SEL(p
, dst
, arg1
, arg2
);
551 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
554 static void emit_sign(struct brw_vs_compile
*c
,
558 struct brw_compile
*p
= &c
->func
;
560 brw_MOV(p
, dst
, brw_imm_f(0));
562 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, brw_imm_f(0));
563 brw_MOV(p
, dst
, brw_imm_f(-1.0));
564 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
566 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, arg0
, brw_imm_f(0));
567 brw_MOV(p
, dst
, brw_imm_f(1.0));
568 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
571 static void emit_max( struct brw_compile
*p
,
574 struct brw_reg arg1
)
576 struct intel_context
*intel
= &p
->brw
->intel
;
578 if (intel
->gen
>= 6) {
579 brw_set_conditionalmod(p
, BRW_CONDITIONAL_GE
);
580 brw_SEL(p
, dst
, arg0
, arg1
);
581 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
582 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
584 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_GE
, arg0
, arg1
);
585 brw_SEL(p
, dst
, arg0
, arg1
);
586 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
590 static void emit_min( struct brw_compile
*p
,
593 struct brw_reg arg1
)
595 struct intel_context
*intel
= &p
->brw
->intel
;
597 if (intel
->gen
>= 6) {
598 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
599 brw_SEL(p
, dst
, arg0
, arg1
);
600 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
601 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
603 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, arg0
, arg1
);
604 brw_SEL(p
, dst
, arg0
, arg1
);
605 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
609 static void emit_math1_gen4(struct brw_vs_compile
*c
,
615 /* There are various odd behaviours with SEND on the simulator. In
616 * addition there are documented issues with the fact that the GEN4
617 * processor doesn't do dependency control properly on SEND
618 * results. So, on balance, this kludge to get around failures
619 * with writemasked math results looks like it might be necessary
620 * whether that turns out to be a simulator bug or not:
622 struct brw_compile
*p
= &c
->func
;
623 struct brw_reg tmp
= dst
;
624 GLboolean need_tmp
= GL_FALSE
;
626 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
627 dst
.dw1
.bits
.writemask
!= 0xf)
636 BRW_MATH_SATURATE_NONE
,
639 BRW_MATH_DATA_SCALAR
,
643 brw_MOV(p
, dst
, tmp
);
649 emit_math1_gen6(struct brw_vs_compile
*c
,
655 struct brw_compile
*p
= &c
->func
;
656 struct brw_reg tmp_src
, tmp_dst
;
658 /* Something is strange on gen6 math in 16-wide mode, though the
659 * docs say it's supposed to work. Punt to using align1 mode,
660 * which doesn't do writemasking and swizzles.
662 tmp_src
= get_tmp(c
);
663 tmp_dst
= get_tmp(c
);
665 brw_MOV(p
, tmp_src
, arg0
);
667 brw_set_access_mode(p
, BRW_ALIGN_1
);
671 BRW_MATH_SATURATE_NONE
,
674 BRW_MATH_DATA_SCALAR
,
676 brw_set_access_mode(p
, BRW_ALIGN_16
);
678 brw_MOV(p
, dst
, tmp_dst
);
680 release_tmp(c
, tmp_src
);
681 release_tmp(c
, tmp_dst
);
685 emit_math1(struct brw_vs_compile
*c
,
691 struct brw_compile
*p
= &c
->func
;
692 struct intel_context
*intel
= &p
->brw
->intel
;
695 emit_math1_gen6(c
, function
, dst
, arg0
, precision
);
697 emit_math1_gen4(c
, function
, dst
, arg0
, precision
);
700 static void emit_math2_gen4( struct brw_vs_compile
*c
,
707 struct brw_compile
*p
= &c
->func
;
708 struct brw_reg tmp
= dst
;
709 GLboolean need_tmp
= GL_FALSE
;
711 if (dst
.file
!= BRW_GENERAL_REGISTER_FILE
||
712 dst
.dw1
.bits
.writemask
!= 0xf)
718 brw_MOV(p
, brw_message_reg(3), arg1
);
723 BRW_MATH_SATURATE_NONE
,
726 BRW_MATH_DATA_SCALAR
,
730 brw_MOV(p
, dst
, tmp
);
735 static void emit_math2_gen6( struct brw_vs_compile
*c
,
742 struct brw_compile
*p
= &c
->func
;
743 struct brw_reg tmp_src0
, tmp_src1
, tmp_dst
;
745 tmp_src0
= get_tmp(c
);
746 tmp_src1
= get_tmp(c
);
747 tmp_dst
= get_tmp(c
);
749 brw_MOV(p
, tmp_src0
, arg0
);
750 brw_MOV(p
, tmp_src1
, arg1
);
752 brw_set_access_mode(p
, BRW_ALIGN_1
);
758 brw_set_access_mode(p
, BRW_ALIGN_16
);
760 brw_MOV(p
, dst
, tmp_dst
);
762 release_tmp(c
, tmp_src0
);
763 release_tmp(c
, tmp_src1
);
764 release_tmp(c
, tmp_dst
);
767 static void emit_math2( struct brw_vs_compile
*c
,
774 struct brw_compile
*p
= &c
->func
;
775 struct intel_context
*intel
= &p
->brw
->intel
;
778 emit_math2_gen6(c
, function
, dst
, arg0
, arg1
, precision
);
780 emit_math2_gen4(c
, function
, dst
, arg0
, arg1
, precision
);
783 static void emit_exp_noalias( struct brw_vs_compile
*c
,
785 struct brw_reg arg0
)
787 struct brw_compile
*p
= &c
->func
;
790 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
) {
791 struct brw_reg tmp
= get_tmp(c
);
792 struct brw_reg tmp_d
= retype(tmp
, BRW_REGISTER_TYPE_D
);
794 /* tmp_d = floor(arg0.x) */
795 brw_RNDD(p
, tmp_d
, brw_swizzle1(arg0
, 0));
797 /* result[0] = 2.0 ^ tmp */
799 /* Adjust exponent for floating point:
802 brw_ADD(p
, brw_writemask(tmp_d
, WRITEMASK_X
), tmp_d
, brw_imm_d(127));
804 /* Install exponent and sign.
805 * Excess drops off the edge:
807 brw_SHL(p
, brw_writemask(retype(dst
, BRW_REGISTER_TYPE_D
), WRITEMASK_X
),
808 tmp_d
, brw_imm_d(23));
813 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
) {
814 /* result[1] = arg0.x - floor(arg0.x) */
815 brw_FRC(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
, 0));
818 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
819 /* As with the LOG instruction, we might be better off just
820 * doing a taylor expansion here, seeing as we have to do all
823 * If mathbox partial precision is too low, consider also:
824 * result[3] = result[0] * EXP(result[1])
827 BRW_MATH_FUNCTION_EXP
,
828 brw_writemask(dst
, WRITEMASK_Z
),
829 brw_swizzle1(arg0
, 0),
830 BRW_MATH_PRECISION_FULL
);
833 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
834 /* result[3] = 1.0; */
835 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), brw_imm_f(1));
840 static void emit_log_noalias( struct brw_vs_compile
*c
,
842 struct brw_reg arg0
)
844 struct brw_compile
*p
= &c
->func
;
845 struct brw_reg tmp
= dst
;
846 struct brw_reg tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
847 struct brw_reg arg0_ud
= retype(arg0
, BRW_REGISTER_TYPE_UD
);
848 GLboolean need_tmp
= (dst
.dw1
.bits
.writemask
!= 0xf ||
849 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
853 tmp_ud
= retype(tmp
, BRW_REGISTER_TYPE_UD
);
856 /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
859 * These almost look like they could be joined up, but not really
862 * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
863 * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
865 if (dst
.dw1
.bits
.writemask
& WRITEMASK_XZ
) {
867 brw_writemask(tmp_ud
, WRITEMASK_X
),
868 brw_swizzle1(arg0_ud
, 0),
869 brw_imm_ud((1U<<31)-1));
872 brw_writemask(tmp_ud
, WRITEMASK_X
),
877 brw_writemask(tmp
, WRITEMASK_X
),
878 retype(tmp_ud
, BRW_REGISTER_TYPE_D
), /* does it matter? */
882 if (dst
.dw1
.bits
.writemask
& WRITEMASK_YZ
) {
884 brw_writemask(tmp_ud
, WRITEMASK_Y
),
885 brw_swizzle1(arg0_ud
, 0),
886 brw_imm_ud((1<<23)-1));
889 brw_writemask(tmp_ud
, WRITEMASK_Y
),
891 brw_imm_ud(127<<23));
894 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
) {
895 /* result[2] = result[0] + LOG2(result[1]); */
897 /* Why bother? The above is just a hint how to do this with a
898 * taylor series. Maybe we *should* use a taylor series as by
899 * the time all the above has been done it's almost certainly
900 * quicker than calling the mathbox, even with low precision.
903 * - result[0] + mathbox.LOG2(result[1])
904 * - mathbox.LOG2(arg0.x)
905 * - result[0] + inline_taylor_approx(result[1])
908 BRW_MATH_FUNCTION_LOG
,
909 brw_writemask(tmp
, WRITEMASK_Z
),
910 brw_swizzle1(tmp
, 1),
911 BRW_MATH_PRECISION_FULL
);
914 brw_writemask(tmp
, WRITEMASK_Z
),
915 brw_swizzle1(tmp
, 2),
916 brw_swizzle1(tmp
, 0));
919 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
) {
920 /* result[3] = 1.0; */
921 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_W
), brw_imm_f(1));
925 brw_MOV(p
, dst
, tmp
);
931 /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
933 static void emit_dst_noalias( struct brw_vs_compile
*c
,
938 struct brw_compile
*p
= &c
->func
;
940 /* There must be a better way to do this:
942 if (dst
.dw1
.bits
.writemask
& WRITEMASK_X
)
943 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_X
), brw_imm_f(1.0));
944 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Y
)
945 brw_MUL(p
, brw_writemask(dst
, WRITEMASK_Y
), arg0
, arg1
);
946 if (dst
.dw1
.bits
.writemask
& WRITEMASK_Z
)
947 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Z
), arg0
);
948 if (dst
.dw1
.bits
.writemask
& WRITEMASK_W
)
949 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_W
), arg1
);
953 static void emit_xpd( struct brw_compile
*p
,
958 brw_MUL(p
, brw_null_reg(), brw_swizzle(t
, 1,2,0,3), brw_swizzle(u
,2,0,1,3));
959 brw_MAC(p
, dst
, negate(brw_swizzle(t
, 2,0,1,3)), brw_swizzle(u
,1,2,0,3));
963 static void emit_lit_noalias( struct brw_vs_compile
*c
,
965 struct brw_reg arg0
)
967 struct brw_compile
*p
= &c
->func
;
968 struct brw_instruction
*if_insn
;
969 struct brw_reg tmp
= dst
;
970 GLboolean need_tmp
= (dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
975 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_YZ
), brw_imm_f(0));
976 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_XW
), brw_imm_f(1));
978 /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
979 * to get all channels active inside the IF. In the clipping code
980 * we run with NoMask, so it's not an option and we can use
981 * BRW_EXECUTE_1 for all comparisons.
983 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,0), brw_imm_f(0));
984 if_insn
= brw_IF(p
, BRW_EXECUTE_8
);
986 brw_MOV(p
, brw_writemask(dst
, WRITEMASK_Y
), brw_swizzle1(arg0
,0));
988 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, brw_swizzle1(arg0
,1), brw_imm_f(0));
989 brw_MOV(p
, brw_writemask(tmp
, WRITEMASK_Z
), brw_swizzle1(arg0
,1));
990 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
993 BRW_MATH_FUNCTION_POW
,
994 brw_writemask(dst
, WRITEMASK_Z
),
995 brw_swizzle1(tmp
, 2),
996 brw_swizzle1(arg0
, 3),
997 BRW_MATH_PRECISION_PARTIAL
);
1000 brw_ENDIF(p
, if_insn
);
1002 release_tmp(c
, tmp
);
1005 static void emit_lrp_noalias(struct brw_vs_compile
*c
,
1007 struct brw_reg arg0
,
1008 struct brw_reg arg1
,
1009 struct brw_reg arg2
)
1011 struct brw_compile
*p
= &c
->func
;
1013 brw_ADD(p
, dst
, negate(arg0
), brw_imm_f(1.0));
1014 brw_MUL(p
, brw_null_reg(), dst
, arg2
);
1015 brw_MAC(p
, dst
, arg0
, arg1
);
1018 /** 3 or 4-component vector normalization */
1019 static void emit_nrm( struct brw_vs_compile
*c
,
1021 struct brw_reg arg0
,
1024 struct brw_compile
*p
= &c
->func
;
1025 struct brw_reg tmp
= get_tmp(c
);
1027 /* tmp = dot(arg0, arg0) */
1029 brw_DP3(p
, tmp
, arg0
, arg0
);
1031 brw_DP4(p
, tmp
, arg0
, arg0
);
1033 /* tmp = 1 / sqrt(tmp) */
1034 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, tmp
, tmp
, BRW_MATH_PRECISION_FULL
);
1036 /* dst = arg0 * tmp */
1037 brw_MUL(p
, dst
, arg0
, tmp
);
1039 release_tmp(c
, tmp
);
1043 static struct brw_reg
1044 get_constant(struct brw_vs_compile
*c
,
1045 const struct prog_instruction
*inst
,
1048 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1049 struct brw_compile
*p
= &c
->func
;
1050 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1052 assert(argIndex
< 3);
1054 assert(c
->func
.brw
->intel
.gen
< 6); /* FINISHME */
1056 if (c
->current_const
[argIndex
].index
!= src
->Index
) {
1057 /* Keep track of the last constant loaded in this slot, for reuse. */
1058 c
->current_const
[argIndex
].index
= src
->Index
;
1061 printf(" fetch const[%d] for arg %d into reg %d\n",
1062 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1064 /* need to fetch the constant now */
1066 const_reg
, /* writeback dest */
1067 16 * src
->Index
, /* byte offset */
1068 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
1072 /* replicate lower four floats into upper half (to get XYZWXYZW) */
1073 const_reg
= stride(const_reg
, 0, 4, 0);
1074 const_reg
.subnr
= 0;
1079 static struct brw_reg
1080 get_reladdr_constant(struct brw_vs_compile
*c
,
1081 const struct prog_instruction
*inst
,
1084 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1085 struct brw_compile
*p
= &c
->func
;
1086 struct brw_reg const_reg
= c
->current_const
[argIndex
].reg
;
1087 struct brw_reg addrReg
= c
->regs
[PROGRAM_ADDRESS
][0];
1088 struct brw_reg byte_addr_reg
= retype(get_tmp(c
), BRW_REGISTER_TYPE_D
);
1090 assert(argIndex
< 3);
1092 assert(c
->func
.brw
->intel
.gen
< 6); /* FINISHME */
1094 /* Can't reuse a reladdr constant load. */
1095 c
->current_const
[argIndex
].index
= -1;
1098 printf(" fetch const[a0.x+%d] for arg %d into reg %d\n",
1099 src
->Index
, argIndex
, c
->current_const
[argIndex
].reg
.nr
);
1102 brw_MUL(p
, byte_addr_reg
, addrReg
, brw_imm_ud(16));
1104 /* fetch the first vec4 */
1105 brw_dp_READ_4_vs_relative(p
,
1106 const_reg
, /* writeback dest */
1107 byte_addr_reg
, /* address register */
1108 16 * src
->Index
, /* byte offset */
1109 SURF_INDEX_VERT_CONST_BUFFER
/* binding table index */
1117 /* TODO: relative addressing!
1119 static struct brw_reg
get_reg( struct brw_vs_compile
*c
,
1120 gl_register_file file
,
1124 case PROGRAM_TEMPORARY
:
1126 case PROGRAM_OUTPUT
:
1127 assert(c
->regs
[file
][index
].nr
!= 0);
1128 return c
->regs
[file
][index
];
1129 case PROGRAM_STATE_VAR
:
1130 case PROGRAM_CONSTANT
:
1131 case PROGRAM_UNIFORM
:
1132 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
1133 return c
->regs
[PROGRAM_STATE_VAR
][index
];
1134 case PROGRAM_ADDRESS
:
1136 return c
->regs
[file
][index
];
1138 case PROGRAM_UNDEFINED
: /* undef values */
1139 return brw_null_reg();
1141 case PROGRAM_LOCAL_PARAM
:
1142 case PROGRAM_ENV_PARAM
:
1143 case PROGRAM_WRITE_ONLY
:
1146 return brw_null_reg();
1152 * Indirect addressing: get reg[[arg] + offset].
1154 static struct brw_reg
deref( struct brw_vs_compile
*c
,
1159 struct brw_compile
*p
= &c
->func
;
1160 struct brw_reg tmp
= get_tmp(c
);
1161 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1162 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1163 GLuint byte_offset
= arg
.nr
* 32 + arg
.subnr
+ offset
* reg_size
;
1164 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1165 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1167 /* Set the vertical stride on the register access so that the first
1168 * 4 components come from a0.0 and the second 4 from a0.1.
1170 indirect
.vstride
= BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL
;
1173 brw_push_insn_state(p
);
1174 brw_set_access_mode(p
, BRW_ALIGN_1
);
1176 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1177 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1179 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1180 brw_ADD(p
, brw_address_reg(1), acc
, brw_imm_uw(byte_offset
));
1182 brw_MOV(p
, tmp
, indirect
);
1184 brw_pop_insn_state(p
);
1187 /* NOTE: tmp not released */
1192 move_to_reladdr_dst(struct brw_vs_compile
*c
,
1193 const struct prog_instruction
*inst
,
1196 struct brw_compile
*p
= &c
->func
;
1198 struct brw_reg addr_reg
= c
->regs
[PROGRAM_ADDRESS
][0];
1199 struct brw_reg vp_address
= retype(vec1(addr_reg
), BRW_REGISTER_TYPE_D
);
1200 struct brw_reg base
= c
->regs
[inst
->DstReg
.File
][inst
->DstReg
.Index
];
1201 GLuint byte_offset
= base
.nr
* 32 + base
.subnr
;
1202 struct brw_reg indirect
= brw_vec4_indirect(0,0);
1203 struct brw_reg acc
= retype(vec1(get_tmp(c
)), BRW_REGISTER_TYPE_UW
);
1205 /* Because destination register indirect addressing can only use
1206 * one index, we'll write each vertex's vec4 value separately.
1208 val
.width
= BRW_WIDTH_4
;
1209 val
.vstride
= BRW_VERTICAL_STRIDE_4
;
1211 brw_push_insn_state(p
);
1212 brw_set_access_mode(p
, BRW_ALIGN_1
);
1214 brw_MUL(p
, acc
, vp_address
, brw_imm_uw(reg_size
));
1215 brw_ADD(p
, brw_address_reg(0), acc
, brw_imm_uw(byte_offset
));
1216 brw_MOV(p
, indirect
, val
);
1218 brw_MUL(p
, acc
, suboffset(vp_address
, 4), brw_imm_uw(reg_size
));
1219 brw_ADD(p
, brw_address_reg(0), acc
,
1220 brw_imm_uw(byte_offset
+ reg_size
/ 2));
1221 brw_MOV(p
, indirect
, suboffset(val
, 4));
1223 brw_pop_insn_state(p
);
1227 * Get brw reg corresponding to the instruction's [argIndex] src reg.
1228 * TODO: relative addressing!
1230 static struct brw_reg
1231 get_src_reg( struct brw_vs_compile
*c
,
1232 const struct prog_instruction
*inst
,
1235 const GLuint file
= inst
->SrcReg
[argIndex
].File
;
1236 const GLint index
= inst
->SrcReg
[argIndex
].Index
;
1237 const GLboolean relAddr
= inst
->SrcReg
[argIndex
].RelAddr
;
1239 if (brw_vs_arg_can_be_immediate(inst
->Opcode
, argIndex
)) {
1240 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1242 if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ZERO
,
1246 return brw_imm_f(0.0f
);
1247 } else if (src
->Swizzle
== MAKE_SWIZZLE4(SWIZZLE_ONE
,
1252 return brw_imm_f(-1.0F
);
1254 return brw_imm_f(1.0F
);
1255 } else if (src
->File
== PROGRAM_CONSTANT
) {
1256 const struct gl_program_parameter_list
*params
;
1260 switch (src
->Swizzle
) {
1275 if (component
>= 0) {
1276 params
= c
->vp
->program
.Base
.Parameters
;
1277 f
= params
->ParameterValues
[src
->Index
][component
];
1283 return brw_imm_f(f
);
1289 case PROGRAM_TEMPORARY
:
1291 case PROGRAM_OUTPUT
:
1293 return deref(c
, c
->regs
[file
][0], index
, 32);
1296 assert(c
->regs
[file
][index
].nr
!= 0);
1297 return c
->regs
[file
][index
];
1300 case PROGRAM_STATE_VAR
:
1301 case PROGRAM_CONSTANT
:
1302 case PROGRAM_UNIFORM
:
1303 case PROGRAM_ENV_PARAM
:
1304 case PROGRAM_LOCAL_PARAM
:
1305 if (c
->vp
->use_const_buffer
) {
1306 if (!relAddr
&& c
->constant_map
[index
] != -1) {
1307 assert(c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]].nr
!= 0);
1308 return c
->regs
[PROGRAM_STATE_VAR
][c
->constant_map
[index
]];
1310 return get_reladdr_constant(c
, inst
, argIndex
);
1312 return get_constant(c
, inst
, argIndex
);
1315 return deref(c
, c
->regs
[PROGRAM_STATE_VAR
][0], index
, 16);
1318 assert(c
->regs
[PROGRAM_STATE_VAR
][index
].nr
!= 0);
1319 return c
->regs
[PROGRAM_STATE_VAR
][index
];
1321 case PROGRAM_ADDRESS
:
1323 return c
->regs
[file
][index
];
1325 case PROGRAM_UNDEFINED
:
1326 /* this is a normal case since we loop over all three src args */
1327 return brw_null_reg();
1329 case PROGRAM_WRITE_ONLY
:
1332 return brw_null_reg();
1337 * Return the brw reg for the given instruction's src argument.
1338 * Will return mangled results for SWZ op. The emit_swz() function
1339 * ignores this result and recalculates taking extended swizzles into
1342 static struct brw_reg
get_arg( struct brw_vs_compile
*c
,
1343 const struct prog_instruction
*inst
,
1346 const struct prog_src_register
*src
= &inst
->SrcReg
[argIndex
];
1349 if (src
->File
== PROGRAM_UNDEFINED
)
1350 return brw_null_reg();
1352 reg
= get_src_reg(c
, inst
, argIndex
);
1354 /* Convert 3-bit swizzle to 2-bit.
1356 if (reg
.file
!= BRW_IMMEDIATE_VALUE
) {
1357 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(GET_SWZ(src
->Swizzle
, 0),
1358 GET_SWZ(src
->Swizzle
, 1),
1359 GET_SWZ(src
->Swizzle
, 2),
1360 GET_SWZ(src
->Swizzle
, 3));
1363 /* Note this is ok for non-swizzle instructions:
1365 reg
.negate
= src
->Negate
? 1 : 0;
1372 * Get brw register for the given program dest register.
1374 static struct brw_reg
get_dst( struct brw_vs_compile
*c
,
1375 struct prog_dst_register dst
)
1380 case PROGRAM_TEMPORARY
:
1381 case PROGRAM_OUTPUT
:
1382 /* register-indirect addressing is only 1x1, not VxH, for
1383 * destination regs. So, for RelAddr we'll return a temporary
1384 * for the dest and do a move of the result to the RelAddr
1385 * register after the instruction emit.
1390 assert(c
->regs
[dst
.File
][dst
.Index
].nr
!= 0);
1391 reg
= c
->regs
[dst
.File
][dst
.Index
];
1394 case PROGRAM_ADDRESS
:
1395 assert(dst
.Index
== 0);
1396 reg
= c
->regs
[dst
.File
][dst
.Index
];
1398 case PROGRAM_UNDEFINED
:
1399 /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
1400 reg
= brw_null_reg();
1404 reg
= brw_null_reg();
1407 assert(reg
.type
!= BRW_IMMEDIATE_VALUE
);
1408 reg
.dw1
.bits
.writemask
= dst
.WriteMask
;
1414 static void emit_swz( struct brw_vs_compile
*c
,
1416 const struct prog_instruction
*inst
)
1418 const GLuint argIndex
= 0;
1419 const struct prog_src_register src
= inst
->SrcReg
[argIndex
];
1420 struct brw_compile
*p
= &c
->func
;
1421 GLuint zeros_mask
= 0;
1422 GLuint ones_mask
= 0;
1423 GLuint src_mask
= 0;
1425 GLboolean need_tmp
= (src
.Negate
&&
1426 dst
.file
!= BRW_GENERAL_REGISTER_FILE
);
1427 struct brw_reg tmp
= dst
;
1433 for (i
= 0; i
< 4; i
++) {
1434 if (dst
.dw1
.bits
.writemask
& (1<<i
)) {
1435 GLubyte s
= GET_SWZ(src
.Swizzle
, i
);
1454 /* Do src first, in case dst aliases src:
1457 struct brw_reg arg0
;
1459 arg0
= get_src_reg(c
, inst
, argIndex
);
1461 arg0
= brw_swizzle(arg0
,
1462 src_swz
[0], src_swz
[1],
1463 src_swz
[2], src_swz
[3]);
1465 brw_MOV(p
, brw_writemask(tmp
, src_mask
), arg0
);
1469 brw_MOV(p
, brw_writemask(tmp
, zeros_mask
), brw_imm_f(0));
1472 brw_MOV(p
, brw_writemask(tmp
, ones_mask
), brw_imm_f(1));
1475 brw_MOV(p
, brw_writemask(tmp
, src
.Negate
), negate(tmp
));
1478 brw_MOV(p
, dst
, tmp
);
1479 release_tmp(c
, tmp
);
1485 * Post-vertex-program processing. Send the results to the URB.
1487 static void emit_vertex_write( struct brw_vs_compile
*c
)
1489 struct brw_compile
*p
= &c
->func
;
1490 struct brw_context
*brw
= p
->brw
;
1491 struct intel_context
*intel
= &brw
->intel
;
1492 struct brw_reg pos
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_HPOS
];
1495 GLuint len_vertex_header
= 2;
1498 if (c
->key
.copy_edgeflag
) {
1500 get_reg(c
, PROGRAM_OUTPUT
, VERT_RESULT_EDGE
),
1501 get_reg(c
, PROGRAM_INPUT
, VERT_ATTRIB_EDGEFLAG
));
1504 if (intel
->gen
< 6) {
1505 /* Build ndc coords */
1507 /* ndc = 1.0 / pos.w */
1508 emit_math1(c
, BRW_MATH_FUNCTION_INV
, ndc
, brw_swizzle1(pos
, 3), BRW_MATH_PRECISION_FULL
);
1509 /* ndc.xyz = pos * ndc */
1510 brw_MUL(p
, brw_writemask(ndc
, WRITEMASK_XYZ
), pos
, ndc
);
1513 /* Update the header for point size, user clipping flags, and -ve rhw
1516 if (intel
->gen
>= 6) {
1517 struct brw_reg m1
= brw_message_reg(1);
1519 /* On gen6, m1 has each value in a separate dword, so we never
1520 * need to mess with a temporary for computing the m1 value.
1522 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1523 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1524 brw_MOV(p
, brw_writemask(m1
, WRITEMASK_W
),
1525 brw_swizzle1(c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
], 0));
1528 /* Set the user clip distances in dword 8-15. (m3-4)*/
1529 if (c
->key
.nr_userclip
) {
1530 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1533 m
= brw_message_reg(3);
1535 m
= brw_message_reg(4);
1537 brw_DP4(p
, brw_writemask(m
, (1 << (i
& 7))),pos
, c
->userplane
[i
]);
1540 } else if ((c
->prog_data
.outputs_written
&
1541 BITFIELD64_BIT(VERT_RESULT_PSIZ
)) ||
1542 c
->key
.nr_userclip
|| brw
->has_negative_rhw_bug
) {
1543 struct brw_reg header1
= retype(get_tmp(c
), BRW_REGISTER_TYPE_UD
);
1546 brw_MOV(p
, header1
, brw_imm_ud(0));
1548 brw_set_access_mode(p
, BRW_ALIGN_16
);
1550 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(VERT_RESULT_PSIZ
)) {
1551 struct brw_reg psiz
= c
->regs
[PROGRAM_OUTPUT
][VERT_RESULT_PSIZ
];
1552 brw_MUL(p
, brw_writemask(header1
, WRITEMASK_W
),
1553 brw_swizzle1(psiz
, 0), brw_imm_f(1<<11));
1554 brw_AND(p
, brw_writemask(header1
, WRITEMASK_W
),
1555 header1
, brw_imm_ud(0x7ff<<8));
1558 for (i
= 0; i
< c
->key
.nr_userclip
; i
++) {
1559 brw_set_conditionalmod(p
, BRW_CONDITIONAL_L
);
1560 brw_DP4(p
, brw_null_reg(), pos
, c
->userplane
[i
]);
1561 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<i
));
1562 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1565 /* i965 clipping workaround:
1566 * 1) Test for -ve rhw
1568 * set ndc = (0,0,0,0)
1571 * Later, clipping will detect ucp[6] and ensure the primitive is
1572 * clipped against all fixed planes.
1574 if (brw
->has_negative_rhw_bug
) {
1576 vec8(brw_null_reg()),
1578 brw_swizzle1(ndc
, 3),
1581 brw_OR(p
, brw_writemask(header1
, WRITEMASK_W
), header1
, brw_imm_ud(1<<6));
1582 brw_MOV(p
, ndc
, brw_imm_f(0));
1583 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1586 brw_set_access_mode(p
, BRW_ALIGN_1
); /* why? */
1587 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), header1
);
1588 brw_set_access_mode(p
, BRW_ALIGN_16
);
1590 release_tmp(c
, header1
);
1593 brw_MOV(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
1596 /* Emit the (interleaved) headers for the two vertices - an 8-reg
1597 * of zeros followed by two sets of NDC coordinates:
1599 brw_set_access_mode(p
, BRW_ALIGN_1
);
1600 brw_set_acc_write_control(p
, 0);
1602 /* The VUE layout is documented in Volume 2a. */
1603 if (intel
->gen
>= 6) {
1604 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1605 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1606 * dword 4-7 (m2) is the 4D space position
1607 * dword 8-15 (m3,m4) of the vertex header is the user clip distance if
1609 * m3 or 5 is the first vertex element data we fill, which is
1610 * the vertex position.
1612 brw_MOV(p
, brw_message_reg(2), pos
);
1613 len_vertex_header
= 1;
1614 if (c
->key
.nr_userclip
> 0)
1615 len_vertex_header
+= 2;
1616 } else if (intel
->gen
== 5) {
1617 /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1618 * dword 0-3 (m1) of the header is indices, point width, clip flags.
1619 * dword 4-7 (m2) is the ndc position (set above)
1620 * dword 8-11 (m3) of the vertex header is the 4D space position
1621 * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1622 * m6 is a pad so that the vertex element data is aligned
1623 * m7 is the first vertex data we fill, which is the vertex position.
1625 brw_MOV(p
, brw_message_reg(2), ndc
);
1626 brw_MOV(p
, brw_message_reg(3), pos
);
1627 brw_MOV(p
, brw_message_reg(7), pos
);
1628 len_vertex_header
= 6;
1630 /* There are 8 dwords in VUE header pre-Ironlake:
1631 * dword 0-3 (m1) is indices, point width, clip flags.
1632 * dword 4-7 (m2) is ndc position (set above)
1634 * dword 8-11 (m3) is the first vertex data, which we always have be the
1637 brw_MOV(p
, brw_message_reg(2), ndc
);
1638 brw_MOV(p
, brw_message_reg(3), pos
);
1639 len_vertex_header
= 2;
1642 /* Move variable-addressed, non-overflow outputs to their MRFs. */
1643 next_mrf
= 2 + len_vertex_header
;
1644 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1645 if (c
->first_overflow_output
> 0 && i
>= c
->first_overflow_output
)
1647 if (!(c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)))
1649 if (i
== VERT_RESULT_PSIZ
)
1652 if (i
>= VERT_RESULT_TEX0
&&
1653 c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_GENERAL_REGISTER_FILE
) {
1654 brw_MOV(p
, brw_message_reg(next_mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
1656 } else if (c
->regs
[PROGRAM_OUTPUT
][i
].file
== BRW_MESSAGE_REGISTER_FILE
) {
1657 next_mrf
= c
->regs
[PROGRAM_OUTPUT
][i
].nr
+ 1;
1661 eot
= (c
->first_overflow_output
== 0);
1664 brw_null_reg(), /* dest */
1665 0, /* starting mrf reg nr */
1669 MIN2(c
->nr_outputs
+ 1 + len_vertex_header
, (BRW_MAX_MRF
-1)), /* msg len */
1670 0, /* response len */
1672 eot
, /* writes complete */
1673 0, /* urb destination offset */
1674 BRW_URB_SWIZZLE_INTERLEAVE
);
1676 if (c
->first_overflow_output
> 0) {
1677 /* Not all of the vertex outputs/results fit into the MRF.
1678 * Move the overflowed attributes from the GRF to the MRF and
1679 * issue another brw_urb_WRITE().
1682 for (i
= c
->first_overflow_output
; i
< VERT_RESULT_MAX
; i
++) {
1683 if (c
->prog_data
.outputs_written
& BITFIELD64_BIT(i
)) {
1684 /* move from GRF to MRF */
1685 brw_MOV(p
, brw_message_reg(mrf
), c
->regs
[PROGRAM_OUTPUT
][i
]);
1691 brw_null_reg(), /* dest */
1692 0, /* starting mrf reg nr */
1697 0, /* response len */
1699 1, /* writes complete */
1700 14 / 2, /* urb destination offset */
1701 BRW_URB_SWIZZLE_INTERLEAVE
);
1706 accumulator_contains(struct brw_vs_compile
*c
, struct brw_reg val
)
1708 struct brw_compile
*p
= &c
->func
;
1709 struct brw_instruction
*prev_insn
= &p
->store
[p
->nr_insn
- 1];
1711 if (p
->nr_insn
== 0)
1714 if (val
.address_mode
!= BRW_ADDRESS_DIRECT
)
1717 switch (prev_insn
->header
.opcode
) {
1718 case BRW_OPCODE_MOV
:
1719 case BRW_OPCODE_MAC
:
1720 case BRW_OPCODE_MUL
:
1721 if (prev_insn
->header
.access_mode
== BRW_ALIGN_16
&&
1722 prev_insn
->header
.execution_size
== val
.width
&&
1723 prev_insn
->bits1
.da1
.dest_reg_file
== val
.file
&&
1724 prev_insn
->bits1
.da1
.dest_reg_type
== val
.type
&&
1725 prev_insn
->bits1
.da1
.dest_address_mode
== val
.address_mode
&&
1726 prev_insn
->bits1
.da1
.dest_reg_nr
== val
.nr
&&
1727 prev_insn
->bits1
.da16
.dest_subreg_nr
== val
.subnr
/ 16 &&
1728 prev_insn
->bits1
.da16
.dest_writemask
== 0xf)
1738 get_predicate(const struct prog_instruction
*inst
)
1740 if (inst
->DstReg
.CondMask
== COND_TR
)
1741 return BRW_PREDICATE_NONE
;
1743 /* All of GLSL only produces predicates for COND_NE and one channel per
1744 * vector. Fail badly if someone starts doing something else, as it might
1745 * mean infinite looping or something.
1747 * We'd like to support all the condition codes, but our hardware doesn't
1748 * quite match the Mesa IR, which is modeled after the NV extensions. For
1749 * those, the instruction may update the condition codes or not, then any
1750 * later instruction may use one of those condition codes. For gen4, the
1751 * instruction may update the flags register based on one of the condition
1752 * codes output by the instruction, and then further instructions may
1753 * predicate on that. We can probably support this, but it won't
1754 * necessarily be easy.
1756 assert(inst
->DstReg
.CondMask
== COND_NE
);
1758 switch (inst
->DstReg
.CondSwizzle
) {
1760 return BRW_PREDICATE_ALIGN16_REPLICATE_X
;
1762 return BRW_PREDICATE_ALIGN16_REPLICATE_Y
;
1764 return BRW_PREDICATE_ALIGN16_REPLICATE_Z
;
1766 return BRW_PREDICATE_ALIGN16_REPLICATE_W
;
1768 _mesa_problem(NULL
, "Unexpected predicate: 0x%08x\n",
1769 inst
->DstReg
.CondMask
);
1770 return BRW_PREDICATE_NORMAL
;
1774 /* Emit the vertex program instructions here.
1776 void brw_vs_emit(struct brw_vs_compile
*c
)
1778 #define MAX_IF_DEPTH 32
1779 #define MAX_LOOP_DEPTH 32
1780 struct brw_compile
*p
= &c
->func
;
1781 struct brw_context
*brw
= p
->brw
;
1782 struct intel_context
*intel
= &brw
->intel
;
1783 const GLuint nr_insns
= c
->vp
->program
.Base
.NumInstructions
;
1784 GLuint insn
, if_depth
= 0, loop_depth
= 0;
1785 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
] = { 0 };
1786 int if_depth_in_loop
[MAX_LOOP_DEPTH
];
1787 const struct brw_indirect stack_index
= brw_indirect(0, 0);
1791 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
1792 printf("vs-mesa:\n");
1793 _mesa_fprint_program_opt(stdout
, &c
->vp
->program
.Base
, PROG_PRINT_DEBUG
,
1798 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1799 brw_set_access_mode(p
, BRW_ALIGN_16
);
1800 if_depth_in_loop
[loop_depth
] = 0;
1802 brw_set_acc_write_control(p
, 1);
1804 for (insn
= 0; insn
< nr_insns
; insn
++) {
1806 struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1808 /* Message registers can't be read, so copy the output into GRF
1809 * register if they are used in source registers
1811 for (i
= 0; i
< 3; i
++) {
1812 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1813 GLuint index
= src
->Index
;
1814 GLuint file
= src
->File
;
1815 if (file
== PROGRAM_OUTPUT
&& index
!= VERT_RESULT_HPOS
)
1816 c
->output_regs
[index
].used_in_src
= GL_TRUE
;
1819 switch (inst
->Opcode
) {
1822 c
->needs_stack
= GL_TRUE
;
1829 /* Static register allocation
1831 brw_vs_alloc_regs(c
);
1834 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1836 for (insn
= 0; insn
< nr_insns
; insn
++) {
1838 const struct prog_instruction
*inst
= &c
->vp
->program
.Base
.Instructions
[insn
];
1839 struct brw_reg args
[3], dst
;
1843 printf("%d: ", insn
);
1844 _mesa_print_instruction(inst
);
1847 /* Get argument regs. SWZ is special and does this itself.
1849 if (inst
->Opcode
!= OPCODE_SWZ
)
1850 for (i
= 0; i
< 3; i
++) {
1851 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
1854 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1855 args
[i
] = c
->output_regs
[index
].reg
;
1857 args
[i
] = get_arg(c
, inst
, i
);
1860 /* Get dest regs. Note that it is possible for a reg to be both
1861 * dst and arg, given the static allocation of registers. So
1862 * care needs to be taken emitting multi-operation instructions.
1864 index
= inst
->DstReg
.Index
;
1865 file
= inst
->DstReg
.File
;
1866 if (file
== PROGRAM_OUTPUT
&& c
->output_regs
[index
].used_in_src
)
1867 dst
= c
->output_regs
[index
].reg
;
1869 dst
= get_dst(c
, inst
->DstReg
);
1871 if (inst
->SaturateMode
!= SATURATE_OFF
) {
1872 _mesa_problem(NULL
, "Unsupported saturate %d in vertex shader",
1873 inst
->SaturateMode
);
1876 switch (inst
->Opcode
) {
1878 brw_MOV(p
, dst
, brw_abs(args
[0]));
1881 brw_ADD(p
, dst
, args
[0], args
[1]);
1884 emit_math1(c
, BRW_MATH_FUNCTION_COS
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1887 brw_DP2(p
, dst
, args
[0], args
[1]);
1890 brw_DP3(p
, dst
, args
[0], args
[1]);
1893 brw_DP4(p
, dst
, args
[0], args
[1]);
1896 brw_DPH(p
, dst
, args
[0], args
[1]);
1899 emit_nrm(c
, dst
, args
[0], 3);
1902 emit_nrm(c
, dst
, args
[0], 4);
1905 unalias2(c
, dst
, args
[0], args
[1], emit_dst_noalias
);
1908 unalias1(c
, dst
, args
[0], emit_exp_noalias
);
1911 emit_math1(c
, BRW_MATH_FUNCTION_EXP
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1914 brw_RNDD(p
, dst
, args
[0]);
1917 brw_RNDD(p
, dst
, args
[0]);
1920 brw_FRC(p
, dst
, args
[0]);
1923 unalias1(c
, dst
, args
[0], emit_log_noalias
);
1926 emit_math1(c
, BRW_MATH_FUNCTION_LOG
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1929 unalias1(c
, dst
, args
[0], emit_lit_noalias
);
1932 unalias3(c
, dst
, args
[0], args
[1], args
[2], emit_lrp_noalias
);
1935 if (!accumulator_contains(c
, args
[2]))
1936 brw_MOV(p
, brw_acc_reg(), args
[2]);
1937 brw_MAC(p
, dst
, args
[0], args
[1]);
1940 emit_cmp(p
, dst
, args
[0], args
[1], args
[2]);
1943 emit_max(p
, dst
, args
[0], args
[1]);
1946 emit_min(p
, dst
, args
[0], args
[1]);
1949 brw_MOV(p
, dst
, args
[0]);
1952 brw_MUL(p
, dst
, args
[0], args
[1]);
1955 emit_math2(c
, BRW_MATH_FUNCTION_POW
, dst
, args
[0], args
[1], BRW_MATH_PRECISION_FULL
);
1958 emit_math1(c
, BRW_MATH_FUNCTION_INV
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1961 emit_math1(c
, BRW_MATH_FUNCTION_RSQ
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1965 unalias2(c
, dst
, args
[0], args
[1], emit_seq
);
1968 emit_math1(c
, BRW_MATH_FUNCTION_SIN
, dst
, args
[0], BRW_MATH_PRECISION_FULL
);
1971 unalias2(c
, dst
, args
[0], args
[1], emit_sne
);
1974 unalias2(c
, dst
, args
[0], args
[1], emit_sge
);
1977 unalias2(c
, dst
, args
[0], args
[1], emit_sgt
);
1980 unalias2(c
, dst
, args
[0], args
[1], emit_slt
);
1983 unalias2(c
, dst
, args
[0], args
[1], emit_sle
);
1986 unalias1(c
, dst
, args
[0], emit_sign
);
1989 brw_ADD(p
, dst
, args
[0], negate(args
[1]));
1992 /* The args[0] value can't be used here as it won't have
1993 * correctly encoded the full swizzle:
1995 emit_swz(c
, dst
, inst
);
1998 /* round toward zero */
1999 brw_RNDZ(p
, dst
, args
[0]);
2002 emit_xpd(p
, dst
, args
[0], args
[1]);
2005 assert(if_depth
< MAX_IF_DEPTH
);
2006 if_inst
[if_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
2007 /* Note that brw_IF smashes the predicate_control field. */
2008 if_inst
[if_depth
]->header
.predicate_control
= get_predicate(inst
);
2009 if_depth_in_loop
[loop_depth
]++;
2013 clear_current_const(c
);
2014 assert(if_depth
> 0);
2015 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
2018 clear_current_const(c
);
2019 assert(if_depth
> 0);
2020 brw_ENDIF(p
, if_inst
[--if_depth
]);
2021 if_depth_in_loop
[loop_depth
]--;
2023 case OPCODE_BGNLOOP
:
2024 clear_current_const(c
);
2025 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2026 if_depth_in_loop
[loop_depth
] = 0;
2029 brw_set_predicate_control(p
, get_predicate(inst
));
2030 brw_BREAK(p
, if_depth_in_loop
[loop_depth
]);
2031 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2034 brw_set_predicate_control(p
, get_predicate(inst
));
2035 brw_CONT(p
, if_depth_in_loop
[loop_depth
]);
2036 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2038 case OPCODE_ENDLOOP
:
2040 clear_current_const(c
);
2041 struct brw_instruction
*inst0
, *inst1
;
2046 if (intel
->gen
== 5)
2049 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
2050 /* patch all the BREAK/CONT instructions from last BEGINLOOP */
2051 while (inst0
> loop_inst
[loop_depth
]) {
2053 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2054 inst0
->bits3
.if_else
.jump_count
== 0) {
2055 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2057 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2058 inst0
->bits3
.if_else
.jump_count
== 0) {
2059 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2065 brw_set_predicate_control(p
, get_predicate(inst
));
2066 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2067 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2070 brw_set_access_mode(p
, BRW_ALIGN_1
);
2071 brw_ADD(p
, deref_1d(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
2072 brw_set_access_mode(p
, BRW_ALIGN_16
);
2073 brw_ADD(p
, get_addr_reg(stack_index
),
2074 get_addr_reg(stack_index
), brw_imm_d(4));
2075 brw_save_call(p
, inst
->Comment
, p
->nr_insn
);
2076 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
2079 brw_ADD(p
, get_addr_reg(stack_index
),
2080 get_addr_reg(stack_index
), brw_imm_d(-4));
2081 brw_set_access_mode(p
, BRW_ALIGN_1
);
2082 brw_MOV(p
, brw_ip_reg(), deref_1d(stack_index
, 0));
2083 brw_set_access_mode(p
, BRW_ALIGN_16
);
2086 emit_vertex_write(c
);
2092 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
2098 _mesa_problem(NULL
, "Unsupported opcode %i (%s) in vertex shader",
2099 inst
->Opcode
, inst
->Opcode
< MAX_OPCODE
?
2100 _mesa_opcode_string(inst
->Opcode
) :
2104 /* Set the predication update on the last instruction of the native
2105 * instruction sequence.
2107 * This would be problematic if it was set on a math instruction,
2108 * but that shouldn't be the case with the current GLSL compiler.
2110 if (inst
->CondUpdate
) {
2111 struct brw_instruction
*hw_insn
= &p
->store
[p
->nr_insn
- 1];
2113 assert(hw_insn
->header
.destreg__conditionalmod
== 0);
2114 hw_insn
->header
.destreg__conditionalmod
= BRW_CONDITIONAL_NZ
;
2117 if ((inst
->DstReg
.File
== PROGRAM_OUTPUT
)
2118 && (inst
->DstReg
.Index
!= VERT_RESULT_HPOS
)
2119 && c
->output_regs
[inst
->DstReg
.Index
].used_in_src
) {
2120 brw_MOV(p
, get_dst(c
, inst
->DstReg
), dst
);
2123 /* Result color clamping.
2125 * When destination register is an output register and
2126 * it's primary/secondary front/back color, we have to clamp
2127 * the result to [0,1]. This is done by enabling the
2128 * saturation bit for the last instruction.
2130 * We don't use brw_set_saturate() as it modifies
2131 * p->current->header.saturate, which affects all the subsequent
2132 * instructions. Instead, we directly modify the header
2133 * of the last (already stored) instruction.
2135 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
) {
2136 if ((inst
->DstReg
.Index
== VERT_RESULT_COL0
)
2137 || (inst
->DstReg
.Index
== VERT_RESULT_COL1
)
2138 || (inst
->DstReg
.Index
== VERT_RESULT_BFC0
)
2139 || (inst
->DstReg
.Index
== VERT_RESULT_BFC1
)) {
2140 p
->store
[p
->nr_insn
-1].header
.saturate
= 1;
2144 if (inst
->DstReg
.RelAddr
) {
2145 assert(inst
->DstReg
.File
== PROGRAM_TEMPORARY
||
2146 inst
->DstReg
.File
== PROGRAM_OUTPUT
);
2147 move_to_reladdr_dst(c
, inst
, dst
);
2153 brw_resolve_cals(p
);
2157 if (unlikely(INTEL_DEBUG
& DEBUG_VS
)) {
2160 printf("vs-native:\n");
2161 for (i
= 0; i
< p
->nr_insn
; i
++)
2162 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);