2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
/***********************************************************************
 * Internal helper for constructing instructions
 */

/* Derive the instruction's execution size from the destination region width.
 * A SIMD8-wide destination is widened to SIMD16 when the compiler is in
 * compressed (16-wide) mode; otherwise the width encoding is reused directly
 * as the execution-size encoding.
 */
static void guess_execution_size(struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 struct brw_reg reg)
{
   if (reg.width == BRW_WIDTH_8 && p->compressed)
      insn->header.execution_size = BRW_EXECUTE_16;
   else
      insn->header.execution_size = reg.width;  /* note - definitions are compatible */
}
/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 *
 * \param src  in/out: rewritten to point at the message register that now
 *             holds the operand.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
                          struct brw_reg *src,
                          unsigned msg_reg_nr)
{
   struct brw_context *brw = p->brw;
   /* Pre-Gen6 hardware performs the move implicitly; nothing to do. */
   if (brw->gen < 6)
      return;

   /* Already a message register; no move needed. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Emit an uncompressed, unmasked raw MOV of the payload into the MRF,
       * without disturbing the caller's instruction state.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
/* Rewrite an MRF reference as a GRF reference on Gen7, which has no real
 * MRFs.  The register keeps its number, offset into the GRF space reserved
 * by GEN7_MRF_HACK_START.
 */
void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   struct brw_context *brw = p->brw;
   if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}
/**
 * Convert a brw_reg_type enumeration value into the hardware representation.
 *
 * The hardware encoding may depend on whether the value is an immediate.
 * Entries set to -1 mark types that have no encoding for that operand kind
 * and are rejected by the asserts below.
 */
unsigned
brw_reg_type_to_hw_type(const struct brw_context *brw,
                        enum brw_reg_type type, unsigned file)
{
   if (file == BRW_IMMEDIATE_VALUE) {
      const static int imm_hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UB] = -1,   /* byte immediates do not exist */
         [BRW_REGISTER_TYPE_B]  = -1,
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(imm_hw_types));
      assert(imm_hw_types[type] != -1);
      /* DF and later immediate types only exist on Gen8+. */
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
      return imm_hw_types[type];
   } else {
      /* Non-immediate registers */
      const static int hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UV] = -1,   /* vector immediates only */
         [BRW_REGISTER_TYPE_VF] = -1,
         [BRW_REGISTER_TYPE_V]  = -1,
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(hw_types));
      assert(hw_types[type] != -1);
      /* Register-file DF needs Gen7+, HF needs Gen8+. */
      assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
      assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
      return hw_types[type];
   }
}
/* Encode the destination operand of \p insn from \p dest, covering direct and
 * register-indirect addressing in both Align1 and Align16 access modes, and
 * finally picking an execution size from the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
             struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type =
      brw_reg_type_to_hw_type(p->brw, dest.type, dest.file);
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         /* Stride 0 is not a valid destination stride; promote to 1. */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.da1.dest_horiz_stride = dest.hstride;
      } else {
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
            assert(dest.dw1.bits.writemask != 0);
         }
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          *    Although Dst.HorzStride is a don't care for Align16, HW needs
          *    this to be programmed as "01".
          */
         insn->bits1.da16.dest_horiz_stride = 1;
      }
   } else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      } else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         /* even ignored in da16, still need to set as '01' */
         insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
299 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
302 struct brw_context
*brw
= p
->brw
;
304 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
305 assert(reg
.nr
< 128);
307 gen7_convert_mrf_to_grf(p
, ®
);
309 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
310 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
317 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
320 validate_reg(insn
, reg
);
322 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
323 insn
->bits1
.da1
.src0_reg_type
=
324 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
325 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
326 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
327 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
329 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
330 insn
->bits3
.ud
= reg
.dw1
.ud
;
332 /* The Bspec's section titled "Non-present Operands" claims that if src0
333 * is an immediate that src1's type must be the same as that of src0.
335 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
336 * that do not follow this rule. E.g., from the IVB/HSW table:
338 * DataTypeIndex 18-Bit Mapping Mapped Meaning
339 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
341 * And from the SNB table:
343 * DataTypeIndex 18-Bit Mapping Mapped Meaning
344 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
346 * Neither of these cause warnings from the simulator when used,
347 * compacted or otherwise. In fact, all compaction mappings that have an
348 * immediate in src0 use a:ud for src1.
350 * The GM45 instruction compaction tables do not contain mapped meanings
351 * so it's not clear whether it has the restriction. We'll assume it was
352 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
354 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
356 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
358 insn
->bits1
.da1
.src1_reg_type
= BRW_HW_REG_TYPE_UD
;
361 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
362 * for immediate values. Presumably the hardware engineers realized
363 * that the only useful floating-point value that could be represented
364 * in this format is 0.0, which can also be represented as a VF-typed
365 * immediate, so they gave us the previously mentioned mapping on IVB+.
367 * Strangely, we do have a mapping for imm:f in src1, so we don't need
370 * If we see a 0.0:F, change the type to VF so that it can be compacted.
372 if (insn
->bits3
.ud
== 0x0 &&
373 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_F
) {
374 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_IMM_TYPE_VF
;
379 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
380 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
381 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
382 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
385 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
386 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
390 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
392 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
393 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
396 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
400 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
401 if (reg
.width
== BRW_WIDTH_1
&&
402 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
403 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
404 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
405 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
408 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
409 insn
->bits2
.da1
.src0_width
= reg
.width
;
410 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
414 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
415 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
416 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
417 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
419 /* This is an oddity of the fact we're using the same
420 * descriptions for registers in align_16 as align_1:
422 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
423 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
425 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
432 brw_set_src1(struct brw_compile
*p
,
433 struct brw_instruction
*insn
,
436 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
438 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
439 assert(reg
.nr
< 128);
441 gen7_convert_mrf_to_grf(p
, ®
);
443 validate_reg(insn
, reg
);
445 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
446 insn
->bits1
.da1
.src1_reg_type
=
447 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
448 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
449 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
451 /* Only src1 can be immediate in two-argument instructions.
453 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
455 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
456 insn
->bits3
.ud
= reg
.dw1
.ud
;
459 /* This is a hardware restriction, which may or may not be lifted
462 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
463 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
465 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
466 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
467 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
470 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
471 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
474 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
475 if (reg
.width
== BRW_WIDTH_1
&&
476 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
477 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
478 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
479 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
482 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
483 insn
->bits3
.da1
.src1_width
= reg
.width
;
484 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
488 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
489 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
490 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
491 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
493 /* This is an oddity of the fact we're using the same
494 * descriptions for registers in align_16 as align_1:
496 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
497 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
499 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct brw_context *brw = p->brw;

   /* Clears the descriptor (bits3) by writing an immediate 0 as src1. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (brw->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (brw->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
/* Fill in the descriptor for a math-unit SEND message.  Message and response
 * lengths are inferred from the math function being requested.
 */
static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
                                  unsigned dataType )
{
   struct brw_context *brw = p->brw;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      /* Two-operand functions send two registers of payload. */
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      /* These produce two result registers. */
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
   if (brw->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   /* Saturation is handled by the message itself; clear the header bit. */
   insn->header.saturate = 0;
}
/* Fill in the descriptor for an FF_SYNC URB message (Gen5 fixed-function
 * synchronization).  Message length is always 1 (header only).
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
/* Fill in the descriptor for a URB write message, dispatching on hardware
 * generation (Gen7 / Gen5-6 / Gen4 descriptor layouts).
 */
static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true,
                              flags & BRW_URB_WRITE_EOT);
   if (brw->gen == 7) {
      if (flags & BRW_URB_WRITE_OWORD) {
         assert(msg_length == 2); /* header + one OWORD of data */
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
      } else {
         insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
      }
      insn->bits3.urb_gen7.offset = offset;
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      insn->bits3.urb_gen7.per_slot_offset =
         flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0;
      insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else if (brw->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0;
      insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1;
      insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0;
   }
}
/* Fill in the descriptor for a data-port write message, selecting the
 * appropriate shared function (SFID) and descriptor layout per generation.
 */
static void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         unsigned binding_table_index,
                         unsigned msg_control,
                         unsigned msg_type,
                         unsigned msg_length,
                         bool header_present,
                         unsigned last_render_target,
                         unsigned response_length,
                         unsigned end_of_thread,
                         unsigned send_commit_msg)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (brw->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
/* Fill in the descriptor for a data-port read message, selecting the
 * appropriate shared function (SFID) and descriptor layout per generation.
 */
static void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned msg_control,
                        unsigned msg_type,
                        unsigned target_cache,
                        unsigned msg_length,
                        bool header_present,
                        unsigned response_length)
{
   struct brw_context *brw = p->brw;
   unsigned sfid;

   if (brw->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (brw->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (brw->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (brw->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (brw->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
/* Fill in the descriptor for a sampler SEND message, dispatching on the
 * per-generation descriptor layout (Gen7 / Gen5-6 / G4x / original Gen4).
 */
void
brw_set_sampler_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   struct brw_context *brw = p->brw;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (brw->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (brw->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (brw->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
817 #define next_insn brw_next_insn
818 struct brw_instruction
*
819 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
821 struct brw_instruction
*insn
;
823 if (p
->nr_insn
+ 1 > p
->store_size
) {
825 fprintf(stderr
, "incresing the store size to %d\n",
829 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
830 struct brw_instruction
, p
->store_size
);
832 assert(!"realloc eu store memeory failed");
835 p
->next_insn_offset
+= 16;
836 insn
= &p
->store
[p
->nr_insn
++];
837 memcpy(insn
, p
->current
, sizeof(*insn
));
839 /* Reset this one-shot flag:
842 if (p
->current
->header
.destreg__conditionalmod
) {
843 p
->current
->header
.destreg__conditionalmod
= 0;
844 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
847 insn
->header
.opcode
= opcode
;
/* Emit a one-source ALU instruction: allocate the slot and encode the
 * destination and single source operand.
 */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
                                         unsigned opcode,
                                         struct brw_reg dest,
                                         struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}
/* Emit a two-source ALU instruction: allocate the slot and encode the
 * destination and both source operands.
 */
static struct brw_instruction *brw_alu2(struct brw_compile *p,
                                        unsigned opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}
/* Compute the dword-granular subregister number for a 3-source operand.
 * A <0;...> vstride marks a scalar (replicated) source, where the swizzle's
 * first channel selects the dword within the register.
 */
static int
get_3src_subreg_nr(struct brw_reg reg)
{
   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
   } else {
      return reg.subnr / 4;
   }
}
/* Emit a three-source ALU instruction (MAD/LRP/BFE/BFI2...).  3-source
 * instructions use the compact da3src encoding: Align16 only, GRF (or MRF
 * for the destination) only, with per-source swizzle and replicate control.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
                                        unsigned opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1,
                                        struct brw_reg src2)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl set for scalar (vstride 0) sources. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subregister number is split across the bits2/bits3 dwords. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (brw->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
/***********************************************************************
 * Convenience routines.
 *
 * Each macro expands to the public emitter for one EU opcode, forwarding
 * to the generic brw_alu1/2/3 encoders.
 */
#define ALU1(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0)				\
{								\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}

#define ALU2(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1)				\
{								\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

#define ALU3(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1,				\
	      struct brw_reg src2)				\
{								\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Three-source float-only ops: hardware requires all operands float. */
#define ALU3F(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1,				\
	      struct brw_reg src2)				\
{								\
   assert(dest.type == BRW_REGISTER_TYPE_F);			\
   assert(src0.type == BRW_REGISTER_TYPE_F);			\
   assert(src1.type == BRW_REGISTER_TYPE_F);			\
   assert(src2.type == BRW_REGISTER_TYPE_F);			\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							\
void brw_##OP(struct brw_compile *p,					\
	      struct brw_reg dest,					\
	      struct brw_reg src)					\
{									\
   struct brw_instruction *rnd, *add;					\
   rnd = next_insn(p, BRW_OPCODE_##OP);					\
   brw_set_dest(p, rnd, dest);						\
   brw_set_src0(p, rnd, src);						\
									\
   if (p->brw->gen < 6) {						\
      /* turn on round-increments */					\
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		\
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			\
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		\
   }									\
}
1079 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1080 struct brw_reg dest
,
1081 struct brw_reg src0
,
1082 struct brw_reg src1
)
1085 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1086 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1087 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1088 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1089 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1092 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1093 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1094 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1095 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1096 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1099 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1102 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1103 struct brw_reg dest
,
1104 struct brw_reg src0
,
1105 struct brw_reg src1
)
1107 assert(dest
.type
== src0
.type
);
1108 assert(src0
.type
== src1
.type
);
1109 switch (src0
.type
) {
1110 case BRW_REGISTER_TYPE_B
:
1111 case BRW_REGISTER_TYPE_UB
:
1112 case BRW_REGISTER_TYPE_W
:
1113 case BRW_REGISTER_TYPE_UW
:
1114 case BRW_REGISTER_TYPE_D
:
1115 case BRW_REGISTER_TYPE_UD
:
1118 assert(!"Bad type for brw_AVG");
1121 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1124 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1125 struct brw_reg dest
,
1126 struct brw_reg src0
,
1127 struct brw_reg src1
)
1130 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1131 src0
.type
== BRW_REGISTER_TYPE_UD
||
1132 src1
.type
== BRW_REGISTER_TYPE_D
||
1133 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1134 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1137 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1138 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1139 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1140 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1141 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1144 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1145 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1146 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1147 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1148 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1151 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1152 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1153 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1154 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1156 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1160 void brw_NOP(struct brw_compile
*p
)
1162 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1163 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1164 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1165 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1172 /***********************************************************************
1173 * Comparisons, if/else/endif
1176 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1177 struct brw_reg dest
,
1178 struct brw_reg src0
,
1179 struct brw_reg src1
)
1181 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1183 insn
->header
.execution_size
= 1;
1184 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1185 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1187 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1193 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1195 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1197 p
->if_stack_depth
++;
1198 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1199 p
->if_stack_array_size
*= 2;
1200 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1201 p
->if_stack_array_size
);
1205 static struct brw_instruction
*
1206 pop_if_stack(struct brw_compile
*p
)
1208 p
->if_stack_depth
--;
1209 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1213 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1215 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1216 p
->loop_stack_array_size
*= 2;
1217 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1218 p
->loop_stack_array_size
);
1219 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1220 p
->loop_stack_array_size
);
1223 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1224 p
->loop_stack_depth
++;
1225 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1228 static struct brw_instruction
*
1229 get_inner_do_insn(struct brw_compile
*p
)
1231 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1234 /* EU takes the value from the flag register and pushes it onto some
1235 * sort of a stack (presumably merging with any flag value already on
1236 * the stack). Within an if block, the flags at the top of the stack
1237 * control execution on each channel of the unit, eg. on each of the
1238 * 16 pixel values in our wm programs.
1240 * When the matching 'else' instruction is reached (presumably by
1241 * countdown of the instruction count patched in by our ELSE/ENDIF
1242 * functions), the relevent flags are inverted.
1244 * When the matching 'endif' instruction is reached, the flags are
1245 * popped off. If the stack is now empty, normal execution resumes.
1247 struct brw_instruction
*
1248 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1250 struct brw_context
*brw
= p
->brw
;
1251 struct brw_instruction
*insn
;
1253 insn
= next_insn(p
, BRW_OPCODE_IF
);
1255 /* Override the defaults for this instruction:
1258 brw_set_dest(p
, insn
, brw_ip_reg());
1259 brw_set_src0(p
, insn
, brw_ip_reg());
1260 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1261 } else if (brw
->gen
== 6) {
1262 brw_set_dest(p
, insn
, brw_imm_w(0));
1263 insn
->bits1
.branch_gen6
.jump_count
= 0;
1264 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1265 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1267 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1268 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1269 brw_set_src1(p
, insn
, brw_imm_ud(0));
1270 insn
->bits3
.break_cont
.jip
= 0;
1271 insn
->bits3
.break_cont
.uip
= 0;
1274 insn
->header
.execution_size
= execute_size
;
1275 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1276 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1277 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1278 if (!p
->single_program_flow
)
1279 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1281 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1283 push_if_stack(p
, insn
);
1284 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1288 /* This function is only used for gen6-style IF instructions with an
1289 * embedded comparison (conditional modifier). It is not used on gen7.
1291 struct brw_instruction
*
1292 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1293 struct brw_reg src0
, struct brw_reg src1
)
1295 struct brw_instruction
*insn
;
1297 insn
= next_insn(p
, BRW_OPCODE_IF
);
1299 brw_set_dest(p
, insn
, brw_imm_w(0));
1300 if (p
->compressed
) {
1301 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1303 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1305 insn
->bits1
.branch_gen6
.jump_count
= 0;
1306 brw_set_src0(p
, insn
, src0
);
1307 brw_set_src1(p
, insn
, src1
);
1309 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1310 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1311 insn
->header
.destreg__conditionalmod
= conditional
;
1313 if (!p
->single_program_flow
)
1314 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1316 push_if_stack(p
, insn
);
1321 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1324 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1325 struct brw_instruction
*if_inst
,
1326 struct brw_instruction
*else_inst
)
1328 /* The next instruction (where the ENDIF would be, if it existed) */
1329 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1331 assert(p
->single_program_flow
);
1332 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1333 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1334 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1336 /* Convert IF to an ADD instruction that moves the instruction pointer
1337 * to the first instruction of the ELSE block. If there is no ELSE
1338 * block, point to where ENDIF would be. Reverse the predicate.
1340 * There's no need to execute an ENDIF since we don't need to do any
1341 * stack operations, and if we're currently executing, we just want to
1342 * continue normally.
1344 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1345 if_inst
->header
.predicate_inverse
= 1;
1347 if (else_inst
!= NULL
) {
1348 /* Convert ELSE to an ADD instruction that points where the ENDIF
1351 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1353 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1354 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1356 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1361 * Patch IF and ELSE instructions with appropriate jump targets.
1364 patch_IF_ELSE(struct brw_compile
*p
,
1365 struct brw_instruction
*if_inst
,
1366 struct brw_instruction
*else_inst
,
1367 struct brw_instruction
*endif_inst
)
1369 struct brw_context
*brw
= p
->brw
;
1371 /* We shouldn't be patching IF and ELSE instructions in single program flow
1372 * mode when gen < 6, because in single program flow mode on those
1373 * platforms, we convert flow control instructions to conditional ADDs that
1374 * operate on IP (see brw_ENDIF).
1376 * However, on Gen6, writing to IP doesn't work in single program flow mode
1377 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1378 * not be updated by non-flow control instructions."). And on later
1379 * platforms, there is no significant benefit to converting control flow
1380 * instructions to conditional ADDs. So we do patch IF and ELSE
1381 * instructions in single program flow mode on those platforms.
1384 assert(!p
->single_program_flow
);
1386 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1387 assert(endif_inst
!= NULL
);
1388 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1391 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1392 * requires 2 chunks.
1397 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1398 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1400 if (else_inst
== NULL
) {
1401 /* Patch IF -> ENDIF */
1403 /* Turn it into an IFF, which means no mask stack operations for
1404 * all-false and jumping past the ENDIF.
1406 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1407 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1408 if_inst
->bits3
.if_else
.pop_count
= 0;
1409 if_inst
->bits3
.if_else
.pad0
= 0;
1410 } else if (brw
->gen
== 6) {
1411 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1412 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1414 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1415 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1418 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1420 /* Patch IF -> ELSE */
1422 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1423 if_inst
->bits3
.if_else
.pop_count
= 0;
1424 if_inst
->bits3
.if_else
.pad0
= 0;
1425 } else if (brw
->gen
== 6) {
1426 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1429 /* Patch ELSE -> ENDIF */
1431 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1434 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1435 else_inst
->bits3
.if_else
.pop_count
= 1;
1436 else_inst
->bits3
.if_else
.pad0
= 0;
1437 } else if (brw
->gen
== 6) {
1438 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1439 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1441 /* The IF instruction's JIP should point just past the ELSE */
1442 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1443 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1444 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1445 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1451 brw_ELSE(struct brw_compile
*p
)
1453 struct brw_context
*brw
= p
->brw
;
1454 struct brw_instruction
*insn
;
1456 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1459 brw_set_dest(p
, insn
, brw_ip_reg());
1460 brw_set_src0(p
, insn
, brw_ip_reg());
1461 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1462 } else if (brw
->gen
== 6) {
1463 brw_set_dest(p
, insn
, brw_imm_w(0));
1464 insn
->bits1
.branch_gen6
.jump_count
= 0;
1465 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1466 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1468 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1469 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1470 brw_set_src1(p
, insn
, brw_imm_ud(0));
1471 insn
->bits3
.break_cont
.jip
= 0;
1472 insn
->bits3
.break_cont
.uip
= 0;
1475 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1476 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1477 if (!p
->single_program_flow
)
1478 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1480 push_if_stack(p
, insn
);
1484 brw_ENDIF(struct brw_compile
*p
)
1486 struct brw_context
*brw
= p
->brw
;
1487 struct brw_instruction
*insn
= NULL
;
1488 struct brw_instruction
*else_inst
= NULL
;
1489 struct brw_instruction
*if_inst
= NULL
;
1490 struct brw_instruction
*tmp
;
1491 bool emit_endif
= true;
1493 /* In single program flow mode, we can express IF and ELSE instructions
1494 * equivalently as ADD instructions that operate on IP. On platforms prior
1495 * to Gen6, flow control instructions cause an implied thread switch, so
1496 * this is a significant savings.
1498 * However, on Gen6, writing to IP doesn't work in single program flow mode
1499 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1500 * not be updated by non-flow control instructions."). And on later
1501 * platforms, there is no significant benefit to converting control flow
1502 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1505 if (brw
->gen
< 6 && p
->single_program_flow
)
1509 * A single next_insn() may change the base adress of instruction store
1510 * memory(p->store), so call it first before referencing the instruction
1511 * store pointer from an index
1514 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1516 /* Pop the IF and (optional) ELSE instructions from the stack */
1517 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1518 tmp
= pop_if_stack(p
);
1519 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1521 tmp
= pop_if_stack(p
);
1526 /* ENDIF is useless; don't bother emitting it. */
1527 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1532 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1533 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1534 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1535 } else if (brw
->gen
== 6) {
1536 brw_set_dest(p
, insn
, brw_imm_w(0));
1537 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1538 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1540 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1541 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1542 brw_set_src1(p
, insn
, brw_imm_ud(0));
1545 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1546 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1547 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1549 /* Also pop item off the stack in the endif instruction: */
1551 insn
->bits3
.if_else
.jump_count
= 0;
1552 insn
->bits3
.if_else
.pop_count
= 1;
1553 insn
->bits3
.if_else
.pad0
= 0;
1554 } else if (brw
->gen
== 6) {
1555 insn
->bits1
.branch_gen6
.jump_count
= 2;
1557 insn
->bits3
.break_cont
.jip
= 2;
1559 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1562 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1564 struct brw_context
*brw
= p
->brw
;
1565 struct brw_instruction
*insn
;
1567 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1568 if (brw
->gen
>= 6) {
1569 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1570 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1571 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1573 brw_set_dest(p
, insn
, brw_ip_reg());
1574 brw_set_src0(p
, insn
, brw_ip_reg());
1575 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1576 insn
->bits3
.if_else
.pad0
= 0;
1577 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1579 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1580 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1585 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1587 struct brw_instruction
*insn
;
1589 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1590 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1591 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1592 brw_set_dest(p
, insn
, brw_ip_reg());
1593 brw_set_src0(p
, insn
, brw_ip_reg());
1594 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1596 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1597 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1601 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1603 struct brw_instruction
*insn
;
1604 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1605 brw_set_dest(p
, insn
, brw_ip_reg());
1606 brw_set_src0(p
, insn
, brw_ip_reg());
1607 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1608 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1609 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1610 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1611 insn
->bits3
.if_else
.pad0
= 0;
1612 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1616 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1618 struct brw_instruction
*insn
;
1620 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1621 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1622 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1623 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1625 if (p
->compressed
) {
1626 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1628 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1629 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1636 * The DO/WHILE is just an unterminated loop -- break or continue are
1637 * used for control within the loop. We have a few ways they can be
1640 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1641 * jip and no DO instruction.
1643 * For non-uniform control flow pre-gen6, there's a DO instruction to
1644 * push the mask, and a WHILE to jump back, and BREAK to get out and
1647 * For gen6, there's no more mask stack, so no need for DO. WHILE
1648 * just points back to the first instruction of the loop.
1650 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1652 struct brw_context
*brw
= p
->brw
;
1654 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1655 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1656 return &p
->store
[p
->nr_insn
];
1658 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1660 push_loop_stack(p
, insn
);
1662 /* Override the defaults for this instruction:
1664 brw_set_dest(p
, insn
, brw_null_reg());
1665 brw_set_src0(p
, insn
, brw_null_reg());
1666 brw_set_src1(p
, insn
, brw_null_reg());
1668 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1669 insn
->header
.execution_size
= execute_size
;
1670 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1671 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1672 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1679 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1682 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1683 * nesting, since it can always just point to the end of the block/current loop.
1686 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1688 struct brw_context
*brw
= p
->brw
;
1689 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1690 struct brw_instruction
*inst
;
1691 int br
= (brw
->gen
== 5) ? 2 : 1;
1693 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1694 /* If the jump count is != 0, that means that this instruction has already
1695 * been patched because it's part of a loop inside of the one we're
1698 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1699 inst
->bits3
.if_else
.jump_count
== 0) {
1700 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1701 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1702 inst
->bits3
.if_else
.jump_count
== 0) {
1703 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1708 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1710 struct brw_context
*brw
= p
->brw
;
1711 struct brw_instruction
*insn
, *do_insn
;
1717 if (brw
->gen
>= 7) {
1718 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1719 do_insn
= get_inner_do_insn(p
);
1721 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1722 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1723 brw_set_src1(p
, insn
, brw_imm_ud(0));
1724 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1726 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1727 } else if (brw
->gen
== 6) {
1728 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1729 do_insn
= get_inner_do_insn(p
);
1731 brw_set_dest(p
, insn
, brw_imm_w(0));
1732 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1733 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1734 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1736 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1738 if (p
->single_program_flow
) {
1739 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1740 do_insn
= get_inner_do_insn(p
);
1742 brw_set_dest(p
, insn
, brw_ip_reg());
1743 brw_set_src0(p
, insn
, brw_ip_reg());
1744 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1745 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1747 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1748 do_insn
= get_inner_do_insn(p
);
1750 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1752 brw_set_dest(p
, insn
, brw_ip_reg());
1753 brw_set_src0(p
, insn
, brw_ip_reg());
1754 brw_set_src1(p
, insn
, brw_imm_d(0));
1756 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1757 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1758 insn
->bits3
.if_else
.pop_count
= 0;
1759 insn
->bits3
.if_else
.pad0
= 0;
1761 brw_patch_break_cont(p
, insn
);
1764 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1765 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1767 p
->loop_stack_depth
--;
1772 /* To integrate with the above, it makes sense that the comparison
1773 * instruction should populate the flag register. It might be simpler
1774 * just to use the flag reg for most WM tasks?
1776 void brw_CMP(struct brw_compile
*p
,
1777 struct brw_reg dest
,
1778 unsigned conditional
,
1779 struct brw_reg src0
,
1780 struct brw_reg src1
)
1782 struct brw_context
*brw
= p
->brw
;
1783 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1785 insn
->header
.destreg__conditionalmod
= conditional
;
1786 brw_set_dest(p
, insn
, dest
);
1787 brw_set_src0(p
, insn
, src0
);
1788 brw_set_src1(p
, insn
, src1
);
1790 /* guess_execution_size(insn, src0); */
1793 /* Make it so that future instructions will use the computed flag
1794 * value until brw_set_predicate_control_flag_value() is called
1797 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1799 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1800 p
->flag_value
= 0xff;
1803 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1805 * "Any CMP instruction with a null destination must use a {switch}."
1807 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1808 * mentioned on their work-arounds pages.
1810 if (brw
->gen
== 7) {
1811 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1812 dest
.nr
== BRW_ARF_NULL
) {
1813 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1818 /* Issue 'wait' instruction for n1, host could program MMIO
1819 to wake up thread. */
1820 void brw_WAIT (struct brw_compile
*p
)
1822 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1823 struct brw_reg src
= brw_notification_1_reg();
1825 brw_set_dest(p
, insn
, src
);
1826 brw_set_src0(p
, insn
, src
);
1827 brw_set_src1(p
, insn
, brw_null_reg());
1828 insn
->header
.execution_size
= 0; /* must */
1829 insn
->header
.predicate_control
= 0;
1830 insn
->header
.compression_control
= 0;
1834 /***********************************************************************
1835 * Helpers for the various SEND message types:
1838 /** Extended math function, float[8].
1840 void brw_math( struct brw_compile
*p
,
1841 struct brw_reg dest
,
1843 unsigned msg_reg_nr
,
1846 unsigned precision
)
1848 struct brw_context
*brw
= p
->brw
;
1850 if (brw
->gen
>= 6) {
1851 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1853 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1854 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1855 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1857 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1859 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1861 /* Source modifiers are ignored for extended math instructions on Gen6. */
1862 if (brw
->gen
== 6) {
1863 assert(!src
.negate
);
1867 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1868 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1869 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1870 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1872 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1875 /* Math is the same ISA format as other opcodes, except that CondModifier
1876 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1878 insn
->header
.destreg__conditionalmod
= function
;
1880 brw_set_dest(p
, insn
, dest
);
1881 brw_set_src0(p
, insn
, src
);
1882 brw_set_src1(p
, insn
, brw_null_reg());
1884 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1886 /* Example code doesn't set predicate_control for send
1889 insn
->header
.predicate_control
= 0;
1890 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1892 brw_set_dest(p
, insn
, dest
);
1893 brw_set_src0(p
, insn
, src
);
1894 brw_set_math_message(p
,
1897 src
.type
== BRW_REGISTER_TYPE_D
,
1903 /** Extended math function, float[8].
1905 void brw_math2(struct brw_compile
*p
,
1906 struct brw_reg dest
,
1908 struct brw_reg src0
,
1909 struct brw_reg src1
)
1911 struct brw_context
*brw
= p
->brw
;
1912 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1914 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1915 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1916 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1917 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1919 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1920 if (brw
->gen
== 6) {
1921 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1922 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1925 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1926 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1927 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1928 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1929 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1931 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1932 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1935 /* Source modifiers are ignored for extended math instructions on Gen6. */
1936 if (brw
->gen
== 6) {
1937 assert(!src0
.negate
);
1939 assert(!src1
.negate
);
1943 /* Math is the same ISA format as other opcodes, except that CondModifier
1944 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1946 insn
->header
.destreg__conditionalmod
= function
;
1948 brw_set_dest(p
, insn
, dest
);
1949 brw_set_src0(p
, insn
, src0
);
1950 brw_set_src1(p
, insn
, src1
);
1955 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1956 * using a constant offset per channel.
1958 * The offset must be aligned to oword size (16 bytes). Used for
1959 * register spilling.
1961 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1966 struct brw_context
*brw
= p
->brw
;
1967 uint32_t msg_control
, msg_type
;
1973 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1975 if (num_regs
== 1) {
1976 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1979 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1983 /* Set up the message header. This is g0, with g0.2 filled with
1984 * the offset. We don't want to leave our offset around in g0 or
1985 * it'll screw up texture samples, so set it up inside the message
1989 brw_push_insn_state(p
);
1990 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1991 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1993 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1995 /* set message header global offset field (reg 0, element 2) */
1997 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1999 2), BRW_REGISTER_TYPE_UD
),
2000 brw_imm_ud(offset
));
2002 brw_pop_insn_state(p
);
2006 struct brw_reg dest
;
2007 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2008 int send_commit_msg
;
2009 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2010 BRW_REGISTER_TYPE_UW
);
2012 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
2013 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2014 src_header
= vec16(src_header
);
2016 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2017 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2019 /* Until gen6, writes followed by reads from the same location
2020 * are not guaranteed to be ordered unless write_commit is set.
2021 * If set, then a no-op write is issued to the destination
2022 * register to set a dependency, and a read from the destination
2023 * can be used to ensure the ordering.
2025 * For gen6, only writes between different threads need ordering
2026 * protection. Our use of DP writes is all about register
2027 * spilling within a thread.
2029 if (brw
->gen
>= 6) {
2030 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2031 send_commit_msg
= 0;
2034 send_commit_msg
= 1;
2037 brw_set_dest(p
, insn
, dest
);
2038 if (brw
->gen
>= 6) {
2039 brw_set_src0(p
, insn
, mrf
);
2041 brw_set_src0(p
, insn
, brw_null_reg());
2045 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2047 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2049 brw_set_dp_write_message(p
,
2051 255, /* binding table index (255=stateless) */
2055 true, /* header_present */
2056 0, /* not a render target */
2057 send_commit_msg
, /* response_length */
/*
2065 * Read a block of owords (half a GRF each) from the scratch buffer
2066 * using a constant index per channel.
2068 * Offset must be aligned to oword size (16 bytes). Used for register
*/
/* NOTE(review): signature lost parameters (mrf/num_regs/offset visible
 * in the body) and the trailing arguments of brw_set_dp_read_message
 * were truncated by extraction — restore from upstream. Comments only
 * are added below. */
2072 brw_oword_block_read_scratch(struct brw_compile
*p
,
2073 struct brw_reg dest
,
2078 struct brw_context
*brw
= p
->brw
;
2079 uint32_t msg_control
;
/* Message payload is UD; destination is read back as UW. */
2085 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2086 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2088 if (num_regs
== 1) {
2089 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2092 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
/* Build the message header (g0 copy + scratch offset in g0.2),
 * uncompressed and with masking disabled. */
2097 brw_push_insn_state(p
);
2098 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2099 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2101 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2103 /* set message header global offset field (reg 0, element 2) */
2105 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2107 2), BRW_REGISTER_TYPE_UD
),
2108 brw_imm_ud(offset
));
2110 brw_pop_insn_state(p
);
/* Emit the SEND performing the scratch read. */
2114 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2116 assert(insn
->header
.predicate_control
== 0);
2117 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2118 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2120 brw_set_dest(p
, insn
, dest
); /* UW? */
2121 if (brw
->gen
>= 6) {
2122 brw_set_src0(p
, insn
, mrf
);
2124 brw_set_src0(p
, insn
, brw_null_reg());
2127 brw_set_dp_read_message(p
,
2129 255, /* binding table index (255=stateless) */
2131 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2132 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2134 true, /* header_present */
/*
 * Gen7 scratch-space read: emits a single SEND through the data cache
 * with the scratch-read bit, register count, and HWord offset packed
 * into the message descriptor (bits3).
 * NOTE(review): the num_regs/offset parameters and part of the
 * brw_set_message_descriptor argument list were dropped by extraction —
 * restore from upstream. Comments only are added below.
 */
2140 gen7_block_read_scratch(struct brw_compile
*p
,
2141 struct brw_reg dest
,
2145 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2147 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2149 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2150 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2152 brw_set_dest(p
, insn
, dest
);
2154 /* The HW requires that the header is present; this is to get the g0.5
*/
2157 bool header_present
= true;
2158 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2160 brw_set_message_descriptor(p
, insn
,
2161 GEN7_SFID_DATAPORT_DATA_CACHE
,
2162 1, /* mlen: just g0 */
/* Mark this as a scratch read and encode the register count. */
2167 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2169 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2170 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2172 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2173 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2174 * is 32 bytes, which happens to be the size of a register.
*/
2177 assert(offset
< (1 << 12));
2178 insn
->bits3
.ud
|= offset
;
/*
2182 * Read a float[4] vector from the data port Data Cache (const buffer).
2183 * Location (in buffer) should be a multiple of 16.
2184 * Used for fetching shader constants.
*/
/* NOTE(review): the mrf/offset parameters and some statements (e.g. the
 * offset-unit conversion after the "units of owords" comment) were
 * dropped by extraction — restore from upstream. Comments only are
 * added below. */
2186 void brw_oword_block_read(struct brw_compile
*p
,
2187 struct brw_reg dest
,
2190 uint32_t bind_table_index
)
2192 struct brw_context
*brw
= p
->brw
;
2194 /* On newer hardware, offset is in units of owords. */
/* Build the message header: copy of g0 with the buffer offset placed
 * in g0.2, emitted unpredicated, uncompressed, mask-disabled. */
2198 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2200 brw_push_insn_state(p
);
2201 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2202 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2203 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2205 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2207 /* set message header global offset field (reg 0, element 2) */
2209 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2211 2), BRW_REGISTER_TYPE_UD
),
2212 brw_imm_ud(offset
));
2214 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2215 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2217 /* cast dest to a uword[8] vector */
2218 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2220 brw_set_dest(p
, insn
, dest
);
2221 if (brw
->gen
>= 6) {
2222 brw_set_src0(p
, insn
, mrf
);
2224 brw_set_src0(p
, insn
, brw_null_reg());
2227 brw_set_dp_read_message(p
,
2230 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2231 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2232 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2234 true, /* header_present */
2235 1); /* response_length (1 reg, 2 owords!) */
2237 brw_pop_insn_state(p
);
/*
 * Emit a render-target (framebuffer) write SEND. Uses SENDC on gen6+
 * so the write waits on any in-flight dependent color writes.
 * NOTE(review): extraction dropped lines here too (dispatch_width and
 * msg_type locals, eot parameter, several descriptor arguments, `else`
 * keywords and braces) — restore from upstream. Comments only are
 * added below.
 */
2241 void brw_fb_WRITE(struct brw_compile
*p
,
2243 unsigned msg_reg_nr
,
2244 struct brw_reg src0
,
2245 unsigned msg_control
,
2246 unsigned binding_table_index
,
2247 unsigned msg_length
,
2248 unsigned response_length
,
2250 bool header_present
)
2252 struct brw_context
*brw
= p
->brw
;
2253 struct brw_instruction
*insn
;
2255 struct brw_reg dest
;
/* Null destination sized to the dispatch width. */
2257 if (dispatch_width
== 16)
2258 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2260 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2262 if (brw
->gen
>= 6) {
2263 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2265 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2267 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2269 if (brw
->gen
>= 6) {
2270 /* headerless version, just submit color payload */
2271 src0
= brw_message_reg(msg_reg_nr
);
2273 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2275 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2277 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2280 brw_set_dest(p
, insn
, dest
);
2281 brw_set_src0(p
, insn
, src0
);
2282 brw_set_dp_write_message(p
,
2284 binding_table_index
,
2289 eot
, /* last render target write */
2292 0 /* send_commit_msg */);
/*
2297 * Texture sample instruction.
2298 * Note: the msg_type plus msg_length values determine exactly what kind
2299 * of sampling operation is performed. See volume 4, page 161 of docs.
*/
/* NOTE(review): extraction dropped lines (sampler/msg_type parameters,
 * the tail of the brw_set_sampler_message call, braces) — restore from
 * upstream. Comments only are added below. */
2301 void brw_SAMPLE(struct brw_compile
*p
,
2302 struct brw_reg dest
,
2303 unsigned msg_reg_nr
,
2304 struct brw_reg src0
,
2305 unsigned binding_table_index
,
2308 unsigned response_length
,
2309 unsigned msg_length
,
2310 unsigned header_present
,
2312 unsigned return_format
)
2314 struct brw_context
*brw
= p
->brw
;
2315 struct brw_instruction
*insn
;
/* msg_reg_nr == -1 means no implied move of src0 into the MRF. */
2317 if (msg_reg_nr
!= -1)
2318 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2320 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2321 insn
->header
.predicate_control
= 0; /* XXX */
2323 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2325 * "Instruction compression is not allowed for this instruction (that
2326 * is, send). The hardware behavior is undefined if this instruction is
2327 * set as compressed. However, compress control can be set to "SecHalf"
2328 * to affect the EMask generation."
2330 * No similar wording is found in later PRMs, but there are examples
2331 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2332 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2333 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
*/
2335 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2336 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2339 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2341 brw_set_dest(p
, insn
, dest
);
2342 brw_set_src0(p
, insn
, src0
);
2343 brw_set_sampler_message(p
, insn
,
2344 binding_table_index
,
2354 /* All these variables are pretty confusing - we might be better off
2355 * using bitmasks and macros for this, in the old style. Or perhaps
2356 * just having the caller instantiate the fields in dword3 itself.
*/
/* NOTE(review): extraction dropped lines (the offset parameter, the
 * tail of the brw_set_urb_message call, braces) — restore from
 * upstream. Comments only are added below. */
2358 void brw_urb_WRITE(struct brw_compile
*p
,
2359 struct brw_reg dest
,
2360 unsigned msg_reg_nr
,
2361 struct brw_reg src0
,
2362 enum brw_urb_write_flags flags
,
2363 unsigned msg_length
,
2364 unsigned response_length
,
2368 struct brw_context
*brw
= p
->brw
;
2369 struct brw_instruction
*insn
;
2371 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2373 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2374 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2375 brw_push_insn_state(p
);
2376 brw_set_access_mode(p
, BRW_ALIGN_1
);
2377 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2378 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2379 BRW_REGISTER_TYPE_UD
),
2380 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2381 brw_imm_ud(0xff00));
2382 brw_pop_insn_state(p
);
2385 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2387 assert(msg_length
< BRW_MAX_MRF
);
2389 brw_set_dest(p
, insn
, dest
);
2390 brw_set_src0(p
, insn
, src0
);
2391 brw_set_src1(p
, insn
, brw_imm_d(0));
2394 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2396 brw_set_urb_message(p
,
/*
 * Scan forward from start_offset for the instruction that terminates
 * the current block (ENDIF/ELSE/WHILE/HALT).
 * NOTE(review): the return type, the offset local, the switch-case
 * bodies (returns) and the closing braces were dropped by extraction —
 * restore from upstream. Comments only are added below.
 */
2406 brw_find_next_block_end(struct brw_compile
*p
, int start_offset
)
2409 void *store
= p
->store
;
2411 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2412 offset
= next_offset(store
, offset
)) {
2413 struct brw_instruction
*insn
= store
+ offset
;
2415 switch (insn
->header
.opcode
) {
2416 case BRW_OPCODE_ENDIF
:
2417 case BRW_OPCODE_ELSE
:
2418 case BRW_OPCODE_WHILE
:
2419 case BRW_OPCODE_HALT
:
2427 /* There is no DO instruction on gen6, so to find the end of the loop
2428 * we have to see if the loop is jumping back before our start
*/
/* NOTE(review): return type, the offset/scale locals, the early return
 * inside the loop, and closing braces were dropped by extraction —
 * restore from upstream. Comments only are added below. */
2432 brw_find_loop_end(struct brw_compile
*p
, int start_offset
)
2434 struct brw_context
*brw
= p
->brw
;
2437 void *store
= p
->store
;
2439 /* Always start after the instruction (such as a WHILE) we're trying to fix
*/
2442 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2443 offset
= next_offset(store
, offset
)) {
2444 struct brw_instruction
*insn
= store
+ offset
;
2446 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
/* gen6 encodes the backward jump in bits1; later gens use bits3 JIP. */
2447 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2448 : insn
->bits3
.break_cont
.jip
;
2449 if (offset
+ jip
* scale
<= start_offset
)
2453 assert(!"not reached");
2454 return start_offset
;
2457 /* After program generation, go back and update the UIP and JIP of
2458 * BREAK, CONT, and HALT instructions to their correct locations.
*/
/* NOTE(review): extraction dropped lines throughout (the offset/scale
 * locals, `break;` and `continue;` statements, `else` keywords, closing
 * braces) — restore from upstream. Comments only are added below. */
2461 brw_set_uip_jip(struct brw_compile
*p
)
2463 struct brw_context
*brw
= p
->brw
;
2466 void *store
= p
->store
;
2471 for (offset
= 0; offset
< p
->next_insn_offset
;
2472 offset
= next_offset(store
, offset
)) {
2473 struct brw_instruction
*insn
= store
+ offset
;
2475 if (insn
->header
.cmpt_control
) {
2476 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2477 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2478 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2479 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2483 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2484 switch (insn
->header
.opcode
) {
2485 case BRW_OPCODE_BREAK
:
2486 assert(block_end_offset
!= 0);
2487 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2488 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2489 insn
->bits3
.break_cont
.uip
=
2490 (brw_find_loop_end(p
, offset
) - offset
+
2491 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2493 case BRW_OPCODE_CONTINUE
:
2494 assert(block_end_offset
!= 0);
2495 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2496 insn
->bits3
.break_cont
.uip
=
2497 (brw_find_loop_end(p
, offset
) - offset
) / scale
;
2499 assert(insn
->bits3
.break_cont
.uip
!= 0);
2500 assert(insn
->bits3
.break_cont
.jip
!= 0);
2503 case BRW_OPCODE_ENDIF
:
/* ENDIF with no following block end just steps to the next insn. */
2504 if (block_end_offset
== 0)
2505 insn
->bits3
.break_cont
.jip
= 2;
2507 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2510 case BRW_OPCODE_HALT
:
2511 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2513 * "In case of the halt instruction not inside any conditional
2514 * code block, the value of <JIP> and <UIP> should be the
2515 * same. In case of the halt instruction inside conditional code
2516 * block, the <UIP> should be the end of the program, and the
2517 * <JIP> should be end of the most inner conditional code block."
2519 * The uip will have already been set by whoever set up the
*/
2522 if (block_end_offset
== 0) {
2523 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2525 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2527 assert(insn
->bits3
.break_cont
.uip
!= 0);
2528 assert(insn
->bits3
.break_cont
.jip
!= 0);
/*
 * Emit an FF_SYNC urb message (thread ordering for the fixed-function
 * pipeline): resolve the implied MRF move, then SEND.
 * NOTE(review): some parameters and the tail of the
 * brw_set_ff_sync_message call were dropped by extraction — restore
 * from upstream. Comments only are added below.
 */
2534 void brw_ff_sync(struct brw_compile
*p
,
2535 struct brw_reg dest
,
2536 unsigned msg_reg_nr
,
2537 struct brw_reg src0
,
2539 unsigned response_length
,
2542 struct brw_context
*brw
= p
->brw
;
2543 struct brw_instruction
*insn
;
2545 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2547 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2548 brw_set_dest(p
, insn
, dest
);
2549 brw_set_src0(p
, insn
, src0
);
2550 brw_set_src1(p
, insn
, brw_imm_d(0));
2553 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2555 brw_set_ff_sync_message(p
,
/*
2563 * Emit the SEND instruction necessary to generate stream output data on Gen6
2564 * (for transform feedback).
2566 * If send_commit_msg is true, this is the last piece of stream output data
2567 * from this thread, so send the data as a committed write. According to the
2568 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2570 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2571 * writes are complete by sending the final write as a committed write."
*/
/* NOTE(review): return type and some argument lines were dropped by
 * extraction — restore from upstream. Comments only are added below. */
2574 brw_svb_write(struct brw_compile
*p
,
2575 struct brw_reg dest
,
2576 unsigned msg_reg_nr
,
2577 struct brw_reg src0
,
2578 unsigned binding_table_index
,
2579 bool send_commit_msg
)
2581 struct brw_instruction
*insn
;
2583 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2585 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2586 brw_set_dest(p
, insn
, dest
);
2587 brw_set_src0(p
, insn
, src0
);
2588 brw_set_src1(p
, insn
, brw_imm_d(0));
2589 brw_set_dp_write_message(p
, insn
,
2590 binding_table_index
,
2591 0, /* msg_control: ignored */
2592 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2594 true, /* header_present */
2595 0, /* last_render_target: ignored */
2596 send_commit_msg
, /* response_length */
2597 0, /* end_of_thread */
2598 send_commit_msg
); /* send_commit_msg */
/*
 * Fill in the message descriptor for an untyped atomic data-port
 * operation. Haswell uses the DATA_CACHE_1 SFID with separate SIMD8 /
 * SIMD4x2 message types; other gen7 parts use the legacy DATA_CACHE
 * SFID with a mode bit.
 * NOTE(review): the atomic_op parameter, `else` keywords and braces
 * were dropped by extraction — restore from upstream. Comments only
 * are added below.
 */
2602 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2603 struct brw_instruction
*insn
,
2605 unsigned bind_table_index
,
2606 unsigned msg_length
,
2607 unsigned response_length
,
2608 bool header_present
)
2610 if (p
->brw
->is_haswell
) {
2611 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2612 msg_length
, response_length
,
2613 header_present
, false);
2616 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2617 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2618 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2620 insn
->bits3
.gen7_dp
.msg_type
=
2621 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2623 insn
->bits3
.gen7_dp
.msg_type
=
2624 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2628 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2629 msg_length
, response_length
,
2630 header_present
, false);
2632 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2634 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2635 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2638 if (response_length
)
2639 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
/* Common descriptor fields: surface index and atomic opcode. */
2641 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2642 insn
->bits3
.ud
|= atomic_op
<< 8;
/*
 * Emit an untyped atomic SEND: destination and payload are treated as
 * UD, and the descriptor is filled by
 * brw_set_dp_untyped_atomic_message.
 * NOTE(review): the return type and the atomic_op/mrf parameters were
 * dropped by extraction — restore from upstream. Comments only are
 * added below.
 */
2646 brw_untyped_atomic(struct brw_compile
*p
,
2647 struct brw_reg dest
,
2650 unsigned bind_table_index
,
2651 unsigned msg_length
,
2652 unsigned response_length
) {
2653 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2655 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2656 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2657 brw_set_src1(p
, insn
, brw_imm_d(0));
2658 brw_set_dp_untyped_atomic_message(
2659 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2660 insn
->header
.access_mode
== BRW_ALIGN_1
);
/*
 * Fill in the message descriptor for an untyped surface read.
 * num_channels is derived from the response length and dispatch width;
 * the channel-disable mask in bits3 drops the channels not returned.
 * NOTE(review): `else` keywords and closing braces were dropped by
 * extraction — restore from upstream. Comments only are added below.
 */
2664 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2665 struct brw_instruction
*insn
,
2666 unsigned bind_table_index
,
2667 unsigned msg_length
,
2668 unsigned response_length
,
2669 bool header_present
)
2671 const unsigned dispatch_width
=
2672 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2673 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2675 if (p
->brw
->is_haswell
) {
2676 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2677 msg_length
, response_length
,
2678 header_present
, false);
2680 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2682 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2683 msg_length
, response_length
,
2684 header_present
, false);
2686 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2689 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2690 if (dispatch_width
== 16)
2691 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2693 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2696 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2698 /* Set mask of 32-bit channels to drop. */
2699 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
/*
 * Emit an untyped surface read SEND; descriptor set up by
 * brw_set_dp_untyped_surface_read_message.
 * NOTE(review): the return type and the mrf parameter were dropped by
 * extraction — restore from upstream. Comments only are added below.
 */
2703 brw_untyped_surface_read(struct brw_compile
*p
,
2704 struct brw_reg dest
,
2706 unsigned bind_table_index
,
2707 unsigned msg_length
,
2708 unsigned response_length
)
2710 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2712 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2713 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2714 brw_set_dp_untyped_surface_read_message(
2715 p
, insn
, bind_table_index
, msg_length
, response_length
,
2716 insn
->header
.access_mode
== BRW_ALIGN_1
);
/*
2720 * This instruction is generated as a single-channel align1 instruction by
2721 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2723 * We can't use the typed atomic op in the FS because that has the execution
2724 * mask ANDed with the pixel mask, but we just want to write the one dword for
2727 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2728 * one u32. So we use the same untyped atomic write message as the pixel
2731 * The untyped atomic operation requires a BUFFER surface type with RAW
2732 * format, and is only accessible through the legacy DATA_CACHE dataport
*/
/* NOTE(review): the register-number/subregister arguments of the
 * brw_set_dest/brw_set_src0 calls were dropped by extraction — restore
 * from upstream. Comments only are added below. */
2735 void brw_shader_time_add(struct brw_compile
*p
,
2736 struct brw_reg payload
,
2737 uint32_t surf_index
)
2739 struct brw_context
*brw
= p
->brw
;
2740 assert(brw
->gen
>= 7);
/* Emit the SEND in align1, mask-disabled state. */
2742 brw_push_insn_state(p
);
2743 brw_set_access_mode(p
, BRW_ALIGN_1
);
2744 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2745 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2746 brw_pop_insn_state(p
);
2748 /* We use brw_vec1_reg and unmasked because we want to increment the given
*/
2751 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2753 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2755 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2756 2 /* message length */,
2757 0 /* response length */,
2758 false /* header present */);