2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "util/ralloc.h"
40 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
41 * registers, implicitly moving the operand to a message register.
43 * On Sandybridge, this is no longer the case. This function performs the
44 * explicit move; it should be called before emitting a SEND instruction.
47 gen6_resolve_implied_move(struct brw_codegen
*p
,
51 const struct brw_device_info
*devinfo
= p
->devinfo
;
55 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
58 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
59 brw_push_insn_state(p
);
60 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
61 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
62 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
63 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
64 retype(*src
, BRW_REGISTER_TYPE_UD
));
65 brw_pop_insn_state(p
);
67 *src
= brw_message_reg(msg_reg_nr
);
71 gen7_convert_mrf_to_grf(struct brw_codegen
*p
, struct brw_reg
*reg
)
73 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
74 * "The send with EOT should use register space R112-R127 for <src>. This is
75 * to enable loading of a new thread into the same slot while the message
76 * with EOT for current thread is pending dispatch."
78 * Since we're pretending to have 16 MRFs anyway, we may as well use the
79 * registers required for messages with EOT.
81 const struct brw_device_info
*devinfo
= p
->devinfo
;
82 if (devinfo
->gen
>= 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
83 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
84 reg
->nr
+= GEN7_MRF_HACK_START
;
89 * Convert a brw_reg_type enumeration value into the hardware representation.
91 * The hardware encoding may depend on whether the value is an immediate.
94 brw_reg_type_to_hw_type(const struct brw_device_info
*devinfo
,
95 enum brw_reg_type type
, enum brw_reg_file file
)
97 if (file
== BRW_IMMEDIATE_VALUE
) {
98 static const int imm_hw_types
[] = {
99 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
100 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
101 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
102 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
103 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
104 [BRW_REGISTER_TYPE_UB
] = -1,
105 [BRW_REGISTER_TYPE_B
] = -1,
106 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
107 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
108 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
109 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
110 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
111 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
112 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
114 assert(type
< ARRAY_SIZE(imm_hw_types
));
115 assert(imm_hw_types
[type
] != -1);
116 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
117 return imm_hw_types
[type
];
119 /* Non-immediate registers */
120 static const int hw_types
[] = {
121 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
122 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
123 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
124 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
125 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
126 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
127 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
128 [BRW_REGISTER_TYPE_UV
] = -1,
129 [BRW_REGISTER_TYPE_VF
] = -1,
130 [BRW_REGISTER_TYPE_V
] = -1,
131 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
132 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
133 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
134 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
136 assert(type
< ARRAY_SIZE(hw_types
));
137 assert(hw_types
[type
] != -1);
138 assert(devinfo
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
139 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
140 return hw_types
[type
];
145 brw_set_dest(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg dest
)
147 const struct brw_device_info
*devinfo
= p
->devinfo
;
149 if (dest
.file
== BRW_MESSAGE_REGISTER_FILE
)
150 assert((dest
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
151 else if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
152 assert(dest
.nr
< 128);
154 gen7_convert_mrf_to_grf(p
, &dest
);
156 brw_inst_set_dst_reg_file(devinfo
, inst
, dest
.file
);
157 brw_inst_set_dst_reg_type(devinfo
, inst
,
158 brw_reg_type_to_hw_type(devinfo
, dest
.type
,
160 brw_inst_set_dst_address_mode(devinfo
, inst
, dest
.address_mode
);
162 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
163 brw_inst_set_dst_da_reg_nr(devinfo
, inst
, dest
.nr
);
165 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
166 brw_inst_set_dst_da1_subreg_nr(devinfo
, inst
, dest
.subnr
);
167 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
168 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
169 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
171 brw_inst_set_dst_da16_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
172 brw_inst_set_da16_writemask(devinfo
, inst
, dest
.writemask
);
173 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
174 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
175 assert(dest
.writemask
!= 0);
177 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
178 * Although Dst.HorzStride is a don't care for Align16, HW needs
179 * this to be programmed as "01".
181 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
184 brw_inst_set_dst_ia_subreg_nr(devinfo
, inst
, dest
.subnr
);
186 /* These are different sizes in align1 vs align16:
188 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
189 brw_inst_set_dst_ia1_addr_imm(devinfo
, inst
,
190 dest
.indirect_offset
);
191 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
192 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
193 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
195 brw_inst_set_dst_ia16_addr_imm(devinfo
, inst
,
196 dest
.indirect_offset
);
197 /* even ignored in da16, still need to set as '01' */
198 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
202 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
203 * or 16 (SIMD16), as that's normally correct. However, when dealing with
204 * small registers, we automatically reduce it to match the register size.
206 * In platforms that support fp64 we can emit instructions with a width of
207 * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these
208 * cases we need to make sure that these instructions have their exec sizes
209 * set properly when they are emitted and we can't rely on this code to fix
213 if (devinfo
->gen
>= 6)
214 fix_exec_size
= dest
.width
< BRW_EXECUTE_4
;
216 fix_exec_size
= dest
.width
< BRW_EXECUTE_8
;
219 brw_inst_set_exec_size(devinfo
, inst
, dest
.width
);
222 extern int reg_type_size
[];
225 validate_reg(const struct brw_device_info
*devinfo
,
226 brw_inst
*inst
, struct brw_reg reg
)
228 const int hstride_for_reg
[] = {0, 1, 2, 4};
229 const int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32};
230 const int width_for_reg
[] = {1, 2, 4, 8, 16};
231 const int execsize_for_reg
[] = {1, 2, 4, 8, 16, 32};
232 int width
, hstride
, vstride
, execsize
;
234 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
235 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
236 * mean the destination has to be 128-bit aligned and the
237 * destination horiz stride has to be a word.
239 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
240 assert(hstride_for_reg
[brw_inst_dst_hstride(devinfo
, inst
)] *
241 reg_type_size
[brw_inst_dst_reg_type(devinfo
, inst
)] == 2);
247 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
248 reg
.file
== BRW_ARF_NULL
)
251 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
253 * "Swizzling is not allowed when an accumulator is used as an implicit
254 * source or an explicit source in an instruction."
256 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
257 reg
.nr
== BRW_ARF_ACCUMULATOR
)
258 assert(reg
.swizzle
== BRW_SWIZZLE_XYZW
);
260 assert(reg
.hstride
>= 0 && reg
.hstride
< ARRAY_SIZE(hstride_for_reg
));
261 hstride
= hstride_for_reg
[reg
.hstride
];
263 if (reg
.vstride
== 0xf) {
266 assert(reg
.vstride
>= 0 && reg
.vstride
< ARRAY_SIZE(vstride_for_reg
));
267 vstride
= vstride_for_reg
[reg
.vstride
];
270 assert(reg
.width
>= 0 && reg
.width
< ARRAY_SIZE(width_for_reg
));
271 width
= width_for_reg
[reg
.width
];
273 assert(brw_inst_exec_size(devinfo
, inst
) >= 0 &&
274 brw_inst_exec_size(devinfo
, inst
) < ARRAY_SIZE(execsize_for_reg
));
275 execsize
= execsize_for_reg
[brw_inst_exec_size(devinfo
, inst
)];
277 /* Restrictions from 3.3.10: Register Region Restrictions. */
279 assert(execsize
>= width
);
282 if (execsize
== width
&& hstride
!= 0) {
283 assert(vstride
== -1 || vstride
== width
* hstride
);
287 if (execsize
== width
&& hstride
== 0) {
288 /* no restriction on vstride. */
293 assert(hstride
== 0);
297 if (execsize
== 1 && width
== 1) {
298 assert(hstride
== 0);
299 assert(vstride
== 0);
303 if (vstride
== 0 && hstride
== 0) {
307 /* 10. Check destination issues. */
/**
 * Return true if \p imm can be stored in a compacted instruction's
 * 13-bit immediate field: the low 12 bits are kept as-is and a single
 * sign bit is replicated through the top 20 bits.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfff;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
321 brw_set_src0(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
323 const struct brw_device_info
*devinfo
= p
->devinfo
;
325 if (reg
.file
== BRW_MESSAGE_REGISTER_FILE
)
326 assert((reg
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
327 else if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
328 assert(reg
.nr
< 128);
330 gen7_convert_mrf_to_grf(p
, ®
);
332 if (devinfo
->gen
>= 6 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
333 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
)) {
334 /* Any source modifiers or regions will be ignored, since this just
335 * identifies the MRF/GRF to start reading the message contents from.
336 * Check for some likely failures.
340 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
343 validate_reg(devinfo
, inst
, reg
);
345 brw_inst_set_src0_reg_file(devinfo
, inst
, reg
.file
);
346 brw_inst_set_src0_reg_type(devinfo
, inst
,
347 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
348 brw_inst_set_src0_abs(devinfo
, inst
, reg
.abs
);
349 brw_inst_set_src0_negate(devinfo
, inst
, reg
.negate
);
350 brw_inst_set_src0_address_mode(devinfo
, inst
, reg
.address_mode
);
352 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
353 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
355 /* The Bspec's section titled "Non-present Operands" claims that if src0
356 * is an immediate that src1's type must be the same as that of src0.
358 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
359 * that do not follow this rule. E.g., from the IVB/HSW table:
361 * DataTypeIndex 18-Bit Mapping Mapped Meaning
362 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
364 * And from the SNB table:
366 * DataTypeIndex 18-Bit Mapping Mapped Meaning
367 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
369 * Neither of these cause warnings from the simulator when used,
370 * compacted or otherwise. In fact, all compaction mappings that have an
371 * immediate in src0 use a:ud for src1.
373 * The GM45 instruction compaction tables do not contain mapped meanings
374 * so it's not clear whether it has the restriction. We'll assume it was
375 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
377 brw_inst_set_src1_reg_file(devinfo
, inst
, BRW_ARCHITECTURE_REGISTER_FILE
);
378 if (devinfo
->gen
< 6) {
379 brw_inst_set_src1_reg_type(devinfo
, inst
,
380 brw_inst_src0_reg_type(devinfo
, inst
));
382 brw_inst_set_src1_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
385 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
386 * for immediate values. Presumably the hardware engineers realized
387 * that the only useful floating-point value that could be represented
388 * in this format is 0.0, which can also be represented as a VF-typed
389 * immediate, so they gave us the previously mentioned mapping on IVB+.
391 * Strangely, we do have a mapping for imm:f in src1, so we don't need
394 * If we see a 0.0:F, change the type to VF so that it can be compacted.
396 if (brw_inst_imm_ud(devinfo
, inst
) == 0x0 &&
397 brw_inst_src0_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_F
) {
398 brw_inst_set_src0_reg_type(devinfo
, inst
, BRW_HW_REG_IMM_TYPE_VF
);
401 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
402 * set the types to :UD so the instruction can be compacted.
404 if (is_compactable_immediate(brw_inst_imm_ud(devinfo
, inst
)) &&
405 brw_inst_cond_modifier(devinfo
, inst
) == BRW_CONDITIONAL_NONE
&&
406 brw_inst_src0_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_D
&&
407 brw_inst_dst_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_D
) {
408 brw_inst_set_src0_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
409 brw_inst_set_dst_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
412 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
413 brw_inst_set_src0_da_reg_nr(devinfo
, inst
, reg
.nr
);
414 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
415 brw_inst_set_src0_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
417 brw_inst_set_src0_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
420 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.subnr
);
422 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
423 brw_inst_set_src0_ia1_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
425 brw_inst_set_src0_ia16_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
429 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
430 if (reg
.width
== BRW_WIDTH_1
&&
431 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
432 brw_inst_set_src0_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
433 brw_inst_set_src0_width(devinfo
, inst
, BRW_WIDTH_1
);
434 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
436 brw_inst_set_src0_hstride(devinfo
, inst
, reg
.hstride
);
437 brw_inst_set_src0_width(devinfo
, inst
, reg
.width
);
438 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
441 brw_inst_set_src0_da16_swiz_x(devinfo
, inst
,
442 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
443 brw_inst_set_src0_da16_swiz_y(devinfo
, inst
,
444 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
445 brw_inst_set_src0_da16_swiz_z(devinfo
, inst
,
446 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
447 brw_inst_set_src0_da16_swiz_w(devinfo
, inst
,
448 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
450 /* This is an oddity of the fact we're using the same
451 * descriptions for registers in align_16 as align_1:
453 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
454 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
456 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
463 brw_set_src1(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
465 const struct brw_device_info
*devinfo
= p
->devinfo
;
467 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
468 assert(reg
.nr
< 128);
470 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
472 * "Accumulator registers may be accessed explicitly as src0
475 assert(reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
476 reg
.nr
!= BRW_ARF_ACCUMULATOR
);
478 gen7_convert_mrf_to_grf(p
, ®
);
479 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
481 validate_reg(devinfo
, inst
, reg
);
483 brw_inst_set_src1_reg_file(devinfo
, inst
, reg
.file
);
484 brw_inst_set_src1_reg_type(devinfo
, inst
,
485 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
486 brw_inst_set_src1_abs(devinfo
, inst
, reg
.abs
);
487 brw_inst_set_src1_negate(devinfo
, inst
, reg
.negate
);
489 /* Only src1 can be immediate in two-argument instructions.
491 assert(brw_inst_src0_reg_file(devinfo
, inst
) != BRW_IMMEDIATE_VALUE
);
493 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
494 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
496 /* This is a hardware restriction, which may or may not be lifted
499 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
500 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
502 brw_inst_set_src1_da_reg_nr(devinfo
, inst
, reg
.nr
);
503 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
504 brw_inst_set_src1_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
506 brw_inst_set_src1_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
509 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
510 if (reg
.width
== BRW_WIDTH_1
&&
511 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
512 brw_inst_set_src1_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
513 brw_inst_set_src1_width(devinfo
, inst
, BRW_WIDTH_1
);
514 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
516 brw_inst_set_src1_hstride(devinfo
, inst
, reg
.hstride
);
517 brw_inst_set_src1_width(devinfo
, inst
, reg
.width
);
518 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
521 brw_inst_set_src1_da16_swiz_x(devinfo
, inst
,
522 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
523 brw_inst_set_src1_da16_swiz_y(devinfo
, inst
,
524 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
525 brw_inst_set_src1_da16_swiz_z(devinfo
, inst
,
526 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
527 brw_inst_set_src1_da16_swiz_w(devinfo
, inst
,
528 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
530 /* This is an oddity of the fact we're using the same
531 * descriptions for registers in align_16 as align_1:
533 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
534 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
536 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
542 * Set the Message Descriptor and Extended Message Descriptor fields
545 * \note This zeroes out the Function Control bits, so it must be called
546 * \b before filling out any message-specific data. Callers can
547 * choose not to fill in irrelevant bits; they will be zero.
550 brw_set_message_descriptor(struct brw_codegen
*p
,
552 enum brw_message_target sfid
,
554 unsigned response_length
,
558 const struct brw_device_info
*devinfo
= p
->devinfo
;
560 brw_set_src1(p
, inst
, brw_imm_d(0));
562 /* For indirect sends, `inst` will not be the SEND/SENDC instruction
563 * itself; instead, it will be a MOV/OR into the address register.
565 * In this case, we avoid setting the extended message descriptor bits,
566 * since they go on the later SEND/SENDC instead and if set here would
567 * instead clobber the conditionalmod bits.
569 unsigned opcode
= brw_inst_opcode(devinfo
, inst
);
570 if (opcode
== BRW_OPCODE_SEND
|| opcode
== BRW_OPCODE_SENDC
) {
571 brw_inst_set_sfid(devinfo
, inst
, sfid
);
574 brw_inst_set_mlen(devinfo
, inst
, msg_length
);
575 brw_inst_set_rlen(devinfo
, inst
, response_length
);
576 brw_inst_set_eot(devinfo
, inst
, end_of_thread
);
578 if (devinfo
->gen
>= 5) {
579 brw_inst_set_header_present(devinfo
, inst
, header_present
);
583 static void brw_set_math_message( struct brw_codegen
*p
,
586 unsigned integer_type
,
590 const struct brw_device_info
*devinfo
= p
->devinfo
;
592 unsigned response_length
;
594 /* Infer message length from the function */
596 case BRW_MATH_FUNCTION_POW
:
597 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
598 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
599 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
607 /* Infer response length from the function */
609 case BRW_MATH_FUNCTION_SINCOS
:
610 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
619 brw_set_message_descriptor(p
, inst
, BRW_SFID_MATH
,
620 msg_length
, response_length
, false, false);
621 brw_inst_set_math_msg_function(devinfo
, inst
, function
);
622 brw_inst_set_math_msg_signed_int(devinfo
, inst
, integer_type
);
623 brw_inst_set_math_msg_precision(devinfo
, inst
, low_precision
);
624 brw_inst_set_math_msg_saturate(devinfo
, inst
, brw_inst_saturate(devinfo
, inst
));
625 brw_inst_set_math_msg_data_type(devinfo
, inst
, dataType
);
626 brw_inst_set_saturate(devinfo
, inst
, 0);
630 static void brw_set_ff_sync_message(struct brw_codegen
*p
,
633 unsigned response_length
,
636 const struct brw_device_info
*devinfo
= p
->devinfo
;
638 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
639 1, response_length
, true, end_of_thread
);
640 brw_inst_set_urb_opcode(devinfo
, insn
, 1); /* FF_SYNC */
641 brw_inst_set_urb_allocate(devinfo
, insn
, allocate
);
642 /* The following fields are not used by FF_SYNC: */
643 brw_inst_set_urb_global_offset(devinfo
, insn
, 0);
644 brw_inst_set_urb_swizzle_control(devinfo
, insn
, 0);
645 brw_inst_set_urb_used(devinfo
, insn
, 0);
646 brw_inst_set_urb_complete(devinfo
, insn
, 0);
649 static void brw_set_urb_message( struct brw_codegen
*p
,
651 enum brw_urb_write_flags flags
,
653 unsigned response_length
,
655 unsigned swizzle_control
)
657 const struct brw_device_info
*devinfo
= p
->devinfo
;
659 assert(devinfo
->gen
< 7 || swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
660 assert(devinfo
->gen
< 7 || !(flags
& BRW_URB_WRITE_ALLOCATE
));
661 assert(devinfo
->gen
>= 7 || !(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
663 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
664 msg_length
, response_length
, true,
665 flags
& BRW_URB_WRITE_EOT
);
667 if (flags
& BRW_URB_WRITE_OWORD
) {
668 assert(msg_length
== 2); /* header + one OWORD of data */
669 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_OWORD
);
671 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_HWORD
);
674 brw_inst_set_urb_global_offset(devinfo
, insn
, offset
);
675 brw_inst_set_urb_swizzle_control(devinfo
, insn
, swizzle_control
);
677 if (devinfo
->gen
< 8) {
678 brw_inst_set_urb_complete(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_COMPLETE
));
681 if (devinfo
->gen
< 7) {
682 brw_inst_set_urb_allocate(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_ALLOCATE
));
683 brw_inst_set_urb_used(devinfo
, insn
, !(flags
& BRW_URB_WRITE_UNUSED
));
685 brw_inst_set_urb_per_slot_offset(devinfo
, insn
,
686 !!(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
691 brw_set_dp_write_message(struct brw_codegen
*p
,
693 unsigned binding_table_index
,
694 unsigned msg_control
,
698 unsigned last_render_target
,
699 unsigned response_length
,
700 unsigned end_of_thread
,
701 unsigned send_commit_msg
)
703 const struct brw_device_info
*devinfo
= p
->devinfo
;
706 if (devinfo
->gen
>= 7) {
707 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
708 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
709 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
711 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
712 } else if (devinfo
->gen
== 6) {
713 /* Use the render cache for all write messages. */
714 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
716 sfid
= BRW_SFID_DATAPORT_WRITE
;
719 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
720 header_present
, end_of_thread
);
722 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
723 brw_inst_set_dp_write_msg_type(devinfo
, insn
, msg_type
);
724 brw_inst_set_dp_write_msg_control(devinfo
, insn
, msg_control
);
725 brw_inst_set_rt_last(devinfo
, insn
, last_render_target
);
726 if (devinfo
->gen
< 7) {
727 brw_inst_set_dp_write_commit(devinfo
, insn
, send_commit_msg
);
732 brw_set_dp_read_message(struct brw_codegen
*p
,
734 unsigned binding_table_index
,
735 unsigned msg_control
,
737 unsigned target_cache
,
740 unsigned response_length
)
742 const struct brw_device_info
*devinfo
= p
->devinfo
;
745 if (devinfo
->gen
>= 7) {
746 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
747 } else if (devinfo
->gen
== 6) {
748 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
749 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
751 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
753 sfid
= BRW_SFID_DATAPORT_READ
;
756 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
757 header_present
, false);
759 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
760 brw_inst_set_dp_read_msg_type(devinfo
, insn
, msg_type
);
761 brw_inst_set_dp_read_msg_control(devinfo
, insn
, msg_control
);
762 if (devinfo
->gen
< 6)
763 brw_inst_set_dp_read_target_cache(devinfo
, insn
, target_cache
);
767 brw_set_sampler_message(struct brw_codegen
*p
,
769 unsigned binding_table_index
,
772 unsigned response_length
,
774 unsigned header_present
,
776 unsigned return_format
)
778 const struct brw_device_info
*devinfo
= p
->devinfo
;
780 brw_set_message_descriptor(p
, inst
, BRW_SFID_SAMPLER
, msg_length
,
781 response_length
, header_present
, false);
783 brw_inst_set_binding_table_index(devinfo
, inst
, binding_table_index
);
784 brw_inst_set_sampler(devinfo
, inst
, sampler
);
785 brw_inst_set_sampler_msg_type(devinfo
, inst
, msg_type
);
786 if (devinfo
->gen
>= 5) {
787 brw_inst_set_sampler_simd_mode(devinfo
, inst
, simd_mode
);
788 } else if (devinfo
->gen
== 4 && !devinfo
->is_g4x
) {
789 brw_inst_set_sampler_return_format(devinfo
, inst
, return_format
);
794 gen7_set_dp_scratch_message(struct brw_codegen
*p
,
798 bool invalidate_after_read
,
800 unsigned addr_offset
,
805 const struct brw_device_info
*devinfo
= p
->devinfo
;
806 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 ||
807 (devinfo
->gen
>= 8 && num_regs
== 8));
808 brw_set_message_descriptor(p
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
809 mlen
, rlen
, header_present
, false);
810 brw_inst_set_dp_category(devinfo
, inst
, 1); /* Scratch Block Read/Write msgs */
811 brw_inst_set_scratch_read_write(devinfo
, inst
, write
);
812 brw_inst_set_scratch_type(devinfo
, inst
, dword
);
813 brw_inst_set_scratch_invalidate_after_read(devinfo
, inst
, invalidate_after_read
);
814 brw_inst_set_scratch_block_size(devinfo
, inst
, ffs(num_regs
) - 1);
815 brw_inst_set_scratch_addr_offset(devinfo
, inst
, addr_offset
);
818 #define next_insn brw_next_insn
820 brw_next_insn(struct brw_codegen
*p
, unsigned opcode
)
822 const struct brw_device_info
*devinfo
= p
->devinfo
;
825 if (p
->nr_insn
+ 1 > p
->store_size
) {
827 p
->store
= reralloc(p
->mem_ctx
, p
->store
, brw_inst
, p
->store_size
);
830 p
->next_insn_offset
+= 16;
831 insn
= &p
->store
[p
->nr_insn
++];
832 memcpy(insn
, p
->current
, sizeof(*insn
));
834 brw_inst_set_opcode(devinfo
, insn
, opcode
);
839 brw_alu1(struct brw_codegen
*p
, unsigned opcode
,
840 struct brw_reg dest
, struct brw_reg src
)
842 brw_inst
*insn
= next_insn(p
, opcode
);
843 brw_set_dest(p
, insn
, dest
);
844 brw_set_src0(p
, insn
, src
);
849 brw_alu2(struct brw_codegen
*p
, unsigned opcode
,
850 struct brw_reg dest
, struct brw_reg src0
, struct brw_reg src1
)
852 brw_inst
*insn
= next_insn(p
, opcode
);
853 brw_set_dest(p
, insn
, dest
);
854 brw_set_src0(p
, insn
, src0
);
855 brw_set_src1(p
, insn
, src1
);
860 get_3src_subreg_nr(struct brw_reg reg
)
862 /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
863 * use 32-bit units (components 0..7). Since they only support F/D/UD
864 * types, this doesn't lose any flexibility, but uses fewer bits.
866 return reg
.subnr
/ 4;
870 brw_alu3(struct brw_codegen
*p
, unsigned opcode
, struct brw_reg dest
,
871 struct brw_reg src0
, struct brw_reg src1
, struct brw_reg src2
)
873 const struct brw_device_info
*devinfo
= p
->devinfo
;
874 brw_inst
*inst
= next_insn(p
, opcode
);
876 gen7_convert_mrf_to_grf(p
, &dest
);
878 assert(brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_16
);
880 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
881 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
882 assert(dest
.nr
< 128);
883 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
884 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
885 dest
.type
== BRW_REGISTER_TYPE_DF
||
886 dest
.type
== BRW_REGISTER_TYPE_D
||
887 dest
.type
== BRW_REGISTER_TYPE_UD
);
888 if (devinfo
->gen
== 6) {
889 brw_inst_set_3src_dst_reg_file(devinfo
, inst
,
890 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
892 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
893 brw_inst_set_3src_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
894 brw_inst_set_3src_dst_writemask(devinfo
, inst
, dest
.writemask
);
896 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
897 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
898 assert(src0
.nr
< 128);
899 brw_inst_set_3src_src0_swizzle(devinfo
, inst
, src0
.swizzle
);
900 brw_inst_set_3src_src0_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src0
));
901 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
902 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
903 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
904 brw_inst_set_3src_src0_rep_ctrl(devinfo
, inst
,
905 src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
907 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
908 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
909 assert(src1
.nr
< 128);
910 brw_inst_set_3src_src1_swizzle(devinfo
, inst
, src1
.swizzle
);
911 brw_inst_set_3src_src1_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src1
));
912 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
913 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
914 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
915 brw_inst_set_3src_src1_rep_ctrl(devinfo
, inst
,
916 src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
918 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
919 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
920 assert(src2
.nr
< 128);
921 brw_inst_set_3src_src2_swizzle(devinfo
, inst
, src2
.swizzle
);
922 brw_inst_set_3src_src2_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src2
));
923 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
924 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
925 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
926 brw_inst_set_3src_src2_rep_ctrl(devinfo
, inst
,
927 src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
929 if (devinfo
->gen
>= 7) {
930 /* Set both the source and destination types based on dest.type,
931 * ignoring the source register types. The MAD and LRP emitters ensure
932 * that all four types are float. The BFE and BFI2 emitters, however,
933 * may send us mixed D and UD types and want us to ignore that and use
934 * the destination type.
937 case BRW_REGISTER_TYPE_F
:
938 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
939 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
941 case BRW_REGISTER_TYPE_DF
:
942 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_DF
);
943 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_DF
);
945 case BRW_REGISTER_TYPE_D
:
946 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
947 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
949 case BRW_REGISTER_TYPE_UD
:
950 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
951 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
954 unreachable("not reached");
/***********************************************************************
 * Convenience routines.
 */
/* Emit a wrapper for a unary ALU instruction: brw_<OP>(p, dest, src0). */
#define ALU1(OP)                                          \
brw_inst *brw_##OP(struct brw_codegen *p,                 \
                   struct brw_reg dest,                   \
                   struct brw_reg src0)                   \
{                                                         \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);       \
}
/* Emit a wrapper for a binary ALU instruction: brw_<OP>(p, dest, src0, src1). */
#define ALU2(OP)                                          \
brw_inst *brw_##OP(struct brw_codegen *p,                 \
                   struct brw_reg dest,                   \
                   struct brw_reg src0,                   \
                   struct brw_reg src1)                   \
{                                                         \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
/* Emit a wrapper for a three-source ALU instruction:
 * brw_<OP>(p, dest, src0, src1, src2).
 */
#define ALU3(OP)                                          \
brw_inst *brw_##OP(struct brw_codegen *p,                 \
                   struct brw_reg dest,                   \
                   struct brw_reg src0,                   \
                   struct brw_reg src1,                   \
                   struct brw_reg src2)                   \
{                                                         \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
/* Like ALU3, but asserts that all operands share a single float type
 * (all F or all DF) before emitting the instruction.
 */
#define ALU3F(OP)                                                \
brw_inst *brw_##OP(struct brw_codegen *p,                        \
                   struct brw_reg dest,                          \
                   struct brw_reg src0,                          \
                   struct brw_reg src1,                          \
                   struct brw_reg src2)                          \
{                                                                \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                    \
          dest.type == BRW_REGISTER_TYPE_DF);                    \
   if (dest.type == BRW_REGISTER_TYPE_F) {                       \
      assert(src0.type == BRW_REGISTER_TYPE_F);                  \
      assert(src1.type == BRW_REGISTER_TYPE_F);                  \
      assert(src2.type == BRW_REGISTER_TYPE_F);                  \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {               \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                 \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                 \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                 \
   }                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);  \
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                     \
void brw_##OP(struct brw_codegen *p,                                  \
              struct brw_reg dest,                                    \
              struct brw_reg src)                                     \
{                                                                     \
   const struct brw_device_info *devinfo = p->devinfo;                \
   brw_inst *rnd, *add;                                               \
   rnd = next_insn(p, BRW_OPCODE_##OP);                               \
   brw_set_dest(p, rnd, dest);                                        \
   brw_set_src0(p, rnd, src);                                         \
                                                                      \
   if (devinfo->gen < 6) {                                            \
      /* turn on round-increments */                                  \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);    \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                  \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);  \
   }                                                                  \
}
1075 brw_ADD(struct brw_codegen
*p
, struct brw_reg dest
,
1076 struct brw_reg src0
, struct brw_reg src1
)
1079 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1080 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1081 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1082 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1083 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1086 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1087 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1088 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1089 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1090 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1093 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1097 brw_AVG(struct brw_codegen
*p
, struct brw_reg dest
,
1098 struct brw_reg src0
, struct brw_reg src1
)
1100 assert(dest
.type
== src0
.type
);
1101 assert(src0
.type
== src1
.type
);
1102 switch (src0
.type
) {
1103 case BRW_REGISTER_TYPE_B
:
1104 case BRW_REGISTER_TYPE_UB
:
1105 case BRW_REGISTER_TYPE_W
:
1106 case BRW_REGISTER_TYPE_UW
:
1107 case BRW_REGISTER_TYPE_D
:
1108 case BRW_REGISTER_TYPE_UD
:
1111 unreachable("Bad type for brw_AVG");
1114 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1118 brw_MUL(struct brw_codegen
*p
, struct brw_reg dest
,
1119 struct brw_reg src0
, struct brw_reg src1
)
1122 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1123 src0
.type
== BRW_REGISTER_TYPE_UD
||
1124 src1
.type
== BRW_REGISTER_TYPE_D
||
1125 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1126 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1129 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1130 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1131 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1132 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1133 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1136 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1137 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1138 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1139 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1140 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1143 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1144 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1145 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1146 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1148 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1152 brw_LINE(struct brw_codegen
*p
, struct brw_reg dest
,
1153 struct brw_reg src0
, struct brw_reg src1
)
1155 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1156 src0
.width
= BRW_WIDTH_1
;
1157 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1158 return brw_alu2(p
, BRW_OPCODE_LINE
, dest
, src0
, src1
);
1162 brw_PLN(struct brw_codegen
*p
, struct brw_reg dest
,
1163 struct brw_reg src0
, struct brw_reg src1
)
1165 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1166 src0
.width
= BRW_WIDTH_1
;
1167 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1168 src1
.vstride
= BRW_VERTICAL_STRIDE_8
;
1169 src1
.width
= BRW_WIDTH_8
;
1170 src1
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
1171 return brw_alu2(p
, BRW_OPCODE_PLN
, dest
, src0
, src1
);
1175 brw_F32TO16(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1177 const struct brw_device_info
*devinfo
= p
->devinfo
;
1178 const bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1179 /* The F32TO16 instruction doesn't support 32-bit destination types in
1180 * Align1 mode, and neither does the Gen8 implementation in terms of a
1181 * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
1182 * an undocumented feature.
1184 const bool needs_zero_fill
= (dst
.type
== BRW_REGISTER_TYPE_UD
&&
1185 (!align16
|| devinfo
->gen
>= 8));
1189 assert(dst
.type
== BRW_REGISTER_TYPE_UD
);
1191 assert(dst
.type
== BRW_REGISTER_TYPE_UD
||
1192 dst
.type
== BRW_REGISTER_TYPE_W
||
1193 dst
.type
== BRW_REGISTER_TYPE_UW
||
1194 dst
.type
== BRW_REGISTER_TYPE_HF
);
1197 brw_push_insn_state(p
);
1199 if (needs_zero_fill
) {
1200 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
1201 dst
= spread(retype(dst
, BRW_REGISTER_TYPE_W
), 2);
1204 if (devinfo
->gen
>= 8) {
1205 inst
= brw_MOV(p
, retype(dst
, BRW_REGISTER_TYPE_HF
), src
);
1207 assert(devinfo
->gen
== 7);
1208 inst
= brw_alu1(p
, BRW_OPCODE_F32TO16
, dst
, src
);
1211 if (needs_zero_fill
) {
1212 brw_inst_set_no_dd_clear(devinfo
, inst
, true);
1213 inst
= brw_MOV(p
, suboffset(dst
, 1), brw_imm_ud(0u));
1214 brw_inst_set_no_dd_check(devinfo
, inst
, true);
1217 brw_pop_insn_state(p
);
1222 brw_F16TO32(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1224 const struct brw_device_info
*devinfo
= p
->devinfo
;
1225 bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1228 assert(src
.type
== BRW_REGISTER_TYPE_UD
);
1230 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1232 * Because this instruction does not have a 16-bit floating-point
1233 * type, the source data type must be Word (W). The destination type
1234 * must be F (Float).
1236 if (src
.type
== BRW_REGISTER_TYPE_UD
)
1237 src
= spread(retype(src
, BRW_REGISTER_TYPE_W
), 2);
1239 assert(src
.type
== BRW_REGISTER_TYPE_W
||
1240 src
.type
== BRW_REGISTER_TYPE_UW
||
1241 src
.type
== BRW_REGISTER_TYPE_HF
);
1244 if (devinfo
->gen
>= 8) {
1245 return brw_MOV(p
, dst
, retype(src
, BRW_REGISTER_TYPE_HF
));
1247 assert(devinfo
->gen
== 7);
1248 return brw_alu1(p
, BRW_OPCODE_F16TO32
, dst
, src
);
1253 void brw_NOP(struct brw_codegen
*p
)
1255 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1256 brw_inst_set_exec_size(p
->devinfo
, insn
, BRW_EXECUTE_1
);
1257 brw_set_dest(p
, insn
, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD
));
1258 brw_set_src0(p
, insn
, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD
));
1259 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1266 /***********************************************************************
1267 * Comparisons, if/else/endif
1271 brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1272 unsigned predicate_control
)
1274 const struct brw_device_info
*devinfo
= p
->devinfo
;
1275 struct brw_reg ip
= brw_ip_reg();
1276 brw_inst
*inst
= brw_alu2(p
, BRW_OPCODE_JMPI
, ip
, ip
, index
);
1278 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_2
);
1279 brw_inst_set_qtr_control(devinfo
, inst
, BRW_COMPRESSION_NONE
);
1280 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
1281 brw_inst_set_pred_control(devinfo
, inst
, predicate_control
);
1287 push_if_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1289 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1291 p
->if_stack_depth
++;
1292 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1293 p
->if_stack_array_size
*= 2;
1294 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1295 p
->if_stack_array_size
);
1300 pop_if_stack(struct brw_codegen
*p
)
1302 p
->if_stack_depth
--;
1303 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1307 push_loop_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1309 if (p
->loop_stack_array_size
<= (p
->loop_stack_depth
+ 1)) {
1310 p
->loop_stack_array_size
*= 2;
1311 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1312 p
->loop_stack_array_size
);
1313 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1314 p
->loop_stack_array_size
);
1317 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1318 p
->loop_stack_depth
++;
1319 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1323 get_inner_do_insn(struct brw_codegen
*p
)
1325 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1328 /* EU takes the value from the flag register and pushes it onto some
1329 * sort of a stack (presumably merging with any flag value already on
1330 * the stack). Within an if block, the flags at the top of the stack
1331 * control execution on each channel of the unit, eg. on each of the
1332 * 16 pixel values in our wm programs.
1334 * When the matching 'else' instruction is reached (presumably by
1335 * countdown of the instruction count patched in by our ELSE/ENDIF
1336 * functions), the relevant flags are inverted.
1338 * When the matching 'endif' instruction is reached, the flags are
1339 * popped off. If the stack is now empty, normal execution resumes.
1342 brw_IF(struct brw_codegen
*p
, unsigned execute_size
)
1344 const struct brw_device_info
*devinfo
= p
->devinfo
;
1347 insn
= next_insn(p
, BRW_OPCODE_IF
);
1349 /* Override the defaults for this instruction:
1351 if (devinfo
->gen
< 6) {
1352 brw_set_dest(p
, insn
, brw_ip_reg());
1353 brw_set_src0(p
, insn
, brw_ip_reg());
1354 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1355 } else if (devinfo
->gen
== 6) {
1356 brw_set_dest(p
, insn
, brw_imm_w(0));
1357 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1358 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1359 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1360 } else if (devinfo
->gen
== 7) {
1361 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1362 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1363 brw_set_src1(p
, insn
, brw_imm_w(0));
1364 brw_inst_set_jip(devinfo
, insn
, 0);
1365 brw_inst_set_uip(devinfo
, insn
, 0);
1367 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1368 brw_set_src0(p
, insn
, brw_imm_d(0));
1369 brw_inst_set_jip(devinfo
, insn
, 0);
1370 brw_inst_set_uip(devinfo
, insn
, 0);
1373 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1374 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1375 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NORMAL
);
1376 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1377 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1378 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1380 push_if_stack(p
, insn
);
1381 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1385 /* This function is only used for gen6-style IF instructions with an
1386 * embedded comparison (conditional modifier). It is not used on gen7.
1389 gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1390 struct brw_reg src0
, struct brw_reg src1
)
1392 const struct brw_device_info
*devinfo
= p
->devinfo
;
1395 insn
= next_insn(p
, BRW_OPCODE_IF
);
1397 brw_set_dest(p
, insn
, brw_imm_w(0));
1398 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1400 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1401 brw_set_src0(p
, insn
, src0
);
1402 brw_set_src1(p
, insn
, src1
);
1404 assert(brw_inst_qtr_control(devinfo
, insn
) == BRW_COMPRESSION_NONE
);
1405 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
1406 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1408 push_if_stack(p
, insn
);
1413 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1416 convert_IF_ELSE_to_ADD(struct brw_codegen
*p
,
1417 brw_inst
*if_inst
, brw_inst
*else_inst
)
1419 const struct brw_device_info
*devinfo
= p
->devinfo
;
1421 /* The next instruction (where the ENDIF would be, if it existed) */
1422 brw_inst
*next_inst
= &p
->store
[p
->nr_insn
];
1424 assert(p
->single_program_flow
);
1425 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1426 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1427 assert(brw_inst_exec_size(devinfo
, if_inst
) == BRW_EXECUTE_1
);
1429 /* Convert IF to an ADD instruction that moves the instruction pointer
1430 * to the first instruction of the ELSE block. If there is no ELSE
1431 * block, point to where ENDIF would be. Reverse the predicate.
1433 * There's no need to execute an ENDIF since we don't need to do any
1434 * stack operations, and if we're currently executing, we just want to
1435 * continue normally.
1437 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_ADD
);
1438 brw_inst_set_pred_inv(devinfo
, if_inst
, true);
1440 if (else_inst
!= NULL
) {
1441 /* Convert ELSE to an ADD instruction that points where the ENDIF
1444 brw_inst_set_opcode(devinfo
, else_inst
, BRW_OPCODE_ADD
);
1446 brw_inst_set_imm_ud(devinfo
, if_inst
, (else_inst
- if_inst
+ 1) * 16);
1447 brw_inst_set_imm_ud(devinfo
, else_inst
, (next_inst
- else_inst
) * 16);
1449 brw_inst_set_imm_ud(devinfo
, if_inst
, (next_inst
- if_inst
) * 16);
1454 * Patch IF and ELSE instructions with appropriate jump targets.
1457 patch_IF_ELSE(struct brw_codegen
*p
,
1458 brw_inst
*if_inst
, brw_inst
*else_inst
, brw_inst
*endif_inst
)
1460 const struct brw_device_info
*devinfo
= p
->devinfo
;
1462 /* We shouldn't be patching IF and ELSE instructions in single program flow
1463 * mode when gen < 6, because in single program flow mode on those
1464 * platforms, we convert flow control instructions to conditional ADDs that
1465 * operate on IP (see brw_ENDIF).
1467 * However, on Gen6, writing to IP doesn't work in single program flow mode
1468 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1469 * not be updated by non-flow control instructions."). And on later
1470 * platforms, there is no significant benefit to converting control flow
1471 * instructions to conditional ADDs. So we do patch IF and ELSE
1472 * instructions in single program flow mode on those platforms.
1474 if (devinfo
->gen
< 6)
1475 assert(!p
->single_program_flow
);
1477 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1478 assert(endif_inst
!= NULL
);
1479 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1481 unsigned br
= brw_jump_scale(devinfo
);
1483 assert(brw_inst_opcode(devinfo
, endif_inst
) == BRW_OPCODE_ENDIF
);
1484 brw_inst_set_exec_size(devinfo
, endif_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1486 if (else_inst
== NULL
) {
1487 /* Patch IF -> ENDIF */
1488 if (devinfo
->gen
< 6) {
1489 /* Turn it into an IFF, which means no mask stack operations for
1490 * all-false and jumping past the ENDIF.
1492 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_IFF
);
1493 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1494 br
* (endif_inst
- if_inst
+ 1));
1495 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1496 } else if (devinfo
->gen
== 6) {
1497 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1498 brw_inst_set_gen6_jump_count(devinfo
, if_inst
, br
*(endif_inst
- if_inst
));
1500 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1501 brw_inst_set_jip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1504 brw_inst_set_exec_size(devinfo
, else_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1506 /* Patch IF -> ELSE */
1507 if (devinfo
->gen
< 6) {
1508 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1509 br
* (else_inst
- if_inst
));
1510 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1511 } else if (devinfo
->gen
== 6) {
1512 brw_inst_set_gen6_jump_count(devinfo
, if_inst
,
1513 br
* (else_inst
- if_inst
+ 1));
1516 /* Patch ELSE -> ENDIF */
1517 if (devinfo
->gen
< 6) {
1518 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1521 brw_inst_set_gen4_jump_count(devinfo
, else_inst
,
1522 br
* (endif_inst
- else_inst
+ 1));
1523 brw_inst_set_gen4_pop_count(devinfo
, else_inst
, 1);
1524 } else if (devinfo
->gen
== 6) {
1525 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1526 brw_inst_set_gen6_jump_count(devinfo
, else_inst
,
1527 br
* (endif_inst
- else_inst
));
1529 /* The IF instruction's JIP should point just past the ELSE */
1530 brw_inst_set_jip(devinfo
, if_inst
, br
* (else_inst
- if_inst
+ 1));
1531 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1532 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1533 brw_inst_set_jip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1534 if (devinfo
->gen
>= 8) {
1535 /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1536 * should point to ENDIF.
1538 brw_inst_set_uip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1545 brw_ELSE(struct brw_codegen
*p
)
1547 const struct brw_device_info
*devinfo
= p
->devinfo
;
1550 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1552 if (devinfo
->gen
< 6) {
1553 brw_set_dest(p
, insn
, brw_ip_reg());
1554 brw_set_src0(p
, insn
, brw_ip_reg());
1555 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1556 } else if (devinfo
->gen
== 6) {
1557 brw_set_dest(p
, insn
, brw_imm_w(0));
1558 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1559 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1560 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1561 } else if (devinfo
->gen
== 7) {
1562 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1563 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1564 brw_set_src1(p
, insn
, brw_imm_w(0));
1565 brw_inst_set_jip(devinfo
, insn
, 0);
1566 brw_inst_set_uip(devinfo
, insn
, 0);
1568 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1569 brw_set_src0(p
, insn
, brw_imm_d(0));
1570 brw_inst_set_jip(devinfo
, insn
, 0);
1571 brw_inst_set_uip(devinfo
, insn
, 0);
1574 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1575 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1576 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1577 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1579 push_if_stack(p
, insn
);
1583 brw_ENDIF(struct brw_codegen
*p
)
1585 const struct brw_device_info
*devinfo
= p
->devinfo
;
1586 brw_inst
*insn
= NULL
;
1587 brw_inst
*else_inst
= NULL
;
1588 brw_inst
*if_inst
= NULL
;
1590 bool emit_endif
= true;
1592 /* In single program flow mode, we can express IF and ELSE instructions
1593 * equivalently as ADD instructions that operate on IP. On platforms prior
1594 * to Gen6, flow control instructions cause an implied thread switch, so
1595 * this is a significant savings.
1597 * However, on Gen6, writing to IP doesn't work in single program flow mode
1598 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1599 * not be updated by non-flow control instructions."). And on later
1600 * platforms, there is no significant benefit to converting control flow
1601 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1604 if (devinfo
->gen
< 6 && p
->single_program_flow
)
1608 * A single next_insn() may change the base address of instruction store
1609 * memory(p->store), so call it first before referencing the instruction
1610 * store pointer from an index
1613 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1615 /* Pop the IF and (optional) ELSE instructions from the stack */
1616 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1617 tmp
= pop_if_stack(p
);
1618 if (brw_inst_opcode(devinfo
, tmp
) == BRW_OPCODE_ELSE
) {
1620 tmp
= pop_if_stack(p
);
1625 /* ENDIF is useless; don't bother emitting it. */
1626 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1630 if (devinfo
->gen
< 6) {
1631 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1632 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1633 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1634 } else if (devinfo
->gen
== 6) {
1635 brw_set_dest(p
, insn
, brw_imm_w(0));
1636 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1637 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1638 } else if (devinfo
->gen
== 7) {
1639 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1640 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1641 brw_set_src1(p
, insn
, brw_imm_w(0));
1643 brw_set_src0(p
, insn
, brw_imm_d(0));
1646 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1647 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1648 if (devinfo
->gen
< 6)
1649 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1651 /* Also pop item off the stack in the endif instruction: */
1652 if (devinfo
->gen
< 6) {
1653 brw_inst_set_gen4_jump_count(devinfo
, insn
, 0);
1654 brw_inst_set_gen4_pop_count(devinfo
, insn
, 1);
1655 } else if (devinfo
->gen
== 6) {
1656 brw_inst_set_gen6_jump_count(devinfo
, insn
, 2);
1658 brw_inst_set_jip(devinfo
, insn
, 2);
1660 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1664 brw_BREAK(struct brw_codegen
*p
)
1666 const struct brw_device_info
*devinfo
= p
->devinfo
;
1669 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1670 if (devinfo
->gen
>= 8) {
1671 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1672 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1673 } else if (devinfo
->gen
>= 6) {
1674 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1675 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1676 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1678 brw_set_dest(p
, insn
, brw_ip_reg());
1679 brw_set_src0(p
, insn
, brw_ip_reg());
1680 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1681 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1682 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1684 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1685 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1692 brw_CONT(struct brw_codegen
*p
)
1694 const struct brw_device_info
*devinfo
= p
->devinfo
;
1697 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1698 brw_set_dest(p
, insn
, brw_ip_reg());
1699 if (devinfo
->gen
>= 8) {
1700 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1702 brw_set_src0(p
, insn
, brw_ip_reg());
1703 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1706 if (devinfo
->gen
< 6) {
1707 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1708 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1710 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1711 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1717 gen6_HALT(struct brw_codegen
*p
)
1719 const struct brw_device_info
*devinfo
= p
->devinfo
;
1722 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1723 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1724 if (devinfo
->gen
>= 8) {
1725 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1727 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1728 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1731 if (p
->compressed
) {
1732 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_16
);
1734 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1735 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_8
);
1742 * The DO/WHILE is just an unterminated loop -- break or continue are
1743 * used for control within the loop. We have a few ways they can be
1746 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1747 * jip and no DO instruction.
1749 * For non-uniform control flow pre-gen6, there's a DO instruction to
1750 * push the mask, and a WHILE to jump back, and BREAK to get out and
1753 * For gen6, there's no more mask stack, so no need for DO. WHILE
1754 * just points back to the first instruction of the loop.
1757 brw_DO(struct brw_codegen
*p
, unsigned execute_size
)
1759 const struct brw_device_info
*devinfo
= p
->devinfo
;
1761 if (devinfo
->gen
>= 6 || p
->single_program_flow
) {
1762 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1763 return &p
->store
[p
->nr_insn
];
1765 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1767 push_loop_stack(p
, insn
);
1769 /* Override the defaults for this instruction:
1771 brw_set_dest(p
, insn
, brw_null_reg());
1772 brw_set_src0(p
, insn
, brw_null_reg());
1773 brw_set_src1(p
, insn
, brw_null_reg());
1775 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1776 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1777 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
);
1784 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1787 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1788 * nesting, since it can always just point to the end of the block/current loop.
1791 brw_patch_break_cont(struct brw_codegen
*p
, brw_inst
*while_inst
)
1793 const struct brw_device_info
*devinfo
= p
->devinfo
;
1794 brw_inst
*do_inst
= get_inner_do_insn(p
);
1796 unsigned br
= brw_jump_scale(devinfo
);
1798 assert(devinfo
->gen
< 6);
1800 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1801 /* If the jump count is != 0, that means that this instruction has already
1802 * been patched because it's part of a loop inside of the one we're
1805 if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_BREAK
&&
1806 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1807 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
*((while_inst
- inst
) + 1));
1808 } else if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_CONTINUE
&&
1809 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1810 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
* (while_inst
- inst
));
1816 brw_WHILE(struct brw_codegen
*p
)
1818 const struct brw_device_info
*devinfo
= p
->devinfo
;
1819 brw_inst
*insn
, *do_insn
;
1820 unsigned br
= brw_jump_scale(devinfo
);
1822 if (devinfo
->gen
>= 6) {
1823 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1824 do_insn
= get_inner_do_insn(p
);
1826 if (devinfo
->gen
>= 8) {
1827 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1828 brw_set_src0(p
, insn
, brw_imm_d(0));
1829 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1830 } else if (devinfo
->gen
== 7) {
1831 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1832 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1833 brw_set_src1(p
, insn
, brw_imm_w(0));
1834 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1836 brw_set_dest(p
, insn
, brw_imm_w(0));
1837 brw_inst_set_gen6_jump_count(devinfo
, insn
, br
* (do_insn
- insn
));
1838 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1839 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1842 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1845 if (p
->single_program_flow
) {
1846 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1847 do_insn
= get_inner_do_insn(p
);
1849 brw_set_dest(p
, insn
, brw_ip_reg());
1850 brw_set_src0(p
, insn
, brw_ip_reg());
1851 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1852 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
1854 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1855 do_insn
= get_inner_do_insn(p
);
1857 assert(brw_inst_opcode(devinfo
, do_insn
) == BRW_OPCODE_DO
);
1859 brw_set_dest(p
, insn
, brw_ip_reg());
1860 brw_set_src0(p
, insn
, brw_ip_reg());
1861 brw_set_src1(p
, insn
, brw_imm_d(0));
1863 brw_inst_set_exec_size(devinfo
, insn
, brw_inst_exec_size(devinfo
, do_insn
));
1864 brw_inst_set_gen4_jump_count(devinfo
, insn
, br
* (do_insn
- insn
+ 1));
1865 brw_inst_set_gen4_pop_count(devinfo
, insn
, 0);
1867 brw_patch_break_cont(p
, insn
);
1870 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1872 p
->loop_stack_depth
--;
1879 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
)
1881 const struct brw_device_info
*devinfo
= p
->devinfo
;
1882 brw_inst
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1885 if (devinfo
->gen
>= 5)
1888 assert(brw_inst_opcode(devinfo
, jmp_insn
) == BRW_OPCODE_JMPI
);
1889 assert(brw_inst_src1_reg_file(devinfo
, jmp_insn
) == BRW_IMMEDIATE_VALUE
);
1891 brw_inst_set_gen4_jump_count(devinfo
, jmp_insn
,
1892 jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1));
1895 /* To integrate with the above, it makes sense that the comparison
1896 * instruction should populate the flag register. It might be simpler
1897 * just to use the flag reg for most WM tasks?
1899 void brw_CMP(struct brw_codegen
*p
,
1900 struct brw_reg dest
,
1901 unsigned conditional
,
1902 struct brw_reg src0
,
1903 struct brw_reg src1
)
1905 const struct brw_device_info
*devinfo
= p
->devinfo
;
1906 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1908 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1909 brw_set_dest(p
, insn
, dest
);
1910 brw_set_src0(p
, insn
, src0
);
1911 brw_set_src1(p
, insn
, src1
);
1913 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1915 * "Any CMP instruction with a null destination must use a {switch}."
1917 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1918 * mentioned on their work-arounds pages.
1920 if (devinfo
->gen
== 7) {
1921 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1922 dest
.nr
== BRW_ARF_NULL
) {
1923 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1928 /***********************************************************************
1929 * Helpers for the various SEND message types:
1932 /** Extended math function, float[8].
1934 void gen4_math(struct brw_codegen
*p
,
1935 struct brw_reg dest
,
1937 unsigned msg_reg_nr
,
1939 unsigned precision
)
1941 const struct brw_device_info
*devinfo
= p
->devinfo
;
1942 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1944 if (has_scalar_region(src
)) {
1945 data_type
= BRW_MATH_DATA_SCALAR
;
1947 data_type
= BRW_MATH_DATA_VECTOR
;
1950 assert(devinfo
->gen
< 6);
1952 /* Example code doesn't set predicate_control for send
1955 brw_inst_set_pred_control(devinfo
, insn
, 0);
1956 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
1958 brw_set_dest(p
, insn
, dest
);
1959 brw_set_src0(p
, insn
, src
);
1960 brw_set_math_message(p
,
1963 src
.type
== BRW_REGISTER_TYPE_D
,
1968 void gen6_math(struct brw_codegen
*p
,
1969 struct brw_reg dest
,
1971 struct brw_reg src0
,
1972 struct brw_reg src1
)
1974 const struct brw_device_info
*devinfo
= p
->devinfo
;
1975 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1977 assert(devinfo
->gen
>= 6);
1979 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1980 (devinfo
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1981 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
||
1982 (devinfo
->gen
>= 8 && src0
.file
== BRW_IMMEDIATE_VALUE
));
1984 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1985 if (devinfo
->gen
== 6) {
1986 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1987 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1990 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1991 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1992 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1993 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1994 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1995 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
1996 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
1998 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1999 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
2000 if (function
== BRW_MATH_FUNCTION_POW
) {
2001 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
2002 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
2004 assert(src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
2005 src1
.nr
== BRW_ARF_NULL
);
2009 /* Source modifiers are ignored for extended math instructions on Gen6. */
2010 if (devinfo
->gen
== 6) {
2011 assert(!src0
.negate
);
2013 assert(!src1
.negate
);
2017 brw_inst_set_math_function(devinfo
, insn
, function
);
2019 brw_set_dest(p
, insn
, dest
);
2020 brw_set_src0(p
, insn
, src0
);
2021 brw_set_src1(p
, insn
, src1
);
2025 * Return the right surface index to access the thread scratch space using
2026 * stateless dataport messages.
2029 brw_scratch_surface_idx(const struct brw_codegen
*p
)
2031 /* The scratch space is thread-local so IA coherency is unnecessary. */
2032 if (p
->devinfo
->gen
>= 8)
2033 return GEN8_BTI_STATELESS_NON_COHERENT
;
2035 return BRW_BTI_STATELESS
;
2039 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
2040 * using a constant offset per channel.
2042 * The offset must be aligned to oword size (16 bytes). Used for
2043 * register spilling.
2045 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
2050 const struct brw_device_info
*devinfo
= p
->devinfo
;
2051 uint32_t msg_control
, msg_type
;
2054 if (devinfo
->gen
>= 6)
2057 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2059 if (num_regs
== 1) {
2060 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2063 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2067 /* Set up the message header. This is g0, with g0.2 filled with
2068 * the offset. We don't want to leave our offset around in g0 or
2069 * it'll screw up texture samples, so set it up inside the message
2073 brw_push_insn_state(p
);
2074 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2075 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2076 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2078 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2080 /* set message header global offset field (reg 0, element 2) */
2082 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2084 2), BRW_REGISTER_TYPE_UD
),
2085 brw_imm_ud(offset
));
2087 brw_pop_insn_state(p
);
2091 struct brw_reg dest
;
2092 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2093 int send_commit_msg
;
2094 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2095 BRW_REGISTER_TYPE_UW
);
2097 if (brw_inst_qtr_control(devinfo
, insn
) != BRW_COMPRESSION_NONE
) {
2098 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2099 src_header
= vec16(src_header
);
2101 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2102 if (devinfo
->gen
< 6)
2103 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2105 /* Until gen6, writes followed by reads from the same location
2106 * are not guaranteed to be ordered unless write_commit is set.
2107 * If set, then a no-op write is issued to the destination
2108 * register to set a dependency, and a read from the destination
2109 * can be used to ensure the ordering.
2111 * For gen6, only writes between different threads need ordering
2112 * protection. Our use of DP writes is all about register
2113 * spilling within a thread.
2115 if (devinfo
->gen
>= 6) {
2116 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2117 send_commit_msg
= 0;
2120 send_commit_msg
= 1;
2123 brw_set_dest(p
, insn
, dest
);
2124 if (devinfo
->gen
>= 6) {
2125 brw_set_src0(p
, insn
, mrf
);
2127 brw_set_src0(p
, insn
, brw_null_reg());
2130 if (devinfo
->gen
>= 6)
2131 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2133 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2135 brw_set_dp_write_message(p
,
2137 brw_scratch_surface_idx(p
),
2141 true, /* header_present */
2142 0, /* not a render target */
2143 send_commit_msg
, /* response_length */
2151 * Read a block of owords (half a GRF each) from the scratch buffer
2152 * using a constant index per channel.
2154 * Offset must be aligned to oword size (16 bytes). Used for register
2158 brw_oword_block_read_scratch(struct brw_codegen
*p
,
2159 struct brw_reg dest
,
2164 const struct brw_device_info
*devinfo
= p
->devinfo
;
2165 uint32_t msg_control
;
2168 if (devinfo
->gen
>= 6)
2171 if (p
->devinfo
->gen
>= 7) {
2172 /* On gen 7 and above, we no longer have message registers and we can
2173 * send from any register we want. By using the destination register
2174 * for the message, we guarantee that the implied message write won't
2175 * accidentally overwrite anything. This has been a problem because
2176 * the MRF registers and source for the final FB write are both fixed
2179 mrf
= retype(dest
, BRW_REGISTER_TYPE_UD
);
2181 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2183 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2185 if (num_regs
== 1) {
2186 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2189 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2194 brw_push_insn_state(p
);
2195 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2196 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2197 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2199 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2201 /* set message header global offset field (reg 0, element 2) */
2202 brw_MOV(p
, get_element_ud(mrf
, 2), brw_imm_ud(offset
));
2204 brw_pop_insn_state(p
);
2208 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2210 assert(brw_inst_pred_control(devinfo
, insn
) == 0);
2211 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2213 brw_set_dest(p
, insn
, dest
); /* UW? */
2214 if (devinfo
->gen
>= 6) {
2215 brw_set_src0(p
, insn
, mrf
);
2217 brw_set_src0(p
, insn
, brw_null_reg());
2218 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2221 brw_set_dp_read_message(p
,
2223 brw_scratch_surface_idx(p
),
2225 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2226 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2228 true, /* header_present */
2234 gen7_block_read_scratch(struct brw_codegen
*p
,
2235 struct brw_reg dest
,
2239 const struct brw_device_info
*devinfo
= p
->devinfo
;
2240 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2241 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2243 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2244 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UW
));
2246 /* The HW requires that the header is present; this is to get the g0.5
2249 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2251 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2252 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2253 * is 32 bytes, which happens to be the size of a register.
2256 assert(offset
< (1 << 12));
2258 gen7_set_dp_scratch_message(p
, insn
,
2259 false, /* scratch read */
2261 false, /* invalidate after read */
2264 1, /* mlen: just g0 */
2265 num_regs
, /* rlen */
2266 true); /* header present */
2270 * Read a float[4] vector from the data port Data Cache (const buffer).
2271 * Location (in buffer) should be a multiple of 16.
2272 * Used for fetching shader constants.
2274 void brw_oword_block_read(struct brw_codegen
*p
,
2275 struct brw_reg dest
,
2278 uint32_t bind_table_index
)
2280 const struct brw_device_info
*devinfo
= p
->devinfo
;
2282 /* On newer hardware, offset is in units of owords. */
2283 if (devinfo
->gen
>= 6)
2286 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2288 brw_push_insn_state(p
);
2289 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2290 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2291 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2292 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2294 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2296 /* set message header global offset field (reg 0, element 2) */
2298 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2300 2), BRW_REGISTER_TYPE_UD
),
2301 brw_imm_ud(offset
));
2303 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2305 /* cast dest to a uword[8] vector */
2306 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2308 brw_set_dest(p
, insn
, dest
);
2309 if (devinfo
->gen
>= 6) {
2310 brw_set_src0(p
, insn
, mrf
);
2312 brw_set_src0(p
, insn
, brw_null_reg());
2313 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2316 brw_set_dp_read_message(p
,
2319 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2320 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2321 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2323 true, /* header_present */
2324 1); /* response_length (1 reg, 2 owords!) */
2326 brw_pop_insn_state(p
);
2330 void brw_fb_WRITE(struct brw_codegen
*p
,
2332 struct brw_reg payload
,
2333 struct brw_reg implied_header
,
2334 unsigned msg_control
,
2335 unsigned binding_table_index
,
2336 unsigned msg_length
,
2337 unsigned response_length
,
2339 bool last_render_target
,
2340 bool header_present
)
2342 const struct brw_device_info
*devinfo
= p
->devinfo
;
2345 struct brw_reg dest
, src0
;
2347 if (dispatch_width
== 16)
2348 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2350 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2352 if (devinfo
->gen
>= 6) {
2353 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2355 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2357 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2359 if (devinfo
->gen
>= 6) {
2360 /* headerless version, just submit color payload */
2363 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2365 assert(payload
.file
== BRW_MESSAGE_REGISTER_FILE
);
2366 brw_inst_set_base_mrf(devinfo
, insn
, payload
.nr
);
2367 src0
= implied_header
;
2369 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2372 brw_set_dest(p
, insn
, dest
);
2373 brw_set_src0(p
, insn
, src0
);
2374 brw_set_dp_write_message(p
,
2376 binding_table_index
,
2384 0 /* send_commit_msg */);
2389 * Texture sample instruction.
2390 * Note: the msg_type plus msg_length values determine exactly what kind
2391 * of sampling operation is performed. See volume 4, page 161 of docs.
2393 void brw_SAMPLE(struct brw_codegen
*p
,
2394 struct brw_reg dest
,
2395 unsigned msg_reg_nr
,
2396 struct brw_reg src0
,
2397 unsigned binding_table_index
,
2400 unsigned response_length
,
2401 unsigned msg_length
,
2402 unsigned header_present
,
2404 unsigned return_format
)
2406 const struct brw_device_info
*devinfo
= p
->devinfo
;
2409 if (msg_reg_nr
!= -1)
2410 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2412 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2413 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
); /* XXX */
2415 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2417 * "Instruction compression is not allowed for this instruction (that
2418 * is, send). The hardware behavior is undefined if this instruction is
2419 * set as compressed. However, compress control can be set to "SecHalf"
2420 * to affect the EMask generation."
2422 * No similar wording is found in later PRMs, but there are examples
2423 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2424 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2425 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2427 if (brw_inst_qtr_control(devinfo
, insn
) != BRW_COMPRESSION_2NDHALF
)
2428 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2430 if (devinfo
->gen
< 6)
2431 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2433 brw_set_dest(p
, insn
, dest
);
2434 brw_set_src0(p
, insn
, src0
);
2435 brw_set_sampler_message(p
, insn
,
2436 binding_table_index
,
2446 /* Adjust the message header's sampler state pointer to
2447 * select the correct group of 16 samplers.
2449 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
2450 struct brw_reg header
,
2451 struct brw_reg sampler_index
)
2453 /* The "Sampler Index" field can only store values between 0 and 15.
2454 * However, we can add an offset to the "Sampler State Pointer"
2455 * field, effectively selecting a different set of 16 samplers.
2457 * The "Sampler State Pointer" needs to be aligned to a 32-byte
2458 * offset, and each sampler state is only 16-bytes, so we can't
2459 * exclusively use the offset - we have to use both.
2462 const struct brw_device_info
*devinfo
= p
->devinfo
;
2464 if (sampler_index
.file
== BRW_IMMEDIATE_VALUE
) {
2465 const int sampler_state_size
= 16; /* 16 bytes */
2466 uint32_t sampler
= sampler_index
.ud
;
2468 if (sampler
>= 16) {
2469 assert(devinfo
->is_haswell
|| devinfo
->gen
>= 8);
2471 get_element_ud(header
, 3),
2472 get_element_ud(brw_vec8_grf(0, 0), 3),
2473 brw_imm_ud(16 * (sampler
/ 16) * sampler_state_size
));
2476 /* Non-const sampler array indexing case */
2477 if (devinfo
->gen
< 8 && !devinfo
->is_haswell
) {
2481 struct brw_reg temp
= get_element_ud(header
, 3);
2483 brw_AND(p
, temp
, get_element_ud(sampler_index
, 0), brw_imm_ud(0x0f0));
2484 brw_SHL(p
, temp
, temp
, brw_imm_ud(4));
2486 get_element_ud(header
, 3),
2487 get_element_ud(brw_vec8_grf(0, 0), 3),
2492 /* All these variables are pretty confusing - we might be better off
2493 * using bitmasks and macros for this, in the old style. Or perhaps
2494 * just having the caller instantiate the fields in dword3 itself.
2496 void brw_urb_WRITE(struct brw_codegen
*p
,
2497 struct brw_reg dest
,
2498 unsigned msg_reg_nr
,
2499 struct brw_reg src0
,
2500 enum brw_urb_write_flags flags
,
2501 unsigned msg_length
,
2502 unsigned response_length
,
2506 const struct brw_device_info
*devinfo
= p
->devinfo
;
2509 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2511 if (devinfo
->gen
>= 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2512 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2513 brw_push_insn_state(p
);
2514 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2515 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2516 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2517 BRW_REGISTER_TYPE_UD
),
2518 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2519 brw_imm_ud(0xff00));
2520 brw_pop_insn_state(p
);
2523 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2525 assert(msg_length
< BRW_MAX_MRF(devinfo
->gen
));
2527 brw_set_dest(p
, insn
, dest
);
2528 brw_set_src0(p
, insn
, src0
);
2529 brw_set_src1(p
, insn
, brw_imm_d(0));
2531 if (devinfo
->gen
< 6)
2532 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2534 brw_set_urb_message(p
,
2544 brw_send_indirect_message(struct brw_codegen
*p
,
2547 struct brw_reg payload
,
2548 struct brw_reg desc
)
2550 const struct brw_device_info
*devinfo
= p
->devinfo
;
2551 struct brw_inst
*send
;
2554 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
2556 assert(desc
.type
== BRW_REGISTER_TYPE_UD
);
2558 /* We hold on to the setup instruction (the SEND in the direct case, the OR
2559 * in the indirect case) by its index in the instruction store. The
2560 * pointer returned by next_insn() may become invalid if emitting the SEND
2561 * in the indirect case reallocs the store.
2564 if (desc
.file
== BRW_IMMEDIATE_VALUE
) {
2566 send
= next_insn(p
, BRW_OPCODE_SEND
);
2567 brw_set_src1(p
, send
, desc
);
2570 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2572 brw_push_insn_state(p
);
2573 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2574 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2575 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2577 /* Load the indirect descriptor to an address register using OR so the
2578 * caller can specify additional descriptor bits with the usual
2579 * brw_set_*_message() helper functions.
2582 brw_OR(p
, addr
, desc
, brw_imm_ud(0));
2584 brw_pop_insn_state(p
);
2586 send
= next_insn(p
, BRW_OPCODE_SEND
);
2587 brw_set_src1(p
, send
, addr
);
2590 if (dst
.width
< BRW_EXECUTE_8
)
2591 brw_inst_set_exec_size(devinfo
, send
, dst
.width
);
2593 brw_set_dest(p
, send
, dst
);
2594 brw_set_src0(p
, send
, retype(payload
, BRW_REGISTER_TYPE_UD
));
2595 brw_inst_set_sfid(devinfo
, send
, sfid
);
2597 return &p
->store
[setup
];
2600 static struct brw_inst
*
2601 brw_send_indirect_surface_message(struct brw_codegen
*p
,
2604 struct brw_reg payload
,
2605 struct brw_reg surface
,
2606 unsigned message_len
,
2607 unsigned response_len
,
2608 bool header_present
)
2610 const struct brw_device_info
*devinfo
= p
->devinfo
;
2611 struct brw_inst
*insn
;
2613 if (surface
.file
!= BRW_IMMEDIATE_VALUE
) {
2614 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2616 brw_push_insn_state(p
);
2617 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2618 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2619 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2621 /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2622 * some surface array is accessed out of bounds.
2624 insn
= brw_AND(p
, addr
,
2625 suboffset(vec1(retype(surface
, BRW_REGISTER_TYPE_UD
)),
2626 BRW_GET_SWZ(surface
.swizzle
, 0)),
2629 brw_pop_insn_state(p
);
2634 insn
= brw_send_indirect_message(p
, sfid
, dst
, payload
, surface
);
2635 brw_inst_set_mlen(devinfo
, insn
, message_len
);
2636 brw_inst_set_rlen(devinfo
, insn
, response_len
);
2637 brw_inst_set_header_present(devinfo
, insn
, header_present
);
2643 brw_find_next_block_end(struct brw_codegen
*p
, int start_offset
)
2646 void *store
= p
->store
;
2647 const struct brw_device_info
*devinfo
= p
->devinfo
;
2651 for (offset
= next_offset(devinfo
, store
, start_offset
);
2652 offset
< p
->next_insn_offset
;
2653 offset
= next_offset(devinfo
, store
, offset
)) {
2654 brw_inst
*insn
= store
+ offset
;
2656 switch (brw_inst_opcode(devinfo
, insn
)) {
2660 case BRW_OPCODE_ENDIF
:
2665 case BRW_OPCODE_ELSE
:
2666 case BRW_OPCODE_WHILE
:
2667 case BRW_OPCODE_HALT
:
2676 /* There is no DO instruction on gen6, so to find the end of the loop
2677 * we have to see if the loop is jumping back before our start
2681 brw_find_loop_end(struct brw_codegen
*p
, int start_offset
)
2683 const struct brw_device_info
*devinfo
= p
->devinfo
;
2685 int scale
= 16 / brw_jump_scale(devinfo
);
2686 void *store
= p
->store
;
2688 assert(devinfo
->gen
>= 6);
2690 /* Always start after the instruction (such as a WHILE) we're trying to fix
2693 for (offset
= next_offset(devinfo
, store
, start_offset
);
2694 offset
< p
->next_insn_offset
;
2695 offset
= next_offset(devinfo
, store
, offset
)) {
2696 brw_inst
*insn
= store
+ offset
;
2698 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
) {
2699 int jip
= devinfo
->gen
== 6 ? brw_inst_gen6_jump_count(devinfo
, insn
)
2700 : brw_inst_jip(devinfo
, insn
);
2701 if (offset
+ jip
* scale
<= start_offset
)
2705 assert(!"not reached");
2706 return start_offset
;
2709 /* After program generation, go back and update the UIP and JIP of
2710 * BREAK, CONT, and HALT instructions to their correct locations.
2713 brw_set_uip_jip(struct brw_codegen
*p
)
2715 const struct brw_device_info
*devinfo
= p
->devinfo
;
2717 int br
= brw_jump_scale(devinfo
);
2718 int scale
= 16 / br
;
2719 void *store
= p
->store
;
2721 if (devinfo
->gen
< 6)
2724 for (offset
= 0; offset
< p
->next_insn_offset
;
2725 offset
= next_offset(devinfo
, store
, offset
)) {
2726 brw_inst
*insn
= store
+ offset
;
2728 if (brw_inst_cmpt_control(devinfo
, insn
)) {
2729 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2730 assert(brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_BREAK
&&
2731 brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_CONTINUE
&&
2732 brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_HALT
);
2736 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2737 switch (brw_inst_opcode(devinfo
, insn
)) {
2738 case BRW_OPCODE_BREAK
:
2739 assert(block_end_offset
!= 0);
2740 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2741 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2742 brw_inst_set_uip(devinfo
, insn
,
2743 (brw_find_loop_end(p
, offset
) - offset
+
2744 (devinfo
->gen
== 6 ? 16 : 0)) / scale
);
2746 case BRW_OPCODE_CONTINUE
:
2747 assert(block_end_offset
!= 0);
2748 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2749 brw_inst_set_uip(devinfo
, insn
,
2750 (brw_find_loop_end(p
, offset
) - offset
) / scale
);
2752 assert(brw_inst_uip(devinfo
, insn
) != 0);
2753 assert(brw_inst_jip(devinfo
, insn
) != 0);
2756 case BRW_OPCODE_ENDIF
: {
2757 int32_t jump
= (block_end_offset
== 0) ?
2758 1 * br
: (block_end_offset
- offset
) / scale
;
2759 if (devinfo
->gen
>= 7)
2760 brw_inst_set_jip(devinfo
, insn
, jump
);
2762 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump
);
2766 case BRW_OPCODE_HALT
:
2767 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2769 * "In case of the halt instruction not inside any conditional
2770 * code block, the value of <JIP> and <UIP> should be the
2771 * same. In case of the halt instruction inside conditional code
2772 * block, the <UIP> should be the end of the program, and the
2773 * <JIP> should be end of the most inner conditional code block."
2775 * The uip will have already been set by whoever set up the
2778 if (block_end_offset
== 0) {
2779 brw_inst_set_jip(devinfo
, insn
, brw_inst_uip(devinfo
, insn
));
2781 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2783 assert(brw_inst_uip(devinfo
, insn
) != 0);
2784 assert(brw_inst_jip(devinfo
, insn
) != 0);
2790 void brw_ff_sync(struct brw_codegen
*p
,
2791 struct brw_reg dest
,
2792 unsigned msg_reg_nr
,
2793 struct brw_reg src0
,
2795 unsigned response_length
,
2798 const struct brw_device_info
*devinfo
= p
->devinfo
;
2801 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2803 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2804 brw_set_dest(p
, insn
, dest
);
2805 brw_set_src0(p
, insn
, src0
);
2806 brw_set_src1(p
, insn
, brw_imm_d(0));
2808 if (devinfo
->gen
< 6)
2809 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2811 brw_set_ff_sync_message(p
,
2819 * Emit the SEND instruction necessary to generate stream output data on Gen6
2820 * (for transform feedback).
2822 * If send_commit_msg is true, this is the last piece of stream output data
2823 * from this thread, so send the data as a committed write. According to the
2824 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2826 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2827 * writes are complete by sending the final write as a committed write."
2830 brw_svb_write(struct brw_codegen
*p
,
2831 struct brw_reg dest
,
2832 unsigned msg_reg_nr
,
2833 struct brw_reg src0
,
2834 unsigned binding_table_index
,
2835 bool send_commit_msg
)
2839 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2841 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2842 brw_set_dest(p
, insn
, dest
);
2843 brw_set_src0(p
, insn
, src0
);
2844 brw_set_src1(p
, insn
, brw_imm_d(0));
2845 brw_set_dp_write_message(p
, insn
,
2846 binding_table_index
,
2847 0, /* msg_control: ignored */
2848 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2850 true, /* header_present */
2851 0, /* last_render_target: ignored */
2852 send_commit_msg
, /* response_length */
2853 0, /* end_of_thread */
2854 send_commit_msg
); /* send_commit_msg */
2858 brw_surface_payload_size(struct brw_codegen
*p
,
2859 unsigned num_channels
,
2863 if (has_simd4x2
&& brw_inst_access_mode(p
->devinfo
, p
->current
) == BRW_ALIGN_16
)
2865 else if (has_simd16
&& p
->compressed
)
2866 return 2 * num_channels
;
2868 return num_channels
;
2872 brw_set_dp_untyped_atomic_message(struct brw_codegen
*p
,
2875 bool response_expected
)
2877 const struct brw_device_info
*devinfo
= p
->devinfo
;
2878 unsigned msg_control
=
2879 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2880 (response_expected
? 1 << 5 : 0); /* Return data expected */
2882 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2883 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2885 msg_control
|= 1 << 4; /* SIMD8 mode */
2887 brw_inst_set_dp_msg_type(devinfo
, insn
,
2888 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
);
2890 brw_inst_set_dp_msg_type(devinfo
, insn
,
2891 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
);
2894 brw_inst_set_dp_msg_type(devinfo
, insn
,
2895 GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
);
2898 msg_control
|= 1 << 4; /* SIMD8 mode */
2901 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2905 brw_untyped_atomic(struct brw_codegen
*p
,
2907 struct brw_reg payload
,
2908 struct brw_reg surface
,
2910 unsigned msg_length
,
2911 bool response_expected
)
2913 const struct brw_device_info
*devinfo
= p
->devinfo
;
2914 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2915 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2916 GEN7_SFID_DATAPORT_DATA_CACHE
);
2917 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2918 /* Mask out unused components -- This is especially important in Align16
2919 * mode on generations that don't have native support for SIMD4x2 atomics,
2920 * because unused but enabled components will cause the dataport to perform
2921 * additional atomic operations on the addresses that happen to be in the
2922 * uninitialized Y, Z and W coordinates of the payload.
2924 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2925 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2926 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
2927 brw_surface_payload_size(p
, response_expected
,
2928 devinfo
->gen
>= 8 || devinfo
->is_haswell
, true),
2931 brw_set_dp_untyped_atomic_message(
2932 p
, insn
, atomic_op
, response_expected
);
2936 brw_set_dp_untyped_surface_read_message(struct brw_codegen
*p
,
2937 struct brw_inst
*insn
,
2938 unsigned num_channels
)
2940 const struct brw_device_info
*devinfo
= p
->devinfo
;
2941 /* Set mask of 32-bit channels to drop. */
2942 unsigned msg_control
= 0xf & (0xf << num_channels
);
2944 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2946 msg_control
|= 1 << 4; /* SIMD16 mode */
2948 msg_control
|= 2 << 4; /* SIMD8 mode */
2951 brw_inst_set_dp_msg_type(devinfo
, insn
,
2952 (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2953 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
:
2954 GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
));
2955 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2959 brw_untyped_surface_read(struct brw_codegen
*p
,
2961 struct brw_reg payload
,
2962 struct brw_reg surface
,
2963 unsigned msg_length
,
2964 unsigned num_channels
)
2966 const struct brw_device_info
*devinfo
= p
->devinfo
;
2967 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2968 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2969 GEN7_SFID_DATAPORT_DATA_CACHE
);
2970 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2971 p
, sfid
, dst
, payload
, surface
, msg_length
,
2972 brw_surface_payload_size(p
, num_channels
, true, true),
2975 brw_set_dp_untyped_surface_read_message(
2976 p
, insn
, num_channels
);
2980 brw_set_dp_untyped_surface_write_message(struct brw_codegen
*p
,
2981 struct brw_inst
*insn
,
2982 unsigned num_channels
)
2984 const struct brw_device_info
*devinfo
= p
->devinfo
;
2985 /* Set mask of 32-bit channels to drop. */
2986 unsigned msg_control
= 0xf & (0xf << num_channels
);
2988 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2990 msg_control
|= 1 << 4; /* SIMD16 mode */
2992 msg_control
|= 2 << 4; /* SIMD8 mode */
2994 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
2995 msg_control
|= 0 << 4; /* SIMD4x2 mode */
2997 msg_control
|= 2 << 4; /* SIMD8 mode */
3000 brw_inst_set_dp_msg_type(devinfo
, insn
,
3001 devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3002 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
:
3003 GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
);
3004 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3008 brw_untyped_surface_write(struct brw_codegen
*p
,
3009 struct brw_reg payload
,
3010 struct brw_reg surface
,
3011 unsigned msg_length
,
3012 unsigned num_channels
)
3014 const struct brw_device_info
*devinfo
= p
->devinfo
;
3015 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3016 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3017 GEN7_SFID_DATAPORT_DATA_CACHE
);
3018 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3019 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3020 const unsigned mask
= devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3021 WRITEMASK_X
: WRITEMASK_XYZW
;
3022 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3023 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3024 payload
, surface
, msg_length
, 0, align1
);
3026 brw_set_dp_untyped_surface_write_message(
3027 p
, insn
, num_channels
);
3031 brw_set_dp_typed_atomic_message(struct brw_codegen
*p
,
3032 struct brw_inst
*insn
,
3034 bool response_expected
)
3036 const struct brw_device_info
*devinfo
= p
->devinfo
;
3037 unsigned msg_control
=
3038 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
3039 (response_expected
? 1 << 5 : 0); /* Return data expected */
3041 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3042 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3043 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3044 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3046 brw_inst_set_dp_msg_type(devinfo
, insn
,
3047 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
);
3049 brw_inst_set_dp_msg_type(devinfo
, insn
,
3050 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
);
3054 brw_inst_set_dp_msg_type(devinfo
, insn
,
3055 GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
);
3057 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3058 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3061 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3065 brw_typed_atomic(struct brw_codegen
*p
,
3067 struct brw_reg payload
,
3068 struct brw_reg surface
,
3070 unsigned msg_length
,
3071 bool response_expected
) {
3072 const struct brw_device_info
*devinfo
= p
->devinfo
;
3073 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3074 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3075 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3076 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3077 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3078 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
3079 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3080 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
3081 brw_surface_payload_size(p
, response_expected
,
3082 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3085 brw_set_dp_typed_atomic_message(
3086 p
, insn
, atomic_op
, response_expected
);
3090 brw_set_dp_typed_surface_read_message(struct brw_codegen
*p
,
3091 struct brw_inst
*insn
,
3092 unsigned num_channels
)
3094 const struct brw_device_info
*devinfo
= p
->devinfo
;
3095 /* Set mask of unused channels. */
3096 unsigned msg_control
= 0xf & (0xf << num_channels
);
3098 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3099 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3100 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3101 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3103 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3106 brw_inst_set_dp_msg_type(devinfo
, insn
,
3107 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
);
3109 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3110 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3111 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3114 brw_inst_set_dp_msg_type(devinfo
, insn
,
3115 GEN7_DATAPORT_RC_TYPED_SURFACE_READ
);
3118 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3122 brw_typed_surface_read(struct brw_codegen
*p
,
3124 struct brw_reg payload
,
3125 struct brw_reg surface
,
3126 unsigned msg_length
,
3127 unsigned num_channels
)
3129 const struct brw_device_info
*devinfo
= p
->devinfo
;
3130 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3131 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3132 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3133 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3134 p
, sfid
, dst
, payload
, surface
, msg_length
,
3135 brw_surface_payload_size(p
, num_channels
,
3136 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3139 brw_set_dp_typed_surface_read_message(
3140 p
, insn
, num_channels
);
3144 brw_set_dp_typed_surface_write_message(struct brw_codegen
*p
,
3145 struct brw_inst
*insn
,
3146 unsigned num_channels
)
3148 const struct brw_device_info
*devinfo
= p
->devinfo
;
3149 /* Set mask of unused channels. */
3150 unsigned msg_control
= 0xf & (0xf << num_channels
);
3152 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3153 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3154 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3155 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3157 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3160 brw_inst_set_dp_msg_type(devinfo
, insn
,
3161 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
);
3164 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3165 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3166 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3169 brw_inst_set_dp_msg_type(devinfo
, insn
,
3170 GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
);
3173 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3177 brw_typed_surface_write(struct brw_codegen
*p
,
3178 struct brw_reg payload
,
3179 struct brw_reg surface
,
3180 unsigned msg_length
,
3181 unsigned num_channels
)
3183 const struct brw_device_info
*devinfo
= p
->devinfo
;
3184 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3185 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3186 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3187 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3188 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3189 const unsigned mask
= (devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3190 WRITEMASK_X
: WRITEMASK_XYZW
);
3191 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3192 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3193 payload
, surface
, msg_length
, 0, true);
3195 brw_set_dp_typed_surface_write_message(
3196 p
, insn
, num_channels
);
3200 brw_set_memory_fence_message(struct brw_codegen
*p
,
3201 struct brw_inst
*insn
,
3202 enum brw_message_target sfid
,
3205 const struct brw_device_info
*devinfo
= p
->devinfo
;
3207 brw_set_message_descriptor(p
, insn
, sfid
,
3208 1 /* message length */,
3209 (commit_enable
? 1 : 0) /* response length */,
3210 true /* header present */,
3214 case GEN6_SFID_DATAPORT_RENDER_CACHE
:
3215 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_RC_MEMORY_FENCE
);
3217 case GEN7_SFID_DATAPORT_DATA_CACHE
:
3218 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_DC_MEMORY_FENCE
);
3221 unreachable("Not reached");
3225 brw_inst_set_dp_msg_control(devinfo
, insn
, 1 << 5);
3229 brw_memory_fence(struct brw_codegen
*p
,
3232 const struct brw_device_info
*devinfo
= p
->devinfo
;
3233 const bool commit_enable
= devinfo
->gen
== 7 && !devinfo
->is_haswell
;
3234 struct brw_inst
*insn
;
3236 /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3237 * message doesn't write anything back.
3239 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3240 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
3241 brw_set_dest(p
, insn
, dst
);
3242 brw_set_src0(p
, insn
, dst
);
3243 brw_set_memory_fence_message(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
3246 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
3247 /* IVB does typed surface access through the render cache, so we need to
3248 * flush it too. Use a different register so both flushes can be
3249 * pipelined by the hardware.
3251 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3252 brw_set_dest(p
, insn
, offset(dst
, 1));
3253 brw_set_src0(p
, insn
, offset(dst
, 1));
3254 brw_set_memory_fence_message(p
, insn
, GEN6_SFID_DATAPORT_RENDER_CACHE
,
3257 /* Now write the response of the second message into the response of the
3258 * first to trigger a pipeline stall -- This way future render and data
3259 * cache messages will be properly ordered with respect to past data and
3260 * render cache messages.
3262 brw_push_insn_state(p
);
3263 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3264 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3265 brw_MOV(p
, dst
, offset(dst
, 1));
3266 brw_pop_insn_state(p
);
3271 brw_pixel_interpolator_query(struct brw_codegen
*p
,
3272 struct brw_reg dest
,
3276 struct brw_reg data
,
3277 unsigned msg_length
,
3278 unsigned response_length
)
3280 const struct brw_device_info
*devinfo
= p
->devinfo
;
3281 struct brw_inst
*insn
;
3282 const uint16_t exec_size
= brw_inst_exec_size(devinfo
, p
->current
);
3284 /* brw_send_indirect_message will automatically use a direct send message
3285 * if data is actually immediate.
3287 insn
= brw_send_indirect_message(p
,
3288 GEN7_SFID_PIXEL_INTERPOLATOR
,
3292 brw_inst_set_mlen(devinfo
, insn
, msg_length
);
3293 brw_inst_set_rlen(devinfo
, insn
, response_length
);
3295 brw_inst_set_pi_simd_mode(devinfo
, insn
, exec_size
== BRW_EXECUTE_16
);
3296 brw_inst_set_pi_slot_group(devinfo
, insn
, 0); /* zero unless 32/64px dispatch */
3297 brw_inst_set_pi_nopersp(devinfo
, insn
, noperspective
);
3298 brw_inst_set_pi_message_type(devinfo
, insn
, mode
);
3302 brw_find_live_channel(struct brw_codegen
*p
, struct brw_reg dst
)
3304 const struct brw_device_info
*devinfo
= p
->devinfo
;
3307 assert(devinfo
->gen
>= 7);
3309 brw_push_insn_state(p
);
3311 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3312 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3314 if (devinfo
->gen
>= 8) {
3315 /* Getting the first active channel index is easy on Gen8: Just find
3316 * the first bit set in the mask register. The same register exists
3317 * on HSW already but it reads back as all ones when the current
3318 * instruction has execution masking disabled, so it's kind of
3321 inst
= brw_FBL(p
, vec1(dst
),
3322 retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
));
3324 /* Quarter control has the effect of magically shifting the value of
3325 * this register. Make sure it's set to zero.
3327 brw_inst_set_qtr_control(devinfo
, inst
, GEN6_COMPRESSION_1Q
);
3329 const struct brw_reg flag
= retype(brw_flag_reg(1, 0),
3330 BRW_REGISTER_TYPE_UD
);
3332 brw_MOV(p
, flag
, brw_imm_ud(0));
3334 /* Run a 16-wide instruction returning zero with execution masking
3335 * and a conditional modifier enabled in order to get the current
3336 * execution mask in f1.0.
3338 inst
= brw_MOV(p
, brw_null_reg(), brw_imm_ud(0));
3339 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_16
);
3340 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3341 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_Z
);
3342 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3344 brw_FBL(p
, vec1(dst
), flag
);
3347 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3349 if (devinfo
->gen
>= 8) {
3350 /* In SIMD4x2 mode the first active channel index is just the
3351 * negation of the first bit of the mask register.
3353 inst
= brw_AND(p
, brw_writemask(dst
, WRITEMASK_X
),
3354 negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
)),
3358 /* Overwrite the destination without and with execution masking to
3359 * find out which of the channels is active.
3361 brw_push_insn_state(p
);
3362 brw_set_default_exec_size(p
, BRW_EXECUTE_4
);
3363 brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3366 inst
= brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3368 brw_pop_insn_state(p
);
3369 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3373 brw_pop_insn_state(p
);
3377 brw_broadcast(struct brw_codegen
*p
,
3382 const struct brw_device_info
*devinfo
= p
->devinfo
;
3383 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3386 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
&&
3387 src
.address_mode
== BRW_ADDRESS_DIRECT
);
3389 if ((src
.vstride
== 0 && (src
.hstride
== 0 || !align1
)) ||
3390 idx
.file
== BRW_IMMEDIATE_VALUE
) {
3391 /* Trivial, the source is already uniform or the index is a constant.
3392 * We will typically not get here if the optimizer is doing its job, but
3393 * asserting would be mean.
3395 const unsigned i
= idx
.file
== BRW_IMMEDIATE_VALUE
? idx
.ud
: 0;
3397 (align1
? stride(suboffset(src
, i
), 0, 1, 0) :
3398 stride(suboffset(src
, 4 * i
), 0, 4, 1)));
3401 const struct brw_reg addr
=
3402 retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
3403 const unsigned offset
= src
.nr
* REG_SIZE
+ src
.subnr
;
3404 /* Limit in bytes of the signed indirect addressing immediate. */
3405 const unsigned limit
= 512;
3407 brw_push_insn_state(p
);
3408 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3409 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
3411 /* Take into account the component size and horizontal stride. */
3412 assert(src
.vstride
== src
.hstride
+ src
.width
);
3413 brw_SHL(p
, addr
, vec1(idx
),
3414 brw_imm_ud(_mesa_logbase2(type_sz(src
.type
)) +
3417 /* We can only address up to limit bytes using the indirect
3418 * addressing immediate, account for the difference if the source
3419 * register is above this limit.
3421 if (offset
>= limit
)
3422 brw_ADD(p
, addr
, addr
, brw_imm_ud(offset
- offset
% limit
));
3424 brw_pop_insn_state(p
);
3426 /* Use indirect addressing to fetch the specified component. */
3428 retype(brw_vec1_indirect(addr
.subnr
, offset
% limit
),
3431 /* In SIMD4x2 mode the index can be either zero or one, replicate it
3432 * to all bits of a flag register,
3436 stride(brw_swizzle(idx
, BRW_SWIZZLE_XXXX
), 0, 4, 1));
3437 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NONE
);
3438 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_NZ
);
3439 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3441 /* and use predicated SEL to pick the right channel. */
3442 inst
= brw_SEL(p
, dst
,
3443 stride(suboffset(src
, 4), 0, 4, 1),
3444 stride(src
, 0, 4, 1));
3445 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NORMAL
);
3446 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3452 * This instruction is generated as a single-channel align1 instruction by
3453 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3455 * We can't use the typed atomic op in the FS because that has the execution
3456 * mask ANDed with the pixel mask, but we just want to write the one dword for
3459 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
3460 * one u32. So we use the same untyped atomic write message as the pixel
3463 * The untyped atomic operation requires a BUFFER surface type with RAW
3464 * format, and is only accessible through the legacy DATA_CACHE dataport
3467 void brw_shader_time_add(struct brw_codegen
*p
,
3468 struct brw_reg payload
,
3469 uint32_t surf_index
)
3471 const unsigned sfid
= (p
->devinfo
->gen
>= 8 || p
->devinfo
->is_haswell
?
3472 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3473 GEN7_SFID_DATAPORT_DATA_CACHE
);
3474 assert(p
->devinfo
->gen
>= 7);
3476 brw_push_insn_state(p
);
3477 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3478 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3479 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3480 brw_inst
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
3482 /* We use brw_vec1_reg and unmasked because we want to increment the given
3485 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
3487 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
3489 brw_set_src1(p
, send
, brw_imm_ud(0));
3490 brw_set_message_descriptor(p
, send
, sfid
, 2, 0, false, false);
3491 brw_inst_set_binding_table_index(p
->devinfo
, send
, surf_index
);
3492 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, false);
3494 brw_pop_insn_state(p
);
3499 * Emit the SEND message for a barrier
3502 brw_barrier(struct brw_codegen
*p
, struct brw_reg src
)
3504 const struct brw_device_info
*devinfo
= p
->devinfo
;
3505 struct brw_inst
*inst
;
3507 assert(devinfo
->gen
>= 7);
3509 inst
= next_insn(p
, BRW_OPCODE_SEND
);
3510 brw_set_dest(p
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
));
3511 brw_set_src0(p
, inst
, src
);
3512 brw_set_src1(p
, inst
, brw_null_reg());
3514 brw_set_message_descriptor(p
, inst
, BRW_SFID_MESSAGE_GATEWAY
,
3516 0 /* response_length */,
3517 false /* header_present */,
3518 false /* end_of_thread */);
3520 brw_inst_set_gateway_notify(devinfo
, inst
, 1);
3521 brw_inst_set_gateway_subfuncid(devinfo
, inst
,
3522 BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG
);
3524 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
3529 * Emit the wait instruction for a barrier
3532 brw_WAIT(struct brw_codegen
*p
)
3534 const struct brw_device_info
*devinfo
= p
->devinfo
;
3535 struct brw_inst
*insn
;
3537 struct brw_reg src
= brw_notification_reg();
3539 insn
= next_insn(p
, BRW_OPCODE_WAIT
);
3540 brw_set_dest(p
, insn
, src
);
3541 brw_set_src0(p
, insn
, src
);
3542 brw_set_src1(p
, insn
, brw_null_reg());
3544 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
3545 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_DISABLE
);