2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_eu_defines.h"
36 #include "util/ralloc.h"
39 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
40 * registers, implicitly moving the operand to a message register.
42 * On Sandybridge, this is no longer the case. This function performs the
43 * explicit move; it should be called before emitting a SEND instruction.
46 gen6_resolve_implied_move(struct brw_codegen
*p
,
50 const struct gen_device_info
*devinfo
= p
->devinfo
;
54 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
57 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
58 brw_push_insn_state(p
);
59 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
60 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
61 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
62 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
63 retype(*src
, BRW_REGISTER_TYPE_UD
));
64 brw_pop_insn_state(p
);
66 *src
= brw_message_reg(msg_reg_nr
);
70 gen7_convert_mrf_to_grf(struct brw_codegen
*p
, struct brw_reg
*reg
)
72 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
73 * "The send with EOT should use register space R112-R127 for <src>. This is
74 * to enable loading of a new thread into the same slot while the message
75 * with EOT for current thread is pending dispatch."
77 * Since we're pretending to have 16 MRFs anyway, we may as well use the
78 * registers required for messages with EOT.
80 const struct gen_device_info
*devinfo
= p
->devinfo
;
81 if (devinfo
->gen
>= 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
82 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
83 reg
->nr
+= GEN7_MRF_HACK_START
;
88 * Convert a brw_reg_type enumeration value into the hardware representation.
90 * The hardware encoding may depend on whether the value is an immediate.
93 brw_reg_type_to_hw_type(const struct gen_device_info
*devinfo
,
94 enum brw_reg_type type
, enum brw_reg_file file
)
96 if (file
== BRW_IMMEDIATE_VALUE
) {
97 static const int imm_hw_types
[] = {
98 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
99 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
100 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
101 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
102 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
103 [BRW_REGISTER_TYPE_UB
] = -1,
104 [BRW_REGISTER_TYPE_B
] = -1,
105 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
106 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
107 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
108 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
109 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
110 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
111 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
113 assert(type
< ARRAY_SIZE(imm_hw_types
));
114 assert(imm_hw_types
[type
] != -1);
115 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
116 return imm_hw_types
[type
];
118 /* Non-immediate registers */
119 static const int hw_types
[] = {
120 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
121 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
122 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
123 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
124 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
125 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
126 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
127 [BRW_REGISTER_TYPE_UV
] = -1,
128 [BRW_REGISTER_TYPE_VF
] = -1,
129 [BRW_REGISTER_TYPE_V
] = -1,
130 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
131 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
132 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
133 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
135 assert(type
< ARRAY_SIZE(hw_types
));
136 assert(hw_types
[type
] != -1);
137 assert(devinfo
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
138 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_Q
);
139 return hw_types
[type
];
144 * Return the element size given a hardware register type and file.
146 * The hardware encoding may depend on whether the value is an immediate.
149 brw_hw_reg_type_to_size(const struct gen_device_info
*devinfo
,
150 unsigned type
, enum brw_reg_file file
)
152 if (file
== BRW_IMMEDIATE_VALUE
) {
153 static const unsigned imm_hw_sizes
[] = {
154 [BRW_HW_REG_TYPE_UD
] = 4,
155 [BRW_HW_REG_TYPE_D
] = 4,
156 [BRW_HW_REG_TYPE_UW
] = 2,
157 [BRW_HW_REG_TYPE_W
] = 2,
158 [BRW_HW_REG_IMM_TYPE_UV
] = 2,
159 [BRW_HW_REG_IMM_TYPE_VF
] = 4,
160 [BRW_HW_REG_IMM_TYPE_V
] = 2,
161 [BRW_HW_REG_TYPE_F
] = 4,
162 [GEN8_HW_REG_TYPE_UQ
] = 8,
163 [GEN8_HW_REG_TYPE_Q
] = 8,
164 [GEN8_HW_REG_IMM_TYPE_DF
] = 8,
165 [GEN8_HW_REG_IMM_TYPE_HF
] = 2,
167 assert(type
< ARRAY_SIZE(imm_hw_sizes
));
168 assert(devinfo
->gen
>= 6 || type
!= BRW_HW_REG_IMM_TYPE_UV
);
169 assert(devinfo
->gen
>= 8 || type
<= BRW_HW_REG_TYPE_F
);
170 return imm_hw_sizes
[type
];
172 /* Non-immediate registers */
173 static const unsigned hw_sizes
[] = {
174 [BRW_HW_REG_TYPE_UD
] = 4,
175 [BRW_HW_REG_TYPE_D
] = 4,
176 [BRW_HW_REG_TYPE_UW
] = 2,
177 [BRW_HW_REG_TYPE_W
] = 2,
178 [BRW_HW_REG_NON_IMM_TYPE_UB
] = 1,
179 [BRW_HW_REG_NON_IMM_TYPE_B
] = 1,
180 [GEN7_HW_REG_NON_IMM_TYPE_DF
] = 8,
181 [BRW_HW_REG_TYPE_F
] = 4,
182 [GEN8_HW_REG_TYPE_UQ
] = 8,
183 [GEN8_HW_REG_TYPE_Q
] = 8,
184 [GEN8_HW_REG_NON_IMM_TYPE_HF
] = 2,
186 assert(type
< ARRAY_SIZE(hw_sizes
));
187 assert(devinfo
->gen
>= 7 ||
188 (type
< GEN7_HW_REG_NON_IMM_TYPE_DF
|| type
== BRW_HW_REG_TYPE_F
));
189 assert(devinfo
->gen
>= 8 || type
<= BRW_HW_REG_TYPE_F
);
190 return hw_sizes
[type
];
195 brw_set_dest(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg dest
)
197 const struct gen_device_info
*devinfo
= p
->devinfo
;
199 if (dest
.file
== BRW_MESSAGE_REGISTER_FILE
)
200 assert((dest
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
201 else if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
202 assert(dest
.nr
< 128);
204 gen7_convert_mrf_to_grf(p
, &dest
);
206 brw_inst_set_dst_reg_file(devinfo
, inst
, dest
.file
);
207 brw_inst_set_dst_reg_type(devinfo
, inst
,
208 brw_reg_type_to_hw_type(devinfo
, dest
.type
,
210 brw_inst_set_dst_address_mode(devinfo
, inst
, dest
.address_mode
);
212 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
213 brw_inst_set_dst_da_reg_nr(devinfo
, inst
, dest
.nr
);
215 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
216 brw_inst_set_dst_da1_subreg_nr(devinfo
, inst
, dest
.subnr
);
217 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
218 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
219 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
221 brw_inst_set_dst_da16_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
222 brw_inst_set_da16_writemask(devinfo
, inst
, dest
.writemask
);
223 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
224 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
225 assert(dest
.writemask
!= 0);
227 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
228 * Although Dst.HorzStride is a don't care for Align16, HW needs
229 * this to be programmed as "01".
231 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
234 brw_inst_set_dst_ia_subreg_nr(devinfo
, inst
, dest
.subnr
);
236 /* These are different sizes in align1 vs align16:
238 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
239 brw_inst_set_dst_ia1_addr_imm(devinfo
, inst
,
240 dest
.indirect_offset
);
241 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
242 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
243 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
245 brw_inst_set_dst_ia16_addr_imm(devinfo
, inst
,
246 dest
.indirect_offset
);
247 /* even ignored in da16, still need to set as '01' */
248 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
252 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
253 * or 16 (SIMD16), as that's normally correct. However, when dealing with
254 * small registers, we automatically reduce it to match the register size.
256 * In platforms that support fp64 we can emit instructions with a width of
257 * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these
258 * cases we need to make sure that these instructions have their exec sizes
259 * set properly when they are emitted and we can't rely on this code to fix
263 if (devinfo
->gen
>= 6)
264 fix_exec_size
= dest
.width
< BRW_EXECUTE_4
;
266 fix_exec_size
= dest
.width
< BRW_EXECUTE_8
;
269 brw_inst_set_exec_size(devinfo
, inst
, dest
.width
);
273 validate_reg(const struct gen_device_info
*devinfo
,
274 brw_inst
*inst
, struct brw_reg reg
)
276 const int hstride_for_reg
[] = {0, 1, 2, 4};
277 const int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32};
278 const int width_for_reg
[] = {1, 2, 4, 8, 16};
279 const int execsize_for_reg
[] = {1, 2, 4, 8, 16, 32};
280 int width
, hstride
, vstride
, execsize
;
282 if (reg
.file
== BRW_IMMEDIATE_VALUE
)
285 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
286 reg
.file
== BRW_ARF_NULL
)
289 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
291 * "Swizzling is not allowed when an accumulator is used as an implicit
292 * source or an explicit source in an instruction."
294 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
295 reg
.nr
== BRW_ARF_ACCUMULATOR
)
296 assert(reg
.swizzle
== BRW_SWIZZLE_XYZW
);
298 assert(reg
.hstride
< ARRAY_SIZE(hstride_for_reg
));
299 hstride
= hstride_for_reg
[reg
.hstride
];
301 if (reg
.vstride
== 0xf) {
304 assert(reg
.vstride
>= 0 && reg
.vstride
< ARRAY_SIZE(vstride_for_reg
));
305 vstride
= vstride_for_reg
[reg
.vstride
];
308 assert(reg
.width
>= 0 && reg
.width
< ARRAY_SIZE(width_for_reg
));
309 width
= width_for_reg
[reg
.width
];
311 assert(brw_inst_exec_size(devinfo
, inst
) >= 0 &&
312 brw_inst_exec_size(devinfo
, inst
) < ARRAY_SIZE(execsize_for_reg
));
313 execsize
= execsize_for_reg
[brw_inst_exec_size(devinfo
, inst
)];
315 /* Restrictions from 3.3.10: Register Region Restrictions. */
317 assert(execsize
>= width
);
320 if (execsize
== width
&& hstride
!= 0) {
321 assert(vstride
== -1 || vstride
== width
* hstride
);
325 if (execsize
== width
&& hstride
== 0) {
326 /* no restriction on vstride. */
331 assert(hstride
== 0);
335 if (execsize
== 1 && width
== 1) {
336 assert(hstride
== 0);
337 assert(vstride
== 0);
341 if (vstride
== 0 && hstride
== 0) {
345 /* 10. Check destination issues. */
349 brw_set_src0(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
351 const struct gen_device_info
*devinfo
= p
->devinfo
;
353 if (reg
.file
== BRW_MESSAGE_REGISTER_FILE
)
354 assert((reg
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
355 else if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
356 assert(reg
.nr
< 128);
358 gen7_convert_mrf_to_grf(p
, ®
);
360 if (devinfo
->gen
>= 6 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
361 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
)) {
362 /* Any source modifiers or regions will be ignored, since this just
363 * identifies the MRF/GRF to start reading the message contents from.
364 * Check for some likely failures.
368 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
371 validate_reg(devinfo
, inst
, reg
);
373 brw_inst_set_src0_reg_file(devinfo
, inst
, reg
.file
);
374 brw_inst_set_src0_reg_type(devinfo
, inst
,
375 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
376 brw_inst_set_src0_abs(devinfo
, inst
, reg
.abs
);
377 brw_inst_set_src0_negate(devinfo
, inst
, reg
.negate
);
378 brw_inst_set_src0_address_mode(devinfo
, inst
, reg
.address_mode
);
380 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
381 if (reg
.type
== BRW_REGISTER_TYPE_DF
||
382 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_DIM
)
383 brw_inst_set_imm_df(devinfo
, inst
, reg
.df
);
384 else if (reg
.type
== BRW_REGISTER_TYPE_UQ
||
385 reg
.type
== BRW_REGISTER_TYPE_Q
)
386 brw_inst_set_imm_uq(devinfo
, inst
, reg
.u64
);
388 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
390 if (type_sz(reg
.type
) < 8) {
391 brw_inst_set_src1_reg_file(devinfo
, inst
,
392 BRW_ARCHITECTURE_REGISTER_FILE
);
393 brw_inst_set_src1_reg_type(devinfo
, inst
,
394 brw_inst_src0_reg_type(devinfo
, inst
));
397 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
398 brw_inst_set_src0_da_reg_nr(devinfo
, inst
, reg
.nr
);
399 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
400 brw_inst_set_src0_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
402 brw_inst_set_src0_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
405 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.subnr
);
407 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
408 brw_inst_set_src0_ia1_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
410 brw_inst_set_src0_ia16_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
414 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
415 if (reg
.width
== BRW_WIDTH_1
&&
416 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
417 brw_inst_set_src0_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
418 brw_inst_set_src0_width(devinfo
, inst
, BRW_WIDTH_1
);
419 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
421 brw_inst_set_src0_hstride(devinfo
, inst
, reg
.hstride
);
422 brw_inst_set_src0_width(devinfo
, inst
, reg
.width
);
423 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
426 brw_inst_set_src0_da16_swiz_x(devinfo
, inst
,
427 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
428 brw_inst_set_src0_da16_swiz_y(devinfo
, inst
,
429 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
430 brw_inst_set_src0_da16_swiz_z(devinfo
, inst
,
431 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
432 brw_inst_set_src0_da16_swiz_w(devinfo
, inst
,
433 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
435 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
436 /* This is an oddity of the fact we're using the same
437 * descriptions for registers in align_16 as align_1:
439 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
440 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
441 reg
.type
== BRW_REGISTER_TYPE_DF
&&
442 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
445 * "For Align16 access mode, only encodings of 0000 and 0011
446 * are allowed. Other codes are reserved."
448 * Presumably the DevSNB behavior applies to IVB as well.
450 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
452 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
460 brw_set_src1(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
462 const struct gen_device_info
*devinfo
= p
->devinfo
;
464 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
465 assert(reg
.nr
< 128);
467 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
469 * "Accumulator registers may be accessed explicitly as src0
472 assert(reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
473 reg
.nr
!= BRW_ARF_ACCUMULATOR
);
475 gen7_convert_mrf_to_grf(p
, ®
);
476 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
478 validate_reg(devinfo
, inst
, reg
);
480 brw_inst_set_src1_reg_file(devinfo
, inst
, reg
.file
);
481 brw_inst_set_src1_reg_type(devinfo
, inst
,
482 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
483 brw_inst_set_src1_abs(devinfo
, inst
, reg
.abs
);
484 brw_inst_set_src1_negate(devinfo
, inst
, reg
.negate
);
486 /* Only src1 can be immediate in two-argument instructions.
488 assert(brw_inst_src0_reg_file(devinfo
, inst
) != BRW_IMMEDIATE_VALUE
);
490 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
491 /* two-argument instructions can only use 32-bit immediates */
492 assert(type_sz(reg
.type
) < 8);
493 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
495 /* This is a hardware restriction, which may or may not be lifted
498 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
499 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
501 brw_inst_set_src1_da_reg_nr(devinfo
, inst
, reg
.nr
);
502 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
503 brw_inst_set_src1_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
505 brw_inst_set_src1_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
508 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
509 if (reg
.width
== BRW_WIDTH_1
&&
510 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
511 brw_inst_set_src1_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
512 brw_inst_set_src1_width(devinfo
, inst
, BRW_WIDTH_1
);
513 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
515 brw_inst_set_src1_hstride(devinfo
, inst
, reg
.hstride
);
516 brw_inst_set_src1_width(devinfo
, inst
, reg
.width
);
517 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
520 brw_inst_set_src1_da16_swiz_x(devinfo
, inst
,
521 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
522 brw_inst_set_src1_da16_swiz_y(devinfo
, inst
,
523 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
524 brw_inst_set_src1_da16_swiz_z(devinfo
, inst
,
525 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
526 brw_inst_set_src1_da16_swiz_w(devinfo
, inst
,
527 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
529 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
530 /* This is an oddity of the fact we're using the same
531 * descriptions for registers in align_16 as align_1:
533 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
534 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
535 reg
.type
== BRW_REGISTER_TYPE_DF
&&
536 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
539 * "For Align16 access mode, only encodings of 0000 and 0011
540 * are allowed. Other codes are reserved."
542 * Presumably the DevSNB behavior applies to IVB as well.
544 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
546 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
553 * Set the Message Descriptor and Extended Message Descriptor fields
556 * \note This zeroes out the Function Control bits, so it must be called
557 * \b before filling out any message-specific data. Callers can
558 * choose not to fill in irrelevant bits; they will be zero.
561 brw_set_message_descriptor(struct brw_codegen
*p
,
563 enum brw_message_target sfid
,
565 unsigned response_length
,
569 const struct gen_device_info
*devinfo
= p
->devinfo
;
571 brw_set_src1(p
, inst
, brw_imm_d(0));
573 /* For indirect sends, `inst` will not be the SEND/SENDC instruction
574 * itself; instead, it will be a MOV/OR into the address register.
576 * In this case, we avoid setting the extended message descriptor bits,
577 * since they go on the later SEND/SENDC instead and if set here would
578 * instead clobber the conditionalmod bits.
580 unsigned opcode
= brw_inst_opcode(devinfo
, inst
);
581 if (opcode
== BRW_OPCODE_SEND
|| opcode
== BRW_OPCODE_SENDC
) {
582 brw_inst_set_sfid(devinfo
, inst
, sfid
);
585 brw_inst_set_mlen(devinfo
, inst
, msg_length
);
586 brw_inst_set_rlen(devinfo
, inst
, response_length
);
587 brw_inst_set_eot(devinfo
, inst
, end_of_thread
);
589 if (devinfo
->gen
>= 5) {
590 brw_inst_set_header_present(devinfo
, inst
, header_present
);
594 static void brw_set_math_message( struct brw_codegen
*p
,
597 unsigned integer_type
,
601 const struct gen_device_info
*devinfo
= p
->devinfo
;
603 unsigned response_length
;
605 /* Infer message length from the function */
607 case BRW_MATH_FUNCTION_POW
:
608 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
609 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
610 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
618 /* Infer response length from the function */
620 case BRW_MATH_FUNCTION_SINCOS
:
621 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
630 brw_set_message_descriptor(p
, inst
, BRW_SFID_MATH
,
631 msg_length
, response_length
, false, false);
632 brw_inst_set_math_msg_function(devinfo
, inst
, function
);
633 brw_inst_set_math_msg_signed_int(devinfo
, inst
, integer_type
);
634 brw_inst_set_math_msg_precision(devinfo
, inst
, low_precision
);
635 brw_inst_set_math_msg_saturate(devinfo
, inst
, brw_inst_saturate(devinfo
, inst
));
636 brw_inst_set_math_msg_data_type(devinfo
, inst
, dataType
);
637 brw_inst_set_saturate(devinfo
, inst
, 0);
641 static void brw_set_ff_sync_message(struct brw_codegen
*p
,
644 unsigned response_length
,
647 const struct gen_device_info
*devinfo
= p
->devinfo
;
649 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
650 1, response_length
, true, end_of_thread
);
651 brw_inst_set_urb_opcode(devinfo
, insn
, 1); /* FF_SYNC */
652 brw_inst_set_urb_allocate(devinfo
, insn
, allocate
);
653 /* The following fields are not used by FF_SYNC: */
654 brw_inst_set_urb_global_offset(devinfo
, insn
, 0);
655 brw_inst_set_urb_swizzle_control(devinfo
, insn
, 0);
656 brw_inst_set_urb_used(devinfo
, insn
, 0);
657 brw_inst_set_urb_complete(devinfo
, insn
, 0);
660 static void brw_set_urb_message( struct brw_codegen
*p
,
662 enum brw_urb_write_flags flags
,
664 unsigned response_length
,
666 unsigned swizzle_control
)
668 const struct gen_device_info
*devinfo
= p
->devinfo
;
670 assert(devinfo
->gen
< 7 || swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
671 assert(devinfo
->gen
< 7 || !(flags
& BRW_URB_WRITE_ALLOCATE
));
672 assert(devinfo
->gen
>= 7 || !(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
674 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
675 msg_length
, response_length
, true,
676 flags
& BRW_URB_WRITE_EOT
);
678 if (flags
& BRW_URB_WRITE_OWORD
) {
679 assert(msg_length
== 2); /* header + one OWORD of data */
680 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_OWORD
);
682 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_HWORD
);
685 brw_inst_set_urb_global_offset(devinfo
, insn
, offset
);
686 brw_inst_set_urb_swizzle_control(devinfo
, insn
, swizzle_control
);
688 if (devinfo
->gen
< 8) {
689 brw_inst_set_urb_complete(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_COMPLETE
));
692 if (devinfo
->gen
< 7) {
693 brw_inst_set_urb_allocate(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_ALLOCATE
));
694 brw_inst_set_urb_used(devinfo
, insn
, !(flags
& BRW_URB_WRITE_UNUSED
));
696 brw_inst_set_urb_per_slot_offset(devinfo
, insn
,
697 !!(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
702 brw_set_dp_write_message(struct brw_codegen
*p
,
704 unsigned binding_table_index
,
705 unsigned msg_control
,
707 unsigned target_cache
,
710 unsigned last_render_target
,
711 unsigned response_length
,
712 unsigned end_of_thread
,
713 unsigned send_commit_msg
)
715 const struct gen_device_info
*devinfo
= p
->devinfo
;
716 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
717 BRW_SFID_DATAPORT_WRITE
);
719 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
720 header_present
, end_of_thread
);
722 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
723 brw_inst_set_dp_write_msg_type(devinfo
, insn
, msg_type
);
724 brw_inst_set_dp_write_msg_control(devinfo
, insn
, msg_control
);
725 brw_inst_set_rt_last(devinfo
, insn
, last_render_target
);
726 if (devinfo
->gen
< 7) {
727 brw_inst_set_dp_write_commit(devinfo
, insn
, send_commit_msg
);
732 brw_set_dp_read_message(struct brw_codegen
*p
,
734 unsigned binding_table_index
,
735 unsigned msg_control
,
737 unsigned target_cache
,
740 unsigned response_length
)
742 const struct gen_device_info
*devinfo
= p
->devinfo
;
743 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
744 BRW_SFID_DATAPORT_READ
);
746 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
747 header_present
, false);
749 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
750 brw_inst_set_dp_read_msg_type(devinfo
, insn
, msg_type
);
751 brw_inst_set_dp_read_msg_control(devinfo
, insn
, msg_control
);
752 if (devinfo
->gen
< 6)
753 brw_inst_set_dp_read_target_cache(devinfo
, insn
, target_cache
);
757 brw_set_sampler_message(struct brw_codegen
*p
,
759 unsigned binding_table_index
,
762 unsigned response_length
,
764 unsigned header_present
,
766 unsigned return_format
)
768 const struct gen_device_info
*devinfo
= p
->devinfo
;
770 brw_set_message_descriptor(p
, inst
, BRW_SFID_SAMPLER
, msg_length
,
771 response_length
, header_present
, false);
773 brw_inst_set_binding_table_index(devinfo
, inst
, binding_table_index
);
774 brw_inst_set_sampler(devinfo
, inst
, sampler
);
775 brw_inst_set_sampler_msg_type(devinfo
, inst
, msg_type
);
776 if (devinfo
->gen
>= 5) {
777 brw_inst_set_sampler_simd_mode(devinfo
, inst
, simd_mode
);
778 } else if (devinfo
->gen
== 4 && !devinfo
->is_g4x
) {
779 brw_inst_set_sampler_return_format(devinfo
, inst
, return_format
);
784 gen7_set_dp_scratch_message(struct brw_codegen
*p
,
788 bool invalidate_after_read
,
790 unsigned addr_offset
,
795 const struct gen_device_info
*devinfo
= p
->devinfo
;
796 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 ||
797 (devinfo
->gen
>= 8 && num_regs
== 8));
798 const unsigned block_size
= (devinfo
->gen
>= 8 ? _mesa_logbase2(num_regs
) :
801 brw_set_message_descriptor(p
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
802 mlen
, rlen
, header_present
, false);
803 brw_inst_set_dp_category(devinfo
, inst
, 1); /* Scratch Block Read/Write msgs */
804 brw_inst_set_scratch_read_write(devinfo
, inst
, write
);
805 brw_inst_set_scratch_type(devinfo
, inst
, dword
);
806 brw_inst_set_scratch_invalidate_after_read(devinfo
, inst
, invalidate_after_read
);
807 brw_inst_set_scratch_block_size(devinfo
, inst
, block_size
);
808 brw_inst_set_scratch_addr_offset(devinfo
, inst
, addr_offset
);
811 #define next_insn brw_next_insn
813 brw_next_insn(struct brw_codegen
*p
, unsigned opcode
)
815 const struct gen_device_info
*devinfo
= p
->devinfo
;
818 if (p
->nr_insn
+ 1 > p
->store_size
) {
820 p
->store
= reralloc(p
->mem_ctx
, p
->store
, brw_inst
, p
->store_size
);
823 p
->next_insn_offset
+= 16;
824 insn
= &p
->store
[p
->nr_insn
++];
825 memcpy(insn
, p
->current
, sizeof(*insn
));
827 brw_inst_set_opcode(devinfo
, insn
, opcode
);
832 brw_alu1(struct brw_codegen
*p
, unsigned opcode
,
833 struct brw_reg dest
, struct brw_reg src
)
835 brw_inst
*insn
= next_insn(p
, opcode
);
836 brw_set_dest(p
, insn
, dest
);
837 brw_set_src0(p
, insn
, src
);
842 brw_alu2(struct brw_codegen
*p
, unsigned opcode
,
843 struct brw_reg dest
, struct brw_reg src0
, struct brw_reg src1
)
845 /* 64-bit immediates are only supported on 1-src instructions */
846 assert(src0
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src0
.type
) <= 4);
847 assert(src1
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src1
.type
) <= 4);
849 brw_inst
*insn
= next_insn(p
, opcode
);
850 brw_set_dest(p
, insn
, dest
);
851 brw_set_src0(p
, insn
, src0
);
852 brw_set_src1(p
, insn
, src1
);
857 get_3src_subreg_nr(struct brw_reg reg
)
859 /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
860 * use 32-bit units (components 0..7). Since they only support F/D/UD
861 * types, this doesn't lose any flexibility, but uses fewer bits.
863 return reg
.subnr
/ 4;
867 brw_alu3(struct brw_codegen
*p
, unsigned opcode
, struct brw_reg dest
,
868 struct brw_reg src0
, struct brw_reg src1
, struct brw_reg src2
)
870 const struct gen_device_info
*devinfo
= p
->devinfo
;
871 brw_inst
*inst
= next_insn(p
, opcode
);
873 gen7_convert_mrf_to_grf(p
, &dest
);
875 assert(brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_16
);
877 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
878 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
879 assert(dest
.nr
< 128);
880 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
881 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
882 dest
.type
== BRW_REGISTER_TYPE_DF
||
883 dest
.type
== BRW_REGISTER_TYPE_D
||
884 dest
.type
== BRW_REGISTER_TYPE_UD
);
885 if (devinfo
->gen
== 6) {
886 brw_inst_set_3src_dst_reg_file(devinfo
, inst
,
887 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
889 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
890 brw_inst_set_3src_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
891 brw_inst_set_3src_dst_writemask(devinfo
, inst
, dest
.writemask
);
893 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
894 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
895 assert(src0
.nr
< 128);
896 brw_inst_set_3src_src0_swizzle(devinfo
, inst
, src0
.swizzle
);
897 brw_inst_set_3src_src0_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src0
));
898 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
899 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
900 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
901 brw_inst_set_3src_src0_rep_ctrl(devinfo
, inst
,
902 src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
904 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
905 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
906 assert(src1
.nr
< 128);
907 brw_inst_set_3src_src1_swizzle(devinfo
, inst
, src1
.swizzle
);
908 brw_inst_set_3src_src1_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src1
));
909 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
910 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
911 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
912 brw_inst_set_3src_src1_rep_ctrl(devinfo
, inst
,
913 src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
915 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
916 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
917 assert(src2
.nr
< 128);
918 brw_inst_set_3src_src2_swizzle(devinfo
, inst
, src2
.swizzle
);
919 brw_inst_set_3src_src2_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src2
));
920 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
921 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
922 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
923 brw_inst_set_3src_src2_rep_ctrl(devinfo
, inst
,
924 src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
926 if (devinfo
->gen
>= 7) {
927 /* Set both the source and destination types based on dest.type,
928 * ignoring the source register types. The MAD and LRP emitters ensure
929 * that all four types are float. The BFE and BFI2 emitters, however,
930 * may send us mixed D and UD types and want us to ignore that and use
931 * the destination type.
934 case BRW_REGISTER_TYPE_F
:
935 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
936 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
938 case BRW_REGISTER_TYPE_DF
:
939 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_DF
);
940 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_DF
);
942 case BRW_REGISTER_TYPE_D
:
943 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
944 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
946 case BRW_REGISTER_TYPE_UD
:
947 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
948 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
951 unreachable("not reached");
959 /***********************************************************************
960 * Convenience routines.
/* Macro generators for the trivial one-, two- and three-source ALU
 * wrappers (brw_MOV, brw_ADD, brw_MAD, ...).  Extraction dropped the
 * `#define` headers, braces and continuations; restored here.
 */
#define ALU1(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0)                           \
{                                                                 \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);               \
}

#define ALU2(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1)                           \
{                                                                 \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);         \
}

#define ALU3(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1,                           \
                   struct brw_reg src2)                           \
{                                                                 \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}

/* Three-source float-only ops: all four operand types must agree (F or DF). */
#define ALU3F(OP)                                                 \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1,                           \
                   struct brw_reg src2)                           \
{                                                                 \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                     \
          dest.type == BRW_REGISTER_TYPE_DF);                     \
   if (dest.type == BRW_REGISTER_TYPE_F) {                        \
      assert(src0.type == BRW_REGISTER_TYPE_F);                   \
      assert(src1.type == BRW_REGISTER_TYPE_F);                   \
      assert(src2.type == BRW_REGISTER_TYPE_F);                   \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {                \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                  \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                  \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                  \
   }                                                              \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                     \
void brw_##OP(struct brw_codegen *p,                                  \
              struct brw_reg dest,                                    \
              struct brw_reg src)                                     \
{                                                                     \
   const struct gen_device_info *devinfo = p->devinfo;                \
   brw_inst *rnd, *add;                                               \
   rnd = next_insn(p, BRW_OPCODE_##OP);                               \
   brw_set_dest(p, rnd, dest);                                        \
   brw_set_src0(p, rnd, src);                                         \
                                                                      \
   if (devinfo->gen < 6) {                                            \
      /* turn on round-increments */                                  \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);    \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                  \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);  \
   }                                                                  \
}
1071 brw_MOV(struct brw_codegen
*p
, struct brw_reg dest
, struct brw_reg src0
)
1073 const struct gen_device_info
*devinfo
= p
->devinfo
;
1075 /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
1076 * To avoid the problems that causes, we use a <1,2,0> source region to read
1077 * each element twice.
1079 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
1080 brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
&&
1081 dest
.type
== BRW_REGISTER_TYPE_DF
&&
1082 (src0
.type
== BRW_REGISTER_TYPE_F
||
1083 src0
.type
== BRW_REGISTER_TYPE_D
||
1084 src0
.type
== BRW_REGISTER_TYPE_UD
) &&
1085 !has_scalar_region(src0
)) {
1086 assert(src0
.vstride
== BRW_VERTICAL_STRIDE_4
&&
1087 src0
.width
== BRW_WIDTH_4
&&
1088 src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1090 src0
.vstride
= BRW_VERTICAL_STRIDE_1
;
1091 src0
.width
= BRW_WIDTH_2
;
1092 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1095 return brw_alu1(p
, BRW_OPCODE_MOV
, dest
, src0
);
1099 brw_ADD(struct brw_codegen
*p
, struct brw_reg dest
,
1100 struct brw_reg src0
, struct brw_reg src1
)
1103 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1104 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1105 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1106 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1107 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1110 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1111 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1112 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1113 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1114 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1117 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1121 brw_AVG(struct brw_codegen
*p
, struct brw_reg dest
,
1122 struct brw_reg src0
, struct brw_reg src1
)
1124 assert(dest
.type
== src0
.type
);
1125 assert(src0
.type
== src1
.type
);
1126 switch (src0
.type
) {
1127 case BRW_REGISTER_TYPE_B
:
1128 case BRW_REGISTER_TYPE_UB
:
1129 case BRW_REGISTER_TYPE_W
:
1130 case BRW_REGISTER_TYPE_UW
:
1131 case BRW_REGISTER_TYPE_D
:
1132 case BRW_REGISTER_TYPE_UD
:
1135 unreachable("Bad type for brw_AVG");
1138 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1142 brw_MUL(struct brw_codegen
*p
, struct brw_reg dest
,
1143 struct brw_reg src0
, struct brw_reg src1
)
1146 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1147 src0
.type
== BRW_REGISTER_TYPE_UD
||
1148 src1
.type
== BRW_REGISTER_TYPE_D
||
1149 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1150 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1153 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1154 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1155 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1156 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1157 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1160 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1161 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1162 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1163 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1164 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1167 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1168 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1169 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1170 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1172 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1176 brw_LINE(struct brw_codegen
*p
, struct brw_reg dest
,
1177 struct brw_reg src0
, struct brw_reg src1
)
1179 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1180 src0
.width
= BRW_WIDTH_1
;
1181 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1182 return brw_alu2(p
, BRW_OPCODE_LINE
, dest
, src0
, src1
);
1186 brw_PLN(struct brw_codegen
*p
, struct brw_reg dest
,
1187 struct brw_reg src0
, struct brw_reg src1
)
1189 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1190 src0
.width
= BRW_WIDTH_1
;
1191 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1192 src1
.vstride
= BRW_VERTICAL_STRIDE_8
;
1193 src1
.width
= BRW_WIDTH_8
;
1194 src1
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
1195 return brw_alu2(p
, BRW_OPCODE_PLN
, dest
, src0
, src1
);
1199 brw_F32TO16(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1201 const struct gen_device_info
*devinfo
= p
->devinfo
;
1202 const bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1203 /* The F32TO16 instruction doesn't support 32-bit destination types in
1204 * Align1 mode, and neither does the Gen8 implementation in terms of a
1205 * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
1206 * an undocumented feature.
1208 const bool needs_zero_fill
= (dst
.type
== BRW_REGISTER_TYPE_UD
&&
1209 (!align16
|| devinfo
->gen
>= 8));
1213 assert(dst
.type
== BRW_REGISTER_TYPE_UD
);
1215 assert(dst
.type
== BRW_REGISTER_TYPE_UD
||
1216 dst
.type
== BRW_REGISTER_TYPE_W
||
1217 dst
.type
== BRW_REGISTER_TYPE_UW
||
1218 dst
.type
== BRW_REGISTER_TYPE_HF
);
1221 brw_push_insn_state(p
);
1223 if (needs_zero_fill
) {
1224 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
1225 dst
= spread(retype(dst
, BRW_REGISTER_TYPE_W
), 2);
1228 if (devinfo
->gen
>= 8) {
1229 inst
= brw_MOV(p
, retype(dst
, BRW_REGISTER_TYPE_HF
), src
);
1231 assert(devinfo
->gen
== 7);
1232 inst
= brw_alu1(p
, BRW_OPCODE_F32TO16
, dst
, src
);
1235 if (needs_zero_fill
) {
1236 brw_inst_set_no_dd_clear(devinfo
, inst
, true);
1237 inst
= brw_MOV(p
, suboffset(dst
, 1), brw_imm_w(0));
1238 brw_inst_set_no_dd_check(devinfo
, inst
, true);
1241 brw_pop_insn_state(p
);
1246 brw_F16TO32(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1248 const struct gen_device_info
*devinfo
= p
->devinfo
;
1249 bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1252 assert(src
.type
== BRW_REGISTER_TYPE_UD
);
1254 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1256 * Because this instruction does not have a 16-bit floating-point
1257 * type, the source data type must be Word (W). The destination type
1258 * must be F (Float).
1260 if (src
.type
== BRW_REGISTER_TYPE_UD
)
1261 src
= spread(retype(src
, BRW_REGISTER_TYPE_W
), 2);
1263 assert(src
.type
== BRW_REGISTER_TYPE_W
||
1264 src
.type
== BRW_REGISTER_TYPE_UW
||
1265 src
.type
== BRW_REGISTER_TYPE_HF
);
1268 if (devinfo
->gen
>= 8) {
1269 return brw_MOV(p
, dst
, retype(src
, BRW_REGISTER_TYPE_HF
));
1271 assert(devinfo
->gen
== 7);
1272 return brw_alu1(p
, BRW_OPCODE_F16TO32
, dst
, src
);
1277 void brw_NOP(struct brw_codegen
*p
)
1279 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1280 memset(insn
, 0, sizeof(*insn
));
1281 brw_inst_set_opcode(p
->devinfo
, insn
, BRW_OPCODE_NOP
);
1288 /***********************************************************************
1289 * Comparisons, if/else/endif
1293 brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1294 unsigned predicate_control
)
1296 const struct gen_device_info
*devinfo
= p
->devinfo
;
1297 struct brw_reg ip
= brw_ip_reg();
1298 brw_inst
*inst
= brw_alu2(p
, BRW_OPCODE_JMPI
, ip
, ip
, index
);
1300 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_2
);
1301 brw_inst_set_qtr_control(devinfo
, inst
, BRW_COMPRESSION_NONE
);
1302 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
1303 brw_inst_set_pred_control(devinfo
, inst
, predicate_control
);
1309 push_if_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1311 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1313 p
->if_stack_depth
++;
1314 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1315 p
->if_stack_array_size
*= 2;
1316 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1317 p
->if_stack_array_size
);
1322 pop_if_stack(struct brw_codegen
*p
)
1324 p
->if_stack_depth
--;
1325 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1329 push_loop_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1331 if (p
->loop_stack_array_size
<= (p
->loop_stack_depth
+ 1)) {
1332 p
->loop_stack_array_size
*= 2;
1333 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1334 p
->loop_stack_array_size
);
1335 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1336 p
->loop_stack_array_size
);
1339 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1340 p
->loop_stack_depth
++;
1341 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1345 get_inner_do_insn(struct brw_codegen
*p
)
1347 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1350 /* EU takes the value from the flag register and pushes it onto some
1351 * sort of a stack (presumably merging with any flag value already on
1352 * the stack). Within an if block, the flags at the top of the stack
1353 * control execution on each channel of the unit, eg. on each of the
1354 * 16 pixel values in our wm programs.
1356 * When the matching 'else' instruction is reached (presumably by
1357 * countdown of the instruction count patched in by our ELSE/ENDIF
1358 * functions), the relevant flags are inverted.
1360 * When the matching 'endif' instruction is reached, the flags are
1361 * popped off. If the stack is now empty, normal execution resumes.
1364 brw_IF(struct brw_codegen
*p
, unsigned execute_size
)
1366 const struct gen_device_info
*devinfo
= p
->devinfo
;
1369 insn
= next_insn(p
, BRW_OPCODE_IF
);
1371 /* Override the defaults for this instruction:
1373 if (devinfo
->gen
< 6) {
1374 brw_set_dest(p
, insn
, brw_ip_reg());
1375 brw_set_src0(p
, insn
, brw_ip_reg());
1376 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1377 } else if (devinfo
->gen
== 6) {
1378 brw_set_dest(p
, insn
, brw_imm_w(0));
1379 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1380 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1381 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1382 } else if (devinfo
->gen
== 7) {
1383 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1384 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1385 brw_set_src1(p
, insn
, brw_imm_w(0));
1386 brw_inst_set_jip(devinfo
, insn
, 0);
1387 brw_inst_set_uip(devinfo
, insn
, 0);
1389 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1390 brw_set_src0(p
, insn
, brw_imm_d(0));
1391 brw_inst_set_jip(devinfo
, insn
, 0);
1392 brw_inst_set_uip(devinfo
, insn
, 0);
1395 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1396 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1397 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NORMAL
);
1398 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1399 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1400 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1402 push_if_stack(p
, insn
);
1403 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1407 /* This function is only used for gen6-style IF instructions with an
1408 * embedded comparison (conditional modifier). It is not used on gen7.
1411 gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1412 struct brw_reg src0
, struct brw_reg src1
)
1414 const struct gen_device_info
*devinfo
= p
->devinfo
;
1417 insn
= next_insn(p
, BRW_OPCODE_IF
);
1419 brw_set_dest(p
, insn
, brw_imm_w(0));
1420 brw_inst_set_exec_size(devinfo
, insn
,
1421 brw_inst_exec_size(devinfo
, p
->current
));
1422 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1423 brw_set_src0(p
, insn
, src0
);
1424 brw_set_src1(p
, insn
, src1
);
1426 assert(brw_inst_qtr_control(devinfo
, insn
) == BRW_COMPRESSION_NONE
);
1427 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
1428 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1430 push_if_stack(p
, insn
);
1435 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1438 convert_IF_ELSE_to_ADD(struct brw_codegen
*p
,
1439 brw_inst
*if_inst
, brw_inst
*else_inst
)
1441 const struct gen_device_info
*devinfo
= p
->devinfo
;
1443 /* The next instruction (where the ENDIF would be, if it existed) */
1444 brw_inst
*next_inst
= &p
->store
[p
->nr_insn
];
1446 assert(p
->single_program_flow
);
1447 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1448 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1449 assert(brw_inst_exec_size(devinfo
, if_inst
) == BRW_EXECUTE_1
);
1451 /* Convert IF to an ADD instruction that moves the instruction pointer
1452 * to the first instruction of the ELSE block. If there is no ELSE
1453 * block, point to where ENDIF would be. Reverse the predicate.
1455 * There's no need to execute an ENDIF since we don't need to do any
1456 * stack operations, and if we're currently executing, we just want to
1457 * continue normally.
1459 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_ADD
);
1460 brw_inst_set_pred_inv(devinfo
, if_inst
, true);
1462 if (else_inst
!= NULL
) {
1463 /* Convert ELSE to an ADD instruction that points where the ENDIF
1466 brw_inst_set_opcode(devinfo
, else_inst
, BRW_OPCODE_ADD
);
1468 brw_inst_set_imm_ud(devinfo
, if_inst
, (else_inst
- if_inst
+ 1) * 16);
1469 brw_inst_set_imm_ud(devinfo
, else_inst
, (next_inst
- else_inst
) * 16);
1471 brw_inst_set_imm_ud(devinfo
, if_inst
, (next_inst
- if_inst
) * 16);
1476 * Patch IF and ELSE instructions with appropriate jump targets.
1479 patch_IF_ELSE(struct brw_codegen
*p
,
1480 brw_inst
*if_inst
, brw_inst
*else_inst
, brw_inst
*endif_inst
)
1482 const struct gen_device_info
*devinfo
= p
->devinfo
;
1484 /* We shouldn't be patching IF and ELSE instructions in single program flow
1485 * mode when gen < 6, because in single program flow mode on those
1486 * platforms, we convert flow control instructions to conditional ADDs that
1487 * operate on IP (see brw_ENDIF).
1489 * However, on Gen6, writing to IP doesn't work in single program flow mode
1490 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1491 * not be updated by non-flow control instructions."). And on later
1492 * platforms, there is no significant benefit to converting control flow
1493 * instructions to conditional ADDs. So we do patch IF and ELSE
1494 * instructions in single program flow mode on those platforms.
1496 if (devinfo
->gen
< 6)
1497 assert(!p
->single_program_flow
);
1499 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1500 assert(endif_inst
!= NULL
);
1501 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1503 unsigned br
= brw_jump_scale(devinfo
);
1505 assert(brw_inst_opcode(devinfo
, endif_inst
) == BRW_OPCODE_ENDIF
);
1506 brw_inst_set_exec_size(devinfo
, endif_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1508 if (else_inst
== NULL
) {
1509 /* Patch IF -> ENDIF */
1510 if (devinfo
->gen
< 6) {
1511 /* Turn it into an IFF, which means no mask stack operations for
1512 * all-false and jumping past the ENDIF.
1514 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_IFF
);
1515 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1516 br
* (endif_inst
- if_inst
+ 1));
1517 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1518 } else if (devinfo
->gen
== 6) {
1519 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1520 brw_inst_set_gen6_jump_count(devinfo
, if_inst
, br
*(endif_inst
- if_inst
));
1522 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1523 brw_inst_set_jip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1526 brw_inst_set_exec_size(devinfo
, else_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1528 /* Patch IF -> ELSE */
1529 if (devinfo
->gen
< 6) {
1530 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1531 br
* (else_inst
- if_inst
));
1532 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1533 } else if (devinfo
->gen
== 6) {
1534 brw_inst_set_gen6_jump_count(devinfo
, if_inst
,
1535 br
* (else_inst
- if_inst
+ 1));
1538 /* Patch ELSE -> ENDIF */
1539 if (devinfo
->gen
< 6) {
1540 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1543 brw_inst_set_gen4_jump_count(devinfo
, else_inst
,
1544 br
* (endif_inst
- else_inst
+ 1));
1545 brw_inst_set_gen4_pop_count(devinfo
, else_inst
, 1);
1546 } else if (devinfo
->gen
== 6) {
1547 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1548 brw_inst_set_gen6_jump_count(devinfo
, else_inst
,
1549 br
* (endif_inst
- else_inst
));
1551 /* The IF instruction's JIP should point just past the ELSE */
1552 brw_inst_set_jip(devinfo
, if_inst
, br
* (else_inst
- if_inst
+ 1));
1553 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1554 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1555 brw_inst_set_jip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1556 if (devinfo
->gen
>= 8) {
1557 /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1558 * should point to ENDIF.
1560 brw_inst_set_uip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1567 brw_ELSE(struct brw_codegen
*p
)
1569 const struct gen_device_info
*devinfo
= p
->devinfo
;
1572 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1574 if (devinfo
->gen
< 6) {
1575 brw_set_dest(p
, insn
, brw_ip_reg());
1576 brw_set_src0(p
, insn
, brw_ip_reg());
1577 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1578 } else if (devinfo
->gen
== 6) {
1579 brw_set_dest(p
, insn
, brw_imm_w(0));
1580 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1581 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1582 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1583 } else if (devinfo
->gen
== 7) {
1584 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1585 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1586 brw_set_src1(p
, insn
, brw_imm_w(0));
1587 brw_inst_set_jip(devinfo
, insn
, 0);
1588 brw_inst_set_uip(devinfo
, insn
, 0);
1590 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1591 brw_set_src0(p
, insn
, brw_imm_d(0));
1592 brw_inst_set_jip(devinfo
, insn
, 0);
1593 brw_inst_set_uip(devinfo
, insn
, 0);
1596 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1597 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1598 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1599 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1601 push_if_stack(p
, insn
);
1605 brw_ENDIF(struct brw_codegen
*p
)
1607 const struct gen_device_info
*devinfo
= p
->devinfo
;
1608 brw_inst
*insn
= NULL
;
1609 brw_inst
*else_inst
= NULL
;
1610 brw_inst
*if_inst
= NULL
;
1612 bool emit_endif
= true;
1614 /* In single program flow mode, we can express IF and ELSE instructions
1615 * equivalently as ADD instructions that operate on IP. On platforms prior
1616 * to Gen6, flow control instructions cause an implied thread switch, so
1617 * this is a significant savings.
1619 * However, on Gen6, writing to IP doesn't work in single program flow mode
1620 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1621 * not be updated by non-flow control instructions."). And on later
1622 * platforms, there is no significant benefit to converting control flow
1623 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1626 if (devinfo
->gen
< 6 && p
->single_program_flow
)
1630 * A single next_insn() may change the base address of instruction store
1631 * memory(p->store), so call it first before referencing the instruction
1632 * store pointer from an index
1635 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1637 /* Pop the IF and (optional) ELSE instructions from the stack */
1638 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1639 tmp
= pop_if_stack(p
);
1640 if (brw_inst_opcode(devinfo
, tmp
) == BRW_OPCODE_ELSE
) {
1642 tmp
= pop_if_stack(p
);
1647 /* ENDIF is useless; don't bother emitting it. */
1648 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1652 if (devinfo
->gen
< 6) {
1653 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1654 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1655 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1656 } else if (devinfo
->gen
== 6) {
1657 brw_set_dest(p
, insn
, brw_imm_w(0));
1658 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1659 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1660 } else if (devinfo
->gen
== 7) {
1661 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1662 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1663 brw_set_src1(p
, insn
, brw_imm_w(0));
1665 brw_set_src0(p
, insn
, brw_imm_d(0));
1668 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1669 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1670 if (devinfo
->gen
< 6)
1671 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1673 /* Also pop item off the stack in the endif instruction: */
1674 if (devinfo
->gen
< 6) {
1675 brw_inst_set_gen4_jump_count(devinfo
, insn
, 0);
1676 brw_inst_set_gen4_pop_count(devinfo
, insn
, 1);
1677 } else if (devinfo
->gen
== 6) {
1678 brw_inst_set_gen6_jump_count(devinfo
, insn
, 2);
1680 brw_inst_set_jip(devinfo
, insn
, 2);
1682 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1686 brw_BREAK(struct brw_codegen
*p
)
1688 const struct gen_device_info
*devinfo
= p
->devinfo
;
1691 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1692 if (devinfo
->gen
>= 8) {
1693 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1694 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1695 } else if (devinfo
->gen
>= 6) {
1696 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1697 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1698 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1700 brw_set_dest(p
, insn
, brw_ip_reg());
1701 brw_set_src0(p
, insn
, brw_ip_reg());
1702 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1703 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1704 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1706 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1707 brw_inst_set_exec_size(devinfo
, insn
,
1708 brw_inst_exec_size(devinfo
, p
->current
));
1714 brw_CONT(struct brw_codegen
*p
)
1716 const struct gen_device_info
*devinfo
= p
->devinfo
;
1719 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1720 brw_set_dest(p
, insn
, brw_ip_reg());
1721 if (devinfo
->gen
>= 8) {
1722 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1724 brw_set_src0(p
, insn
, brw_ip_reg());
1725 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1728 if (devinfo
->gen
< 6) {
1729 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1730 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1732 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1733 brw_inst_set_exec_size(devinfo
, insn
,
1734 brw_inst_exec_size(devinfo
, p
->current
));
1739 gen6_HALT(struct brw_codegen
*p
)
1741 const struct gen_device_info
*devinfo
= p
->devinfo
;
1744 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1745 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1746 if (devinfo
->gen
>= 8) {
1747 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1749 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1750 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1753 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1754 brw_inst_set_exec_size(devinfo
, insn
,
1755 brw_inst_exec_size(devinfo
, p
->current
));
1761 * The DO/WHILE is just an unterminated loop -- break or continue are
1762 * used for control within the loop. We have a few ways they can be
1765 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1766 * jip and no DO instruction.
1768 * For non-uniform control flow pre-gen6, there's a DO instruction to
1769 * push the mask, and a WHILE to jump back, and BREAK to get out and
1772 * For gen6, there's no more mask stack, so no need for DO. WHILE
1773 * just points back to the first instruction of the loop.
1776 brw_DO(struct brw_codegen
*p
, unsigned execute_size
)
1778 const struct gen_device_info
*devinfo
= p
->devinfo
;
1780 if (devinfo
->gen
>= 6 || p
->single_program_flow
) {
1781 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1782 return &p
->store
[p
->nr_insn
];
1784 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1786 push_loop_stack(p
, insn
);
1788 /* Override the defaults for this instruction:
1790 brw_set_dest(p
, insn
, brw_null_reg());
1791 brw_set_src0(p
, insn
, brw_null_reg());
1792 brw_set_src1(p
, insn
, brw_null_reg());
1794 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1795 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1796 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
);
1803 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1806 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1807 * nesting, since it can always just point to the end of the block/current loop.
1810 brw_patch_break_cont(struct brw_codegen
*p
, brw_inst
*while_inst
)
1812 const struct gen_device_info
*devinfo
= p
->devinfo
;
1813 brw_inst
*do_inst
= get_inner_do_insn(p
);
1815 unsigned br
= brw_jump_scale(devinfo
);
1817 assert(devinfo
->gen
< 6);
1819 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1820 /* If the jump count is != 0, that means that this instruction has already
1821 * been patched because it's part of a loop inside of the one we're
1824 if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_BREAK
&&
1825 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1826 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
*((while_inst
- inst
) + 1));
1827 } else if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_CONTINUE
&&
1828 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1829 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
* (while_inst
- inst
));
1835 brw_WHILE(struct brw_codegen
*p
)
1837 const struct gen_device_info
*devinfo
= p
->devinfo
;
1838 brw_inst
*insn
, *do_insn
;
1839 unsigned br
= brw_jump_scale(devinfo
);
1841 if (devinfo
->gen
>= 6) {
1842 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1843 do_insn
= get_inner_do_insn(p
);
1845 if (devinfo
->gen
>= 8) {
1846 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1847 brw_set_src0(p
, insn
, brw_imm_d(0));
1848 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1849 } else if (devinfo
->gen
== 7) {
1850 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1851 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1852 brw_set_src1(p
, insn
, brw_imm_w(0));
1853 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1855 brw_set_dest(p
, insn
, brw_imm_w(0));
1856 brw_inst_set_gen6_jump_count(devinfo
, insn
, br
* (do_insn
- insn
));
1857 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1858 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1861 brw_inst_set_exec_size(devinfo
, insn
,
1862 brw_inst_exec_size(devinfo
, p
->current
));
1865 if (p
->single_program_flow
) {
1866 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1867 do_insn
= get_inner_do_insn(p
);
1869 brw_set_dest(p
, insn
, brw_ip_reg());
1870 brw_set_src0(p
, insn
, brw_ip_reg());
1871 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1872 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
1874 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1875 do_insn
= get_inner_do_insn(p
);
1877 assert(brw_inst_opcode(devinfo
, do_insn
) == BRW_OPCODE_DO
);
1879 brw_set_dest(p
, insn
, brw_ip_reg());
1880 brw_set_src0(p
, insn
, brw_ip_reg());
1881 brw_set_src1(p
, insn
, brw_imm_d(0));
1883 brw_inst_set_exec_size(devinfo
, insn
, brw_inst_exec_size(devinfo
, do_insn
));
1884 brw_inst_set_gen4_jump_count(devinfo
, insn
, br
* (do_insn
- insn
+ 1));
1885 brw_inst_set_gen4_pop_count(devinfo
, insn
, 0);
1887 brw_patch_break_cont(p
, insn
);
1890 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1892 p
->loop_stack_depth
--;
1899 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
)
1901 const struct gen_device_info
*devinfo
= p
->devinfo
;
1902 brw_inst
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1905 if (devinfo
->gen
>= 5)
1908 assert(brw_inst_opcode(devinfo
, jmp_insn
) == BRW_OPCODE_JMPI
);
1909 assert(brw_inst_src1_reg_file(devinfo
, jmp_insn
) == BRW_IMMEDIATE_VALUE
);
1911 brw_inst_set_gen4_jump_count(devinfo
, jmp_insn
,
1912 jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1));
1915 /* To integrate with the above, it makes sense that the comparison
1916 * instruction should populate the flag register. It might be simpler
1917 * just to use the flag reg for most WM tasks?
1919 void brw_CMP(struct brw_codegen
*p
,
1920 struct brw_reg dest
,
1921 unsigned conditional
,
1922 struct brw_reg src0
,
1923 struct brw_reg src1
)
1925 const struct gen_device_info
*devinfo
= p
->devinfo
;
1926 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1928 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1929 brw_set_dest(p
, insn
, dest
);
1930 brw_set_src0(p
, insn
, src0
);
1931 brw_set_src1(p
, insn
, src1
);
1933 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1935 * "Any CMP instruction with a null destination must use a {switch}."
1937 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1938 * mentioned on their work-arounds pages.
1940 if (devinfo
->gen
== 7) {
1941 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1942 dest
.nr
== BRW_ARF_NULL
) {
1943 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1948 /***********************************************************************
1949 * Helpers for the various SEND message types:
1952 /** Extended math function, float[8].
/* Emit a pre-Gen6 extended-math operation as a SEND to the math shared
 * function, selecting scalar vs. vector data type from the source region.
 * Asserted to be Gen < 6 only.
 * NOTE(review): extraction dropped lines (function/src parameters, braces,
 * the 'else', and the tail of the brw_set_math_message() call) -- confirm
 * the full argument list against upstream. */
1954 void gen4_math(struct brw_codegen
*p
,
1955 struct brw_reg dest
,
1957 unsigned msg_reg_nr
,
1959 unsigned precision
)
1961 const struct gen_device_info
*devinfo
= p
->devinfo
;
1962 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
/* A scalar source region means the math unit operates in scalar mode. */
1964 if (has_scalar_region(src
)) {
1965 data_type
= BRW_MATH_DATA_SCALAR
;
1967 data_type
= BRW_MATH_DATA_VECTOR
;
1970 assert(devinfo
->gen
< 6);
1972 /* Example code doesn't set predicate_control for send
1975 brw_inst_set_pred_control(devinfo
, insn
, 0);
1976 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
1978 brw_set_dest(p
, insn
, dest
);
1979 brw_set_src0(p
, insn
, src
);
1980 brw_set_math_message(p
,
1983 src
.type
== BRW_REGISTER_TYPE_D
,
/* Emit a Gen6+ native MATH instruction. Validates the hardware's operand
 * restrictions: GRF (or Gen7+ MRF) destination with unit stride, integer
 * operands (non-float) for the INT_DIV variants, float operands otherwise,
 * and no source modifiers on Gen6. */
1988 void gen6_math(struct brw_codegen
*p
,
1989 struct brw_reg dest
,
1991 struct brw_reg src0
,
1992 struct brw_reg src1
)
1994 const struct gen_device_info
*devinfo
= p
->devinfo
;
1995 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1997 assert(devinfo
->gen
>= 6);
1999 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
2000 (devinfo
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
2002 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
2003 if (devinfo
->gen
== 6) {
2004 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
2005 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
/* Integer-division variants take integer sources; src1 may be an
 * immediate only on Gen8+. All other math functions take floats. */
2008 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
2009 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
2010 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
2011 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
2012 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
2013 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
2014 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
2016 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
2017 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
2020 /* Source modifiers are ignored for extended math instructions on Gen6. */
2021 if (devinfo
->gen
== 6) {
2022 assert(!src0
.negate
);
2024 assert(!src1
.negate
);
2028 brw_inst_set_math_function(devinfo
, insn
, function
);
2030 brw_set_dest(p
, insn
, dest
);
2031 brw_set_src0(p
, insn
, src0
);
2032 brw_set_src1(p
, insn
, src1
);
2036 * Return the right surface index to access the thread scratch space using
2037 * stateless dataport messages.
/* Return the binding-table index to use for stateless scratch-space
 * dataport messages: the non-coherent stateless BTI on Gen8+, the plain
 * stateless BTI otherwise. */
2040 brw_scratch_surface_idx(const struct brw_codegen
*p
)
2042 /* The scratch space is thread-local so IA coherency is unnecessary. */
2043 if (p
->devinfo
->gen
>= 8)
2044 return GEN8_BTI_STATELESS_NON_COHERENT
;
2046 return BRW_BTI_STATELESS
;
2050 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
2051 * using a constant offset per channel.
2053 * The offset must be aligned to oword size (16 bytes). Used for
2054 * register spilling.
/* Write num_regs GRFs to the thread's scratch buffer at the given oword-
 * aligned offset, via an OWORD block-write dataport message. Builds the
 * message header in the MRF (g0 copy with g0.2 = offset) so g0 itself is
 * not clobbered. Pre-Gen6 the write is sent as a committed write
 * (send_commit_msg = 1) to order it against subsequent reads.
 * NOTE(review): extraction dropped some lines (parameters, braces,
 * 'else' keywords, parts of the header MOV and the final
 * brw_set_dp_write_message() argument list) -- confirm upstream. */
2056 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
2061 const struct gen_device_info
*devinfo
= p
->devinfo
;
2062 const unsigned target_cache
=
2063 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2064 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2065 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2068 if (devinfo
->gen
>= 6)
2071 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
/* One header register plus the payload registers. */
2073 const unsigned mlen
= 1 + num_regs
;
2075 /* Set up the message header. This is g0, with g0.2 filled with
2076 * the offset. We don't want to leave our offset around in g0 or
2077 * it'll screw up texture samples, so set it up inside the message
2081 brw_push_insn_state(p
);
2082 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2083 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2084 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2086 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2088 /* set message header global offset field (reg 0, element 2) */
2090 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2092 2), BRW_REGISTER_TYPE_UD
),
2093 brw_imm_ud(offset
));
2095 brw_pop_insn_state(p
);
2099 struct brw_reg dest
;
2100 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2101 int send_commit_msg
;
2102 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2103 BRW_REGISTER_TYPE_UW
);
2105 brw_inst_set_compression(devinfo
, insn
, false);
2107 if (brw_inst_exec_size(devinfo
, insn
) >= 16)
2108 src_header
= vec16(src_header
);
2110 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2111 if (devinfo
->gen
< 6)
2112 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2114 /* Until gen6, writes followed by reads from the same location
2115 * are not guaranteed to be ordered unless write_commit is set.
2116 * If set, then a no-op write is issued to the destination
2117 * register to set a dependency, and a read from the destination
2118 * can be used to ensure the ordering.
2120 * For gen6, only writes between different threads need ordering
2121 * protection. Our use of DP writes is all about register
2122 * spilling within a thread.
2124 if (devinfo
->gen
>= 6) {
2125 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2126 send_commit_msg
= 0;
2129 send_commit_msg
= 1;
2132 brw_set_dest(p
, insn
, dest
);
2133 if (devinfo
->gen
>= 6) {
2134 brw_set_src0(p
, insn
, mrf
);
2136 brw_set_src0(p
, insn
, brw_null_reg());
2139 if (devinfo
->gen
>= 6)
2140 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2142 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2144 brw_set_dp_write_message(p
,
2146 brw_scratch_surface_idx(p
),
2147 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2151 true, /* header_present */
2152 0, /* not a render target */
2153 send_commit_msg
, /* response_length */
2161 * Read a block of owords (half a GRF each) from the scratch buffer
2162 * using a constant index per channel.
2164 * Offset must be aligned to oword size (16 bytes). Used for register
/* Read num_regs GRFs back from the thread's scratch buffer at the given
 * oword-aligned offset (register unspilling). On Gen7+ the destination
 * register itself doubles as the message source so the implied header
 * write cannot clobber anything else.
 * NOTE(review): extraction dropped lines (parameters, braces, 'else'
 * keywords, and the tail of brw_set_dp_read_message()) -- confirm
 * upstream. */
2168 brw_oword_block_read_scratch(struct brw_codegen
*p
,
2169 struct brw_reg dest
,
2174 const struct gen_device_info
*devinfo
= p
->devinfo
;
2176 if (devinfo
->gen
>= 6)
2179 if (p
->devinfo
->gen
>= 7) {
2180 /* On gen 7 and above, we no longer have message registers and we can
2181 * send from any register we want. By using the destination register
2182 * for the message, we guarantee that the implied message write won't
2183 * accidentally overwrite anything. This has been a problem because
2184 * the MRF registers and source for the final FB write are both fixed
2187 mrf
= retype(dest
, BRW_REGISTER_TYPE_UD
);
2189 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2191 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2193 const unsigned rlen
= num_regs
;
2194 const unsigned target_cache
=
2195 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2196 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2197 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
/* Build the message header: copy of g0 with the scratch offset in
 * element 2, emitted with masking/compression disabled. */
2200 brw_push_insn_state(p
);
2201 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2202 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2203 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2205 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2207 /* set message header global offset field (reg 0, element 2) */
2208 brw_MOV(p
, get_element_ud(mrf
, 2), brw_imm_ud(offset
));
2210 brw_pop_insn_state(p
);
2214 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2216 assert(brw_inst_pred_control(devinfo
, insn
) == 0);
2217 brw_inst_set_compression(devinfo
, insn
, false);
2219 brw_set_dest(p
, insn
, dest
); /* UW? */
2220 if (devinfo
->gen
>= 6) {
2221 brw_set_src0(p
, insn
, mrf
);
2223 brw_set_src0(p
, insn
, brw_null_reg());
2224 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2227 brw_set_dp_read_message(p
,
2229 brw_scratch_surface_idx(p
),
2230 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2231 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2234 true, /* header_present */
/* Gen7+ scratch-block read using the dedicated DP scratch message:
 * reads num_regs registers from the HWORD offset into dest, with g0 as
 * the (required) header. Offset is limited to the message's 12-bit
 * HWord field. */
2240 gen7_block_read_scratch(struct brw_codegen
*p
,
2241 struct brw_reg dest
,
2245 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2246 assert(brw_inst_pred_control(p
->devinfo
, insn
) == BRW_PREDICATE_NONE
);
2248 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UW
));
2250 /* The HW requires that the header is present; this is to get the g0.5
2253 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2255 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2256 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2257 * is 32 bytes, which happens to be the size of a register.
2260 assert(offset
< (1 << 12));
2262 gen7_set_dp_scratch_message(p
, insn
,
2263 false, /* scratch read */
2265 false, /* invalidate after read */
2268 1, /* mlen: just g0 */
2269 num_regs
, /* rlen */
2270 true); /* header present */
2274 * Read float[4] vectors from the data port constant cache.
2275 * Location (in buffer) should be a multiple of 16.
2276 * Used for fetching shader constants.
/* Read an OWORD block from a bound buffer (constant cache on Gen6+) into
 * dest -- used for fetching shader constants. Builds a g0-based header
 * with the buffer offset in element 2, then emits the SEND. The block
 * size is derived from the current execution size.
 * NOTE(review): extraction dropped lines (mrf/offset parameters, braces,
 * 'else', and part of the header MOV) -- confirm upstream. */
2278 void brw_oword_block_read(struct brw_codegen
*p
,
2279 struct brw_reg dest
,
2282 uint32_t bind_table_index
)
2284 const struct gen_device_info
*devinfo
= p
->devinfo
;
2285 const unsigned target_cache
=
2286 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE
:
2287 BRW_DATAPORT_READ_TARGET_DATA_CACHE
);
2288 const unsigned exec_size
= 1 << brw_inst_exec_size(devinfo
, p
->current
);
2290 /* On newer hardware, offset is in units of owords. */
2291 if (devinfo
->gen
>= 6)
2294 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2296 brw_push_insn_state(p
);
2297 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2298 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2299 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2301 brw_push_insn_state(p
);
2302 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2303 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2305 /* set message header global offset field (reg 0, element 2) */
2307 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2309 2), BRW_REGISTER_TYPE_UD
),
2310 brw_imm_ud(offset
));
2311 brw_pop_insn_state(p
);
2313 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2315 /* cast dest to a uword[8] vector */
2316 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2318 brw_set_dest(p
, insn
, dest
);
2319 if (devinfo
->gen
>= 6) {
2320 brw_set_src0(p
, insn
, mrf
);
2322 brw_set_src0(p
, insn
, brw_null_reg());
2323 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2326 brw_set_dp_read_message(p
, insn
, bind_table_index
,
2327 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size
),
2328 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2331 true, /* header_present */
2332 DIV_ROUND_UP(exec_size
, 8)); /* response_length */
2334 brw_pop_insn_state(p
);
/* Emit a render-target (framebuffer) write message. Gen6+ uses SENDC
 * (dependency on previous RT writes) with the payload sent directly;
 * pre-Gen6 uses SEND with an MRF payload and implied header.
 * NOTE(review): extraction dropped lines ('insn'/'msg_type' declarations,
 * braces, 'else' keywords, and most of the brw_set_dp_write_message()
 * arguments) -- confirm upstream. */
2338 void brw_fb_WRITE(struct brw_codegen
*p
,
2339 struct brw_reg payload
,
2340 struct brw_reg implied_header
,
2341 unsigned msg_control
,
2342 unsigned binding_table_index
,
2343 unsigned msg_length
,
2344 unsigned response_length
,
2346 bool last_render_target
,
2347 bool header_present
)
2349 const struct gen_device_info
*devinfo
= p
->devinfo
;
2350 const unsigned target_cache
=
2351 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2352 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2355 struct brw_reg dest
, src0
;
/* Null destination sized to the current execution width. */
2357 if (brw_inst_exec_size(devinfo
, p
->current
) >= BRW_EXECUTE_16
)
2358 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2360 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2362 if (devinfo
->gen
>= 6) {
2363 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2365 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2367 brw_inst_set_compression(devinfo
, insn
, false);
2369 if (devinfo
->gen
>= 6) {
2370 /* headerless version, just submit color payload */
2373 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2375 assert(payload
.file
== BRW_MESSAGE_REGISTER_FILE
);
2376 brw_inst_set_base_mrf(devinfo
, insn
, payload
.nr
);
2377 src0
= implied_header
;
2379 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2382 brw_set_dest(p
, insn
, dest
);
2383 brw_set_src0(p
, insn
, src0
);
2384 brw_set_dp_write_message(p
,
2386 binding_table_index
,
2395 0 /* send_commit_msg */);
/* Gen9+ render-target read: emits a SENDC to the render cache with the
 * RENDER_TARGET_READ message type. The message subtype encodes SIMD16
 * (0) vs. SIMD8 (1) from the current execution size, and the RT slot
 * group is derived from the quarter control. */
2399 gen9_fb_READ(struct brw_codegen
*p
,
2401 struct brw_reg payload
,
2402 unsigned binding_table_index
,
2403 unsigned msg_length
,
2404 unsigned response_length
,
2407 const struct gen_device_info
*devinfo
= p
->devinfo
;
2408 assert(devinfo
->gen
>= 9);
2409 const unsigned msg_subtype
=
2410 brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
? 0 : 1;
2411 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2413 brw_set_dest(p
, insn
, dst
);
2414 brw_set_src0(p
, insn
, payload
);
2415 brw_set_dp_read_message(p
, insn
, binding_table_index
,
2416 per_sample
<< 5 | msg_subtype
,
2417 GEN9_DATAPORT_RC_RENDER_TARGET_READ
,
2418 GEN6_SFID_DATAPORT_RENDER_CACHE
,
2419 msg_length
, true /* header_present */,
2421 brw_inst_set_rt_slot_group(devinfo
, insn
,
2422 brw_inst_qtr_control(devinfo
, p
->current
) / 2);
2428 * Texture sample instruction.
2429 * Note: the msg_type plus msg_length values determine exactly what kind
2430 * of sampling operation is performed. See volume 4, page 161 of docs.
/* Emit a texture-sampler SEND. Resolves the implied MRF move on Gen6+
 * when msg_reg_nr is valid, and deliberately leaves compression control
 * alone (see PRM quote below) so SecHalf SIMD8 messages work in SIMD16.
 * NOTE(review): the tail of the brw_set_sampler_message() call was
 * dropped by extraction -- confirm the remaining arguments upstream. */
2432 void brw_SAMPLE(struct brw_codegen
*p
,
2433 struct brw_reg dest
,
2434 unsigned msg_reg_nr
,
2435 struct brw_reg src0
,
2436 unsigned binding_table_index
,
2439 unsigned response_length
,
2440 unsigned msg_length
,
2441 unsigned header_present
,
2443 unsigned return_format
)
2445 const struct gen_device_info
*devinfo
= p
->devinfo
;
/* -1 means "no message register": skip the implied move. */
2448 if (msg_reg_nr
!= -1)
2449 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2451 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2452 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
); /* XXX */
2454 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2456 * "Instruction compression is not allowed for this instruction (that
2457 * is, send). The hardware behavior is undefined if this instruction is
2458 * set as compressed. However, compress control can be set to "SecHalf"
2459 * to affect the EMask generation."
2461 * No similar wording is found in later PRMs, but there are examples
2462 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2463 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2464 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2466 brw_inst_set_compression(devinfo
, insn
, false);
2468 if (devinfo
->gen
< 6)
2469 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2471 brw_set_dest(p
, insn
, dest
);
2472 brw_set_src0(p
, insn
, src0
);
2473 brw_set_sampler_message(p
, insn
,
2474 binding_table_index
,
2484 /* Adjust the message header's sampler state pointer to
2485 * select the correct group of 16 samplers.
/* Adjust the Sampler State Pointer in the message header (element 3) so
 * sampler indices >= 16 can be addressed: adds an offset selecting the
 * correct group of 16 sampler states. Handles both immediate and
 * dynamically-indexed sampler_index.
 * NOTE(review): extraction dropped lines (the ADD emitting the adjusted
 * pointer in both paths) -- confirm upstream. */
2487 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
2488 struct brw_reg header
,
2489 struct brw_reg sampler_index
)
2491 /* The "Sampler Index" field can only store values between 0 and 15.
2492 * However, we can add an offset to the "Sampler State Pointer"
2493 * field, effectively selecting a different set of 16 samplers.
2495 * The "Sampler State Pointer" needs to be aligned to a 32-byte
2496 * offset, and each sampler state is only 16-bytes, so we can't
2497 * exclusively use the offset - we have to use both.
2500 const struct gen_device_info
*devinfo
= p
->devinfo
;
2502 if (sampler_index
.file
== BRW_IMMEDIATE_VALUE
) {
2503 const int sampler_state_size
= 16; /* 16 bytes */
2504 uint32_t sampler
= sampler_index
.ud
;
2506 if (sampler
>= 16) {
2507 assert(devinfo
->is_haswell
|| devinfo
->gen
>= 8);
2509 get_element_ud(header
, 3),
2510 get_element_ud(brw_vec8_grf(0, 0), 3),
2511 brw_imm_ud(16 * (sampler
/ 16) * sampler_state_size
));
2514 /* Non-const sampler array indexing case */
2515 if (devinfo
->gen
< 8 && !devinfo
->is_haswell
) {
2519 struct brw_reg temp
= get_element_ud(header
, 3);
/* temp = (sampler_index & 0xf0) << 4, i.e. group * 16 * 16 bytes. */
2521 brw_AND(p
, temp
, get_element_ud(sampler_index
, 0), brw_imm_ud(0x0f0));
2522 brw_SHL(p
, temp
, temp
, brw_imm_ud(4));
2524 get_element_ud(header
, 3),
2525 get_element_ud(brw_vec8_grf(0, 0), 3),
2530 /* All these variables are pretty confusing - we might be better off
2531 * using bitmasks and macros for this, in the old style. Or perhaps
2532 * just having the caller instantiate the fields in dword3 itself.
/* Emit a URB write SEND. On Gen7+ (when the caller did not supply
 * channel masks) it first ORs 0xff00 into the header's fifth dword to
 * enable all channel masks for URB_WRITE_HWORD messages.
 * NOTE(review): extraction dropped lines ('insn' declaration, offset
 * parameter, and the tail of brw_set_urb_message()) -- confirm
 * upstream. */
2534 void brw_urb_WRITE(struct brw_codegen
*p
,
2535 struct brw_reg dest
,
2536 unsigned msg_reg_nr
,
2537 struct brw_reg src0
,
2538 enum brw_urb_write_flags flags
,
2539 unsigned msg_length
,
2540 unsigned response_length
,
2544 const struct gen_device_info
*devinfo
= p
->devinfo
;
2547 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2549 if (devinfo
->gen
>= 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2550 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2551 brw_push_insn_state(p
);
2552 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2553 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2554 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2555 BRW_REGISTER_TYPE_UD
),
2556 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2557 brw_imm_ud(0xff00));
2558 brw_pop_insn_state(p
);
2561 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2563 assert(msg_length
< BRW_MAX_MRF(devinfo
->gen
));
2565 brw_set_dest(p
, insn
, dest
);
2566 brw_set_src0(p
, insn
, src0
);
2567 brw_set_src1(p
, insn
, brw_imm_d(0));
2569 if (devinfo
->gen
< 6)
2570 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2572 brw_set_urb_message(p
,
/* Emit a SEND whose descriptor is either an immediate (direct case) or a
 * UD register (indirect case). In the indirect case the descriptor is
 * first ORed into address register a0 so callers can add further
 * descriptor bits via the brw_set_*_message() helpers. Returns a pointer
 * to the setup instruction, re-derived from its store index because
 * next_insn() may realloc the store.
 * NOTE(review): extraction dropped lines ('setup' index capture, braces,
 * 'else') -- confirm upstream. */
2582 brw_send_indirect_message(struct brw_codegen
*p
,
2585 struct brw_reg payload
,
2586 struct brw_reg desc
)
2588 const struct gen_device_info
*devinfo
= p
->devinfo
;
2589 struct brw_inst
*send
;
2592 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
2594 assert(desc
.type
== BRW_REGISTER_TYPE_UD
);
2596 /* We hold on to the setup instruction (the SEND in the direct case, the OR
2597 * in the indirect case) by its index in the instruction store. The
2598 * pointer returned by next_insn() may become invalid if emitting the SEND
2599 * in the indirect case reallocs the store.
2602 if (desc
.file
== BRW_IMMEDIATE_VALUE
) {
2604 send
= next_insn(p
, BRW_OPCODE_SEND
);
2605 brw_set_src1(p
, send
, desc
);
2608 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2610 brw_push_insn_state(p
);
2611 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2612 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2613 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2615 /* Load the indirect descriptor to an address register using OR so the
2616 * caller can specify additional descriptor bits with the usual
2617 * brw_set_*_message() helper functions.
2620 brw_OR(p
, addr
, desc
, brw_imm_ud(0));
2622 brw_pop_insn_state(p
);
2624 send
= next_insn(p
, BRW_OPCODE_SEND
);
2625 brw_set_src1(p
, send
, addr
);
2628 if (dst
.width
< BRW_EXECUTE_8
)
2629 brw_inst_set_exec_size(devinfo
, send
, dst
.width
);
2631 brw_set_dest(p
, send
, dst
);
2632 brw_set_src0(p
, send
, retype(payload
, BRW_REGISTER_TYPE_UD
));
2633 brw_inst_set_sfid(devinfo
, send
, sfid
);
2635 return &p
->store
[setup
];
/* Emit an indirect-descriptor SEND to a surface. When the surface index
 * is not an immediate it is masked (via AND into a0) to avoid hangs from
 * out-of-bounds surface-array indexing, then passed as the indirect
 * descriptor. Fills in mlen/rlen/header_present on the returned setup
 * instruction.
 * NOTE(review): extraction dropped lines (the AND's immediate mask
 * operand and the 'surface = addr' reassignment) -- confirm upstream. */
2638 static struct brw_inst
*
2639 brw_send_indirect_surface_message(struct brw_codegen
*p
,
2642 struct brw_reg payload
,
2643 struct brw_reg surface
,
2644 unsigned message_len
,
2645 unsigned response_len
,
2646 bool header_present
)
2648 const struct gen_device_info
*devinfo
= p
->devinfo
;
2649 struct brw_inst
*insn
;
2651 if (surface
.file
!= BRW_IMMEDIATE_VALUE
) {
2652 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2654 brw_push_insn_state(p
);
2655 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2656 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2657 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2659 /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2660 * some surface array is accessed out of bounds.
2662 insn
= brw_AND(p
, addr
,
2663 suboffset(vec1(retype(surface
, BRW_REGISTER_TYPE_UD
)),
2664 BRW_GET_SWZ(surface
.swizzle
, 0)),
2667 brw_pop_insn_state(p
);
2672 insn
= brw_send_indirect_message(p
, sfid
, dst
, payload
, surface
);
2673 brw_inst_set_mlen(devinfo
, insn
, message_len
);
2674 brw_inst_set_rlen(devinfo
, insn
, response_len
);
2675 brw_inst_set_header_present(devinfo
, insn
, header_present
);
/* Return true if the WHILE instruction 'insn' (located at while_offset)
 * jumps backward to or before start_offset. JIP is read from the gen6
 * jump count or the JIP field depending on generation, and scaled to
 * byte offsets. */
2681 while_jumps_before_offset(const struct gen_device_info
*devinfo
,
2682 brw_inst
*insn
, int while_offset
, int start_offset
)
2684 int scale
= 16 / brw_jump_scale(devinfo
);
2685 int jip
= devinfo
->gen
== 6 ? brw_inst_gen6_jump_count(devinfo
, insn
)
2686 : brw_inst_jip(devinfo
, insn
);
2688 return while_offset
+ jip
* scale
<= start_offset
;
/* Scan forward from start_offset for the instruction that ends the
 * current control-flow block (ENDIF, ELSE, HALT, or a WHILE that jumps
 * back before us). Sibling do...while loops' WHILEs are skipped.
 * NOTE(review): extraction dropped lines ('offset'/'depth' declarations,
 * the return statements and depth bookkeeping) -- confirm upstream. */
2693 brw_find_next_block_end(struct brw_codegen
*p
, int start_offset
)
2696 void *store
= p
->store
;
2697 const struct gen_device_info
*devinfo
= p
->devinfo
;
2701 for (offset
= next_offset(devinfo
, store
, start_offset
);
2702 offset
< p
->next_insn_offset
;
2703 offset
= next_offset(devinfo
, store
, offset
)) {
2704 brw_inst
*insn
= store
+ offset
;
2706 switch (brw_inst_opcode(devinfo
, insn
)) {
2710 case BRW_OPCODE_ENDIF
:
2715 case BRW_OPCODE_WHILE
:
2716 /* If the while doesn't jump before our instruction, it's the end
2717 * of a sibling do...while loop. Ignore it.
2719 if (!while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2722 case BRW_OPCODE_ELSE
:
2723 case BRW_OPCODE_HALT
:
2732 /* There is no DO instruction on gen6, so to find the end of the loop
2733 * we have to see if the loop is jumping back before our start
/* Gen6+ only (no DO instruction): find the offset of the WHILE that
 * closes the loop containing start_offset by scanning forward for a
 * WHILE that jumps back before start_offset. Falls through to an
 * assert if none is found.
 * NOTE(review): extraction dropped lines ('offset' declaration and the
 * 'return offset;' inside the loop) -- confirm upstream. */
2737 brw_find_loop_end(struct brw_codegen
*p
, int start_offset
)
2739 const struct gen_device_info
*devinfo
= p
->devinfo
;
2741 void *store
= p
->store
;
2743 assert(devinfo
->gen
>= 6);
2745 /* Always start after the instruction (such as a WHILE) we're trying to fix
2748 for (offset
= next_offset(devinfo
, store
, start_offset
);
2749 offset
< p
->next_insn_offset
;
2750 offset
= next_offset(devinfo
, store
, offset
)) {
2751 brw_inst
*insn
= store
+ offset
;
2753 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
) {
2754 if (while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2758 assert(!"not reached");
2759 return start_offset
;
2762 /* After program generation, go back and update the UIP and JIP of
2763 * BREAK, CONT, and HALT instructions to their correct locations.
/* Post-generation fixup pass (Gen6+): walk the instruction store from
 * start_offset and patch JIP/UIP on BREAK, CONTINUE, ENDIF and HALT to
 * their final byte offsets, scaled per generation. Pre-Gen6 has nothing
 * to do and returns early.
 * NOTE(review): extraction dropped lines (break statements, braces, and
 * 'else' keywords between the patched cases) -- confirm upstream. */
2766 brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
)
2768 const struct gen_device_info
*devinfo
= p
->devinfo
;
2770 int br
= brw_jump_scale(devinfo
);
2771 int scale
= 16 / br
;
2772 void *store
= p
->store
;
2774 if (devinfo
->gen
< 6)
2777 for (offset
= start_offset
; offset
< p
->next_insn_offset
; offset
+= 16) {
2778 brw_inst
*insn
= store
+ offset
;
2779 assert(brw_inst_cmpt_control(devinfo
, insn
) == 0);
2781 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2782 switch (brw_inst_opcode(devinfo
, insn
)) {
2783 case BRW_OPCODE_BREAK
:
2784 assert(block_end_offset
!= 0);
2785 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2786 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2787 brw_inst_set_uip(devinfo
, insn
,
2788 (brw_find_loop_end(p
, offset
) - offset
+
2789 (devinfo
->gen
== 6 ? 16 : 0)) / scale
);
2791 case BRW_OPCODE_CONTINUE
:
2792 assert(block_end_offset
!= 0);
2793 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2794 brw_inst_set_uip(devinfo
, insn
,
2795 (brw_find_loop_end(p
, offset
) - offset
) / scale
);
2797 assert(brw_inst_uip(devinfo
, insn
) != 0);
2798 assert(brw_inst_jip(devinfo
, insn
) != 0);
2801 case BRW_OPCODE_ENDIF
: {
/* An ENDIF with no following block end jumps to the next instruction. */
2802 int32_t jump
= (block_end_offset
== 0) ?
2803 1 * br
: (block_end_offset
- offset
) / scale
;
2804 if (devinfo
->gen
>= 7)
2805 brw_inst_set_jip(devinfo
, insn
, jump
);
2807 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump
);
2811 case BRW_OPCODE_HALT
:
2812 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2814 * "In case of the halt instruction not inside any conditional
2815 * code block, the value of <JIP> and <UIP> should be the
2816 * same. In case of the halt instruction inside conditional code
2817 * block, the <UIP> should be the end of the program, and the
2818 * <JIP> should be end of the most inner conditional code block."
2820 * The uip will have already been set by whoever set up the
2823 if (block_end_offset
== 0) {
2824 brw_inst_set_jip(devinfo
, insn
, brw_inst_uip(devinfo
, insn
));
2826 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2828 assert(brw_inst_uip(devinfo
, insn
) != 0);
2829 assert(brw_inst_jip(devinfo
, insn
) != 0);
/* Emit an FF_SYNC URB message SEND (fixed-function synchronization).
 * Resolves the implied MRF move on Gen6+ before emitting.
 * NOTE(review): extraction dropped lines ('insn' declaration, eot
 * parameter, and the tail of brw_set_ff_sync_message()) -- confirm
 * upstream. */
2835 void brw_ff_sync(struct brw_codegen
*p
,
2836 struct brw_reg dest
,
2837 unsigned msg_reg_nr
,
2838 struct brw_reg src0
,
2840 unsigned response_length
,
2843 const struct gen_device_info
*devinfo
= p
->devinfo
;
2846 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2848 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2849 brw_set_dest(p
, insn
, dest
);
2850 brw_set_src0(p
, insn
, src0
);
2851 brw_set_src1(p
, insn
, brw_imm_d(0));
2853 if (devinfo
->gen
< 6)
2854 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2856 brw_set_ff_sync_message(p
,
2864 * Emit the SEND instruction necessary to generate stream output data on Gen6
2865 * (for transform feedback).
2867 * If send_commit_msg is true, this is the last piece of stream output data
2868 * from this thread, so send the data as a committed write. According to the
2869 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2871 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2872 * writes are complete by sending the final write as a committed write."
/* Gen6 streamed-vertex-buffer write (transform feedback). When
 * send_commit_msg is set this is the thread's final stream-output write
 * and is sent as a committed write per the Sandy Bridge PRM quote above
 * this function.
 * NOTE(review): extraction dropped lines ('insn' declaration and some
 * brw_set_dp_write_message() arguments) -- confirm upstream. */
2875 brw_svb_write(struct brw_codegen
*p
,
2876 struct brw_reg dest
,
2877 unsigned msg_reg_nr
,
2878 struct brw_reg src0
,
2879 unsigned binding_table_index
,
2880 bool send_commit_msg
)
2882 const struct gen_device_info
*devinfo
= p
->devinfo
;
2883 const unsigned target_cache
=
2884 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2885 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2886 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2889 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2891 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2892 brw_set_dest(p
, insn
, dest
);
2893 brw_set_src0(p
, insn
, src0
);
2894 brw_set_src1(p
, insn
, brw_imm_d(0));
2895 brw_set_dp_write_message(p
, insn
,
2896 binding_table_index
,
2897 0, /* msg_control: ignored */
2898 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2901 true, /* header_present */
2902 0, /* last_render_target: ignored */
2903 send_commit_msg
, /* response_length */
2904 0, /* end_of_thread */
2905 send_commit_msg
); /* send_commit_msg */
/* Compute the payload size (in registers) for a surface message with
 * num_channels components: doubled in SIMD16 (when the message supports
 * SIMD16), otherwise one register per channel.
 * NOTE(review): extraction dropped lines (the has_simd4x2/has_simd16
 * parameters, the Align16 condition head and its return) -- confirm
 * upstream. */
2909 brw_surface_payload_size(struct brw_codegen
*p
,
2910 unsigned num_channels
,
2915 brw_inst_access_mode(p
->devinfo
, p
->current
) == BRW_ALIGN_16
)
2917 else if (has_simd16
&&
2918 brw_inst_exec_size(p
->devinfo
, p
->current
) == BRW_EXECUTE_16
)
2919 return 2 * num_channels
;
2921 return num_channels
;
/* Fill in the message type and control bits of an untyped-atomic SEND:
 * msg_control packs the atomic op, the return-data flag (bit 5) and the
 * SIMD-mode bit (bit 4); the message type differs between HSW/Gen8+
 * (DC port 1, with a SIMD4x2 variant for Align16) and IVB Gen7 (DC). */
2925 brw_set_dp_untyped_atomic_message(struct brw_codegen
*p
,
2928 bool response_expected
)
2930 const struct gen_device_info
*devinfo
= p
->devinfo
;
2931 unsigned msg_control
=
2932 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2933 (response_expected
? 1 << 5 : 0); /* Return data expected */
2935 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2936 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2937 if (brw_inst_exec_size(devinfo
, p
->current
) != BRW_EXECUTE_16
)
2938 msg_control
|= 1 << 4; /* SIMD8 mode */
2940 brw_inst_set_dp_msg_type(devinfo
, insn
,
2941 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
);
2943 brw_inst_set_dp_msg_type(devinfo
, insn
,
2944 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
);
2947 brw_inst_set_dp_msg_type(devinfo
, insn
,
2948 GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
);
2950 if (brw_inst_exec_size(devinfo
, p
->current
) != BRW_EXECUTE_16
)
2951 msg_control
|= 1 << 4; /* SIMD8 mode */
2954 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
/* Emit an untyped atomic operation on a surface: indirect surface SEND
 * to the data cache (port 1 on HSW/Gen8+), followed by filling in the
 * untyped-atomic descriptor bits. The destination writemask restricts
 * Align16 mode to X to avoid spurious atomics on uninitialized Y/Z/W
 * payload addresses (see comment below). */
2958 brw_untyped_atomic(struct brw_codegen
*p
,
2960 struct brw_reg payload
,
2961 struct brw_reg surface
,
2963 unsigned msg_length
,
2964 bool response_expected
)
2966 const struct gen_device_info
*devinfo
= p
->devinfo
;
2967 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2968 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2969 GEN7_SFID_DATAPORT_DATA_CACHE
);
2970 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2971 /* Mask out unused components -- This is especially important in Align16
2972 * mode on generations that don't have native support for SIMD4x2 atomics,
2973 * because unused but enabled components will cause the dataport to perform
2974 * additional atomic operations on the addresses that happen to be in the
2975 * uninitialized Y, Z and W coordinates of the payload.
2977 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2978 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2979 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
2980 brw_surface_payload_size(p
, response_expected
,
2981 devinfo
->gen
>= 8 || devinfo
->is_haswell
, true),
2984 brw_set_dp_untyped_atomic_message(
2985 p
, insn
, atomic_op
, response_expected
);
/* Fill in descriptor bits for an untyped surface read: the low nibble of
 * msg_control masks OFF the channels beyond num_channels, bits 4+ select
 * SIMD16/SIMD8 mode in Align1; message type differs HSW/Gen8+ vs Gen7. */
2989 brw_set_dp_untyped_surface_read_message(struct brw_codegen
*p
,
2990 struct brw_inst
*insn
,
2991 unsigned num_channels
)
2993 const struct gen_device_info
*devinfo
= p
->devinfo
;
2994 /* Set mask of 32-bit channels to drop. */
2995 unsigned msg_control
= 0xf & (0xf << num_channels
);
2997 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2998 if (brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
)
2999 msg_control
|= 1 << 4; /* SIMD16 mode */
3001 msg_control
|= 2 << 4; /* SIMD8 mode */
3004 brw_inst_set_dp_msg_type(devinfo
, insn
,
3005 (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3006 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
:
3007 GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
));
3008 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
/* Emit an untyped surface read of num_channels components into dst via
 * an indirect surface SEND to the data cache, then fill in the
 * untyped-surface-read descriptor bits. */
3012 brw_untyped_surface_read(struct brw_codegen
*p
,
3014 struct brw_reg payload
,
3015 struct brw_reg surface
,
3016 unsigned msg_length
,
3017 unsigned num_channels
)
3019 const struct gen_device_info
*devinfo
= p
->devinfo
;
3020 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3021 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3022 GEN7_SFID_DATAPORT_DATA_CACHE
);
3023 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3024 p
, sfid
, dst
, payload
, surface
, msg_length
,
3025 brw_surface_payload_size(p
, num_channels
, true, true),
3028 brw_set_dp_untyped_surface_read_message(
3029 p
, insn
, num_channels
);
/* Fill in descriptor bits for an untyped surface write: channel-drop
 * mask in the low nibble, SIMD mode in bits 4+ (SIMD16/SIMD8 in Align1;
 * SIMD4x2 on HSW/Gen8+ otherwise), plus the per-generation message
 * type. */
3033 brw_set_dp_untyped_surface_write_message(struct brw_codegen
*p
,
3034 struct brw_inst
*insn
,
3035 unsigned num_channels
)
3037 const struct gen_device_info
*devinfo
= p
->devinfo
;
3038 /* Set mask of 32-bit channels to drop. */
3039 unsigned msg_control
= 0xf & (0xf << num_channels
);
3041 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3042 if (brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
)
3043 msg_control
|= 1 << 4; /* SIMD16 mode */
3045 msg_control
|= 2 << 4; /* SIMD8 mode */
3047 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
3048 msg_control
|= 0 << 4; /* SIMD4x2 mode */
3050 msg_control
|= 2 << 4; /* SIMD8 mode */
3053 brw_inst_set_dp_msg_type(devinfo
, insn
,
3054 devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3055 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
:
3056 GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
);
3057 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
/* Emit an untyped surface write of num_channels components via an
 * indirect surface SEND with a null destination (writes return no data).
 * The writemask is restricted to X for Align16 on IVB-class Gen7 -- see
 * the comment referenced from brw_untyped_atomic(). */
3061 brw_untyped_surface_write(struct brw_codegen
*p
,
3062 struct brw_reg payload
,
3063 struct brw_reg surface
,
3064 unsigned msg_length
,
3065 unsigned num_channels
)
3067 const struct gen_device_info
*devinfo
= p
->devinfo
;
3068 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3069 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3070 GEN7_SFID_DATAPORT_DATA_CACHE
);
3071 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3072 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3073 const unsigned mask
= devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3074 WRITEMASK_X
: WRITEMASK_XYZW
;
3075 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3076 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3077 payload
, surface
, msg_length
, 0, align1
);
3079 brw_set_dp_untyped_surface_write_message(
3080 p
, insn
, num_channels
);
3084 brw_set_dp_typed_atomic_message(struct brw_codegen
*p
,
3085 struct brw_inst
*insn
,
3087 bool response_expected
)
3089 const struct gen_device_info
*devinfo
= p
->devinfo
;
3090 unsigned msg_control
=
3091 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
3092 (response_expected
? 1 << 5 : 0); /* Return data expected */
3094 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3095 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3096 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3097 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3099 brw_inst_set_dp_msg_type(devinfo
, insn
,
3100 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
);
3102 brw_inst_set_dp_msg_type(devinfo
, insn
,
3103 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
);
3107 brw_inst_set_dp_msg_type(devinfo
, insn
,
3108 GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
);
3110 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3111 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3114 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3118 brw_typed_atomic(struct brw_codegen
*p
,
3120 struct brw_reg payload
,
3121 struct brw_reg surface
,
3123 unsigned msg_length
,
3124 bool response_expected
) {
3125 const struct gen_device_info
*devinfo
= p
->devinfo
;
3126 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3127 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3128 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3129 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3130 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3131 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
3132 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3133 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
3134 brw_surface_payload_size(p
, response_expected
,
3135 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3138 brw_set_dp_typed_atomic_message(
3139 p
, insn
, atomic_op
, response_expected
);
3143 brw_set_dp_typed_surface_read_message(struct brw_codegen
*p
,
3144 struct brw_inst
*insn
,
3145 unsigned num_channels
)
3147 const struct gen_device_info
*devinfo
= p
->devinfo
;
3148 /* Set mask of unused channels. */
3149 unsigned msg_control
= 0xf & (0xf << num_channels
);
3151 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3152 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3153 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3154 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3156 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3159 brw_inst_set_dp_msg_type(devinfo
, insn
,
3160 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
);
3162 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3163 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3164 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3167 brw_inst_set_dp_msg_type(devinfo
, insn
,
3168 GEN7_DATAPORT_RC_TYPED_SURFACE_READ
);
3171 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3175 brw_typed_surface_read(struct brw_codegen
*p
,
3177 struct brw_reg payload
,
3178 struct brw_reg surface
,
3179 unsigned msg_length
,
3180 unsigned num_channels
)
3182 const struct gen_device_info
*devinfo
= p
->devinfo
;
3183 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3184 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3185 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3186 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3187 p
, sfid
, dst
, payload
, surface
, msg_length
,
3188 brw_surface_payload_size(p
, num_channels
,
3189 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3192 brw_set_dp_typed_surface_read_message(
3193 p
, insn
, num_channels
);
3197 brw_set_dp_typed_surface_write_message(struct brw_codegen
*p
,
3198 struct brw_inst
*insn
,
3199 unsigned num_channels
)
3201 const struct gen_device_info
*devinfo
= p
->devinfo
;
3202 /* Set mask of unused channels. */
3203 unsigned msg_control
= 0xf & (0xf << num_channels
);
3205 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3206 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3207 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3208 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3210 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3213 brw_inst_set_dp_msg_type(devinfo
, insn
,
3214 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
);
3217 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3218 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3219 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3222 brw_inst_set_dp_msg_type(devinfo
, insn
,
3223 GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
);
3226 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3230 brw_typed_surface_write(struct brw_codegen
*p
,
3231 struct brw_reg payload
,
3232 struct brw_reg surface
,
3233 unsigned msg_length
,
3234 unsigned num_channels
)
3236 const struct gen_device_info
*devinfo
= p
->devinfo
;
3237 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3238 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3239 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3240 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3241 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3242 const unsigned mask
= (devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3243 WRITEMASK_X
: WRITEMASK_XYZW
);
3244 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3245 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3246 payload
, surface
, msg_length
, 0, true);
3248 brw_set_dp_typed_surface_write_message(
3249 p
, insn
, num_channels
);
3253 brw_set_memory_fence_message(struct brw_codegen
*p
,
3254 struct brw_inst
*insn
,
3255 enum brw_message_target sfid
,
3258 const struct gen_device_info
*devinfo
= p
->devinfo
;
3260 brw_set_message_descriptor(p
, insn
, sfid
,
3261 1 /* message length */,
3262 (commit_enable
? 1 : 0) /* response length */,
3263 true /* header present */,
3267 case GEN6_SFID_DATAPORT_RENDER_CACHE
:
3268 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_RC_MEMORY_FENCE
);
3270 case GEN7_SFID_DATAPORT_DATA_CACHE
:
3271 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_DC_MEMORY_FENCE
);
3274 unreachable("Not reached");
3278 brw_inst_set_dp_msg_control(devinfo
, insn
, 1 << 5);
3282 brw_memory_fence(struct brw_codegen
*p
,
3285 const struct gen_device_info
*devinfo
= p
->devinfo
;
3286 const bool commit_enable
= devinfo
->gen
== 7 && !devinfo
->is_haswell
;
3287 struct brw_inst
*insn
;
3289 brw_push_insn_state(p
);
3290 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3291 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3294 /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3295 * message doesn't write anything back.
3297 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3298 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
3299 brw_set_dest(p
, insn
, dst
);
3300 brw_set_src0(p
, insn
, dst
);
3301 brw_set_memory_fence_message(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
3304 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
3305 /* IVB does typed surface access through the render cache, so we need to
3306 * flush it too. Use a different register so both flushes can be
3307 * pipelined by the hardware.
3309 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3310 brw_set_dest(p
, insn
, offset(dst
, 1));
3311 brw_set_src0(p
, insn
, offset(dst
, 1));
3312 brw_set_memory_fence_message(p
, insn
, GEN6_SFID_DATAPORT_RENDER_CACHE
,
3315 /* Now write the response of the second message into the response of the
3316 * first to trigger a pipeline stall -- This way future render and data
3317 * cache messages will be properly ordered with respect to past data and
3318 * render cache messages.
3320 brw_MOV(p
, dst
, offset(dst
, 1));
3323 brw_pop_insn_state(p
);
3327 brw_pixel_interpolator_query(struct brw_codegen
*p
,
3328 struct brw_reg dest
,
3332 struct brw_reg data
,
3333 unsigned msg_length
,
3334 unsigned response_length
)
3336 const struct gen_device_info
*devinfo
= p
->devinfo
;
3337 struct brw_inst
*insn
;
3338 const uint16_t exec_size
= brw_inst_exec_size(devinfo
, p
->current
);
3340 /* brw_send_indirect_message will automatically use a direct send message
3341 * if data is actually immediate.
3343 insn
= brw_send_indirect_message(p
,
3344 GEN7_SFID_PIXEL_INTERPOLATOR
,
3348 brw_inst_set_mlen(devinfo
, insn
, msg_length
);
3349 brw_inst_set_rlen(devinfo
, insn
, response_length
);
3351 brw_inst_set_pi_simd_mode(devinfo
, insn
, exec_size
== BRW_EXECUTE_16
);
3352 brw_inst_set_pi_slot_group(devinfo
, insn
, 0); /* zero unless 32/64px dispatch */
3353 brw_inst_set_pi_nopersp(devinfo
, insn
, noperspective
);
3354 brw_inst_set_pi_message_type(devinfo
, insn
, mode
);
3358 brw_find_live_channel(struct brw_codegen
*p
, struct brw_reg dst
,
3359 struct brw_reg mask
)
3361 const struct gen_device_info
*devinfo
= p
->devinfo
;
3362 const unsigned exec_size
= 1 << brw_inst_exec_size(devinfo
, p
->current
);
3363 const unsigned qtr_control
= brw_inst_qtr_control(devinfo
, p
->current
);
3366 assert(devinfo
->gen
>= 7);
3367 assert(mask
.type
== BRW_REGISTER_TYPE_UD
);
3369 brw_push_insn_state(p
);
3371 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3372 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3374 if (devinfo
->gen
>= 8) {
3375 /* Getting the first active channel index is easy on Gen8: Just find
3376 * the first bit set in the execution mask. The register exists on
3377 * HSW already but it reads back as all ones when the current
3378 * instruction has execution masking disabled, so it's kind of
3381 struct brw_reg exec_mask
=
3382 retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
);
3384 if (mask
.file
!= BRW_IMMEDIATE_VALUE
|| mask
.ud
!= 0xffffffff) {
3385 /* Unfortunately, ce0 does not take into account the thread
3386 * dispatch mask, which may be a problem in cases where it's not
3387 * tightly packed (i.e. it doesn't have the form '2^n - 1' for
3388 * some n). Combine ce0 with the given dispatch (or vector) mask
3389 * to mask off those channels which were never dispatched by the
3392 brw_SHR(p
, vec1(dst
), mask
, brw_imm_ud(qtr_control
* 8));
3393 brw_AND(p
, vec1(dst
), exec_mask
, vec1(dst
));
3394 exec_mask
= vec1(dst
);
3397 /* Quarter control has the effect of magically shifting the value of
3398 * ce0 so you'll get the first active channel relative to the
3399 * specified quarter control as result.
3401 inst
= brw_FBL(p
, vec1(dst
), exec_mask
);
3403 const struct brw_reg flag
= brw_flag_reg(1, 0);
3405 brw_MOV(p
, retype(flag
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
3407 /* Run enough instructions returning zero with execution masking and
3408 * a conditional modifier enabled in order to get the full execution
3409 * mask in f1.0. We could use a single 32-wide move here if it
3410 * weren't because of the hardware bug that causes channel enables to
3411 * be applied incorrectly to the second half of 32-wide instructions
3414 const unsigned lower_size
= MIN2(16, exec_size
);
3415 for (unsigned i
= 0; i
< exec_size
/ lower_size
; i
++) {
3416 inst
= brw_MOV(p
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
),
3418 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3419 brw_inst_set_group(devinfo
, inst
, lower_size
* i
+ 8 * qtr_control
);
3420 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_Z
);
3421 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3422 brw_inst_set_exec_size(devinfo
, inst
, cvt(lower_size
) - 1);
3425 /* Find the first bit set in the exec_size-wide portion of the flag
3426 * register that was updated by the last sequence of MOV
3429 const enum brw_reg_type type
= brw_int_type(exec_size
/ 8, false);
3430 brw_FBL(p
, vec1(dst
), byte_offset(retype(flag
, type
), qtr_control
));
3433 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3435 if (devinfo
->gen
>= 8 &&
3436 mask
.file
== BRW_IMMEDIATE_VALUE
&& mask
.ud
== 0xffffffff) {
3437 /* In SIMD4x2 mode the first active channel index is just the
3438 * negation of the first bit of the mask register. Note that ce0
3439 * doesn't take into account the dispatch mask, so the Gen7 path
3440 * should be used instead unless you have the guarantee that the
3441 * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
3444 inst
= brw_AND(p
, brw_writemask(dst
, WRITEMASK_X
),
3445 negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
)),
3449 /* Overwrite the destination without and with execution masking to
3450 * find out which of the channels is active.
3452 brw_push_insn_state(p
);
3453 brw_set_default_exec_size(p
, BRW_EXECUTE_4
);
3454 brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3457 inst
= brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3459 brw_pop_insn_state(p
);
3460 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3464 brw_pop_insn_state(p
);
3468 brw_broadcast(struct brw_codegen
*p
,
3473 const struct gen_device_info
*devinfo
= p
->devinfo
;
3474 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3477 brw_push_insn_state(p
);
3478 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3479 brw_set_default_exec_size(p
, align1
? BRW_EXECUTE_1
: BRW_EXECUTE_4
);
3481 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
&&
3482 src
.address_mode
== BRW_ADDRESS_DIRECT
);
3484 if ((src
.vstride
== 0 && (src
.hstride
== 0 || !align1
)) ||
3485 idx
.file
== BRW_IMMEDIATE_VALUE
) {
3486 /* Trivial, the source is already uniform or the index is a constant.
3487 * We will typically not get here if the optimizer is doing its job, but
3488 * asserting would be mean.
3490 const unsigned i
= idx
.file
== BRW_IMMEDIATE_VALUE
? idx
.ud
: 0;
3492 (align1
? stride(suboffset(src
, i
), 0, 1, 0) :
3493 stride(suboffset(src
, 4 * i
), 0, 4, 1)));
3496 const struct brw_reg addr
=
3497 retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
3498 const unsigned offset
= src
.nr
* REG_SIZE
+ src
.subnr
;
3499 /* Limit in bytes of the signed indirect addressing immediate. */
3500 const unsigned limit
= 512;
3502 brw_push_insn_state(p
);
3503 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3504 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
3506 /* Take into account the component size and horizontal stride. */
3507 assert(src
.vstride
== src
.hstride
+ src
.width
);
3508 brw_SHL(p
, addr
, vec1(idx
),
3509 brw_imm_ud(_mesa_logbase2(type_sz(src
.type
)) +
3512 /* We can only address up to limit bytes using the indirect
3513 * addressing immediate, account for the difference if the source
3514 * register is above this limit.
3516 if (offset
>= limit
)
3517 brw_ADD(p
, addr
, addr
, brw_imm_ud(offset
- offset
% limit
));
3519 brw_pop_insn_state(p
);
3521 /* Use indirect addressing to fetch the specified component. */
3523 retype(brw_vec1_indirect(addr
.subnr
, offset
% limit
),
3526 /* In SIMD4x2 mode the index can be either zero or one, replicate it
3527 * to all bits of a flag register,
3531 stride(brw_swizzle(idx
, BRW_SWIZZLE_XXXX
), 4, 4, 1));
3532 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NONE
);
3533 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_NZ
);
3534 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3536 /* and use predicated SEL to pick the right channel. */
3537 inst
= brw_SEL(p
, dst
,
3538 stride(suboffset(src
, 4), 4, 4, 1),
3539 stride(src
, 4, 4, 1));
3540 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NORMAL
);
3541 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3545 brw_pop_insn_state(p
);
3549 * This instruction is generated as a single-channel align1 instruction by
3550 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3552 * We can't use the typed atomic op in the FS because that has the execution
3553 * mask ANDed with the pixel mask, but we just want to write the one dword for
3556 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
3557 * one u32. So we use the same untyped atomic write message as the pixel
3560 * The untyped atomic operation requires a BUFFER surface type with RAW
3561 * format, and is only accessible through the legacy DATA_CACHE dataport
3564 void brw_shader_time_add(struct brw_codegen
*p
,
3565 struct brw_reg payload
,
3566 uint32_t surf_index
)
3568 const unsigned sfid
= (p
->devinfo
->gen
>= 8 || p
->devinfo
->is_haswell
?
3569 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3570 GEN7_SFID_DATAPORT_DATA_CACHE
);
3571 assert(p
->devinfo
->gen
>= 7);
3573 brw_push_insn_state(p
);
3574 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3575 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3576 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3577 brw_inst
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
3579 /* We use brw_vec1_reg and unmasked because we want to increment the given
3582 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
3584 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
3586 brw_set_src1(p
, send
, brw_imm_ud(0));
3587 brw_set_message_descriptor(p
, send
, sfid
, 2, 0, false, false);
3588 brw_inst_set_binding_table_index(p
->devinfo
, send
, surf_index
);
3589 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, false);
3591 brw_pop_insn_state(p
);
3596 * Emit the SEND message for a barrier
3599 brw_barrier(struct brw_codegen
*p
, struct brw_reg src
)
3601 const struct gen_device_info
*devinfo
= p
->devinfo
;
3602 struct brw_inst
*inst
;
3604 assert(devinfo
->gen
>= 7);
3606 brw_push_insn_state(p
);
3607 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3608 inst
= next_insn(p
, BRW_OPCODE_SEND
);
3609 brw_set_dest(p
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
));
3610 brw_set_src0(p
, inst
, src
);
3611 brw_set_src1(p
, inst
, brw_null_reg());
3613 brw_set_message_descriptor(p
, inst
, BRW_SFID_MESSAGE_GATEWAY
,
3615 0 /* response_length */,
3616 false /* header_present */,
3617 false /* end_of_thread */);
3619 brw_inst_set_gateway_notify(devinfo
, inst
, 1);
3620 brw_inst_set_gateway_subfuncid(devinfo
, inst
,
3621 BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG
);
3623 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
3624 brw_pop_insn_state(p
);
3629 * Emit the wait instruction for a barrier
3632 brw_WAIT(struct brw_codegen
*p
)
3634 const struct gen_device_info
*devinfo
= p
->devinfo
;
3635 struct brw_inst
*insn
;
3637 struct brw_reg src
= brw_notification_reg();
3639 insn
= next_insn(p
, BRW_OPCODE_WAIT
);
3640 brw_set_dest(p
, insn
, src
);
3641 brw_set_src0(p
, insn
, src
);
3642 brw_set_src1(p
, insn
, brw_null_reg());
3644 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
3645 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_DISABLE
);