2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "util/ralloc.h"
40 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
41 * registers, implicitly moving the operand to a message register.
43 * On Sandybridge, this is no longer the case. This function performs the
44 * explicit move; it should be called before emitting a SEND instruction.
47 gen6_resolve_implied_move(struct brw_codegen
*p
,
51 const struct brw_device_info
*devinfo
= p
->devinfo
;
55 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
58 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
59 brw_push_insn_state(p
);
60 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
61 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
62 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
63 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
64 retype(*src
, BRW_REGISTER_TYPE_UD
));
65 brw_pop_insn_state(p
);
67 *src
= brw_message_reg(msg_reg_nr
);
71 gen7_convert_mrf_to_grf(struct brw_codegen
*p
, struct brw_reg
*reg
)
73 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
74 * "The send with EOT should use register space R112-R127 for <src>. This is
75 * to enable loading of a new thread into the same slot while the message
76 * with EOT for current thread is pending dispatch."
78 * Since we're pretending to have 16 MRFs anyway, we may as well use the
79 * registers required for messages with EOT.
81 const struct brw_device_info
*devinfo
= p
->devinfo
;
82 if (devinfo
->gen
>= 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
83 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
84 reg
->nr
+= GEN7_MRF_HACK_START
;
89 * Convert a brw_reg_type enumeration value into the hardware representation.
91 * The hardware encoding may depend on whether the value is an immediate.
94 brw_reg_type_to_hw_type(const struct brw_device_info
*devinfo
,
95 enum brw_reg_type type
, unsigned file
)
97 if (file
== BRW_IMMEDIATE_VALUE
) {
98 const static int imm_hw_types
[] = {
99 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
100 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
101 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
102 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
103 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
104 [BRW_REGISTER_TYPE_UB
] = -1,
105 [BRW_REGISTER_TYPE_B
] = -1,
106 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
107 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
108 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
109 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
110 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
111 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
112 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
114 assert(type
< ARRAY_SIZE(imm_hw_types
));
115 assert(imm_hw_types
[type
] != -1);
116 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
117 return imm_hw_types
[type
];
119 /* Non-immediate registers */
120 const static int hw_types
[] = {
121 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
122 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
123 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
124 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
125 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
126 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
127 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
128 [BRW_REGISTER_TYPE_UV
] = -1,
129 [BRW_REGISTER_TYPE_VF
] = -1,
130 [BRW_REGISTER_TYPE_V
] = -1,
131 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
132 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
133 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
134 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
136 assert(type
< ARRAY_SIZE(hw_types
));
137 assert(hw_types
[type
] != -1);
138 assert(devinfo
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
139 assert(devinfo
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
140 return hw_types
[type
];
145 brw_set_dest(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg dest
)
147 const struct brw_device_info
*devinfo
= p
->devinfo
;
149 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
150 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
151 assert(dest
.nr
< 128);
153 gen7_convert_mrf_to_grf(p
, &dest
);
155 brw_inst_set_dst_reg_file(devinfo
, inst
, dest
.file
);
156 brw_inst_set_dst_reg_type(devinfo
, inst
,
157 brw_reg_type_to_hw_type(devinfo
, dest
.type
,
159 brw_inst_set_dst_address_mode(devinfo
, inst
, dest
.address_mode
);
161 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
162 brw_inst_set_dst_da_reg_nr(devinfo
, inst
, dest
.nr
);
164 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
165 brw_inst_set_dst_da1_subreg_nr(devinfo
, inst
, dest
.subnr
);
166 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
167 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
168 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
170 brw_inst_set_dst_da16_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
171 brw_inst_set_da16_writemask(devinfo
, inst
, dest
.dw1
.bits
.writemask
);
172 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
173 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
174 assert(dest
.dw1
.bits
.writemask
!= 0);
176 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
177 * Although Dst.HorzStride is a don't care for Align16, HW needs
178 * this to be programmed as "01".
180 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
183 brw_inst_set_dst_ia_subreg_nr(devinfo
, inst
, dest
.subnr
);
185 /* These are different sizes in align1 vs align16:
187 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
188 brw_inst_set_dst_ia1_addr_imm(devinfo
, inst
,
189 dest
.dw1
.bits
.indirect_offset
);
190 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
191 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
192 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
194 brw_inst_set_dst_ia16_addr_imm(devinfo
, inst
,
195 dest
.dw1
.bits
.indirect_offset
);
196 /* even ignored in da16, still need to set as '01' */
197 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
201 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
202 * or 16 (SIMD16), as that's normally correct. However, when dealing with
203 * small registers, we automatically reduce it to match the register size.
205 if (dest
.width
< BRW_EXECUTE_8
)
206 brw_inst_set_exec_size(devinfo
, inst
, dest
.width
);
209 extern int reg_type_size
[];
212 validate_reg(const struct brw_device_info
*devinfo
,
213 brw_inst
*inst
, struct brw_reg reg
)
215 const int hstride_for_reg
[] = {0, 1, 2, 4};
216 const int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32};
217 const int width_for_reg
[] = {1, 2, 4, 8, 16};
218 const int execsize_for_reg
[] = {1, 2, 4, 8, 16, 32};
219 int width
, hstride
, vstride
, execsize
;
221 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
222 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
223 * mean the destination has to be 128-bit aligned and the
224 * destination horiz stride has to be a word.
226 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
227 assert(hstride_for_reg
[brw_inst_dst_hstride(devinfo
, inst
)] *
228 reg_type_size
[brw_inst_dst_reg_type(devinfo
, inst
)] == 2);
234 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
235 reg
.file
== BRW_ARF_NULL
)
238 assert(reg
.hstride
>= 0 && reg
.hstride
< ARRAY_SIZE(hstride_for_reg
));
239 hstride
= hstride_for_reg
[reg
.hstride
];
241 if (reg
.vstride
== 0xf) {
244 assert(reg
.vstride
>= 0 && reg
.vstride
< ARRAY_SIZE(vstride_for_reg
));
245 vstride
= vstride_for_reg
[reg
.vstride
];
248 assert(reg
.width
>= 0 && reg
.width
< ARRAY_SIZE(width_for_reg
));
249 width
= width_for_reg
[reg
.width
];
251 assert(brw_inst_exec_size(devinfo
, inst
) >= 0 &&
252 brw_inst_exec_size(devinfo
, inst
) < ARRAY_SIZE(execsize_for_reg
));
253 execsize
= execsize_for_reg
[brw_inst_exec_size(devinfo
, inst
)];
255 /* Restrictions from 3.3.10: Register Region Restrictions. */
257 assert(execsize
>= width
);
260 if (execsize
== width
&& hstride
!= 0) {
261 assert(vstride
== -1 || vstride
== width
* hstride
);
265 if (execsize
== width
&& hstride
== 0) {
266 /* no restriction on vstride. */
271 assert(hstride
== 0);
275 if (execsize
== 1 && width
== 1) {
276 assert(hstride
== 0);
277 assert(vstride
== 0);
281 if (vstride
== 0 && hstride
== 0) {
285 /* 10. Check destination issues. */
/**
 * Return true if \p imm fits the compacted-instruction immediate encoding:
 * the low 12 bits are stored literally and a single bit is replicated
 * through the top 20 bits.
 *
 * Fix: the previous text tested imm against 0/0xfffff000 without first
 * masking off the low 12 bits, so values such as 0xfffff123 (which are
 * compactable) were rejected.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfff;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
299 brw_set_src0(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
301 const struct brw_device_info
*devinfo
= p
->devinfo
;
303 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
304 assert(reg
.nr
< 128);
306 gen7_convert_mrf_to_grf(p
, ®
);
308 if (devinfo
->gen
>= 6 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
309 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
)) {
310 /* Any source modifiers or regions will be ignored, since this just
311 * identifies the MRF/GRF to start reading the message contents from.
312 * Check for some likely failures.
316 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
319 validate_reg(devinfo
, inst
, reg
);
321 brw_inst_set_src0_reg_file(devinfo
, inst
, reg
.file
);
322 brw_inst_set_src0_reg_type(devinfo
, inst
,
323 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
324 brw_inst_set_src0_abs(devinfo
, inst
, reg
.abs
);
325 brw_inst_set_src0_negate(devinfo
, inst
, reg
.negate
);
326 brw_inst_set_src0_address_mode(devinfo
, inst
, reg
.address_mode
);
328 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
329 brw_inst_set_imm_ud(devinfo
, inst
, reg
.dw1
.ud
);
331 /* The Bspec's section titled "Non-present Operands" claims that if src0
332 * is an immediate that src1's type must be the same as that of src0.
334 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
335 * that do not follow this rule. E.g., from the IVB/HSW table:
337 * DataTypeIndex 18-Bit Mapping Mapped Meaning
338 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
340 * And from the SNB table:
342 * DataTypeIndex 18-Bit Mapping Mapped Meaning
343 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
345 * Neither of these cause warnings from the simulator when used,
346 * compacted or otherwise. In fact, all compaction mappings that have an
347 * immediate in src0 use a:ud for src1.
349 * The GM45 instruction compaction tables do not contain mapped meanings
350 * so it's not clear whether it has the restriction. We'll assume it was
351 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
353 brw_inst_set_src1_reg_file(devinfo
, inst
, BRW_ARCHITECTURE_REGISTER_FILE
);
354 if (devinfo
->gen
< 6) {
355 brw_inst_set_src1_reg_type(devinfo
, inst
,
356 brw_inst_src0_reg_type(devinfo
, inst
));
358 brw_inst_set_src1_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
361 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
362 * for immediate values. Presumably the hardware engineers realized
363 * that the only useful floating-point value that could be represented
364 * in this format is 0.0, which can also be represented as a VF-typed
365 * immediate, so they gave us the previously mentioned mapping on IVB+.
367 * Strangely, we do have a mapping for imm:f in src1, so we don't need
370 * If we see a 0.0:F, change the type to VF so that it can be compacted.
372 if (brw_inst_imm_ud(devinfo
, inst
) == 0x0 &&
373 brw_inst_src0_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_F
) {
374 brw_inst_set_src0_reg_type(devinfo
, inst
, BRW_HW_REG_IMM_TYPE_VF
);
377 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
378 * set the types to :UD so the instruction can be compacted.
380 if (is_compactable_immediate(brw_inst_imm_ud(devinfo
, inst
)) &&
381 brw_inst_cond_modifier(devinfo
, inst
) == BRW_CONDITIONAL_NONE
&&
382 brw_inst_src0_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_D
&&
383 brw_inst_dst_reg_type(devinfo
, inst
) == BRW_HW_REG_TYPE_D
) {
384 brw_inst_set_src0_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
385 brw_inst_set_dst_reg_type(devinfo
, inst
, BRW_HW_REG_TYPE_UD
);
388 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
389 brw_inst_set_src0_da_reg_nr(devinfo
, inst
, reg
.nr
);
390 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
391 brw_inst_set_src0_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
393 brw_inst_set_src0_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
396 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.subnr
);
398 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
399 brw_inst_set_src0_ia1_addr_imm(devinfo
, inst
, reg
.dw1
.bits
.indirect_offset
);
401 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.dw1
.bits
.indirect_offset
);
405 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
406 if (reg
.width
== BRW_WIDTH_1
&&
407 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
408 brw_inst_set_src0_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
409 brw_inst_set_src0_width(devinfo
, inst
, BRW_WIDTH_1
);
410 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
412 brw_inst_set_src0_hstride(devinfo
, inst
, reg
.hstride
);
413 brw_inst_set_src0_width(devinfo
, inst
, reg
.width
);
414 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
417 brw_inst_set_src0_da16_swiz_x(devinfo
, inst
,
418 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
));
419 brw_inst_set_src0_da16_swiz_y(devinfo
, inst
,
420 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
));
421 brw_inst_set_src0_da16_swiz_z(devinfo
, inst
,
422 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
));
423 brw_inst_set_src0_da16_swiz_w(devinfo
, inst
,
424 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
));
426 /* This is an oddity of the fact we're using the same
427 * descriptions for registers in align_16 as align_1:
429 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
430 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
432 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
439 brw_set_src1(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
441 const struct brw_device_info
*devinfo
= p
->devinfo
;
443 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
444 assert(reg
.nr
< 128);
446 gen7_convert_mrf_to_grf(p
, ®
);
447 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
449 validate_reg(devinfo
, inst
, reg
);
451 brw_inst_set_src1_reg_file(devinfo
, inst
, reg
.file
);
452 brw_inst_set_src1_reg_type(devinfo
, inst
,
453 brw_reg_type_to_hw_type(devinfo
, reg
.type
, reg
.file
));
454 brw_inst_set_src1_abs(devinfo
, inst
, reg
.abs
);
455 brw_inst_set_src1_negate(devinfo
, inst
, reg
.negate
);
457 /* Only src1 can be immediate in two-argument instructions.
459 assert(brw_inst_src0_reg_file(devinfo
, inst
) != BRW_IMMEDIATE_VALUE
);
461 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
462 brw_inst_set_imm_ud(devinfo
, inst
, reg
.dw1
.ud
);
464 /* This is a hardware restriction, which may or may not be lifted
467 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
468 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
470 brw_inst_set_src1_da_reg_nr(devinfo
, inst
, reg
.nr
);
471 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
472 brw_inst_set_src1_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
474 brw_inst_set_src1_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
477 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
478 if (reg
.width
== BRW_WIDTH_1
&&
479 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
480 brw_inst_set_src1_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
481 brw_inst_set_src1_width(devinfo
, inst
, BRW_WIDTH_1
);
482 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
484 brw_inst_set_src1_hstride(devinfo
, inst
, reg
.hstride
);
485 brw_inst_set_src1_width(devinfo
, inst
, reg
.width
);
486 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
489 brw_inst_set_src1_da16_swiz_x(devinfo
, inst
,
490 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
));
491 brw_inst_set_src1_da16_swiz_y(devinfo
, inst
,
492 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
));
493 brw_inst_set_src1_da16_swiz_z(devinfo
, inst
,
494 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
));
495 brw_inst_set_src1_da16_swiz_w(devinfo
, inst
,
496 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
));
498 /* This is an oddity of the fact we're using the same
499 * descriptions for registers in align_16 as align_1:
501 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
502 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
504 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
510 * Set the Message Descriptor and Extended Message Descriptor fields
513 * \note This zeroes out the Function Control bits, so it must be called
514 * \b before filling out any message-specific data. Callers can
515 * choose not to fill in irrelevant bits; they will be zero.
518 brw_set_message_descriptor(struct brw_codegen
*p
,
520 enum brw_message_target sfid
,
522 unsigned response_length
,
526 const struct brw_device_info
*devinfo
= p
->devinfo
;
528 brw_set_src1(p
, inst
, brw_imm_d(0));
530 /* For indirect sends, `inst` will not be the SEND/SENDC instruction
531 * itself; instead, it will be a MOV/OR into the address register.
533 * In this case, we avoid setting the extended message descriptor bits,
534 * since they go on the later SEND/SENDC instead and if set here would
535 * instead clobber the conditionalmod bits.
537 unsigned opcode
= brw_inst_opcode(devinfo
, inst
);
538 if (opcode
== BRW_OPCODE_SEND
|| opcode
== BRW_OPCODE_SENDC
) {
539 brw_inst_set_sfid(devinfo
, inst
, sfid
);
542 brw_inst_set_mlen(devinfo
, inst
, msg_length
);
543 brw_inst_set_rlen(devinfo
, inst
, response_length
);
544 brw_inst_set_eot(devinfo
, inst
, end_of_thread
);
546 if (devinfo
->gen
>= 5) {
547 brw_inst_set_header_present(devinfo
, inst
, header_present
);
551 static void brw_set_math_message( struct brw_codegen
*p
,
554 unsigned integer_type
,
558 const struct brw_device_info
*devinfo
= p
->devinfo
;
560 unsigned response_length
;
562 /* Infer message length from the function */
564 case BRW_MATH_FUNCTION_POW
:
565 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
566 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
567 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
575 /* Infer response length from the function */
577 case BRW_MATH_FUNCTION_SINCOS
:
578 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
587 brw_set_message_descriptor(p
, inst
, BRW_SFID_MATH
,
588 msg_length
, response_length
, false, false);
589 brw_inst_set_math_msg_function(devinfo
, inst
, function
);
590 brw_inst_set_math_msg_signed_int(devinfo
, inst
, integer_type
);
591 brw_inst_set_math_msg_precision(devinfo
, inst
, low_precision
);
592 brw_inst_set_math_msg_saturate(devinfo
, inst
, brw_inst_saturate(devinfo
, inst
));
593 brw_inst_set_math_msg_data_type(devinfo
, inst
, dataType
);
594 brw_inst_set_saturate(devinfo
, inst
, 0);
598 static void brw_set_ff_sync_message(struct brw_codegen
*p
,
601 unsigned response_length
,
604 const struct brw_device_info
*devinfo
= p
->devinfo
;
606 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
607 1, response_length
, true, end_of_thread
);
608 brw_inst_set_urb_opcode(devinfo
, insn
, 1); /* FF_SYNC */
609 brw_inst_set_urb_allocate(devinfo
, insn
, allocate
);
610 /* The following fields are not used by FF_SYNC: */
611 brw_inst_set_urb_global_offset(devinfo
, insn
, 0);
612 brw_inst_set_urb_swizzle_control(devinfo
, insn
, 0);
613 brw_inst_set_urb_used(devinfo
, insn
, 0);
614 brw_inst_set_urb_complete(devinfo
, insn
, 0);
617 static void brw_set_urb_message( struct brw_codegen
*p
,
619 enum brw_urb_write_flags flags
,
621 unsigned response_length
,
623 unsigned swizzle_control
)
625 const struct brw_device_info
*devinfo
= p
->devinfo
;
627 assert(devinfo
->gen
< 7 || swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
628 assert(devinfo
->gen
< 7 || !(flags
& BRW_URB_WRITE_ALLOCATE
));
629 assert(devinfo
->gen
>= 7 || !(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
631 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
632 msg_length
, response_length
, true,
633 flags
& BRW_URB_WRITE_EOT
);
635 if (flags
& BRW_URB_WRITE_OWORD
) {
636 assert(msg_length
== 2); /* header + one OWORD of data */
637 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_OWORD
);
639 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_HWORD
);
642 brw_inst_set_urb_global_offset(devinfo
, insn
, offset
);
643 brw_inst_set_urb_swizzle_control(devinfo
, insn
, swizzle_control
);
645 if (devinfo
->gen
< 8) {
646 brw_inst_set_urb_complete(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_COMPLETE
));
649 if (devinfo
->gen
< 7) {
650 brw_inst_set_urb_allocate(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_ALLOCATE
));
651 brw_inst_set_urb_used(devinfo
, insn
, !(flags
& BRW_URB_WRITE_UNUSED
));
653 brw_inst_set_urb_per_slot_offset(devinfo
, insn
,
654 !!(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
659 brw_set_dp_write_message(struct brw_codegen
*p
,
661 unsigned binding_table_index
,
662 unsigned msg_control
,
666 unsigned last_render_target
,
667 unsigned response_length
,
668 unsigned end_of_thread
,
669 unsigned send_commit_msg
)
671 const struct brw_device_info
*devinfo
= p
->devinfo
;
674 if (devinfo
->gen
>= 7) {
675 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
676 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
677 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
679 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
680 } else if (devinfo
->gen
== 6) {
681 /* Use the render cache for all write messages. */
682 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
684 sfid
= BRW_SFID_DATAPORT_WRITE
;
687 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
688 header_present
, end_of_thread
);
690 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
691 brw_inst_set_dp_write_msg_type(devinfo
, insn
, msg_type
);
692 brw_inst_set_dp_write_msg_control(devinfo
, insn
, msg_control
);
693 brw_inst_set_rt_last(devinfo
, insn
, last_render_target
);
694 if (devinfo
->gen
< 7) {
695 brw_inst_set_dp_write_commit(devinfo
, insn
, send_commit_msg
);
700 brw_set_dp_read_message(struct brw_codegen
*p
,
702 unsigned binding_table_index
,
703 unsigned msg_control
,
705 unsigned target_cache
,
708 unsigned response_length
)
710 const struct brw_device_info
*devinfo
= p
->devinfo
;
713 if (devinfo
->gen
>= 7) {
714 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
715 } else if (devinfo
->gen
== 6) {
716 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
717 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
719 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
721 sfid
= BRW_SFID_DATAPORT_READ
;
724 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
725 header_present
, false);
727 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
728 brw_inst_set_dp_read_msg_type(devinfo
, insn
, msg_type
);
729 brw_inst_set_dp_read_msg_control(devinfo
, insn
, msg_control
);
730 if (devinfo
->gen
< 6)
731 brw_inst_set_dp_read_target_cache(devinfo
, insn
, target_cache
);
735 brw_set_sampler_message(struct brw_codegen
*p
,
737 unsigned binding_table_index
,
740 unsigned response_length
,
742 unsigned header_present
,
744 unsigned return_format
)
746 const struct brw_device_info
*devinfo
= p
->devinfo
;
748 brw_set_message_descriptor(p
, inst
, BRW_SFID_SAMPLER
, msg_length
,
749 response_length
, header_present
, false);
751 brw_inst_set_binding_table_index(devinfo
, inst
, binding_table_index
);
752 brw_inst_set_sampler(devinfo
, inst
, sampler
);
753 brw_inst_set_sampler_msg_type(devinfo
, inst
, msg_type
);
754 if (devinfo
->gen
>= 5) {
755 brw_inst_set_sampler_simd_mode(devinfo
, inst
, simd_mode
);
756 } else if (devinfo
->gen
== 4 && !devinfo
->is_g4x
) {
757 brw_inst_set_sampler_return_format(devinfo
, inst
, return_format
);
762 gen7_set_dp_scratch_message(struct brw_codegen
*p
,
766 bool invalidate_after_read
,
768 unsigned addr_offset
,
773 const struct brw_device_info
*devinfo
= p
->devinfo
;
774 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 ||
775 (devinfo
->gen
>= 8 && num_regs
== 8));
776 brw_set_message_descriptor(p
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
777 mlen
, rlen
, header_present
, false);
778 brw_inst_set_dp_category(devinfo
, inst
, 1); /* Scratch Block Read/Write msgs */
779 brw_inst_set_scratch_read_write(devinfo
, inst
, write
);
780 brw_inst_set_scratch_type(devinfo
, inst
, dword
);
781 brw_inst_set_scratch_invalidate_after_read(devinfo
, inst
, invalidate_after_read
);
782 brw_inst_set_scratch_block_size(devinfo
, inst
, ffs(num_regs
) - 1);
783 brw_inst_set_scratch_addr_offset(devinfo
, inst
, addr_offset
);
786 #define next_insn brw_next_insn
788 brw_next_insn(struct brw_codegen
*p
, unsigned opcode
)
790 const struct brw_device_info
*devinfo
= p
->devinfo
;
793 if (p
->nr_insn
+ 1 > p
->store_size
) {
795 p
->store
= reralloc(p
->mem_ctx
, p
->store
, brw_inst
, p
->store_size
);
798 p
->next_insn_offset
+= 16;
799 insn
= &p
->store
[p
->nr_insn
++];
800 memcpy(insn
, p
->current
, sizeof(*insn
));
802 brw_inst_set_opcode(devinfo
, insn
, opcode
);
807 brw_alu1(struct brw_codegen
*p
, unsigned opcode
,
808 struct brw_reg dest
, struct brw_reg src
)
810 brw_inst
*insn
= next_insn(p
, opcode
);
811 brw_set_dest(p
, insn
, dest
);
812 brw_set_src0(p
, insn
, src
);
817 brw_alu2(struct brw_codegen
*p
, unsigned opcode
,
818 struct brw_reg dest
, struct brw_reg src0
, struct brw_reg src1
)
820 brw_inst
*insn
= next_insn(p
, opcode
);
821 brw_set_dest(p
, insn
, dest
);
822 brw_set_src0(p
, insn
, src0
);
823 brw_set_src1(p
, insn
, src1
);
828 get_3src_subreg_nr(struct brw_reg reg
)
830 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
831 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
832 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
834 return reg
.subnr
/ 4;
839 brw_alu3(struct brw_codegen
*p
, unsigned opcode
, struct brw_reg dest
,
840 struct brw_reg src0
, struct brw_reg src1
, struct brw_reg src2
)
842 const struct brw_device_info
*devinfo
= p
->devinfo
;
843 brw_inst
*inst
= next_insn(p
, opcode
);
845 gen7_convert_mrf_to_grf(p
, &dest
);
847 assert(brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_16
);
849 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
850 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
851 assert(dest
.nr
< 128);
852 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
853 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
854 dest
.type
== BRW_REGISTER_TYPE_D
||
855 dest
.type
== BRW_REGISTER_TYPE_UD
);
856 if (devinfo
->gen
== 6) {
857 brw_inst_set_3src_dst_reg_file(devinfo
, inst
,
858 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
860 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
861 brw_inst_set_3src_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
862 brw_inst_set_3src_dst_writemask(devinfo
, inst
, dest
.dw1
.bits
.writemask
);
864 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
865 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
866 assert(src0
.nr
< 128);
867 brw_inst_set_3src_src0_swizzle(devinfo
, inst
, src0
.dw1
.bits
.swizzle
);
868 brw_inst_set_3src_src0_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src0
));
869 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
870 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
871 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
872 brw_inst_set_3src_src0_rep_ctrl(devinfo
, inst
,
873 src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
875 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
876 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
877 assert(src1
.nr
< 128);
878 brw_inst_set_3src_src1_swizzle(devinfo
, inst
, src1
.dw1
.bits
.swizzle
);
879 brw_inst_set_3src_src1_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src1
));
880 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
881 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
882 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
883 brw_inst_set_3src_src1_rep_ctrl(devinfo
, inst
,
884 src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
886 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
887 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
888 assert(src2
.nr
< 128);
889 brw_inst_set_3src_src2_swizzle(devinfo
, inst
, src2
.dw1
.bits
.swizzle
);
890 brw_inst_set_3src_src2_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src2
));
891 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
892 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
893 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
894 brw_inst_set_3src_src2_rep_ctrl(devinfo
, inst
,
895 src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
897 if (devinfo
->gen
>= 7) {
898 /* Set both the source and destination types based on dest.type,
899 * ignoring the source register types. The MAD and LRP emitters ensure
900 * that all four types are float. The BFE and BFI2 emitters, however,
901 * may send us mixed D and UD types and want us to ignore that and use
902 * the destination type.
905 case BRW_REGISTER_TYPE_F
:
906 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
907 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_F
);
909 case BRW_REGISTER_TYPE_D
:
910 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
911 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_D
);
913 case BRW_REGISTER_TYPE_UD
:
914 brw_inst_set_3src_src_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
915 brw_inst_set_3src_dst_type(devinfo
, inst
, BRW_3SRC_TYPE_UD
);
918 unreachable("not reached");
/***********************************************************************
 * Convenience routines.
 *
 * Each ALUn(OP) expansion defines brw_##OP, a thin wrapper that forwards
 * to the generic brw_alu1/2/3 emitters with the matching opcode.
 */
#define ALU1(OP)                                                        \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0)                                 \
{                                                                       \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);                     \
}

#define ALU2(OP)                                                        \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0,                                 \
                   struct brw_reg src1)                                 \
{                                                                       \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);               \
}

#define ALU3(OP)                                                        \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0,                                 \
                   struct brw_reg src1,                                 \
                   struct brw_reg src2)                                 \
{                                                                       \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);         \
}

/* Three-source float-only ops: the asserts document that the 3-src
 * encoding path in brw_alu3 derives the shared type from dest.type.
 */
#define ALU3F(OP)                                                       \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0,                                 \
                   struct brw_reg src1,                                 \
                   struct brw_reg src2)                                 \
{                                                                       \
   assert(dest.type == BRW_REGISTER_TYPE_F);                            \
   assert(src0.type == BRW_REGISTER_TYPE_F);                            \
   assert(src1.type == BRW_REGISTER_TYPE_F);                            \
   assert(src2.type == BRW_REGISTER_TYPE_F);                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);         \
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                       \
void brw_##OP(struct brw_codegen *p,                                    \
              struct brw_reg dest,                                      \
              struct brw_reg src)                                       \
{                                                                       \
   const struct brw_device_info *devinfo = p->devinfo;                  \
   brw_inst *rnd, *add;                                                 \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                 \
   brw_set_dest(p, rnd, dest);                                          \
   brw_set_src0(p, rnd, src);                                           \
                                                                        \
   if (devinfo->gen < 6) {                                              \
      /* turn on round-increments */                                    \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                    \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);    \
   }                                                                    \
}
1032 brw_ADD(struct brw_codegen
*p
, struct brw_reg dest
,
1033 struct brw_reg src0
, struct brw_reg src1
)
1036 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1037 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1038 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1039 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1040 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1043 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1044 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1045 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1046 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1047 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1050 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1054 brw_AVG(struct brw_codegen
*p
, struct brw_reg dest
,
1055 struct brw_reg src0
, struct brw_reg src1
)
1057 assert(dest
.type
== src0
.type
);
1058 assert(src0
.type
== src1
.type
);
1059 switch (src0
.type
) {
1060 case BRW_REGISTER_TYPE_B
:
1061 case BRW_REGISTER_TYPE_UB
:
1062 case BRW_REGISTER_TYPE_W
:
1063 case BRW_REGISTER_TYPE_UW
:
1064 case BRW_REGISTER_TYPE_D
:
1065 case BRW_REGISTER_TYPE_UD
:
1068 unreachable("Bad type for brw_AVG");
1071 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1075 brw_MUL(struct brw_codegen
*p
, struct brw_reg dest
,
1076 struct brw_reg src0
, struct brw_reg src1
)
1079 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1080 src0
.type
== BRW_REGISTER_TYPE_UD
||
1081 src1
.type
== BRW_REGISTER_TYPE_D
||
1082 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1083 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1086 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1087 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1088 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1089 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1090 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1093 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1094 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1095 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1096 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1097 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1100 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1101 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1102 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1103 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1105 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1109 brw_LINE(struct brw_codegen
*p
, struct brw_reg dest
,
1110 struct brw_reg src0
, struct brw_reg src1
)
1112 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1113 src0
.width
= BRW_WIDTH_1
;
1114 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1115 return brw_alu2(p
, BRW_OPCODE_LINE
, dest
, src0
, src1
);
1119 brw_PLN(struct brw_codegen
*p
, struct brw_reg dest
,
1120 struct brw_reg src0
, struct brw_reg src1
)
1122 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1123 src0
.width
= BRW_WIDTH_1
;
1124 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1125 src1
.vstride
= BRW_VERTICAL_STRIDE_8
;
1126 src1
.width
= BRW_WIDTH_8
;
1127 src1
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
1128 return brw_alu2(p
, BRW_OPCODE_PLN
, dest
, src0
, src1
);
1132 brw_F32TO16(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1134 const struct brw_device_info
*devinfo
= p
->devinfo
;
1135 const bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1136 /* The F32TO16 instruction doesn't support 32-bit destination types in
1137 * Align1 mode, and neither does the Gen8 implementation in terms of a
1138 * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
1139 * an undocumented feature.
1141 const bool needs_zero_fill
= (dst
.type
== BRW_REGISTER_TYPE_UD
&&
1142 (!align16
|| devinfo
->gen
>= 8));
1146 assert(dst
.type
== BRW_REGISTER_TYPE_UD
);
1148 assert(dst
.type
== BRW_REGISTER_TYPE_UD
||
1149 dst
.type
== BRW_REGISTER_TYPE_W
||
1150 dst
.type
== BRW_REGISTER_TYPE_UW
||
1151 dst
.type
== BRW_REGISTER_TYPE_HF
);
1154 brw_push_insn_state(p
);
1156 if (needs_zero_fill
) {
1157 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
1158 dst
= spread(retype(dst
, BRW_REGISTER_TYPE_W
), 2);
1161 if (devinfo
->gen
>= 8) {
1162 inst
= brw_MOV(p
, retype(dst
, BRW_REGISTER_TYPE_HF
), src
);
1164 assert(devinfo
->gen
== 7);
1165 inst
= brw_alu1(p
, BRW_OPCODE_F32TO16
, dst
, src
);
1168 if (needs_zero_fill
) {
1169 brw_inst_set_no_dd_clear(devinfo
, inst
, true);
1170 inst
= brw_MOV(p
, suboffset(dst
, 1), brw_imm_ud(0u));
1171 brw_inst_set_no_dd_check(devinfo
, inst
, true);
1174 brw_pop_insn_state(p
);
1179 brw_F16TO32(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1181 const struct brw_device_info
*devinfo
= p
->devinfo
;
1182 bool align16
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_16
;
1185 assert(src
.type
== BRW_REGISTER_TYPE_UD
);
1187 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1189 * Because this instruction does not have a 16-bit floating-point
1190 * type, the source data type must be Word (W). The destination type
1191 * must be F (Float).
1193 if (src
.type
== BRW_REGISTER_TYPE_UD
)
1194 src
= spread(retype(src
, BRW_REGISTER_TYPE_W
), 2);
1196 assert(src
.type
== BRW_REGISTER_TYPE_W
||
1197 src
.type
== BRW_REGISTER_TYPE_UW
||
1198 src
.type
== BRW_REGISTER_TYPE_HF
);
1201 if (devinfo
->gen
>= 8) {
1202 return brw_MOV(p
, dst
, retype(src
, BRW_REGISTER_TYPE_HF
));
1204 assert(devinfo
->gen
== 7);
1205 return brw_alu1(p
, BRW_OPCODE_F16TO32
, dst
, src
);
1210 void brw_NOP(struct brw_codegen
*p
)
1212 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1213 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1214 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1215 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1222 /***********************************************************************
1223 * Comparisons, if/else/endif
1227 brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1228 unsigned predicate_control
)
1230 const struct brw_device_info
*devinfo
= p
->devinfo
;
1231 struct brw_reg ip
= brw_ip_reg();
1232 brw_inst
*inst
= brw_alu2(p
, BRW_OPCODE_JMPI
, ip
, ip
, index
);
1234 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_2
);
1235 brw_inst_set_qtr_control(devinfo
, inst
, BRW_COMPRESSION_NONE
);
1236 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
1237 brw_inst_set_pred_control(devinfo
, inst
, predicate_control
);
1243 push_if_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1245 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1247 p
->if_stack_depth
++;
1248 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1249 p
->if_stack_array_size
*= 2;
1250 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1251 p
->if_stack_array_size
);
1256 pop_if_stack(struct brw_codegen
*p
)
1258 p
->if_stack_depth
--;
1259 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1263 push_loop_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1265 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1266 p
->loop_stack_array_size
*= 2;
1267 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1268 p
->loop_stack_array_size
);
1269 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1270 p
->loop_stack_array_size
);
1273 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1274 p
->loop_stack_depth
++;
1275 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1279 get_inner_do_insn(struct brw_codegen
*p
)
1281 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1284 /* EU takes the value from the flag register and pushes it onto some
1285 * sort of a stack (presumably merging with any flag value already on
1286 * the stack). Within an if block, the flags at the top of the stack
1287 * control execution on each channel of the unit, eg. on each of the
1288 * 16 pixel values in our wm programs.
1290 * When the matching 'else' instruction is reached (presumably by
1291 * countdown of the instruction count patched in by our ELSE/ENDIF
1292 * functions), the relevant flags are inverted.
1294 * When the matching 'endif' instruction is reached, the flags are
1295 * popped off. If the stack is now empty, normal execution resumes.
1298 brw_IF(struct brw_codegen
*p
, unsigned execute_size
)
1300 const struct brw_device_info
*devinfo
= p
->devinfo
;
1303 insn
= next_insn(p
, BRW_OPCODE_IF
);
1305 /* Override the defaults for this instruction:
1307 if (devinfo
->gen
< 6) {
1308 brw_set_dest(p
, insn
, brw_ip_reg());
1309 brw_set_src0(p
, insn
, brw_ip_reg());
1310 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1311 } else if (devinfo
->gen
== 6) {
1312 brw_set_dest(p
, insn
, brw_imm_w(0));
1313 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1314 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1315 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1316 } else if (devinfo
->gen
== 7) {
1317 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1318 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1319 brw_set_src1(p
, insn
, brw_imm_w(0));
1320 brw_inst_set_jip(devinfo
, insn
, 0);
1321 brw_inst_set_uip(devinfo
, insn
, 0);
1323 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1324 brw_set_src0(p
, insn
, brw_imm_d(0));
1325 brw_inst_set_jip(devinfo
, insn
, 0);
1326 brw_inst_set_uip(devinfo
, insn
, 0);
1329 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1330 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1331 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NORMAL
);
1332 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1333 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1334 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1336 push_if_stack(p
, insn
);
1337 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1341 /* This function is only used for gen6-style IF instructions with an
1342 * embedded comparison (conditional modifier). It is not used on gen7.
1345 gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1346 struct brw_reg src0
, struct brw_reg src1
)
1348 const struct brw_device_info
*devinfo
= p
->devinfo
;
1351 insn
= next_insn(p
, BRW_OPCODE_IF
);
1353 brw_set_dest(p
, insn
, brw_imm_w(0));
1354 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1356 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1357 brw_set_src0(p
, insn
, src0
);
1358 brw_set_src1(p
, insn
, src1
);
1360 assert(brw_inst_qtr_control(devinfo
, insn
) == BRW_COMPRESSION_NONE
);
1361 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
1362 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1364 push_if_stack(p
, insn
);
1369 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1372 convert_IF_ELSE_to_ADD(struct brw_codegen
*p
,
1373 brw_inst
*if_inst
, brw_inst
*else_inst
)
1375 const struct brw_device_info
*devinfo
= p
->devinfo
;
1377 /* The next instruction (where the ENDIF would be, if it existed) */
1378 brw_inst
*next_inst
= &p
->store
[p
->nr_insn
];
1380 assert(p
->single_program_flow
);
1381 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1382 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1383 assert(brw_inst_exec_size(devinfo
, if_inst
) == BRW_EXECUTE_1
);
1385 /* Convert IF to an ADD instruction that moves the instruction pointer
1386 * to the first instruction of the ELSE block. If there is no ELSE
1387 * block, point to where ENDIF would be. Reverse the predicate.
1389 * There's no need to execute an ENDIF since we don't need to do any
1390 * stack operations, and if we're currently executing, we just want to
1391 * continue normally.
1393 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_ADD
);
1394 brw_inst_set_pred_inv(devinfo
, if_inst
, true);
1396 if (else_inst
!= NULL
) {
1397 /* Convert ELSE to an ADD instruction that points where the ENDIF
1400 brw_inst_set_opcode(devinfo
, else_inst
, BRW_OPCODE_ADD
);
1402 brw_inst_set_imm_ud(devinfo
, if_inst
, (else_inst
- if_inst
+ 1) * 16);
1403 brw_inst_set_imm_ud(devinfo
, else_inst
, (next_inst
- else_inst
) * 16);
1405 brw_inst_set_imm_ud(devinfo
, if_inst
, (next_inst
- if_inst
) * 16);
1410 * Patch IF and ELSE instructions with appropriate jump targets.
1413 patch_IF_ELSE(struct brw_codegen
*p
,
1414 brw_inst
*if_inst
, brw_inst
*else_inst
, brw_inst
*endif_inst
)
1416 const struct brw_device_info
*devinfo
= p
->devinfo
;
1418 /* We shouldn't be patching IF and ELSE instructions in single program flow
1419 * mode when gen < 6, because in single program flow mode on those
1420 * platforms, we convert flow control instructions to conditional ADDs that
1421 * operate on IP (see brw_ENDIF).
1423 * However, on Gen6, writing to IP doesn't work in single program flow mode
1424 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1425 * not be updated by non-flow control instructions."). And on later
1426 * platforms, there is no significant benefit to converting control flow
1427 * instructions to conditional ADDs. So we do patch IF and ELSE
1428 * instructions in single program flow mode on those platforms.
1430 if (devinfo
->gen
< 6)
1431 assert(!p
->single_program_flow
);
1433 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1434 assert(endif_inst
!= NULL
);
1435 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1437 unsigned br
= brw_jump_scale(devinfo
);
1439 assert(brw_inst_opcode(devinfo
, endif_inst
) == BRW_OPCODE_ENDIF
);
1440 brw_inst_set_exec_size(devinfo
, endif_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1442 if (else_inst
== NULL
) {
1443 /* Patch IF -> ENDIF */
1444 if (devinfo
->gen
< 6) {
1445 /* Turn it into an IFF, which means no mask stack operations for
1446 * all-false and jumping past the ENDIF.
1448 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_IFF
);
1449 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1450 br
* (endif_inst
- if_inst
+ 1));
1451 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1452 } else if (devinfo
->gen
== 6) {
1453 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1454 brw_inst_set_gen6_jump_count(devinfo
, if_inst
, br
*(endif_inst
- if_inst
));
1456 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1457 brw_inst_set_jip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1460 brw_inst_set_exec_size(devinfo
, else_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1462 /* Patch IF -> ELSE */
1463 if (devinfo
->gen
< 6) {
1464 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1465 br
* (else_inst
- if_inst
));
1466 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1467 } else if (devinfo
->gen
== 6) {
1468 brw_inst_set_gen6_jump_count(devinfo
, if_inst
,
1469 br
* (else_inst
- if_inst
+ 1));
1472 /* Patch ELSE -> ENDIF */
1473 if (devinfo
->gen
< 6) {
1474 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1477 brw_inst_set_gen4_jump_count(devinfo
, else_inst
,
1478 br
* (endif_inst
- else_inst
+ 1));
1479 brw_inst_set_gen4_pop_count(devinfo
, else_inst
, 1);
1480 } else if (devinfo
->gen
== 6) {
1481 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1482 brw_inst_set_gen6_jump_count(devinfo
, else_inst
,
1483 br
* (endif_inst
- else_inst
));
1485 /* The IF instruction's JIP should point just past the ELSE */
1486 brw_inst_set_jip(devinfo
, if_inst
, br
* (else_inst
- if_inst
+ 1));
1487 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1488 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1489 brw_inst_set_jip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1490 if (devinfo
->gen
>= 8) {
1491 /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1492 * should point to ENDIF.
1494 brw_inst_set_uip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1501 brw_ELSE(struct brw_codegen
*p
)
1503 const struct brw_device_info
*devinfo
= p
->devinfo
;
1506 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1508 if (devinfo
->gen
< 6) {
1509 brw_set_dest(p
, insn
, brw_ip_reg());
1510 brw_set_src0(p
, insn
, brw_ip_reg());
1511 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1512 } else if (devinfo
->gen
== 6) {
1513 brw_set_dest(p
, insn
, brw_imm_w(0));
1514 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1515 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1516 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1517 } else if (devinfo
->gen
== 7) {
1518 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1519 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1520 brw_set_src1(p
, insn
, brw_imm_w(0));
1521 brw_inst_set_jip(devinfo
, insn
, 0);
1522 brw_inst_set_uip(devinfo
, insn
, 0);
1524 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1525 brw_set_src0(p
, insn
, brw_imm_d(0));
1526 brw_inst_set_jip(devinfo
, insn
, 0);
1527 brw_inst_set_uip(devinfo
, insn
, 0);
1530 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1531 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1532 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1533 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1535 push_if_stack(p
, insn
);
1539 brw_ENDIF(struct brw_codegen
*p
)
1541 const struct brw_device_info
*devinfo
= p
->devinfo
;
1542 brw_inst
*insn
= NULL
;
1543 brw_inst
*else_inst
= NULL
;
1544 brw_inst
*if_inst
= NULL
;
1546 bool emit_endif
= true;
1548 /* In single program flow mode, we can express IF and ELSE instructions
1549 * equivalently as ADD instructions that operate on IP. On platforms prior
1550 * to Gen6, flow control instructions cause an implied thread switch, so
1551 * this is a significant savings.
1553 * However, on Gen6, writing to IP doesn't work in single program flow mode
1554 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1555 * not be updated by non-flow control instructions."). And on later
1556 * platforms, there is no significant benefit to converting control flow
1557 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1560 if (devinfo
->gen
< 6 && p
->single_program_flow
)
1564 * A single next_insn() may change the base address of instruction store
1565 * memory(p->store), so call it first before referencing the instruction
1566 * store pointer from an index
1569 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1571 /* Pop the IF and (optional) ELSE instructions from the stack */
1572 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1573 tmp
= pop_if_stack(p
);
1574 if (brw_inst_opcode(devinfo
, tmp
) == BRW_OPCODE_ELSE
) {
1576 tmp
= pop_if_stack(p
);
1581 /* ENDIF is useless; don't bother emitting it. */
1582 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1586 if (devinfo
->gen
< 6) {
1587 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1588 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1589 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1590 } else if (devinfo
->gen
== 6) {
1591 brw_set_dest(p
, insn
, brw_imm_w(0));
1592 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1593 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1594 } else if (devinfo
->gen
== 7) {
1595 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1596 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1597 brw_set_src1(p
, insn
, brw_imm_w(0));
1599 brw_set_src0(p
, insn
, brw_imm_d(0));
1602 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1603 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1604 if (devinfo
->gen
< 6)
1605 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1607 /* Also pop item off the stack in the endif instruction: */
1608 if (devinfo
->gen
< 6) {
1609 brw_inst_set_gen4_jump_count(devinfo
, insn
, 0);
1610 brw_inst_set_gen4_pop_count(devinfo
, insn
, 1);
1611 } else if (devinfo
->gen
== 6) {
1612 brw_inst_set_gen6_jump_count(devinfo
, insn
, 2);
1614 brw_inst_set_jip(devinfo
, insn
, 2);
1616 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1620 brw_BREAK(struct brw_codegen
*p
)
1622 const struct brw_device_info
*devinfo
= p
->devinfo
;
1625 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1626 if (devinfo
->gen
>= 8) {
1627 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1628 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1629 } else if (devinfo
->gen
>= 6) {
1630 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1631 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1632 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1634 brw_set_dest(p
, insn
, brw_ip_reg());
1635 brw_set_src0(p
, insn
, brw_ip_reg());
1636 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1637 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1638 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1640 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1641 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1648 brw_CONT(struct brw_codegen
*p
)
1650 const struct brw_device_info
*devinfo
= p
->devinfo
;
1653 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1654 brw_set_dest(p
, insn
, brw_ip_reg());
1655 if (devinfo
->gen
>= 8) {
1656 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1658 brw_set_src0(p
, insn
, brw_ip_reg());
1659 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1662 if (devinfo
->gen
< 6) {
1663 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1664 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1666 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1667 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1673 gen6_HALT(struct brw_codegen
*p
)
1675 const struct brw_device_info
*devinfo
= p
->devinfo
;
1678 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1679 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1680 if (devinfo
->gen
>= 8) {
1681 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1683 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1684 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1687 if (p
->compressed
) {
1688 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_16
);
1690 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1691 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_8
);
1698 * The DO/WHILE is just an unterminated loop -- break or continue are
1699 * used for control within the loop. We have a few ways they can be
1702 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1703 * jip and no DO instruction.
1705 * For non-uniform control flow pre-gen6, there's a DO instruction to
1706 * push the mask, and a WHILE to jump back, and BREAK to get out and
1709 * For gen6, there's no more mask stack, so no need for DO. WHILE
1710 * just points back to the first instruction of the loop.
1713 brw_DO(struct brw_codegen
*p
, unsigned execute_size
)
1715 const struct brw_device_info
*devinfo
= p
->devinfo
;
1717 if (devinfo
->gen
>= 6 || p
->single_program_flow
) {
1718 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1719 return &p
->store
[p
->nr_insn
];
1721 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1723 push_loop_stack(p
, insn
);
1725 /* Override the defaults for this instruction:
1727 brw_set_dest(p
, insn
, brw_null_reg());
1728 brw_set_src0(p
, insn
, brw_null_reg());
1729 brw_set_src1(p
, insn
, brw_null_reg());
1731 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1732 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1733 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
);
1740 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1743 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1744 * nesting, since it can always just point to the end of the block/current loop.
1747 brw_patch_break_cont(struct brw_codegen
*p
, brw_inst
*while_inst
)
1749 const struct brw_device_info
*devinfo
= p
->devinfo
;
1750 brw_inst
*do_inst
= get_inner_do_insn(p
);
1752 unsigned br
= brw_jump_scale(devinfo
);
1754 assert(devinfo
->gen
< 6);
1756 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1757 /* If the jump count is != 0, that means that this instruction has already
1758 * been patched because it's part of a loop inside of the one we're
1761 if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_BREAK
&&
1762 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1763 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
*((while_inst
- inst
) + 1));
1764 } else if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_CONTINUE
&&
1765 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1766 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
* (while_inst
- inst
));
1772 brw_WHILE(struct brw_codegen
*p
)
1774 const struct brw_device_info
*devinfo
= p
->devinfo
;
1775 brw_inst
*insn
, *do_insn
;
1776 unsigned br
= brw_jump_scale(devinfo
);
1778 if (devinfo
->gen
>= 6) {
1779 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1780 do_insn
= get_inner_do_insn(p
);
1782 if (devinfo
->gen
>= 8) {
1783 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1784 brw_set_src0(p
, insn
, brw_imm_d(0));
1785 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1786 } else if (devinfo
->gen
== 7) {
1787 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1788 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1789 brw_set_src1(p
, insn
, brw_imm_w(0));
1790 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1792 brw_set_dest(p
, insn
, brw_imm_w(0));
1793 brw_inst_set_gen6_jump_count(devinfo
, insn
, br
* (do_insn
- insn
));
1794 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1795 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1798 brw_inst_set_exec_size(devinfo
, insn
, p
->compressed
? BRW_EXECUTE_16
1801 if (p
->single_program_flow
) {
1802 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1803 do_insn
= get_inner_do_insn(p
);
1805 brw_set_dest(p
, insn
, brw_ip_reg());
1806 brw_set_src0(p
, insn
, brw_ip_reg());
1807 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1808 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
1810 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1811 do_insn
= get_inner_do_insn(p
);
1813 assert(brw_inst_opcode(devinfo
, do_insn
) == BRW_OPCODE_DO
);
1815 brw_set_dest(p
, insn
, brw_ip_reg());
1816 brw_set_src0(p
, insn
, brw_ip_reg());
1817 brw_set_src1(p
, insn
, brw_imm_d(0));
1819 brw_inst_set_exec_size(devinfo
, insn
, brw_inst_exec_size(devinfo
, do_insn
));
1820 brw_inst_set_gen4_jump_count(devinfo
, insn
, br
* (do_insn
- insn
+ 1));
1821 brw_inst_set_gen4_pop_count(devinfo
, insn
, 0);
1823 brw_patch_break_cont(p
, insn
);
1826 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1828 p
->loop_stack_depth
--;
1835 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
)
1837 const struct brw_device_info
*devinfo
= p
->devinfo
;
1838 brw_inst
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1841 if (devinfo
->gen
>= 5)
1844 assert(brw_inst_opcode(devinfo
, jmp_insn
) == BRW_OPCODE_JMPI
);
1845 assert(brw_inst_src1_reg_file(devinfo
, jmp_insn
) == BRW_IMMEDIATE_VALUE
);
1847 brw_inst_set_gen4_jump_count(devinfo
, jmp_insn
,
1848 jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1));
1851 /* To integrate with the above, it makes sense that the comparison
1852 * instruction should populate the flag register. It might be simpler
1853 * just to use the flag reg for most WM tasks?
1855 void brw_CMP(struct brw_codegen
*p
,
1856 struct brw_reg dest
,
1857 unsigned conditional
,
1858 struct brw_reg src0
,
1859 struct brw_reg src1
)
1861 const struct brw_device_info
*devinfo
= p
->devinfo
;
1862 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1864 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1865 brw_set_dest(p
, insn
, dest
);
1866 brw_set_src0(p
, insn
, src0
);
1867 brw_set_src1(p
, insn
, src1
);
1869 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1871 * "Any CMP instruction with a null destination must use a {switch}."
1873 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1874 * mentioned on their work-arounds pages.
1876 if (devinfo
->gen
== 7) {
1877 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1878 dest
.nr
== BRW_ARF_NULL
) {
1879 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1884 /***********************************************************************
1885 * Helpers for the various SEND message types:
1888 /** Extended math function, float[8].
1890 void gen4_math(struct brw_codegen
*p
,
1891 struct brw_reg dest
,
1893 unsigned msg_reg_nr
,
1895 unsigned precision
)
1897 const struct brw_device_info
*devinfo
= p
->devinfo
;
1898 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1900 if (has_scalar_region(src
)) {
1901 data_type
= BRW_MATH_DATA_SCALAR
;
1903 data_type
= BRW_MATH_DATA_VECTOR
;
1906 assert(devinfo
->gen
< 6);
1908 /* Example code doesn't set predicate_control for send
1911 brw_inst_set_pred_control(devinfo
, insn
, 0);
1912 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
1914 brw_set_dest(p
, insn
, dest
);
1915 brw_set_src0(p
, insn
, src
);
1916 brw_set_math_message(p
,
1919 src
.type
== BRW_REGISTER_TYPE_D
,
1924 void gen6_math(struct brw_codegen
*p
,
1925 struct brw_reg dest
,
1927 struct brw_reg src0
,
1928 struct brw_reg src1
)
1930 const struct brw_device_info
*devinfo
= p
->devinfo
;
1931 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1933 assert(devinfo
->gen
>= 6);
1935 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1936 (devinfo
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1937 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
||
1938 (devinfo
->gen
>= 8 && src0
.file
== BRW_IMMEDIATE_VALUE
));
1940 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1941 if (devinfo
->gen
== 6) {
1942 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1943 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1946 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1947 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1948 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1949 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1950 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1951 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
1952 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
1954 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1955 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1956 if (function
== BRW_MATH_FUNCTION_POW
) {
1957 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
1958 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
1960 assert(src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1961 src1
.nr
== BRW_ARF_NULL
);
1965 /* Source modifiers are ignored for extended math instructions on Gen6. */
1966 if (devinfo
->gen
== 6) {
1967 assert(!src0
.negate
);
1969 assert(!src1
.negate
);
1973 brw_inst_set_math_function(devinfo
, insn
, function
);
1975 brw_set_dest(p
, insn
, dest
);
1976 brw_set_src0(p
, insn
, src0
);
1977 brw_set_src1(p
, insn
, src1
);
1982 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1983 * using a constant offset per channel.
1985 * The offset must be aligned to oword size (16 bytes). Used for
1986 * register spilling.
1988 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1993 const struct brw_device_info
*devinfo
= p
->devinfo
;
1994 uint32_t msg_control
, msg_type
;
1997 if (devinfo
->gen
>= 6)
2000 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2002 if (num_regs
== 1) {
2003 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2006 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2010 /* Set up the message header. This is g0, with g0.2 filled with
2011 * the offset. We don't want to leave our offset around in g0 or
2012 * it'll screw up texture samples, so set it up inside the message
2016 brw_push_insn_state(p
);
2017 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2018 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2019 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2021 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2023 /* set message header global offset field (reg 0, element 2) */
2025 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2027 2), BRW_REGISTER_TYPE_UD
),
2028 brw_imm_ud(offset
));
2030 brw_pop_insn_state(p
);
2034 struct brw_reg dest
;
2035 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2036 int send_commit_msg
;
2037 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2038 BRW_REGISTER_TYPE_UW
);
2040 if (brw_inst_qtr_control(devinfo
, insn
) != BRW_COMPRESSION_NONE
) {
2041 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2042 src_header
= vec16(src_header
);
2044 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2045 if (devinfo
->gen
< 6)
2046 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2048 /* Until gen6, writes followed by reads from the same location
2049 * are not guaranteed to be ordered unless write_commit is set.
2050 * If set, then a no-op write is issued to the destination
2051 * register to set a dependency, and a read from the destination
2052 * can be used to ensure the ordering.
2054 * For gen6, only writes between different threads need ordering
2055 * protection. Our use of DP writes is all about register
2056 * spilling within a thread.
2058 if (devinfo
->gen
>= 6) {
2059 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2060 send_commit_msg
= 0;
2063 send_commit_msg
= 1;
2066 brw_set_dest(p
, insn
, dest
);
2067 if (devinfo
->gen
>= 6) {
2068 brw_set_src0(p
, insn
, mrf
);
2070 brw_set_src0(p
, insn
, brw_null_reg());
2073 if (devinfo
->gen
>= 6)
2074 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2076 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2078 brw_set_dp_write_message(p
,
2080 255, /* binding table index (255=stateless) */
2084 true, /* header_present */
2085 0, /* not a render target */
2086 send_commit_msg
, /* response_length */
2094 * Read a block of owords (half a GRF each) from the scratch buffer
2095 * using a constant index per channel.
2097 * Offset must be aligned to oword size (16 bytes). Used for register
2101 brw_oword_block_read_scratch(struct brw_codegen
*p
,
2102 struct brw_reg dest
,
2107 const struct brw_device_info
*devinfo
= p
->devinfo
;
2108 uint32_t msg_control
;
2111 if (devinfo
->gen
>= 6)
2114 if (p
->devinfo
->gen
>= 7) {
2115 /* On gen 7 and above, we no longer have message registers and we can
2116 * send from any register we want. By using the destination register
2117 * for the message, we guarantee that the implied message write won't
2118 * accidentally overwrite anything. This has been a problem because
2119 * the MRF registers and source for the final FB write are both fixed
2122 mrf
= retype(dest
, BRW_REGISTER_TYPE_UD
);
2124 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2126 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2128 if (num_regs
== 1) {
2129 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2132 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2137 brw_push_insn_state(p
);
2138 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2139 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2140 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2142 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2144 /* set message header global offset field (reg 0, element 2) */
2145 brw_MOV(p
, get_element_ud(mrf
, 2), brw_imm_ud(offset
));
2147 brw_pop_insn_state(p
);
2151 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2153 assert(brw_inst_pred_control(devinfo
, insn
) == 0);
2154 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2156 brw_set_dest(p
, insn
, dest
); /* UW? */
2157 if (devinfo
->gen
>= 6) {
2158 brw_set_src0(p
, insn
, mrf
);
2160 brw_set_src0(p
, insn
, brw_null_reg());
2161 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2164 brw_set_dp_read_message(p
,
2166 255, /* binding table index (255=stateless) */
2168 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2169 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2171 true, /* header_present */
2177 gen7_block_read_scratch(struct brw_codegen
*p
,
2178 struct brw_reg dest
,
2182 const struct brw_device_info
*devinfo
= p
->devinfo
;
2183 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2184 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2186 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2187 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UW
));
2189 /* The HW requires that the header is present; this is to get the g0.5
2192 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2194 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2195 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2196 * is 32 bytes, which happens to be the size of a register.
2199 assert(offset
< (1 << 12));
2201 gen7_set_dp_scratch_message(p
, insn
,
2202 false, /* scratch read */
2204 false, /* invalidate after read */
2207 1, /* mlen: just g0 */
2208 num_regs
, /* rlen */
2209 true); /* header present */
2213 * Read a float[4] vector from the data port Data Cache (const buffer).
2214 * Location (in buffer) should be a multiple of 16.
2215 * Used for fetching shader constants.
2217 void brw_oword_block_read(struct brw_codegen
*p
,
2218 struct brw_reg dest
,
2221 uint32_t bind_table_index
)
2223 const struct brw_device_info
*devinfo
= p
->devinfo
;
2225 /* On newer hardware, offset is in units of owords. */
2226 if (devinfo
->gen
>= 6)
2229 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2231 brw_push_insn_state(p
);
2232 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2233 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2234 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2235 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2237 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2239 /* set message header global offset field (reg 0, element 2) */
2241 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2243 2), BRW_REGISTER_TYPE_UD
),
2244 brw_imm_ud(offset
));
2246 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2248 /* cast dest to a uword[8] vector */
2249 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2251 brw_set_dest(p
, insn
, dest
);
2252 if (devinfo
->gen
>= 6) {
2253 brw_set_src0(p
, insn
, mrf
);
2255 brw_set_src0(p
, insn
, brw_null_reg());
2256 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2259 brw_set_dp_read_message(p
,
2262 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2263 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2264 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2266 true, /* header_present */
2267 1); /* response_length (1 reg, 2 owords!) */
2269 brw_pop_insn_state(p
);
2273 void brw_fb_WRITE(struct brw_codegen
*p
,
2275 struct brw_reg payload
,
2276 struct brw_reg implied_header
,
2277 unsigned msg_control
,
2278 unsigned binding_table_index
,
2279 unsigned msg_length
,
2280 unsigned response_length
,
2282 bool last_render_target
,
2283 bool header_present
)
2285 const struct brw_device_info
*devinfo
= p
->devinfo
;
2288 struct brw_reg dest
, src0
;
2290 if (dispatch_width
== 16)
2291 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2293 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2295 if (devinfo
->gen
>= 6) {
2296 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2298 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2300 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2302 if (devinfo
->gen
>= 6) {
2303 /* headerless version, just submit color payload */
2306 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2308 assert(payload
.file
== BRW_MESSAGE_REGISTER_FILE
);
2309 brw_inst_set_base_mrf(devinfo
, insn
, payload
.nr
);
2310 src0
= implied_header
;
2312 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2315 brw_set_dest(p
, insn
, dest
);
2316 brw_set_src0(p
, insn
, src0
);
2317 brw_set_dp_write_message(p
,
2319 binding_table_index
,
2327 0 /* send_commit_msg */);
2332 * Texture sample instruction.
2333 * Note: the msg_type plus msg_length values determine exactly what kind
2334 * of sampling operation is performed. See volume 4, page 161 of docs.
2336 void brw_SAMPLE(struct brw_codegen
*p
,
2337 struct brw_reg dest
,
2338 unsigned msg_reg_nr
,
2339 struct brw_reg src0
,
2340 unsigned binding_table_index
,
2343 unsigned response_length
,
2344 unsigned msg_length
,
2345 unsigned header_present
,
2347 unsigned return_format
)
2349 const struct brw_device_info
*devinfo
= p
->devinfo
;
2352 if (msg_reg_nr
!= -1)
2353 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2355 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2356 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
); /* XXX */
2358 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2360 * "Instruction compression is not allowed for this instruction (that
2361 * is, send). The hardware behavior is undefined if this instruction is
2362 * set as compressed. However, compress control can be set to "SecHalf"
2363 * to affect the EMask generation."
2365 * No similar wording is found in later PRMs, but there are examples
2366 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2367 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2368 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2370 if (brw_inst_qtr_control(devinfo
, insn
) != BRW_COMPRESSION_2NDHALF
)
2371 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
2373 if (devinfo
->gen
< 6)
2374 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2376 brw_set_dest(p
, insn
, dest
);
2377 brw_set_src0(p
, insn
, src0
);
2378 brw_set_sampler_message(p
, insn
,
2379 binding_table_index
,
2389 /* Adjust the message header's sampler state pointer to
2390 * select the correct group of 16 samplers.
2392 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
2393 struct brw_reg header
,
2394 struct brw_reg sampler_index
)
2396 /* The "Sampler Index" field can only store values between 0 and 15.
2397 * However, we can add an offset to the "Sampler State Pointer"
2398 * field, effectively selecting a different set of 16 samplers.
2400 * The "Sampler State Pointer" needs to be aligned to a 32-byte
2401 * offset, and each sampler state is only 16-bytes, so we can't
2402 * exclusively use the offset - we have to use both.
2405 const struct brw_device_info
*devinfo
= p
->devinfo
;
2407 if (sampler_index
.file
== BRW_IMMEDIATE_VALUE
) {
2408 const int sampler_state_size
= 16; /* 16 bytes */
2409 uint32_t sampler
= sampler_index
.dw1
.ud
;
2411 if (sampler
>= 16) {
2412 assert(devinfo
->is_haswell
|| devinfo
->gen
>= 8);
2414 get_element_ud(header
, 3),
2415 get_element_ud(brw_vec8_grf(0, 0), 3),
2416 brw_imm_ud(16 * (sampler
/ 16) * sampler_state_size
));
2419 /* Non-const sampler array indexing case */
2420 if (devinfo
->gen
< 8 && !devinfo
->is_haswell
) {
2424 struct brw_reg temp
= get_element_ud(header
, 3);
2426 brw_AND(p
, temp
, get_element_ud(sampler_index
, 0), brw_imm_ud(0x0f0));
2427 brw_SHL(p
, temp
, temp
, brw_imm_ud(4));
2429 get_element_ud(header
, 3),
2430 get_element_ud(brw_vec8_grf(0, 0), 3),
2435 /* All these variables are pretty confusing - we might be better off
2436 * using bitmasks and macros for this, in the old style. Or perhaps
2437 * just having the caller instantiate the fields in dword3 itself.
2439 void brw_urb_WRITE(struct brw_codegen
*p
,
2440 struct brw_reg dest
,
2441 unsigned msg_reg_nr
,
2442 struct brw_reg src0
,
2443 enum brw_urb_write_flags flags
,
2444 unsigned msg_length
,
2445 unsigned response_length
,
2449 const struct brw_device_info
*devinfo
= p
->devinfo
;
2452 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2454 if (devinfo
->gen
>= 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2455 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2456 brw_push_insn_state(p
);
2457 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2458 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2459 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2460 BRW_REGISTER_TYPE_UD
),
2461 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2462 brw_imm_ud(0xff00));
2463 brw_pop_insn_state(p
);
2466 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2468 assert(msg_length
< BRW_MAX_MRF
);
2470 brw_set_dest(p
, insn
, dest
);
2471 brw_set_src0(p
, insn
, src0
);
2472 brw_set_src1(p
, insn
, brw_imm_d(0));
2474 if (devinfo
->gen
< 6)
2475 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2477 brw_set_urb_message(p
,
2487 brw_send_indirect_message(struct brw_codegen
*p
,
2490 struct brw_reg payload
,
2491 struct brw_reg desc
)
2493 const struct brw_device_info
*devinfo
= p
->devinfo
;
2494 struct brw_inst
*send
, *setup
;
2496 assert(desc
.type
== BRW_REGISTER_TYPE_UD
);
2498 if (desc
.file
== BRW_IMMEDIATE_VALUE
) {
2499 setup
= send
= next_insn(p
, BRW_OPCODE_SEND
);
2500 brw_set_src1(p
, send
, desc
);
2503 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2505 brw_push_insn_state(p
);
2506 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2507 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2508 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2510 /* Load the indirect descriptor to an address register using OR so the
2511 * caller can specify additional descriptor bits with the usual
2512 * brw_set_*_message() helper functions.
2514 setup
= brw_OR(p
, addr
, desc
, brw_imm_ud(0));
2516 brw_pop_insn_state(p
);
2518 send
= next_insn(p
, BRW_OPCODE_SEND
);
2519 brw_set_src1(p
, send
, addr
);
2522 brw_set_dest(p
, send
, dst
);
2523 brw_set_src0(p
, send
, retype(payload
, BRW_REGISTER_TYPE_UD
));
2524 brw_inst_set_sfid(devinfo
, send
, sfid
);
2529 static struct brw_inst
*
2530 brw_send_indirect_surface_message(struct brw_codegen
*p
,
2533 struct brw_reg payload
,
2534 struct brw_reg surface
,
2535 unsigned message_len
,
2536 unsigned response_len
,
2537 bool header_present
)
2539 const struct brw_device_info
*devinfo
= p
->devinfo
;
2540 struct brw_inst
*insn
;
2542 if (surface
.file
!= BRW_IMMEDIATE_VALUE
) {
2543 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2545 brw_push_insn_state(p
);
2546 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2547 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2548 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2550 /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2551 * some surface array is accessed out of bounds.
2553 insn
= brw_AND(p
, addr
,
2554 suboffset(vec1(retype(surface
, BRW_REGISTER_TYPE_UD
)),
2555 BRW_GET_SWZ(surface
.dw1
.bits
.swizzle
, 0)),
2558 brw_pop_insn_state(p
);
2563 insn
= brw_send_indirect_message(p
, sfid
, dst
, payload
, surface
);
2564 brw_inst_set_mlen(devinfo
, insn
, message_len
);
2565 brw_inst_set_rlen(devinfo
, insn
, response_len
);
2566 brw_inst_set_header_present(devinfo
, insn
, header_present
);
2572 brw_find_next_block_end(struct brw_codegen
*p
, int start_offset
)
2575 void *store
= p
->store
;
2576 const struct brw_device_info
*devinfo
= p
->devinfo
;
2578 for (offset
= next_offset(devinfo
, store
, start_offset
);
2579 offset
< p
->next_insn_offset
;
2580 offset
= next_offset(devinfo
, store
, offset
)) {
2581 brw_inst
*insn
= store
+ offset
;
2583 switch (brw_inst_opcode(devinfo
, insn
)) {
2584 case BRW_OPCODE_ENDIF
:
2585 case BRW_OPCODE_ELSE
:
2586 case BRW_OPCODE_WHILE
:
2587 case BRW_OPCODE_HALT
:
2595 /* There is no DO instruction on gen6, so to find the end of the loop
2596 * we have to see if the loop is jumping back before our start
2600 brw_find_loop_end(struct brw_codegen
*p
, int start_offset
)
2602 const struct brw_device_info
*devinfo
= p
->devinfo
;
2604 int scale
= 16 / brw_jump_scale(devinfo
);
2605 void *store
= p
->store
;
2607 assert(devinfo
->gen
>= 6);
2609 /* Always start after the instruction (such as a WHILE) we're trying to fix
2612 for (offset
= next_offset(devinfo
, store
, start_offset
);
2613 offset
< p
->next_insn_offset
;
2614 offset
= next_offset(devinfo
, store
, offset
)) {
2615 brw_inst
*insn
= store
+ offset
;
2617 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
) {
2618 int jip
= devinfo
->gen
== 6 ? brw_inst_gen6_jump_count(devinfo
, insn
)
2619 : brw_inst_jip(devinfo
, insn
);
2620 if (offset
+ jip
* scale
<= start_offset
)
2624 assert(!"not reached");
2625 return start_offset
;
2628 /* After program generation, go back and update the UIP and JIP of
2629 * BREAK, CONT, and HALT instructions to their correct locations.
2632 brw_set_uip_jip(struct brw_codegen
*p
)
2634 const struct brw_device_info
*devinfo
= p
->devinfo
;
2636 int br
= brw_jump_scale(devinfo
);
2637 int scale
= 16 / br
;
2638 void *store
= p
->store
;
2640 if (devinfo
->gen
< 6)
2643 for (offset
= 0; offset
< p
->next_insn_offset
;
2644 offset
= next_offset(devinfo
, store
, offset
)) {
2645 brw_inst
*insn
= store
+ offset
;
2647 if (brw_inst_cmpt_control(devinfo
, insn
)) {
2648 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2649 assert(brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_BREAK
&&
2650 brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_CONTINUE
&&
2651 brw_inst_opcode(devinfo
, insn
) != BRW_OPCODE_HALT
);
2655 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2656 switch (brw_inst_opcode(devinfo
, insn
)) {
2657 case BRW_OPCODE_BREAK
:
2658 assert(block_end_offset
!= 0);
2659 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2660 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2661 brw_inst_set_uip(devinfo
, insn
,
2662 (brw_find_loop_end(p
, offset
) - offset
+
2663 (devinfo
->gen
== 6 ? 16 : 0)) / scale
);
2665 case BRW_OPCODE_CONTINUE
:
2666 assert(block_end_offset
!= 0);
2667 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2668 brw_inst_set_uip(devinfo
, insn
,
2669 (brw_find_loop_end(p
, offset
) - offset
) / scale
);
2671 assert(brw_inst_uip(devinfo
, insn
) != 0);
2672 assert(brw_inst_jip(devinfo
, insn
) != 0);
2675 case BRW_OPCODE_ENDIF
: {
2676 int32_t jump
= (block_end_offset
== 0) ?
2677 1 * br
: (block_end_offset
- offset
) / scale
;
2678 if (devinfo
->gen
>= 7)
2679 brw_inst_set_jip(devinfo
, insn
, jump
);
2681 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump
);
2685 case BRW_OPCODE_HALT
:
2686 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2688 * "In case of the halt instruction not inside any conditional
2689 * code block, the value of <JIP> and <UIP> should be the
2690 * same. In case of the halt instruction inside conditional code
2691 * block, the <UIP> should be the end of the program, and the
2692 * <JIP> should be end of the most inner conditional code block."
2694 * The uip will have already been set by whoever set up the
2697 if (block_end_offset
== 0) {
2698 brw_inst_set_jip(devinfo
, insn
, brw_inst_uip(devinfo
, insn
));
2700 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2702 assert(brw_inst_uip(devinfo
, insn
) != 0);
2703 assert(brw_inst_jip(devinfo
, insn
) != 0);
2709 void brw_ff_sync(struct brw_codegen
*p
,
2710 struct brw_reg dest
,
2711 unsigned msg_reg_nr
,
2712 struct brw_reg src0
,
2714 unsigned response_length
,
2717 const struct brw_device_info
*devinfo
= p
->devinfo
;
2720 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2722 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2723 brw_set_dest(p
, insn
, dest
);
2724 brw_set_src0(p
, insn
, src0
);
2725 brw_set_src1(p
, insn
, brw_imm_d(0));
2727 if (devinfo
->gen
< 6)
2728 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2730 brw_set_ff_sync_message(p
,
2738 * Emit the SEND instruction necessary to generate stream output data on Gen6
2739 * (for transform feedback).
2741 * If send_commit_msg is true, this is the last piece of stream output data
2742 * from this thread, so send the data as a committed write. According to the
2743 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2745 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2746 * writes are complete by sending the final write as a committed write."
2749 brw_svb_write(struct brw_codegen
*p
,
2750 struct brw_reg dest
,
2751 unsigned msg_reg_nr
,
2752 struct brw_reg src0
,
2753 unsigned binding_table_index
,
2754 bool send_commit_msg
)
2758 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2760 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2761 brw_set_dest(p
, insn
, dest
);
2762 brw_set_src0(p
, insn
, src0
);
2763 brw_set_src1(p
, insn
, brw_imm_d(0));
2764 brw_set_dp_write_message(p
, insn
,
2765 binding_table_index
,
2766 0, /* msg_control: ignored */
2767 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2769 true, /* header_present */
2770 0, /* last_render_target: ignored */
2771 send_commit_msg
, /* response_length */
2772 0, /* end_of_thread */
2773 send_commit_msg
); /* send_commit_msg */
2777 brw_surface_payload_size(struct brw_codegen
*p
,
2778 unsigned num_channels
,
2782 if (has_simd4x2
&& brw_inst_access_mode(p
->devinfo
, p
->current
) == BRW_ALIGN_16
)
2784 else if (has_simd16
&& p
->compressed
)
2785 return 2 * num_channels
;
2787 return num_channels
;
2791 brw_set_dp_untyped_atomic_message(struct brw_codegen
*p
,
2794 bool response_expected
)
2796 const struct brw_device_info
*devinfo
= p
->devinfo
;
2797 unsigned msg_control
=
2798 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2799 (response_expected
? 1 << 5 : 0); /* Return data expected */
2801 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2802 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2804 msg_control
|= 1 << 4; /* SIMD8 mode */
2806 brw_inst_set_dp_msg_type(devinfo
, insn
,
2807 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
);
2809 brw_inst_set_dp_msg_type(devinfo
, insn
,
2810 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
);
2813 brw_inst_set_dp_msg_type(devinfo
, insn
,
2814 GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
);
2817 msg_control
|= 1 << 4; /* SIMD8 mode */
2820 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2824 brw_untyped_atomic(struct brw_codegen
*p
,
2826 struct brw_reg payload
,
2827 struct brw_reg surface
,
2829 unsigned msg_length
,
2830 bool response_expected
)
2832 const struct brw_device_info
*devinfo
= p
->devinfo
;
2833 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2834 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2835 GEN7_SFID_DATAPORT_DATA_CACHE
);
2836 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2837 /* Mask out unused components -- This is especially important in Align16
2838 * mode on generations that don't have native support for SIMD4x2 atomics,
2839 * because unused but enabled components will cause the dataport to perform
2840 * additional atomic operations on the addresses that happen to be in the
2841 * uninitialized Y, Z and W coordinates of the payload.
2843 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2844 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2845 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
2846 brw_surface_payload_size(p
, response_expected
,
2847 devinfo
->gen
>= 8 || devinfo
->is_haswell
, true),
2850 brw_set_dp_untyped_atomic_message(
2851 p
, insn
, atomic_op
, response_expected
);
2855 brw_set_dp_untyped_surface_read_message(struct brw_codegen
*p
,
2856 struct brw_inst
*insn
,
2857 unsigned num_channels
)
2859 const struct brw_device_info
*devinfo
= p
->devinfo
;
2860 /* Set mask of 32-bit channels to drop. */
2861 unsigned msg_control
= 0xf & (0xf << num_channels
);
2863 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2865 msg_control
|= 1 << 4; /* SIMD16 mode */
2867 msg_control
|= 2 << 4; /* SIMD8 mode */
2870 brw_inst_set_dp_msg_type(devinfo
, insn
,
2871 (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2872 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
:
2873 GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
));
2874 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2878 brw_untyped_surface_read(struct brw_codegen
*p
,
2880 struct brw_reg payload
,
2881 struct brw_reg surface
,
2882 unsigned msg_length
,
2883 unsigned num_channels
)
2885 const struct brw_device_info
*devinfo
= p
->devinfo
;
2886 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2887 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2888 GEN7_SFID_DATAPORT_DATA_CACHE
);
2889 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
2890 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2891 p
, sfid
, dst
, payload
, surface
, msg_length
,
2892 brw_surface_payload_size(p
, num_channels
, true, true),
2895 brw_set_dp_untyped_surface_read_message(
2896 p
, insn
, num_channels
);
2900 brw_set_dp_untyped_surface_write_message(struct brw_codegen
*p
,
2901 struct brw_inst
*insn
,
2902 unsigned num_channels
)
2904 const struct brw_device_info
*devinfo
= p
->devinfo
;
2905 /* Set mask of 32-bit channels to drop. */
2906 unsigned msg_control
= 0xf & (0xf << num_channels
);
2908 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2910 msg_control
|= 1 << 4; /* SIMD16 mode */
2912 msg_control
|= 2 << 4; /* SIMD8 mode */
2914 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
2915 msg_control
|= 0 << 4; /* SIMD4x2 mode */
2917 msg_control
|= 2 << 4; /* SIMD8 mode */
2920 brw_inst_set_dp_msg_type(devinfo
, insn
,
2921 devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2922 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
:
2923 GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
);
2924 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2928 brw_untyped_surface_write(struct brw_codegen
*p
,
2929 struct brw_reg payload
,
2930 struct brw_reg surface
,
2931 unsigned msg_length
,
2932 unsigned num_channels
)
2934 const struct brw_device_info
*devinfo
= p
->devinfo
;
2935 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2936 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2937 GEN7_SFID_DATAPORT_DATA_CACHE
);
2938 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2939 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
2940 const unsigned mask
= devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
2941 WRITEMASK_X
: WRITEMASK_XYZW
;
2942 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2943 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
2944 payload
, surface
, msg_length
, 0, align1
);
2946 brw_set_dp_untyped_surface_write_message(
2947 p
, insn
, num_channels
);
2951 brw_set_dp_typed_atomic_message(struct brw_codegen
*p
,
2952 struct brw_inst
*insn
,
2954 bool response_expected
)
2956 const struct brw_device_info
*devinfo
= p
->devinfo
;
2957 unsigned msg_control
=
2958 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2959 (response_expected
? 1 << 5 : 0); /* Return data expected */
2961 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2962 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2963 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
2964 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
2966 brw_inst_set_dp_msg_type(devinfo
, insn
,
2967 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
);
2969 brw_inst_set_dp_msg_type(devinfo
, insn
,
2970 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
);
2974 brw_inst_set_dp_msg_type(devinfo
, insn
,
2975 GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
);
2977 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
2978 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
2981 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2985 brw_typed_atomic(struct brw_codegen
*p
,
2987 struct brw_reg payload
,
2988 struct brw_reg surface
,
2990 unsigned msg_length
,
2991 bool response_expected
) {
2992 const struct brw_device_info
*devinfo
= p
->devinfo
;
2993 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2994 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2995 GEN6_SFID_DATAPORT_RENDER_CACHE
);
2996 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
2997 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
2998 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2999 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3000 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
3001 brw_surface_payload_size(p
, response_expected
,
3002 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3005 brw_set_dp_typed_atomic_message(
3006 p
, insn
, atomic_op
, response_expected
);
3010 brw_set_dp_typed_surface_read_message(struct brw_codegen
*p
,
3011 struct brw_inst
*insn
,
3012 unsigned num_channels
)
3014 const struct brw_device_info
*devinfo
= p
->devinfo
;
3015 /* Set mask of unused channels. */
3016 unsigned msg_control
= 0xf & (0xf << num_channels
);
3018 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3019 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3020 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3021 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3023 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3026 brw_inst_set_dp_msg_type(devinfo
, insn
,
3027 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
);
3029 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3030 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3031 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3034 brw_inst_set_dp_msg_type(devinfo
, insn
,
3035 GEN7_DATAPORT_RC_TYPED_SURFACE_READ
);
3038 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3042 brw_typed_surface_read(struct brw_codegen
*p
,
3044 struct brw_reg payload
,
3045 struct brw_reg surface
,
3046 unsigned msg_length
,
3047 unsigned num_channels
)
3049 const struct brw_device_info
*devinfo
= p
->devinfo
;
3050 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3051 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3052 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3053 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3054 p
, sfid
, dst
, payload
, surface
, msg_length
,
3055 brw_surface_payload_size(p
, num_channels
,
3056 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3059 brw_set_dp_typed_surface_read_message(
3060 p
, insn
, num_channels
);
3064 brw_set_dp_typed_surface_write_message(struct brw_codegen
*p
,
3065 struct brw_inst
*insn
,
3066 unsigned num_channels
)
3068 const struct brw_device_info
*devinfo
= p
->devinfo
;
3069 /* Set mask of unused channels. */
3070 unsigned msg_control
= 0xf & (0xf << num_channels
);
3072 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3073 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3074 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3075 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3077 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3080 brw_inst_set_dp_msg_type(devinfo
, insn
,
3081 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
);
3084 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3085 if (brw_inst_qtr_control(devinfo
, p
->current
) == GEN6_COMPRESSION_2Q
)
3086 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3089 brw_inst_set_dp_msg_type(devinfo
, insn
,
3090 GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
);
3093 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3097 brw_typed_surface_write(struct brw_codegen
*p
,
3098 struct brw_reg payload
,
3099 struct brw_reg surface
,
3100 unsigned msg_length
,
3101 unsigned num_channels
)
3103 const struct brw_device_info
*devinfo
= p
->devinfo
;
3104 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3105 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3106 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3107 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3108 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3109 const unsigned mask
= (devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3110 WRITEMASK_X
: WRITEMASK_XYZW
);
3111 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3112 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3113 payload
, surface
, msg_length
, 0, true);
3115 brw_set_dp_typed_surface_write_message(
3116 p
, insn
, num_channels
);
3120 brw_set_memory_fence_message(struct brw_codegen
*p
,
3121 struct brw_inst
*insn
,
3122 enum brw_message_target sfid
,
3125 const struct brw_device_info
*devinfo
= p
->devinfo
;
3127 brw_set_message_descriptor(p
, insn
, sfid
,
3128 1 /* message length */,
3129 (commit_enable
? 1 : 0) /* response length */,
3130 true /* header present */,
3134 case GEN6_SFID_DATAPORT_RENDER_CACHE
:
3135 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_RC_MEMORY_FENCE
);
3137 case GEN7_SFID_DATAPORT_DATA_CACHE
:
3138 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_DC_MEMORY_FENCE
);
3141 unreachable("Not reached");
3145 brw_inst_set_dp_msg_control(devinfo
, insn
, 1 << 5);
3149 brw_memory_fence(struct brw_codegen
*p
,
3152 const struct brw_device_info
*devinfo
= p
->devinfo
;
3153 const bool commit_enable
= devinfo
->gen
== 7 && !devinfo
->is_haswell
;
3154 struct brw_inst
*insn
;
3156 /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3157 * message doesn't write anything back.
3159 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3160 brw_set_dest(p
, insn
, dst
);
3161 brw_set_src0(p
, insn
, dst
);
3162 brw_set_memory_fence_message(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
3165 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
3166 /* IVB does typed surface access through the render cache, so we need to
3167 * flush it too. Use a different register so both flushes can be
3168 * pipelined by the hardware.
3170 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3171 brw_set_dest(p
, insn
, offset(dst
, 1));
3172 brw_set_src0(p
, insn
, offset(dst
, 1));
3173 brw_set_memory_fence_message(p
, insn
, GEN6_SFID_DATAPORT_RENDER_CACHE
,
3176 /* Now write the response of the second message into the response of the
3177 * first to trigger a pipeline stall -- This way future render and data
3178 * cache messages will be properly ordered with respect to past data and
3179 * render cache messages.
3181 brw_push_insn_state(p
);
3182 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3183 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3184 brw_MOV(p
, dst
, offset(dst
, 1));
3185 brw_pop_insn_state(p
);
3190 brw_pixel_interpolator_query(struct brw_codegen
*p
,
3191 struct brw_reg dest
,
3196 unsigned msg_length
,
3197 unsigned response_length
)
3199 const struct brw_device_info
*devinfo
= p
->devinfo
;
3200 struct brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
3202 brw_set_dest(p
, insn
, dest
);
3203 brw_set_src0(p
, insn
, mrf
);
3204 brw_set_message_descriptor(p
, insn
, GEN7_SFID_PIXEL_INTERPOLATOR
,
3205 msg_length
, response_length
,
3206 false /* header is never present for PI */,
3209 brw_inst_set_pi_simd_mode(
3210 devinfo
, insn
, brw_inst_exec_size(devinfo
, insn
) == BRW_EXECUTE_16
);
3211 brw_inst_set_pi_slot_group(devinfo
, insn
, 0); /* zero unless 32/64px dispatch */
3212 brw_inst_set_pi_nopersp(devinfo
, insn
, noperspective
);
3213 brw_inst_set_pi_message_type(devinfo
, insn
, mode
);
3214 brw_inst_set_pi_message_data(devinfo
, insn
, data
);
3218 brw_find_live_channel(struct brw_codegen
*p
, struct brw_reg dst
)
3220 const struct brw_device_info
*devinfo
= p
->devinfo
;
3223 assert(devinfo
->gen
>= 7);
3225 brw_push_insn_state(p
);
3227 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3228 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3230 if (devinfo
->gen
>= 8) {
3231 /* Getting the first active channel index is easy on Gen8: Just find
3232 * the first bit set in the mask register. The same register exists
3233 * on HSW already but it reads back as all ones when the current
3234 * instruction has execution masking disabled, so it's kind of
3237 inst
= brw_FBL(p
, vec1(dst
),
3238 retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
));
3240 /* Quarter control has the effect of magically shifting the value of
3241 * this register. Make sure it's set to zero.
3243 brw_inst_set_qtr_control(devinfo
, inst
, GEN6_COMPRESSION_1Q
);
3245 const struct brw_reg flag
= retype(brw_flag_reg(1, 0),
3246 BRW_REGISTER_TYPE_UD
);
3248 brw_MOV(p
, flag
, brw_imm_ud(0));
3250 /* Run a 16-wide instruction returning zero with execution masking
3251 * and a conditional modifier enabled in order to get the current
3252 * execution mask in f1.0.
3254 inst
= brw_MOV(p
, brw_null_reg(), brw_imm_ud(0));
3255 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_16
);
3256 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3257 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_Z
);
3258 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3260 brw_FBL(p
, vec1(dst
), flag
);
3263 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3265 if (devinfo
->gen
>= 8) {
3266 /* In SIMD4x2 mode the first active channel index is just the
3267 * negation of the first bit of the mask register.
3269 inst
= brw_AND(p
, brw_writemask(dst
, WRITEMASK_X
),
3270 negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
)),
3274 /* Overwrite the destination without and with execution masking to
3275 * find out which of the channels is active.
3277 brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3280 inst
= brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3282 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3286 brw_pop_insn_state(p
);
3290 brw_broadcast(struct brw_codegen
*p
,
3295 const struct brw_device_info
*devinfo
= p
->devinfo
;
3296 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3299 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
&&
3300 src
.address_mode
== BRW_ADDRESS_DIRECT
);
3302 if ((src
.vstride
== 0 && (src
.hstride
== 0 || !align1
)) ||
3303 idx
.file
== BRW_IMMEDIATE_VALUE
) {
3304 /* Trivial, the source is already uniform or the index is a constant.
3305 * We will typically not get here if the optimizer is doing its job, but
3306 * asserting would be mean.
3308 const unsigned i
= idx
.file
== BRW_IMMEDIATE_VALUE
? idx
.dw1
.ud
: 0;
3310 (align1
? stride(suboffset(src
, i
), 0, 1, 0) :
3311 stride(suboffset(src
, 4 * i
), 0, 4, 1)));
3314 const struct brw_reg addr
=
3315 retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
3316 const unsigned offset
= src
.nr
* REG_SIZE
+ src
.subnr
;
3317 /* Limit in bytes of the signed indirect addressing immediate. */
3318 const unsigned limit
= 512;
3320 brw_push_insn_state(p
);
3321 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3322 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
3324 /* Take into account the component size and horizontal stride. */
3325 assert(src
.vstride
== src
.hstride
+ src
.width
);
3326 brw_SHL(p
, addr
, vec1(idx
),
3327 brw_imm_ud(_mesa_logbase2(type_sz(src
.type
)) +
3330 /* We can only address up to limit bytes using the indirect
3331 * addressing immediate, account for the difference if the source
3332 * register is above this limit.
3334 if (offset
>= limit
)
3335 brw_ADD(p
, addr
, addr
, brw_imm_ud(offset
- offset
% limit
));
3337 brw_pop_insn_state(p
);
3339 /* Use indirect addressing to fetch the specified component. */
3341 retype(brw_vec1_indirect(addr
.subnr
, offset
% limit
),
3344 /* In SIMD4x2 mode the index can be either zero or one, replicate it
3345 * to all bits of a flag register,
3349 stride(brw_swizzle1(idx
, 0), 0, 4, 1));
3350 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NONE
);
3351 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_NZ
);
3352 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3354 /* and use predicated SEL to pick the right channel. */
3355 inst
= brw_SEL(p
, dst
,
3356 stride(suboffset(src
, 4), 0, 4, 1),
3357 stride(src
, 0, 4, 1));
3358 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NORMAL
);
3359 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3365 * This instruction is generated as a single-channel align1 instruction by
3366 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3368 * We can't use the typed atomic op in the FS because that has the execution
3369 * mask ANDed with the pixel mask, but we just want to write the one dword for
3372 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
3373 * one u32. So we use the same untyped atomic write message as the pixel
3376 * The untyped atomic operation requires a BUFFER surface type with RAW
3377 * format, and is only accessible through the legacy DATA_CACHE dataport
3380 void brw_shader_time_add(struct brw_codegen
*p
,
3381 struct brw_reg payload
,
3382 uint32_t surf_index
)
3384 const unsigned sfid
= (p
->devinfo
->gen
>= 8 || p
->devinfo
->is_haswell
?
3385 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3386 GEN7_SFID_DATAPORT_DATA_CACHE
);
3387 assert(p
->devinfo
->gen
>= 7);
3389 brw_push_insn_state(p
);
3390 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3391 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3392 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3393 brw_inst
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
3395 /* We use brw_vec1_reg and unmasked because we want to increment the given
3398 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
3400 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
3402 brw_set_src1(p
, send
, brw_imm_ud(0));
3403 brw_set_message_descriptor(p
, send
, sfid
, 2, 0, false, false);
3404 brw_inst_set_binding_table_index(p
->devinfo
, send
, surf_index
);
3405 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, false);
3407 brw_pop_insn_state(p
);
3412 * Emit the SEND message for a barrier
3415 brw_barrier(struct brw_codegen
*p
, struct brw_reg src
)
3417 const struct brw_device_info
*devinfo
= p
->devinfo
;
3418 struct brw_inst
*inst
;
3420 assert(devinfo
->gen
>= 7);
3422 inst
= next_insn(p
, BRW_OPCODE_SEND
);
3423 brw_set_dest(p
, inst
, brw_null_reg());
3424 brw_set_src0(p
, inst
, src
);
3425 brw_set_src1(p
, inst
, brw_null_reg());
3427 brw_set_message_descriptor(p
, inst
, BRW_SFID_MESSAGE_GATEWAY
,
3429 0 /* response_length */,
3430 false /* header_present */,
3431 false /* end_of_thread */);
3433 brw_inst_set_gateway_notify(devinfo
, inst
, 1);
3434 brw_inst_set_gateway_subfuncid(devinfo
, inst
,
3435 BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG
);
3437 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
3442 * Emit the wait instruction for a barrier
3445 brw_WAIT(struct brw_codegen
*p
)
3447 const struct brw_device_info
*devinfo
= p
->devinfo
;
3448 struct brw_inst
*insn
;
3450 struct brw_reg src
= brw_notification_reg();
3452 insn
= next_insn(p
, BRW_OPCODE_WAIT
);
3453 brw_set_dest(p
, insn
, src
);
3454 brw_set_src0(p
, insn
, src
);
3455 brw_set_src1(p
, insn
, brw_null_reg());
3457 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
3458 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_DISABLE
);