/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
33 #include "brw_eu_defines.h"
36 #include "util/ralloc.h"
39 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
40 * registers, implicitly moving the operand to a message register.
42 * On Sandybridge, this is no longer the case. This function performs the
43 * explicit move; it should be called before emitting a SEND instruction.
46 gen6_resolve_implied_move(struct brw_codegen
*p
,
50 const struct gen_device_info
*devinfo
= p
->devinfo
;
54 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
57 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
58 brw_push_insn_state(p
);
59 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
60 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
61 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
62 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
63 retype(*src
, BRW_REGISTER_TYPE_UD
));
64 brw_pop_insn_state(p
);
66 *src
= brw_message_reg(msg_reg_nr
);
70 gen7_convert_mrf_to_grf(struct brw_codegen
*p
, struct brw_reg
*reg
)
72 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
73 * "The send with EOT should use register space R112-R127 for <src>. This is
74 * to enable loading of a new thread into the same slot while the message
75 * with EOT for current thread is pending dispatch."
77 * Since we're pretending to have 16 MRFs anyway, we may as well use the
78 * registers required for messages with EOT.
80 const struct gen_device_info
*devinfo
= p
->devinfo
;
81 if (devinfo
->gen
>= 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
82 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
83 reg
->nr
+= GEN7_MRF_HACK_START
;
88 brw_set_dest(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg dest
)
90 const struct gen_device_info
*devinfo
= p
->devinfo
;
92 if (dest
.file
== BRW_MESSAGE_REGISTER_FILE
)
93 assert((dest
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
94 else if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
95 assert(dest
.nr
< 128);
97 gen7_convert_mrf_to_grf(p
, &dest
);
99 brw_inst_set_dst_file_type(devinfo
, inst
, dest
.file
, dest
.type
);
100 brw_inst_set_dst_address_mode(devinfo
, inst
, dest
.address_mode
);
102 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
103 brw_inst_set_dst_da_reg_nr(devinfo
, inst
, dest
.nr
);
105 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
106 brw_inst_set_dst_da1_subreg_nr(devinfo
, inst
, dest
.subnr
);
107 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
108 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
109 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
111 brw_inst_set_dst_da16_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
112 brw_inst_set_da16_writemask(devinfo
, inst
, dest
.writemask
);
113 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
114 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
115 assert(dest
.writemask
!= 0);
117 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
118 * Although Dst.HorzStride is a don't care for Align16, HW needs
119 * this to be programmed as "01".
121 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
124 brw_inst_set_dst_ia_subreg_nr(devinfo
, inst
, dest
.subnr
);
126 /* These are different sizes in align1 vs align16:
128 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
129 brw_inst_set_dst_ia1_addr_imm(devinfo
, inst
,
130 dest
.indirect_offset
);
131 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
132 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
133 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
135 brw_inst_set_dst_ia16_addr_imm(devinfo
, inst
,
136 dest
.indirect_offset
);
137 /* even ignored in da16, still need to set as '01' */
138 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
142 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
143 * or 16 (SIMD16), as that's normally correct. However, when dealing with
144 * small registers, we automatically reduce it to match the register size.
146 * In platforms that support fp64 we can emit instructions with a width of
147 * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these
148 * cases we need to make sure that these instructions have their exec sizes
149 * set properly when they are emitted and we can't rely on this code to fix
153 if (devinfo
->gen
>= 6)
154 fix_exec_size
= dest
.width
< BRW_EXECUTE_4
;
156 fix_exec_size
= dest
.width
< BRW_EXECUTE_8
;
159 brw_inst_set_exec_size(devinfo
, inst
, dest
.width
);
163 brw_set_src0(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
165 const struct gen_device_info
*devinfo
= p
->devinfo
;
167 if (reg
.file
== BRW_MESSAGE_REGISTER_FILE
)
168 assert((reg
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
169 else if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
170 assert(reg
.nr
< 128);
172 gen7_convert_mrf_to_grf(p
, ®
);
174 if (devinfo
->gen
>= 6 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
175 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
)) {
176 /* Any source modifiers or regions will be ignored, since this just
177 * identifies the MRF/GRF to start reading the message contents from.
178 * Check for some likely failures.
182 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
185 brw_inst_set_src0_file_type(devinfo
, inst
, reg
.file
, reg
.type
);
186 brw_inst_set_src0_abs(devinfo
, inst
, reg
.abs
);
187 brw_inst_set_src0_negate(devinfo
, inst
, reg
.negate
);
188 brw_inst_set_src0_address_mode(devinfo
, inst
, reg
.address_mode
);
190 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
191 if (reg
.type
== BRW_REGISTER_TYPE_DF
||
192 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_DIM
)
193 brw_inst_set_imm_df(devinfo
, inst
, reg
.df
);
194 else if (reg
.type
== BRW_REGISTER_TYPE_UQ
||
195 reg
.type
== BRW_REGISTER_TYPE_Q
)
196 brw_inst_set_imm_uq(devinfo
, inst
, reg
.u64
);
198 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
200 if (type_sz(reg
.type
) < 8) {
201 brw_inst_set_src1_reg_file(devinfo
, inst
,
202 BRW_ARCHITECTURE_REGISTER_FILE
);
203 brw_inst_set_src1_reg_hw_type(devinfo
, inst
,
204 brw_inst_src0_reg_hw_type(devinfo
, inst
));
207 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
208 brw_inst_set_src0_da_reg_nr(devinfo
, inst
, reg
.nr
);
209 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
210 brw_inst_set_src0_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
212 brw_inst_set_src0_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
215 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.subnr
);
217 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
218 brw_inst_set_src0_ia1_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
220 brw_inst_set_src0_ia16_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
224 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
225 if (reg
.width
== BRW_WIDTH_1
&&
226 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
227 brw_inst_set_src0_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
228 brw_inst_set_src0_width(devinfo
, inst
, BRW_WIDTH_1
);
229 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
231 brw_inst_set_src0_hstride(devinfo
, inst
, reg
.hstride
);
232 brw_inst_set_src0_width(devinfo
, inst
, reg
.width
);
233 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
236 brw_inst_set_src0_da16_swiz_x(devinfo
, inst
,
237 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
238 brw_inst_set_src0_da16_swiz_y(devinfo
, inst
,
239 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
240 brw_inst_set_src0_da16_swiz_z(devinfo
, inst
,
241 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
242 brw_inst_set_src0_da16_swiz_w(devinfo
, inst
,
243 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
245 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
246 /* This is an oddity of the fact we're using the same
247 * descriptions for registers in align_16 as align_1:
249 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
250 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
251 reg
.type
== BRW_REGISTER_TYPE_DF
&&
252 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
255 * "For Align16 access mode, only encodings of 0000 and 0011
256 * are allowed. Other codes are reserved."
258 * Presumably the DevSNB behavior applies to IVB as well.
260 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
262 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
270 brw_set_src1(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
272 const struct gen_device_info
*devinfo
= p
->devinfo
;
274 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
275 assert(reg
.nr
< 128);
277 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
279 * "Accumulator registers may be accessed explicitly as src0
282 assert(reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
283 reg
.nr
!= BRW_ARF_ACCUMULATOR
);
285 gen7_convert_mrf_to_grf(p
, ®
);
286 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
288 brw_inst_set_src1_file_type(devinfo
, inst
, reg
.file
, reg
.type
);
289 brw_inst_set_src1_abs(devinfo
, inst
, reg
.abs
);
290 brw_inst_set_src1_negate(devinfo
, inst
, reg
.negate
);
292 /* Only src1 can be immediate in two-argument instructions.
294 assert(brw_inst_src0_reg_file(devinfo
, inst
) != BRW_IMMEDIATE_VALUE
);
296 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
297 /* two-argument instructions can only use 32-bit immediates */
298 assert(type_sz(reg
.type
) < 8);
299 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
301 /* This is a hardware restriction, which may or may not be lifted
304 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
305 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
307 brw_inst_set_src1_da_reg_nr(devinfo
, inst
, reg
.nr
);
308 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
309 brw_inst_set_src1_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
311 brw_inst_set_src1_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
314 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
315 if (reg
.width
== BRW_WIDTH_1
&&
316 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
317 brw_inst_set_src1_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
318 brw_inst_set_src1_width(devinfo
, inst
, BRW_WIDTH_1
);
319 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
321 brw_inst_set_src1_hstride(devinfo
, inst
, reg
.hstride
);
322 brw_inst_set_src1_width(devinfo
, inst
, reg
.width
);
323 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
326 brw_inst_set_src1_da16_swiz_x(devinfo
, inst
,
327 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
328 brw_inst_set_src1_da16_swiz_y(devinfo
, inst
,
329 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
330 brw_inst_set_src1_da16_swiz_z(devinfo
, inst
,
331 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
332 brw_inst_set_src1_da16_swiz_w(devinfo
, inst
,
333 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
335 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
336 /* This is an oddity of the fact we're using the same
337 * descriptions for registers in align_16 as align_1:
339 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
340 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
341 reg
.type
== BRW_REGISTER_TYPE_DF
&&
342 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
345 * "For Align16 access mode, only encodings of 0000 and 0011
346 * are allowed. Other codes are reserved."
348 * Presumably the DevSNB behavior applies to IVB as well.
350 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
352 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
359 * Set the Message Descriptor and Extended Message Descriptor fields
362 * \note This zeroes out the Function Control bits, so it must be called
363 * \b before filling out any message-specific data. Callers can
364 * choose not to fill in irrelevant bits; they will be zero.
367 brw_set_message_descriptor(struct brw_codegen
*p
,
369 enum brw_message_target sfid
,
371 unsigned response_length
,
375 const struct gen_device_info
*devinfo
= p
->devinfo
;
377 brw_set_src1(p
, inst
, brw_imm_d(0));
379 /* For indirect sends, `inst` will not be the SEND/SENDC instruction
380 * itself; instead, it will be a MOV/OR into the address register.
382 * In this case, we avoid setting the extended message descriptor bits,
383 * since they go on the later SEND/SENDC instead and if set here would
384 * instead clobber the conditionalmod bits.
386 unsigned opcode
= brw_inst_opcode(devinfo
, inst
);
387 if (opcode
== BRW_OPCODE_SEND
|| opcode
== BRW_OPCODE_SENDC
) {
388 brw_inst_set_sfid(devinfo
, inst
, sfid
);
391 brw_inst_set_mlen(devinfo
, inst
, msg_length
);
392 brw_inst_set_rlen(devinfo
, inst
, response_length
);
393 brw_inst_set_eot(devinfo
, inst
, end_of_thread
);
395 if (devinfo
->gen
>= 5) {
396 brw_inst_set_header_present(devinfo
, inst
, header_present
);
400 static void brw_set_math_message( struct brw_codegen
*p
,
403 unsigned integer_type
,
407 const struct gen_device_info
*devinfo
= p
->devinfo
;
409 unsigned response_length
;
411 /* Infer message length from the function */
413 case BRW_MATH_FUNCTION_POW
:
414 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
415 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
416 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
424 /* Infer response length from the function */
426 case BRW_MATH_FUNCTION_SINCOS
:
427 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
436 brw_set_message_descriptor(p
, inst
, BRW_SFID_MATH
,
437 msg_length
, response_length
, false, false);
438 brw_inst_set_math_msg_function(devinfo
, inst
, function
);
439 brw_inst_set_math_msg_signed_int(devinfo
, inst
, integer_type
);
440 brw_inst_set_math_msg_precision(devinfo
, inst
, low_precision
);
441 brw_inst_set_math_msg_saturate(devinfo
, inst
, brw_inst_saturate(devinfo
, inst
));
442 brw_inst_set_math_msg_data_type(devinfo
, inst
, dataType
);
443 brw_inst_set_saturate(devinfo
, inst
, 0);
447 static void brw_set_ff_sync_message(struct brw_codegen
*p
,
450 unsigned response_length
,
453 const struct gen_device_info
*devinfo
= p
->devinfo
;
455 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
456 1, response_length
, true, end_of_thread
);
457 brw_inst_set_urb_opcode(devinfo
, insn
, 1); /* FF_SYNC */
458 brw_inst_set_urb_allocate(devinfo
, insn
, allocate
);
459 /* The following fields are not used by FF_SYNC: */
460 brw_inst_set_urb_global_offset(devinfo
, insn
, 0);
461 brw_inst_set_urb_swizzle_control(devinfo
, insn
, 0);
462 brw_inst_set_urb_used(devinfo
, insn
, 0);
463 brw_inst_set_urb_complete(devinfo
, insn
, 0);
466 static void brw_set_urb_message( struct brw_codegen
*p
,
468 enum brw_urb_write_flags flags
,
470 unsigned response_length
,
472 unsigned swizzle_control
)
474 const struct gen_device_info
*devinfo
= p
->devinfo
;
476 assert(devinfo
->gen
< 7 || swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
477 assert(devinfo
->gen
< 7 || !(flags
& BRW_URB_WRITE_ALLOCATE
));
478 assert(devinfo
->gen
>= 7 || !(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
480 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
481 msg_length
, response_length
, true,
482 flags
& BRW_URB_WRITE_EOT
);
484 if (flags
& BRW_URB_WRITE_OWORD
) {
485 assert(msg_length
== 2); /* header + one OWORD of data */
486 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_OWORD
);
488 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_HWORD
);
491 brw_inst_set_urb_global_offset(devinfo
, insn
, offset
);
492 brw_inst_set_urb_swizzle_control(devinfo
, insn
, swizzle_control
);
494 if (devinfo
->gen
< 8) {
495 brw_inst_set_urb_complete(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_COMPLETE
));
498 if (devinfo
->gen
< 7) {
499 brw_inst_set_urb_allocate(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_ALLOCATE
));
500 brw_inst_set_urb_used(devinfo
, insn
, !(flags
& BRW_URB_WRITE_UNUSED
));
502 brw_inst_set_urb_per_slot_offset(devinfo
, insn
,
503 !!(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
508 brw_set_dp_write_message(struct brw_codegen
*p
,
510 unsigned binding_table_index
,
511 unsigned msg_control
,
513 unsigned target_cache
,
516 unsigned last_render_target
,
517 unsigned response_length
,
518 unsigned end_of_thread
,
519 unsigned send_commit_msg
)
521 const struct gen_device_info
*devinfo
= p
->devinfo
;
522 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
523 BRW_SFID_DATAPORT_WRITE
);
525 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
526 header_present
, end_of_thread
);
528 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
529 brw_inst_set_dp_write_msg_type(devinfo
, insn
, msg_type
);
530 brw_inst_set_dp_write_msg_control(devinfo
, insn
, msg_control
);
531 brw_inst_set_rt_last(devinfo
, insn
, last_render_target
);
532 if (devinfo
->gen
< 7) {
533 brw_inst_set_dp_write_commit(devinfo
, insn
, send_commit_msg
);
538 brw_set_dp_read_message(struct brw_codegen
*p
,
540 unsigned binding_table_index
,
541 unsigned msg_control
,
543 unsigned target_cache
,
546 unsigned response_length
)
548 const struct gen_device_info
*devinfo
= p
->devinfo
;
549 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
550 BRW_SFID_DATAPORT_READ
);
552 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
553 header_present
, false);
555 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
556 brw_inst_set_dp_read_msg_type(devinfo
, insn
, msg_type
);
557 brw_inst_set_dp_read_msg_control(devinfo
, insn
, msg_control
);
558 if (devinfo
->gen
< 6)
559 brw_inst_set_dp_read_target_cache(devinfo
, insn
, target_cache
);
563 brw_set_sampler_message(struct brw_codegen
*p
,
565 unsigned binding_table_index
,
568 unsigned response_length
,
570 unsigned header_present
,
572 unsigned return_format
)
574 const struct gen_device_info
*devinfo
= p
->devinfo
;
576 brw_set_message_descriptor(p
, inst
, BRW_SFID_SAMPLER
, msg_length
,
577 response_length
, header_present
, false);
579 brw_inst_set_binding_table_index(devinfo
, inst
, binding_table_index
);
580 brw_inst_set_sampler(devinfo
, inst
, sampler
);
581 brw_inst_set_sampler_msg_type(devinfo
, inst
, msg_type
);
582 if (devinfo
->gen
>= 5) {
583 brw_inst_set_sampler_simd_mode(devinfo
, inst
, simd_mode
);
584 } else if (devinfo
->gen
== 4 && !devinfo
->is_g4x
) {
585 brw_inst_set_sampler_return_format(devinfo
, inst
, return_format
);
590 gen7_set_dp_scratch_message(struct brw_codegen
*p
,
594 bool invalidate_after_read
,
596 unsigned addr_offset
,
601 const struct gen_device_info
*devinfo
= p
->devinfo
;
602 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 ||
603 (devinfo
->gen
>= 8 && num_regs
== 8));
604 const unsigned block_size
= (devinfo
->gen
>= 8 ? _mesa_logbase2(num_regs
) :
607 brw_set_message_descriptor(p
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
608 mlen
, rlen
, header_present
, false);
609 brw_inst_set_dp_category(devinfo
, inst
, 1); /* Scratch Block Read/Write msgs */
610 brw_inst_set_scratch_read_write(devinfo
, inst
, write
);
611 brw_inst_set_scratch_type(devinfo
, inst
, dword
);
612 brw_inst_set_scratch_invalidate_after_read(devinfo
, inst
, invalidate_after_read
);
613 brw_inst_set_scratch_block_size(devinfo
, inst
, block_size
);
614 brw_inst_set_scratch_addr_offset(devinfo
, inst
, addr_offset
);
617 #define next_insn brw_next_insn
619 brw_next_insn(struct brw_codegen
*p
, unsigned opcode
)
621 const struct gen_device_info
*devinfo
= p
->devinfo
;
624 if (p
->nr_insn
+ 1 > p
->store_size
) {
626 p
->store
= reralloc(p
->mem_ctx
, p
->store
, brw_inst
, p
->store_size
);
629 p
->next_insn_offset
+= 16;
630 insn
= &p
->store
[p
->nr_insn
++];
631 memcpy(insn
, p
->current
, sizeof(*insn
));
633 brw_inst_set_opcode(devinfo
, insn
, opcode
);
638 brw_alu1(struct brw_codegen
*p
, unsigned opcode
,
639 struct brw_reg dest
, struct brw_reg src
)
641 brw_inst
*insn
= next_insn(p
, opcode
);
642 brw_set_dest(p
, insn
, dest
);
643 brw_set_src0(p
, insn
, src
);
648 brw_alu2(struct brw_codegen
*p
, unsigned opcode
,
649 struct brw_reg dest
, struct brw_reg src0
, struct brw_reg src1
)
651 /* 64-bit immediates are only supported on 1-src instructions */
652 assert(src0
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src0
.type
) <= 4);
653 assert(src1
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src1
.type
) <= 4);
655 brw_inst
*insn
= next_insn(p
, opcode
);
656 brw_set_dest(p
, insn
, dest
);
657 brw_set_src0(p
, insn
, src0
);
658 brw_set_src1(p
, insn
, src1
);
663 get_3src_subreg_nr(struct brw_reg reg
)
665 /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
666 * use 32-bit units (components 0..7). Since they only support F/D/UD
667 * types, this doesn't lose any flexibility, but uses fewer bits.
669 return reg
.subnr
/ 4;
673 brw_alu3(struct brw_codegen
*p
, unsigned opcode
, struct brw_reg dest
,
674 struct brw_reg src0
, struct brw_reg src1
, struct brw_reg src2
)
676 const struct gen_device_info
*devinfo
= p
->devinfo
;
677 brw_inst
*inst
= next_insn(p
, opcode
);
679 gen7_convert_mrf_to_grf(p
, &dest
);
681 assert(dest
.nr
< 128);
682 assert(src0
.nr
< 128);
683 assert(src1
.nr
< 128);
684 assert(src2
.nr
< 128);
685 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
686 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
687 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
688 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
690 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
691 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
692 dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
);
694 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
) {
695 brw_inst_set_3src_a1_dst_reg_file(devinfo
, inst
,
696 BRW_ALIGN1_3SRC_ACCUMULATOR
);
697 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, BRW_ARF_ACCUMULATOR
);
699 brw_inst_set_3src_a1_dst_reg_file(devinfo
, inst
,
700 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
);
701 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
703 brw_inst_set_3src_a1_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 8);
705 brw_inst_set_3src_a1_dst_hstride(devinfo
, inst
, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1
);
707 if (brw_reg_type_is_floating_point(dest
.type
)) {
708 brw_inst_set_3src_a1_exec_type(devinfo
, inst
,
709 BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT
);
711 brw_inst_set_3src_a1_exec_type(devinfo
, inst
,
712 BRW_ALIGN1_3SRC_EXEC_TYPE_INT
);
715 brw_inst_set_3src_a1_dst_type(devinfo
, inst
, dest
.type
);
716 brw_inst_set_3src_a1_src0_type(devinfo
, inst
, src0
.type
);
717 brw_inst_set_3src_a1_src1_type(devinfo
, inst
, src1
.type
);
718 brw_inst_set_3src_a1_src2_type(devinfo
, inst
, src2
.type
);
720 assert((src0
.vstride
== BRW_VERTICAL_STRIDE_0
&&
721 src0
.hstride
== BRW_HORIZONTAL_STRIDE_0
) ||
722 (src0
.vstride
== BRW_VERTICAL_STRIDE_8
&&
723 src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
));
724 assert((src1
.vstride
== BRW_VERTICAL_STRIDE_0
&&
725 src1
.hstride
== BRW_HORIZONTAL_STRIDE_0
) ||
726 (src1
.vstride
== BRW_VERTICAL_STRIDE_8
&&
727 src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
));
728 assert((src2
.vstride
== BRW_VERTICAL_STRIDE_0
&&
729 src2
.hstride
== BRW_HORIZONTAL_STRIDE_0
) ||
730 (src2
.vstride
== BRW_VERTICAL_STRIDE_8
&&
731 src2
.hstride
== BRW_HORIZONTAL_STRIDE_1
));
733 brw_inst_set_3src_a1_src0_vstride(devinfo
, inst
,
734 src0
.vstride
== BRW_VERTICAL_STRIDE_0
?
735 BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0
:
736 BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8
);
737 brw_inst_set_3src_a1_src1_vstride(devinfo
, inst
,
738 src1
.vstride
== BRW_VERTICAL_STRIDE_0
?
739 BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0
:
740 BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8
);
741 /* no vstride on src2 */
743 brw_inst_set_3src_a1_src0_hstride(devinfo
, inst
,
744 src0
.hstride
== BRW_HORIZONTAL_STRIDE_0
?
745 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0
:
746 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1
);
747 brw_inst_set_3src_a1_src1_hstride(devinfo
, inst
,
748 src1
.hstride
== BRW_HORIZONTAL_STRIDE_0
?
749 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0
:
750 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1
);
751 brw_inst_set_3src_a1_src2_hstride(devinfo
, inst
,
752 src2
.hstride
== BRW_HORIZONTAL_STRIDE_0
?
753 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0
:
754 BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1
);
756 brw_inst_set_3src_a1_src0_subreg_nr(devinfo
, inst
, src0
.subnr
);
757 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
758 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
759 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
761 brw_inst_set_3src_a1_src1_subreg_nr(devinfo
, inst
, src1
.subnr
);
762 if (src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
) {
763 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, BRW_ARF_ACCUMULATOR
);
765 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
767 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
768 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
770 brw_inst_set_3src_a1_src2_subreg_nr(devinfo
, inst
, src2
.subnr
);
771 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
772 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
773 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
775 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
||
776 src0
.file
== BRW_IMMEDIATE_VALUE
);
777 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
778 src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
);
779 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
||
780 src2
.file
== BRW_IMMEDIATE_VALUE
);
782 brw_inst_set_3src_a1_src0_reg_file(devinfo
, inst
,
783 src0
.file
== BRW_GENERAL_REGISTER_FILE
?
784 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
785 BRW_ALIGN1_3SRC_IMMEDIATE_VALUE
);
786 brw_inst_set_3src_a1_src1_reg_file(devinfo
, inst
,
787 src1
.file
== BRW_GENERAL_REGISTER_FILE
?
788 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
789 BRW_ALIGN1_3SRC_ACCUMULATOR
);
790 brw_inst_set_3src_a1_src2_reg_file(devinfo
, inst
,
791 src2
.file
== BRW_GENERAL_REGISTER_FILE
?
792 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
793 BRW_ALIGN1_3SRC_IMMEDIATE_VALUE
);
795 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
796 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
797 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
798 dest
.type
== BRW_REGISTER_TYPE_DF
||
799 dest
.type
== BRW_REGISTER_TYPE_D
||
800 dest
.type
== BRW_REGISTER_TYPE_UD
);
801 if (devinfo
->gen
== 6) {
802 brw_inst_set_3src_a16_dst_reg_file(devinfo
, inst
,
803 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
805 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
806 brw_inst_set_3src_a16_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
807 brw_inst_set_3src_a16_dst_writemask(devinfo
, inst
, dest
.writemask
);
809 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
810 brw_inst_set_3src_a16_src0_swizzle(devinfo
, inst
, src0
.swizzle
);
811 brw_inst_set_3src_a16_src0_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src0
));
812 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
813 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
814 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
815 brw_inst_set_3src_a16_src0_rep_ctrl(devinfo
, inst
,
816 src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
818 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
819 brw_inst_set_3src_a16_src1_swizzle(devinfo
, inst
, src1
.swizzle
);
820 brw_inst_set_3src_a16_src1_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src1
));
821 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
822 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
823 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
824 brw_inst_set_3src_a16_src1_rep_ctrl(devinfo
, inst
,
825 src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
827 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
828 brw_inst_set_3src_a16_src2_swizzle(devinfo
, inst
, src2
.swizzle
);
829 brw_inst_set_3src_a16_src2_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src2
));
830 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
831 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
832 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
833 brw_inst_set_3src_a16_src2_rep_ctrl(devinfo
, inst
,
834 src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
836 if (devinfo
->gen
>= 7) {
837 /* Set both the source and destination types based on dest.type,
838 * ignoring the source register types. The MAD and LRP emitters ensure
839 * that all four types are float. The BFE and BFI2 emitters, however,
840 * may send us mixed D and UD types and want us to ignore that and use
841 * the destination type.
843 brw_inst_set_3src_a16_src_type(devinfo
, inst
, dest
.type
);
844 brw_inst_set_3src_a16_dst_type(devinfo
, inst
, dest
.type
);
852 /***********************************************************************
853 * Convenience routines.
856 brw_inst *brw_##OP(struct brw_codegen *p, \
857 struct brw_reg dest, \
858 struct brw_reg src0) \
860 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
864 brw_inst *brw_##OP(struct brw_codegen *p, \
865 struct brw_reg dest, \
866 struct brw_reg src0, \
867 struct brw_reg src1) \
869 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
873 brw_inst *brw_##OP(struct brw_codegen *p, \
874 struct brw_reg dest, \
875 struct brw_reg src0, \
876 struct brw_reg src1, \
877 struct brw_reg src2) \
879 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
883 brw_inst *brw_##OP(struct brw_codegen *p, \
884 struct brw_reg dest, \
885 struct brw_reg src0, \
886 struct brw_reg src1, \
887 struct brw_reg src2) \
889 assert(dest.type == BRW_REGISTER_TYPE_F || \
890 dest.type == BRW_REGISTER_TYPE_DF); \
891 if (dest.type == BRW_REGISTER_TYPE_F) { \
892 assert(src0.type == BRW_REGISTER_TYPE_F); \
893 assert(src1.type == BRW_REGISTER_TYPE_F); \
894 assert(src2.type == BRW_REGISTER_TYPE_F); \
895 } else if (dest.type == BRW_REGISTER_TYPE_DF) { \
896 assert(src0.type == BRW_REGISTER_TYPE_DF); \
897 assert(src1.type == BRW_REGISTER_TYPE_DF); \
898 assert(src2.type == BRW_REGISTER_TYPE_DF); \
900 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
903 /* Rounding operations (other than RNDD) require two instructions - the first
904 * stores a rounded value (possibly the wrong way) in the dest register, but
905 * also sets a per-channel "increment bit" in the flag register. A predicated
906 * add of 1.0 fixes dest to contain the desired result.
908 * Sandybridge and later appear to round correctly without an ADD.
911 void brw_##OP(struct brw_codegen *p, \
912 struct brw_reg dest, \
913 struct brw_reg src) \
915 const struct gen_device_info *devinfo = p->devinfo; \
916 brw_inst *rnd, *add; \
917 rnd = next_insn(p, BRW_OPCODE_##OP); \
918 brw_set_dest(p, rnd, dest); \
919 brw_set_src0(p, rnd, src); \
921 if (devinfo->gen < 6) { \
922 /* turn on round-increments */ \
923 brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \
924 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
925 brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \
/**
 * Emit a MOV instruction.
 *
 * Applies an IVB/BYT-specific source-region fixup before delegating to
 * brw_alu1() for the actual encoding.
 */
brw_inst *
brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
    * To avoid the problems that causes, we use a <1,2,0> source region to read
    * each element twice.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
       dest.type == BRW_REGISTER_TYPE_DF &&
       (src0.type == BRW_REGISTER_TYPE_F ||
        src0.type == BRW_REGISTER_TYPE_D ||
        src0.type == BRW_REGISTER_TYPE_UD) &&
       !has_scalar_region(src0)) {
      /* The fixup below only handles the canonical <4,4,1> region. */
      assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
             src0.width == BRW_WIDTH_4 &&
             src0.hstride == BRW_HORIZONTAL_STRIDE_1);

      src0.vstride = BRW_VERTICAL_STRIDE_1;
      src0.width = BRW_WIDTH_2;
      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   }

   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
}
/**
 * Emit an ADD instruction.
 *
 * The asserts enforce the hardware restriction that a float (or
 * vector-float immediate) source may not be mixed with a D/UD source.
 */
brw_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
/**
 * Emit an AVG (integer average with rounding) instruction.
 *
 * AVG is only defined for integer types; all three operands must share
 * one of the allowed B/UB/W/UW/D/UD types.
 */
brw_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
/**
 * Emit a MUL instruction.
 *
 * Enforces the documented operand restrictions: no float destination for
 * integer multiplies, no mixing of float and D/UD sources, and neither
 * source may be the accumulator.
 */
brw_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   /* MUL may not read the accumulator as a source. */
   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
1069 brw_LINE(struct brw_codegen
*p
, struct brw_reg dest
,
1070 struct brw_reg src0
, struct brw_reg src1
)
1072 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1073 src0
.width
= BRW_WIDTH_1
;
1074 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1075 return brw_alu2(p
, BRW_OPCODE_LINE
, dest
, src0
, src1
);
1079 brw_PLN(struct brw_codegen
*p
, struct brw_reg dest
,
1080 struct brw_reg src0
, struct brw_reg src1
)
1082 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1083 src0
.width
= BRW_WIDTH_1
;
1084 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1085 src1
.vstride
= BRW_VERTICAL_STRIDE_8
;
1086 src1
.width
= BRW_WIDTH_8
;
1087 src1
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
1088 return brw_alu2(p
, BRW_OPCODE_PLN
, dest
, src0
, src1
);
/**
 * Emit a float-to-half conversion (F32TO16 on Gen7, converting MOV on Gen8+),
 * zero-filling the high 16 bits of a UD destination where the hardware does
 * not do so itself.
 */
void brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct gen_device_info *devinfo = p->devinfo;
   const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
   /* The F32TO16 instruction doesn't support 32-bit destination types in
    * Align1 mode, and neither does the Gen8 implementation in terms of a
    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
    * an undocumented feature.
    */
   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
                                 (!align16 || devinfo->gen >= 8));
   brw_inst *inst;

   if (align16) {
      assert(dst.type == BRW_REGISTER_TYPE_UD);
   } else {
      assert(dst.type == BRW_REGISTER_TYPE_UD ||
             dst.type == BRW_REGISTER_TYPE_W ||
             dst.type == BRW_REGISTER_TYPE_UW ||
             dst.type == BRW_REGISTER_TYPE_HF);
   }

   brw_push_insn_state(p);

   if (needs_zero_fill) {
      /* Rewrite the UD destination as strided words so the half result and
       * the zero fill land in the low and high 16 bits respectively.
       */
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
   }

   if (devinfo->gen >= 8) {
      /* Gen8+ has no F32TO16; a converting MOV to HF does the job. */
      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
   } else {
      assert(devinfo->gen == 7);
      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
   }

   if (needs_zero_fill) {
      /* The dependency-control overrides let the zero-fill MOV co-issue with
       * the conversion it completes.
       */
      brw_inst_set_no_dd_clear(devinfo, inst, true);
      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
      brw_inst_set_no_dd_check(devinfo, inst, true);
   }

   brw_pop_insn_state(p);
}
/**
 * Emit a half-to-float conversion (F16TO32 on Gen7, converting MOV on Gen8+).
 */
brw_inst *
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct gen_device_info *devinfo = p->devinfo;
   bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;

   if (align16) {
      assert(src.type == BRW_REGISTER_TYPE_UD);
   } else {
      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
       *
       *   Because this instruction does not have a 16-bit floating-point
       *   type, the source data type must be Word (W). The destination type
       *   must be F (Float).
       */
      if (src.type == BRW_REGISTER_TYPE_UD)
         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);

      assert(src.type == BRW_REGISTER_TYPE_W ||
             src.type == BRW_REGISTER_TYPE_UW ||
             src.type == BRW_REGISTER_TYPE_HF);
   }

   if (devinfo->gen >= 8) {
      /* Gen8+ has no F16TO32; a converting MOV from HF does the job. */
      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
   } else {
      assert(devinfo->gen == 7);
      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
   }
}
/** Emit a NOP, clearing every other field of the instruction word. */
void brw_NOP(struct brw_codegen *p)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   /* Zero the whole instruction first: next_insn() copies default state that
    * a NOP must not carry.
    */
   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
}
1181 /***********************************************************************
1182 * Comparisons, if/else/endif
/**
 * Emit a JMPI (jump-immediate) instruction: ip = ip + index.
 *
 * JMPI is a scalar control-flow instruction, so execution size, compression
 * and execution masking are all forced off.
 */
brw_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct gen_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}
/**
 * Record an IF/ELSE instruction on the if-stack for later patching.
 *
 * The stack stores indices rather than pointers because next_insn() may
 * reallocate p->store; the array is grown after the push so there is always
 * room for the next entry.
 */
static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}
1215 pop_if_stack(struct brw_codegen
*p
)
1217 p
->if_stack_depth
--;
1218 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
/**
 * Record the start of a loop (its DO instruction, or its first instruction
 * on gens without DO) on the loop stack, and reset the per-loop IF depth.
 *
 * Indices are stored instead of pointers because p->store may be
 * reallocated.  Growth is checked against depth + 1 because
 * if_depth_in_loop is indexed at the new depth below.
 */
static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
                                     p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
}
1238 get_inner_do_insn(struct brw_codegen
*p
)
1240 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: the operand encoding and
    * the jump fields (gen4 jump count vs. gen6 jump count vs. JIP/UIP)
    * differ per generation.  Jump targets are left as 0 here and patched
    * later by patch_IF_ELSE().
    */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_exec_size(devinfo, insn, execute_size);
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
brw_inst *
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Jump count is 0 for now; patch_IF_ELSE() fills it in later. */
   brw_set_dest(p, insn, brw_imm_w(0));
   brw_inst_set_exec_size(devinfo, insn,
                          brw_inst_exec_size(devinfo, p->current));
   brw_inst_set_gen6_jump_count(devinfo, insn, 0);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
   brw_inst_set_cond_modifier(devinfo, insn, conditional);

   push_if_stack(p, insn);

   return insn;
}
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_codegen *p,
                       brw_inst *if_inst, brw_inst *else_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* The next instruction (where the ENDIF would be, if it existed) */
   brw_inst *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD);
   brw_inst_set_pred_inv(devinfo, if_inst, true);

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD);

      /* Jump distances are in bytes; each instruction is 16 bytes. */
      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
   } else {
      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
   }
}
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (devinfo->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);

   /* Jump counts are scaled differently per generation (see brw_jump_scale). */
   unsigned br = brw_jump_scale(devinfo);

   assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF);
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (devinfo->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (endif_inst - if_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
      } else {
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
      }
   } else {
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));

      /* Patch IF -> ELSE */
      if (devinfo->gen < 6) {
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         brw_inst_set_gen6_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst + 1));
      }

      /* Patch ELSE -> ENDIF */
      if (devinfo->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         brw_inst_set_gen4_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
      } else if (devinfo->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst));
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
         if (devinfo->gen >= 8) {
            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
             * should point to ENDIF.
             */
            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
         }
      }
   }
}
/**
 * Emit an ELSE instruction with zeroed jump fields; the targets are filled
 * in later by patch_IF_ELSE() when the matching ENDIF is emitted.
 */
void
brw_ELSE(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* Operand encoding differs per generation, same scheme as brw_IF(). */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
}
/**
 * Close the innermost IF/ELSE block: pop the if-stack, emit an ENDIF (or, on
 * pre-Gen6 SPF mode, rewrite the IF/ELSE into IP-adjusting ADDs instead),
 * and patch the IF/ELSE jump targets.
 */
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (devinfo->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Operand encoding differs per generation, same scheme as brw_IF(). */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
   } else {
      brw_set_src0(p, insn, brw_imm_d(0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   /* Also pop item off the stack in the endif instruction: */
   if (devinfo->gen < 6) {
      brw_inst_set_gen4_jump_count(devinfo, insn, 0);
      brw_inst_set_gen4_pop_count(devinfo, insn, 1);
   } else if (devinfo->gen == 6) {
      brw_inst_set_gen6_jump_count(devinfo, insn, 2);
   } else {
      brw_inst_set_jip(devinfo, insn, 2);
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/**
 * Emit a BREAK out of the innermost loop.  Jump fields are left zero; on
 * pre-Gen6 they are patched by brw_patch_break_cont() when the loop's WHILE
 * is emitted.
 */
brw_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (devinfo->gen >= 8) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      /* Pre-gen6 BREAK pops the mask stack entries pushed by enclosing IFs. */
      brw_inst_set_gen4_pop_count(devinfo, insn,
                                  p->if_depth_in_loop[p->loop_stack_depth]);
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn,
                          brw_inst_exec_size(devinfo, p->current));

   return insn;
}
/**
 * Emit a CONTINUE to the innermost loop's WHILE.  On pre-Gen6 the jump count
 * is patched later by brw_patch_break_cont().
 */
brw_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   if (devinfo->gen >= 8) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   }

   if (devinfo->gen < 6) {
      /* Pop the mask stack entries pushed by enclosing IFs. */
      brw_inst_set_gen4_pop_count(devinfo, insn,
                                  p->if_depth_in_loop[p->loop_stack_depth]);
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn,
                          brw_inst_exec_size(devinfo, p->current));
   return insn;
}
/**
 * Emit a HALT instruction (Gen6+).  The UIP and JIP jump fields are left
 * zero here and filled in later by the caller.
 */
brw_inst *
gen6_HALT(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   if (devinfo->gen >= 8) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn,
                          brw_inst_exec_size(devinfo, p->current));
   return insn;
}
/* The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
brw_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (devinfo->gen >= 6 || p->single_program_flow) {
      /* No DO instruction needed; just remember where the loop begins. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
      brw_inst_set_exec_size(devinfo, insn, execute_size);
      brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE);

      return insn;
   }
}
/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *do_inst = get_inner_do_insn(p);
   brw_inst *inst;
   unsigned br = brw_jump_scale(devinfo);

   assert(devinfo->gen < 6);

   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
          brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         /* BREAK jumps to just past the WHILE (hence the +1). */
         brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
      } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
                 brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         /* CONTINUE jumps to the WHILE itself. */
         brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
      }
   }
}
/**
 * Close the innermost loop by emitting its WHILE (or, in pre-Gen6 SPF mode,
 * an IP-adjusting ADD), back-pointing it to the loop start and patching any
 * pending pre-Gen6 BREAK/CONTINUE instructions.
 */
brw_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   if (devinfo->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      /* JIP is negative here: the WHILE jumps backward to the loop start. */
      if (devinfo->gen >= 8) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, brw_imm_d(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else if (devinfo->gen == 7) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, brw_imm_w(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else {
         brw_set_dest(p, insn, brw_imm_w(0));
         brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      brw_inst_set_exec_size(devinfo, insn,
                             brw_inst_exec_size(devinfo, p->current));

   } else {
      if (p->single_program_flow) {
         /* SPF mode: the WHILE is just an ADD of a (negative, byte-scaled)
          * offset to IP.
          */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
         brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
         brw_inst_set_gen4_pop_count(devinfo, insn, 0);

         brw_patch_break_cont(p, insn);
      }
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
/**
 * Patch a previously emitted forward JMPI (at store index \p jmp_insn_idx)
 * to land on the current end of the instruction stream.  Gen5+ counts jumps
 * in half-instruction (64-bit) units, hence the scale of 2.
 */
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
   unsigned jmpi = 1;

   if (devinfo->gen >= 5)
      jmpi = 2;

   assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI);
   assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);

   brw_inst_set_gen4_jump_count(devinfo, jmp_insn,
                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
}
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);

   brw_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (devinfo->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
      }
   }
}
1841 /***********************************************************************
1842 * Helpers for the various SEND message types:
/** Extended math function, float[8].
 *
 * Pre-Gen6 extended math is a SEND to the shared math function unit.
 * NOTE(review): the extraction dropped two parameter lines here; the
 * `function` and `src` parameters below are restored from the body's uses
 * (`function` forwarded to brw_set_math_message, `src` as the message
 * payload) — confirm against upstream.
 */
void gen4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   unsigned data_type;
   if (has_scalar_region(src)) {
      data_type = BRW_MATH_DATA_SCALAR;
   } else {
      data_type = BRW_MATH_DATA_VECTOR;
   }

   assert(devinfo->gen < 6);

   /* Example code doesn't set predicate_control for send
    * instructions.
    */
   brw_inst_set_pred_control(devinfo, insn, 0);
   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        src.type == BRW_REGISTER_TYPE_D,
                        precision,
                        data_type);
}
/**
 * Extended math function, Gen6+: a native MATH instruction rather than a
 * SEND.  Asserts encode the documented operand restrictions (register files,
 * strides, integer-divide typing, and Gen6's lack of source modifiers).
 */
void gen6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(devinfo->gen >= 6);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (devinfo->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      /* Integer divide takes integer sources; src1 must be a GRF (or, on
       * Gen8+, an immediate).
       */
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (devinfo->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   brw_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1929 * Return the right surface index to access the thread scratch space using
1930 * stateless dataport messages.
1933 brw_scratch_surface_idx(const struct brw_codegen
*p
)
1935 /* The scratch space is thread-local so IA coherency is unnecessary. */
1936 if (p
->devinfo
->gen
>= 8)
1937 return GEN8_BTI_STATELESS_NON_COHERENT
;
1939 return BRW_BTI_STATELESS
;
1943 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1944 * using a constant offset per channel.
1946 * The offset must be aligned to oword size (16 bytes). Used for
1947 * register spilling.
1949 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1954 const struct gen_device_info
*devinfo
= p
->devinfo
;
1955 const unsigned target_cache
=
1956 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
1957 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
1958 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
1961 if (devinfo
->gen
>= 6)
1964 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1966 const unsigned mlen
= 1 + num_regs
;
1968 /* Set up the message header. This is g0, with g0.2 filled with
1969 * the offset. We don't want to leave our offset around in g0 or
1970 * it'll screw up texture samples, so set it up inside the message
1974 brw_push_insn_state(p
);
1975 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
1976 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
1977 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
1979 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1981 /* set message header global offset field (reg 0, element 2) */
1983 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1985 2), BRW_REGISTER_TYPE_UD
),
1986 brw_imm_ud(offset
));
1988 brw_pop_insn_state(p
);
1992 struct brw_reg dest
;
1993 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1994 int send_commit_msg
;
1995 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1996 BRW_REGISTER_TYPE_UW
);
1998 brw_inst_set_compression(devinfo
, insn
, false);
2000 if (brw_inst_exec_size(devinfo
, insn
) >= 16)
2001 src_header
= vec16(src_header
);
2003 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2004 if (devinfo
->gen
< 6)
2005 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2007 /* Until gen6, writes followed by reads from the same location
2008 * are not guaranteed to be ordered unless write_commit is set.
2009 * If set, then a no-op write is issued to the destination
2010 * register to set a dependency, and a read from the destination
2011 * can be used to ensure the ordering.
2013 * For gen6, only writes between different threads need ordering
2014 * protection. Our use of DP writes is all about register
2015 * spilling within a thread.
2017 if (devinfo
->gen
>= 6) {
2018 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2019 send_commit_msg
= 0;
2022 send_commit_msg
= 1;
2025 brw_set_dest(p
, insn
, dest
);
2026 if (devinfo
->gen
>= 6) {
2027 brw_set_src0(p
, insn
, mrf
);
2029 brw_set_src0(p
, insn
, brw_null_reg());
2032 if (devinfo
->gen
>= 6)
2033 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2035 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2037 brw_set_dp_write_message(p
,
2039 brw_scratch_surface_idx(p
),
2040 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2044 true, /* header_present */
2045 0, /* not a render target */
2046 send_commit_msg
, /* response_length */
2054 * Read a block of owords (half a GRF each) from the scratch buffer
2055 * using a constant index per channel.
2057 * Offset must be aligned to oword size (16 bytes). Used for register
2061 brw_oword_block_read_scratch(struct brw_codegen
*p
,
2062 struct brw_reg dest
,
2067 const struct gen_device_info
*devinfo
= p
->devinfo
;
2069 if (devinfo
->gen
>= 6)
2072 if (p
->devinfo
->gen
>= 7) {
2073 /* On gen 7 and above, we no longer have message registers and we can
2074 * send from any register we want. By using the destination register
2075 * for the message, we guarantee that the implied message write won't
2076 * accidentally overwrite anything. This has been a problem because
2077 * the MRF registers and source for the final FB write are both fixed
2080 mrf
= retype(dest
, BRW_REGISTER_TYPE_UD
);
2082 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2084 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2086 const unsigned rlen
= num_regs
;
2087 const unsigned target_cache
=
2088 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2089 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2090 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2093 brw_push_insn_state(p
);
2094 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2095 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2096 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2098 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2100 /* set message header global offset field (reg 0, element 2) */
2101 brw_MOV(p
, get_element_ud(mrf
, 2), brw_imm_ud(offset
));
2103 brw_pop_insn_state(p
);
2107 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2109 assert(brw_inst_pred_control(devinfo
, insn
) == 0);
2110 brw_inst_set_compression(devinfo
, insn
, false);
2112 brw_set_dest(p
, insn
, dest
); /* UW? */
2113 if (devinfo
->gen
>= 6) {
2114 brw_set_src0(p
, insn
, mrf
);
2116 brw_set_src0(p
, insn
, brw_null_reg());
2117 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2120 brw_set_dp_read_message(p
,
2122 brw_scratch_surface_idx(p
),
2123 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2124 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2127 true, /* header_present */
2133 gen7_block_read_scratch(struct brw_codegen
*p
,
2134 struct brw_reg dest
,
2138 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2139 assert(brw_inst_pred_control(p
->devinfo
, insn
) == BRW_PREDICATE_NONE
);
2141 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UW
));
2143 /* The HW requires that the header is present; this is to get the g0.5
2146 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2148 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2149 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2150 * is 32 bytes, which happens to be the size of a register.
2153 assert(offset
< (1 << 12));
2155 gen7_set_dp_scratch_message(p
, insn
,
2156 false, /* scratch read */
2158 false, /* invalidate after read */
2161 1, /* mlen: just g0 */
2162 num_regs
, /* rlen */
2163 true); /* header present */
2167 * Read float[4] vectors from the data port constant cache.
2168 * Location (in buffer) should be a multiple of 16.
2169 * Used for fetching shader constants.
2171 void brw_oword_block_read(struct brw_codegen
*p
,
2172 struct brw_reg dest
,
2175 uint32_t bind_table_index
)
2177 const struct gen_device_info
*devinfo
= p
->devinfo
;
2178 const unsigned target_cache
=
2179 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE
:
2180 BRW_DATAPORT_READ_TARGET_DATA_CACHE
);
2181 const unsigned exec_size
= 1 << brw_inst_exec_size(devinfo
, p
->current
);
2183 /* On newer hardware, offset is in units of owords. */
2184 if (devinfo
->gen
>= 6)
2187 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2189 brw_push_insn_state(p
);
2190 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2191 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2192 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2194 brw_push_insn_state(p
);
2195 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2196 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2198 /* set message header global offset field (reg 0, element 2) */
2200 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2202 2), BRW_REGISTER_TYPE_UD
),
2203 brw_imm_ud(offset
));
2204 brw_pop_insn_state(p
);
2206 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2208 /* cast dest to a uword[8] vector */
2209 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2211 brw_set_dest(p
, insn
, dest
);
2212 if (devinfo
->gen
>= 6) {
2213 brw_set_src0(p
, insn
, mrf
);
2215 brw_set_src0(p
, insn
, brw_null_reg());
2216 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2219 brw_set_dp_read_message(p
, insn
, bind_table_index
,
2220 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size
),
2221 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2224 true, /* header_present */
2225 DIV_ROUND_UP(exec_size
, 8)); /* response_length */
2227 brw_pop_insn_state(p
);
2231 void brw_fb_WRITE(struct brw_codegen
*p
,
2232 struct brw_reg payload
,
2233 struct brw_reg implied_header
,
2234 unsigned msg_control
,
2235 unsigned binding_table_index
,
2236 unsigned msg_length
,
2237 unsigned response_length
,
2239 bool last_render_target
,
2240 bool header_present
)
2242 const struct gen_device_info
*devinfo
= p
->devinfo
;
2243 const unsigned target_cache
=
2244 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2245 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2248 struct brw_reg dest
, src0
;
2250 if (brw_inst_exec_size(devinfo
, p
->current
) >= BRW_EXECUTE_16
)
2251 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2253 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2255 if (devinfo
->gen
>= 6) {
2256 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2258 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2260 brw_inst_set_compression(devinfo
, insn
, false);
2262 if (devinfo
->gen
>= 6) {
2263 /* headerless version, just submit color payload */
2266 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2268 assert(payload
.file
== BRW_MESSAGE_REGISTER_FILE
);
2269 brw_inst_set_base_mrf(devinfo
, insn
, payload
.nr
);
2270 src0
= implied_header
;
2272 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2275 brw_set_dest(p
, insn
, dest
);
2276 brw_set_src0(p
, insn
, src0
);
2277 brw_set_dp_write_message(p
,
2279 binding_table_index
,
2288 0 /* send_commit_msg */);
2292 gen9_fb_READ(struct brw_codegen
*p
,
2294 struct brw_reg payload
,
2295 unsigned binding_table_index
,
2296 unsigned msg_length
,
2297 unsigned response_length
,
2300 const struct gen_device_info
*devinfo
= p
->devinfo
;
2301 assert(devinfo
->gen
>= 9);
2302 const unsigned msg_subtype
=
2303 brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
? 0 : 1;
2304 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2306 brw_set_dest(p
, insn
, dst
);
2307 brw_set_src0(p
, insn
, payload
);
2308 brw_set_dp_read_message(p
, insn
, binding_table_index
,
2309 per_sample
<< 5 | msg_subtype
,
2310 GEN9_DATAPORT_RC_RENDER_TARGET_READ
,
2311 GEN6_SFID_DATAPORT_RENDER_CACHE
,
2312 msg_length
, true /* header_present */,
2314 brw_inst_set_rt_slot_group(devinfo
, insn
,
2315 brw_inst_qtr_control(devinfo
, p
->current
) / 2);
2321 * Texture sample instruction.
2322 * Note: the msg_type plus msg_length values determine exactly what kind
2323 * of sampling operation is performed. See volume 4, page 161 of docs.
2325 void brw_SAMPLE(struct brw_codegen
*p
,
2326 struct brw_reg dest
,
2327 unsigned msg_reg_nr
,
2328 struct brw_reg src0
,
2329 unsigned binding_table_index
,
2332 unsigned response_length
,
2333 unsigned msg_length
,
2334 unsigned header_present
,
2336 unsigned return_format
)
2338 const struct gen_device_info
*devinfo
= p
->devinfo
;
2341 if (msg_reg_nr
!= -1)
2342 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2344 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2345 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
); /* XXX */
2347 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2349 * "Instruction compression is not allowed for this instruction (that
2350 * is, send). The hardware behavior is undefined if this instruction is
2351 * set as compressed. However, compress control can be set to "SecHalf"
2352 * to affect the EMask generation."
2354 * No similar wording is found in later PRMs, but there are examples
2355 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2356 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2357 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2359 brw_inst_set_compression(devinfo
, insn
, false);
2361 if (devinfo
->gen
< 6)
2362 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2364 brw_set_dest(p
, insn
, dest
);
2365 brw_set_src0(p
, insn
, src0
);
2366 brw_set_sampler_message(p
, insn
,
2367 binding_table_index
,
2377 /* Adjust the message header's sampler state pointer to
2378 * select the correct group of 16 samplers.
2380 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
2381 struct brw_reg header
,
2382 struct brw_reg sampler_index
)
2384 /* The "Sampler Index" field can only store values between 0 and 15.
2385 * However, we can add an offset to the "Sampler State Pointer"
2386 * field, effectively selecting a different set of 16 samplers.
2388 * The "Sampler State Pointer" needs to be aligned to a 32-byte
2389 * offset, and each sampler state is only 16-bytes, so we can't
2390 * exclusively use the offset - we have to use both.
2393 const struct gen_device_info
*devinfo
= p
->devinfo
;
2395 if (sampler_index
.file
== BRW_IMMEDIATE_VALUE
) {
2396 const int sampler_state_size
= 16; /* 16 bytes */
2397 uint32_t sampler
= sampler_index
.ud
;
2399 if (sampler
>= 16) {
2400 assert(devinfo
->is_haswell
|| devinfo
->gen
>= 8);
2402 get_element_ud(header
, 3),
2403 get_element_ud(brw_vec8_grf(0, 0), 3),
2404 brw_imm_ud(16 * (sampler
/ 16) * sampler_state_size
));
2407 /* Non-const sampler array indexing case */
2408 if (devinfo
->gen
< 8 && !devinfo
->is_haswell
) {
2412 struct brw_reg temp
= get_element_ud(header
, 3);
2414 brw_AND(p
, temp
, get_element_ud(sampler_index
, 0), brw_imm_ud(0x0f0));
2415 brw_SHL(p
, temp
, temp
, brw_imm_ud(4));
2417 get_element_ud(header
, 3),
2418 get_element_ud(brw_vec8_grf(0, 0), 3),
2423 /* All these variables are pretty confusing - we might be better off
2424 * using bitmasks and macros for this, in the old style. Or perhaps
2425 * just having the caller instantiate the fields in dword3 itself.
2427 void brw_urb_WRITE(struct brw_codegen
*p
,
2428 struct brw_reg dest
,
2429 unsigned msg_reg_nr
,
2430 struct brw_reg src0
,
2431 enum brw_urb_write_flags flags
,
2432 unsigned msg_length
,
2433 unsigned response_length
,
2437 const struct gen_device_info
*devinfo
= p
->devinfo
;
2440 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2442 if (devinfo
->gen
>= 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2443 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2444 brw_push_insn_state(p
);
2445 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2446 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2447 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2448 BRW_REGISTER_TYPE_UD
),
2449 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2450 brw_imm_ud(0xff00));
2451 brw_pop_insn_state(p
);
2454 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2456 assert(msg_length
< BRW_MAX_MRF(devinfo
->gen
));
2458 brw_set_dest(p
, insn
, dest
);
2459 brw_set_src0(p
, insn
, src0
);
2460 brw_set_src1(p
, insn
, brw_imm_d(0));
2462 if (devinfo
->gen
< 6)
2463 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2465 brw_set_urb_message(p
,
2475 brw_send_indirect_message(struct brw_codegen
*p
,
2478 struct brw_reg payload
,
2479 struct brw_reg desc
)
2481 const struct gen_device_info
*devinfo
= p
->devinfo
;
2482 struct brw_inst
*send
;
2485 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
2487 assert(desc
.type
== BRW_REGISTER_TYPE_UD
);
2489 /* We hold on to the setup instruction (the SEND in the direct case, the OR
2490 * in the indirect case) by its index in the instruction store. The
2491 * pointer returned by next_insn() may become invalid if emitting the SEND
2492 * in the indirect case reallocs the store.
2495 if (desc
.file
== BRW_IMMEDIATE_VALUE
) {
2497 send
= next_insn(p
, BRW_OPCODE_SEND
);
2498 brw_set_src1(p
, send
, desc
);
2501 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2503 brw_push_insn_state(p
);
2504 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2505 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2506 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2508 /* Load the indirect descriptor to an address register using OR so the
2509 * caller can specify additional descriptor bits with the usual
2510 * brw_set_*_message() helper functions.
2513 brw_OR(p
, addr
, desc
, brw_imm_ud(0));
2515 brw_pop_insn_state(p
);
2517 send
= next_insn(p
, BRW_OPCODE_SEND
);
2518 brw_set_src1(p
, send
, addr
);
2521 if (dst
.width
< BRW_EXECUTE_8
)
2522 brw_inst_set_exec_size(devinfo
, send
, dst
.width
);
2524 brw_set_dest(p
, send
, dst
);
2525 brw_set_src0(p
, send
, retype(payload
, BRW_REGISTER_TYPE_UD
));
2526 brw_inst_set_sfid(devinfo
, send
, sfid
);
2528 return &p
->store
[setup
];
2531 static struct brw_inst
*
2532 brw_send_indirect_surface_message(struct brw_codegen
*p
,
2535 struct brw_reg payload
,
2536 struct brw_reg surface
,
2537 unsigned message_len
,
2538 unsigned response_len
,
2539 bool header_present
)
2541 const struct gen_device_info
*devinfo
= p
->devinfo
;
2542 struct brw_inst
*insn
;
2544 if (surface
.file
!= BRW_IMMEDIATE_VALUE
) {
2545 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2547 brw_push_insn_state(p
);
2548 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2549 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2550 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2552 /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2553 * some surface array is accessed out of bounds.
2555 insn
= brw_AND(p
, addr
,
2556 suboffset(vec1(retype(surface
, BRW_REGISTER_TYPE_UD
)),
2557 BRW_GET_SWZ(surface
.swizzle
, 0)),
2560 brw_pop_insn_state(p
);
2565 insn
= brw_send_indirect_message(p
, sfid
, dst
, payload
, surface
);
2566 brw_inst_set_mlen(devinfo
, insn
, message_len
);
2567 brw_inst_set_rlen(devinfo
, insn
, response_len
);
2568 brw_inst_set_header_present(devinfo
, insn
, header_present
);
2574 while_jumps_before_offset(const struct gen_device_info
*devinfo
,
2575 brw_inst
*insn
, int while_offset
, int start_offset
)
2577 int scale
= 16 / brw_jump_scale(devinfo
);
2578 int jip
= devinfo
->gen
== 6 ? brw_inst_gen6_jump_count(devinfo
, insn
)
2579 : brw_inst_jip(devinfo
, insn
);
2581 return while_offset
+ jip
* scale
<= start_offset
;
2586 brw_find_next_block_end(struct brw_codegen
*p
, int start_offset
)
2589 void *store
= p
->store
;
2590 const struct gen_device_info
*devinfo
= p
->devinfo
;
2594 for (offset
= next_offset(devinfo
, store
, start_offset
);
2595 offset
< p
->next_insn_offset
;
2596 offset
= next_offset(devinfo
, store
, offset
)) {
2597 brw_inst
*insn
= store
+ offset
;
2599 switch (brw_inst_opcode(devinfo
, insn
)) {
2603 case BRW_OPCODE_ENDIF
:
2608 case BRW_OPCODE_WHILE
:
2609 /* If the while doesn't jump before our instruction, it's the end
2610 * of a sibling do...while loop. Ignore it.
2612 if (!while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2615 case BRW_OPCODE_ELSE
:
2616 case BRW_OPCODE_HALT
:
2625 /* There is no DO instruction on gen6, so to find the end of the loop
2626 * we have to see if the loop is jumping back before our start
2630 brw_find_loop_end(struct brw_codegen
*p
, int start_offset
)
2632 const struct gen_device_info
*devinfo
= p
->devinfo
;
2634 void *store
= p
->store
;
2636 assert(devinfo
->gen
>= 6);
2638 /* Always start after the instruction (such as a WHILE) we're trying to fix
2641 for (offset
= next_offset(devinfo
, store
, start_offset
);
2642 offset
< p
->next_insn_offset
;
2643 offset
= next_offset(devinfo
, store
, offset
)) {
2644 brw_inst
*insn
= store
+ offset
;
2646 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
) {
2647 if (while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2651 assert(!"not reached");
2652 return start_offset
;
2655 /* After program generation, go back and update the UIP and JIP of
2656 * BREAK, CONT, and HALT instructions to their correct locations.
2659 brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
)
2661 const struct gen_device_info
*devinfo
= p
->devinfo
;
2663 int br
= brw_jump_scale(devinfo
);
2664 int scale
= 16 / br
;
2665 void *store
= p
->store
;
2667 if (devinfo
->gen
< 6)
2670 for (offset
= start_offset
; offset
< p
->next_insn_offset
; offset
+= 16) {
2671 brw_inst
*insn
= store
+ offset
;
2672 assert(brw_inst_cmpt_control(devinfo
, insn
) == 0);
2674 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2675 switch (brw_inst_opcode(devinfo
, insn
)) {
2676 case BRW_OPCODE_BREAK
:
2677 assert(block_end_offset
!= 0);
2678 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2679 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2680 brw_inst_set_uip(devinfo
, insn
,
2681 (brw_find_loop_end(p
, offset
) - offset
+
2682 (devinfo
->gen
== 6 ? 16 : 0)) / scale
);
2684 case BRW_OPCODE_CONTINUE
:
2685 assert(block_end_offset
!= 0);
2686 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2687 brw_inst_set_uip(devinfo
, insn
,
2688 (brw_find_loop_end(p
, offset
) - offset
) / scale
);
2690 assert(brw_inst_uip(devinfo
, insn
) != 0);
2691 assert(brw_inst_jip(devinfo
, insn
) != 0);
2694 case BRW_OPCODE_ENDIF
: {
2695 int32_t jump
= (block_end_offset
== 0) ?
2696 1 * br
: (block_end_offset
- offset
) / scale
;
2697 if (devinfo
->gen
>= 7)
2698 brw_inst_set_jip(devinfo
, insn
, jump
);
2700 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump
);
2704 case BRW_OPCODE_HALT
:
2705 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2707 * "In case of the halt instruction not inside any conditional
2708 * code block, the value of <JIP> and <UIP> should be the
2709 * same. In case of the halt instruction inside conditional code
2710 * block, the <UIP> should be the end of the program, and the
2711 * <JIP> should be end of the most inner conditional code block."
2713 * The uip will have already been set by whoever set up the
2716 if (block_end_offset
== 0) {
2717 brw_inst_set_jip(devinfo
, insn
, brw_inst_uip(devinfo
, insn
));
2719 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2721 assert(brw_inst_uip(devinfo
, insn
) != 0);
2722 assert(brw_inst_jip(devinfo
, insn
) != 0);
2728 void brw_ff_sync(struct brw_codegen
*p
,
2729 struct brw_reg dest
,
2730 unsigned msg_reg_nr
,
2731 struct brw_reg src0
,
2733 unsigned response_length
,
2736 const struct gen_device_info
*devinfo
= p
->devinfo
;
2739 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2741 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2742 brw_set_dest(p
, insn
, dest
);
2743 brw_set_src0(p
, insn
, src0
);
2744 brw_set_src1(p
, insn
, brw_imm_d(0));
2746 if (devinfo
->gen
< 6)
2747 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2749 brw_set_ff_sync_message(p
,
2757 * Emit the SEND instruction necessary to generate stream output data on Gen6
2758 * (for transform feedback).
2760 * If send_commit_msg is true, this is the last piece of stream output data
2761 * from this thread, so send the data as a committed write. According to the
2762 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2764 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2765 * writes are complete by sending the final write as a committed write."
2768 brw_svb_write(struct brw_codegen
*p
,
2769 struct brw_reg dest
,
2770 unsigned msg_reg_nr
,
2771 struct brw_reg src0
,
2772 unsigned binding_table_index
,
2773 bool send_commit_msg
)
2775 const struct gen_device_info
*devinfo
= p
->devinfo
;
2776 const unsigned target_cache
=
2777 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2778 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2779 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2782 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2784 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2785 brw_set_dest(p
, insn
, dest
);
2786 brw_set_src0(p
, insn
, src0
);
2787 brw_set_src1(p
, insn
, brw_imm_d(0));
2788 brw_set_dp_write_message(p
, insn
,
2789 binding_table_index
,
2790 0, /* msg_control: ignored */
2791 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2794 true, /* header_present */
2795 0, /* last_render_target: ignored */
2796 send_commit_msg
, /* response_length */
2797 0, /* end_of_thread */
2798 send_commit_msg
); /* send_commit_msg */
2802 brw_surface_payload_size(struct brw_codegen
*p
,
2803 unsigned num_channels
,
2808 brw_inst_access_mode(p
->devinfo
, p
->current
) == BRW_ALIGN_16
)
2810 else if (has_simd16
&&
2811 brw_inst_exec_size(p
->devinfo
, p
->current
) == BRW_EXECUTE_16
)
2812 return 2 * num_channels
;
2814 return num_channels
;
2818 brw_set_dp_untyped_atomic_message(struct brw_codegen
*p
,
2821 bool response_expected
)
2823 const struct gen_device_info
*devinfo
= p
->devinfo
;
2824 unsigned msg_control
=
2825 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2826 (response_expected
? 1 << 5 : 0); /* Return data expected */
2828 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2829 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2830 if (brw_inst_exec_size(devinfo
, p
->current
) != BRW_EXECUTE_16
)
2831 msg_control
|= 1 << 4; /* SIMD8 mode */
2833 brw_inst_set_dp_msg_type(devinfo
, insn
,
2834 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
);
2836 brw_inst_set_dp_msg_type(devinfo
, insn
,
2837 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
);
2840 brw_inst_set_dp_msg_type(devinfo
, insn
,
2841 GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
);
2843 if (brw_inst_exec_size(devinfo
, p
->current
) != BRW_EXECUTE_16
)
2844 msg_control
|= 1 << 4; /* SIMD8 mode */
2847 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2851 brw_untyped_atomic(struct brw_codegen
*p
,
2853 struct brw_reg payload
,
2854 struct brw_reg surface
,
2856 unsigned msg_length
,
2857 bool response_expected
)
2859 const struct gen_device_info
*devinfo
= p
->devinfo
;
2860 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2861 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2862 GEN7_SFID_DATAPORT_DATA_CACHE
);
2863 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2864 /* Mask out unused components -- This is especially important in Align16
2865 * mode on generations that don't have native support for SIMD4x2 atomics,
2866 * because unused but enabled components will cause the dataport to perform
2867 * additional atomic operations on the addresses that happen to be in the
2868 * uninitialized Y, Z and W coordinates of the payload.
2870 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2871 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2872 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
2873 brw_surface_payload_size(p
, response_expected
,
2874 devinfo
->gen
>= 8 || devinfo
->is_haswell
, true),
2877 brw_set_dp_untyped_atomic_message(
2878 p
, insn
, atomic_op
, response_expected
);
2882 brw_set_dp_untyped_surface_read_message(struct brw_codegen
*p
,
2883 struct brw_inst
*insn
,
2884 unsigned num_channels
)
2886 const struct gen_device_info
*devinfo
= p
->devinfo
;
2887 /* Set mask of 32-bit channels to drop. */
2888 unsigned msg_control
= 0xf & (0xf << num_channels
);
2890 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2891 if (brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
)
2892 msg_control
|= 1 << 4; /* SIMD16 mode */
2894 msg_control
|= 2 << 4; /* SIMD8 mode */
2897 brw_inst_set_dp_msg_type(devinfo
, insn
,
2898 (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2899 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
:
2900 GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
));
2901 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2905 brw_untyped_surface_read(struct brw_codegen
*p
,
2907 struct brw_reg payload
,
2908 struct brw_reg surface
,
2909 unsigned msg_length
,
2910 unsigned num_channels
)
2912 const struct gen_device_info
*devinfo
= p
->devinfo
;
2913 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2914 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2915 GEN7_SFID_DATAPORT_DATA_CACHE
);
2916 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2917 p
, sfid
, dst
, payload
, surface
, msg_length
,
2918 brw_surface_payload_size(p
, num_channels
, true, true),
2921 brw_set_dp_untyped_surface_read_message(
2922 p
, insn
, num_channels
);
2926 brw_set_dp_untyped_surface_write_message(struct brw_codegen
*p
,
2927 struct brw_inst
*insn
,
2928 unsigned num_channels
)
2930 const struct gen_device_info
*devinfo
= p
->devinfo
;
2931 /* Set mask of 32-bit channels to drop. */
2932 unsigned msg_control
= 0xf & (0xf << num_channels
);
2934 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2935 if (brw_inst_exec_size(devinfo
, p
->current
) == BRW_EXECUTE_16
)
2936 msg_control
|= 1 << 4; /* SIMD16 mode */
2938 msg_control
|= 2 << 4; /* SIMD8 mode */
2940 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
2941 msg_control
|= 0 << 4; /* SIMD4x2 mode */
2943 msg_control
|= 2 << 4; /* SIMD8 mode */
2946 brw_inst_set_dp_msg_type(devinfo
, insn
,
2947 devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2948 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
:
2949 GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
);
2950 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2954 brw_untyped_surface_write(struct brw_codegen
*p
,
2955 struct brw_reg payload
,
2956 struct brw_reg surface
,
2957 unsigned msg_length
,
2958 unsigned num_channels
)
2960 const struct gen_device_info
*devinfo
= p
->devinfo
;
2961 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2962 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2963 GEN7_SFID_DATAPORT_DATA_CACHE
);
2964 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
2965 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
2966 const unsigned mask
= devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
2967 WRITEMASK_X
: WRITEMASK_XYZW
;
2968 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2969 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
2970 payload
, surface
, msg_length
, 0, align1
);
2972 brw_set_dp_untyped_surface_write_message(
2973 p
, insn
, num_channels
);
2977 brw_set_dp_typed_atomic_message(struct brw_codegen
*p
,
2978 struct brw_inst
*insn
,
2980 bool response_expected
)
2982 const struct gen_device_info
*devinfo
= p
->devinfo
;
2983 unsigned msg_control
=
2984 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2985 (response_expected
? 1 << 5 : 0); /* Return data expected */
2987 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2988 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
2989 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
2990 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
2992 brw_inst_set_dp_msg_type(devinfo
, insn
,
2993 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
);
2995 brw_inst_set_dp_msg_type(devinfo
, insn
,
2996 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
);
3000 brw_inst_set_dp_msg_type(devinfo
, insn
,
3001 GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
);
3003 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3004 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3007 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3011 brw_typed_atomic(struct brw_codegen
*p
,
3013 struct brw_reg payload
,
3014 struct brw_reg surface
,
3016 unsigned msg_length
,
3017 bool response_expected
) {
3018 const struct gen_device_info
*devinfo
= p
->devinfo
;
3019 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3020 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3021 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3022 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3023 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3024 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
3025 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3026 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
3027 brw_surface_payload_size(p
, response_expected
,
3028 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3031 brw_set_dp_typed_atomic_message(
3032 p
, insn
, atomic_op
, response_expected
);
3036 brw_set_dp_typed_surface_read_message(struct brw_codegen
*p
,
3037 struct brw_inst
*insn
,
3038 unsigned num_channels
)
3040 const struct gen_device_info
*devinfo
= p
->devinfo
;
3041 /* Set mask of unused channels. */
3042 unsigned msg_control
= 0xf & (0xf << num_channels
);
3044 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3045 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3046 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3047 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3049 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3052 brw_inst_set_dp_msg_type(devinfo
, insn
,
3053 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
);
3055 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3056 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3057 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3060 brw_inst_set_dp_msg_type(devinfo
, insn
,
3061 GEN7_DATAPORT_RC_TYPED_SURFACE_READ
);
3064 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3068 brw_typed_surface_read(struct brw_codegen
*p
,
3070 struct brw_reg payload
,
3071 struct brw_reg surface
,
3072 unsigned msg_length
,
3073 unsigned num_channels
)
3075 const struct gen_device_info
*devinfo
= p
->devinfo
;
3076 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3077 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3078 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3079 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3080 p
, sfid
, dst
, payload
, surface
, msg_length
,
3081 brw_surface_payload_size(p
, num_channels
,
3082 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3085 brw_set_dp_typed_surface_read_message(
3086 p
, insn
, num_channels
);
3090 brw_set_dp_typed_surface_write_message(struct brw_codegen
*p
,
3091 struct brw_inst
*insn
,
3092 unsigned num_channels
)
3094 const struct gen_device_info
*devinfo
= p
->devinfo
;
3095 /* Set mask of unused channels. */
3096 unsigned msg_control
= 0xf & (0xf << num_channels
);
3098 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3099 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3100 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3101 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3103 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3106 brw_inst_set_dp_msg_type(devinfo
, insn
,
3107 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
);
3110 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3111 if (brw_inst_qtr_control(devinfo
, p
->current
) % 2 == 1)
3112 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3115 brw_inst_set_dp_msg_type(devinfo
, insn
,
3116 GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
);
3119 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3123 brw_typed_surface_write(struct brw_codegen
*p
,
3124 struct brw_reg payload
,
3125 struct brw_reg surface
,
3126 unsigned msg_length
,
3127 unsigned num_channels
)
3129 const struct gen_device_info
*devinfo
= p
->devinfo
;
3130 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3131 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3132 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3133 const bool align1
= (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
);
3134 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3135 const unsigned mask
= (devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3136 WRITEMASK_X
: WRITEMASK_XYZW
);
3137 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3138 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3139 payload
, surface
, msg_length
, 0, true);
3141 brw_set_dp_typed_surface_write_message(
3142 p
, insn
, num_channels
);
3146 brw_set_memory_fence_message(struct brw_codegen
*p
,
3147 struct brw_inst
*insn
,
3148 enum brw_message_target sfid
,
3151 const struct gen_device_info
*devinfo
= p
->devinfo
;
3153 brw_set_message_descriptor(p
, insn
, sfid
,
3154 1 /* message length */,
3155 (commit_enable
? 1 : 0) /* response length */,
3156 true /* header present */,
3160 case GEN6_SFID_DATAPORT_RENDER_CACHE
:
3161 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_RC_MEMORY_FENCE
);
3163 case GEN7_SFID_DATAPORT_DATA_CACHE
:
3164 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_DC_MEMORY_FENCE
);
3167 unreachable("Not reached");
3171 brw_inst_set_dp_msg_control(devinfo
, insn
, 1 << 5);
3175 brw_memory_fence(struct brw_codegen
*p
,
3178 const struct gen_device_info
*devinfo
= p
->devinfo
;
3179 const bool commit_enable
= devinfo
->gen
== 7 && !devinfo
->is_haswell
;
3180 struct brw_inst
*insn
;
3182 brw_push_insn_state(p
);
3183 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3184 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3187 /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3188 * message doesn't write anything back.
3190 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3191 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
3192 brw_set_dest(p
, insn
, dst
);
3193 brw_set_src0(p
, insn
, dst
);
3194 brw_set_memory_fence_message(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
3197 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
3198 /* IVB does typed surface access through the render cache, so we need to
3199 * flush it too. Use a different register so both flushes can be
3200 * pipelined by the hardware.
3202 insn
= next_insn(p
, BRW_OPCODE_SEND
);
3203 brw_set_dest(p
, insn
, offset(dst
, 1));
3204 brw_set_src0(p
, insn
, offset(dst
, 1));
3205 brw_set_memory_fence_message(p
, insn
, GEN6_SFID_DATAPORT_RENDER_CACHE
,
3208 /* Now write the response of the second message into the response of the
3209 * first to trigger a pipeline stall -- This way future render and data
3210 * cache messages will be properly ordered with respect to past data and
3211 * render cache messages.
3213 brw_MOV(p
, dst
, offset(dst
, 1));
3216 brw_pop_insn_state(p
);
3220 brw_pixel_interpolator_query(struct brw_codegen
*p
,
3221 struct brw_reg dest
,
3225 struct brw_reg data
,
3226 unsigned msg_length
,
3227 unsigned response_length
)
3229 const struct gen_device_info
*devinfo
= p
->devinfo
;
3230 struct brw_inst
*insn
;
3231 const uint16_t exec_size
= brw_inst_exec_size(devinfo
, p
->current
);
3233 /* brw_send_indirect_message will automatically use a direct send message
3234 * if data is actually immediate.
3236 insn
= brw_send_indirect_message(p
,
3237 GEN7_SFID_PIXEL_INTERPOLATOR
,
3241 brw_inst_set_mlen(devinfo
, insn
, msg_length
);
3242 brw_inst_set_rlen(devinfo
, insn
, response_length
);
3244 brw_inst_set_pi_simd_mode(devinfo
, insn
, exec_size
== BRW_EXECUTE_16
);
3245 brw_inst_set_pi_slot_group(devinfo
, insn
, 0); /* zero unless 32/64px dispatch */
3246 brw_inst_set_pi_nopersp(devinfo
, insn
, noperspective
);
3247 brw_inst_set_pi_message_type(devinfo
, insn
, mode
);
3251 brw_find_live_channel(struct brw_codegen
*p
, struct brw_reg dst
,
3252 struct brw_reg mask
)
3254 const struct gen_device_info
*devinfo
= p
->devinfo
;
3255 const unsigned exec_size
= 1 << brw_inst_exec_size(devinfo
, p
->current
);
3256 const unsigned qtr_control
= brw_inst_qtr_control(devinfo
, p
->current
);
3259 assert(devinfo
->gen
>= 7);
3260 assert(mask
.type
== BRW_REGISTER_TYPE_UD
);
3262 brw_push_insn_state(p
);
3264 if (brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
) {
3265 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3267 if (devinfo
->gen
>= 8) {
3268 /* Getting the first active channel index is easy on Gen8: Just find
3269 * the first bit set in the execution mask. The register exists on
3270 * HSW already but it reads back as all ones when the current
3271 * instruction has execution masking disabled, so it's kind of
3274 struct brw_reg exec_mask
=
3275 retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
);
3277 if (mask
.file
!= BRW_IMMEDIATE_VALUE
|| mask
.ud
!= 0xffffffff) {
3278 /* Unfortunately, ce0 does not take into account the thread
3279 * dispatch mask, which may be a problem in cases where it's not
3280 * tightly packed (i.e. it doesn't have the form '2^n - 1' for
3281 * some n). Combine ce0 with the given dispatch (or vector) mask
3282 * to mask off those channels which were never dispatched by the
3285 brw_SHR(p
, vec1(dst
), mask
, brw_imm_ud(qtr_control
* 8));
3286 brw_AND(p
, vec1(dst
), exec_mask
, vec1(dst
));
3287 exec_mask
= vec1(dst
);
3290 /* Quarter control has the effect of magically shifting the value of
3291 * ce0 so you'll get the first active channel relative to the
3292 * specified quarter control as result.
3294 inst
= brw_FBL(p
, vec1(dst
), exec_mask
);
3296 const struct brw_reg flag
= brw_flag_reg(1, 0);
3298 brw_MOV(p
, retype(flag
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
3300 /* Run enough instructions returning zero with execution masking and
3301 * a conditional modifier enabled in order to get the full execution
3302 * mask in f1.0. We could use a single 32-wide move here if it
3303 * weren't because of the hardware bug that causes channel enables to
3304 * be applied incorrectly to the second half of 32-wide instructions
3307 const unsigned lower_size
= MIN2(16, exec_size
);
3308 for (unsigned i
= 0; i
< exec_size
/ lower_size
; i
++) {
3309 inst
= brw_MOV(p
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
),
3311 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3312 brw_inst_set_group(devinfo
, inst
, lower_size
* i
+ 8 * qtr_control
);
3313 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_Z
);
3314 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3315 brw_inst_set_exec_size(devinfo
, inst
, cvt(lower_size
) - 1);
3318 /* Find the first bit set in the exec_size-wide portion of the flag
3319 * register that was updated by the last sequence of MOV
3322 const enum brw_reg_type type
= brw_int_type(exec_size
/ 8, false);
3323 brw_FBL(p
, vec1(dst
), byte_offset(retype(flag
, type
), qtr_control
));
3326 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3328 if (devinfo
->gen
>= 8 &&
3329 mask
.file
== BRW_IMMEDIATE_VALUE
&& mask
.ud
== 0xffffffff) {
3330 /* In SIMD4x2 mode the first active channel index is just the
3331 * negation of the first bit of the mask register. Note that ce0
3332 * doesn't take into account the dispatch mask, so the Gen7 path
3333 * should be used instead unless you have the guarantee that the
3334 * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
3337 inst
= brw_AND(p
, brw_writemask(dst
, WRITEMASK_X
),
3338 negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
)),
3342 /* Overwrite the destination without and with execution masking to
3343 * find out which of the channels is active.
3345 brw_push_insn_state(p
);
3346 brw_set_default_exec_size(p
, BRW_EXECUTE_4
);
3347 brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3350 inst
= brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3352 brw_pop_insn_state(p
);
3353 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3357 brw_pop_insn_state(p
);
3361 brw_broadcast(struct brw_codegen
*p
,
3366 const struct gen_device_info
*devinfo
= p
->devinfo
;
3367 const bool align1
= brw_inst_access_mode(devinfo
, p
->current
) == BRW_ALIGN_1
;
3370 brw_push_insn_state(p
);
3371 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3372 brw_set_default_exec_size(p
, align1
? BRW_EXECUTE_1
: BRW_EXECUTE_4
);
3374 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
&&
3375 src
.address_mode
== BRW_ADDRESS_DIRECT
);
3376 assert(!src
.abs
&& !src
.negate
);
3377 assert(src
.type
== dst
.type
);
3379 if ((src
.vstride
== 0 && (src
.hstride
== 0 || !align1
)) ||
3380 idx
.file
== BRW_IMMEDIATE_VALUE
) {
3381 /* Trivial, the source is already uniform or the index is a constant.
3382 * We will typically not get here if the optimizer is doing its job, but
3383 * asserting would be mean.
3385 const unsigned i
= idx
.file
== BRW_IMMEDIATE_VALUE
? idx
.ud
: 0;
3387 (align1
? stride(suboffset(src
, i
), 0, 1, 0) :
3388 stride(suboffset(src
, 4 * i
), 0, 4, 1)));
3390 /* From the Haswell PRM section "Register Region Restrictions":
3392 * "The lower bits of the AddressImmediate must not overflow to
3393 * change the register address. The lower 5 bits of Address
3394 * Immediate when added to lower 5 bits of address register gives
3395 * the sub-register offset. The upper bits of Address Immediate
3396 * when added to upper bits of address register gives the register
3397 * address. Any overflow from sub-register offset is dropped."
3399 * Fortunately, for broadcast, we never have a sub-register offset so
3400 * this isn't an issue.
3402 assert(src
.subnr
== 0);
3405 const struct brw_reg addr
=
3406 retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
3407 const unsigned offset
= src
.nr
* REG_SIZE
+ src
.subnr
;
3408 /* Limit in bytes of the signed indirect addressing immediate. */
3409 const unsigned limit
= 512;
3411 brw_push_insn_state(p
);
3412 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3413 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
3415 /* Take into account the component size and horizontal stride. */
3416 assert(src
.vstride
== src
.hstride
+ src
.width
);
3417 brw_SHL(p
, addr
, vec1(idx
),
3418 brw_imm_ud(_mesa_logbase2(type_sz(src
.type
)) +
3421 /* We can only address up to limit bytes using the indirect
3422 * addressing immediate, account for the difference if the source
3423 * register is above this limit.
3425 if (offset
>= limit
)
3426 brw_ADD(p
, addr
, addr
, brw_imm_ud(offset
- offset
% limit
));
3428 brw_pop_insn_state(p
);
3430 /* Use indirect addressing to fetch the specified component. */
3432 retype(brw_vec1_indirect(addr
.subnr
, offset
% limit
),
3435 /* In SIMD4x2 mode the index can be either zero or one, replicate it
3436 * to all bits of a flag register,
3440 stride(brw_swizzle(idx
, BRW_SWIZZLE_XXXX
), 4, 4, 1));
3441 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NONE
);
3442 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_NZ
);
3443 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3445 /* and use predicated SEL to pick the right channel. */
3446 inst
= brw_SEL(p
, dst
,
3447 stride(suboffset(src
, 4), 4, 4, 1),
3448 stride(src
, 4, 4, 1));
3449 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NORMAL
);
3450 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3454 brw_pop_insn_state(p
);
3458 * This instruction is generated as a single-channel align1 instruction by
3459 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3461 * We can't use the typed atomic op in the FS because that has the execution
3462 * mask ANDed with the pixel mask, but we just want to write the one dword for
3465 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
3466 * one u32. So we use the same untyped atomic write message as the pixel
3469 * The untyped atomic operation requires a BUFFER surface type with RAW
3470 * format, and is only accessible through the legacy DATA_CACHE dataport
3473 void brw_shader_time_add(struct brw_codegen
*p
,
3474 struct brw_reg payload
,
3475 uint32_t surf_index
)
3477 const unsigned sfid
= (p
->devinfo
->gen
>= 8 || p
->devinfo
->is_haswell
?
3478 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3479 GEN7_SFID_DATAPORT_DATA_CACHE
);
3480 assert(p
->devinfo
->gen
>= 7);
3482 brw_push_insn_state(p
);
3483 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3484 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3485 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3486 brw_inst
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
3488 /* We use brw_vec1_reg and unmasked because we want to increment the given
3491 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
3493 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
3495 brw_set_src1(p
, send
, brw_imm_ud(0));
3496 brw_set_message_descriptor(p
, send
, sfid
, 2, 0, false, false);
3497 brw_inst_set_binding_table_index(p
->devinfo
, send
, surf_index
);
3498 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, false);
3500 brw_pop_insn_state(p
);
3505 * Emit the SEND message for a barrier
3508 brw_barrier(struct brw_codegen
*p
, struct brw_reg src
)
3510 const struct gen_device_info
*devinfo
= p
->devinfo
;
3511 struct brw_inst
*inst
;
3513 assert(devinfo
->gen
>= 7);
3515 brw_push_insn_state(p
);
3516 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3517 inst
= next_insn(p
, BRW_OPCODE_SEND
);
3518 brw_set_dest(p
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
));
3519 brw_set_src0(p
, inst
, src
);
3520 brw_set_src1(p
, inst
, brw_null_reg());
3522 brw_set_message_descriptor(p
, inst
, BRW_SFID_MESSAGE_GATEWAY
,
3524 0 /* response_length */,
3525 false /* header_present */,
3526 false /* end_of_thread */);
3528 brw_inst_set_gateway_notify(devinfo
, inst
, 1);
3529 brw_inst_set_gateway_subfuncid(devinfo
, inst
,
3530 BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG
);
3532 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
3533 brw_pop_insn_state(p
);
3538 * Emit the wait instruction for a barrier
3541 brw_WAIT(struct brw_codegen
*p
)
3543 const struct gen_device_info
*devinfo
= p
->devinfo
;
3544 struct brw_inst
*insn
;
3546 struct brw_reg src
= brw_notification_reg();
3548 insn
= next_insn(p
, BRW_OPCODE_WAIT
);
3549 brw_set_dest(p
, insn
, src
);
3550 brw_set_src0(p
, insn
, src
);
3551 brw_set_src1(p
, insn
, brw_null_reg());
3553 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
3554 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_DISABLE
);