2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_eu_defines.h"
36 #include "util/ralloc.h"
39 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
40 * registers, implicitly moving the operand to a message register.
42 * On Sandybridge, this is no longer the case. This function performs the
43 * explicit move; it should be called before emitting a SEND instruction.
46 gen6_resolve_implied_move(struct brw_codegen
*p
,
50 const struct gen_device_info
*devinfo
= p
->devinfo
;
54 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
57 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
58 brw_push_insn_state(p
);
59 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
60 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
61 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
62 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
63 retype(*src
, BRW_REGISTER_TYPE_UD
));
64 brw_pop_insn_state(p
);
66 *src
= brw_message_reg(msg_reg_nr
);
70 gen7_convert_mrf_to_grf(struct brw_codegen
*p
, struct brw_reg
*reg
)
72 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
73 * "The send with EOT should use register space R112-R127 for <src>. This is
74 * to enable loading of a new thread into the same slot while the message
75 * with EOT for current thread is pending dispatch."
77 * Since we're pretending to have 16 MRFs anyway, we may as well use the
78 * registers required for messages with EOT.
80 const struct gen_device_info
*devinfo
= p
->devinfo
;
81 if (devinfo
->gen
>= 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
82 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
83 reg
->nr
+= GEN7_MRF_HACK_START
;
88 brw_set_dest(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg dest
)
90 const struct gen_device_info
*devinfo
= p
->devinfo
;
92 if (dest
.file
== BRW_MESSAGE_REGISTER_FILE
)
93 assert((dest
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
94 else if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
95 assert(dest
.nr
< 128);
97 gen7_convert_mrf_to_grf(p
, &dest
);
99 brw_inst_set_dst_file_type(devinfo
, inst
, dest
.file
, dest
.type
);
100 brw_inst_set_dst_address_mode(devinfo
, inst
, dest
.address_mode
);
102 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
103 brw_inst_set_dst_da_reg_nr(devinfo
, inst
, dest
.nr
);
105 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
106 brw_inst_set_dst_da1_subreg_nr(devinfo
, inst
, dest
.subnr
);
107 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
108 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
109 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
111 brw_inst_set_dst_da16_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
112 brw_inst_set_da16_writemask(devinfo
, inst
, dest
.writemask
);
113 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
114 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
115 assert(dest
.writemask
!= 0);
117 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
118 * Although Dst.HorzStride is a don't care for Align16, HW needs
119 * this to be programmed as "01".
121 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
124 brw_inst_set_dst_ia_subreg_nr(devinfo
, inst
, dest
.subnr
);
126 /* These are different sizes in align1 vs align16:
128 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
129 brw_inst_set_dst_ia1_addr_imm(devinfo
, inst
,
130 dest
.indirect_offset
);
131 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
132 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
133 brw_inst_set_dst_hstride(devinfo
, inst
, dest
.hstride
);
135 brw_inst_set_dst_ia16_addr_imm(devinfo
, inst
,
136 dest
.indirect_offset
);
137 /* even ignored in da16, still need to set as '01' */
138 brw_inst_set_dst_hstride(devinfo
, inst
, 1);
142 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
143 * or 16 (SIMD16), as that's normally correct. However, when dealing with
144 * small registers, it can be useful for us to automatically reduce it to
145 * match the register size.
147 if (p
->automatic_exec_sizes
) {
149 * In platforms that support fp64 we can emit instructions with a width
150 * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
151 * these cases we need to make sure that these instructions have their
152 * exec sizes set properly when they are emitted and we can't rely on
153 * this code to fix it.
156 if (devinfo
->gen
>= 6)
157 fix_exec_size
= dest
.width
< BRW_EXECUTE_4
;
159 fix_exec_size
= dest
.width
< BRW_EXECUTE_8
;
162 brw_inst_set_exec_size(devinfo
, inst
, dest
.width
);
167 brw_set_src0(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
169 const struct gen_device_info
*devinfo
= p
->devinfo
;
171 if (reg
.file
== BRW_MESSAGE_REGISTER_FILE
)
172 assert((reg
.nr
& ~BRW_MRF_COMPR4
) < BRW_MAX_MRF(devinfo
->gen
));
173 else if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
174 assert(reg
.nr
< 128);
176 gen7_convert_mrf_to_grf(p
, ®
);
178 if (devinfo
->gen
>= 6 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
179 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
)) {
180 /* Any source modifiers or regions will be ignored, since this just
181 * identifies the MRF/GRF to start reading the message contents from.
182 * Check for some likely failures.
186 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
189 brw_inst_set_src0_file_type(devinfo
, inst
, reg
.file
, reg
.type
);
190 brw_inst_set_src0_abs(devinfo
, inst
, reg
.abs
);
191 brw_inst_set_src0_negate(devinfo
, inst
, reg
.negate
);
192 brw_inst_set_src0_address_mode(devinfo
, inst
, reg
.address_mode
);
194 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
195 if (reg
.type
== BRW_REGISTER_TYPE_DF
||
196 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_DIM
)
197 brw_inst_set_imm_df(devinfo
, inst
, reg
.df
);
198 else if (reg
.type
== BRW_REGISTER_TYPE_UQ
||
199 reg
.type
== BRW_REGISTER_TYPE_Q
)
200 brw_inst_set_imm_uq(devinfo
, inst
, reg
.u64
);
202 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
204 if (type_sz(reg
.type
) < 8) {
205 brw_inst_set_src1_reg_file(devinfo
, inst
,
206 BRW_ARCHITECTURE_REGISTER_FILE
);
207 brw_inst_set_src1_reg_hw_type(devinfo
, inst
,
208 brw_inst_src0_reg_hw_type(devinfo
, inst
));
211 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
212 brw_inst_set_src0_da_reg_nr(devinfo
, inst
, reg
.nr
);
213 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
214 brw_inst_set_src0_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
216 brw_inst_set_src0_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
219 brw_inst_set_src0_ia_subreg_nr(devinfo
, inst
, reg
.subnr
);
221 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
222 brw_inst_set_src0_ia1_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
224 brw_inst_set_src0_ia16_addr_imm(devinfo
, inst
, reg
.indirect_offset
);
228 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
229 if (reg
.width
== BRW_WIDTH_1
&&
230 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
231 brw_inst_set_src0_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
232 brw_inst_set_src0_width(devinfo
, inst
, BRW_WIDTH_1
);
233 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
235 brw_inst_set_src0_hstride(devinfo
, inst
, reg
.hstride
);
236 brw_inst_set_src0_width(devinfo
, inst
, reg
.width
);
237 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
240 brw_inst_set_src0_da16_swiz_x(devinfo
, inst
,
241 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
242 brw_inst_set_src0_da16_swiz_y(devinfo
, inst
,
243 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
244 brw_inst_set_src0_da16_swiz_z(devinfo
, inst
,
245 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
246 brw_inst_set_src0_da16_swiz_w(devinfo
, inst
,
247 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
249 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
250 /* This is an oddity of the fact we're using the same
251 * descriptions for registers in align_16 as align_1:
253 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
254 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
255 reg
.type
== BRW_REGISTER_TYPE_DF
&&
256 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
259 * "For Align16 access mode, only encodings of 0000 and 0011
260 * are allowed. Other codes are reserved."
262 * Presumably the DevSNB behavior applies to IVB as well.
264 brw_inst_set_src0_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
266 brw_inst_set_src0_vstride(devinfo
, inst
, reg
.vstride
);
274 brw_set_src1(struct brw_codegen
*p
, brw_inst
*inst
, struct brw_reg reg
)
276 const struct gen_device_info
*devinfo
= p
->devinfo
;
278 if (reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
)
279 assert(reg
.nr
< 128);
281 /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
283 * "Accumulator registers may be accessed explicitly as src0
286 assert(reg
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
287 reg
.nr
!= BRW_ARF_ACCUMULATOR
);
289 gen7_convert_mrf_to_grf(p
, ®
);
290 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
292 brw_inst_set_src1_file_type(devinfo
, inst
, reg
.file
, reg
.type
);
293 brw_inst_set_src1_abs(devinfo
, inst
, reg
.abs
);
294 brw_inst_set_src1_negate(devinfo
, inst
, reg
.negate
);
296 /* Only src1 can be immediate in two-argument instructions.
298 assert(brw_inst_src0_reg_file(devinfo
, inst
) != BRW_IMMEDIATE_VALUE
);
300 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
301 /* two-argument instructions can only use 32-bit immediates */
302 assert(type_sz(reg
.type
) < 8);
303 brw_inst_set_imm_ud(devinfo
, inst
, reg
.ud
);
305 /* This is a hardware restriction, which may or may not be lifted
308 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
309 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
311 brw_inst_set_src1_da_reg_nr(devinfo
, inst
, reg
.nr
);
312 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
313 brw_inst_set_src1_da1_subreg_nr(devinfo
, inst
, reg
.subnr
);
315 brw_inst_set_src1_da16_subreg_nr(devinfo
, inst
, reg
.subnr
/ 16);
318 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
319 if (reg
.width
== BRW_WIDTH_1
&&
320 brw_inst_exec_size(devinfo
, inst
) == BRW_EXECUTE_1
) {
321 brw_inst_set_src1_hstride(devinfo
, inst
, BRW_HORIZONTAL_STRIDE_0
);
322 brw_inst_set_src1_width(devinfo
, inst
, BRW_WIDTH_1
);
323 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_0
);
325 brw_inst_set_src1_hstride(devinfo
, inst
, reg
.hstride
);
326 brw_inst_set_src1_width(devinfo
, inst
, reg
.width
);
327 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
330 brw_inst_set_src1_da16_swiz_x(devinfo
, inst
,
331 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_X
));
332 brw_inst_set_src1_da16_swiz_y(devinfo
, inst
,
333 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Y
));
334 brw_inst_set_src1_da16_swiz_z(devinfo
, inst
,
335 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_Z
));
336 brw_inst_set_src1_da16_swiz_w(devinfo
, inst
,
337 BRW_GET_SWZ(reg
.swizzle
, BRW_CHANNEL_W
));
339 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
) {
340 /* This is an oddity of the fact we're using the same
341 * descriptions for registers in align_16 as align_1:
343 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
344 } else if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
345 reg
.type
== BRW_REGISTER_TYPE_DF
&&
346 reg
.vstride
== BRW_VERTICAL_STRIDE_2
) {
349 * "For Align16 access mode, only encodings of 0000 and 0011
350 * are allowed. Other codes are reserved."
352 * Presumably the DevSNB behavior applies to IVB as well.
354 brw_inst_set_src1_vstride(devinfo
, inst
, BRW_VERTICAL_STRIDE_4
);
356 brw_inst_set_src1_vstride(devinfo
, inst
, reg
.vstride
);
363 * Specify the descriptor and extended descriptor immediate for a SEND(C)
364 * message instruction.
367 brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*inst
,
368 unsigned desc
, unsigned ex_desc
)
370 const struct gen_device_info
*devinfo
= p
->devinfo
;
371 brw_inst_set_src1_file_type(devinfo
, inst
,
372 BRW_IMMEDIATE_VALUE
, BRW_REGISTER_TYPE_D
);
373 brw_inst_set_send_desc(devinfo
, inst
, desc
);
374 if (devinfo
->gen
>= 9 && (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SEND
||
375 brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_SENDC
))
376 brw_inst_set_send_ex_desc(devinfo
, inst
, ex_desc
);
379 static void brw_set_math_message( struct brw_codegen
*p
,
382 unsigned integer_type
,
386 const struct gen_device_info
*devinfo
= p
->devinfo
;
388 unsigned response_length
;
390 /* Infer message length from the function */
392 case BRW_MATH_FUNCTION_POW
:
393 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
394 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
395 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
403 /* Infer response length from the function */
405 case BRW_MATH_FUNCTION_SINCOS
:
406 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
414 brw_set_desc(p
, inst
, brw_message_desc(
415 devinfo
, msg_length
, response_length
, false));
417 brw_inst_set_sfid(devinfo
, inst
, BRW_SFID_MATH
);
418 brw_inst_set_math_msg_function(devinfo
, inst
, function
);
419 brw_inst_set_math_msg_signed_int(devinfo
, inst
, integer_type
);
420 brw_inst_set_math_msg_precision(devinfo
, inst
, low_precision
);
421 brw_inst_set_math_msg_saturate(devinfo
, inst
, brw_inst_saturate(devinfo
, inst
));
422 brw_inst_set_math_msg_data_type(devinfo
, inst
, dataType
);
423 brw_inst_set_saturate(devinfo
, inst
, 0);
427 static void brw_set_ff_sync_message(struct brw_codegen
*p
,
430 unsigned response_length
,
433 const struct gen_device_info
*devinfo
= p
->devinfo
;
435 brw_set_desc(p
, insn
, brw_message_desc(
436 devinfo
, 1, response_length
, true));
438 brw_inst_set_sfid(devinfo
, insn
, BRW_SFID_URB
);
439 brw_inst_set_eot(devinfo
, insn
, end_of_thread
);
440 brw_inst_set_urb_opcode(devinfo
, insn
, 1); /* FF_SYNC */
441 brw_inst_set_urb_allocate(devinfo
, insn
, allocate
);
442 /* The following fields are not used by FF_SYNC: */
443 brw_inst_set_urb_global_offset(devinfo
, insn
, 0);
444 brw_inst_set_urb_swizzle_control(devinfo
, insn
, 0);
445 brw_inst_set_urb_used(devinfo
, insn
, 0);
446 brw_inst_set_urb_complete(devinfo
, insn
, 0);
449 static void brw_set_urb_message( struct brw_codegen
*p
,
451 enum brw_urb_write_flags flags
,
453 unsigned response_length
,
455 unsigned swizzle_control
)
457 const struct gen_device_info
*devinfo
= p
->devinfo
;
459 assert(devinfo
->gen
< 7 || swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
460 assert(devinfo
->gen
< 7 || !(flags
& BRW_URB_WRITE_ALLOCATE
));
461 assert(devinfo
->gen
>= 7 || !(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
463 brw_set_desc(p
, insn
, brw_message_desc(
464 devinfo
, msg_length
, response_length
, true));
466 brw_inst_set_sfid(devinfo
, insn
, BRW_SFID_URB
);
467 brw_inst_set_eot(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_EOT
));
469 if (flags
& BRW_URB_WRITE_OWORD
) {
470 assert(msg_length
== 2); /* header + one OWORD of data */
471 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_OWORD
);
473 brw_inst_set_urb_opcode(devinfo
, insn
, BRW_URB_OPCODE_WRITE_HWORD
);
476 brw_inst_set_urb_global_offset(devinfo
, insn
, offset
);
477 brw_inst_set_urb_swizzle_control(devinfo
, insn
, swizzle_control
);
479 if (devinfo
->gen
< 8) {
480 brw_inst_set_urb_complete(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_COMPLETE
));
483 if (devinfo
->gen
< 7) {
484 brw_inst_set_urb_allocate(devinfo
, insn
, !!(flags
& BRW_URB_WRITE_ALLOCATE
));
485 brw_inst_set_urb_used(devinfo
, insn
, !(flags
& BRW_URB_WRITE_UNUSED
));
487 brw_inst_set_urb_per_slot_offset(devinfo
, insn
,
488 !!(flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
));
493 brw_set_dp_write_message(struct brw_codegen
*p
,
495 unsigned binding_table_index
,
496 unsigned msg_control
,
498 unsigned target_cache
,
501 unsigned last_render_target
,
502 unsigned response_length
,
503 unsigned end_of_thread
,
504 unsigned send_commit_msg
)
506 const struct gen_device_info
*devinfo
= p
->devinfo
;
507 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
508 BRW_SFID_DATAPORT_WRITE
);
510 brw_set_desc(p
, insn
, brw_message_desc(
511 devinfo
, msg_length
, response_length
, header_present
));
513 brw_inst_set_sfid(devinfo
, insn
, sfid
);
514 brw_inst_set_eot(devinfo
, insn
, !!end_of_thread
);
515 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
516 brw_inst_set_dp_write_msg_type(devinfo
, insn
, msg_type
);
517 brw_inst_set_dp_write_msg_control(devinfo
, insn
, msg_control
);
518 brw_inst_set_rt_last(devinfo
, insn
, last_render_target
);
519 if (devinfo
->gen
< 7) {
520 brw_inst_set_dp_write_commit(devinfo
, insn
, send_commit_msg
);
523 if (devinfo
->gen
>= 11)
524 brw_inst_set_null_rt(devinfo
, insn
, false);
528 brw_set_dp_read_message(struct brw_codegen
*p
,
530 unsigned binding_table_index
,
531 unsigned msg_control
,
533 unsigned target_cache
,
536 unsigned response_length
)
538 const struct gen_device_info
*devinfo
= p
->devinfo
;
539 const unsigned sfid
= (devinfo
->gen
>= 6 ? target_cache
:
540 BRW_SFID_DATAPORT_READ
);
542 brw_set_desc(p
, insn
, brw_message_desc(
543 devinfo
, msg_length
, response_length
, header_present
));
545 const unsigned opcode
= brw_inst_opcode(devinfo
, insn
);
546 if (opcode
== BRW_OPCODE_SEND
|| opcode
== BRW_OPCODE_SENDC
)
547 brw_inst_set_sfid(devinfo
, insn
, sfid
);
548 brw_inst_set_binding_table_index(devinfo
, insn
, binding_table_index
);
549 brw_inst_set_dp_read_msg_type(devinfo
, insn
, msg_type
);
550 brw_inst_set_dp_read_msg_control(devinfo
, insn
, msg_control
);
551 if (devinfo
->gen
< 6)
552 brw_inst_set_dp_read_target_cache(devinfo
, insn
, target_cache
);
556 gen7_set_dp_scratch_message(struct brw_codegen
*p
,
560 bool invalidate_after_read
,
562 unsigned addr_offset
,
567 const struct gen_device_info
*devinfo
= p
->devinfo
;
568 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 ||
569 (devinfo
->gen
>= 8 && num_regs
== 8));
570 const unsigned block_size
= (devinfo
->gen
>= 8 ? _mesa_logbase2(num_regs
) :
573 brw_set_desc(p
, inst
, brw_message_desc(
574 devinfo
, mlen
, rlen
, header_present
));
576 brw_inst_set_sfid(devinfo
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
);
577 brw_inst_set_dp_category(devinfo
, inst
, 1); /* Scratch Block Read/Write msgs */
578 brw_inst_set_scratch_read_write(devinfo
, inst
, write
);
579 brw_inst_set_scratch_type(devinfo
, inst
, dword
);
580 brw_inst_set_scratch_invalidate_after_read(devinfo
, inst
, invalidate_after_read
);
581 brw_inst_set_scratch_block_size(devinfo
, inst
, block_size
);
582 brw_inst_set_scratch_addr_offset(devinfo
, inst
, addr_offset
);
586 brw_inst_set_state(const struct gen_device_info
*devinfo
,
588 const struct brw_insn_state
*state
)
590 brw_inst_set_exec_size(devinfo
, insn
, state
->exec_size
);
591 brw_inst_set_group(devinfo
, insn
, state
->group
);
592 brw_inst_set_compression(devinfo
, insn
, state
->compressed
);
593 brw_inst_set_access_mode(devinfo
, insn
, state
->access_mode
);
594 brw_inst_set_mask_control(devinfo
, insn
, state
->mask_control
);
595 brw_inst_set_saturate(devinfo
, insn
, state
->saturate
);
596 brw_inst_set_pred_control(devinfo
, insn
, state
->predicate
);
597 brw_inst_set_pred_inv(devinfo
, insn
, state
->pred_inv
);
599 if (is_3src(devinfo
, brw_inst_opcode(devinfo
, insn
)) &&
600 state
->access_mode
== BRW_ALIGN_16
) {
601 brw_inst_set_3src_a16_flag_subreg_nr(devinfo
, insn
, state
->flag_subreg
% 2);
602 if (devinfo
->gen
>= 7)
603 brw_inst_set_3src_a16_flag_reg_nr(devinfo
, insn
, state
->flag_subreg
/ 2);
605 brw_inst_set_flag_subreg_nr(devinfo
, insn
, state
->flag_subreg
% 2);
606 if (devinfo
->gen
>= 7)
607 brw_inst_set_flag_reg_nr(devinfo
, insn
, state
->flag_subreg
/ 2);
610 if (devinfo
->gen
>= 6)
611 brw_inst_set_acc_wr_control(devinfo
, insn
, state
->acc_wr_control
);
614 #define next_insn brw_next_insn
616 brw_next_insn(struct brw_codegen
*p
, unsigned opcode
)
618 const struct gen_device_info
*devinfo
= p
->devinfo
;
621 if (p
->nr_insn
+ 1 > p
->store_size
) {
623 p
->store
= reralloc(p
->mem_ctx
, p
->store
, brw_inst
, p
->store_size
);
626 p
->next_insn_offset
+= 16;
627 insn
= &p
->store
[p
->nr_insn
++];
629 memset(insn
, 0, sizeof(*insn
));
630 brw_inst_set_opcode(devinfo
, insn
, opcode
);
632 /* Apply the default instruction state */
633 brw_inst_set_state(devinfo
, insn
, p
->current
);
639 brw_alu1(struct brw_codegen
*p
, unsigned opcode
,
640 struct brw_reg dest
, struct brw_reg src
)
642 brw_inst
*insn
= next_insn(p
, opcode
);
643 brw_set_dest(p
, insn
, dest
);
644 brw_set_src0(p
, insn
, src
);
649 brw_alu2(struct brw_codegen
*p
, unsigned opcode
,
650 struct brw_reg dest
, struct brw_reg src0
, struct brw_reg src1
)
652 /* 64-bit immediates are only supported on 1-src instructions */
653 assert(src0
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src0
.type
) <= 4);
654 assert(src1
.file
!= BRW_IMMEDIATE_VALUE
|| type_sz(src1
.type
) <= 4);
656 brw_inst
*insn
= next_insn(p
, opcode
);
657 brw_set_dest(p
, insn
, dest
);
658 brw_set_src0(p
, insn
, src0
);
659 brw_set_src1(p
, insn
, src1
);
664 get_3src_subreg_nr(struct brw_reg reg
)
666 /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
667 * use 32-bit units (components 0..7). Since they only support F/D/UD
668 * types, this doesn't lose any flexibility, but uses fewer bits.
670 return reg
.subnr
/ 4;
673 static enum gen10_align1_3src_vertical_stride
674 to_3src_align1_vstride(enum brw_vertical_stride vstride
)
677 case BRW_VERTICAL_STRIDE_0
:
678 return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0
;
679 case BRW_VERTICAL_STRIDE_2
:
680 return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2
;
681 case BRW_VERTICAL_STRIDE_4
:
682 return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4
;
683 case BRW_VERTICAL_STRIDE_8
:
684 case BRW_VERTICAL_STRIDE_16
:
685 return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8
;
687 unreachable("invalid vstride");
692 static enum gen10_align1_3src_src_horizontal_stride
693 to_3src_align1_hstride(enum brw_horizontal_stride hstride
)
696 case BRW_HORIZONTAL_STRIDE_0
:
697 return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0
;
698 case BRW_HORIZONTAL_STRIDE_1
:
699 return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1
;
700 case BRW_HORIZONTAL_STRIDE_2
:
701 return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2
;
702 case BRW_HORIZONTAL_STRIDE_4
:
703 return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4
;
705 unreachable("invalid hstride");
710 brw_alu3(struct brw_codegen
*p
, unsigned opcode
, struct brw_reg dest
,
711 struct brw_reg src0
, struct brw_reg src1
, struct brw_reg src2
)
713 const struct gen_device_info
*devinfo
= p
->devinfo
;
714 brw_inst
*inst
= next_insn(p
, opcode
);
716 gen7_convert_mrf_to_grf(p
, &dest
);
718 assert(dest
.nr
< 128);
719 assert(src0
.nr
< 128);
720 assert(src1
.nr
< 128);
721 assert(src2
.nr
< 128);
722 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
723 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
724 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
725 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
727 if (brw_inst_access_mode(devinfo
, inst
) == BRW_ALIGN_1
) {
728 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
729 dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
);
731 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
) {
732 brw_inst_set_3src_a1_dst_reg_file(devinfo
, inst
,
733 BRW_ALIGN1_3SRC_ACCUMULATOR
);
734 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, BRW_ARF_ACCUMULATOR
);
736 brw_inst_set_3src_a1_dst_reg_file(devinfo
, inst
,
737 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
);
738 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
740 brw_inst_set_3src_a1_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 8);
742 brw_inst_set_3src_a1_dst_hstride(devinfo
, inst
, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1
);
744 if (brw_reg_type_is_floating_point(dest
.type
)) {
745 brw_inst_set_3src_a1_exec_type(devinfo
, inst
,
746 BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT
);
748 brw_inst_set_3src_a1_exec_type(devinfo
, inst
,
749 BRW_ALIGN1_3SRC_EXEC_TYPE_INT
);
752 brw_inst_set_3src_a1_dst_type(devinfo
, inst
, dest
.type
);
753 brw_inst_set_3src_a1_src0_type(devinfo
, inst
, src0
.type
);
754 brw_inst_set_3src_a1_src1_type(devinfo
, inst
, src1
.type
);
755 brw_inst_set_3src_a1_src2_type(devinfo
, inst
, src2
.type
);
757 brw_inst_set_3src_a1_src0_vstride(devinfo
, inst
,
758 to_3src_align1_vstride(src0
.vstride
));
759 brw_inst_set_3src_a1_src1_vstride(devinfo
, inst
,
760 to_3src_align1_vstride(src1
.vstride
));
761 /* no vstride on src2 */
763 brw_inst_set_3src_a1_src0_hstride(devinfo
, inst
,
764 to_3src_align1_hstride(src0
.hstride
));
765 brw_inst_set_3src_a1_src1_hstride(devinfo
, inst
,
766 to_3src_align1_hstride(src1
.hstride
));
767 brw_inst_set_3src_a1_src2_hstride(devinfo
, inst
,
768 to_3src_align1_hstride(src2
.hstride
));
770 brw_inst_set_3src_a1_src0_subreg_nr(devinfo
, inst
, src0
.subnr
);
771 if (src0
.type
== BRW_REGISTER_TYPE_NF
) {
772 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, BRW_ARF_ACCUMULATOR
);
774 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
776 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
777 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
779 brw_inst_set_3src_a1_src1_subreg_nr(devinfo
, inst
, src1
.subnr
);
780 if (src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
) {
781 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, BRW_ARF_ACCUMULATOR
);
783 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
785 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
786 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
788 brw_inst_set_3src_a1_src2_subreg_nr(devinfo
, inst
, src2
.subnr
);
789 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
790 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
791 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
793 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
||
794 src0
.file
== BRW_IMMEDIATE_VALUE
||
795 (src0
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
796 src0
.type
== BRW_REGISTER_TYPE_NF
));
797 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
798 src1
.file
== BRW_ARCHITECTURE_REGISTER_FILE
);
799 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
||
800 src2
.file
== BRW_IMMEDIATE_VALUE
);
802 brw_inst_set_3src_a1_src0_reg_file(devinfo
, inst
,
803 src0
.file
== BRW_GENERAL_REGISTER_FILE
?
804 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
805 BRW_ALIGN1_3SRC_IMMEDIATE_VALUE
);
806 brw_inst_set_3src_a1_src1_reg_file(devinfo
, inst
,
807 src1
.file
== BRW_GENERAL_REGISTER_FILE
?
808 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
809 BRW_ALIGN1_3SRC_ACCUMULATOR
);
810 brw_inst_set_3src_a1_src2_reg_file(devinfo
, inst
,
811 src2
.file
== BRW_GENERAL_REGISTER_FILE
?
812 BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE
:
813 BRW_ALIGN1_3SRC_IMMEDIATE_VALUE
);
815 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
816 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
817 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
818 dest
.type
== BRW_REGISTER_TYPE_DF
||
819 dest
.type
== BRW_REGISTER_TYPE_D
||
820 dest
.type
== BRW_REGISTER_TYPE_UD
);
821 if (devinfo
->gen
== 6) {
822 brw_inst_set_3src_a16_dst_reg_file(devinfo
, inst
,
823 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
825 brw_inst_set_3src_dst_reg_nr(devinfo
, inst
, dest
.nr
);
826 brw_inst_set_3src_a16_dst_subreg_nr(devinfo
, inst
, dest
.subnr
/ 16);
827 brw_inst_set_3src_a16_dst_writemask(devinfo
, inst
, dest
.writemask
);
829 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
830 brw_inst_set_3src_a16_src0_swizzle(devinfo
, inst
, src0
.swizzle
);
831 brw_inst_set_3src_a16_src0_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src0
));
832 brw_inst_set_3src_src0_reg_nr(devinfo
, inst
, src0
.nr
);
833 brw_inst_set_3src_src0_abs(devinfo
, inst
, src0
.abs
);
834 brw_inst_set_3src_src0_negate(devinfo
, inst
, src0
.negate
);
835 brw_inst_set_3src_a16_src0_rep_ctrl(devinfo
, inst
,
836 src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
838 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
839 brw_inst_set_3src_a16_src1_swizzle(devinfo
, inst
, src1
.swizzle
);
840 brw_inst_set_3src_a16_src1_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src1
));
841 brw_inst_set_3src_src1_reg_nr(devinfo
, inst
, src1
.nr
);
842 brw_inst_set_3src_src1_abs(devinfo
, inst
, src1
.abs
);
843 brw_inst_set_3src_src1_negate(devinfo
, inst
, src1
.negate
);
844 brw_inst_set_3src_a16_src1_rep_ctrl(devinfo
, inst
,
845 src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
847 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
848 brw_inst_set_3src_a16_src2_swizzle(devinfo
, inst
, src2
.swizzle
);
849 brw_inst_set_3src_a16_src2_subreg_nr(devinfo
, inst
, get_3src_subreg_nr(src2
));
850 brw_inst_set_3src_src2_reg_nr(devinfo
, inst
, src2
.nr
);
851 brw_inst_set_3src_src2_abs(devinfo
, inst
, src2
.abs
);
852 brw_inst_set_3src_src2_negate(devinfo
, inst
, src2
.negate
);
853 brw_inst_set_3src_a16_src2_rep_ctrl(devinfo
, inst
,
854 src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
856 if (devinfo
->gen
>= 7) {
857 /* Set both the source and destination types based on dest.type,
858 * ignoring the source register types. The MAD and LRP emitters ensure
859 * that all four types are float. The BFE and BFI2 emitters, however,
860 * may send us mixed D and UD types and want us to ignore that and use
861 * the destination type.
863 brw_inst_set_3src_a16_src_type(devinfo
, inst
, dest
.type
);
864 brw_inst_set_3src_a16_dst_type(devinfo
, inst
, dest
.type
);
/***********************************************************************
 * Convenience routines.
 */

/* Define brw_<OP>() wrappers around the generic emitters. */
#define ALU1(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
                   struct brw_reg dest,                 \
                   struct brw_reg src0)                 \
{                                                       \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
}

#define ALU2(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
                   struct brw_reg dest,                 \
                   struct brw_reg src0,                 \
                   struct brw_reg src1)                 \
{                                                       \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}

#define ALU3(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
                   struct brw_reg dest,                 \
                   struct brw_reg src0,                 \
                   struct brw_reg src1,                 \
                   struct brw_reg src2)                 \
{                                                       \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Like ALU3, but asserts all operands share the F or DF type. */
#define ALU3F(OP)                                               \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                   struct brw_reg dest,                         \
                   struct brw_reg src0,                         \
                   struct brw_reg src1,                         \
                   struct brw_reg src2)                         \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                   \
          dest.type == BRW_REGISTER_TYPE_DF);                   \
   if (dest.type == BRW_REGISTER_TYPE_F) {                      \
      assert(src0.type == BRW_REGISTER_TYPE_F);                 \
      assert(src1.type == BRW_REGISTER_TYPE_F);                 \
      assert(src2.type == BRW_REGISTER_TYPE_F);                 \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {              \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                             \
void brw_##OP(struct brw_codegen *p,                                          \
              struct brw_reg dest,                                            \
              struct brw_reg src)                                             \
{                                                                             \
   const struct gen_device_info *devinfo = p->devinfo;                        \
   brw_inst *rnd, *add;                                                       \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                       \
   brw_set_dest(p, rnd, dest);                                                \
   brw_set_src0(p, rnd, src);                                                 \
                                                                              \
   if (devinfo->gen < 6) {                                                    \
      /* turn on round-increments */                                          \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);            \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                          \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);          \
   }                                                                          \
}
985 brw_MOV(struct brw_codegen
*p
, struct brw_reg dest
, struct brw_reg src0
)
987 const struct gen_device_info
*devinfo
= p
->devinfo
;
989 /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
990 * To avoid the problems that causes, we use a <1,2,0> source region to read
991 * each element twice.
993 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
994 brw_get_default_access_mode(p
) == BRW_ALIGN_1
&&
995 dest
.type
== BRW_REGISTER_TYPE_DF
&&
996 (src0
.type
== BRW_REGISTER_TYPE_F
||
997 src0
.type
== BRW_REGISTER_TYPE_D
||
998 src0
.type
== BRW_REGISTER_TYPE_UD
) &&
999 !has_scalar_region(src0
)) {
1000 assert(src0
.vstride
== BRW_VERTICAL_STRIDE_4
&&
1001 src0
.width
== BRW_WIDTH_4
&&
1002 src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1004 src0
.vstride
= BRW_VERTICAL_STRIDE_1
;
1005 src0
.width
= BRW_WIDTH_2
;
1006 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1009 return brw_alu1(p
, BRW_OPCODE_MOV
, dest
, src0
);
1013 brw_ADD(struct brw_codegen
*p
, struct brw_reg dest
,
1014 struct brw_reg src0
, struct brw_reg src1
)
1017 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1018 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1019 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1020 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1021 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1024 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1025 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1026 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1027 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1028 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1031 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1035 brw_AVG(struct brw_codegen
*p
, struct brw_reg dest
,
1036 struct brw_reg src0
, struct brw_reg src1
)
1038 assert(dest
.type
== src0
.type
);
1039 assert(src0
.type
== src1
.type
);
1040 switch (src0
.type
) {
1041 case BRW_REGISTER_TYPE_B
:
1042 case BRW_REGISTER_TYPE_UB
:
1043 case BRW_REGISTER_TYPE_W
:
1044 case BRW_REGISTER_TYPE_UW
:
1045 case BRW_REGISTER_TYPE_D
:
1046 case BRW_REGISTER_TYPE_UD
:
1049 unreachable("Bad type for brw_AVG");
1052 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1056 brw_MUL(struct brw_codegen
*p
, struct brw_reg dest
,
1057 struct brw_reg src0
, struct brw_reg src1
)
1060 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1061 src0
.type
== BRW_REGISTER_TYPE_UD
||
1062 src1
.type
== BRW_REGISTER_TYPE_D
||
1063 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1064 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1067 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1068 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1069 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1070 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1071 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1074 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1075 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1076 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1077 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1078 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1081 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1082 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1083 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1084 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1086 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1090 brw_LINE(struct brw_codegen
*p
, struct brw_reg dest
,
1091 struct brw_reg src0
, struct brw_reg src1
)
1093 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1094 src0
.width
= BRW_WIDTH_1
;
1095 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1096 return brw_alu2(p
, BRW_OPCODE_LINE
, dest
, src0
, src1
);
1100 brw_PLN(struct brw_codegen
*p
, struct brw_reg dest
,
1101 struct brw_reg src0
, struct brw_reg src1
)
1103 src0
.vstride
= BRW_VERTICAL_STRIDE_0
;
1104 src0
.width
= BRW_WIDTH_1
;
1105 src0
.hstride
= BRW_HORIZONTAL_STRIDE_0
;
1106 src1
.vstride
= BRW_VERTICAL_STRIDE_8
;
1107 src1
.width
= BRW_WIDTH_8
;
1108 src1
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
1109 return brw_alu2(p
, BRW_OPCODE_PLN
, dest
, src0
, src1
);
1113 brw_F32TO16(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1115 const struct gen_device_info
*devinfo
= p
->devinfo
;
1116 const bool align16
= brw_get_default_access_mode(p
) == BRW_ALIGN_16
;
1117 /* The F32TO16 instruction doesn't support 32-bit destination types in
1118 * Align1 mode, and neither does the Gen8 implementation in terms of a
1119 * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
1120 * an undocumented feature.
1122 const bool needs_zero_fill
= (dst
.type
== BRW_REGISTER_TYPE_UD
&&
1123 (!align16
|| devinfo
->gen
>= 8));
1127 assert(dst
.type
== BRW_REGISTER_TYPE_UD
);
1129 assert(dst
.type
== BRW_REGISTER_TYPE_UD
||
1130 dst
.type
== BRW_REGISTER_TYPE_W
||
1131 dst
.type
== BRW_REGISTER_TYPE_UW
||
1132 dst
.type
== BRW_REGISTER_TYPE_HF
);
1135 brw_push_insn_state(p
);
1137 if (needs_zero_fill
) {
1138 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
1139 dst
= spread(retype(dst
, BRW_REGISTER_TYPE_W
), 2);
1142 if (devinfo
->gen
>= 8) {
1143 inst
= brw_MOV(p
, retype(dst
, BRW_REGISTER_TYPE_HF
), src
);
1145 assert(devinfo
->gen
== 7);
1146 inst
= brw_alu1(p
, BRW_OPCODE_F32TO16
, dst
, src
);
1149 if (needs_zero_fill
) {
1150 brw_inst_set_no_dd_clear(devinfo
, inst
, true);
1151 inst
= brw_MOV(p
, suboffset(dst
, 1), brw_imm_w(0));
1152 brw_inst_set_no_dd_check(devinfo
, inst
, true);
1155 brw_pop_insn_state(p
);
1160 brw_F16TO32(struct brw_codegen
*p
, struct brw_reg dst
, struct brw_reg src
)
1162 const struct gen_device_info
*devinfo
= p
->devinfo
;
1163 bool align16
= brw_get_default_access_mode(p
) == BRW_ALIGN_16
;
1166 assert(src
.type
== BRW_REGISTER_TYPE_UD
);
1168 /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1170 * Because this instruction does not have a 16-bit floating-point
1171 * type, the source data type must be Word (W). The destination type
1172 * must be F (Float).
1174 if (src
.type
== BRW_REGISTER_TYPE_UD
)
1175 src
= spread(retype(src
, BRW_REGISTER_TYPE_W
), 2);
1177 assert(src
.type
== BRW_REGISTER_TYPE_W
||
1178 src
.type
== BRW_REGISTER_TYPE_UW
||
1179 src
.type
== BRW_REGISTER_TYPE_HF
);
1182 if (devinfo
->gen
>= 8) {
1183 return brw_MOV(p
, dst
, retype(src
, BRW_REGISTER_TYPE_HF
));
1185 assert(devinfo
->gen
== 7);
1186 return brw_alu1(p
, BRW_OPCODE_F16TO32
, dst
, src
);
1191 void brw_NOP(struct brw_codegen
*p
)
1193 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1194 memset(insn
, 0, sizeof(*insn
));
1195 brw_inst_set_opcode(p
->devinfo
, insn
, BRW_OPCODE_NOP
);
1202 /***********************************************************************
1203 * Comparisons, if/else/endif
1207 brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1208 unsigned predicate_control
)
1210 const struct gen_device_info
*devinfo
= p
->devinfo
;
1211 struct brw_reg ip
= brw_ip_reg();
1212 brw_inst
*inst
= brw_alu2(p
, BRW_OPCODE_JMPI
, ip
, ip
, index
);
1214 brw_inst_set_exec_size(devinfo
, inst
, BRW_EXECUTE_1
);
1215 brw_inst_set_qtr_control(devinfo
, inst
, BRW_COMPRESSION_NONE
);
1216 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
1217 brw_inst_set_pred_control(devinfo
, inst
, predicate_control
);
1223 push_if_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1225 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1227 p
->if_stack_depth
++;
1228 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1229 p
->if_stack_array_size
*= 2;
1230 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1231 p
->if_stack_array_size
);
1236 pop_if_stack(struct brw_codegen
*p
)
1238 p
->if_stack_depth
--;
1239 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1243 push_loop_stack(struct brw_codegen
*p
, brw_inst
*inst
)
1245 if (p
->loop_stack_array_size
<= (p
->loop_stack_depth
+ 1)) {
1246 p
->loop_stack_array_size
*= 2;
1247 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1248 p
->loop_stack_array_size
);
1249 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1250 p
->loop_stack_array_size
);
1253 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1254 p
->loop_stack_depth
++;
1255 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1259 get_inner_do_insn(struct brw_codegen
*p
)
1261 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1264 /* EU takes the value from the flag register and pushes it onto some
1265 * sort of a stack (presumably merging with any flag value already on
1266 * the stack). Within an if block, the flags at the top of the stack
1267 * control execution on each channel of the unit, eg. on each of the
1268 * 16 pixel values in our wm programs.
1270 * When the matching 'else' instruction is reached (presumably by
1271 * countdown of the instruction count patched in by our ELSE/ENDIF
1272 * functions), the relevant flags are inverted.
1274 * When the matching 'endif' instruction is reached, the flags are
1275 * popped off. If the stack is now empty, normal execution resumes.
1278 brw_IF(struct brw_codegen
*p
, unsigned execute_size
)
1280 const struct gen_device_info
*devinfo
= p
->devinfo
;
1283 insn
= next_insn(p
, BRW_OPCODE_IF
);
1285 /* Override the defaults for this instruction:
1287 if (devinfo
->gen
< 6) {
1288 brw_set_dest(p
, insn
, brw_ip_reg());
1289 brw_set_src0(p
, insn
, brw_ip_reg());
1290 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1291 } else if (devinfo
->gen
== 6) {
1292 brw_set_dest(p
, insn
, brw_imm_w(0));
1293 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1294 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1295 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1296 } else if (devinfo
->gen
== 7) {
1297 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1298 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1299 brw_set_src1(p
, insn
, brw_imm_w(0));
1300 brw_inst_set_jip(devinfo
, insn
, 0);
1301 brw_inst_set_uip(devinfo
, insn
, 0);
1303 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1304 brw_set_src0(p
, insn
, brw_imm_d(0));
1305 brw_inst_set_jip(devinfo
, insn
, 0);
1306 brw_inst_set_uip(devinfo
, insn
, 0);
1309 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1310 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1311 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NORMAL
);
1312 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1313 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1314 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1316 push_if_stack(p
, insn
);
1317 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1321 /* This function is only used for gen6-style IF instructions with an
1322 * embedded comparison (conditional modifier). It is not used on gen7.
1325 gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1326 struct brw_reg src0
, struct brw_reg src1
)
1328 const struct gen_device_info
*devinfo
= p
->devinfo
;
1331 insn
= next_insn(p
, BRW_OPCODE_IF
);
1333 brw_set_dest(p
, insn
, brw_imm_w(0));
1334 brw_inst_set_exec_size(devinfo
, insn
, brw_get_default_exec_size(p
));
1335 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1336 brw_set_src0(p
, insn
, src0
);
1337 brw_set_src1(p
, insn
, src1
);
1339 assert(brw_inst_qtr_control(devinfo
, insn
) == BRW_COMPRESSION_NONE
);
1340 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
1341 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1343 push_if_stack(p
, insn
);
1348 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1351 convert_IF_ELSE_to_ADD(struct brw_codegen
*p
,
1352 brw_inst
*if_inst
, brw_inst
*else_inst
)
1354 const struct gen_device_info
*devinfo
= p
->devinfo
;
1356 /* The next instruction (where the ENDIF would be, if it existed) */
1357 brw_inst
*next_inst
= &p
->store
[p
->nr_insn
];
1359 assert(p
->single_program_flow
);
1360 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1361 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1362 assert(brw_inst_exec_size(devinfo
, if_inst
) == BRW_EXECUTE_1
);
1364 /* Convert IF to an ADD instruction that moves the instruction pointer
1365 * to the first instruction of the ELSE block. If there is no ELSE
1366 * block, point to where ENDIF would be. Reverse the predicate.
1368 * There's no need to execute an ENDIF since we don't need to do any
1369 * stack operations, and if we're currently executing, we just want to
1370 * continue normally.
1372 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_ADD
);
1373 brw_inst_set_pred_inv(devinfo
, if_inst
, true);
1375 if (else_inst
!= NULL
) {
1376 /* Convert ELSE to an ADD instruction that points where the ENDIF
1379 brw_inst_set_opcode(devinfo
, else_inst
, BRW_OPCODE_ADD
);
1381 brw_inst_set_imm_ud(devinfo
, if_inst
, (else_inst
- if_inst
+ 1) * 16);
1382 brw_inst_set_imm_ud(devinfo
, else_inst
, (next_inst
- else_inst
) * 16);
1384 brw_inst_set_imm_ud(devinfo
, if_inst
, (next_inst
- if_inst
) * 16);
1389 * Patch IF and ELSE instructions with appropriate jump targets.
1392 patch_IF_ELSE(struct brw_codegen
*p
,
1393 brw_inst
*if_inst
, brw_inst
*else_inst
, brw_inst
*endif_inst
)
1395 const struct gen_device_info
*devinfo
= p
->devinfo
;
1397 /* We shouldn't be patching IF and ELSE instructions in single program flow
1398 * mode when gen < 6, because in single program flow mode on those
1399 * platforms, we convert flow control instructions to conditional ADDs that
1400 * operate on IP (see brw_ENDIF).
1402 * However, on Gen6, writing to IP doesn't work in single program flow mode
1403 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1404 * not be updated by non-flow control instructions."). And on later
1405 * platforms, there is no significant benefit to converting control flow
1406 * instructions to conditional ADDs. So we do patch IF and ELSE
1407 * instructions in single program flow mode on those platforms.
1409 if (devinfo
->gen
< 6)
1410 assert(!p
->single_program_flow
);
1412 assert(if_inst
!= NULL
&& brw_inst_opcode(devinfo
, if_inst
) == BRW_OPCODE_IF
);
1413 assert(endif_inst
!= NULL
);
1414 assert(else_inst
== NULL
|| brw_inst_opcode(devinfo
, else_inst
) == BRW_OPCODE_ELSE
);
1416 unsigned br
= brw_jump_scale(devinfo
);
1418 assert(brw_inst_opcode(devinfo
, endif_inst
) == BRW_OPCODE_ENDIF
);
1419 brw_inst_set_exec_size(devinfo
, endif_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1421 if (else_inst
== NULL
) {
1422 /* Patch IF -> ENDIF */
1423 if (devinfo
->gen
< 6) {
1424 /* Turn it into an IFF, which means no mask stack operations for
1425 * all-false and jumping past the ENDIF.
1427 brw_inst_set_opcode(devinfo
, if_inst
, BRW_OPCODE_IFF
);
1428 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1429 br
* (endif_inst
- if_inst
+ 1));
1430 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1431 } else if (devinfo
->gen
== 6) {
1432 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1433 brw_inst_set_gen6_jump_count(devinfo
, if_inst
, br
*(endif_inst
- if_inst
));
1435 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1436 brw_inst_set_jip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1439 brw_inst_set_exec_size(devinfo
, else_inst
, brw_inst_exec_size(devinfo
, if_inst
));
1441 /* Patch IF -> ELSE */
1442 if (devinfo
->gen
< 6) {
1443 brw_inst_set_gen4_jump_count(devinfo
, if_inst
,
1444 br
* (else_inst
- if_inst
));
1445 brw_inst_set_gen4_pop_count(devinfo
, if_inst
, 0);
1446 } else if (devinfo
->gen
== 6) {
1447 brw_inst_set_gen6_jump_count(devinfo
, if_inst
,
1448 br
* (else_inst
- if_inst
+ 1));
1451 /* Patch ELSE -> ENDIF */
1452 if (devinfo
->gen
< 6) {
1453 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1456 brw_inst_set_gen4_jump_count(devinfo
, else_inst
,
1457 br
* (endif_inst
- else_inst
+ 1));
1458 brw_inst_set_gen4_pop_count(devinfo
, else_inst
, 1);
1459 } else if (devinfo
->gen
== 6) {
1460 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1461 brw_inst_set_gen6_jump_count(devinfo
, else_inst
,
1462 br
* (endif_inst
- else_inst
));
1464 /* The IF instruction's JIP should point just past the ELSE */
1465 brw_inst_set_jip(devinfo
, if_inst
, br
* (else_inst
- if_inst
+ 1));
1466 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1467 brw_inst_set_uip(devinfo
, if_inst
, br
* (endif_inst
- if_inst
));
1468 brw_inst_set_jip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1469 if (devinfo
->gen
>= 8) {
1470 /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1471 * should point to ENDIF.
1473 brw_inst_set_uip(devinfo
, else_inst
, br
* (endif_inst
- else_inst
));
1480 brw_ELSE(struct brw_codegen
*p
)
1482 const struct gen_device_info
*devinfo
= p
->devinfo
;
1485 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1487 if (devinfo
->gen
< 6) {
1488 brw_set_dest(p
, insn
, brw_ip_reg());
1489 brw_set_src0(p
, insn
, brw_ip_reg());
1490 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1491 } else if (devinfo
->gen
== 6) {
1492 brw_set_dest(p
, insn
, brw_imm_w(0));
1493 brw_inst_set_gen6_jump_count(devinfo
, insn
, 0);
1494 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1495 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1496 } else if (devinfo
->gen
== 7) {
1497 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1498 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1499 brw_set_src1(p
, insn
, brw_imm_w(0));
1500 brw_inst_set_jip(devinfo
, insn
, 0);
1501 brw_inst_set_uip(devinfo
, insn
, 0);
1503 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1504 brw_set_src0(p
, insn
, brw_imm_d(0));
1505 brw_inst_set_jip(devinfo
, insn
, 0);
1506 brw_inst_set_uip(devinfo
, insn
, 0);
1509 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1510 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1511 if (!p
->single_program_flow
&& devinfo
->gen
< 6)
1512 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1514 push_if_stack(p
, insn
);
1518 brw_ENDIF(struct brw_codegen
*p
)
1520 const struct gen_device_info
*devinfo
= p
->devinfo
;
1521 brw_inst
*insn
= NULL
;
1522 brw_inst
*else_inst
= NULL
;
1523 brw_inst
*if_inst
= NULL
;
1525 bool emit_endif
= true;
1527 /* In single program flow mode, we can express IF and ELSE instructions
1528 * equivalently as ADD instructions that operate on IP. On platforms prior
1529 * to Gen6, flow control instructions cause an implied thread switch, so
1530 * this is a significant savings.
1532 * However, on Gen6, writing to IP doesn't work in single program flow mode
1533 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1534 * not be updated by non-flow control instructions."). And on later
1535 * platforms, there is no significant benefit to converting control flow
1536 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1539 if (devinfo
->gen
< 6 && p
->single_program_flow
)
1543 * A single next_insn() may change the base address of instruction store
1544 * memory(p->store), so call it first before referencing the instruction
1545 * store pointer from an index
1548 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1550 /* Pop the IF and (optional) ELSE instructions from the stack */
1551 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1552 tmp
= pop_if_stack(p
);
1553 if (brw_inst_opcode(devinfo
, tmp
) == BRW_OPCODE_ELSE
) {
1555 tmp
= pop_if_stack(p
);
1560 /* ENDIF is useless; don't bother emitting it. */
1561 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1565 if (devinfo
->gen
< 6) {
1566 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1567 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1568 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1569 } else if (devinfo
->gen
== 6) {
1570 brw_set_dest(p
, insn
, brw_imm_w(0));
1571 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1572 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1573 } else if (devinfo
->gen
== 7) {
1574 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1575 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1576 brw_set_src1(p
, insn
, brw_imm_w(0));
1578 brw_set_src0(p
, insn
, brw_imm_d(0));
1581 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1582 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_ENABLE
);
1583 if (devinfo
->gen
< 6)
1584 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1586 /* Also pop item off the stack in the endif instruction: */
1587 if (devinfo
->gen
< 6) {
1588 brw_inst_set_gen4_jump_count(devinfo
, insn
, 0);
1589 brw_inst_set_gen4_pop_count(devinfo
, insn
, 1);
1590 } else if (devinfo
->gen
== 6) {
1591 brw_inst_set_gen6_jump_count(devinfo
, insn
, 2);
1593 brw_inst_set_jip(devinfo
, insn
, 2);
1595 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1599 brw_BREAK(struct brw_codegen
*p
)
1601 const struct gen_device_info
*devinfo
= p
->devinfo
;
1604 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1605 if (devinfo
->gen
>= 8) {
1606 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1607 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1608 } else if (devinfo
->gen
>= 6) {
1609 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1610 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1611 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1613 brw_set_dest(p
, insn
, brw_ip_reg());
1614 brw_set_src0(p
, insn
, brw_ip_reg());
1615 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1616 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1617 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1619 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1620 brw_inst_set_exec_size(devinfo
, insn
, brw_get_default_exec_size(p
));
1626 brw_CONT(struct brw_codegen
*p
)
1628 const struct gen_device_info
*devinfo
= p
->devinfo
;
1631 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1632 brw_set_dest(p
, insn
, brw_ip_reg());
1633 if (devinfo
->gen
>= 8) {
1634 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1636 brw_set_src0(p
, insn
, brw_ip_reg());
1637 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1640 if (devinfo
->gen
< 6) {
1641 brw_inst_set_gen4_pop_count(devinfo
, insn
,
1642 p
->if_depth_in_loop
[p
->loop_stack_depth
]);
1644 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1645 brw_inst_set_exec_size(devinfo
, insn
, brw_get_default_exec_size(p
));
1650 gen6_HALT(struct brw_codegen
*p
)
1652 const struct gen_device_info
*devinfo
= p
->devinfo
;
1655 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1656 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1657 if (devinfo
->gen
>= 8) {
1658 brw_set_src0(p
, insn
, brw_imm_d(0x0));
1660 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1661 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1664 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1665 brw_inst_set_exec_size(devinfo
, insn
, brw_get_default_exec_size(p
));
1671 * The DO/WHILE is just an unterminated loop -- break or continue are
1672 * used for control within the loop. We have a few ways they can be
1675 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1676 * jip and no DO instruction.
1678 * For non-uniform control flow pre-gen6, there's a DO instruction to
1679 * push the mask, and a WHILE to jump back, and BREAK to get out and
1682 * For gen6, there's no more mask stack, so no need for DO. WHILE
1683 * just points back to the first instruction of the loop.
1686 brw_DO(struct brw_codegen
*p
, unsigned execute_size
)
1688 const struct gen_device_info
*devinfo
= p
->devinfo
;
1690 if (devinfo
->gen
>= 6 || p
->single_program_flow
) {
1691 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1692 return &p
->store
[p
->nr_insn
];
1694 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1696 push_loop_stack(p
, insn
);
1698 /* Override the defaults for this instruction:
1700 brw_set_dest(p
, insn
, brw_null_reg());
1701 brw_set_src0(p
, insn
, brw_null_reg());
1702 brw_set_src1(p
, insn
, brw_null_reg());
1704 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1705 brw_inst_set_exec_size(devinfo
, insn
, execute_size
);
1706 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
);
1713 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1716 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1717 * nesting, since it can always just point to the end of the block/current loop.
1720 brw_patch_break_cont(struct brw_codegen
*p
, brw_inst
*while_inst
)
1722 const struct gen_device_info
*devinfo
= p
->devinfo
;
1723 brw_inst
*do_inst
= get_inner_do_insn(p
);
1725 unsigned br
= brw_jump_scale(devinfo
);
1727 assert(devinfo
->gen
< 6);
1729 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1730 /* If the jump count is != 0, that means that this instruction has already
1731 * been patched because it's part of a loop inside of the one we're
1734 if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_BREAK
&&
1735 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1736 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
*((while_inst
- inst
) + 1));
1737 } else if (brw_inst_opcode(devinfo
, inst
) == BRW_OPCODE_CONTINUE
&&
1738 brw_inst_gen4_jump_count(devinfo
, inst
) == 0) {
1739 brw_inst_set_gen4_jump_count(devinfo
, inst
, br
* (while_inst
- inst
));
1745 brw_WHILE(struct brw_codegen
*p
)
1747 const struct gen_device_info
*devinfo
= p
->devinfo
;
1748 brw_inst
*insn
, *do_insn
;
1749 unsigned br
= brw_jump_scale(devinfo
);
1751 if (devinfo
->gen
>= 6) {
1752 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1753 do_insn
= get_inner_do_insn(p
);
1755 if (devinfo
->gen
>= 8) {
1756 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1757 brw_set_src0(p
, insn
, brw_imm_d(0));
1758 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1759 } else if (devinfo
->gen
== 7) {
1760 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1761 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1762 brw_set_src1(p
, insn
, brw_imm_w(0));
1763 brw_inst_set_jip(devinfo
, insn
, br
* (do_insn
- insn
));
1765 brw_set_dest(p
, insn
, brw_imm_w(0));
1766 brw_inst_set_gen6_jump_count(devinfo
, insn
, br
* (do_insn
- insn
));
1767 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1768 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1771 brw_inst_set_exec_size(devinfo
, insn
, brw_get_default_exec_size(p
));
1774 if (p
->single_program_flow
) {
1775 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1776 do_insn
= get_inner_do_insn(p
);
1778 brw_set_dest(p
, insn
, brw_ip_reg());
1779 brw_set_src0(p
, insn
, brw_ip_reg());
1780 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1781 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
1783 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1784 do_insn
= get_inner_do_insn(p
);
1786 assert(brw_inst_opcode(devinfo
, do_insn
) == BRW_OPCODE_DO
);
1788 brw_set_dest(p
, insn
, brw_ip_reg());
1789 brw_set_src0(p
, insn
, brw_ip_reg());
1790 brw_set_src1(p
, insn
, brw_imm_d(0));
1792 brw_inst_set_exec_size(devinfo
, insn
, brw_inst_exec_size(devinfo
, do_insn
));
1793 brw_inst_set_gen4_jump_count(devinfo
, insn
, br
* (do_insn
- insn
+ 1));
1794 brw_inst_set_gen4_pop_count(devinfo
, insn
, 0);
1796 brw_patch_break_cont(p
, insn
);
1799 brw_inst_set_qtr_control(devinfo
, insn
, BRW_COMPRESSION_NONE
);
1801 p
->loop_stack_depth
--;
1808 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
)
1810 const struct gen_device_info
*devinfo
= p
->devinfo
;
1811 brw_inst
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1814 if (devinfo
->gen
>= 5)
1817 assert(brw_inst_opcode(devinfo
, jmp_insn
) == BRW_OPCODE_JMPI
);
1818 assert(brw_inst_src1_reg_file(devinfo
, jmp_insn
) == BRW_IMMEDIATE_VALUE
);
1820 brw_inst_set_gen4_jump_count(devinfo
, jmp_insn
,
1821 jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1));
1824 /* To integrate with the above, it makes sense that the comparison
1825 * instruction should populate the flag register. It might be simpler
1826 * just to use the flag reg for most WM tasks?
1828 void brw_CMP(struct brw_codegen
*p
,
1829 struct brw_reg dest
,
1830 unsigned conditional
,
1831 struct brw_reg src0
,
1832 struct brw_reg src1
)
1834 const struct gen_device_info
*devinfo
= p
->devinfo
;
1835 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1837 brw_inst_set_cond_modifier(devinfo
, insn
, conditional
);
1838 brw_set_dest(p
, insn
, dest
);
1839 brw_set_src0(p
, insn
, src0
);
1840 brw_set_src1(p
, insn
, src1
);
1842 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1844 * "Any CMP instruction with a null destination must use a {switch}."
1846 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1847 * mentioned on their work-arounds pages.
1849 if (devinfo
->gen
== 7) {
1850 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1851 dest
.nr
== BRW_ARF_NULL
) {
1852 brw_inst_set_thread_control(devinfo
, insn
, BRW_THREAD_SWITCH
);
1857 /***********************************************************************
1858 * Helpers for the various SEND message types:
1861 /** Extended math function, float[8].
1863 void gen4_math(struct brw_codegen
*p
,
1864 struct brw_reg dest
,
1866 unsigned msg_reg_nr
,
1868 unsigned precision
)
1870 const struct gen_device_info
*devinfo
= p
->devinfo
;
1871 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1873 if (has_scalar_region(src
)) {
1874 data_type
= BRW_MATH_DATA_SCALAR
;
1876 data_type
= BRW_MATH_DATA_VECTOR
;
1879 assert(devinfo
->gen
< 6);
1881 /* Example code doesn't set predicate_control for send
1884 brw_inst_set_pred_control(devinfo
, insn
, 0);
1885 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
1887 brw_set_dest(p
, insn
, dest
);
1888 brw_set_src0(p
, insn
, src
);
1889 brw_set_math_message(p
,
1892 src
.type
== BRW_REGISTER_TYPE_D
,
1897 void gen6_math(struct brw_codegen
*p
,
1898 struct brw_reg dest
,
1900 struct brw_reg src0
,
1901 struct brw_reg src1
)
1903 const struct gen_device_info
*devinfo
= p
->devinfo
;
1904 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1906 assert(devinfo
->gen
>= 6);
1908 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1909 (devinfo
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1911 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1912 if (devinfo
->gen
== 6) {
1913 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1914 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1917 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1918 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1919 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1920 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1921 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1922 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
||
1923 (devinfo
->gen
>= 8 && src1
.file
== BRW_IMMEDIATE_VALUE
));
1925 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1926 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1929 /* Source modifiers are ignored for extended math instructions on Gen6. */
1930 if (devinfo
->gen
== 6) {
1931 assert(!src0
.negate
);
1933 assert(!src1
.negate
);
1937 brw_inst_set_math_function(devinfo
, insn
, function
);
1939 brw_set_dest(p
, insn
, dest
);
1940 brw_set_src0(p
, insn
, src0
);
1941 brw_set_src1(p
, insn
, src1
);
1945 * Return the right surface index to access the thread scratch space using
1946 * stateless dataport messages.
1949 brw_scratch_surface_idx(const struct brw_codegen
*p
)
1951 /* The scratch space is thread-local so IA coherency is unnecessary. */
1952 if (p
->devinfo
->gen
>= 8)
1953 return GEN8_BTI_STATELESS_NON_COHERENT
;
1955 return BRW_BTI_STATELESS
;
1959 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1960 * using a constant offset per channel.
1962 * The offset must be aligned to oword size (16 bytes). Used for
1963 * register spilling.
1965 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1970 const struct gen_device_info
*devinfo
= p
->devinfo
;
1971 const unsigned target_cache
=
1972 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
1973 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
1974 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
1977 if (devinfo
->gen
>= 6)
1980 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1982 const unsigned mlen
= 1 + num_regs
;
1984 /* Set up the message header. This is g0, with g0.2 filled with
1985 * the offset. We don't want to leave our offset around in g0 or
1986 * it'll screw up texture samples, so set it up inside the message
1990 brw_push_insn_state(p
);
1991 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
1992 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
1993 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
1995 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1997 /* set message header global offset field (reg 0, element 2) */
1998 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2000 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2002 2), BRW_REGISTER_TYPE_UD
),
2003 brw_imm_ud(offset
));
2005 brw_pop_insn_state(p
);
2009 struct brw_reg dest
;
2010 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2011 int send_commit_msg
;
2012 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2013 BRW_REGISTER_TYPE_UW
);
2015 brw_inst_set_compression(devinfo
, insn
, false);
2017 if (brw_inst_exec_size(devinfo
, insn
) >= 16)
2018 src_header
= vec16(src_header
);
2020 assert(brw_inst_pred_control(devinfo
, insn
) == BRW_PREDICATE_NONE
);
2021 if (devinfo
->gen
< 6)
2022 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2024 /* Until gen6, writes followed by reads from the same location
2025 * are not guaranteed to be ordered unless write_commit is set.
2026 * If set, then a no-op write is issued to the destination
2027 * register to set a dependency, and a read from the destination
2028 * can be used to ensure the ordering.
2030 * For gen6, only writes between different threads need ordering
2031 * protection. Our use of DP writes is all about register
2032 * spilling within a thread.
2034 if (devinfo
->gen
>= 6) {
2035 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2036 send_commit_msg
= 0;
2039 send_commit_msg
= 1;
2042 brw_set_dest(p
, insn
, dest
);
2043 if (devinfo
->gen
>= 6) {
2044 brw_set_src0(p
, insn
, mrf
);
2046 brw_set_src0(p
, insn
, brw_null_reg());
2049 if (devinfo
->gen
>= 6)
2050 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2052 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2054 brw_set_dp_write_message(p
,
2056 brw_scratch_surface_idx(p
),
2057 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2061 true, /* header_present */
2062 0, /* not a render target */
2063 send_commit_msg
, /* response_length */
2071 * Read a block of owords (half a GRF each) from the scratch buffer
2072 * using a constant index per channel.
2074 * Offset must be aligned to oword size (16 bytes). Used for register
2078 brw_oword_block_read_scratch(struct brw_codegen
*p
,
2079 struct brw_reg dest
,
2084 const struct gen_device_info
*devinfo
= p
->devinfo
;
2086 if (devinfo
->gen
>= 6)
2089 if (p
->devinfo
->gen
>= 7) {
2090 /* On gen 7 and above, we no longer have message registers and we can
2091 * send from any register we want. By using the destination register
2092 * for the message, we guarantee that the implied message write won't
2093 * accidentally overwrite anything. This has been a problem because
2094 * the MRF registers and source for the final FB write are both fixed
2097 mrf
= retype(dest
, BRW_REGISTER_TYPE_UD
);
2099 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2101 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2103 const unsigned rlen
= num_regs
;
2104 const unsigned target_cache
=
2105 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2106 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2107 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2110 brw_push_insn_state(p
);
2111 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2112 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2113 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2115 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2117 /* set message header global offset field (reg 0, element 2) */
2118 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2119 brw_MOV(p
, get_element_ud(mrf
, 2), brw_imm_ud(offset
));
2121 brw_pop_insn_state(p
);
2125 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2127 assert(brw_inst_pred_control(devinfo
, insn
) == 0);
2128 brw_inst_set_compression(devinfo
, insn
, false);
2130 brw_set_dest(p
, insn
, dest
); /* UW? */
2131 if (devinfo
->gen
>= 6) {
2132 brw_set_src0(p
, insn
, mrf
);
2134 brw_set_src0(p
, insn
, brw_null_reg());
2135 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2138 brw_set_dp_read_message(p
,
2140 brw_scratch_surface_idx(p
),
2141 BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs
* 8),
2142 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2145 true, /* header_present */
2151 gen7_block_read_scratch(struct brw_codegen
*p
,
2152 struct brw_reg dest
,
2156 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2157 assert(brw_inst_pred_control(p
->devinfo
, insn
) == BRW_PREDICATE_NONE
);
2159 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UW
));
2161 /* The HW requires that the header is present; this is to get the g0.5
2164 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2166 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2167 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2168 * is 32 bytes, which happens to be the size of a register.
2171 assert(offset
< (1 << 12));
2173 gen7_set_dp_scratch_message(p
, insn
,
2174 false, /* scratch read */
2176 false, /* invalidate after read */
2179 1, /* mlen: just g0 */
2180 num_regs
, /* rlen */
2181 true); /* header present */
2185 * Read float[4] vectors from the data port constant cache.
2186 * Location (in buffer) should be a multiple of 16.
2187 * Used for fetching shader constants.
2189 void brw_oword_block_read(struct brw_codegen
*p
,
2190 struct brw_reg dest
,
2193 uint32_t bind_table_index
)
2195 const struct gen_device_info
*devinfo
= p
->devinfo
;
2196 const unsigned target_cache
=
2197 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE
:
2198 BRW_DATAPORT_READ_TARGET_DATA_CACHE
);
2199 const unsigned exec_size
= 1 << brw_get_default_exec_size(p
);
2201 /* On newer hardware, offset is in units of owords. */
2202 if (devinfo
->gen
>= 6)
2205 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2207 brw_push_insn_state(p
);
2208 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2209 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
2210 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2212 brw_push_insn_state(p
);
2213 brw_set_default_exec_size(p
, BRW_EXECUTE_8
);
2214 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2216 /* set message header global offset field (reg 0, element 2) */
2217 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2219 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2221 2), BRW_REGISTER_TYPE_UD
),
2222 brw_imm_ud(offset
));
2223 brw_pop_insn_state(p
);
2225 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2227 /* cast dest to a uword[8] vector */
2228 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2230 brw_set_dest(p
, insn
, dest
);
2231 if (devinfo
->gen
>= 6) {
2232 brw_set_src0(p
, insn
, mrf
);
2234 brw_set_src0(p
, insn
, brw_null_reg());
2235 brw_inst_set_base_mrf(devinfo
, insn
, mrf
.nr
);
2238 brw_set_dp_read_message(p
, insn
, bind_table_index
,
2239 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size
),
2240 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2243 true, /* header_present */
2244 DIV_ROUND_UP(exec_size
, 8)); /* response_length */
2246 brw_pop_insn_state(p
);
2250 brw_fb_WRITE(struct brw_codegen
*p
,
2251 struct brw_reg payload
,
2252 struct brw_reg implied_header
,
2253 unsigned msg_control
,
2254 unsigned binding_table_index
,
2255 unsigned msg_length
,
2256 unsigned response_length
,
2258 bool last_render_target
,
2259 bool header_present
)
2261 const struct gen_device_info
*devinfo
= p
->devinfo
;
2262 const unsigned target_cache
=
2263 (devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2264 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2267 struct brw_reg dest
, src0
;
2269 if (brw_get_default_exec_size(p
) >= BRW_EXECUTE_16
)
2270 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2272 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2274 if (devinfo
->gen
>= 6) {
2275 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2277 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2279 brw_inst_set_compression(devinfo
, insn
, false);
2281 if (devinfo
->gen
>= 6) {
2282 /* headerless version, just submit color payload */
2285 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2287 assert(payload
.file
== BRW_MESSAGE_REGISTER_FILE
);
2288 brw_inst_set_base_mrf(devinfo
, insn
, payload
.nr
);
2289 src0
= implied_header
;
2291 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2294 brw_set_dest(p
, insn
, dest
);
2295 brw_set_src0(p
, insn
, src0
);
2296 brw_set_dp_write_message(p
,
2298 binding_table_index
,
2307 0 /* send_commit_msg */);
2313 gen9_fb_READ(struct brw_codegen
*p
,
2315 struct brw_reg payload
,
2316 unsigned binding_table_index
,
2317 unsigned msg_length
,
2318 unsigned response_length
,
2321 const struct gen_device_info
*devinfo
= p
->devinfo
;
2322 assert(devinfo
->gen
>= 9);
2323 const unsigned msg_subtype
=
2324 brw_get_default_exec_size(p
) == BRW_EXECUTE_16
? 0 : 1;
2325 brw_inst
*insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2327 brw_set_dest(p
, insn
, dst
);
2328 brw_set_src0(p
, insn
, payload
);
2329 brw_set_dp_read_message(p
, insn
, binding_table_index
,
2330 per_sample
<< 5 | msg_subtype
,
2331 GEN9_DATAPORT_RC_RENDER_TARGET_READ
,
2332 GEN6_SFID_DATAPORT_RENDER_CACHE
,
2333 msg_length
, true /* header_present */,
2335 brw_inst_set_rt_slot_group(devinfo
, insn
, brw_get_default_group(p
) / 16);
2341 * Texture sample instruction.
2342 * Note: the msg_type plus msg_length values determine exactly what kind
2343 * of sampling operation is performed. See volume 4, page 161 of docs.
2345 void brw_SAMPLE(struct brw_codegen
*p
,
2346 struct brw_reg dest
,
2347 unsigned msg_reg_nr
,
2348 struct brw_reg src0
,
2349 unsigned binding_table_index
,
2352 unsigned response_length
,
2353 unsigned msg_length
,
2354 unsigned header_present
,
2356 unsigned return_format
)
2358 const struct gen_device_info
*devinfo
= p
->devinfo
;
2361 if (msg_reg_nr
!= -1)
2362 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2364 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2365 brw_inst_set_sfid(devinfo
, insn
, BRW_SFID_SAMPLER
);
2366 brw_inst_set_pred_control(devinfo
, insn
, BRW_PREDICATE_NONE
); /* XXX */
2368 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2370 * "Instruction compression is not allowed for this instruction (that
2371 * is, send). The hardware behavior is undefined if this instruction is
2372 * set as compressed. However, compress control can be set to "SecHalf"
2373 * to affect the EMask generation."
2375 * No similar wording is found in later PRMs, but there are examples
2376 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2377 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2378 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2380 brw_inst_set_compression(devinfo
, insn
, false);
2382 if (devinfo
->gen
< 6)
2383 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2385 brw_set_dest(p
, insn
, dest
);
2386 brw_set_src0(p
, insn
, src0
);
2387 brw_set_desc(p
, insn
,
2388 brw_message_desc(devinfo
, msg_length
, response_length
,
2390 brw_sampler_desc(devinfo
, binding_table_index
, sampler
,
2391 msg_type
, simd_mode
, return_format
));
2394 /* Adjust the message header's sampler state pointer to
2395 * select the correct group of 16 samplers.
2397 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
2398 struct brw_reg header
,
2399 struct brw_reg sampler_index
)
2401 /* The "Sampler Index" field can only store values between 0 and 15.
2402 * However, we can add an offset to the "Sampler State Pointer"
2403 * field, effectively selecting a different set of 16 samplers.
2405 * The "Sampler State Pointer" needs to be aligned to a 32-byte
2406 * offset, and each sampler state is only 16-bytes, so we can't
2407 * exclusively use the offset - we have to use both.
2410 const struct gen_device_info
*devinfo
= p
->devinfo
;
2412 if (sampler_index
.file
== BRW_IMMEDIATE_VALUE
) {
2413 const int sampler_state_size
= 16; /* 16 bytes */
2414 uint32_t sampler
= sampler_index
.ud
;
2416 if (sampler
>= 16) {
2417 assert(devinfo
->is_haswell
|| devinfo
->gen
>= 8);
2419 get_element_ud(header
, 3),
2420 get_element_ud(brw_vec8_grf(0, 0), 3),
2421 brw_imm_ud(16 * (sampler
/ 16) * sampler_state_size
));
2424 /* Non-const sampler array indexing case */
2425 if (devinfo
->gen
< 8 && !devinfo
->is_haswell
) {
2429 struct brw_reg temp
= get_element_ud(header
, 3);
2431 brw_AND(p
, temp
, get_element_ud(sampler_index
, 0), brw_imm_ud(0x0f0));
2432 brw_SHL(p
, temp
, temp
, brw_imm_ud(4));
2434 get_element_ud(header
, 3),
2435 get_element_ud(brw_vec8_grf(0, 0), 3),
2440 /* All these variables are pretty confusing - we might be better off
2441 * using bitmasks and macros for this, in the old style. Or perhaps
2442 * just having the caller instantiate the fields in dword3 itself.
2444 void brw_urb_WRITE(struct brw_codegen
*p
,
2445 struct brw_reg dest
,
2446 unsigned msg_reg_nr
,
2447 struct brw_reg src0
,
2448 enum brw_urb_write_flags flags
,
2449 unsigned msg_length
,
2450 unsigned response_length
,
2454 const struct gen_device_info
*devinfo
= p
->devinfo
;
2457 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2459 if (devinfo
->gen
>= 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2460 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2461 brw_push_insn_state(p
);
2462 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2463 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2464 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2465 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2466 BRW_REGISTER_TYPE_UD
),
2467 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2468 brw_imm_ud(0xff00));
2469 brw_pop_insn_state(p
);
2472 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2474 assert(msg_length
< BRW_MAX_MRF(devinfo
->gen
));
2476 brw_set_dest(p
, insn
, dest
);
2477 brw_set_src0(p
, insn
, src0
);
2478 brw_set_src1(p
, insn
, brw_imm_d(0));
2480 if (devinfo
->gen
< 6)
2481 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2483 brw_set_urb_message(p
,
2493 brw_send_indirect_message(struct brw_codegen
*p
,
2496 struct brw_reg payload
,
2497 struct brw_reg desc
,
2500 const struct gen_device_info
*devinfo
= p
->devinfo
;
2501 struct brw_inst
*send
;
2504 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
2506 assert(desc
.type
== BRW_REGISTER_TYPE_UD
);
2508 /* We hold on to the setup instruction (the SEND in the direct case, the OR
2509 * in the indirect case) by its index in the instruction store. The
2510 * pointer returned by next_insn() may become invalid if emitting the SEND
2511 * in the indirect case reallocs the store.
2514 if (desc
.file
== BRW_IMMEDIATE_VALUE
) {
2516 send
= next_insn(p
, BRW_OPCODE_SEND
);
2517 brw_set_desc(p
, send
, desc
.ud
| desc_imm
);
2520 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2522 brw_push_insn_state(p
);
2523 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2524 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2525 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2526 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2528 /* Load the indirect descriptor to an address register using OR so the
2529 * caller can specify additional descriptor bits with the usual
2530 * brw_set_*_message() helper functions.
2533 brw_OR(p
, addr
, desc
, brw_imm_ud(desc_imm
));
2535 brw_pop_insn_state(p
);
2537 send
= next_insn(p
, BRW_OPCODE_SEND
);
2538 brw_set_src1(p
, send
, addr
);
2541 if (dst
.width
< BRW_EXECUTE_8
)
2542 brw_inst_set_exec_size(devinfo
, send
, dst
.width
);
2544 brw_set_dest(p
, send
, dst
);
2545 brw_set_src0(p
, send
, retype(payload
, BRW_REGISTER_TYPE_UD
));
2546 brw_inst_set_sfid(devinfo
, send
, sfid
);
2548 return &p
->store
[setup
];
2551 static struct brw_inst
*
2552 brw_send_indirect_surface_message(struct brw_codegen
*p
,
2555 struct brw_reg payload
,
2556 struct brw_reg surface
,
2557 unsigned message_len
,
2558 unsigned response_len
,
2559 bool header_present
)
2561 const struct gen_device_info
*devinfo
= p
->devinfo
;
2562 struct brw_inst
*insn
;
2564 if (surface
.file
!= BRW_IMMEDIATE_VALUE
) {
2565 struct brw_reg addr
= retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
2567 brw_push_insn_state(p
);
2568 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
2569 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
2570 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
2571 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
2573 /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2574 * some surface array is accessed out of bounds.
2576 insn
= brw_AND(p
, addr
,
2577 suboffset(vec1(retype(surface
, BRW_REGISTER_TYPE_UD
)),
2578 BRW_GET_SWZ(surface
.swizzle
, 0)),
2581 brw_pop_insn_state(p
);
2586 insn
= brw_send_indirect_message(p
, sfid
, dst
, payload
, surface
, 0);
2587 brw_inst_set_mlen(devinfo
, insn
, message_len
);
2588 brw_inst_set_rlen(devinfo
, insn
, response_len
);
2589 brw_inst_set_header_present(devinfo
, insn
, header_present
);
2595 while_jumps_before_offset(const struct gen_device_info
*devinfo
,
2596 brw_inst
*insn
, int while_offset
, int start_offset
)
2598 int scale
= 16 / brw_jump_scale(devinfo
);
2599 int jip
= devinfo
->gen
== 6 ? brw_inst_gen6_jump_count(devinfo
, insn
)
2600 : brw_inst_jip(devinfo
, insn
);
2602 return while_offset
+ jip
* scale
<= start_offset
;
2607 brw_find_next_block_end(struct brw_codegen
*p
, int start_offset
)
2610 void *store
= p
->store
;
2611 const struct gen_device_info
*devinfo
= p
->devinfo
;
2615 for (offset
= next_offset(devinfo
, store
, start_offset
);
2616 offset
< p
->next_insn_offset
;
2617 offset
= next_offset(devinfo
, store
, offset
)) {
2618 brw_inst
*insn
= store
+ offset
;
2620 switch (brw_inst_opcode(devinfo
, insn
)) {
2624 case BRW_OPCODE_ENDIF
:
2629 case BRW_OPCODE_WHILE
:
2630 /* If the while doesn't jump before our instruction, it's the end
2631 * of a sibling do...while loop. Ignore it.
2633 if (!while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2636 case BRW_OPCODE_ELSE
:
2637 case BRW_OPCODE_HALT
:
2646 /* There is no DO instruction on gen6, so to find the end of the loop
2647 * we have to see if the loop is jumping back before our start
2651 brw_find_loop_end(struct brw_codegen
*p
, int start_offset
)
2653 const struct gen_device_info
*devinfo
= p
->devinfo
;
2655 void *store
= p
->store
;
2657 assert(devinfo
->gen
>= 6);
2659 /* Always start after the instruction (such as a WHILE) we're trying to fix
2662 for (offset
= next_offset(devinfo
, store
, start_offset
);
2663 offset
< p
->next_insn_offset
;
2664 offset
= next_offset(devinfo
, store
, offset
)) {
2665 brw_inst
*insn
= store
+ offset
;
2667 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
) {
2668 if (while_jumps_before_offset(devinfo
, insn
, offset
, start_offset
))
2672 assert(!"not reached");
2673 return start_offset
;
2676 /* After program generation, go back and update the UIP and JIP of
2677 * BREAK, CONT, and HALT instructions to their correct locations.
2680 brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
)
2682 const struct gen_device_info
*devinfo
= p
->devinfo
;
2684 int br
= brw_jump_scale(devinfo
);
2685 int scale
= 16 / br
;
2686 void *store
= p
->store
;
2688 if (devinfo
->gen
< 6)
2691 for (offset
= start_offset
; offset
< p
->next_insn_offset
; offset
+= 16) {
2692 brw_inst
*insn
= store
+ offset
;
2693 assert(brw_inst_cmpt_control(devinfo
, insn
) == 0);
2695 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2696 switch (brw_inst_opcode(devinfo
, insn
)) {
2697 case BRW_OPCODE_BREAK
:
2698 assert(block_end_offset
!= 0);
2699 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2700 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2701 brw_inst_set_uip(devinfo
, insn
,
2702 (brw_find_loop_end(p
, offset
) - offset
+
2703 (devinfo
->gen
== 6 ? 16 : 0)) / scale
);
2705 case BRW_OPCODE_CONTINUE
:
2706 assert(block_end_offset
!= 0);
2707 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2708 brw_inst_set_uip(devinfo
, insn
,
2709 (brw_find_loop_end(p
, offset
) - offset
) / scale
);
2711 assert(brw_inst_uip(devinfo
, insn
) != 0);
2712 assert(brw_inst_jip(devinfo
, insn
) != 0);
2715 case BRW_OPCODE_ENDIF
: {
2716 int32_t jump
= (block_end_offset
== 0) ?
2717 1 * br
: (block_end_offset
- offset
) / scale
;
2718 if (devinfo
->gen
>= 7)
2719 brw_inst_set_jip(devinfo
, insn
, jump
);
2721 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump
);
2725 case BRW_OPCODE_HALT
:
2726 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2728 * "In case of the halt instruction not inside any conditional
2729 * code block, the value of <JIP> and <UIP> should be the
2730 * same. In case of the halt instruction inside conditional code
2731 * block, the <UIP> should be the end of the program, and the
2732 * <JIP> should be end of the most inner conditional code block."
2734 * The uip will have already been set by whoever set up the
2737 if (block_end_offset
== 0) {
2738 brw_inst_set_jip(devinfo
, insn
, brw_inst_uip(devinfo
, insn
));
2740 brw_inst_set_jip(devinfo
, insn
, (block_end_offset
- offset
) / scale
);
2742 assert(brw_inst_uip(devinfo
, insn
) != 0);
2743 assert(brw_inst_jip(devinfo
, insn
) != 0);
2749 void brw_ff_sync(struct brw_codegen
*p
,
2750 struct brw_reg dest
,
2751 unsigned msg_reg_nr
,
2752 struct brw_reg src0
,
2754 unsigned response_length
,
2757 const struct gen_device_info
*devinfo
= p
->devinfo
;
2760 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2762 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2763 brw_set_dest(p
, insn
, dest
);
2764 brw_set_src0(p
, insn
, src0
);
2765 brw_set_src1(p
, insn
, brw_imm_d(0));
2767 if (devinfo
->gen
< 6)
2768 brw_inst_set_base_mrf(devinfo
, insn
, msg_reg_nr
);
2770 brw_set_ff_sync_message(p
,
2778 * Emit the SEND instruction necessary to generate stream output data on Gen6
2779 * (for transform feedback).
2781 * If send_commit_msg is true, this is the last piece of stream output data
2782 * from this thread, so send the data as a committed write. According to the
2783 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2785 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2786 * writes are complete by sending the final write as a committed write."
2789 brw_svb_write(struct brw_codegen
*p
,
2790 struct brw_reg dest
,
2791 unsigned msg_reg_nr
,
2792 struct brw_reg src0
,
2793 unsigned binding_table_index
,
2794 bool send_commit_msg
)
2796 const struct gen_device_info
*devinfo
= p
->devinfo
;
2797 const unsigned target_cache
=
2798 (devinfo
->gen
>= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE
:
2799 devinfo
->gen
>= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE
:
2800 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
);
2803 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2805 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2806 brw_set_dest(p
, insn
, dest
);
2807 brw_set_src0(p
, insn
, src0
);
2808 brw_set_src1(p
, insn
, brw_imm_d(0));
2809 brw_set_dp_write_message(p
, insn
,
2810 binding_table_index
,
2811 0, /* msg_control: ignored */
2812 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2815 true, /* header_present */
2816 0, /* last_render_target: ignored */
2817 send_commit_msg
, /* response_length */
2818 0, /* end_of_thread */
2819 send_commit_msg
); /* send_commit_msg */
2823 brw_surface_payload_size(struct brw_codegen
*p
,
2824 unsigned num_channels
,
2828 if (has_simd4x2
&& brw_get_default_access_mode(p
) == BRW_ALIGN_16
)
2830 else if (has_simd16
&& brw_get_default_exec_size(p
) == BRW_EXECUTE_16
)
2831 return 2 * num_channels
;
2833 return num_channels
;
2837 brw_set_dp_untyped_atomic_message(struct brw_codegen
*p
,
2840 bool response_expected
)
2842 const struct gen_device_info
*devinfo
= p
->devinfo
;
2843 unsigned msg_control
=
2844 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
2845 (response_expected
? 1 << 5 : 0); /* Return data expected */
2847 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
2848 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
2849 if (brw_get_default_exec_size(p
) != BRW_EXECUTE_16
)
2850 msg_control
|= 1 << 4; /* SIMD8 mode */
2852 brw_inst_set_dp_msg_type(devinfo
, insn
,
2853 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
);
2855 brw_inst_set_dp_msg_type(devinfo
, insn
,
2856 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
);
2859 brw_inst_set_dp_msg_type(devinfo
, insn
,
2860 GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
);
2862 if (brw_get_default_exec_size(p
) != BRW_EXECUTE_16
)
2863 msg_control
|= 1 << 4; /* SIMD8 mode */
2866 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2870 brw_untyped_atomic(struct brw_codegen
*p
,
2872 struct brw_reg payload
,
2873 struct brw_reg surface
,
2875 unsigned msg_length
,
2876 bool response_expected
,
2877 bool header_present
)
2879 const struct gen_device_info
*devinfo
= p
->devinfo
;
2880 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2881 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2882 GEN7_SFID_DATAPORT_DATA_CACHE
);
2883 const bool align1
= brw_get_default_access_mode(p
) == BRW_ALIGN_1
;
2884 /* Mask out unused components -- This is especially important in Align16
2885 * mode on generations that don't have native support for SIMD4x2 atomics,
2886 * because unused but enabled components will cause the dataport to perform
2887 * additional atomic operations on the addresses that happen to be in the
2888 * uninitialized Y, Z and W coordinates of the payload.
2890 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
2891 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2892 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
2893 brw_surface_payload_size(p
, response_expected
,
2894 devinfo
->gen
>= 8 || devinfo
->is_haswell
, true),
2897 brw_set_dp_untyped_atomic_message(
2898 p
, insn
, atomic_op
, response_expected
);
2902 brw_set_dp_untyped_surface_read_message(struct brw_codegen
*p
,
2903 struct brw_inst
*insn
,
2904 unsigned num_channels
)
2906 const struct gen_device_info
*devinfo
= p
->devinfo
;
2907 /* Set mask of 32-bit channels to drop. */
2908 unsigned msg_control
= 0xf & (0xf << num_channels
);
2910 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
2911 if (brw_get_default_exec_size(p
) == BRW_EXECUTE_16
)
2912 msg_control
|= 1 << 4; /* SIMD16 mode */
2914 msg_control
|= 2 << 4; /* SIMD8 mode */
2917 brw_inst_set_dp_msg_type(devinfo
, insn
,
2918 (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2919 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
:
2920 GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
));
2921 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2925 brw_untyped_surface_read(struct brw_codegen
*p
,
2927 struct brw_reg payload
,
2928 struct brw_reg surface
,
2929 unsigned msg_length
,
2930 unsigned num_channels
)
2932 const struct gen_device_info
*devinfo
= p
->devinfo
;
2933 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2934 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2935 GEN7_SFID_DATAPORT_DATA_CACHE
);
2936 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2937 p
, sfid
, dst
, payload
, surface
, msg_length
,
2938 brw_surface_payload_size(p
, num_channels
, true, true),
2941 brw_set_dp_untyped_surface_read_message(
2942 p
, insn
, num_channels
);
2946 brw_set_dp_untyped_surface_write_message(struct brw_codegen
*p
,
2947 struct brw_inst
*insn
,
2948 unsigned num_channels
)
2950 const struct gen_device_info
*devinfo
= p
->devinfo
;
2951 /* Set mask of 32-bit channels to drop. */
2952 unsigned msg_control
= 0xf & (0xf << num_channels
);
2954 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
2955 if (brw_get_default_exec_size(p
) == BRW_EXECUTE_16
)
2956 msg_control
|= 1 << 4; /* SIMD16 mode */
2958 msg_control
|= 2 << 4; /* SIMD8 mode */
2960 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
)
2961 msg_control
|= 0 << 4; /* SIMD4x2 mode */
2963 msg_control
|= 2 << 4; /* SIMD8 mode */
2966 brw_inst_set_dp_msg_type(devinfo
, insn
,
2967 devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2968 HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
:
2969 GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
);
2970 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
2974 brw_untyped_surface_write(struct brw_codegen
*p
,
2975 struct brw_reg payload
,
2976 struct brw_reg surface
,
2977 unsigned msg_length
,
2978 unsigned num_channels
,
2979 bool header_present
)
2981 const struct gen_device_info
*devinfo
= p
->devinfo
;
2982 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
2983 HSW_SFID_DATAPORT_DATA_CACHE_1
:
2984 GEN7_SFID_DATAPORT_DATA_CACHE
);
2985 const bool align1
= brw_get_default_access_mode(p
) == BRW_ALIGN_1
;
2986 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
2987 const unsigned mask
= devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
2988 WRITEMASK_X
: WRITEMASK_XYZW
;
2989 struct brw_inst
*insn
= brw_send_indirect_surface_message(
2990 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
2991 payload
, surface
, msg_length
, 0, header_present
);
2993 brw_set_dp_untyped_surface_write_message(
2994 p
, insn
, num_channels
);
2998 brw_byte_scattered_data_element_from_bit_size(unsigned bit_size
)
3002 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
3004 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
3006 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
3008 unreachable("Unsupported bit_size for byte scattered messages");
3014 brw_byte_scattered_read(struct brw_codegen
*p
,
3016 struct brw_reg payload
,
3017 struct brw_reg surface
,
3018 unsigned msg_length
,
3021 const struct gen_device_info
*devinfo
= p
->devinfo
;
3022 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
3023 assert(brw_get_default_access_mode(p
) == BRW_ALIGN_1
);
3024 const unsigned sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
3026 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3027 p
, sfid
, dst
, payload
, surface
, msg_length
,
3028 brw_surface_payload_size(p
, 1, true, true),
3031 unsigned msg_control
=
3032 brw_byte_scattered_data_element_from_bit_size(bit_size
) << 2;
3034 if (brw_get_default_exec_size(p
) == BRW_EXECUTE_16
)
3035 msg_control
|= 1; /* SIMD16 mode */
3037 msg_control
|= 0; /* SIMD8 mode */
3039 brw_inst_set_dp_msg_type(devinfo
, insn
,
3040 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
);
3041 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3045 brw_byte_scattered_write(struct brw_codegen
*p
,
3046 struct brw_reg payload
,
3047 struct brw_reg surface
,
3048 unsigned msg_length
,
3050 bool header_present
)
3052 const struct gen_device_info
*devinfo
= p
->devinfo
;
3053 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
3054 assert(brw_get_default_access_mode(p
) == BRW_ALIGN_1
);
3055 const unsigned sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
3057 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3058 p
, sfid
, brw_writemask(brw_null_reg(), WRITEMASK_XYZW
),
3059 payload
, surface
, msg_length
, 0, header_present
);
3061 unsigned msg_control
=
3062 brw_byte_scattered_data_element_from_bit_size(bit_size
) << 2;
3064 if (brw_get_default_exec_size(p
) == BRW_EXECUTE_16
)
3069 brw_inst_set_dp_msg_type(devinfo
, insn
,
3070 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
);
3071 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3075 brw_set_dp_typed_atomic_message(struct brw_codegen
*p
,
3076 struct brw_inst
*insn
,
3078 bool response_expected
)
3080 const struct gen_device_info
*devinfo
= p
->devinfo
;
3081 unsigned msg_control
=
3082 atomic_op
| /* Atomic Operation Type: BRW_AOP_* */
3083 (response_expected
? 1 << 5 : 0); /* Return data expected */
3085 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3086 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3087 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3088 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3090 brw_inst_set_dp_msg_type(devinfo
, insn
,
3091 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
);
3093 brw_inst_set_dp_msg_type(devinfo
, insn
,
3094 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
);
3098 brw_inst_set_dp_msg_type(devinfo
, insn
,
3099 GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
);
3101 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3102 msg_control
|= 1 << 4; /* Use high 8 slots of the sample mask */
3105 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3109 brw_typed_atomic(struct brw_codegen
*p
,
3111 struct brw_reg payload
,
3112 struct brw_reg surface
,
3114 unsigned msg_length
,
3115 bool response_expected
,
3116 bool header_present
) {
3117 const struct gen_device_info
*devinfo
= p
->devinfo
;
3118 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3119 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3120 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3121 const bool align1
= brw_get_default_access_mode(p
) == BRW_ALIGN_1
;
3122 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3123 const unsigned mask
= align1
? WRITEMASK_XYZW
: WRITEMASK_X
;
3124 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3125 p
, sfid
, brw_writemask(dst
, mask
), payload
, surface
, msg_length
,
3126 brw_surface_payload_size(p
, response_expected
,
3127 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3130 brw_set_dp_typed_atomic_message(
3131 p
, insn
, atomic_op
, response_expected
);
3135 brw_set_dp_typed_surface_read_message(struct brw_codegen
*p
,
3136 struct brw_inst
*insn
,
3137 unsigned num_channels
)
3139 const struct gen_device_info
*devinfo
= p
->devinfo
;
3140 /* Set mask of unused channels. */
3141 unsigned msg_control
= 0xf & (0xf << num_channels
);
3143 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3144 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3145 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3146 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3148 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3151 brw_inst_set_dp_msg_type(devinfo
, insn
,
3152 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
);
3154 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3155 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3156 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3159 brw_inst_set_dp_msg_type(devinfo
, insn
,
3160 GEN7_DATAPORT_RC_TYPED_SURFACE_READ
);
3163 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3167 brw_typed_surface_read(struct brw_codegen
*p
,
3169 struct brw_reg payload
,
3170 struct brw_reg surface
,
3171 unsigned msg_length
,
3172 unsigned num_channels
,
3173 bool header_present
)
3175 const struct gen_device_info
*devinfo
= p
->devinfo
;
3176 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3177 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3178 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3179 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3180 p
, sfid
, dst
, payload
, surface
, msg_length
,
3181 brw_surface_payload_size(p
, num_channels
,
3182 devinfo
->gen
>= 8 || devinfo
->is_haswell
, false),
3185 brw_set_dp_typed_surface_read_message(
3186 p
, insn
, num_channels
);
3190 brw_set_dp_typed_surface_write_message(struct brw_codegen
*p
,
3191 struct brw_inst
*insn
,
3192 unsigned num_channels
)
3194 const struct gen_device_info
*devinfo
= p
->devinfo
;
3195 /* Set mask of unused channels. */
3196 unsigned msg_control
= 0xf & (0xf << num_channels
);
3198 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
3199 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3200 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3201 msg_control
|= 2 << 4; /* Use high 8 slots of the sample mask */
3203 msg_control
|= 1 << 4; /* Use low 8 slots of the sample mask */
3206 brw_inst_set_dp_msg_type(devinfo
, insn
,
3207 HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
);
3210 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3211 if ((brw_get_default_group(p
) / 8) % 2 == 1)
3212 msg_control
|= 1 << 5; /* Use high 8 slots of the sample mask */
3215 brw_inst_set_dp_msg_type(devinfo
, insn
,
3216 GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
);
3219 brw_inst_set_dp_msg_control(devinfo
, insn
, msg_control
);
3223 brw_typed_surface_write(struct brw_codegen
*p
,
3224 struct brw_reg payload
,
3225 struct brw_reg surface
,
3226 unsigned msg_length
,
3227 unsigned num_channels
,
3228 bool header_present
)
3230 const struct gen_device_info
*devinfo
= p
->devinfo
;
3231 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3232 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3233 GEN6_SFID_DATAPORT_RENDER_CACHE
);
3234 const bool align1
= brw_get_default_access_mode(p
) == BRW_ALIGN_1
;
3235 /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3236 const unsigned mask
= (devinfo
->gen
== 7 && !devinfo
->is_haswell
&& !align1
?
3237 WRITEMASK_X
: WRITEMASK_XYZW
);
3238 struct brw_inst
*insn
= brw_send_indirect_surface_message(
3239 p
, sfid
, brw_writemask(brw_null_reg(), mask
),
3240 payload
, surface
, msg_length
, 0, header_present
);
3242 brw_set_dp_typed_surface_write_message(
3243 p
, insn
, num_channels
);
3247 brw_set_memory_fence_message(struct brw_codegen
*p
,
3248 struct brw_inst
*insn
,
3249 enum brw_message_target sfid
,
3252 const struct gen_device_info
*devinfo
= p
->devinfo
;
3254 brw_set_desc(p
, insn
, brw_message_desc(
3255 devinfo
, 1, (commit_enable
? 1 : 0), true));
3257 brw_inst_set_sfid(devinfo
, insn
, sfid
);
3260 case GEN6_SFID_DATAPORT_RENDER_CACHE
:
3261 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_RC_MEMORY_FENCE
);
3263 case GEN7_SFID_DATAPORT_DATA_CACHE
:
3264 brw_inst_set_dp_msg_type(devinfo
, insn
, GEN7_DATAPORT_DC_MEMORY_FENCE
);
3267 unreachable("Not reached");
3271 brw_inst_set_dp_msg_control(devinfo
, insn
, 1 << 5);
3275 brw_memory_fence(struct brw_codegen
*p
,
3277 enum opcode send_op
)
3279 const struct gen_device_info
*devinfo
= p
->devinfo
;
3280 const bool commit_enable
=
3281 devinfo
->gen
>= 10 || /* HSD ES # 1404612949 */
3282 (devinfo
->gen
== 7 && !devinfo
->is_haswell
);
3283 struct brw_inst
*insn
;
3285 brw_push_insn_state(p
);
3286 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3287 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3290 /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3291 * message doesn't write anything back.
3293 insn
= next_insn(p
, send_op
);
3294 dst
= retype(dst
, BRW_REGISTER_TYPE_UW
);
3295 brw_set_dest(p
, insn
, dst
);
3296 brw_set_src0(p
, insn
, dst
);
3297 brw_set_memory_fence_message(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
3300 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
3301 /* IVB does typed surface access through the render cache, so we need to
3302 * flush it too. Use a different register so both flushes can be
3303 * pipelined by the hardware.
3305 insn
= next_insn(p
, send_op
);
3306 brw_set_dest(p
, insn
, offset(dst
, 1));
3307 brw_set_src0(p
, insn
, offset(dst
, 1));
3308 brw_set_memory_fence_message(p
, insn
, GEN6_SFID_DATAPORT_RENDER_CACHE
,
3311 /* Now write the response of the second message into the response of the
3312 * first to trigger a pipeline stall -- This way future render and data
3313 * cache messages will be properly ordered with respect to past data and
3314 * render cache messages.
3316 brw_MOV(p
, dst
, offset(dst
, 1));
3319 brw_pop_insn_state(p
);
3323 brw_pixel_interpolator_query(struct brw_codegen
*p
,
3324 struct brw_reg dest
,
3328 struct brw_reg data
,
3329 unsigned msg_length
,
3330 unsigned response_length
)
3332 const struct gen_device_info
*devinfo
= p
->devinfo
;
3333 struct brw_inst
*insn
;
3334 const uint16_t exec_size
= brw_get_default_exec_size(p
);
3335 const uint16_t qtr_ctrl
= brw_get_default_group(p
) / 8;
3337 /* brw_send_indirect_message will automatically use a direct send message
3338 * if data is actually immediate.
3340 insn
= brw_send_indirect_message(p
,
3341 GEN7_SFID_PIXEL_INTERPOLATOR
,
3345 brw_inst_set_mlen(devinfo
, insn
, msg_length
);
3346 brw_inst_set_rlen(devinfo
, insn
, response_length
);
3348 brw_inst_set_pi_simd_mode(devinfo
, insn
, exec_size
== BRW_EXECUTE_16
);
3349 brw_inst_set_pi_slot_group(devinfo
, insn
, qtr_ctrl
/ 2);
3350 brw_inst_set_pi_nopersp(devinfo
, insn
, noperspective
);
3351 brw_inst_set_pi_message_type(devinfo
, insn
, mode
);
3355 brw_find_live_channel(struct brw_codegen
*p
, struct brw_reg dst
,
3356 struct brw_reg mask
)
3358 const struct gen_device_info
*devinfo
= p
->devinfo
;
3359 const unsigned exec_size
= 1 << brw_get_default_exec_size(p
);
3360 const unsigned qtr_control
= brw_get_default_group(p
) / 8;
3363 assert(devinfo
->gen
>= 7);
3364 assert(mask
.type
== BRW_REGISTER_TYPE_UD
);
3366 brw_push_insn_state(p
);
3368 if (brw_get_default_access_mode(p
) == BRW_ALIGN_1
) {
3369 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3371 if (devinfo
->gen
>= 8) {
3372 /* Getting the first active channel index is easy on Gen8: Just find
3373 * the first bit set in the execution mask. The register exists on
3374 * HSW already but it reads back as all ones when the current
3375 * instruction has execution masking disabled, so it's kind of
3378 struct brw_reg exec_mask
=
3379 retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
);
3381 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3382 if (mask
.file
!= BRW_IMMEDIATE_VALUE
|| mask
.ud
!= 0xffffffff) {
3383 /* Unfortunately, ce0 does not take into account the thread
3384 * dispatch mask, which may be a problem in cases where it's not
3385 * tightly packed (i.e. it doesn't have the form '2^n - 1' for
3386 * some n). Combine ce0 with the given dispatch (or vector) mask
3387 * to mask off those channels which were never dispatched by the
3390 brw_SHR(p
, vec1(dst
), mask
, brw_imm_ud(qtr_control
* 8));
3391 brw_AND(p
, vec1(dst
), exec_mask
, vec1(dst
));
3392 exec_mask
= vec1(dst
);
3395 /* Quarter control has the effect of magically shifting the value of
3396 * ce0 so you'll get the first active channel relative to the
3397 * specified quarter control as result.
3399 inst
= brw_FBL(p
, vec1(dst
), exec_mask
);
3401 const struct brw_reg flag
= brw_flag_reg(p
->current
->flag_subreg
/ 2,
3402 p
->current
->flag_subreg
% 2);
3404 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3405 brw_MOV(p
, retype(flag
, BRW_REGISTER_TYPE_UD
), brw_imm_ud(0));
3407 /* Run enough instructions returning zero with execution masking and
3408 * a conditional modifier enabled in order to get the full execution
3409 * mask in f1.0. We could use a single 32-wide move here if it
3410 * weren't because of the hardware bug that causes channel enables to
3411 * be applied incorrectly to the second half of 32-wide instructions
3414 const unsigned lower_size
= MIN2(16, exec_size
);
3415 for (unsigned i
= 0; i
< exec_size
/ lower_size
; i
++) {
3416 inst
= brw_MOV(p
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
),
3418 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3419 brw_inst_set_group(devinfo
, inst
, lower_size
* i
+ 8 * qtr_control
);
3420 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_Z
);
3421 brw_inst_set_exec_size(devinfo
, inst
, cvt(lower_size
) - 1);
3424 /* Find the first bit set in the exec_size-wide portion of the flag
3425 * register that was updated by the last sequence of MOV
3428 const enum brw_reg_type type
= brw_int_type(exec_size
/ 8, false);
3429 brw_set_default_exec_size(p
, BRW_EXECUTE_1
);
3430 brw_FBL(p
, vec1(dst
), byte_offset(retype(flag
, type
), qtr_control
));
3433 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3435 if (devinfo
->gen
>= 8 &&
3436 mask
.file
== BRW_IMMEDIATE_VALUE
&& mask
.ud
== 0xffffffff) {
3437 /* In SIMD4x2 mode the first active channel index is just the
3438 * negation of the first bit of the mask register. Note that ce0
3439 * doesn't take into account the dispatch mask, so the Gen7 path
3440 * should be used instead unless you have the guarantee that the
3441 * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
3444 inst
= brw_AND(p
, brw_writemask(dst
, WRITEMASK_X
),
3445 negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD
)),
3449 /* Overwrite the destination without and with execution masking to
3450 * find out which of the channels is active.
3452 brw_push_insn_state(p
);
3453 brw_set_default_exec_size(p
, BRW_EXECUTE_4
);
3454 brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3457 inst
= brw_MOV(p
, brw_writemask(vec4(dst
), WRITEMASK_X
),
3459 brw_pop_insn_state(p
);
3460 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_ENABLE
);
3464 brw_pop_insn_state(p
);
3468 brw_broadcast(struct brw_codegen
*p
,
3473 const struct gen_device_info
*devinfo
= p
->devinfo
;
3474 const bool align1
= brw_get_default_access_mode(p
) == BRW_ALIGN_1
;
3477 brw_push_insn_state(p
);
3478 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3479 brw_set_default_exec_size(p
, align1
? BRW_EXECUTE_1
: BRW_EXECUTE_4
);
3481 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
&&
3482 src
.address_mode
== BRW_ADDRESS_DIRECT
);
3483 assert(!src
.abs
&& !src
.negate
);
3484 assert(src
.type
== dst
.type
);
3486 if ((src
.vstride
== 0 && (src
.hstride
== 0 || !align1
)) ||
3487 idx
.file
== BRW_IMMEDIATE_VALUE
) {
3488 /* Trivial, the source is already uniform or the index is a constant.
3489 * We will typically not get here if the optimizer is doing its job, but
3490 * asserting would be mean.
3492 const unsigned i
= idx
.file
== BRW_IMMEDIATE_VALUE
? idx
.ud
: 0;
3494 (align1
? stride(suboffset(src
, i
), 0, 1, 0) :
3495 stride(suboffset(src
, 4 * i
), 0, 4, 1)));
3497 /* From the Haswell PRM section "Register Region Restrictions":
3499 * "The lower bits of the AddressImmediate must not overflow to
3500 * change the register address. The lower 5 bits of Address
3501 * Immediate when added to lower 5 bits of address register gives
3502 * the sub-register offset. The upper bits of Address Immediate
3503 * when added to upper bits of address register gives the register
3504 * address. Any overflow from sub-register offset is dropped."
3506 * Fortunately, for broadcast, we never have a sub-register offset so
3507 * this isn't an issue.
3509 assert(src
.subnr
== 0);
3512 const struct brw_reg addr
=
3513 retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD
);
3514 unsigned offset
= src
.nr
* REG_SIZE
+ src
.subnr
;
3515 /* Limit in bytes of the signed indirect addressing immediate. */
3516 const unsigned limit
= 512;
3518 brw_push_insn_state(p
);
3519 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3520 brw_set_default_predicate_control(p
, BRW_PREDICATE_NONE
);
3522 /* Take into account the component size and horizontal stride. */
3523 assert(src
.vstride
== src
.hstride
+ src
.width
);
3524 brw_SHL(p
, addr
, vec1(idx
),
3525 brw_imm_ud(_mesa_logbase2(type_sz(src
.type
)) +
3528 /* We can only address up to limit bytes using the indirect
3529 * addressing immediate, account for the difference if the source
3530 * register is above this limit.
3532 if (offset
>= limit
) {
3533 brw_ADD(p
, addr
, addr
, brw_imm_ud(offset
- offset
% limit
));
3534 offset
= offset
% limit
;
3537 brw_pop_insn_state(p
);
3539 /* Use indirect addressing to fetch the specified component. */
3540 if (type_sz(src
.type
) > 4 &&
3541 (devinfo
->is_cherryview
|| gen_device_info_is_9lp(devinfo
))) {
3542 /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
3544 * "When source or destination datatype is 64b or operation is
3545 * integer DWord multiply, indirect addressing must not be
3548 * To work around both of this issue, we do two integer MOVs
3549 * insead of one 64-bit MOV. Because no double value should ever
3550 * cross a register boundary, it's safe to use the immediate
3551 * offset in the indirect here to handle adding 4 bytes to the
3552 * offset and avoid the extra ADD to the register file.
3554 brw_MOV(p
, subscript(dst
, BRW_REGISTER_TYPE_D
, 0),
3555 retype(brw_vec1_indirect(addr
.subnr
, offset
),
3556 BRW_REGISTER_TYPE_D
));
3557 brw_MOV(p
, subscript(dst
, BRW_REGISTER_TYPE_D
, 1),
3558 retype(brw_vec1_indirect(addr
.subnr
, offset
+ 4),
3559 BRW_REGISTER_TYPE_D
));
3562 retype(brw_vec1_indirect(addr
.subnr
, offset
), src
.type
));
3565 /* In SIMD4x2 mode the index can be either zero or one, replicate it
3566 * to all bits of a flag register,
3570 stride(brw_swizzle(idx
, BRW_SWIZZLE_XXXX
), 4, 4, 1));
3571 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NONE
);
3572 brw_inst_set_cond_modifier(devinfo
, inst
, BRW_CONDITIONAL_NZ
);
3573 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3575 /* and use predicated SEL to pick the right channel. */
3576 inst
= brw_SEL(p
, dst
,
3577 stride(suboffset(src
, 4), 4, 4, 1),
3578 stride(src
, 4, 4, 1));
3579 brw_inst_set_pred_control(devinfo
, inst
, BRW_PREDICATE_NORMAL
);
3580 brw_inst_set_flag_reg_nr(devinfo
, inst
, 1);
3584 brw_pop_insn_state(p
);
3588 * This instruction is generated as a single-channel align1 instruction by
3589 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3591 * We can't use the typed atomic op in the FS because that has the execution
3592 * mask ANDed with the pixel mask, but we just want to write the one dword for
3595 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
3596 * one u32. So we use the same untyped atomic write message as the pixel
3599 * The untyped atomic operation requires a BUFFER surface type with RAW
3600 * format, and is only accessible through the legacy DATA_CACHE dataport
3603 void brw_shader_time_add(struct brw_codegen
*p
,
3604 struct brw_reg payload
,
3605 uint32_t surf_index
)
3607 const struct gen_device_info
*devinfo
= p
->devinfo
;
3608 const unsigned sfid
= (devinfo
->gen
>= 8 || devinfo
->is_haswell
?
3609 HSW_SFID_DATAPORT_DATA_CACHE_1
:
3610 GEN7_SFID_DATAPORT_DATA_CACHE
);
3611 assert(devinfo
->gen
>= 7);
3613 brw_push_insn_state(p
);
3614 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3615 brw_set_default_mask_control(p
, BRW_MASK_DISABLE
);
3616 brw_set_default_compression_control(p
, BRW_COMPRESSION_NONE
);
3617 brw_inst
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
3619 /* We use brw_vec1_reg and unmasked because we want to increment the given
3622 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
3624 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
3626 brw_set_src1(p
, send
, brw_imm_ud(0));
3627 brw_set_desc(p
, send
, brw_message_desc(devinfo
, 2, 0, false));
3628 brw_inst_set_sfid(devinfo
, send
, sfid
);
3629 brw_inst_set_binding_table_index(devinfo
, send
, surf_index
);
3630 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, false);
3632 brw_pop_insn_state(p
);
3637 * Emit the SEND message for a barrier
3640 brw_barrier(struct brw_codegen
*p
, struct brw_reg src
)
3642 const struct gen_device_info
*devinfo
= p
->devinfo
;
3643 struct brw_inst
*inst
;
3645 assert(devinfo
->gen
>= 7);
3647 brw_push_insn_state(p
);
3648 brw_set_default_access_mode(p
, BRW_ALIGN_1
);
3649 inst
= next_insn(p
, BRW_OPCODE_SEND
);
3650 brw_set_dest(p
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
));
3651 brw_set_src0(p
, inst
, src
);
3652 brw_set_src1(p
, inst
, brw_null_reg());
3653 brw_set_desc(p
, inst
, brw_message_desc(devinfo
, 1, 0, false));
3655 brw_inst_set_sfid(devinfo
, inst
, BRW_SFID_MESSAGE_GATEWAY
);
3656 brw_inst_set_gateway_notify(devinfo
, inst
, 1);
3657 brw_inst_set_gateway_subfuncid(devinfo
, inst
,
3658 BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG
);
3660 brw_inst_set_mask_control(devinfo
, inst
, BRW_MASK_DISABLE
);
3661 brw_pop_insn_state(p
);
3666 * Emit the wait instruction for a barrier
3669 brw_WAIT(struct brw_codegen
*p
)
3671 const struct gen_device_info
*devinfo
= p
->devinfo
;
3672 struct brw_inst
*insn
;
3674 struct brw_reg src
= brw_notification_reg();
3676 insn
= next_insn(p
, BRW_OPCODE_WAIT
);
3677 brw_set_dest(p
, insn
, src
);
3678 brw_set_src0(p
, insn
, src
);
3679 brw_set_src1(p
, insn
, brw_null_reg());
3681 brw_inst_set_exec_size(devinfo
, insn
, BRW_EXECUTE_1
);
3682 brw_inst_set_mask_control(devinfo
, insn
, BRW_MASK_DISABLE
);
3686 * Changes the floating point rounding mode updating the control register
3687 * field defined at cr0.0[5-6] bits. This function supports the changes to
3688 * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
3689 * Only RTNE and RTZ rounding are enabled at nir.
3692 brw_rounding_mode(struct brw_codegen
*p
,
3693 enum brw_rnd_mode mode
)
3695 const unsigned bits
= mode
<< BRW_CR0_RND_MODE_SHIFT
;
3697 if (bits
!= BRW_CR0_RND_MODE_MASK
) {
3698 brw_inst
*inst
= brw_AND(p
, brw_cr0_reg(0), brw_cr0_reg(0),
3699 brw_imm_ud(~BRW_CR0_RND_MODE_MASK
));
3700 brw_inst_set_exec_size(p
->devinfo
, inst
, BRW_EXECUTE_1
);
3702 /* From the Skylake PRM, Volume 7, page 760:
3703 * "Implementation Restriction on Register Access: When the control
3704 * register is used as an explicit source and/or destination, hardware
3705 * does not ensure execution pipeline coherency. Software must set the
3706 * thread control field to ‘switch’ for an instruction that uses
3707 * control register as an explicit operand."
3709 brw_inst_set_thread_control(p
->devinfo
, inst
, BRW_THREAD_SWITCH
);
3713 brw_inst
*inst
= brw_OR(p
, brw_cr0_reg(0), brw_cr0_reg(0),
3715 brw_inst_set_exec_size(p
->devinfo
, inst
, BRW_EXECUTE_1
);
3716 brw_inst_set_thread_control(p
->devinfo
, inst
, BRW_THREAD_SWITCH
);