2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
/* Return true if \p imm fits in the 13-bit immediate field of a compacted
 * instruction: the low 12 bits are stored as-is, and a single sign bit is
 * replicated through the top 20 bits.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfff;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
309 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
312 struct brw_context
*brw
= p
->brw
;
314 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
315 assert(reg
.nr
< 128);
317 gen7_convert_mrf_to_grf(p
, ®
);
319 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
320 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
321 /* Any source modifiers or regions will be ignored, since this just
322 * identifies the MRF/GRF to start reading the message contents from.
323 * Check for some likely failures.
327 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
330 validate_reg(insn
, reg
);
332 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
333 insn
->bits1
.da1
.src0_reg_type
=
334 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
335 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
336 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
337 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
339 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
340 insn
->bits3
.ud
= reg
.dw1
.ud
;
342 /* The Bspec's section titled "Non-present Operands" claims that if src0
343 * is an immediate that src1's type must be the same as that of src0.
345 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
346 * that do not follow this rule. E.g., from the IVB/HSW table:
348 * DataTypeIndex 18-Bit Mapping Mapped Meaning
349 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
351 * And from the SNB table:
353 * DataTypeIndex 18-Bit Mapping Mapped Meaning
354 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
356 * Neither of these cause warnings from the simulator when used,
357 * compacted or otherwise. In fact, all compaction mappings that have an
358 * immediate in src0 use a:ud for src1.
360 * The GM45 instruction compaction tables do not contain mapped meanings
361 * so it's not clear whether it has the restriction. We'll assume it was
362 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
364 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
366 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
368 insn
->bits1
.da1
.src1_reg_type
= BRW_HW_REG_TYPE_UD
;
371 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
372 * for immediate values. Presumably the hardware engineers realized
373 * that the only useful floating-point value that could be represented
374 * in this format is 0.0, which can also be represented as a VF-typed
375 * immediate, so they gave us the previously mentioned mapping on IVB+.
377 * Strangely, we do have a mapping for imm:f in src1, so we don't need
380 * If we see a 0.0:F, change the type to VF so that it can be compacted.
382 if (insn
->bits3
.ud
== 0x0 &&
383 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_F
) {
384 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_IMM_TYPE_VF
;
387 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
388 * set the types to :UD so the instruction can be compacted.
390 if (is_compactable_immediate(insn
->bits3
.ud
) &&
391 insn
->header
.destreg__conditionalmod
== BRW_CONDITIONAL_NONE
&&
392 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_D
&&
393 insn
->bits1
.da1
.dest_reg_type
== BRW_HW_REG_TYPE_D
) {
394 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_TYPE_UD
;
395 insn
->bits1
.da1
.dest_reg_type
= BRW_HW_REG_TYPE_UD
;
400 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
401 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
402 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
403 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
406 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
407 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
411 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
413 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
414 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
417 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
421 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
422 if (reg
.width
== BRW_WIDTH_1
&&
423 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
424 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
425 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
426 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
429 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
430 insn
->bits2
.da1
.src0_width
= reg
.width
;
431 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
435 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
436 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
437 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
438 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
440 /* This is an oddity of the fact we're using the same
441 * descriptions for registers in align_16 as align_1:
443 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
444 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
446 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
453 brw_set_src1(struct brw_compile
*p
,
454 struct brw_instruction
*insn
,
457 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
459 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
460 assert(reg
.nr
< 128);
462 gen7_convert_mrf_to_grf(p
, ®
);
464 validate_reg(insn
, reg
);
466 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
467 insn
->bits1
.da1
.src1_reg_type
=
468 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
469 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
470 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
472 /* Only src1 can be immediate in two-argument instructions.
474 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
476 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
477 insn
->bits3
.ud
= reg
.dw1
.ud
;
480 /* This is a hardware restriction, which may or may not be lifted
483 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
484 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
486 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
487 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
488 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
491 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
492 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
495 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
496 if (reg
.width
== BRW_WIDTH_1
&&
497 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
498 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
499 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
500 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
503 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
504 insn
->bits3
.da1
.src1_width
= reg
.width
;
505 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
509 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
510 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
511 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
512 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
514 /* This is an oddity of the fact we're using the same
515 * descriptions for registers in align_16 as align_1:
517 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
518 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
520 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
526 * Set the Message Descriptor and Extended Message Descriptor fields
529 * \note This zeroes out the Function Control bits, so it must be called
530 * \b before filling out any message-specific data. Callers can
531 * choose not to fill in irrelevant bits; they will be zero.
534 brw_set_message_descriptor(struct brw_compile
*p
,
535 struct brw_instruction
*inst
,
536 enum brw_message_target sfid
,
538 unsigned response_length
,
542 struct brw_context
*brw
= p
->brw
;
544 brw_set_src1(p
, inst
, brw_imm_d(0));
547 inst
->bits3
.generic_gen5
.header_present
= header_present
;
548 inst
->bits3
.generic_gen5
.response_length
= response_length
;
549 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
550 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
553 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
554 inst
->header
.destreg__conditionalmod
= sfid
;
556 /* Set Extended Message Descriptor (ex_desc) */
557 inst
->bits2
.send_gen5
.sfid
= sfid
;
558 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
561 inst
->bits3
.generic
.response_length
= response_length
;
562 inst
->bits3
.generic
.msg_length
= msg_length
;
563 inst
->bits3
.generic
.msg_target
= sfid
;
564 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
568 static void brw_set_math_message( struct brw_compile
*p
,
569 struct brw_instruction
*insn
,
571 unsigned integer_type
,
575 struct brw_context
*brw
= p
->brw
;
577 unsigned response_length
;
579 /* Infer message length from the function */
581 case BRW_MATH_FUNCTION_POW
:
582 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
583 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
584 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
592 /* Infer response length from the function */
594 case BRW_MATH_FUNCTION_SINCOS
:
595 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
604 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
605 msg_length
, response_length
, false, false);
607 insn
->bits3
.math_gen5
.function
= function
;
608 insn
->bits3
.math_gen5
.int_type
= integer_type
;
609 insn
->bits3
.math_gen5
.precision
= low_precision
;
610 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
611 insn
->bits3
.math_gen5
.data_type
= dataType
;
612 insn
->bits3
.math_gen5
.snapshot
= 0;
614 insn
->bits3
.math
.function
= function
;
615 insn
->bits3
.math
.int_type
= integer_type
;
616 insn
->bits3
.math
.precision
= low_precision
;
617 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
618 insn
->bits3
.math
.data_type
= dataType
;
620 insn
->header
.saturate
= 0;
624 static void brw_set_ff_sync_message(struct brw_compile
*p
,
625 struct brw_instruction
*insn
,
627 unsigned response_length
,
630 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
631 1, response_length
, true, end_of_thread
);
632 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
633 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
634 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
635 insn
->bits3
.urb_gen5
.allocate
= allocate
;
636 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
637 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
640 static void brw_set_urb_message( struct brw_compile
*p
,
641 struct brw_instruction
*insn
,
642 enum brw_urb_write_flags flags
,
644 unsigned response_length
,
646 unsigned swizzle_control
)
648 struct brw_context
*brw
= p
->brw
;
650 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
651 msg_length
, response_length
, true,
652 flags
& BRW_URB_WRITE_EOT
);
654 if (flags
& BRW_URB_WRITE_OWORD
) {
655 assert(msg_length
== 2); /* header + one OWORD of data */
656 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
658 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
660 insn
->bits3
.urb_gen7
.offset
= offset
;
661 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
662 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
663 insn
->bits3
.urb_gen7
.per_slot_offset
=
664 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
665 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
666 } else if (brw
->gen
>= 5) {
667 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
668 insn
->bits3
.urb_gen5
.offset
= offset
;
669 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
670 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
671 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
672 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
674 insn
->bits3
.urb
.opcode
= 0; /* ? */
675 insn
->bits3
.urb
.offset
= offset
;
676 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
677 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
678 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
679 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
684 brw_set_dp_write_message(struct brw_compile
*p
,
685 struct brw_instruction
*insn
,
686 unsigned binding_table_index
,
687 unsigned msg_control
,
691 unsigned last_render_target
,
692 unsigned response_length
,
693 unsigned end_of_thread
,
694 unsigned send_commit_msg
)
696 struct brw_context
*brw
= p
->brw
;
700 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
701 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
702 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
704 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
705 } else if (brw
->gen
== 6) {
706 /* Use the render cache for all write messages. */
707 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
709 sfid
= BRW_SFID_DATAPORT_WRITE
;
712 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
713 header_present
, end_of_thread
);
716 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
717 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
718 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
719 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
720 } else if (brw
->gen
== 6) {
721 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
722 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
723 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
724 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
725 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
726 } else if (brw
->gen
== 5) {
727 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
728 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
729 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
730 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
731 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
733 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
734 insn
->bits3
.dp_write
.msg_control
= msg_control
;
735 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
736 insn
->bits3
.dp_write
.msg_type
= msg_type
;
737 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
742 brw_set_dp_read_message(struct brw_compile
*p
,
743 struct brw_instruction
*insn
,
744 unsigned binding_table_index
,
745 unsigned msg_control
,
747 unsigned target_cache
,
750 unsigned response_length
)
752 struct brw_context
*brw
= p
->brw
;
756 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
757 } else if (brw
->gen
== 6) {
758 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
759 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
761 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
763 sfid
= BRW_SFID_DATAPORT_READ
;
766 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
767 header_present
, false);
770 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
771 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
772 insn
->bits3
.gen7_dp
.last_render_target
= 0;
773 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
774 } else if (brw
->gen
== 6) {
775 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
776 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
777 insn
->bits3
.gen6_dp
.last_render_target
= 0;
778 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
779 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
780 } else if (brw
->gen
== 5) {
781 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
782 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
783 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
784 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
785 } else if (brw
->is_g4x
) {
786 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
787 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
788 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
789 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
791 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
792 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
793 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
794 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
799 brw_set_sampler_message(struct brw_compile
*p
,
800 struct brw_instruction
*insn
,
801 unsigned binding_table_index
,
804 unsigned response_length
,
806 unsigned header_present
,
808 unsigned return_format
)
810 struct brw_context
*brw
= p
->brw
;
812 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
813 response_length
, header_present
, false);
816 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
817 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
818 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
819 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
820 } else if (brw
->gen
>= 5) {
821 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
822 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
823 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
824 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
825 } else if (brw
->is_g4x
) {
826 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
827 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
828 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
830 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
831 insn
->bits3
.sampler
.sampler
= sampler
;
832 insn
->bits3
.sampler
.msg_type
= msg_type
;
833 insn
->bits3
.sampler
.return_format
= return_format
;
838 #define next_insn brw_next_insn
839 struct brw_instruction
*
840 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
842 struct brw_instruction
*insn
;
844 if (p
->nr_insn
+ 1 > p
->store_size
) {
846 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
847 struct brw_instruction
, p
->store_size
);
850 p
->next_insn_offset
+= 16;
851 insn
= &p
->store
[p
->nr_insn
++];
852 memcpy(insn
, p
->current
, sizeof(*insn
));
854 /* Reset this one-shot flag:
857 if (p
->current
->header
.destreg__conditionalmod
) {
858 p
->current
->header
.destreg__conditionalmod
= 0;
859 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
862 insn
->header
.opcode
= opcode
;
866 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
871 struct brw_instruction
*insn
= next_insn(p
, opcode
);
872 brw_set_dest(p
, insn
, dest
);
873 brw_set_src0(p
, insn
, src
);
877 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
881 struct brw_reg src1
)
883 struct brw_instruction
*insn
= next_insn(p
, opcode
);
884 brw_set_dest(p
, insn
, dest
);
885 brw_set_src0(p
, insn
, src0
);
886 brw_set_src1(p
, insn
, src1
);
891 get_3src_subreg_nr(struct brw_reg reg
)
893 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
894 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
895 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
897 return reg
.subnr
/ 4;
901 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
908 struct brw_context
*brw
= p
->brw
;
909 struct brw_instruction
*insn
= next_insn(p
, opcode
);
911 gen7_convert_mrf_to_grf(p
, &dest
);
913 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
915 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
916 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
917 assert(dest
.nr
< 128);
918 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
919 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
920 dest
.type
== BRW_REGISTER_TYPE_D
||
921 dest
.type
== BRW_REGISTER_TYPE_UD
);
922 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
923 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
924 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
925 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
926 guess_execution_size(p
, insn
, dest
);
928 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
929 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
930 assert(src0
.nr
< 128);
931 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
932 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
933 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
934 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
935 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
936 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
938 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
939 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
940 assert(src1
.nr
< 128);
941 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
942 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
943 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
944 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
945 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
946 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
947 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
949 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
950 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
951 assert(src2
.nr
< 128);
952 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
953 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
954 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
955 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
956 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
957 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
960 /* Set both the source and destination types based on dest.type,
961 * ignoring the source register types. The MAD and LRP emitters ensure
962 * that all four types are float. The BFE and BFI2 emitters, however,
963 * may send us mixed D and UD types and want us to ignore that and use
964 * the destination type.
967 case BRW_REGISTER_TYPE_F
:
968 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
969 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
971 case BRW_REGISTER_TYPE_D
:
972 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
973 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
975 case BRW_REGISTER_TYPE_UD
:
976 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
977 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
/***********************************************************************
 * Convenience routines.
 */
/* NOTE(review): the #define lines were lost in extraction; macro heads
 * reconstructed from the visible bodies — confirm against upstream.
 */

/* Emit a one-source ALU instruction (e.g. MOV, NOT, RNDD). */
#define ALU1(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0)           \
{                                                               \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);             \
}

/* Emit a two-source ALU instruction (e.g. AND, OR, SHL). */
#define ALU2(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1)           \
{                                                               \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
}

/* Emit a three-source ALU instruction (e.g. BFE, BFI2). */
#define ALU3(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Three-source ALU instruction whose operands must all be float
 * (e.g. MAD, LRP) — asserted before emission.
 */
#define ALU3F(OP)                                               \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F);                    \
   assert(src0.type == BRW_REGISTER_TYPE_F);                    \
   assert(src1.type == BRW_REGISTER_TYPE_F);                    \
   assert(src2.type == BRW_REGISTER_TYPE_F);                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
/* NOTE(review): the #define line was lost in extraction; macro head
 * reconstructed from the visible body — confirm against upstream.
 */
#define ROUND(OP)                                                       \
void brw_##OP(struct brw_compile *p,                                    \
              struct brw_reg dest,                                      \
              struct brw_reg src)                                       \
{                                                                       \
   struct brw_instruction *rnd, *add;                                   \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                 \
   brw_set_dest(p, rnd, dest);                                          \
   brw_set_src0(p, rnd, src);                                           \
                                                                        \
   if (p->brw->gen < 6) {                                               \
      /* turn on round-increments */                                    \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;          \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                    \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;             \
   }                                                                    \
}
1094 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1095 struct brw_reg dest
,
1096 struct brw_reg src0
,
1097 struct brw_reg src1
)
1100 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1101 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1102 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1103 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1104 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1107 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1108 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1109 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1110 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1111 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1114 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1117 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1118 struct brw_reg dest
,
1119 struct brw_reg src0
,
1120 struct brw_reg src1
)
1122 assert(dest
.type
== src0
.type
);
1123 assert(src0
.type
== src1
.type
);
1124 switch (src0
.type
) {
1125 case BRW_REGISTER_TYPE_B
:
1126 case BRW_REGISTER_TYPE_UB
:
1127 case BRW_REGISTER_TYPE_W
:
1128 case BRW_REGISTER_TYPE_UW
:
1129 case BRW_REGISTER_TYPE_D
:
1130 case BRW_REGISTER_TYPE_UD
:
1133 assert(!"Bad type for brw_AVG");
1136 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1139 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1140 struct brw_reg dest
,
1141 struct brw_reg src0
,
1142 struct brw_reg src1
)
1145 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1146 src0
.type
== BRW_REGISTER_TYPE_UD
||
1147 src1
.type
== BRW_REGISTER_TYPE_D
||
1148 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1149 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1152 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1153 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1154 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1155 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1156 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1159 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1160 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1161 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1162 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1163 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1166 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1167 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1168 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1169 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1171 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1175 void brw_NOP(struct brw_compile
*p
)
1177 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1178 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1179 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1180 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1187 /***********************************************************************
1188 * Comparisons, if/else/endif
1191 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1192 struct brw_reg dest
,
1193 struct brw_reg src0
,
1194 struct brw_reg src1
)
1196 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1198 insn
->header
.execution_size
= 1;
1199 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1200 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1202 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1208 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1210 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1212 p
->if_stack_depth
++;
1213 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1214 p
->if_stack_array_size
*= 2;
1215 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1216 p
->if_stack_array_size
);
1220 static struct brw_instruction
*
1221 pop_if_stack(struct brw_compile
*p
)
1223 p
->if_stack_depth
--;
1224 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1228 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1230 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1231 p
->loop_stack_array_size
*= 2;
1232 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1233 p
->loop_stack_array_size
);
1234 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1235 p
->loop_stack_array_size
);
1238 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1239 p
->loop_stack_depth
++;
1240 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1243 static struct brw_instruction
*
1244 get_inner_do_insn(struct brw_compile
*p
)
1246 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1249 /* EU takes the value from the flag register and pushes it onto some
1250 * sort of a stack (presumably merging with any flag value already on
1251 * the stack). Within an if block, the flags at the top of the stack
1252 * control execution on each channel of the unit, eg. on each of the
1253 * 16 pixel values in our wm programs.
1255 * When the matching 'else' instruction is reached (presumably by
1256 * countdown of the instruction count patched in by our ELSE/ENDIF
1257 * functions), the relevent flags are inverted.
1259 * When the matching 'endif' instruction is reached, the flags are
1260 * popped off. If the stack is now empty, normal execution resumes.
1262 struct brw_instruction
*
1263 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1265 struct brw_context
*brw
= p
->brw
;
1266 struct brw_instruction
*insn
;
1268 insn
= next_insn(p
, BRW_OPCODE_IF
);
1270 /* Override the defaults for this instruction:
1273 brw_set_dest(p
, insn
, brw_ip_reg());
1274 brw_set_src0(p
, insn
, brw_ip_reg());
1275 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1276 } else if (brw
->gen
== 6) {
1277 brw_set_dest(p
, insn
, brw_imm_w(0));
1278 insn
->bits1
.branch_gen6
.jump_count
= 0;
1279 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1280 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1282 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1283 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1284 brw_set_src1(p
, insn
, brw_imm_ud(0));
1285 insn
->bits3
.break_cont
.jip
= 0;
1286 insn
->bits3
.break_cont
.uip
= 0;
1289 insn
->header
.execution_size
= execute_size
;
1290 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1291 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1292 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1293 if (!p
->single_program_flow
)
1294 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1296 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1298 push_if_stack(p
, insn
);
1299 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1303 /* This function is only used for gen6-style IF instructions with an
1304 * embedded comparison (conditional modifier). It is not used on gen7.
1306 struct brw_instruction
*
1307 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1308 struct brw_reg src0
, struct brw_reg src1
)
1310 struct brw_instruction
*insn
;
1312 insn
= next_insn(p
, BRW_OPCODE_IF
);
1314 brw_set_dest(p
, insn
, brw_imm_w(0));
1315 if (p
->compressed
) {
1316 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1318 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1320 insn
->bits1
.branch_gen6
.jump_count
= 0;
1321 brw_set_src0(p
, insn
, src0
);
1322 brw_set_src1(p
, insn
, src1
);
1324 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1325 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1326 insn
->header
.destreg__conditionalmod
= conditional
;
1328 if (!p
->single_program_flow
)
1329 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1331 push_if_stack(p
, insn
);
1336 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1339 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1340 struct brw_instruction
*if_inst
,
1341 struct brw_instruction
*else_inst
)
1343 /* The next instruction (where the ENDIF would be, if it existed) */
1344 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1346 assert(p
->single_program_flow
);
1347 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1348 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1349 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1351 /* Convert IF to an ADD instruction that moves the instruction pointer
1352 * to the first instruction of the ELSE block. If there is no ELSE
1353 * block, point to where ENDIF would be. Reverse the predicate.
1355 * There's no need to execute an ENDIF since we don't need to do any
1356 * stack operations, and if we're currently executing, we just want to
1357 * continue normally.
1359 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1360 if_inst
->header
.predicate_inverse
= 1;
1362 if (else_inst
!= NULL
) {
1363 /* Convert ELSE to an ADD instruction that points where the ENDIF
1366 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1368 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1369 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1371 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1376 * Patch IF and ELSE instructions with appropriate jump targets.
1379 patch_IF_ELSE(struct brw_compile
*p
,
1380 struct brw_instruction
*if_inst
,
1381 struct brw_instruction
*else_inst
,
1382 struct brw_instruction
*endif_inst
)
1384 struct brw_context
*brw
= p
->brw
;
1386 /* We shouldn't be patching IF and ELSE instructions in single program flow
1387 * mode when gen < 6, because in single program flow mode on those
1388 * platforms, we convert flow control instructions to conditional ADDs that
1389 * operate on IP (see brw_ENDIF).
1391 * However, on Gen6, writing to IP doesn't work in single program flow mode
1392 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1393 * not be updated by non-flow control instructions."). And on later
1394 * platforms, there is no significant benefit to converting control flow
1395 * instructions to conditional ADDs. So we do patch IF and ELSE
1396 * instructions in single program flow mode on those platforms.
1399 assert(!p
->single_program_flow
);
1401 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1402 assert(endif_inst
!= NULL
);
1403 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1406 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1407 * requires 2 chunks.
1412 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1413 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1415 if (else_inst
== NULL
) {
1416 /* Patch IF -> ENDIF */
1418 /* Turn it into an IFF, which means no mask stack operations for
1419 * all-false and jumping past the ENDIF.
1421 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1422 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1423 if_inst
->bits3
.if_else
.pop_count
= 0;
1424 if_inst
->bits3
.if_else
.pad0
= 0;
1425 } else if (brw
->gen
== 6) {
1426 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1427 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1429 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1430 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1433 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1435 /* Patch IF -> ELSE */
1437 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1438 if_inst
->bits3
.if_else
.pop_count
= 0;
1439 if_inst
->bits3
.if_else
.pad0
= 0;
1440 } else if (brw
->gen
== 6) {
1441 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1444 /* Patch ELSE -> ENDIF */
1446 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1449 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1450 else_inst
->bits3
.if_else
.pop_count
= 1;
1451 else_inst
->bits3
.if_else
.pad0
= 0;
1452 } else if (brw
->gen
== 6) {
1453 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1454 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1456 /* The IF instruction's JIP should point just past the ELSE */
1457 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1458 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1459 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1460 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1466 brw_ELSE(struct brw_compile
*p
)
1468 struct brw_context
*brw
= p
->brw
;
1469 struct brw_instruction
*insn
;
1471 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1474 brw_set_dest(p
, insn
, brw_ip_reg());
1475 brw_set_src0(p
, insn
, brw_ip_reg());
1476 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1477 } else if (brw
->gen
== 6) {
1478 brw_set_dest(p
, insn
, brw_imm_w(0));
1479 insn
->bits1
.branch_gen6
.jump_count
= 0;
1480 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1481 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1483 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1484 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1485 brw_set_src1(p
, insn
, brw_imm_ud(0));
1486 insn
->bits3
.break_cont
.jip
= 0;
1487 insn
->bits3
.break_cont
.uip
= 0;
1490 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1491 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1492 if (!p
->single_program_flow
)
1493 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1495 push_if_stack(p
, insn
);
1499 brw_ENDIF(struct brw_compile
*p
)
1501 struct brw_context
*brw
= p
->brw
;
1502 struct brw_instruction
*insn
= NULL
;
1503 struct brw_instruction
*else_inst
= NULL
;
1504 struct brw_instruction
*if_inst
= NULL
;
1505 struct brw_instruction
*tmp
;
1506 bool emit_endif
= true;
1508 /* In single program flow mode, we can express IF and ELSE instructions
1509 * equivalently as ADD instructions that operate on IP. On platforms prior
1510 * to Gen6, flow control instructions cause an implied thread switch, so
1511 * this is a significant savings.
1513 * However, on Gen6, writing to IP doesn't work in single program flow mode
1514 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1515 * not be updated by non-flow control instructions."). And on later
1516 * platforms, there is no significant benefit to converting control flow
1517 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1520 if (brw
->gen
< 6 && p
->single_program_flow
)
1524 * A single next_insn() may change the base adress of instruction store
1525 * memory(p->store), so call it first before referencing the instruction
1526 * store pointer from an index
1529 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1531 /* Pop the IF and (optional) ELSE instructions from the stack */
1532 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1533 tmp
= pop_if_stack(p
);
1534 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1536 tmp
= pop_if_stack(p
);
1541 /* ENDIF is useless; don't bother emitting it. */
1542 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1547 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1548 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1549 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1550 } else if (brw
->gen
== 6) {
1551 brw_set_dest(p
, insn
, brw_imm_w(0));
1552 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1553 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1555 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1556 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1557 brw_set_src1(p
, insn
, brw_imm_ud(0));
1560 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1561 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1562 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1564 /* Also pop item off the stack in the endif instruction: */
1566 insn
->bits3
.if_else
.jump_count
= 0;
1567 insn
->bits3
.if_else
.pop_count
= 1;
1568 insn
->bits3
.if_else
.pad0
= 0;
1569 } else if (brw
->gen
== 6) {
1570 insn
->bits1
.branch_gen6
.jump_count
= 2;
1572 insn
->bits3
.break_cont
.jip
= 2;
1574 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1577 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1579 struct brw_context
*brw
= p
->brw
;
1580 struct brw_instruction
*insn
;
1582 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1583 if (brw
->gen
>= 6) {
1584 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1585 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1586 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1588 brw_set_dest(p
, insn
, brw_ip_reg());
1589 brw_set_src0(p
, insn
, brw_ip_reg());
1590 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1591 insn
->bits3
.if_else
.pad0
= 0;
1592 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1594 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1595 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1600 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1602 struct brw_instruction
*insn
;
1604 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1605 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1606 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1607 brw_set_dest(p
, insn
, brw_ip_reg());
1608 brw_set_src0(p
, insn
, brw_ip_reg());
1609 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1611 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1612 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1616 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1618 struct brw_instruction
*insn
;
1619 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1620 brw_set_dest(p
, insn
, brw_ip_reg());
1621 brw_set_src0(p
, insn
, brw_ip_reg());
1622 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1623 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1624 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1625 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1626 insn
->bits3
.if_else
.pad0
= 0;
1627 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1631 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1633 struct brw_instruction
*insn
;
1635 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1636 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1637 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1638 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1640 if (p
->compressed
) {
1641 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1643 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1644 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1651 * The DO/WHILE is just an unterminated loop -- break or continue are
1652 * used for control within the loop. We have a few ways they can be
1655 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1656 * jip and no DO instruction.
1658 * For non-uniform control flow pre-gen6, there's a DO instruction to
1659 * push the mask, and a WHILE to jump back, and BREAK to get out and
1662 * For gen6, there's no more mask stack, so no need for DO. WHILE
1663 * just points back to the first instruction of the loop.
1665 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1667 struct brw_context
*brw
= p
->brw
;
1669 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1670 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1671 return &p
->store
[p
->nr_insn
];
1673 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1675 push_loop_stack(p
, insn
);
1677 /* Override the defaults for this instruction:
1679 brw_set_dest(p
, insn
, brw_null_reg());
1680 brw_set_src0(p
, insn
, brw_null_reg());
1681 brw_set_src1(p
, insn
, brw_null_reg());
1683 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1684 insn
->header
.execution_size
= execute_size
;
1685 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1686 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1687 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1694 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1697 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1698 * nesting, since it can always just point to the end of the block/current loop.
1701 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1703 struct brw_context
*brw
= p
->brw
;
1704 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1705 struct brw_instruction
*inst
;
1706 int br
= (brw
->gen
== 5) ? 2 : 1;
1708 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1709 /* If the jump count is != 0, that means that this instruction has already
1710 * been patched because it's part of a loop inside of the one we're
1713 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1714 inst
->bits3
.if_else
.jump_count
== 0) {
1715 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1716 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1717 inst
->bits3
.if_else
.jump_count
== 0) {
1718 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1723 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1725 struct brw_context
*brw
= p
->brw
;
1726 struct brw_instruction
*insn
, *do_insn
;
1732 if (brw
->gen
>= 7) {
1733 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1734 do_insn
= get_inner_do_insn(p
);
1736 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1737 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1738 brw_set_src1(p
, insn
, brw_imm_ud(0));
1739 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1741 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1742 } else if (brw
->gen
== 6) {
1743 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1744 do_insn
= get_inner_do_insn(p
);
1746 brw_set_dest(p
, insn
, brw_imm_w(0));
1747 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1748 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1749 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1751 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1753 if (p
->single_program_flow
) {
1754 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1755 do_insn
= get_inner_do_insn(p
);
1757 brw_set_dest(p
, insn
, brw_ip_reg());
1758 brw_set_src0(p
, insn
, brw_ip_reg());
1759 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1760 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1762 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1763 do_insn
= get_inner_do_insn(p
);
1765 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1767 brw_set_dest(p
, insn
, brw_ip_reg());
1768 brw_set_src0(p
, insn
, brw_ip_reg());
1769 brw_set_src1(p
, insn
, brw_imm_d(0));
1771 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1772 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1773 insn
->bits3
.if_else
.pop_count
= 0;
1774 insn
->bits3
.if_else
.pad0
= 0;
1776 brw_patch_break_cont(p
, insn
);
1779 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1780 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1782 p
->loop_stack_depth
--;
1787 /* To integrate with the above, it makes sense that the comparison
1788 * instruction should populate the flag register. It might be simpler
1789 * just to use the flag reg for most WM tasks?
1791 void brw_CMP(struct brw_compile
*p
,
1792 struct brw_reg dest
,
1793 unsigned conditional
,
1794 struct brw_reg src0
,
1795 struct brw_reg src1
)
1797 struct brw_context
*brw
= p
->brw
;
1798 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1800 insn
->header
.destreg__conditionalmod
= conditional
;
1801 brw_set_dest(p
, insn
, dest
);
1802 brw_set_src0(p
, insn
, src0
);
1803 brw_set_src1(p
, insn
, src1
);
1805 /* guess_execution_size(insn, src0); */
1808 /* Make it so that future instructions will use the computed flag
1809 * value until brw_set_predicate_control_flag_value() is called
1812 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1814 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1815 p
->flag_value
= 0xff;
1818 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1820 * "Any CMP instruction with a null destination must use a {switch}."
1822 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1823 * mentioned on their work-arounds pages.
1825 if (brw
->gen
== 7) {
1826 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1827 dest
.nr
== BRW_ARF_NULL
) {
1828 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1833 /* Issue 'wait' instruction for n1, host could program MMIO
1834 to wake up thread. */
1835 void brw_WAIT (struct brw_compile
*p
)
1837 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1838 struct brw_reg src
= brw_notification_1_reg();
1840 brw_set_dest(p
, insn
, src
);
1841 brw_set_src0(p
, insn
, src
);
1842 brw_set_src1(p
, insn
, brw_null_reg());
1843 insn
->header
.execution_size
= 0; /* must */
1844 insn
->header
.predicate_control
= 0;
1845 insn
->header
.compression_control
= 0;
1849 /***********************************************************************
1850 * Helpers for the various SEND message types:
1853 /** Extended math function, float[8].
1855 void brw_math( struct brw_compile
*p
,
1856 struct brw_reg dest
,
1858 unsigned msg_reg_nr
,
1861 unsigned precision
)
1863 struct brw_context
*brw
= p
->brw
;
1865 if (brw
->gen
>= 6) {
1866 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1868 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1869 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1870 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1872 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1874 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1876 /* Source modifiers are ignored for extended math instructions on Gen6. */
1877 if (brw
->gen
== 6) {
1878 assert(!src
.negate
);
1882 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1883 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1884 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1885 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1887 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1890 /* Math is the same ISA format as other opcodes, except that CondModifier
1891 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1893 insn
->header
.destreg__conditionalmod
= function
;
1895 brw_set_dest(p
, insn
, dest
);
1896 brw_set_src0(p
, insn
, src
);
1897 brw_set_src1(p
, insn
, brw_null_reg());
1899 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1901 /* Example code doesn't set predicate_control for send
1904 insn
->header
.predicate_control
= 0;
1905 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1907 brw_set_dest(p
, insn
, dest
);
1908 brw_set_src0(p
, insn
, src
);
1909 brw_set_math_message(p
,
1912 src
.type
== BRW_REGISTER_TYPE_D
,
1918 /** Extended math function, float[8].
1920 void brw_math2(struct brw_compile
*p
,
1921 struct brw_reg dest
,
1923 struct brw_reg src0
,
1924 struct brw_reg src1
)
1926 struct brw_context
*brw
= p
->brw
;
1927 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1929 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1930 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1931 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1932 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1934 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1935 if (brw
->gen
== 6) {
1936 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1937 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1940 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1941 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1942 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1943 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1944 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1946 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1947 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1950 /* Source modifiers are ignored for extended math instructions on Gen6. */
1951 if (brw
->gen
== 6) {
1952 assert(!src0
.negate
);
1954 assert(!src1
.negate
);
1958 /* Math is the same ISA format as other opcodes, except that CondModifier
1959 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1961 insn
->header
.destreg__conditionalmod
= function
;
1963 brw_set_dest(p
, insn
, dest
);
1964 brw_set_src0(p
, insn
, src0
);
1965 brw_set_src1(p
, insn
, src1
);
1970 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1971 * using a constant offset per channel.
1973 * The offset must be aligned to oword size (16 bytes). Used for
1974 * register spilling.
1976 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1981 struct brw_context
*brw
= p
->brw
;
1982 uint32_t msg_control
, msg_type
;
1988 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1990 if (num_regs
== 1) {
1991 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1994 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1998 /* Set up the message header. This is g0, with g0.2 filled with
1999 * the offset. We don't want to leave our offset around in g0 or
2000 * it'll screw up texture samples, so set it up inside the message
2004 brw_push_insn_state(p
);
2005 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2006 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2008 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2010 /* set message header global offset field (reg 0, element 2) */
2012 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2014 2), BRW_REGISTER_TYPE_UD
),
2015 brw_imm_ud(offset
));
2017 brw_pop_insn_state(p
);
2021 struct brw_reg dest
;
2022 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2023 int send_commit_msg
;
2024 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2025 BRW_REGISTER_TYPE_UW
);
2027 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
2028 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2029 src_header
= vec16(src_header
);
2031 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2032 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2034 /* Until gen6, writes followed by reads from the same location
2035 * are not guaranteed to be ordered unless write_commit is set.
2036 * If set, then a no-op write is issued to the destination
2037 * register to set a dependency, and a read from the destination
2038 * can be used to ensure the ordering.
2040 * For gen6, only writes between different threads need ordering
2041 * protection. Our use of DP writes is all about register
2042 * spilling within a thread.
2044 if (brw
->gen
>= 6) {
2045 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2046 send_commit_msg
= 0;
2049 send_commit_msg
= 1;
2052 brw_set_dest(p
, insn
, dest
);
2053 if (brw
->gen
>= 6) {
2054 brw_set_src0(p
, insn
, mrf
);
2056 brw_set_src0(p
, insn
, brw_null_reg());
2060 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2062 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2064 brw_set_dp_write_message(p
,
2066 255, /* binding table index (255=stateless) */
2070 true, /* header_present */
2071 0, /* not a render target */
2072 send_commit_msg
, /* response_length */
2080 * Read a block of owords (half a GRF each) from the scratch buffer
2081 * using a constant index per channel.
2083 * Offset must be aligned to oword size (16 bytes). Used for register
2087 brw_oword_block_read_scratch(struct brw_compile
*p
,
2088 struct brw_reg dest
,
2093 struct brw_context
*brw
= p
->brw
;
2094 uint32_t msg_control
;
2100 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2101 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2103 if (num_regs
== 1) {
2104 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2107 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2112 brw_push_insn_state(p
);
2113 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2114 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2116 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2118 /* set message header global offset field (reg 0, element 2) */
2120 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2122 2), BRW_REGISTER_TYPE_UD
),
2123 brw_imm_ud(offset
));
2125 brw_pop_insn_state(p
);
2129 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2131 assert(insn
->header
.predicate_control
== 0);
2132 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2133 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2135 brw_set_dest(p
, insn
, dest
); /* UW? */
2136 if (brw
->gen
>= 6) {
2137 brw_set_src0(p
, insn
, mrf
);
2139 brw_set_src0(p
, insn
, brw_null_reg());
2142 brw_set_dp_read_message(p
,
2144 255, /* binding table index (255=stateless) */
2146 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2147 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2149 true, /* header_present */
2155 gen7_block_read_scratch(struct brw_compile
*p
,
2156 struct brw_reg dest
,
2160 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2162 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2164 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2165 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2167 brw_set_dest(p
, insn
, dest
);
2169 /* The HW requires that the header is present; this is to get the g0.5
2172 bool header_present
= true;
2173 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2175 brw_set_message_descriptor(p
, insn
,
2176 GEN7_SFID_DATAPORT_DATA_CACHE
,
2177 1, /* mlen: just g0 */
2182 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2184 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2185 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2187 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2188 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2189 * is 32 bytes, which happens to be the size of a register.
2192 assert(offset
< (1 << 12));
2193 insn
->bits3
.ud
|= offset
;
2197 * Read a float[4] vector from the data port Data Cache (const buffer).
2198 * Location (in buffer) should be a multiple of 16.
2199 * Used for fetching shader constants.
2201 void brw_oword_block_read(struct brw_compile
*p
,
2202 struct brw_reg dest
,
2205 uint32_t bind_table_index
)
2207 struct brw_context
*brw
= p
->brw
;
2209 /* On newer hardware, offset is in units of owords. */
2213 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2215 brw_push_insn_state(p
);
2216 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2217 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2218 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2220 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2222 /* set message header global offset field (reg 0, element 2) */
2224 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2226 2), BRW_REGISTER_TYPE_UD
),
2227 brw_imm_ud(offset
));
2229 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2230 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2232 /* cast dest to a uword[8] vector */
2233 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2235 brw_set_dest(p
, insn
, dest
);
2236 if (brw
->gen
>= 6) {
2237 brw_set_src0(p
, insn
, mrf
);
2239 brw_set_src0(p
, insn
, brw_null_reg());
2242 brw_set_dp_read_message(p
,
2245 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2246 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2247 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2249 true, /* header_present */
2250 1); /* response_length (1 reg, 2 owords!) */
2252 brw_pop_insn_state(p
);
2256 void brw_fb_WRITE(struct brw_compile
*p
,
2258 unsigned msg_reg_nr
,
2259 struct brw_reg src0
,
2260 unsigned msg_control
,
2261 unsigned binding_table_index
,
2262 unsigned msg_length
,
2263 unsigned response_length
,
2265 bool header_present
)
2267 struct brw_context
*brw
= p
->brw
;
2268 struct brw_instruction
*insn
;
2270 struct brw_reg dest
;
2272 if (dispatch_width
== 16)
2273 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2275 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2277 if (brw
->gen
>= 6) {
2278 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2280 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2282 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2284 if (brw
->gen
>= 6) {
2285 /* headerless version, just submit color payload */
2286 src0
= brw_message_reg(msg_reg_nr
);
2288 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2290 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2292 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2295 brw_set_dest(p
, insn
, dest
);
2296 brw_set_src0(p
, insn
, src0
);
2297 brw_set_dp_write_message(p
,
2299 binding_table_index
,
2304 eot
, /* last render target write */
2307 0 /* send_commit_msg */);
2312 * Texture sample instruction.
2313 * Note: the msg_type plus msg_length values determine exactly what kind
2314 * of sampling operation is performed. See volume 4, page 161 of docs.
2316 void brw_SAMPLE(struct brw_compile
*p
,
2317 struct brw_reg dest
,
2318 unsigned msg_reg_nr
,
2319 struct brw_reg src0
,
2320 unsigned binding_table_index
,
2323 unsigned response_length
,
2324 unsigned msg_length
,
2325 unsigned header_present
,
2327 unsigned return_format
)
2329 struct brw_context
*brw
= p
->brw
;
2330 struct brw_instruction
*insn
;
2332 if (msg_reg_nr
!= -1)
2333 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2335 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2336 insn
->header
.predicate_control
= 0; /* XXX */
2338 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2340 * "Instruction compression is not allowed for this instruction (that
2341 * is, send). The hardware behavior is undefined if this instruction is
2342 * set as compressed. However, compress control can be set to "SecHalf"
2343 * to affect the EMask generation."
2345 * No similar wording is found in later PRMs, but there are examples
2346 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2347 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2348 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2350 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2351 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2354 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2356 brw_set_dest(p
, insn
, dest
);
2357 brw_set_src0(p
, insn
, src0
);
2358 brw_set_sampler_message(p
, insn
,
2359 binding_table_index
,
2369 /* All these variables are pretty confusing - we might be better off
2370 * using bitmasks and macros for this, in the old style. Or perhaps
2371 * just having the caller instantiate the fields in dword3 itself.
2373 void brw_urb_WRITE(struct brw_compile
*p
,
2374 struct brw_reg dest
,
2375 unsigned msg_reg_nr
,
2376 struct brw_reg src0
,
2377 enum brw_urb_write_flags flags
,
2378 unsigned msg_length
,
2379 unsigned response_length
,
2383 struct brw_context
*brw
= p
->brw
;
2384 struct brw_instruction
*insn
;
2386 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2388 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2389 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2390 brw_push_insn_state(p
);
2391 brw_set_access_mode(p
, BRW_ALIGN_1
);
2392 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2393 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2394 BRW_REGISTER_TYPE_UD
),
2395 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2396 brw_imm_ud(0xff00));
2397 brw_pop_insn_state(p
);
2400 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2402 assert(msg_length
< BRW_MAX_MRF
);
2404 brw_set_dest(p
, insn
, dest
);
2405 brw_set_src0(p
, insn
, src0
);
2406 brw_set_src1(p
, insn
, brw_imm_d(0));
2409 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2411 brw_set_urb_message(p
,
2421 brw_find_next_block_end(struct brw_compile
*p
, int start_offset
)
2424 void *store
= p
->store
;
2426 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2427 offset
= next_offset(store
, offset
)) {
2428 struct brw_instruction
*insn
= store
+ offset
;
2430 switch (insn
->header
.opcode
) {
2431 case BRW_OPCODE_ENDIF
:
2432 case BRW_OPCODE_ELSE
:
2433 case BRW_OPCODE_WHILE
:
2434 case BRW_OPCODE_HALT
:
2442 /* There is no DO instruction on gen6, so to find the end of the loop
2443 * we have to see if the loop is jumping back before our start
2447 brw_find_loop_end(struct brw_compile
*p
, int start_offset
)
2449 struct brw_context
*brw
= p
->brw
;
2452 void *store
= p
->store
;
2454 /* Always start after the instruction (such as a WHILE) we're trying to fix
2457 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2458 offset
= next_offset(store
, offset
)) {
2459 struct brw_instruction
*insn
= store
+ offset
;
2461 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2462 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2463 : insn
->bits3
.break_cont
.jip
;
2464 if (offset
+ jip
* scale
<= start_offset
)
2468 assert(!"not reached");
2469 return start_offset
;
2472 /* After program generation, go back and update the UIP and JIP of
2473 * BREAK, CONT, and HALT instructions to their correct locations.
2476 brw_set_uip_jip(struct brw_compile
*p
)
2478 struct brw_context
*brw
= p
->brw
;
2481 void *store
= p
->store
;
2486 for (offset
= 0; offset
< p
->next_insn_offset
;
2487 offset
= next_offset(store
, offset
)) {
2488 struct brw_instruction
*insn
= store
+ offset
;
2490 if (insn
->header
.cmpt_control
) {
2491 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2492 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2493 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2494 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2498 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2499 switch (insn
->header
.opcode
) {
2500 case BRW_OPCODE_BREAK
:
2501 assert(block_end_offset
!= 0);
2502 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2503 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2504 insn
->bits3
.break_cont
.uip
=
2505 (brw_find_loop_end(p
, offset
) - offset
+
2506 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2508 case BRW_OPCODE_CONTINUE
:
2509 assert(block_end_offset
!= 0);
2510 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2511 insn
->bits3
.break_cont
.uip
=
2512 (brw_find_loop_end(p
, offset
) - offset
) / scale
;
2514 assert(insn
->bits3
.break_cont
.uip
!= 0);
2515 assert(insn
->bits3
.break_cont
.jip
!= 0);
2518 case BRW_OPCODE_ENDIF
:
2519 if (block_end_offset
== 0)
2520 insn
->bits3
.break_cont
.jip
= 2;
2522 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2525 case BRW_OPCODE_HALT
:
2526 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2528 * "In case of the halt instruction not inside any conditional
2529 * code block, the value of <JIP> and <UIP> should be the
2530 * same. In case of the halt instruction inside conditional code
2531 * block, the <UIP> should be the end of the program, and the
2532 * <JIP> should be end of the most inner conditional code block."
2534 * The uip will have already been set by whoever set up the
2537 if (block_end_offset
== 0) {
2538 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2540 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2542 assert(insn
->bits3
.break_cont
.uip
!= 0);
2543 assert(insn
->bits3
.break_cont
.jip
!= 0);
2549 void brw_ff_sync(struct brw_compile
*p
,
2550 struct brw_reg dest
,
2551 unsigned msg_reg_nr
,
2552 struct brw_reg src0
,
2554 unsigned response_length
,
2557 struct brw_context
*brw
= p
->brw
;
2558 struct brw_instruction
*insn
;
2560 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2562 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2563 brw_set_dest(p
, insn
, dest
);
2564 brw_set_src0(p
, insn
, src0
);
2565 brw_set_src1(p
, insn
, brw_imm_d(0));
2568 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2570 brw_set_ff_sync_message(p
,
2578 * Emit the SEND instruction necessary to generate stream output data on Gen6
2579 * (for transform feedback).
2581 * If send_commit_msg is true, this is the last piece of stream output data
2582 * from this thread, so send the data as a committed write. According to the
2583 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2585 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2586 * writes are complete by sending the final write as a committed write."
2589 brw_svb_write(struct brw_compile
*p
,
2590 struct brw_reg dest
,
2591 unsigned msg_reg_nr
,
2592 struct brw_reg src0
,
2593 unsigned binding_table_index
,
2594 bool send_commit_msg
)
2596 struct brw_instruction
*insn
;
2598 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2600 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2601 brw_set_dest(p
, insn
, dest
);
2602 brw_set_src0(p
, insn
, src0
);
2603 brw_set_src1(p
, insn
, brw_imm_d(0));
2604 brw_set_dp_write_message(p
, insn
,
2605 binding_table_index
,
2606 0, /* msg_control: ignored */
2607 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2609 true, /* header_present */
2610 0, /* last_render_target: ignored */
2611 send_commit_msg
, /* response_length */
2612 0, /* end_of_thread */
2613 send_commit_msg
); /* send_commit_msg */
2617 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2618 struct brw_instruction
*insn
,
2620 unsigned bind_table_index
,
2621 unsigned msg_length
,
2622 unsigned response_length
,
2623 bool header_present
)
2625 if (p
->brw
->is_haswell
) {
2626 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2627 msg_length
, response_length
,
2628 header_present
, false);
2631 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2632 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2633 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2635 insn
->bits3
.gen7_dp
.msg_type
=
2636 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2638 insn
->bits3
.gen7_dp
.msg_type
=
2639 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2643 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2644 msg_length
, response_length
,
2645 header_present
, false);
2647 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2649 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2650 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2653 if (response_length
)
2654 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2656 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2657 insn
->bits3
.ud
|= atomic_op
<< 8;
2661 brw_untyped_atomic(struct brw_compile
*p
,
2662 struct brw_reg dest
,
2665 unsigned bind_table_index
,
2666 unsigned msg_length
,
2667 unsigned response_length
) {
2668 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2670 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2671 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2672 brw_set_src1(p
, insn
, brw_imm_d(0));
2673 brw_set_dp_untyped_atomic_message(
2674 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2675 insn
->header
.access_mode
== BRW_ALIGN_1
);
2679 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2680 struct brw_instruction
*insn
,
2681 unsigned bind_table_index
,
2682 unsigned msg_length
,
2683 unsigned response_length
,
2684 bool header_present
)
2686 const unsigned dispatch_width
=
2687 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2688 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2690 if (p
->brw
->is_haswell
) {
2691 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2692 msg_length
, response_length
,
2693 header_present
, false);
2695 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2697 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2698 msg_length
, response_length
,
2699 header_present
, false);
2701 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2704 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2705 if (dispatch_width
== 16)
2706 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2708 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2711 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2713 /* Set mask of 32-bit channels to drop. */
2714 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2718 brw_untyped_surface_read(struct brw_compile
*p
,
2719 struct brw_reg dest
,
2721 unsigned bind_table_index
,
2722 unsigned msg_length
,
2723 unsigned response_length
)
2725 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2727 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2728 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2729 brw_set_dp_untyped_surface_read_message(
2730 p
, insn
, bind_table_index
, msg_length
, response_length
,
2731 insn
->header
.access_mode
== BRW_ALIGN_1
);
2735 * This instruction is generated as a single-channel align1 instruction by
2736 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2738 * We can't use the typed atomic op in the FS because that has the execution
2739 * mask ANDed with the pixel mask, but we just want to write the one dword for
2742 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2743 * one u32. So we use the same untyped atomic write message as the pixel
2746 * The untyped atomic operation requires a BUFFER surface type with RAW
2747 * format, and is only accessible through the legacy DATA_CACHE dataport
2750 void brw_shader_time_add(struct brw_compile
*p
,
2751 struct brw_reg payload
,
2752 uint32_t surf_index
)
2754 struct brw_context
*brw
= p
->brw
;
2755 assert(brw
->gen
>= 7);
2757 brw_push_insn_state(p
);
2758 brw_set_access_mode(p
, BRW_ALIGN_1
);
2759 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2760 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2761 brw_pop_insn_state(p
);
2763 /* We use brw_vec1_reg and unmasked because we want to increment the given
2766 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2768 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2770 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2771 2 /* message length */,
2772 0 /* response length */,
2773 false /* header present */);