2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
/* Return true if a 32-bit immediate can be stored in a compacted
 * instruction.  Compaction keeps the low 12 bits verbatim and replicates a
 * single bit through the top 20 bits, so an immediate is representable iff
 * its top 20 bits are all-zero or all-one.
 *
 * BUGFIX: mask off the low 12 bits before testing — without the mask,
 * compactable values such as 0x00000123 were rejected, contradicting the
 * "low 12 bits as-is" rule.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfffu;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
309 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
312 struct brw_context
*brw
= p
->brw
;
314 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
315 assert(reg
.nr
< 128);
317 gen7_convert_mrf_to_grf(p
, ®
);
319 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
320 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
321 /* Any source modifiers or regions will be ignored, since this just
322 * identifies the MRF/GRF to start reading the message contents from.
323 * Check for some likely failures.
327 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
330 validate_reg(insn
, reg
);
332 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
333 insn
->bits1
.da1
.src0_reg_type
=
334 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
335 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
336 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
337 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
339 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
340 insn
->bits3
.ud
= reg
.dw1
.ud
;
342 /* The Bspec's section titled "Non-present Operands" claims that if src0
343 * is an immediate that src1's type must be the same as that of src0.
345 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
346 * that do not follow this rule. E.g., from the IVB/HSW table:
348 * DataTypeIndex 18-Bit Mapping Mapped Meaning
349 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
351 * And from the SNB table:
353 * DataTypeIndex 18-Bit Mapping Mapped Meaning
354 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
356 * Neither of these cause warnings from the simulator when used,
357 * compacted or otherwise. In fact, all compaction mappings that have an
358 * immediate in src0 use a:ud for src1.
360 * The GM45 instruction compaction tables do not contain mapped meanings
361 * so it's not clear whether it has the restriction. We'll assume it was
362 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
364 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
366 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
368 insn
->bits1
.da1
.src1_reg_type
= BRW_HW_REG_TYPE_UD
;
371 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
372 * for immediate values. Presumably the hardware engineers realized
373 * that the only useful floating-point value that could be represented
374 * in this format is 0.0, which can also be represented as a VF-typed
375 * immediate, so they gave us the previously mentioned mapping on IVB+.
377 * Strangely, we do have a mapping for imm:f in src1, so we don't need
380 * If we see a 0.0:F, change the type to VF so that it can be compacted.
382 if (insn
->bits3
.ud
== 0x0 &&
383 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_F
) {
384 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_IMM_TYPE_VF
;
387 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
388 * set the types to :UD so the instruction can be compacted.
390 if (is_compactable_immediate(insn
->bits3
.ud
) &&
391 insn
->header
.destreg__conditionalmod
== BRW_CONDITIONAL_NONE
&&
392 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_D
&&
393 insn
->bits1
.da1
.dest_reg_type
== BRW_HW_REG_TYPE_D
) {
394 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_TYPE_UD
;
395 insn
->bits1
.da1
.dest_reg_type
= BRW_HW_REG_TYPE_UD
;
400 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
401 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
402 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
403 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
406 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
407 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
411 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
413 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
414 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
417 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
421 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
422 if (reg
.width
== BRW_WIDTH_1
&&
423 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
424 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
425 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
426 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
429 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
430 insn
->bits2
.da1
.src0_width
= reg
.width
;
431 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
435 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
436 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
437 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
438 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
440 /* This is an oddity of the fact we're using the same
441 * descriptions for registers in align_16 as align_1:
443 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
444 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
446 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
453 brw_set_src1(struct brw_compile
*p
,
454 struct brw_instruction
*insn
,
457 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
459 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
460 assert(reg
.nr
< 128);
462 gen7_convert_mrf_to_grf(p
, ®
);
464 validate_reg(insn
, reg
);
466 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
467 insn
->bits1
.da1
.src1_reg_type
=
468 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
469 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
470 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
472 /* Only src1 can be immediate in two-argument instructions.
474 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
476 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
477 insn
->bits3
.ud
= reg
.dw1
.ud
;
480 /* This is a hardware restriction, which may or may not be lifted
483 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
484 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
486 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
487 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
488 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
491 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
492 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
495 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
496 if (reg
.width
== BRW_WIDTH_1
&&
497 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
498 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
499 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
500 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
503 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
504 insn
->bits3
.da1
.src1_width
= reg
.width
;
505 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
509 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
510 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
511 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
512 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
514 /* This is an oddity of the fact we're using the same
515 * descriptions for registers in align_16 as align_1:
517 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
518 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
520 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
526 * Set the Message Descriptor and Extended Message Descriptor fields
529 * \note This zeroes out the Function Control bits, so it must be called
530 * \b before filling out any message-specific data. Callers can
531 * choose not to fill in irrelevant bits; they will be zero.
534 brw_set_message_descriptor(struct brw_compile
*p
,
535 struct brw_instruction
*inst
,
536 enum brw_message_target sfid
,
538 unsigned response_length
,
542 struct brw_context
*brw
= p
->brw
;
544 brw_set_src1(p
, inst
, brw_imm_d(0));
547 inst
->bits3
.generic_gen5
.header_present
= header_present
;
548 inst
->bits3
.generic_gen5
.response_length
= response_length
;
549 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
550 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
553 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
554 inst
->header
.destreg__conditionalmod
= sfid
;
556 /* Set Extended Message Descriptor (ex_desc) */
557 inst
->bits2
.send_gen5
.sfid
= sfid
;
558 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
561 inst
->bits3
.generic
.response_length
= response_length
;
562 inst
->bits3
.generic
.msg_length
= msg_length
;
563 inst
->bits3
.generic
.msg_target
= sfid
;
564 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
568 static void brw_set_math_message( struct brw_compile
*p
,
569 struct brw_instruction
*insn
,
571 unsigned integer_type
,
575 struct brw_context
*brw
= p
->brw
;
577 unsigned response_length
;
579 /* Infer message length from the function */
581 case BRW_MATH_FUNCTION_POW
:
582 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
583 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
584 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
592 /* Infer response length from the function */
594 case BRW_MATH_FUNCTION_SINCOS
:
595 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
604 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
605 msg_length
, response_length
, false, false);
607 insn
->bits3
.math_gen5
.function
= function
;
608 insn
->bits3
.math_gen5
.int_type
= integer_type
;
609 insn
->bits3
.math_gen5
.precision
= low_precision
;
610 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
611 insn
->bits3
.math_gen5
.data_type
= dataType
;
612 insn
->bits3
.math_gen5
.snapshot
= 0;
614 insn
->bits3
.math
.function
= function
;
615 insn
->bits3
.math
.int_type
= integer_type
;
616 insn
->bits3
.math
.precision
= low_precision
;
617 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
618 insn
->bits3
.math
.data_type
= dataType
;
620 insn
->header
.saturate
= 0;
624 static void brw_set_ff_sync_message(struct brw_compile
*p
,
625 struct brw_instruction
*insn
,
627 unsigned response_length
,
630 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
631 1, response_length
, true, end_of_thread
);
632 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
633 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
634 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
635 insn
->bits3
.urb_gen5
.allocate
= allocate
;
636 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
637 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
640 static void brw_set_urb_message( struct brw_compile
*p
,
641 struct brw_instruction
*insn
,
642 enum brw_urb_write_flags flags
,
644 unsigned response_length
,
646 unsigned swizzle_control
)
648 struct brw_context
*brw
= p
->brw
;
650 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
651 msg_length
, response_length
, true,
652 flags
& BRW_URB_WRITE_EOT
);
654 if (flags
& BRW_URB_WRITE_OWORD
) {
655 assert(msg_length
== 2); /* header + one OWORD of data */
656 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
658 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
660 insn
->bits3
.urb_gen7
.offset
= offset
;
661 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
662 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
663 insn
->bits3
.urb_gen7
.per_slot_offset
=
664 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
665 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
666 } else if (brw
->gen
>= 5) {
667 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
668 insn
->bits3
.urb_gen5
.offset
= offset
;
669 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
670 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
671 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
672 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
674 insn
->bits3
.urb
.opcode
= 0; /* ? */
675 insn
->bits3
.urb
.offset
= offset
;
676 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
677 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
678 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
679 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
684 brw_set_dp_write_message(struct brw_compile
*p
,
685 struct brw_instruction
*insn
,
686 unsigned binding_table_index
,
687 unsigned msg_control
,
691 unsigned last_render_target
,
692 unsigned response_length
,
693 unsigned end_of_thread
,
694 unsigned send_commit_msg
)
696 struct brw_context
*brw
= p
->brw
;
700 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
701 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
702 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
704 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
705 } else if (brw
->gen
== 6) {
706 /* Use the render cache for all write messages. */
707 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
709 sfid
= BRW_SFID_DATAPORT_WRITE
;
712 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
713 header_present
, end_of_thread
);
716 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
717 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
718 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
719 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
720 } else if (brw
->gen
== 6) {
721 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
722 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
723 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
724 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
725 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
726 } else if (brw
->gen
== 5) {
727 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
728 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
729 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
730 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
731 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
733 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
734 insn
->bits3
.dp_write
.msg_control
= msg_control
;
735 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
736 insn
->bits3
.dp_write
.msg_type
= msg_type
;
737 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
742 brw_set_dp_read_message(struct brw_compile
*p
,
743 struct brw_instruction
*insn
,
744 unsigned binding_table_index
,
745 unsigned msg_control
,
747 unsigned target_cache
,
750 unsigned response_length
)
752 struct brw_context
*brw
= p
->brw
;
756 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
757 } else if (brw
->gen
== 6) {
758 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
759 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
761 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
763 sfid
= BRW_SFID_DATAPORT_READ
;
766 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
767 header_present
, false);
770 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
771 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
772 insn
->bits3
.gen7_dp
.last_render_target
= 0;
773 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
774 } else if (brw
->gen
== 6) {
775 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
776 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
777 insn
->bits3
.gen6_dp
.last_render_target
= 0;
778 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
779 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
780 } else if (brw
->gen
== 5) {
781 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
782 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
783 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
784 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
785 } else if (brw
->is_g4x
) {
786 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
787 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
788 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
789 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
791 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
792 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
793 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
794 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
799 brw_set_sampler_message(struct brw_compile
*p
,
800 struct brw_instruction
*insn
,
801 unsigned binding_table_index
,
804 unsigned response_length
,
806 unsigned header_present
,
808 unsigned return_format
)
810 struct brw_context
*brw
= p
->brw
;
812 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
813 response_length
, header_present
, false);
816 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
817 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
818 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
819 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
820 } else if (brw
->gen
>= 5) {
821 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
822 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
823 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
824 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
825 } else if (brw
->is_g4x
) {
826 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
827 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
828 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
830 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
831 insn
->bits3
.sampler
.sampler
= sampler
;
832 insn
->bits3
.sampler
.msg_type
= msg_type
;
833 insn
->bits3
.sampler
.return_format
= return_format
;
838 #define next_insn brw_next_insn
839 struct brw_instruction
*
840 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
842 struct brw_instruction
*insn
;
844 if (p
->nr_insn
+ 1 > p
->store_size
) {
846 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
847 struct brw_instruction
, p
->store_size
);
850 p
->next_insn_offset
+= 16;
851 insn
= &p
->store
[p
->nr_insn
++];
852 memcpy(insn
, p
->current
, sizeof(*insn
));
854 /* Reset this one-shot flag:
857 if (p
->current
->header
.destreg__conditionalmod
) {
858 p
->current
->header
.destreg__conditionalmod
= 0;
859 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
862 insn
->header
.opcode
= opcode
;
866 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
871 struct brw_instruction
*insn
= next_insn(p
, opcode
);
872 brw_set_dest(p
, insn
, dest
);
873 brw_set_src0(p
, insn
, src
);
877 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
881 struct brw_reg src1
)
883 struct brw_instruction
*insn
= next_insn(p
, opcode
);
884 brw_set_dest(p
, insn
, dest
);
885 brw_set_src0(p
, insn
, src0
);
886 brw_set_src1(p
, insn
, src1
);
891 get_3src_subreg_nr(struct brw_reg reg
)
893 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
894 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
895 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
897 return reg
.subnr
/ 4;
901 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
908 struct brw_context
*brw
= p
->brw
;
909 struct brw_instruction
*insn
= next_insn(p
, opcode
);
911 gen7_convert_mrf_to_grf(p
, &dest
);
913 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
915 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
916 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
917 assert(dest
.nr
< 128);
918 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
919 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
920 dest
.type
== BRW_REGISTER_TYPE_D
||
921 dest
.type
== BRW_REGISTER_TYPE_UD
);
922 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
923 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
924 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
925 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
926 guess_execution_size(p
, insn
, dest
);
928 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
929 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
930 assert(src0
.nr
< 128);
931 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
932 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
933 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
934 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
935 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
936 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
938 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
939 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
940 assert(src1
.nr
< 128);
941 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
942 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
943 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
944 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
945 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
946 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
947 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
949 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
950 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
951 assert(src2
.nr
< 128);
952 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
953 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
954 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
955 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
956 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
957 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
960 /* Set both the source and destination types based on dest.type,
961 * ignoring the source register types. The MAD and LRP emitters ensure
962 * that all four types are float. The BFE and BFI2 emitters, however,
963 * may send us mixed D and UD types and want us to ignore that and use
964 * the destination type.
967 case BRW_REGISTER_TYPE_F
:
968 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
969 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
971 case BRW_REGISTER_TYPE_D
:
972 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
973 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
975 case BRW_REGISTER_TYPE_UD
:
976 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
977 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
986 /***********************************************************************
987 * Convenience routines.
990 struct brw_instruction *brw_##OP(struct brw_compile *p, \
991 struct brw_reg dest, \
992 struct brw_reg src0) \
994 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
998 struct brw_instruction *brw_##OP(struct brw_compile *p, \
999 struct brw_reg dest, \
1000 struct brw_reg src0, \
1001 struct brw_reg src1) \
1003 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
1007 struct brw_instruction *brw_##OP(struct brw_compile *p, \
1008 struct brw_reg dest, \
1009 struct brw_reg src0, \
1010 struct brw_reg src1, \
1011 struct brw_reg src2) \
1013 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
1017 struct brw_instruction *brw_##OP(struct brw_compile *p, \
1018 struct brw_reg dest, \
1019 struct brw_reg src0, \
1020 struct brw_reg src1, \
1021 struct brw_reg src2) \
1023 assert(dest.type == BRW_REGISTER_TYPE_F); \
1024 assert(src0.type == BRW_REGISTER_TYPE_F); \
1025 assert(src1.type == BRW_REGISTER_TYPE_F); \
1026 assert(src2.type == BRW_REGISTER_TYPE_F); \
1027 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
1030 /* Rounding operations (other than RNDD) require two instructions - the first
1031 * stores a rounded value (possibly the wrong way) in the dest register, but
1032 * also sets a per-channel "increment bit" in the flag register. A predicated
1033 * add of 1.0 fixes dest to contain the desired result.
1035 * Sandybridge and later appear to round correctly without an ADD.
1038 void brw_##OP(struct brw_compile *p, \
1039 struct brw_reg dest, \
1040 struct brw_reg src) \
1042 struct brw_instruction *rnd, *add; \
1043 rnd = next_insn(p, BRW_OPCODE_##OP); \
1044 brw_set_dest(p, rnd, dest); \
1045 brw_set_src0(p, rnd, src); \
1047 if (p->brw->gen < 6) { \
1048 /* turn on round-increments */ \
1049 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
1050 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
1051 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
1094 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1095 struct brw_reg dest
,
1096 struct brw_reg src0
,
1097 struct brw_reg src1
)
1100 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1101 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1102 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1103 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1104 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1107 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1108 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1109 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1110 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1111 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1114 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1117 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1118 struct brw_reg dest
,
1119 struct brw_reg src0
,
1120 struct brw_reg src1
)
1122 assert(dest
.type
== src0
.type
);
1123 assert(src0
.type
== src1
.type
);
1124 switch (src0
.type
) {
1125 case BRW_REGISTER_TYPE_B
:
1126 case BRW_REGISTER_TYPE_UB
:
1127 case BRW_REGISTER_TYPE_W
:
1128 case BRW_REGISTER_TYPE_UW
:
1129 case BRW_REGISTER_TYPE_D
:
1130 case BRW_REGISTER_TYPE_UD
:
1133 assert(!"Bad type for brw_AVG");
1136 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1139 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1140 struct brw_reg dest
,
1141 struct brw_reg src0
,
1142 struct brw_reg src1
)
1145 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1146 src0
.type
== BRW_REGISTER_TYPE_UD
||
1147 src1
.type
== BRW_REGISTER_TYPE_D
||
1148 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1149 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1152 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1153 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1154 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1155 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1156 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1159 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1160 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1161 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1162 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1163 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1166 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1167 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1168 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1169 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1171 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1175 void brw_NOP(struct brw_compile
*p
)
1177 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1178 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1179 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1180 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1187 /***********************************************************************
1188 * Comparisons, if/else/endif
1191 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1192 struct brw_reg dest
,
1193 struct brw_reg src0
,
1194 struct brw_reg src1
)
1196 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1198 insn
->header
.execution_size
= 1;
1199 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1200 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1202 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1208 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1210 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1212 p
->if_stack_depth
++;
1213 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1214 p
->if_stack_array_size
*= 2;
1215 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1216 p
->if_stack_array_size
);
1220 static struct brw_instruction
*
1221 pop_if_stack(struct brw_compile
*p
)
1223 p
->if_stack_depth
--;
1224 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1228 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1230 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1231 p
->loop_stack_array_size
*= 2;
1232 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1233 p
->loop_stack_array_size
);
1234 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1235 p
->loop_stack_array_size
);
1238 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1239 p
->loop_stack_depth
++;
1240 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1243 static struct brw_instruction
*
1244 get_inner_do_insn(struct brw_compile
*p
)
1246 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevent flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      /* Pre-gen6: IF operates on IP; the jump count is patched later. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      /* Gen6: branch target is a 16-bit immediate, patched later. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      /* Gen7+: JIP/UIP fields, filled in by patch_IF_ELSE() at ENDIF time. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   /* Pre-gen6 flow control implies a thread switch unless in SPF mode. */
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Override the defaults for this instruction:
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Jump target is a 16-bit immediate, patched by patch_IF_ELSE(). */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The comparison is embedded in the IF via the conditional modifier. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Offsets are in bytes; each instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         /* Gen7+: both the unconditional (UIP) and conditional (JIP)
          * targets are the ENDIF when there is no ELSE.
          */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
/**
 * Emit an ELSE instruction; its jump targets are filled in later by
 * patch_IF_ELSE() when the matching ENDIF is emitted.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      /* Pre-gen6: ELSE operates on IP with a patched jump count. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      /* Gen7+: JIP/UIP fields, patched later. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The ELSE sits on the if-stack above its IF until ENDIF pops both. */
   push_if_stack(p, insn);
}
/**
 * Close the innermost IF/ELSE block: emit the ENDIF (unless the SPF ADD
 * trick applies) and patch the IF/ELSE with their final jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base adress of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      /* Gen6+: ENDIF's jump target is the next instruction (count 2 =
       * one 128-bit instruction in 64-bit chunks).
       */
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/**
 * Emit a BREAK.  Pre-gen6 it records the enclosing if-depth so the EU can
 * pop the mask stack; jump targets are patched later (brw_patch_break_cont
 * pre-gen6, brw_set_uip_jip on gen6+).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop all the IF levels we are currently nested inside of this loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
/** Emit a gen6 CONTINUE; jump targets are filled in by brw_set_uip_jip(). */
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   /* NOTE(review): the null-reg dest/src0 pair below is immediately
    * overwritten by the IP-register pair that follows — looks like dead
    * code; confirm against the gen6 CONTINUE encoding before removing.
    */
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}
/**
 * Emit a pre-gen6 CONTINUE.  The jump count is patched later by
 * brw_patch_break_cont(); pop_count unwinds the IFs nested in this loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
/** Emit a gen6+ HALT; its UIP and JIP are updated later by the caller. */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
/* The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction needed: remember the loop start by index. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks (2 per instruction). */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO. */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         /* BREAK jumps just past the WHILE (out of the loop). */
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         /* CONTINUE jumps to the WHILE itself (next iteration test). */
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
/**
 * Close the innermost DO loop with a WHILE (or, in SPF mode pre-gen6, an
 * unconditional ADD to IP), patch outstanding BREAK/CONT pre-gen6, and
 * pop the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   /* Jump counts are in 64-bit chunks on gen5+ (2 per instruction). */
   unsigned br = 1;

   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* JIP is negative: points back to the top of the loop. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF: the loop-back is just a scalar ADD to IP (16 bytes/insn). */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         /* Point this loop's BREAK/CONT instructions at the WHILE. */
         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   /* The comparison function lives in the conditional-modifier field. */
   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1820 /* Issue 'wait' instruction for n1, host could program MMIO
1821 to wake up thread. */
1822 void brw_WAIT (struct brw_compile
*p
)
1824 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1825 struct brw_reg src
= brw_notification_1_reg();
1827 brw_set_dest(p
, insn
, src
);
1828 brw_set_src0(p
, insn
, src
);
1829 brw_set_src1(p
, insn
, brw_null_reg());
1830 insn
->header
.execution_size
= 0; /* must */
1831 insn
->header
.predicate_control
= 0;
1832 insn
->header
.compression_control
= 0;
/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned data_type,
               unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      /* Gen6+: math is a native instruction, not a SEND. */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      /* Pre-gen6: math goes through a SEND to the shared math unit. */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      /* NOTE(review): argument list reconstructed — confirm against the
       * brw_set_math_message() prototype.
       */
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}
/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT_DIV variants take integer sources; everything else takes floats. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes.
    * NOTE(review): scaling reconstructed — confirm against callers.
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* SEND is never compressed; widen the header read instead. */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      /* NOTE(review): argument list reconstructed — confirm against the
       * brw_set_dp_write_message() prototype.
       */
      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
2067 * Read a block of owords (half a GRF each) from the scratch buffer
2068 * using a constant index per channel.
2070 * Offset must be aligned to oword size (16 bytes). Used for register
2074 brw_oword_block_read_scratch(struct brw_compile
*p
,
2075 struct brw_reg dest
,
2080 struct brw_context
*brw
= p
->brw
;
2081 uint32_t msg_control
;
2087 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2088 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2090 if (num_regs
== 1) {
2091 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2094 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2099 brw_push_insn_state(p
);
2100 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2101 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2103 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2105 /* set message header global offset field (reg 0, element 2) */
2107 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2109 2), BRW_REGISTER_TYPE_UD
),
2110 brw_imm_ud(offset
));
2112 brw_pop_insn_state(p
);
2116 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2118 assert(insn
->header
.predicate_control
== 0);
2119 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2120 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2122 brw_set_dest(p
, insn
, dest
); /* UW? */
2123 if (brw
->gen
>= 6) {
2124 brw_set_src0(p
, insn
, mrf
);
2126 brw_set_src0(p
, insn
, brw_null_reg());
2129 brw_set_dp_read_message(p
,
2131 255, /* binding table index (255=stateless) */
2133 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2134 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2136 true, /* header_present */
2142 gen7_block_read_scratch(struct brw_compile
*p
,
2143 struct brw_reg dest
,
2147 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2149 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2151 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2152 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2154 brw_set_dest(p
, insn
, dest
);
2156 /* The HW requires that the header is present; this is to get the g0.5
2159 bool header_present
= true;
2160 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2162 brw_set_message_descriptor(p
, insn
,
2163 GEN7_SFID_DATAPORT_DATA_CACHE
,
2164 1, /* mlen: just g0 */
2169 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2171 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2172 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2174 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2175 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2176 * is 32 bytes, which happens to be the size of a register.
2179 assert(offset
< (1 << 12));
2180 insn
->bits3
.ud
|= offset
;
2184 * Read a float[4] vector from the data port Data Cache (const buffer).
2185 * Location (in buffer) should be a multiple of 16.
2186 * Used for fetching shader constants.
2188 void brw_oword_block_read(struct brw_compile
*p
,
2189 struct brw_reg dest
,
2192 uint32_t bind_table_index
)
2194 struct brw_context
*brw
= p
->brw
;
2196 /* On newer hardware, offset is in units of owords. */
2200 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2202 brw_push_insn_state(p
);
2203 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2204 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2205 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2207 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2209 /* set message header global offset field (reg 0, element 2) */
2211 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2213 2), BRW_REGISTER_TYPE_UD
),
2214 brw_imm_ud(offset
));
2216 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2217 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2219 /* cast dest to a uword[8] vector */
2220 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2222 brw_set_dest(p
, insn
, dest
);
2223 if (brw
->gen
>= 6) {
2224 brw_set_src0(p
, insn
, mrf
);
2226 brw_set_src0(p
, insn
, brw_null_reg());
2229 brw_set_dp_read_message(p
,
2232 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2233 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2234 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2236 true, /* header_present */
2237 1); /* response_length (1 reg, 2 owords!) */
2239 brw_pop_insn_state(p
);
2243 void brw_fb_WRITE(struct brw_compile
*p
,
2245 unsigned msg_reg_nr
,
2246 struct brw_reg src0
,
2247 unsigned msg_control
,
2248 unsigned binding_table_index
,
2249 unsigned msg_length
,
2250 unsigned response_length
,
2252 bool header_present
)
2254 struct brw_context
*brw
= p
->brw
;
2255 struct brw_instruction
*insn
;
2257 struct brw_reg dest
;
2259 if (dispatch_width
== 16)
2260 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2262 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2264 if (brw
->gen
>= 6) {
2265 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2267 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2269 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2271 if (brw
->gen
>= 6) {
2272 /* headerless version, just submit color payload */
2273 src0
= brw_message_reg(msg_reg_nr
);
2275 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2277 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2279 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2282 brw_set_dest(p
, insn
, dest
);
2283 brw_set_src0(p
, insn
, src0
);
2284 brw_set_dp_write_message(p
,
2286 binding_table_index
,
2291 eot
, /* last render target write */
2294 0 /* send_commit_msg */);
2299 * Texture sample instruction.
2300 * Note: the msg_type plus msg_length values determine exactly what kind
2301 * of sampling operation is performed. See volume 4, page 161 of docs.
2303 void brw_SAMPLE(struct brw_compile
*p
,
2304 struct brw_reg dest
,
2305 unsigned msg_reg_nr
,
2306 struct brw_reg src0
,
2307 unsigned binding_table_index
,
2310 unsigned response_length
,
2311 unsigned msg_length
,
2312 unsigned header_present
,
2314 unsigned return_format
)
2316 struct brw_context
*brw
= p
->brw
;
2317 struct brw_instruction
*insn
;
2319 if (msg_reg_nr
!= -1)
2320 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2322 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2323 insn
->header
.predicate_control
= 0; /* XXX */
2325 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2327 * "Instruction compression is not allowed for this instruction (that
2328 * is, send). The hardware behavior is undefined if this instruction is
2329 * set as compressed. However, compress control can be set to "SecHalf"
2330 * to affect the EMask generation."
2332 * No similar wording is found in later PRMs, but there are examples
2333 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2334 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2335 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2337 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2338 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2341 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2343 brw_set_dest(p
, insn
, dest
);
2344 brw_set_src0(p
, insn
, src0
);
2345 brw_set_sampler_message(p
, insn
,
2346 binding_table_index
,
2356 /* All these variables are pretty confusing - we might be better off
2357 * using bitmasks and macros for this, in the old style. Or perhaps
2358 * just having the caller instantiate the fields in dword3 itself.
2360 void brw_urb_WRITE(struct brw_compile
*p
,
2361 struct brw_reg dest
,
2362 unsigned msg_reg_nr
,
2363 struct brw_reg src0
,
2364 enum brw_urb_write_flags flags
,
2365 unsigned msg_length
,
2366 unsigned response_length
,
2370 struct brw_context
*brw
= p
->brw
;
2371 struct brw_instruction
*insn
;
2373 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2375 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2376 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2377 brw_push_insn_state(p
);
2378 brw_set_access_mode(p
, BRW_ALIGN_1
);
2379 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2380 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2381 BRW_REGISTER_TYPE_UD
),
2382 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2383 brw_imm_ud(0xff00));
2384 brw_pop_insn_state(p
);
2387 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2389 assert(msg_length
< BRW_MAX_MRF
);
2391 brw_set_dest(p
, insn
, dest
);
2392 brw_set_src0(p
, insn
, src0
);
2393 brw_set_src1(p
, insn
, brw_imm_d(0));
2396 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2398 brw_set_urb_message(p
,
2408 brw_find_next_block_end(struct brw_compile
*p
, int start_offset
)
2411 void *store
= p
->store
;
2413 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2414 offset
= next_offset(store
, offset
)) {
2415 struct brw_instruction
*insn
= store
+ offset
;
2417 switch (insn
->header
.opcode
) {
2418 case BRW_OPCODE_ENDIF
:
2419 case BRW_OPCODE_ELSE
:
2420 case BRW_OPCODE_WHILE
:
2421 case BRW_OPCODE_HALT
:
2429 /* There is no DO instruction on gen6, so to find the end of the loop
2430 * we have to see if the loop is jumping back before our start
2434 brw_find_loop_end(struct brw_compile
*p
, int start_offset
)
2436 struct brw_context
*brw
= p
->brw
;
2439 void *store
= p
->store
;
2441 /* Always start after the instruction (such as a WHILE) we're trying to fix
2444 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2445 offset
= next_offset(store
, offset
)) {
2446 struct brw_instruction
*insn
= store
+ offset
;
2448 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2449 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2450 : insn
->bits3
.break_cont
.jip
;
2451 if (offset
+ jip
* scale
<= start_offset
)
2455 assert(!"not reached");
2456 return start_offset
;
2459 /* After program generation, go back and update the UIP and JIP of
2460 * BREAK, CONT, and HALT instructions to their correct locations.
2463 brw_set_uip_jip(struct brw_compile
*p
)
2465 struct brw_context
*brw
= p
->brw
;
2468 void *store
= p
->store
;
2473 for (offset
= 0; offset
< p
->next_insn_offset
;
2474 offset
= next_offset(store
, offset
)) {
2475 struct brw_instruction
*insn
= store
+ offset
;
2477 if (insn
->header
.cmpt_control
) {
2478 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2479 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2480 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2481 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2485 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2486 switch (insn
->header
.opcode
) {
2487 case BRW_OPCODE_BREAK
:
2488 assert(block_end_offset
!= 0);
2489 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2490 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2491 insn
->bits3
.break_cont
.uip
=
2492 (brw_find_loop_end(p
, offset
) - offset
+
2493 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2495 case BRW_OPCODE_CONTINUE
:
2496 assert(block_end_offset
!= 0);
2497 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2498 insn
->bits3
.break_cont
.uip
=
2499 (brw_find_loop_end(p
, offset
) - offset
) / scale
;
2501 assert(insn
->bits3
.break_cont
.uip
!= 0);
2502 assert(insn
->bits3
.break_cont
.jip
!= 0);
2505 case BRW_OPCODE_ENDIF
:
2506 if (block_end_offset
== 0)
2507 insn
->bits3
.break_cont
.jip
= 2;
2509 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2512 case BRW_OPCODE_HALT
:
2513 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2515 * "In case of the halt instruction not inside any conditional
2516 * code block, the value of <JIP> and <UIP> should be the
2517 * same. In case of the halt instruction inside conditional code
2518 * block, the <UIP> should be the end of the program, and the
2519 * <JIP> should be end of the most inner conditional code block."
2521 * The uip will have already been set by whoever set up the
2524 if (block_end_offset
== 0) {
2525 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2527 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2529 assert(insn
->bits3
.break_cont
.uip
!= 0);
2530 assert(insn
->bits3
.break_cont
.jip
!= 0);
2536 void brw_ff_sync(struct brw_compile
*p
,
2537 struct brw_reg dest
,
2538 unsigned msg_reg_nr
,
2539 struct brw_reg src0
,
2541 unsigned response_length
,
2544 struct brw_context
*brw
= p
->brw
;
2545 struct brw_instruction
*insn
;
2547 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2549 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2550 brw_set_dest(p
, insn
, dest
);
2551 brw_set_src0(p
, insn
, src0
);
2552 brw_set_src1(p
, insn
, brw_imm_d(0));
2555 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2557 brw_set_ff_sync_message(p
,
2565 * Emit the SEND instruction necessary to generate stream output data on Gen6
2566 * (for transform feedback).
2568 * If send_commit_msg is true, this is the last piece of stream output data
2569 * from this thread, so send the data as a committed write. According to the
2570 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2572 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2573 * writes are complete by sending the final write as a committed write."
2576 brw_svb_write(struct brw_compile
*p
,
2577 struct brw_reg dest
,
2578 unsigned msg_reg_nr
,
2579 struct brw_reg src0
,
2580 unsigned binding_table_index
,
2581 bool send_commit_msg
)
2583 struct brw_instruction
*insn
;
2585 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2587 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2588 brw_set_dest(p
, insn
, dest
);
2589 brw_set_src0(p
, insn
, src0
);
2590 brw_set_src1(p
, insn
, brw_imm_d(0));
2591 brw_set_dp_write_message(p
, insn
,
2592 binding_table_index
,
2593 0, /* msg_control: ignored */
2594 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2596 true, /* header_present */
2597 0, /* last_render_target: ignored */
2598 send_commit_msg
, /* response_length */
2599 0, /* end_of_thread */
2600 send_commit_msg
); /* send_commit_msg */
2604 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2605 struct brw_instruction
*insn
,
2607 unsigned bind_table_index
,
2608 unsigned msg_length
,
2609 unsigned response_length
,
2610 bool header_present
)
2612 if (p
->brw
->is_haswell
) {
2613 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2614 msg_length
, response_length
,
2615 header_present
, false);
2618 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2619 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2620 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2622 insn
->bits3
.gen7_dp
.msg_type
=
2623 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2625 insn
->bits3
.gen7_dp
.msg_type
=
2626 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2630 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2631 msg_length
, response_length
,
2632 header_present
, false);
2634 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2636 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2637 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2640 if (response_length
)
2641 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2643 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2644 insn
->bits3
.ud
|= atomic_op
<< 8;
2648 brw_untyped_atomic(struct brw_compile
*p
,
2649 struct brw_reg dest
,
2652 unsigned bind_table_index
,
2653 unsigned msg_length
,
2654 unsigned response_length
) {
2655 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2657 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2658 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2659 brw_set_src1(p
, insn
, brw_imm_d(0));
2660 brw_set_dp_untyped_atomic_message(
2661 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2662 insn
->header
.access_mode
== BRW_ALIGN_1
);
2666 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2667 struct brw_instruction
*insn
,
2668 unsigned bind_table_index
,
2669 unsigned msg_length
,
2670 unsigned response_length
,
2671 bool header_present
)
2673 const unsigned dispatch_width
=
2674 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2675 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2677 if (p
->brw
->is_haswell
) {
2678 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2679 msg_length
, response_length
,
2680 header_present
, false);
2682 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2684 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2685 msg_length
, response_length
,
2686 header_present
, false);
2688 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2691 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2692 if (dispatch_width
== 16)
2693 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2695 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2698 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2700 /* Set mask of 32-bit channels to drop. */
2701 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2705 brw_untyped_surface_read(struct brw_compile
*p
,
2706 struct brw_reg dest
,
2708 unsigned bind_table_index
,
2709 unsigned msg_length
,
2710 unsigned response_length
)
2712 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2714 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2715 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2716 brw_set_dp_untyped_surface_read_message(
2717 p
, insn
, bind_table_index
, msg_length
, response_length
,
2718 insn
->header
.access_mode
== BRW_ALIGN_1
);
2722 * This instruction is generated as a single-channel align1 instruction by
2723 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2725 * We can't use the typed atomic op in the FS because that has the execution
2726 * mask ANDed with the pixel mask, but we just want to write the one dword for
2729 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2730 * one u32. So we use the same untyped atomic write message as the pixel
2733 * The untyped atomic operation requires a BUFFER surface type with RAW
2734 * format, and is only accessible through the legacy DATA_CACHE dataport
2737 void brw_shader_time_add(struct brw_compile
*p
,
2738 struct brw_reg payload
,
2739 uint32_t surf_index
)
2741 struct brw_context
*brw
= p
->brw
;
2742 assert(brw
->gen
>= 7);
2744 brw_push_insn_state(p
);
2745 brw_set_access_mode(p
, BRW_ALIGN_1
);
2746 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2747 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2748 brw_pop_insn_state(p
);
2750 /* We use brw_vec1_reg and unmasked because we want to increment the given
2753 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2755 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2757 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2758 2 /* message length */,
2759 0 /* response length */,
2760 false /* header present */);