2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
299 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
302 struct brw_context
*brw
= p
->brw
;
304 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
305 assert(reg
.nr
< 128);
307 gen7_convert_mrf_to_grf(p
, ®
);
309 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
310 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
317 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
320 validate_reg(insn
, reg
);
322 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
323 insn
->bits1
.da1
.src0_reg_type
=
324 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
325 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
326 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
327 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
329 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
330 insn
->bits3
.ud
= reg
.dw1
.ud
;
332 /* The Bspec's section titled "Non-present Operands" claims that if src0
333 * is an immediate that src1's type must be the same as that of src0.
335 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
336 * that do not follow this rule. E.g., from the IVB/HSW table:
338 * DataTypeIndex 18-Bit Mapping Mapped Meaning
339 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
341 * And from the SNB table:
343 * DataTypeIndex 18-Bit Mapping Mapped Meaning
344 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
346 * Neither of these cause warnings from the simulator when used,
347 * compacted or otherwise. In fact, all compaction mappings that have an
348 * immediate in src0 use a:ud for src1.
350 * The GM45 instruction compaction tables do not contain mapped meanings
351 * so it's not clear whether it has the restriction. We'll assume it was
352 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
354 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
356 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
358 insn
->bits1
.da1
.src1_reg_type
= BRW_HW_REG_TYPE_UD
;
361 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
362 * for immediate values. Presumably the hardware engineers realized
363 * that the only useful floating-point value that could be represented
364 * in this format is 0.0, which can also be represented as a VF-typed
365 * immediate, so they gave us the previously mentioned mapping on IVB+.
367 * Strangely, we do have a mapping for imm:f in src1, so we don't need
370 * If we see a 0.0:F, change the type to VF so that it can be compacted.
372 if (insn
->bits3
.ud
== 0x0 &&
373 insn
->bits1
.da1
.src0_reg_type
== BRW_HW_REG_TYPE_F
) {
374 insn
->bits1
.da1
.src0_reg_type
= BRW_HW_REG_IMM_TYPE_VF
;
379 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
380 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
381 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
382 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
385 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
386 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
390 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
392 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
393 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
396 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
400 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
401 if (reg
.width
== BRW_WIDTH_1
&&
402 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
403 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
404 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
405 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
408 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
409 insn
->bits2
.da1
.src0_width
= reg
.width
;
410 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
414 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
415 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
416 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
417 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
419 /* This is an oddity of the fact we're using the same
420 * descriptions for registers in align_16 as align_1:
422 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
423 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
425 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
432 brw_set_src1(struct brw_compile
*p
,
433 struct brw_instruction
*insn
,
436 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
438 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
439 assert(reg
.nr
< 128);
441 gen7_convert_mrf_to_grf(p
, ®
);
443 validate_reg(insn
, reg
);
445 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
446 insn
->bits1
.da1
.src1_reg_type
=
447 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
448 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
449 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
451 /* Only src1 can be immediate in two-argument instructions.
453 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
455 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
456 insn
->bits3
.ud
= reg
.dw1
.ud
;
459 /* This is a hardware restriction, which may or may not be lifted
462 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
463 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
465 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
466 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
467 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
470 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
471 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
474 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
475 if (reg
.width
== BRW_WIDTH_1
&&
476 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
477 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
478 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
479 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
482 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
483 insn
->bits3
.da1
.src1_width
= reg
.width
;
484 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
488 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
489 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
490 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
491 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
493 /* This is an oddity of the fact we're using the same
494 * descriptions for registers in align_16 as align_1:
496 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
497 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
499 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
505 * Set the Message Descriptor and Extended Message Descriptor fields
508 * \note This zeroes out the Function Control bits, so it must be called
509 * \b before filling out any message-specific data. Callers can
510 * choose not to fill in irrelevant bits; they will be zero.
513 brw_set_message_descriptor(struct brw_compile
*p
,
514 struct brw_instruction
*inst
,
515 enum brw_message_target sfid
,
517 unsigned response_length
,
521 struct brw_context
*brw
= p
->brw
;
523 brw_set_src1(p
, inst
, brw_imm_d(0));
526 inst
->bits3
.generic_gen5
.header_present
= header_present
;
527 inst
->bits3
.generic_gen5
.response_length
= response_length
;
528 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
529 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
532 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
533 inst
->header
.destreg__conditionalmod
= sfid
;
535 /* Set Extended Message Descriptor (ex_desc) */
536 inst
->bits2
.send_gen5
.sfid
= sfid
;
537 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
540 inst
->bits3
.generic
.response_length
= response_length
;
541 inst
->bits3
.generic
.msg_length
= msg_length
;
542 inst
->bits3
.generic
.msg_target
= sfid
;
543 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
547 static void brw_set_math_message( struct brw_compile
*p
,
548 struct brw_instruction
*insn
,
550 unsigned integer_type
,
554 struct brw_context
*brw
= p
->brw
;
556 unsigned response_length
;
558 /* Infer message length from the function */
560 case BRW_MATH_FUNCTION_POW
:
561 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
562 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
563 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
571 /* Infer response length from the function */
573 case BRW_MATH_FUNCTION_SINCOS
:
574 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
583 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
584 msg_length
, response_length
, false, false);
586 insn
->bits3
.math_gen5
.function
= function
;
587 insn
->bits3
.math_gen5
.int_type
= integer_type
;
588 insn
->bits3
.math_gen5
.precision
= low_precision
;
589 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
590 insn
->bits3
.math_gen5
.data_type
= dataType
;
591 insn
->bits3
.math_gen5
.snapshot
= 0;
593 insn
->bits3
.math
.function
= function
;
594 insn
->bits3
.math
.int_type
= integer_type
;
595 insn
->bits3
.math
.precision
= low_precision
;
596 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
597 insn
->bits3
.math
.data_type
= dataType
;
599 insn
->header
.saturate
= 0;
603 static void brw_set_ff_sync_message(struct brw_compile
*p
,
604 struct brw_instruction
*insn
,
606 unsigned response_length
,
609 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
610 1, response_length
, true, end_of_thread
);
611 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
612 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
613 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
614 insn
->bits3
.urb_gen5
.allocate
= allocate
;
615 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
616 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
619 static void brw_set_urb_message( struct brw_compile
*p
,
620 struct brw_instruction
*insn
,
621 enum brw_urb_write_flags flags
,
623 unsigned response_length
,
625 unsigned swizzle_control
)
627 struct brw_context
*brw
= p
->brw
;
629 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
630 msg_length
, response_length
, true,
631 flags
& BRW_URB_WRITE_EOT
);
633 if (flags
& BRW_URB_WRITE_OWORD
) {
634 assert(msg_length
== 2); /* header + one OWORD of data */
635 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
637 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
639 insn
->bits3
.urb_gen7
.offset
= offset
;
640 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
641 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
642 insn
->bits3
.urb_gen7
.per_slot_offset
=
643 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
644 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
645 } else if (brw
->gen
>= 5) {
646 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
647 insn
->bits3
.urb_gen5
.offset
= offset
;
648 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
649 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
650 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
651 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
653 insn
->bits3
.urb
.opcode
= 0; /* ? */
654 insn
->bits3
.urb
.offset
= offset
;
655 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
656 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
657 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
658 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
663 brw_set_dp_write_message(struct brw_compile
*p
,
664 struct brw_instruction
*insn
,
665 unsigned binding_table_index
,
666 unsigned msg_control
,
670 unsigned last_render_target
,
671 unsigned response_length
,
672 unsigned end_of_thread
,
673 unsigned send_commit_msg
)
675 struct brw_context
*brw
= p
->brw
;
679 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
680 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
681 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
683 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
684 } else if (brw
->gen
== 6) {
685 /* Use the render cache for all write messages. */
686 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
688 sfid
= BRW_SFID_DATAPORT_WRITE
;
691 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
692 header_present
, end_of_thread
);
695 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
696 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
697 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
698 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
699 } else if (brw
->gen
== 6) {
700 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
701 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
702 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
703 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
704 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
705 } else if (brw
->gen
== 5) {
706 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
707 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
708 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
709 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
710 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
712 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
713 insn
->bits3
.dp_write
.msg_control
= msg_control
;
714 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
715 insn
->bits3
.dp_write
.msg_type
= msg_type
;
716 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
721 brw_set_dp_read_message(struct brw_compile
*p
,
722 struct brw_instruction
*insn
,
723 unsigned binding_table_index
,
724 unsigned msg_control
,
726 unsigned target_cache
,
729 unsigned response_length
)
731 struct brw_context
*brw
= p
->brw
;
735 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
736 } else if (brw
->gen
== 6) {
737 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
738 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
740 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
742 sfid
= BRW_SFID_DATAPORT_READ
;
745 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
746 header_present
, false);
749 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
750 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
751 insn
->bits3
.gen7_dp
.last_render_target
= 0;
752 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
753 } else if (brw
->gen
== 6) {
754 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
755 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
756 insn
->bits3
.gen6_dp
.last_render_target
= 0;
757 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
758 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
759 } else if (brw
->gen
== 5) {
760 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
761 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
762 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
763 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
764 } else if (brw
->is_g4x
) {
765 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
766 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
767 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
768 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
770 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
771 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
772 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
773 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
778 brw_set_sampler_message(struct brw_compile
*p
,
779 struct brw_instruction
*insn
,
780 unsigned binding_table_index
,
783 unsigned response_length
,
785 unsigned header_present
,
787 unsigned return_format
)
789 struct brw_context
*brw
= p
->brw
;
791 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
792 response_length
, header_present
, false);
795 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
796 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
797 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
798 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
799 } else if (brw
->gen
>= 5) {
800 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
801 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
802 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
803 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
804 } else if (brw
->is_g4x
) {
805 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
806 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
807 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
809 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
810 insn
->bits3
.sampler
.sampler
= sampler
;
811 insn
->bits3
.sampler
.msg_type
= msg_type
;
812 insn
->bits3
.sampler
.return_format
= return_format
;
817 #define next_insn brw_next_insn
818 struct brw_instruction
*
819 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
821 struct brw_instruction
*insn
;
823 if (p
->nr_insn
+ 1 > p
->store_size
) {
825 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
826 struct brw_instruction
, p
->store_size
);
829 p
->next_insn_offset
+= 16;
830 insn
= &p
->store
[p
->nr_insn
++];
831 memcpy(insn
, p
->current
, sizeof(*insn
));
833 /* Reset this one-shot flag:
836 if (p
->current
->header
.destreg__conditionalmod
) {
837 p
->current
->header
.destreg__conditionalmod
= 0;
838 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
841 insn
->header
.opcode
= opcode
;
845 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
850 struct brw_instruction
*insn
= next_insn(p
, opcode
);
851 brw_set_dest(p
, insn
, dest
);
852 brw_set_src0(p
, insn
, src
);
856 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
860 struct brw_reg src1
)
862 struct brw_instruction
*insn
= next_insn(p
, opcode
);
863 brw_set_dest(p
, insn
, dest
);
864 brw_set_src0(p
, insn
, src0
);
865 brw_set_src1(p
, insn
, src1
);
870 get_3src_subreg_nr(struct brw_reg reg
)
872 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
873 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
874 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
876 return reg
.subnr
/ 4;
880 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
887 struct brw_context
*brw
= p
->brw
;
888 struct brw_instruction
*insn
= next_insn(p
, opcode
);
890 gen7_convert_mrf_to_grf(p
, &dest
);
892 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
894 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
895 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
896 assert(dest
.nr
< 128);
897 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
898 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
899 dest
.type
== BRW_REGISTER_TYPE_D
||
900 dest
.type
== BRW_REGISTER_TYPE_UD
);
901 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
902 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
903 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
904 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
905 guess_execution_size(p
, insn
, dest
);
907 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
908 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
909 assert(src0
.nr
< 128);
910 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
911 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
912 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
913 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
914 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
915 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
917 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
918 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
919 assert(src1
.nr
< 128);
920 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
921 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
922 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
923 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
924 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
925 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
926 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
928 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
929 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
930 assert(src2
.nr
< 128);
931 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
932 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
933 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
934 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
935 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
936 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
939 /* Set both the source and destination types based on dest.type,
940 * ignoring the source register types. The MAD and LRP emitters ensure
941 * that all four types are float. The BFE and BFI2 emitters, however,
942 * may send us mixed D and UD types and want us to ignore that and use
943 * the destination type.
946 case BRW_REGISTER_TYPE_F
:
947 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
948 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
950 case BRW_REGISTER_TYPE_D
:
951 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
952 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
954 case BRW_REGISTER_TYPE_UD
:
955 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
956 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
965 /***********************************************************************
966 * Convenience routines.
/* Wrapper templates: each body below defines brw_<OP>(), which forwards its
 * operands to brw_alu1(), brw_alu2() or brw_alu3() with BRW_OPCODE_<OP>.
 * The last form additionally asserts that dest and all three sources have
 * type BRW_REGISTER_TYPE_F before emitting.
 * NOTE(review): the #define lines naming these templates are not visible in
 * this listing -- confirm the macro names against the full file.
 */
969 struct brw_instruction *brw_##OP(struct brw_compile *p, \
970 struct brw_reg dest, \
971 struct brw_reg src0) \
973 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
977 struct brw_instruction *brw_##OP(struct brw_compile *p, \
978 struct brw_reg dest, \
979 struct brw_reg src0, \
980 struct brw_reg src1) \
982 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
986 struct brw_instruction *brw_##OP(struct brw_compile *p, \
987 struct brw_reg dest, \
988 struct brw_reg src0, \
989 struct brw_reg src1, \
990 struct brw_reg src2) \
992 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
996 struct brw_instruction *brw_##OP(struct brw_compile *p, \
997 struct brw_reg dest, \
998 struct brw_reg src0, \
999 struct brw_reg src1, \
1000 struct brw_reg src2) \
1002 assert(dest.type == BRW_REGISTER_TYPE_F); \
1003 assert(src0.type == BRW_REGISTER_TYPE_F); \
1004 assert(src1.type == BRW_REGISTER_TYPE_F); \
1005 assert(src2.type == BRW_REGISTER_TYPE_F); \
1006 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
1009 /* Rounding operations (other than RNDD) require two instructions - the first
1010 * stores a rounded value (possibly the wrong way) in the dest register, but
1011 * also sets a per-channel "increment bit" in the flag register. A predicated
1012 * add of 1.0 fixes dest to contain the desired result.
1014 * Sandybridge and later appear to round correctly without an ADD.
/* Round template: defines brw_<OP>(p, dest, src), which emits
 * BRW_OPCODE_<OP> from src into dest.  When p->brw->gen < 6 the rounding
 * instruction also gets conditional modifier BRW_CONDITIONAL_R and is
 * followed by "ADD dest, dest, 1.0f" predicated with BRW_PREDICATE_NORMAL.
 * NOTE(review): the #define line and closing braces are not visible in this
 * listing -- confirm against the full file.
 */
1017 void brw_##OP(struct brw_compile *p, \
1018 struct brw_reg dest, \
1019 struct brw_reg src) \
1021 struct brw_instruction *rnd, *add; \
1022 rnd = next_insn(p, BRW_OPCODE_##OP); \
1023 brw_set_dest(p, rnd, dest); \
1024 brw_set_src0(p, rnd, src); \
1026 if (p->brw->gen < 6) { \
1027 /* turn on round-increments */ \
1028 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
1029 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
1030 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
1073 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1074 struct brw_reg dest
,
1075 struct brw_reg src0
,
1076 struct brw_reg src1
)
1079 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1080 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1081 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1082 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1083 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1086 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1087 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1088 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1089 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1090 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1093 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
/* Emit an AVG of src0 and src1 into dest through brw_alu2().
 * Asserts that dest, src0 and src1 all share one register type.  The switch
 * lists the B, UB, W, UW, D and UD types; the "Bad type for brw_AVG"
 * assertion belongs to the remaining case.
 * NOTE(review): the lines between the case labels and the final return are
 * not visible in this listing -- confirm how an unsupported type is handled.
 */
1096 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1097 struct brw_reg dest
,
1098 struct brw_reg src0
,
1099 struct brw_reg src1
)
1101 assert(dest
.type
== src0
.type
);
1102 assert(src0
.type
== src1
.type
);
1103 switch (src0
.type
) {
1104 case BRW_REGISTER_TYPE_B
:
1105 case BRW_REGISTER_TYPE_UB
:
1106 case BRW_REGISTER_TYPE_W
:
1107 case BRW_REGISTER_TYPE_UW
:
1108 case BRW_REGISTER_TYPE_D
:
1109 case BRW_REGISTER_TYPE_UD
:
1112 assert(!"Bad type for brw_AVG");
1115 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1118 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1119 struct brw_reg dest
,
1120 struct brw_reg src0
,
1121 struct brw_reg src1
)
1124 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1125 src0
.type
== BRW_REGISTER_TYPE_UD
||
1126 src1
.type
== BRW_REGISTER_TYPE_D
||
1127 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1128 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1131 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1132 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1133 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1134 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1135 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1138 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1139 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1140 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1141 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1142 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1145 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1146 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1147 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1148 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1150 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1154 void brw_NOP(struct brw_compile
*p
)
1156 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1157 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1158 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1159 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1166 /***********************************************************************
1167 * Comparisons, if/else/endif
1170 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1171 struct brw_reg dest
,
1172 struct brw_reg src0
,
1173 struct brw_reg src1
)
1175 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1177 insn
->header
.execution_size
= 1;
1178 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1179 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1181 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1187 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1189 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1191 p
->if_stack_depth
++;
1192 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1193 p
->if_stack_array_size
*= 2;
1194 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1195 p
->if_stack_array_size
);
1199 static struct brw_instruction
*
1200 pop_if_stack(struct brw_compile
*p
)
1202 p
->if_stack_depth
--;
1203 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1207 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1209 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1210 p
->loop_stack_array_size
*= 2;
1211 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1212 p
->loop_stack_array_size
);
1213 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1214 p
->loop_stack_array_size
);
1217 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1218 p
->loop_stack_depth
++;
1219 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1222 static struct brw_instruction
*
1223 get_inner_do_insn(struct brw_compile
*p
)
1225 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
1241 struct brw_instruction
*
1242 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1244 struct brw_context
*brw
= p
->brw
;
1245 struct brw_instruction
*insn
;
1247 insn
= next_insn(p
, BRW_OPCODE_IF
);
1249 /* Override the defaults for this instruction:
1252 brw_set_dest(p
, insn
, brw_ip_reg());
1253 brw_set_src0(p
, insn
, brw_ip_reg());
1254 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1255 } else if (brw
->gen
== 6) {
1256 brw_set_dest(p
, insn
, brw_imm_w(0));
1257 insn
->bits1
.branch_gen6
.jump_count
= 0;
1258 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1259 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1261 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1262 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1263 brw_set_src1(p
, insn
, brw_imm_ud(0));
1264 insn
->bits3
.break_cont
.jip
= 0;
1265 insn
->bits3
.break_cont
.uip
= 0;
1268 insn
->header
.execution_size
= execute_size
;
1269 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1270 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1271 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1272 if (!p
->single_program_flow
)
1273 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1275 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1277 push_if_stack(p
, insn
);
1278 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1282 /* This function is only used for gen6-style IF instructions with an
1283 * embedded comparison (conditional modifier). It is not used on gen7.
1285 struct brw_instruction
*
1286 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1287 struct brw_reg src0
, struct brw_reg src1
)
1289 struct brw_instruction
*insn
;
1291 insn
= next_insn(p
, BRW_OPCODE_IF
);
1293 brw_set_dest(p
, insn
, brw_imm_w(0));
1294 if (p
->compressed
) {
1295 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1297 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1299 insn
->bits1
.branch_gen6
.jump_count
= 0;
1300 brw_set_src0(p
, insn
, src0
);
1301 brw_set_src1(p
, insn
, src1
);
1303 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1304 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1305 insn
->header
.destreg__conditionalmod
= conditional
;
1307 if (!p
->single_program_flow
)
1308 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1310 push_if_stack(p
, insn
);
1315 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1318 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1319 struct brw_instruction
*if_inst
,
1320 struct brw_instruction
*else_inst
)
1322 /* The next instruction (where the ENDIF would be, if it existed) */
1323 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1325 assert(p
->single_program_flow
);
1326 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1327 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1328 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1330 /* Convert IF to an ADD instruction that moves the instruction pointer
1331 * to the first instruction of the ELSE block. If there is no ELSE
1332 * block, point to where ENDIF would be. Reverse the predicate.
1334 * There's no need to execute an ENDIF since we don't need to do any
1335 * stack operations, and if we're currently executing, we just want to
1336 * continue normally.
1338 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1339 if_inst
->header
.predicate_inverse
= 1;
1341 if (else_inst
!= NULL
) {
1342 /* Convert ELSE to an ADD instruction that points where the ENDIF
1345 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1347 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1348 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1350 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1355 * Patch IF and ELSE instructions with appropriate jump targets.
/* Fill in the jump fields of an IF (and optional ELSE) once the matching
 * ENDIF instruction is known.  Targets are instruction-pointer differences
 * scaled by `br`.  The visible stores show three encodings:
 * bits3.if_else.jump_count (the pre-gen6 form per the in-line comments),
 * bits1.branch_gen6.jump_count (gen == 6) and bits3.break_cont jip/uip
 * (remaining case).  ENDIF and ELSE inherit the IF's execution size.
 * NOTE(review): the definition of `br` and several branch heads are not
 * visible in this listing -- confirm against the full file.
 */
1358 patch_IF_ELSE(struct brw_compile
*p
,
1359 struct brw_instruction
*if_inst
,
1360 struct brw_instruction
*else_inst
,
1361 struct brw_instruction
*endif_inst
)
1363 struct brw_context
*brw
= p
->brw
;
1365 /* We shouldn't be patching IF and ELSE instructions in single program flow
1366 * mode when gen < 6, because in single program flow mode on those
1367 * platforms, we convert flow control instructions to conditional ADDs that
1368 * operate on IP (see brw_ENDIF).
1370 * However, on Gen6, writing to IP doesn't work in single program flow mode
1371 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1372 * not be updated by non-flow control instructions."). And on later
1373 * platforms, there is no significant benefit to converting control flow
1374 * instructions to conditional ADDs. So we do patch IF and ELSE
1375 * instructions in single program flow mode on those platforms.
1378 assert(!p
->single_program_flow
);
1380 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1381 assert(endif_inst
!= NULL
);
1382 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1385 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1386 * requires 2 chunks.
1391 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1392 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1394 if (else_inst
== NULL
) {
1395 /* Patch IF -> ENDIF */
1397 /* Turn it into an IFF, which means no mask stack operations for
1398 * all-false and jumping past the ENDIF.
1400 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1401 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1402 if_inst
->bits3
.if_else
.pop_count
= 0;
1403 if_inst
->bits3
.if_else
.pad0
= 0;
1404 } else if (brw
->gen
== 6) {
1405 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1406 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1408 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1409 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1412 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1414 /* Patch IF -> ELSE */
1416 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1417 if_inst
->bits3
.if_else
.pop_count
= 0;
1418 if_inst
->bits3
.if_else
.pad0
= 0;
1419 } else if (brw
->gen
== 6) {
1420 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1423 /* Patch ELSE -> ENDIF */
1425 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1428 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1429 else_inst
->bits3
.if_else
.pop_count
= 1;
1430 else_inst
->bits3
.if_else
.pad0
= 0;
1431 } else if (brw
->gen
== 6) {
1432 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1433 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1435 /* The IF instruction's JIP should point just past the ELSE */
1436 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1437 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1438 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1439 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1445 brw_ELSE(struct brw_compile
*p
)
1447 struct brw_context
*brw
= p
->brw
;
1448 struct brw_instruction
*insn
;
1450 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1453 brw_set_dest(p
, insn
, brw_ip_reg());
1454 brw_set_src0(p
, insn
, brw_ip_reg());
1455 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1456 } else if (brw
->gen
== 6) {
1457 brw_set_dest(p
, insn
, brw_imm_w(0));
1458 insn
->bits1
.branch_gen6
.jump_count
= 0;
1459 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1460 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1462 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1463 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1464 brw_set_src1(p
, insn
, brw_imm_ud(0));
1465 insn
->bits3
.break_cont
.jip
= 0;
1466 insn
->bits3
.break_cont
.uip
= 0;
1469 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1470 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1471 if (!p
->single_program_flow
)
1472 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1474 push_if_stack(p
, insn
);
/* Close the innermost IF/ELSE construct.
 * Decrements the IF count of the current loop level and pops the IF stack;
 * when the popped entry is an ELSE a second pop follows.  The pair is then
 * either rewritten as ADDs by convert_IF_ELSE_to_ADD() (the
 * "ENDIF is useless" path) or an ENDIF is emitted and the jumps are filled
 * in by patch_IF_ELSE().  ENDIF is fetched with next_insn() before the
 * stack entries are resolved because, per the in-line comment, next_insn()
 * may move p->store.
 * NOTE(review): the assignments to emit_endif/if_inst/else_inst and several
 * branch heads are not visible in this listing -- confirm against the full
 * file.
 */
1478 brw_ENDIF(struct brw_compile
*p
)
1480 struct brw_context
*brw
= p
->brw
;
1481 struct brw_instruction
*insn
= NULL
;
1482 struct brw_instruction
*else_inst
= NULL
;
1483 struct brw_instruction
*if_inst
= NULL
;
1484 struct brw_instruction
*tmp
;
1485 bool emit_endif
= true;
1487 /* In single program flow mode, we can express IF and ELSE instructions
1488 * equivalently as ADD instructions that operate on IP. On platforms prior
1489 * to Gen6, flow control instructions cause an implied thread switch, so
1490 * this is a significant savings.
1492 * However, on Gen6, writing to IP doesn't work in single program flow mode
1493 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1494 * not be updated by non-flow control instructions."). And on later
1495 * platforms, there is no significant benefit to converting control flow
1496 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1499 if (brw
->gen
< 6 && p
->single_program_flow
)
1503 * A single next_insn() may change the base adress of instruction store
1504 * memory(p->store), so call it first before referencing the instruction
1505 * store pointer from an index
1508 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1510 /* Pop the IF and (optional) ELSE instructions from the stack */
1511 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1512 tmp
= pop_if_stack(p
);
1513 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1515 tmp
= pop_if_stack(p
);
1520 /* ENDIF is useless; don't bother emitting it. */
1521 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1526 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1527 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1528 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1529 } else if (brw
->gen
== 6) {
1530 brw_set_dest(p
, insn
, brw_imm_w(0));
1531 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1532 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1534 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1535 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1536 brw_set_src1(p
, insn
, brw_imm_ud(0));
1539 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1540 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1541 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1543 /* Also pop item off the stack in the endif instruction: */
1545 insn
->bits3
.if_else
.jump_count
= 0;
1546 insn
->bits3
.if_else
.pop_count
= 1;
1547 insn
->bits3
.if_else
.pad0
= 0;
1548 } else if (brw
->gen
== 6) {
1549 insn
->bits1
.branch_gen6
.jump_count
= 2;
1551 insn
->bits3
.break_cont
.jip
= 2;
1553 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1556 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1558 struct brw_context
*brw
= p
->brw
;
1559 struct brw_instruction
*insn
;
1561 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1562 if (brw
->gen
>= 6) {
1563 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1564 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1565 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1567 brw_set_dest(p
, insn
, brw_ip_reg());
1568 brw_set_src0(p
, insn
, brw_ip_reg());
1569 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1570 insn
->bits3
.if_else
.pad0
= 0;
1571 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1573 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1574 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1579 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1581 struct brw_instruction
*insn
;
1583 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1584 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1585 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1586 brw_set_dest(p
, insn
, brw_ip_reg());
1587 brw_set_src0(p
, insn
, brw_ip_reg());
1588 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1590 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1591 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1595 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1597 struct brw_instruction
*insn
;
1598 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1599 brw_set_dest(p
, insn
, brw_ip_reg());
1600 brw_set_src0(p
, insn
, brw_ip_reg());
1601 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1602 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1603 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1604 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1605 insn
->bits3
.if_else
.pad0
= 0;
1606 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1610 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1612 struct brw_instruction
*insn
;
1614 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1615 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1616 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1617 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1619 if (p
->compressed
) {
1620 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1622 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1623 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1630 * The DO/WHILE is just an unterminated loop -- break or continue are
1631 * used for control within the loop. We have a few ways they can be
1634 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1635 * jip and no DO instruction.
1637 * For non-uniform control flow pre-gen6, there's a DO instruction to
1638 * push the mask, and a WHILE to jump back, and BREAK to get out and
1641 * For gen6, there's no more mask stack, so no need for DO. WHILE
1642 * just points back to the first instruction of the loop.
1644 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1646 struct brw_context
*brw
= p
->brw
;
1648 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1649 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1650 return &p
->store
[p
->nr_insn
];
1652 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1654 push_loop_stack(p
, insn
);
1656 /* Override the defaults for this instruction:
1658 brw_set_dest(p
, insn
, brw_null_reg());
1659 brw_set_src0(p
, insn
, brw_null_reg());
1660 brw_set_src1(p
, insn
, brw_null_reg());
1662 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1663 insn
->header
.execution_size
= execute_size
;
1664 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1665 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1666 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1673 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1676 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1677 * nesting, since it can always just point to the end of the block/current loop.
1680 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1682 struct brw_context
*brw
= p
->brw
;
1683 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1684 struct brw_instruction
*inst
;
1685 int br
= (brw
->gen
== 5) ? 2 : 1;
1687 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1688 /* If the jump count is != 0, that means that this instruction has already
1689 * been patched because it's part of a loop inside of the one we're
1692 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1693 inst
->bits3
.if_else
.jump_count
== 0) {
1694 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1695 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1696 inst
->bits3
.if_else
.jump_count
== 0) {
1697 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
/* Close the innermost loop opened by brw_DO().
 * Visible variants:
 *  - gen >= 7: WHILE whose bits3.break_cont.jip holds the scaled distance
 *    back to the loop start, execution size 8;
 *  - gen == 6: WHILE using bits1.branch_gen6.jump_count, execution size 8;
 *  - single-program-flow: an ADD on the IP register with the byte offset
 *    (do_insn - insn) * 16, execution size 1;
 *  - otherwise: WHILE using bits3.if_else.jump_count with the DO's
 *    execution size, followed by brw_patch_break_cont().
 * In every case next_insn() is called before get_inner_do_insn().
 * Afterwards compression is turned off on the instruction, the default
 * predicate in p->current is cleared and the loop stack depth is
 * decremented.
 * NOTE(review): the definition of `br` and some branch boundaries are not
 * visible in this listing -- confirm against the full file.
 */
1702 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1704 struct brw_context
*brw
= p
->brw
;
1705 struct brw_instruction
*insn
, *do_insn
;
1711 if (brw
->gen
>= 7) {
1712 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1713 do_insn
= get_inner_do_insn(p
);
1715 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1716 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1717 brw_set_src1(p
, insn
, brw_imm_ud(0));
1718 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1720 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1721 } else if (brw
->gen
== 6) {
1722 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1723 do_insn
= get_inner_do_insn(p
);
1725 brw_set_dest(p
, insn
, brw_imm_w(0));
1726 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1727 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1728 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1730 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1732 if (p
->single_program_flow
) {
1733 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1734 do_insn
= get_inner_do_insn(p
);
1736 brw_set_dest(p
, insn
, brw_ip_reg());
1737 brw_set_src0(p
, insn
, brw_ip_reg());
1738 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1739 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1741 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1742 do_insn
= get_inner_do_insn(p
);
1744 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1746 brw_set_dest(p
, insn
, brw_ip_reg());
1747 brw_set_src0(p
, insn
, brw_ip_reg());
1748 brw_set_src1(p
, insn
, brw_imm_d(0));
1750 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1751 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1752 insn
->bits3
.if_else
.pop_count
= 0;
1753 insn
->bits3
.if_else
.pad0
= 0;
1755 brw_patch_break_cont(p
, insn
);
1758 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1759 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1761 p
->loop_stack_depth
--;
1766 /* To integrate with the above, it makes sense that the comparison
1767 * instruction should populate the flag register. It might be simpler
1768 * just to use the flag reg for most WM tasks?
/* Emit a CMP of src0 and src1 into dest under the conditional modifier
 * `conditional`.
 * When dest is in the architecture register file (a further condition on
 * dest is not visible here), the default state in p->current switches to
 * BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff, so later
 * instructions are predicated on the result.
 * On gen 7 a CMP whose destination is the null architecture register gets
 * thread_control = BRW_THREAD_SWITCH (workaround quoted in the in-line
 * comment).
 * NOTE(review): part of the first condition is not visible in this
 * listing -- confirm against the full file.
 */
1770 void brw_CMP(struct brw_compile
*p
,
1771 struct brw_reg dest
,
1772 unsigned conditional
,
1773 struct brw_reg src0
,
1774 struct brw_reg src1
)
1776 struct brw_context
*brw
= p
->brw
;
1777 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1779 insn
->header
.destreg__conditionalmod
= conditional
;
1780 brw_set_dest(p
, insn
, dest
);
1781 brw_set_src0(p
, insn
, src0
);
1782 brw_set_src1(p
, insn
, src1
);
1784 /* guess_execution_size(insn, src0); */
1787 /* Make it so that future instructions will use the computed flag
1788 * value until brw_set_predicate_control_flag_value() is called
1791 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1793 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1794 p
->flag_value
= 0xff;
1797 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1799 * "Any CMP instruction with a null destination must use a {switch}."
1801 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1802 * mentioned on their work-arounds pages.
1804 if (brw
->gen
== 7) {
1805 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1806 dest
.nr
== BRW_ARF_NULL
) {
1807 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1812 /* Issue 'wait' instruction for n1, host could program MMIO
1813 to wake up thread. */
1814 void brw_WAIT (struct brw_compile
*p
)
1816 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1817 struct brw_reg src
= brw_notification_1_reg();
1819 brw_set_dest(p
, insn
, src
);
1820 brw_set_src0(p
, insn
, src
);
1821 brw_set_src1(p
, insn
, brw_null_reg());
1822 insn
->header
.execution_size
= 0; /* must */
1823 insn
->header
.predicate_control
= 0;
1824 insn
->header
.compression_control
= 0;
1828 /***********************************************************************
1829 * Helpers for the various SEND message types:
1832 /** Extended math function, float[8].
/* Emit a single-source extended math operation.
 * gen >= 6: a native MATH instruction with `function` stored in
 * destreg__conditionalmod and the null register as src1.  dest must be a
 * GRF (or an MRF on gen >= 7), src must be a GRF, and both must have
 * horizontal stride 1.  The integer-divide functions require a non-float
 * source; the other visible type assertion requires a float source.  On
 * gen 6 the source must not be negated.
 * Otherwise: a SEND with predicate control cleared and msg_reg_nr stored in
 * destreg__conditionalmod, configured through brw_set_math_message().
 * NOTE(review): several parameters and most arguments of
 * brw_set_math_message() are not visible in this listing -- confirm against
 * the full file.
 */
1834 void brw_math( struct brw_compile
*p
,
1835 struct brw_reg dest
,
1837 unsigned msg_reg_nr
,
1840 unsigned precision
)
1842 struct brw_context
*brw
= p
->brw
;
1844 if (brw
->gen
>= 6) {
1845 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1847 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1848 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1849 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1851 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1853 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1855 /* Source modifiers are ignored for extended math instructions on Gen6. */
1856 if (brw
->gen
== 6) {
1857 assert(!src
.negate
);
1861 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1862 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1863 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1864 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1866 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1869 /* Math is the same ISA format as other opcodes, except that CondModifier
1870 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1872 insn
->header
.destreg__conditionalmod
= function
;
1874 brw_set_dest(p
, insn
, dest
);
1875 brw_set_src0(p
, insn
, src
);
1876 brw_set_src1(p
, insn
, brw_null_reg());
1878 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1880 /* Example code doesn't set predicate_control for send
1883 insn
->header
.predicate_control
= 0;
1884 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1886 brw_set_dest(p
, insn
, dest
);
1887 brw_set_src0(p
, insn
, src
);
1888 brw_set_math_message(p
,
1891 src
.type
== BRW_REGISTER_TYPE_D
,
/** Extended math function taking two source operands. */
/* Emit a two-source MATH instruction with `function` stored in
 * destreg__conditionalmod.
 * Asserted preconditions visible here: dest is a GRF (or an MRF on
 * gen >= 7) with horizontal stride 1; both sources are GRFs, with
 * horizontal stride 1 on gen 6; the integer-divide functions take non-float
 * sources while the other visible type assertions require float sources;
 * on gen 6 neither source is negated.
 * NOTE(review): the `function` parameter line and some assertions are not
 * visible in this listing -- confirm against the full file.
 */
1899 void brw_math2(struct brw_compile
*p
,
1900 struct brw_reg dest
,
1902 struct brw_reg src0
,
1903 struct brw_reg src1
)
1905 struct brw_context
*brw
= p
->brw
;
1906 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1908 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1909 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1910 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1911 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1913 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1914 if (brw
->gen
== 6) {
1915 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1916 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1919 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1920 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1921 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1922 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1923 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1925 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1926 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1929 /* Source modifiers are ignored for extended math instructions on Gen6. */
1930 if (brw
->gen
== 6) {
1931 assert(!src0
.negate
);
1933 assert(!src1
.negate
);
1937 /* Math is the same ISA format as other opcodes, except that CondModifier
1938 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1940 insn
->header
.destreg__conditionalmod
= function
;
1942 brw_set_dest(p
, insn
, dest
);
1943 brw_set_src0(p
, insn
, src0
);
1944 brw_set_src1(p
, insn
, src1
);
/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
/* Spill helper: emit the instructions that write an OWORD block to scratch.
 * Visible steps:
 *  1. pick the 2-OWORD block size when num_regs == 1 (the 4-OWORD constant
 *     is the other visible choice);
 *  2. with mask control disabled and compression off (state pushed and
 *     popped around it), copy g0 into `mrf` as the message header and store
 *     `offset` in header element 2;
 *  3. emit a SEND with mrf.nr in destreg__conditionalmod, configured by
 *     brw_set_dp_write_message() for a stateless (binding table index 255)
 *     write with header present.
 * On gen >= 6 the destination is the null register and no write commit is
 * requested (send_commit_msg = 0); the other visible path sets
 * send_commit_msg = 1.
 * NOTE(review): the mrf/num_regs/offset parameter lines and several
 * statements are not visible in this listing -- confirm against the full
 * file.
 */
1955 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1960 struct brw_context
*brw
= p
->brw
;
1961 uint32_t msg_control
, msg_type
;
1967 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1969 if (num_regs
== 1) {
1970 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1973 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1977 /* Set up the message header. This is g0, with g0.2 filled with
1978 * the offset. We don't want to leave our offset around in g0 or
1979 * it'll screw up texture samples, so set it up inside the message
1983 brw_push_insn_state(p
);
1984 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1985 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1987 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1989 /* set message header global offset field (reg 0, element 2) */
1991 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1993 2), BRW_REGISTER_TYPE_UD
),
1994 brw_imm_ud(offset
));
1996 brw_pop_insn_state(p
);
2000 struct brw_reg dest
;
2001 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2002 int send_commit_msg
;
2003 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
2004 BRW_REGISTER_TYPE_UW
);
2006 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
2007 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2008 src_header
= vec16(src_header
);
2010 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2011 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2013 /* Until gen6, writes followed by reads from the same location
2014 * are not guaranteed to be ordered unless write_commit is set.
2015 * If set, then a no-op write is issued to the destination
2016 * register to set a dependency, and a read from the destination
2017 * can be used to ensure the ordering.
2019 * For gen6, only writes between different threads need ordering
2020 * protection. Our use of DP writes is all about register
2021 * spilling within a thread.
2023 if (brw
->gen
>= 6) {
2024 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2025 send_commit_msg
= 0;
2028 send_commit_msg
= 1;
2031 brw_set_dest(p
, insn
, dest
);
2032 if (brw
->gen
>= 6) {
2033 brw_set_src0(p
, insn
, mrf
);
2035 brw_set_src0(p
, insn
, brw_null_reg());
2039 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2041 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2043 brw_set_dp_write_message(p
,
2045 255, /* binding table index (255=stateless) */
2049 true, /* header_present */
2050 0, /* not a render target */
2051 send_commit_msg
, /* response_length */
2059 * Read a block of owords (half a GRF each) from the scratch buffer
2060 * using a constant index per channel.
2062 * Offset must be aligned to oword size (16 bytes). Used for register
2066 brw_oword_block_read_scratch(struct brw_compile
*p
,
2067 struct brw_reg dest
,
2072 struct brw_context
*brw
= p
->brw
;
2073 uint32_t msg_control
;
2079 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2080 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2082 if (num_regs
== 1) {
2083 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2086 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2091 brw_push_insn_state(p
);
2092 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2093 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2095 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2097 /* set message header global offset field (reg 0, element 2) */
2099 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2101 2), BRW_REGISTER_TYPE_UD
),
2102 brw_imm_ud(offset
));
2104 brw_pop_insn_state(p
);
2108 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2110 assert(insn
->header
.predicate_control
== 0);
2111 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2112 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2114 brw_set_dest(p
, insn
, dest
); /* UW? */
2115 if (brw
->gen
>= 6) {
2116 brw_set_src0(p
, insn
, mrf
);
2118 brw_set_src0(p
, insn
, brw_null_reg());
2121 brw_set_dp_read_message(p
,
2123 255, /* binding table index (255=stateless) */
2125 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2126 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2128 true, /* header_present */
2134 gen7_block_read_scratch(struct brw_compile
*p
,
2135 struct brw_reg dest
,
2139 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2141 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2143 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2144 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2146 brw_set_dest(p
, insn
, dest
);
2148 /* The HW requires that the header is present; this is to get the g0.5
2151 bool header_present
= true;
2152 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2154 brw_set_message_descriptor(p
, insn
,
2155 GEN7_SFID_DATAPORT_DATA_CACHE
,
2156 1, /* mlen: just g0 */
2161 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2163 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2164 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2166 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2167 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2168 * is 32 bytes, which happens to be the size of a register.
2171 assert(offset
< (1 << 12));
2172 insn
->bits3
.ud
|= offset
;
2176 * Read a float[4] vector from the data port Data Cache (const buffer).
2177 * Location (in buffer) should be a multiple of 16.
2178 * Used for fetching shader constants.
2180 void brw_oword_block_read(struct brw_compile
*p
,
2181 struct brw_reg dest
,
2184 uint32_t bind_table_index
)
2186 struct brw_context
*brw
= p
->brw
;
2188 /* On newer hardware, offset is in units of owords. */
2192 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2194 brw_push_insn_state(p
);
2195 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2196 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2197 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2199 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2201 /* set message header global offset field (reg 0, element 2) */
2203 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2205 2), BRW_REGISTER_TYPE_UD
),
2206 brw_imm_ud(offset
));
2208 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2209 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2211 /* cast dest to a uword[8] vector */
2212 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2214 brw_set_dest(p
, insn
, dest
);
2215 if (brw
->gen
>= 6) {
2216 brw_set_src0(p
, insn
, mrf
);
2218 brw_set_src0(p
, insn
, brw_null_reg());
2221 brw_set_dp_read_message(p
,
2224 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2225 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2226 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2228 true, /* header_present */
2229 1); /* response_length (1 reg, 2 owords!) */
2231 brw_pop_insn_state(p
);
2235 void brw_fb_WRITE(struct brw_compile
*p
,
2237 unsigned msg_reg_nr
,
2238 struct brw_reg src0
,
2239 unsigned msg_control
,
2240 unsigned binding_table_index
,
2241 unsigned msg_length
,
2242 unsigned response_length
,
2244 bool header_present
)
2246 struct brw_context
*brw
= p
->brw
;
2247 struct brw_instruction
*insn
;
2249 struct brw_reg dest
;
2251 if (dispatch_width
== 16)
2252 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2254 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2256 if (brw
->gen
>= 6) {
2257 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2259 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2261 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2263 if (brw
->gen
>= 6) {
2264 /* headerless version, just submit color payload */
2265 src0
= brw_message_reg(msg_reg_nr
);
2267 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2269 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2271 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2274 brw_set_dest(p
, insn
, dest
);
2275 brw_set_src0(p
, insn
, src0
);
2276 brw_set_dp_write_message(p
,
2278 binding_table_index
,
2283 eot
, /* last render target write */
2286 0 /* send_commit_msg */);
2291 * Texture sample instruction.
2292 * Note: the msg_type plus msg_length values determine exactly what kind
2293 * of sampling operation is performed. See volume 4, page 161 of docs.
2295 void brw_SAMPLE(struct brw_compile
*p
,
2296 struct brw_reg dest
,
2297 unsigned msg_reg_nr
,
2298 struct brw_reg src0
,
2299 unsigned binding_table_index
,
2302 unsigned response_length
,
2303 unsigned msg_length
,
2304 unsigned header_present
,
2306 unsigned return_format
)
2308 struct brw_context
*brw
= p
->brw
;
2309 struct brw_instruction
*insn
;
2311 if (msg_reg_nr
!= -1)
2312 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2314 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2315 insn
->header
.predicate_control
= 0; /* XXX */
2317 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2319 * "Instruction compression is not allowed for this instruction (that
2320 * is, send). The hardware behavior is undefined if this instruction is
2321 * set as compressed. However, compress control can be set to "SecHalf"
2322 * to affect the EMask generation."
2324 * No similar wording is found in later PRMs, but there are examples
2325 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2326 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2327 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2329 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2330 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2333 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2335 brw_set_dest(p
, insn
, dest
);
2336 brw_set_src0(p
, insn
, src0
);
2337 brw_set_sampler_message(p
, insn
,
2338 binding_table_index
,
2348 /* All these variables are pretty confusing - we might be better off
2349 * using bitmasks and macros for this, in the old style. Or perhaps
2350 * just having the caller instantiate the fields in dword3 itself.
2352 void brw_urb_WRITE(struct brw_compile
*p
,
2353 struct brw_reg dest
,
2354 unsigned msg_reg_nr
,
2355 struct brw_reg src0
,
2356 enum brw_urb_write_flags flags
,
2357 unsigned msg_length
,
2358 unsigned response_length
,
2362 struct brw_context
*brw
= p
->brw
;
2363 struct brw_instruction
*insn
;
2365 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2367 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2368 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2369 brw_push_insn_state(p
);
2370 brw_set_access_mode(p
, BRW_ALIGN_1
);
2371 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2372 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2373 BRW_REGISTER_TYPE_UD
),
2374 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2375 brw_imm_ud(0xff00));
2376 brw_pop_insn_state(p
);
2379 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2381 assert(msg_length
< BRW_MAX_MRF
);
2383 brw_set_dest(p
, insn
, dest
);
2384 brw_set_src0(p
, insn
, src0
);
2385 brw_set_src1(p
, insn
, brw_imm_d(0));
2388 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2390 brw_set_urb_message(p
,
2400 brw_find_next_block_end(struct brw_compile
*p
, int start_offset
)
2403 void *store
= p
->store
;
2405 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2406 offset
= next_offset(store
, offset
)) {
2407 struct brw_instruction
*insn
= store
+ offset
;
2409 switch (insn
->header
.opcode
) {
2410 case BRW_OPCODE_ENDIF
:
2411 case BRW_OPCODE_ELSE
:
2412 case BRW_OPCODE_WHILE
:
2413 case BRW_OPCODE_HALT
:
2421 /* There is no DO instruction on gen6, so to find the end of the loop
2422 * we have to see if the loop is jumping back before our start
2426 brw_find_loop_end(struct brw_compile
*p
, int start_offset
)
2428 struct brw_context
*brw
= p
->brw
;
2431 void *store
= p
->store
;
2433 /* Always start after the instruction (such as a WHILE) we're trying to fix
2436 for (offset
= next_offset(store
, start_offset
); offset
< p
->next_insn_offset
;
2437 offset
= next_offset(store
, offset
)) {
2438 struct brw_instruction
*insn
= store
+ offset
;
2440 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2441 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2442 : insn
->bits3
.break_cont
.jip
;
2443 if (offset
+ jip
* scale
<= start_offset
)
2447 assert(!"not reached");
2448 return start_offset
;
2451 /* After program generation, go back and update the UIP and JIP of
2452 * BREAK, CONT, and HALT instructions to their correct locations.
2455 brw_set_uip_jip(struct brw_compile
*p
)
2457 struct brw_context
*brw
= p
->brw
;
2460 void *store
= p
->store
;
2465 for (offset
= 0; offset
< p
->next_insn_offset
;
2466 offset
= next_offset(store
, offset
)) {
2467 struct brw_instruction
*insn
= store
+ offset
;
2469 if (insn
->header
.cmpt_control
) {
2470 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2471 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2472 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2473 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2477 int block_end_offset
= brw_find_next_block_end(p
, offset
);
2478 switch (insn
->header
.opcode
) {
2479 case BRW_OPCODE_BREAK
:
2480 assert(block_end_offset
!= 0);
2481 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2482 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2483 insn
->bits3
.break_cont
.uip
=
2484 (brw_find_loop_end(p
, offset
) - offset
+
2485 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2487 case BRW_OPCODE_CONTINUE
:
2488 assert(block_end_offset
!= 0);
2489 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2490 insn
->bits3
.break_cont
.uip
=
2491 (brw_find_loop_end(p
, offset
) - offset
) / scale
;
2493 assert(insn
->bits3
.break_cont
.uip
!= 0);
2494 assert(insn
->bits3
.break_cont
.jip
!= 0);
2497 case BRW_OPCODE_ENDIF
:
2498 if (block_end_offset
== 0)
2499 insn
->bits3
.break_cont
.jip
= 2;
2501 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2504 case BRW_OPCODE_HALT
:
2505 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2507 * "In case of the halt instruction not inside any conditional
2508 * code block, the value of <JIP> and <UIP> should be the
2509 * same. In case of the halt instruction inside conditional code
2510 * block, the <UIP> should be the end of the program, and the
2511 * <JIP> should be end of the most inner conditional code block."
2513 * The uip will have already been set by whoever set up the
2516 if (block_end_offset
== 0) {
2517 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2519 insn
->bits3
.break_cont
.jip
= (block_end_offset
- offset
) / scale
;
2521 assert(insn
->bits3
.break_cont
.uip
!= 0);
2522 assert(insn
->bits3
.break_cont
.jip
!= 0);
2528 void brw_ff_sync(struct brw_compile
*p
,
2529 struct brw_reg dest
,
2530 unsigned msg_reg_nr
,
2531 struct brw_reg src0
,
2533 unsigned response_length
,
2536 struct brw_context
*brw
= p
->brw
;
2537 struct brw_instruction
*insn
;
2539 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2541 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2542 brw_set_dest(p
, insn
, dest
);
2543 brw_set_src0(p
, insn
, src0
);
2544 brw_set_src1(p
, insn
, brw_imm_d(0));
2547 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2549 brw_set_ff_sync_message(p
,
2557 * Emit the SEND instruction necessary to generate stream output data on Gen6
2558 * (for transform feedback).
2560 * If send_commit_msg is true, this is the last piece of stream output data
2561 * from this thread, so send the data as a committed write. According to the
2562 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2564 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2565 * writes are complete by sending the final write as a committed write."
2568 brw_svb_write(struct brw_compile
*p
,
2569 struct brw_reg dest
,
2570 unsigned msg_reg_nr
,
2571 struct brw_reg src0
,
2572 unsigned binding_table_index
,
2573 bool send_commit_msg
)
2575 struct brw_instruction
*insn
;
2577 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2579 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2580 brw_set_dest(p
, insn
, dest
);
2581 brw_set_src0(p
, insn
, src0
);
2582 brw_set_src1(p
, insn
, brw_imm_d(0));
2583 brw_set_dp_write_message(p
, insn
,
2584 binding_table_index
,
2585 0, /* msg_control: ignored */
2586 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2588 true, /* header_present */
2589 0, /* last_render_target: ignored */
2590 send_commit_msg
, /* response_length */
2591 0, /* end_of_thread */
2592 send_commit_msg
); /* send_commit_msg */
2596 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2597 struct brw_instruction
*insn
,
2599 unsigned bind_table_index
,
2600 unsigned msg_length
,
2601 unsigned response_length
,
2602 bool header_present
)
2604 if (p
->brw
->is_haswell
) {
2605 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2606 msg_length
, response_length
,
2607 header_present
, false);
2610 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2611 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2612 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2614 insn
->bits3
.gen7_dp
.msg_type
=
2615 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2617 insn
->bits3
.gen7_dp
.msg_type
=
2618 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2622 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2623 msg_length
, response_length
,
2624 header_present
, false);
2626 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2628 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2629 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2632 if (response_length
)
2633 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2635 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2636 insn
->bits3
.ud
|= atomic_op
<< 8;
2640 brw_untyped_atomic(struct brw_compile
*p
,
2641 struct brw_reg dest
,
2644 unsigned bind_table_index
,
2645 unsigned msg_length
,
2646 unsigned response_length
) {
2647 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2649 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2650 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2651 brw_set_src1(p
, insn
, brw_imm_d(0));
2652 brw_set_dp_untyped_atomic_message(
2653 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2654 insn
->header
.access_mode
== BRW_ALIGN_1
);
2658 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2659 struct brw_instruction
*insn
,
2660 unsigned bind_table_index
,
2661 unsigned msg_length
,
2662 unsigned response_length
,
2663 bool header_present
)
2665 const unsigned dispatch_width
=
2666 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2667 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2669 if (p
->brw
->is_haswell
) {
2670 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2671 msg_length
, response_length
,
2672 header_present
, false);
2674 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2676 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2677 msg_length
, response_length
,
2678 header_present
, false);
2680 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2683 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2684 if (dispatch_width
== 16)
2685 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2687 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2690 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2692 /* Set mask of 32-bit channels to drop. */
2693 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2697 brw_untyped_surface_read(struct brw_compile
*p
,
2698 struct brw_reg dest
,
2700 unsigned bind_table_index
,
2701 unsigned msg_length
,
2702 unsigned response_length
)
2704 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2706 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2707 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2708 brw_set_dp_untyped_surface_read_message(
2709 p
, insn
, bind_table_index
, msg_length
, response_length
,
2710 insn
->header
.access_mode
== BRW_ALIGN_1
);
2714 * This instruction is generated as a single-channel align1 instruction by
2715 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2717 * We can't use the typed atomic op in the FS because that has the execution
2718 * mask ANDed with the pixel mask, but we just want to write the one dword for
2721 * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
2722 * one u32. So we use the same untyped atomic write message as the pixel
2725 * The untyped atomic operation requires a BUFFER surface type with RAW
2726 * format, and is only accessible through the legacy DATA_CACHE dataport
2729 void brw_shader_time_add(struct brw_compile
*p
,
2730 struct brw_reg payload
,
2731 uint32_t surf_index
)
2733 struct brw_context
*brw
= p
->brw
;
2734 assert(brw
->gen
>= 7);
2736 brw_push_insn_state(p
);
2737 brw_set_access_mode(p
, BRW_ALIGN_1
);
2738 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2739 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2740 brw_pop_insn_state(p
);
2742 /* We use brw_vec1_reg and unmasked because we want to increment the given
2745 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2747 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2749 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2750 2 /* message length */,
2751 0 /* response length */,
2752 false /* header present */);