2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
299 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
302 struct brw_context
*brw
= p
->brw
;
304 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
305 assert(reg
.nr
< 128);
307 gen7_convert_mrf_to_grf(p
, ®
);
309 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
310 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
317 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
320 validate_reg(insn
, reg
);
322 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
323 insn
->bits1
.da1
.src0_reg_type
=
324 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
325 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
326 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
327 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
329 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
330 insn
->bits3
.ud
= reg
.dw1
.ud
;
332 /* Required to set some fields in src1 as well:
334 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
335 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
339 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
340 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
341 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
342 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
345 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
346 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
350 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
352 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
353 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
356 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
360 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
361 if (reg
.width
== BRW_WIDTH_1
&&
362 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
363 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
364 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
365 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
368 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
369 insn
->bits2
.da1
.src0_width
= reg
.width
;
370 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
374 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
375 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
376 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
377 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
382 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
383 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
385 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
392 brw_set_src1(struct brw_compile
*p
,
393 struct brw_instruction
*insn
,
396 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
398 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
399 assert(reg
.nr
< 128);
401 gen7_convert_mrf_to_grf(p
, ®
);
403 validate_reg(insn
, reg
);
405 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
406 insn
->bits1
.da1
.src1_reg_type
=
407 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
408 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
409 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
411 /* Only src1 can be immediate in two-argument instructions.
413 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
415 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
416 insn
->bits3
.ud
= reg
.dw1
.ud
;
419 /* This is a hardware restriction, which may or may not be lifted
422 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
423 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
425 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
426 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
427 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
430 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
431 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
434 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
435 if (reg
.width
== BRW_WIDTH_1
&&
436 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
437 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
438 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
439 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
442 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
443 insn
->bits3
.da1
.src1_width
= reg
.width
;
444 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
448 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
449 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
450 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
451 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
453 /* This is an oddity of the fact we're using the same
454 * descriptions for registers in align_16 as align_1:
456 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
457 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
459 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
465 * Set the Message Descriptor and Extended Message Descriptor fields
468 * \note This zeroes out the Function Control bits, so it must be called
469 * \b before filling out any message-specific data. Callers can
470 * choose not to fill in irrelevant bits; they will be zero.
473 brw_set_message_descriptor(struct brw_compile
*p
,
474 struct brw_instruction
*inst
,
475 enum brw_message_target sfid
,
477 unsigned response_length
,
481 struct brw_context
*brw
= p
->brw
;
483 brw_set_src1(p
, inst
, brw_imm_d(0));
486 inst
->bits3
.generic_gen5
.header_present
= header_present
;
487 inst
->bits3
.generic_gen5
.response_length
= response_length
;
488 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
489 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
492 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
493 inst
->header
.destreg__conditionalmod
= sfid
;
495 /* Set Extended Message Descriptor (ex_desc) */
496 inst
->bits2
.send_gen5
.sfid
= sfid
;
497 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
500 inst
->bits3
.generic
.response_length
= response_length
;
501 inst
->bits3
.generic
.msg_length
= msg_length
;
502 inst
->bits3
.generic
.msg_target
= sfid
;
503 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
507 static void brw_set_math_message( struct brw_compile
*p
,
508 struct brw_instruction
*insn
,
510 unsigned integer_type
,
514 struct brw_context
*brw
= p
->brw
;
516 unsigned response_length
;
518 /* Infer message length from the function */
520 case BRW_MATH_FUNCTION_POW
:
521 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
522 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
523 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
531 /* Infer response length from the function */
533 case BRW_MATH_FUNCTION_SINCOS
:
534 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
543 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
544 msg_length
, response_length
, false, false);
546 insn
->bits3
.math_gen5
.function
= function
;
547 insn
->bits3
.math_gen5
.int_type
= integer_type
;
548 insn
->bits3
.math_gen5
.precision
= low_precision
;
549 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
550 insn
->bits3
.math_gen5
.data_type
= dataType
;
551 insn
->bits3
.math_gen5
.snapshot
= 0;
553 insn
->bits3
.math
.function
= function
;
554 insn
->bits3
.math
.int_type
= integer_type
;
555 insn
->bits3
.math
.precision
= low_precision
;
556 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
557 insn
->bits3
.math
.data_type
= dataType
;
559 insn
->header
.saturate
= 0;
563 static void brw_set_ff_sync_message(struct brw_compile
*p
,
564 struct brw_instruction
*insn
,
566 unsigned response_length
,
569 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
570 1, response_length
, true, end_of_thread
);
571 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
572 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
573 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
574 insn
->bits3
.urb_gen5
.allocate
= allocate
;
575 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
576 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
579 static void brw_set_urb_message( struct brw_compile
*p
,
580 struct brw_instruction
*insn
,
581 enum brw_urb_write_flags flags
,
583 unsigned response_length
,
585 unsigned swizzle_control
)
587 struct brw_context
*brw
= p
->brw
;
589 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
590 msg_length
, response_length
, true,
591 flags
& BRW_URB_WRITE_EOT
);
593 if (flags
& BRW_URB_WRITE_OWORD
) {
594 assert(msg_length
== 2); /* header + one OWORD of data */
595 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
597 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
599 insn
->bits3
.urb_gen7
.offset
= offset
;
600 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
601 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
602 insn
->bits3
.urb_gen7
.per_slot_offset
=
603 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
604 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
605 } else if (brw
->gen
>= 5) {
606 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
607 insn
->bits3
.urb_gen5
.offset
= offset
;
608 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
609 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
610 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
611 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
613 insn
->bits3
.urb
.opcode
= 0; /* ? */
614 insn
->bits3
.urb
.offset
= offset
;
615 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
616 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
617 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
618 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
623 brw_set_dp_write_message(struct brw_compile
*p
,
624 struct brw_instruction
*insn
,
625 unsigned binding_table_index
,
626 unsigned msg_control
,
630 unsigned last_render_target
,
631 unsigned response_length
,
632 unsigned end_of_thread
,
633 unsigned send_commit_msg
)
635 struct brw_context
*brw
= p
->brw
;
639 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
640 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
641 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
643 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
644 } else if (brw
->gen
== 6) {
645 /* Use the render cache for all write messages. */
646 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
648 sfid
= BRW_SFID_DATAPORT_WRITE
;
651 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
652 header_present
, end_of_thread
);
655 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
656 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
657 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
658 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
659 } else if (brw
->gen
== 6) {
660 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
661 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
662 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
663 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
664 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
665 } else if (brw
->gen
== 5) {
666 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
667 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
668 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
669 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
670 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
672 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
673 insn
->bits3
.dp_write
.msg_control
= msg_control
;
674 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
675 insn
->bits3
.dp_write
.msg_type
= msg_type
;
676 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
681 brw_set_dp_read_message(struct brw_compile
*p
,
682 struct brw_instruction
*insn
,
683 unsigned binding_table_index
,
684 unsigned msg_control
,
686 unsigned target_cache
,
689 unsigned response_length
)
691 struct brw_context
*brw
= p
->brw
;
695 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
696 } else if (brw
->gen
== 6) {
697 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
698 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
700 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
702 sfid
= BRW_SFID_DATAPORT_READ
;
705 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
706 header_present
, false);
709 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
710 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
711 insn
->bits3
.gen7_dp
.last_render_target
= 0;
712 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
713 } else if (brw
->gen
== 6) {
714 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
715 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
716 insn
->bits3
.gen6_dp
.last_render_target
= 0;
717 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
718 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
719 } else if (brw
->gen
== 5) {
720 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
721 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
722 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
723 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
724 } else if (brw
->is_g4x
) {
725 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
726 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
727 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
728 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
730 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
731 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
732 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
733 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
738 brw_set_sampler_message(struct brw_compile
*p
,
739 struct brw_instruction
*insn
,
740 unsigned binding_table_index
,
743 unsigned response_length
,
745 unsigned header_present
,
747 unsigned return_format
)
749 struct brw_context
*brw
= p
->brw
;
751 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
752 response_length
, header_present
, false);
755 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
756 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
757 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
758 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
759 } else if (brw
->gen
>= 5) {
760 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
761 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
762 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
763 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
764 } else if (brw
->is_g4x
) {
765 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
766 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
767 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
769 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
770 insn
->bits3
.sampler
.sampler
= sampler
;
771 insn
->bits3
.sampler
.msg_type
= msg_type
;
772 insn
->bits3
.sampler
.return_format
= return_format
;
777 #define next_insn brw_next_insn
778 struct brw_instruction
*
779 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
781 struct brw_instruction
*insn
;
783 if (p
->nr_insn
+ 1 > p
->store_size
) {
785 fprintf(stderr
, "incresing the store size to %d\n",
789 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
790 struct brw_instruction
, p
->store_size
);
792 assert(!"realloc eu store memeory failed");
795 p
->next_insn_offset
+= 16;
796 insn
= &p
->store
[p
->nr_insn
++];
797 memcpy(insn
, p
->current
, sizeof(*insn
));
799 /* Reset this one-shot flag:
802 if (p
->current
->header
.destreg__conditionalmod
) {
803 p
->current
->header
.destreg__conditionalmod
= 0;
804 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
807 insn
->header
.opcode
= opcode
;
811 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
816 struct brw_instruction
*insn
= next_insn(p
, opcode
);
817 brw_set_dest(p
, insn
, dest
);
818 brw_set_src0(p
, insn
, src
);
822 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
826 struct brw_reg src1
)
828 struct brw_instruction
*insn
= next_insn(p
, opcode
);
829 brw_set_dest(p
, insn
, dest
);
830 brw_set_src0(p
, insn
, src0
);
831 brw_set_src1(p
, insn
, src1
);
836 get_3src_subreg_nr(struct brw_reg reg
)
838 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
839 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
840 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
842 return reg
.subnr
/ 4;
846 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
853 struct brw_context
*brw
= p
->brw
;
854 struct brw_instruction
*insn
= next_insn(p
, opcode
);
856 gen7_convert_mrf_to_grf(p
, &dest
);
858 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
860 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
861 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
862 assert(dest
.nr
< 128);
863 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
864 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
865 dest
.type
== BRW_REGISTER_TYPE_D
||
866 dest
.type
== BRW_REGISTER_TYPE_UD
);
867 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
868 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
869 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
870 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
871 guess_execution_size(p
, insn
, dest
);
873 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
874 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
875 assert(src0
.nr
< 128);
876 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
877 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
878 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
879 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
880 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
881 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
883 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
884 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
885 assert(src1
.nr
< 128);
886 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
887 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
888 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
889 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
890 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
891 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
892 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
894 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
895 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
896 assert(src2
.nr
< 128);
897 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
898 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
899 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
900 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
901 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
902 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
905 /* Set both the source and destination types based on dest.type,
906 * ignoring the source register types. The MAD and LRP emitters ensure
907 * that all four types are float. The BFE and BFI2 emitters, however,
908 * may send us mixed D and UD types and want us to ignore that and use
909 * the destination type.
912 case BRW_REGISTER_TYPE_F
:
913 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
914 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
916 case BRW_REGISTER_TYPE_D
:
917 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
918 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
920 case BRW_REGISTER_TYPE_UD
:
921 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
922 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
931 /***********************************************************************
932 * Convenience routines.
935 struct brw_instruction *brw_##OP(struct brw_compile *p, \
936 struct brw_reg dest, \
937 struct brw_reg src0) \
939 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
943 struct brw_instruction *brw_##OP(struct brw_compile *p, \
944 struct brw_reg dest, \
945 struct brw_reg src0, \
946 struct brw_reg src1) \
948 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
952 struct brw_instruction *brw_##OP(struct brw_compile *p, \
953 struct brw_reg dest, \
954 struct brw_reg src0, \
955 struct brw_reg src1, \
956 struct brw_reg src2) \
958 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
962 struct brw_instruction *brw_##OP(struct brw_compile *p, \
963 struct brw_reg dest, \
964 struct brw_reg src0, \
965 struct brw_reg src1, \
966 struct brw_reg src2) \
968 assert(dest.type == BRW_REGISTER_TYPE_F); \
969 assert(src0.type == BRW_REGISTER_TYPE_F); \
970 assert(src1.type == BRW_REGISTER_TYPE_F); \
971 assert(src2.type == BRW_REGISTER_TYPE_F); \
972 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
975 /* Rounding operations (other than RNDD) require two instructions - the first
976 * stores a rounded value (possibly the wrong way) in the dest register, but
977 * also sets a per-channel "increment bit" in the flag register. A predicated
978 * add of 1.0 fixes dest to contain the desired result.
980 * Sandybridge and later appear to round correctly without an ADD.
983 void brw_##OP(struct brw_compile *p, \
984 struct brw_reg dest, \
985 struct brw_reg src) \
987 struct brw_instruction *rnd, *add; \
988 rnd = next_insn(p, BRW_OPCODE_##OP); \
989 brw_set_dest(p, rnd, dest); \
990 brw_set_src0(p, rnd, src); \
992 if (p->brw->gen < 6) { \
993 /* turn on round-increments */ \
994 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
995 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
996 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
1039 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1040 struct brw_reg dest
,
1041 struct brw_reg src0
,
1042 struct brw_reg src1
)
1045 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1046 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1047 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1048 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1049 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1052 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1053 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1054 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1055 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1056 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1059 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1062 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1063 struct brw_reg dest
,
1064 struct brw_reg src0
,
1065 struct brw_reg src1
)
1067 assert(dest
.type
== src0
.type
);
1068 assert(src0
.type
== src1
.type
);
1069 switch (src0
.type
) {
1070 case BRW_REGISTER_TYPE_B
:
1071 case BRW_REGISTER_TYPE_UB
:
1072 case BRW_REGISTER_TYPE_W
:
1073 case BRW_REGISTER_TYPE_UW
:
1074 case BRW_REGISTER_TYPE_D
:
1075 case BRW_REGISTER_TYPE_UD
:
1078 assert(!"Bad type for brw_AVG");
1081 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1084 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1085 struct brw_reg dest
,
1086 struct brw_reg src0
,
1087 struct brw_reg src1
)
1090 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1091 src0
.type
== BRW_REGISTER_TYPE_UD
||
1092 src1
.type
== BRW_REGISTER_TYPE_D
||
1093 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1094 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1097 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1098 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1099 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1100 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1101 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1104 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1105 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1106 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1107 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1108 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1111 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1112 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1113 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1114 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1116 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1120 void brw_NOP(struct brw_compile
*p
)
1122 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1123 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1124 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1125 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1132 /***********************************************************************
1133 * Comparisons, if/else/endif
1136 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1137 struct brw_reg dest
,
1138 struct brw_reg src0
,
1139 struct brw_reg src1
)
1141 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1143 insn
->header
.execution_size
= 1;
1144 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1145 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1147 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1153 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1155 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1157 p
->if_stack_depth
++;
1158 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1159 p
->if_stack_array_size
*= 2;
1160 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1161 p
->if_stack_array_size
);
1165 static struct brw_instruction
*
1166 pop_if_stack(struct brw_compile
*p
)
1168 p
->if_stack_depth
--;
1169 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1173 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1175 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1176 p
->loop_stack_array_size
*= 2;
1177 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1178 p
->loop_stack_array_size
);
1179 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1180 p
->loop_stack_array_size
);
1183 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1184 p
->loop_stack_depth
++;
1185 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1188 static struct brw_instruction
*
1189 get_inner_do_insn(struct brw_compile
*p
)
1191 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1194 /* EU takes the value from the flag register and pushes it onto some
1195 * sort of a stack (presumably merging with any flag value already on
1196 * the stack). Within an if block, the flags at the top of the stack
1197 * control execution on each channel of the unit, eg. on each of the
1198 * 16 pixel values in our wm programs.
1200 * When the matching 'else' instruction is reached (presumably by
1201 * countdown of the instruction count patched in by our ELSE/ENDIF
1202 * functions), the relevent flags are inverted.
1204 * When the matching 'endif' instruction is reached, the flags are
1205 * popped off. If the stack is now empty, normal execution resumes.
1207 struct brw_instruction
*
1208 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1210 struct brw_context
*brw
= p
->brw
;
1211 struct brw_instruction
*insn
;
1213 insn
= next_insn(p
, BRW_OPCODE_IF
);
1215 /* Override the defaults for this instruction:
1218 brw_set_dest(p
, insn
, brw_ip_reg());
1219 brw_set_src0(p
, insn
, brw_ip_reg());
1220 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1221 } else if (brw
->gen
== 6) {
1222 brw_set_dest(p
, insn
, brw_imm_w(0));
1223 insn
->bits1
.branch_gen6
.jump_count
= 0;
1224 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1225 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1227 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1228 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1229 brw_set_src1(p
, insn
, brw_imm_ud(0));
1230 insn
->bits3
.break_cont
.jip
= 0;
1231 insn
->bits3
.break_cont
.uip
= 0;
1234 insn
->header
.execution_size
= execute_size
;
1235 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1236 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1237 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1238 if (!p
->single_program_flow
)
1239 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1241 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1243 push_if_stack(p
, insn
);
1244 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1248 /* This function is only used for gen6-style IF instructions with an
1249 * embedded comparison (conditional modifier). It is not used on gen7.
1251 struct brw_instruction
*
1252 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1253 struct brw_reg src0
, struct brw_reg src1
)
1255 struct brw_instruction
*insn
;
1257 insn
= next_insn(p
, BRW_OPCODE_IF
);
1259 brw_set_dest(p
, insn
, brw_imm_w(0));
1260 if (p
->compressed
) {
1261 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1263 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1265 insn
->bits1
.branch_gen6
.jump_count
= 0;
1266 brw_set_src0(p
, insn
, src0
);
1267 brw_set_src1(p
, insn
, src1
);
1269 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1270 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1271 insn
->header
.destreg__conditionalmod
= conditional
;
1273 if (!p
->single_program_flow
)
1274 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1276 push_if_stack(p
, insn
);
1281 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1284 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1285 struct brw_instruction
*if_inst
,
1286 struct brw_instruction
*else_inst
)
1288 /* The next instruction (where the ENDIF would be, if it existed) */
1289 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1291 assert(p
->single_program_flow
);
1292 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1293 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1294 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1296 /* Convert IF to an ADD instruction that moves the instruction pointer
1297 * to the first instruction of the ELSE block. If there is no ELSE
1298 * block, point to where ENDIF would be. Reverse the predicate.
1300 * There's no need to execute an ENDIF since we don't need to do any
1301 * stack operations, and if we're currently executing, we just want to
1302 * continue normally.
1304 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1305 if_inst
->header
.predicate_inverse
= 1;
1307 if (else_inst
!= NULL
) {
1308 /* Convert ELSE to an ADD instruction that points where the ENDIF
1311 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1313 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1314 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1316 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1321 * Patch IF and ELSE instructions with appropriate jump targets.
1324 patch_IF_ELSE(struct brw_compile
*p
,
1325 struct brw_instruction
*if_inst
,
1326 struct brw_instruction
*else_inst
,
1327 struct brw_instruction
*endif_inst
)
1329 struct brw_context
*brw
= p
->brw
;
1331 /* We shouldn't be patching IF and ELSE instructions in single program flow
1332 * mode when gen < 6, because in single program flow mode on those
1333 * platforms, we convert flow control instructions to conditional ADDs that
1334 * operate on IP (see brw_ENDIF).
1336 * However, on Gen6, writing to IP doesn't work in single program flow mode
1337 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1338 * not be updated by non-flow control instructions."). And on later
1339 * platforms, there is no significant benefit to converting control flow
1340 * instructions to conditional ADDs. So we do patch IF and ELSE
1341 * instructions in single program flow mode on those platforms.
1344 assert(!p
->single_program_flow
);
1346 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1347 assert(endif_inst
!= NULL
);
1348 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1351 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1352 * requires 2 chunks.
1357 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1358 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1360 if (else_inst
== NULL
) {
1361 /* Patch IF -> ENDIF */
1363 /* Turn it into an IFF, which means no mask stack operations for
1364 * all-false and jumping past the ENDIF.
1366 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1367 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1368 if_inst
->bits3
.if_else
.pop_count
= 0;
1369 if_inst
->bits3
.if_else
.pad0
= 0;
1370 } else if (brw
->gen
== 6) {
1371 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1372 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1374 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1375 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1378 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1380 /* Patch IF -> ELSE */
1382 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1383 if_inst
->bits3
.if_else
.pop_count
= 0;
1384 if_inst
->bits3
.if_else
.pad0
= 0;
1385 } else if (brw
->gen
== 6) {
1386 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1389 /* Patch ELSE -> ENDIF */
1391 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1394 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1395 else_inst
->bits3
.if_else
.pop_count
= 1;
1396 else_inst
->bits3
.if_else
.pad0
= 0;
1397 } else if (brw
->gen
== 6) {
1398 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1399 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1401 /* The IF instruction's JIP should point just past the ELSE */
1402 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1403 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1404 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1405 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1411 brw_ELSE(struct brw_compile
*p
)
1413 struct brw_context
*brw
= p
->brw
;
1414 struct brw_instruction
*insn
;
1416 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1419 brw_set_dest(p
, insn
, brw_ip_reg());
1420 brw_set_src0(p
, insn
, brw_ip_reg());
1421 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1422 } else if (brw
->gen
== 6) {
1423 brw_set_dest(p
, insn
, brw_imm_w(0));
1424 insn
->bits1
.branch_gen6
.jump_count
= 0;
1425 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1426 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1428 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1429 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1430 brw_set_src1(p
, insn
, brw_imm_ud(0));
1431 insn
->bits3
.break_cont
.jip
= 0;
1432 insn
->bits3
.break_cont
.uip
= 0;
1435 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1436 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1437 if (!p
->single_program_flow
)
1438 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1440 push_if_stack(p
, insn
);
1444 brw_ENDIF(struct brw_compile
*p
)
1446 struct brw_context
*brw
= p
->brw
;
1447 struct brw_instruction
*insn
= NULL
;
1448 struct brw_instruction
*else_inst
= NULL
;
1449 struct brw_instruction
*if_inst
= NULL
;
1450 struct brw_instruction
*tmp
;
1451 bool emit_endif
= true;
1453 /* In single program flow mode, we can express IF and ELSE instructions
1454 * equivalently as ADD instructions that operate on IP. On platforms prior
1455 * to Gen6, flow control instructions cause an implied thread switch, so
1456 * this is a significant savings.
1458 * However, on Gen6, writing to IP doesn't work in single program flow mode
1459 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1460 * not be updated by non-flow control instructions."). And on later
1461 * platforms, there is no significant benefit to converting control flow
1462 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1465 if (brw
->gen
< 6 && p
->single_program_flow
)
1469 * A single next_insn() may change the base address of instruction store
1470 * memory(p->store), so call it first before referencing the instruction
1471 * store pointer from an index
1474 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1476 /* Pop the IF and (optional) ELSE instructions from the stack */
1477 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1478 tmp
= pop_if_stack(p
);
1479 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1481 tmp
= pop_if_stack(p
);
1486 /* ENDIF is useless; don't bother emitting it. */
1487 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1492 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1493 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1494 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1495 } else if (brw
->gen
== 6) {
1496 brw_set_dest(p
, insn
, brw_imm_w(0));
1497 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1498 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1500 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1501 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1502 brw_set_src1(p
, insn
, brw_imm_ud(0));
1505 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1506 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1507 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1509 /* Also pop item off the stack in the endif instruction: */
1511 insn
->bits3
.if_else
.jump_count
= 0;
1512 insn
->bits3
.if_else
.pop_count
= 1;
1513 insn
->bits3
.if_else
.pad0
= 0;
1514 } else if (brw
->gen
== 6) {
1515 insn
->bits1
.branch_gen6
.jump_count
= 2;
1517 insn
->bits3
.break_cont
.jip
= 2;
1519 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1522 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1524 struct brw_context
*brw
= p
->brw
;
1525 struct brw_instruction
*insn
;
1527 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1528 if (brw
->gen
>= 6) {
1529 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1530 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1531 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1533 brw_set_dest(p
, insn
, brw_ip_reg());
1534 brw_set_src0(p
, insn
, brw_ip_reg());
1535 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1536 insn
->bits3
.if_else
.pad0
= 0;
1537 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1539 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1540 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1545 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1547 struct brw_instruction
*insn
;
1549 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1550 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1551 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1552 brw_set_dest(p
, insn
, brw_ip_reg());
1553 brw_set_src0(p
, insn
, brw_ip_reg());
1554 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1556 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1557 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1561 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1563 struct brw_instruction
*insn
;
1564 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1565 brw_set_dest(p
, insn
, brw_ip_reg());
1566 brw_set_src0(p
, insn
, brw_ip_reg());
1567 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1568 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1569 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1570 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1571 insn
->bits3
.if_else
.pad0
= 0;
1572 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1576 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1578 struct brw_instruction
*insn
;
1580 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1581 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1582 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1583 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1585 if (p
->compressed
) {
1586 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1588 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1589 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1596 * The DO/WHILE is just an unterminated loop -- break or continue are
1597 * used for control within the loop. We have a few ways they can be
1600 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1601 * jip and no DO instruction.
1603 * For non-uniform control flow pre-gen6, there's a DO instruction to
1604 * push the mask, and a WHILE to jump back, and BREAK to get out and
1607 * For gen6, there's no more mask stack, so no need for DO. WHILE
1608 * just points back to the first instruction of the loop.
1610 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1612 struct brw_context
*brw
= p
->brw
;
1614 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1615 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1616 return &p
->store
[p
->nr_insn
];
1618 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1620 push_loop_stack(p
, insn
);
1622 /* Override the defaults for this instruction:
1624 brw_set_dest(p
, insn
, brw_null_reg());
1625 brw_set_src0(p
, insn
, brw_null_reg());
1626 brw_set_src1(p
, insn
, brw_null_reg());
1628 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1629 insn
->header
.execution_size
= execute_size
;
1630 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1631 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1632 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1639 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1642 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1643 * nesting, since it can always just point to the end of the block/current loop.
1646 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1648 struct brw_context
*brw
= p
->brw
;
1649 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1650 struct brw_instruction
*inst
;
1651 int br
= (brw
->gen
== 5) ? 2 : 1;
1653 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1654 /* If the jump count is != 0, that means that this instruction has already
1655 * been patched because it's part of a loop inside of the one we're
1658 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1659 inst
->bits3
.if_else
.jump_count
== 0) {
1660 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1661 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1662 inst
->bits3
.if_else
.jump_count
== 0) {
1663 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1668 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1670 struct brw_context
*brw
= p
->brw
;
1671 struct brw_instruction
*insn
, *do_insn
;
1677 if (brw
->gen
>= 7) {
1678 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1679 do_insn
= get_inner_do_insn(p
);
1681 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1682 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1683 brw_set_src1(p
, insn
, brw_imm_ud(0));
1684 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1686 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1687 } else if (brw
->gen
== 6) {
1688 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1689 do_insn
= get_inner_do_insn(p
);
1691 brw_set_dest(p
, insn
, brw_imm_w(0));
1692 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1693 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1694 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1696 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1698 if (p
->single_program_flow
) {
1699 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1700 do_insn
= get_inner_do_insn(p
);
1702 brw_set_dest(p
, insn
, brw_ip_reg());
1703 brw_set_src0(p
, insn
, brw_ip_reg());
1704 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1705 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1707 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1708 do_insn
= get_inner_do_insn(p
);
1710 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1712 brw_set_dest(p
, insn
, brw_ip_reg());
1713 brw_set_src0(p
, insn
, brw_ip_reg());
1714 brw_set_src1(p
, insn
, brw_imm_d(0));
1716 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1717 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1718 insn
->bits3
.if_else
.pop_count
= 0;
1719 insn
->bits3
.if_else
.pad0
= 0;
1721 brw_patch_break_cont(p
, insn
);
1724 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1725 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1727 p
->loop_stack_depth
--;
1735 void brw_land_fwd_jump(struct brw_compile
*p
, int jmp_insn_idx
)
1737 struct brw_context
*brw
= p
->brw
;
1738 struct brw_instruction
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1744 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1745 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1747 jmp_insn
->bits3
.ud
= jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1);
1752 /* To integrate with the above, it makes sense that the comparison
1753 * instruction should populate the flag register. It might be simpler
1754 * just to use the flag reg for most WM tasks?
1756 void brw_CMP(struct brw_compile
*p
,
1757 struct brw_reg dest
,
1758 unsigned conditional
,
1759 struct brw_reg src0
,
1760 struct brw_reg src1
)
1762 struct brw_context
*brw
= p
->brw
;
1763 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1765 insn
->header
.destreg__conditionalmod
= conditional
;
1766 brw_set_dest(p
, insn
, dest
);
1767 brw_set_src0(p
, insn
, src0
);
1768 brw_set_src1(p
, insn
, src1
);
1770 /* guess_execution_size(insn, src0); */
1773 /* Make it so that future instructions will use the computed flag
1774 * value until brw_set_predicate_control_flag_value() is called
1777 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1779 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1780 p
->flag_value
= 0xff;
1783 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1785 * "Any CMP instruction with a null destination must use a {switch}."
1787 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1788 * mentioned on their work-arounds pages.
1790 if (brw
->gen
== 7) {
1791 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1792 dest
.nr
== BRW_ARF_NULL
) {
1793 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1798 /* Issue 'wait' instruction for n1, host could program MMIO
1799 to wake up thread. */
1800 void brw_WAIT (struct brw_compile
*p
)
1802 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1803 struct brw_reg src
= brw_notification_1_reg();
1805 brw_set_dest(p
, insn
, src
);
1806 brw_set_src0(p
, insn
, src
);
1807 brw_set_src1(p
, insn
, brw_null_reg());
1808 insn
->header
.execution_size
= 0; /* must */
1809 insn
->header
.predicate_control
= 0;
1810 insn
->header
.compression_control
= 0;
1814 /***********************************************************************
1815 * Helpers for the various SEND message types:
1818 /** Extended math function, float[8].
1820 void brw_math( struct brw_compile
*p
,
1821 struct brw_reg dest
,
1823 unsigned msg_reg_nr
,
1826 unsigned precision
)
1828 struct brw_context
*brw
= p
->brw
;
1830 if (brw
->gen
>= 6) {
1831 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1833 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1834 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1835 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1837 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1839 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1841 /* Source modifiers are ignored for extended math instructions on Gen6. */
1842 if (brw
->gen
== 6) {
1843 assert(!src
.negate
);
1847 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1848 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1849 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1850 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1852 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1855 /* Math is the same ISA format as other opcodes, except that CondModifier
1856 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1858 insn
->header
.destreg__conditionalmod
= function
;
1860 brw_set_dest(p
, insn
, dest
);
1861 brw_set_src0(p
, insn
, src
);
1862 brw_set_src1(p
, insn
, brw_null_reg());
1864 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1866 /* Example code doesn't set predicate_control for send
1869 insn
->header
.predicate_control
= 0;
1870 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1872 brw_set_dest(p
, insn
, dest
);
1873 brw_set_src0(p
, insn
, src
);
1874 brw_set_math_message(p
,
1877 src
.type
== BRW_REGISTER_TYPE_D
,
1883 /** Extended math function, float[8].
1885 void brw_math2(struct brw_compile
*p
,
1886 struct brw_reg dest
,
1888 struct brw_reg src0
,
1889 struct brw_reg src1
)
1891 struct brw_context
*brw
= p
->brw
;
1892 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1894 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1895 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1896 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1897 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1899 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1900 if (brw
->gen
== 6) {
1901 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1902 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1905 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1906 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1907 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1908 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1909 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1911 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1912 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1915 /* Source modifiers are ignored for extended math instructions on Gen6. */
1916 if (brw
->gen
== 6) {
1917 assert(!src0
.negate
);
1919 assert(!src1
.negate
);
1923 /* Math is the same ISA format as other opcodes, except that CondModifier
1924 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1926 insn
->header
.destreg__conditionalmod
= function
;
1928 brw_set_dest(p
, insn
, dest
);
1929 brw_set_src0(p
, insn
, src0
);
1930 brw_set_src1(p
, insn
, src1
);
1935 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1936 * using a constant offset per channel.
1938 * The offset must be aligned to oword size (16 bytes). Used for
1939 * register spilling.
1941 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1946 struct brw_context
*brw
= p
->brw
;
1947 uint32_t msg_control
, msg_type
;
1953 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1955 if (num_regs
== 1) {
1956 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1959 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1963 /* Set up the message header. This is g0, with g0.2 filled with
1964 * the offset. We don't want to leave our offset around in g0 or
1965 * it'll screw up texture samples, so set it up inside the message
1969 brw_push_insn_state(p
);
1970 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1971 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1973 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1975 /* set message header global offset field (reg 0, element 2) */
1977 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1979 2), BRW_REGISTER_TYPE_UD
),
1980 brw_imm_ud(offset
));
1982 brw_pop_insn_state(p
);
1986 struct brw_reg dest
;
1987 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1988 int send_commit_msg
;
1989 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1990 BRW_REGISTER_TYPE_UW
);
1992 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1993 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1994 src_header
= vec16(src_header
);
1996 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1997 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1999 /* Until gen6, writes followed by reads from the same location
2000 * are not guaranteed to be ordered unless write_commit is set.
2001 * If set, then a no-op write is issued to the destination
2002 * register to set a dependency, and a read from the destination
2003 * can be used to ensure the ordering.
2005 * For gen6, only writes between different threads need ordering
2006 * protection. Our use of DP writes is all about register
2007 * spilling within a thread.
2009 if (brw
->gen
>= 6) {
2010 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2011 send_commit_msg
= 0;
2014 send_commit_msg
= 1;
2017 brw_set_dest(p
, insn
, dest
);
2018 if (brw
->gen
>= 6) {
2019 brw_set_src0(p
, insn
, mrf
);
2021 brw_set_src0(p
, insn
, brw_null_reg());
2025 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2027 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2029 brw_set_dp_write_message(p
,
2031 255, /* binding table index (255=stateless) */
2035 true, /* header_present */
2036 0, /* not a render target */
2037 send_commit_msg
, /* response_length */
2045 * Read a block of owords (half a GRF each) from the scratch buffer
2046 * using a constant index per channel.
2048 * Offset must be aligned to oword size (16 bytes). Used for register
// brw_oword_block_read_scratch: emit an OWord block read whose message
// header is built in mrf and whose result is written to dest.
//
//   p    - compile context; p->brw is consulted for the hardware generation.
//   dest - destination register, retyped to UW before use.
//   mrf, num_regs and offset are also consumed by the body.
//
// The final SEND asserts that no predicate is set, forces compression off,
// and stores mrf.nr in destreg__conditionalmod.  The read message uses
// binding table index 255 (stateless), message type OWORD_BLOCK_READ, the
// render cache as its target, and a header.
2052 brw_oword_block_read_scratch(struct brw_compile
*p
,
2053 struct brw_reg dest
,
2058 struct brw_context
*brw
= p
->brw
;
2059 uint32_t msg_control
;
2065 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2066 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
// num_regs == 1 selects the 2-OWord form; the 4-OWord form is the other
// value assigned to msg_control.
2068 if (num_regs
== 1) {
2069 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2072 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
// Header setup runs with compression off and the execution mask disabled;
// the saved instruction state is restored by the pop further down.
2077 brw_push_insn_state(p
);
2078 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2079 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
// Seed the header from g0.
2081 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2083 /* set message header global offset field (reg 0, element 2) */
2085 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2087 2), BRW_REGISTER_TYPE_UD
),
2088 brw_imm_ud(offset
));
2090 brw_pop_insn_state(p
);
2094 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2096 assert(insn
->header
.predicate_control
== 0);
2097 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2098 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2100 brw_set_dest(p
, insn
, dest
); /* UW? */
// gen6+ passes the MRF as src0; the null register is the other src0 used.
2101 if (brw
->gen
>= 6) {
2102 brw_set_src0(p
, insn
, mrf
);
2104 brw_set_src0(p
, insn
, brw_null_reg());
2107 brw_set_dp_read_message(p
,
2109 255, /* binding table index (255=stateless) */
2111 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2112 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2114 true, /* header_present */
// gen7_block_read_scratch: emit one SEND carrying a
// GEN7_DATAPORT_SCRATCH_READ descriptor for the data cache dataport.
//
//   p    - compile context.
//   dest - destination register, retyped to UW before use.
//   num_regs and offset are also consumed by the body: num_regs must be
//   1, 2 or 4 and offset must be below 1 << 12.  Both limits are checked
//   with assert only.
//
// g0 is passed directly as src0 with a message length of 1.  The SEND
// asserts that no predicate is set and forces compression off.  The
// register count is encoded as num_regs - 1 at
// GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT, and offset is OR-ed into the
// descriptor without a shift.
2120 gen7_block_read_scratch(struct brw_compile
*p
,
2121 struct brw_reg dest
,
2125 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2127 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2129 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2130 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2132 brw_set_dest(p
, insn
, dest
);
2134 /* The HW requires that the header is present; this is to get the g0.5
2137 bool header_present
= true;
2138 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2140 brw_set_message_descriptor(p
, insn
,
2141 GEN7_SFID_DATAPORT_DATA_CACHE
,
2142 1, /* mlen: just g0 */
2147 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2149 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2150 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2152 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2153 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2154 * is 32 bytes, which happens to be the size of a register.
2157 assert(offset
< (1 << 12));
2158 insn
->bits3
.ud
|= offset
;
2162 * Read a float[4] vector from the data port Data Cache (const buffer).
2163 * Location (in buffer) should be a multiple of 16.
2164 * Used for fetching shader constants.
// brw_oword_block_read: build a message header in mrf and emit a SEND that
// performs an OWord block read through the data cache into dest.
//
//   p                - compile context; p->brw supplies the generation.
//   dest             - destination; recast to an 8-wide UW vector.
//   bind_table_index - NOTE(review): presumably forwarded to
//                      brw_set_dp_read_message; confirm.
//   mrf and offset are also consumed by the body.
//
// Header setup and the SEND both run between brw_push_insn_state and
// brw_pop_insn_state with predication and compression off and the
// execution mask disabled.  The message requests
// BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW with a header and a response length
// of 1.
2166 void brw_oword_block_read(struct brw_compile
*p
,
2167 struct brw_reg dest
,
2170 uint32_t bind_table_index
)
2172 struct brw_context
*brw
= p
->brw
;
2174 /* On newer hardware, offset is in units of owords. */
2178 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2180 brw_push_insn_state(p
);
2181 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2182 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2183 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
// Seed the header from g0.
2185 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2187 /* set message header global offset field (reg 0, element 2) */
2189 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2191 2), BRW_REGISTER_TYPE_UD
),
2192 brw_imm_ud(offset
));
2194 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2195 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2197 /* cast dest to a uword[8] vector */
2198 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2200 brw_set_dest(p
, insn
, dest
);
// gen6+ passes the MRF as src0; the null register is the other src0 used.
2201 if (brw
->gen
>= 6) {
2202 brw_set_src0(p
, insn
, mrf
);
2204 brw_set_src0(p
, insn
, brw_null_reg());
2207 brw_set_dp_read_message(p
,
2210 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2211 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2212 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2214 true, /* header_present */
2215 1); /* response_length (1 reg, 2 owords!) */
2217 brw_pop_insn_state(p
);
// brw_fb_WRITE: emit the render target write message.
//
//   p                   - compile context; p->brw supplies the generation.
//   msg_reg_nr          - message register number.
//   src0                - payload source; replaced by
//                         brw_message_reg(msg_reg_nr) on gen6+.
//   binding_table_index - passed on to brw_set_dp_write_message.
//   msg_control, msg_length, response_length, header_present -
//                         NOTE(review): presumably forwarded to
//                         brw_set_dp_write_message; confirm.
//   dispatch_width and eot are also consumed by the body; eot is passed in
//   the last-render-target-write position.
//
// The destination is always the null register typed UW: 16-wide when
// dispatch_width == 16, and 8-wide in the other assignment.  gen6+ emits
// SENDC, and SEND is the other opcode used.  Compression is forced off and
// the message never requests a commit (send_commit_msg is 0).
2221 void brw_fb_WRITE(struct brw_compile
*p
,
2223 unsigned msg_reg_nr
,
2224 struct brw_reg src0
,
2225 unsigned msg_control
,
2226 unsigned binding_table_index
,
2227 unsigned msg_length
,
2228 unsigned response_length
,
2230 bool header_present
)
2232 struct brw_context
*brw
= p
->brw
;
2233 struct brw_instruction
*insn
;
2235 struct brw_reg dest
;
2237 if (dispatch_width
== 16)
2238 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2240 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2242 if (brw
->gen
>= 6) {
2243 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2245 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2247 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2249 if (brw
->gen
>= 6) {
2250 /* headerless version, just submit color payload */
2251 src0
= brw_message_reg(msg_reg_nr
);
2253 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
// NOTE(review): the two statements below are presumably the pre-gen6 side
// of the branch above; confirm.
2255 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2257 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2260 brw_set_dest(p
, insn
, dest
);
2261 brw_set_src0(p
, insn
, src0
);
2262 brw_set_dp_write_message(p
,
2264 binding_table_index
,
2269 eot
, /* last render target write */
2272 0 /* send_commit_msg */);
2277 * Texture sample instruction.
2278 * Note: the msg_type plus msg_length values determine exactly what kind
2279 * of sampling operation is performed. See volume 4, page 161 of docs.
// brw_SAMPLE: emit a SEND carrying a sampler message.
//
//   p                   - compile context.
//   dest                - destination register for the SEND.
//   msg_reg_nr          - message register number.  It is unsigned, so the
//                         comparison with -1 below matches only UINT_MAX;
//                         for any other value the implied move is resolved.
//   src0                - message source; may be rewritten by
//                         gen6_resolve_implied_move.
//   binding_table_index - passed on to brw_set_sampler_message.
//   response_length, msg_length, header_present, return_format -
//                         NOTE(review): presumably forwarded to
//                         brw_set_sampler_message; confirm.
//
// Predication is cleared on the SEND.  Compression is reset to NONE unless
// it is already BRW_COMPRESSION_2NDHALF, which is kept.  msg_reg_nr is
// stored in destreg__conditionalmod (NOTE(review): confirm for which
// generations this applies).
2281 void brw_SAMPLE(struct brw_compile
*p
,
2282 struct brw_reg dest
,
2283 unsigned msg_reg_nr
,
2284 struct brw_reg src0
,
2285 unsigned binding_table_index
,
2288 unsigned response_length
,
2289 unsigned msg_length
,
2290 unsigned header_present
,
2292 unsigned return_format
)
2294 struct brw_context
*brw
= p
->brw
;
2295 struct brw_instruction
*insn
;
2297 if (msg_reg_nr
!= -1)
2298 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2300 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2301 insn
->header
.predicate_control
= 0; /* XXX */
2303 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2305 * "Instruction compression is not allowed for this instruction (that
2306 * is, send). The hardware behavior is undefined if this instruction is
2307 * set as compressed. However, compress control can be set to "SecHalf"
2308 * to affect the EMask generation."
2310 * No similar wording is found in later PRMs, but there are examples
2311 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2312 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2313 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2315 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2316 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2319 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2321 brw_set_dest(p
, insn
, dest
);
2322 brw_set_src0(p
, insn
, src0
);
2323 brw_set_sampler_message(p
, insn
,
2324 binding_table_index
,
2334 /* All these variables are pretty confusing - we might be better off
2335 * using bitmasks and macros for this, in the old style. Or perhaps
2336 * just having the caller instantiate the fields in dword3 itself.
// brw_urb_WRITE: emit a SEND carrying a URB message.
//
//   p               - compile context; p->brw supplies the generation.
//   dest            - destination register for the SEND.
//   msg_reg_nr      - message register number.
//   src0            - message source; may be rewritten by
//                     gen6_resolve_implied_move.
//   flags           - URB write flags; BRW_URB_WRITE_USE_CHANNEL_MASKS is
//                     tested here.
//   msg_length      - must be below BRW_MAX_MRF (checked with assert only).
//   response_length - NOTE(review): presumably forwarded to
//                     brw_set_urb_message; confirm.
//
// On gen7, when BRW_URB_WRITE_USE_CHANNEL_MASKS is not set, dword 5 of the
// message register is first written with g0.5 OR 0xff00, in align1 mode
// with the execution mask disabled.  src1 of the SEND is an immediate 0.
// msg_reg_nr is stored in destreg__conditionalmod (NOTE(review): confirm
// for which generations this applies).
2338 void brw_urb_WRITE(struct brw_compile
*p
,
2339 struct brw_reg dest
,
2340 unsigned msg_reg_nr
,
2341 struct brw_reg src0
,
2342 enum brw_urb_write_flags flags
,
2343 unsigned msg_length
,
2344 unsigned response_length
,
2348 struct brw_context
*brw
= p
->brw
;
2349 struct brw_instruction
*insn
;
2351 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2353 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2354 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2355 brw_push_insn_state(p
);
2356 brw_set_access_mode(p
, BRW_ALIGN_1
);
2357 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2358 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2359 BRW_REGISTER_TYPE_UD
),
2360 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2361 brw_imm_ud(0xff00));
2362 brw_pop_insn_state(p
);
2365 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2367 assert(msg_length
< BRW_MAX_MRF
);
2369 brw_set_dest(p
, insn
, dest
);
2370 brw_set_src0(p
, insn
, src0
);
2371 brw_set_src1(p
, insn
, brw_imm_d(0));
2374 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2376 brw_set_urb_message(p
,
// next_ip: step helper for walking the instruction store.
//
//   p  - compile context owning the instruction store.
//   ip - byte offset of an instruction within p->store.
//
// The instruction at p->store + ip is inspected and the result depends on
// its cmpt_control bit.  NOTE(review): presumably the offset of the
// following instruction is returned, with a smaller step for a compacted
// instruction; confirm.  The pointer arithmetic on void * relies on the
// GNU C extension that treats it as byte-sized.
2386 next_ip(struct brw_compile
*p
, int ip
)
2388 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2390 if (insn
->header
.cmpt_control
)
// brw_find_next_block_end: scan forward for the end of the current block.
//
//   p     - compile context owning the instruction store.
//   start - byte offset of the instruction to search after.
//
// The walk begins at next_ip(p, start) and stops at p->next_insn_offset.
// ENDIF, ELSE, WHILE and HALT are the opcodes singled out.  NOTE(review):
// presumably the offset of the first such instruction is returned, and 0
// when none is found (brw_set_uip_jip compares the result against 0);
// confirm.
2397 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2400 void *store
= p
->store
;
2402 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2403 struct brw_instruction
*insn
= store
+ ip
;
2405 switch (insn
->header
.opcode
) {
2406 case BRW_OPCODE_ENDIF
:
2407 case BRW_OPCODE_ELSE
:
2408 case BRW_OPCODE_WHILE
:
2409 case BRW_OPCODE_HALT
:
2417 /* There is no DO instruction on gen6, so to find the end of the loop
2418 * we have to see if the loop is jumping back before our start
// brw_find_loop_end: look for the WHILE that closes the loop around the
// instruction at start.
//
//   p     - compile context; p->brw supplies the generation.
//   start - byte offset of the instruction being fixed up.
//
// Each WHILE after start is examined.  Its jump distance is read from
// bits1.branch_gen6.jump_count on gen6 and from bits3.break_cont.jip
// otherwise, and the test is whether ip + jip * scale lands at or before
// start.  NOTE(review): presumably ip is returned for the first WHILE that
// passes this test; confirm.  The trailing assert marks the case that is
// not expected to be reached.
2422 brw_find_loop_end(struct brw_compile
*p
, int start
)
2424 struct brw_context
*brw
= p
->brw
;
2427 void *store
= p
->store
;
2429 /* Always start after the instruction (such as a WHILE) we're trying to fix
2432 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2433 struct brw_instruction
*insn
= store
+ ip
;
2435 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2436 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2437 : insn
->bits3
.break_cont
.jip
;
2438 if (ip
+ jip
* scale
<= start
)
2442 assert(!"not reached");
2446 /* After program generation, go back and update the UIP and JIP of
2447 * BREAK, CONT, and HALT instructions to their correct locations.
// brw_set_uip_jip: post-pass that fills in the jump fields of BREAK,
// CONTINUE, ENDIF and HALT instructions.
//
//   p - compile context; p->store holds the instructions and
//       p->next_insn_offset bounds the walk.
//
// Distances are computed in bytes and divided by scale before being stored.
// For each instruction, block_end_ip is brw_find_next_block_end(p, ip):
//   BREAK    - jip targets block_end_ip; uip targets brw_find_loop_end,
//              plus 16 bytes on gen6.
//   CONTINUE - jip targets block_end_ip; uip targets brw_find_loop_end.
//   ENDIF    - jip is 2 when block_end_ip is 0, and the scaled distance to
//              block_end_ip in the other assignment.
//   HALT     - jip copies the existing uip when block_end_ip is 0, and is
//              the scaled distance to block_end_ip in the other assignment.
// BREAK and CONTINUE require a nonzero block_end_ip, and the stored uip and
// jip are asserted to be nonzero.  Compacted instructions must not be
// BREAK, CONTINUE or HALT.  All of these are checked with assert only.
2450 brw_set_uip_jip(struct brw_compile
*p
)
2452 struct brw_context
*brw
= p
->brw
;
2455 void *store
= p
->store
;
2460 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2461 struct brw_instruction
*insn
= store
+ ip
;
2463 if (insn
->header
.cmpt_control
) {
2464 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2465 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2466 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2467 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2471 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2472 switch (insn
->header
.opcode
) {
2473 case BRW_OPCODE_BREAK
:
2474 assert(block_end_ip
!= 0);
2475 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2476 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2477 insn
->bits3
.break_cont
.uip
=
2478 (brw_find_loop_end(p
, ip
) - ip
+
2479 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2481 case BRW_OPCODE_CONTINUE
:
2482 assert(block_end_ip
!= 0);
2483 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2484 insn
->bits3
.break_cont
.uip
=
2485 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2487 assert(insn
->bits3
.break_cont
.uip
!= 0);
2488 assert(insn
->bits3
.break_cont
.jip
!= 0);
2491 case BRW_OPCODE_ENDIF
:
2492 if (block_end_ip
== 0)
2493 insn
->bits3
.break_cont
.jip
= 2;
2495 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2498 case BRW_OPCODE_HALT
:
2499 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2501 * "In case of the halt instruction not inside any conditional
2502 * code block, the value of <JIP> and <UIP> should be the
2503 * same. In case of the halt instruction inside conditional code
2504 * block, the <UIP> should be the end of the program, and the
2505 * <JIP> should be end of the most inner conditional code block."
2507 * The uip will have already been set by whoever set up the
2510 if (block_end_ip
== 0) {
2511 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2513 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2515 assert(insn
->bits3
.break_cont
.uip
!= 0);
2516 assert(insn
->bits3
.break_cont
.jip
!= 0);
// brw_ff_sync: emit a SEND carrying an FF_SYNC message.
//
//   p               - compile context.
//   dest            - destination register for the SEND.
//   msg_reg_nr      - message register number handed to
//                     gen6_resolve_implied_move.
//   src0            - message source; may be rewritten by
//                     gen6_resolve_implied_move.
//   response_length - NOTE(review): presumably forwarded to
//                     brw_set_ff_sync_message; confirm.
//
// src1 of the SEND is an immediate 0.  msg_reg_nr is stored in
// destreg__conditionalmod (NOTE(review): confirm for which generations
// this applies).
2522 void brw_ff_sync(struct brw_compile
*p
,
2523 struct brw_reg dest
,
2524 unsigned msg_reg_nr
,
2525 struct brw_reg src0
,
2527 unsigned response_length
,
2530 struct brw_context
*brw
= p
->brw
;
2531 struct brw_instruction
*insn
;
2533 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2535 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2536 brw_set_dest(p
, insn
, dest
);
2537 brw_set_src0(p
, insn
, src0
);
2538 brw_set_src1(p
, insn
, brw_imm_d(0));
2541 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2543 brw_set_ff_sync_message(p
,
2551 * Emit the SEND instruction necessary to generate stream output data on Gen6
2552 * (for transform feedback).
2554 * If send_commit_msg is true, this is the last piece of stream output data
2555 * from this thread, so send the data as a committed write. According to the
2556 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2558 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2559 * writes are complete by sending the final write as a committed write."
// brw_svb_write: emit a SEND carrying a streamed vertex buffer write
// (GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE).
//
//   p                   - compile context.
//   dest                - destination register for the SEND.
//   msg_reg_nr          - message register number handed to
//                         gen6_resolve_implied_move.
//   src0                - message source; may be rewritten by
//                         gen6_resolve_implied_move.
//   binding_table_index - binding table entry for the write.
//   send_commit_msg     - passed both as the response length and as the
//                         send_commit_msg flag, so a committed write
//                         requests a response of length 1 and an
//                         uncommitted one requests none.
//
// src1 of the SEND is an immediate 0.  The message always carries a header
// and never ends the thread.
2562 brw_svb_write(struct brw_compile
*p
,
2563 struct brw_reg dest
,
2564 unsigned msg_reg_nr
,
2565 struct brw_reg src0
,
2566 unsigned binding_table_index
,
2567 bool send_commit_msg
)
2569 struct brw_instruction
*insn
;
2571 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2573 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2574 brw_set_dest(p
, insn
, dest
);
2575 brw_set_src0(p
, insn
, src0
);
2576 brw_set_src1(p
, insn
, brw_imm_d(0));
2577 brw_set_dp_write_message(p
, insn
,
2578 binding_table_index
,
2579 0, /* msg_control: ignored */
2580 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2582 true, /* header_present */
2583 0, /* last_render_target: ignored */
2584 send_commit_msg
, /* response_length */
2585 0, /* end_of_thread */
2586 send_commit_msg
); /* send_commit_msg */
// brw_set_dp_untyped_atomic_message: fill in the message descriptor of insn
// for an untyped atomic operation.
//
//   p                - compile context; p->brw->is_haswell picks the path.
//   insn             - instruction whose descriptor is written.
//   bind_table_index - stored in bits3.gen7_dp.binding_table_index.
//   msg_length, response_length, header_present - forwarded to
//                      brw_set_message_descriptor.
//   atomic_op is also consumed by the body; it is OR-ed in at bit 8.
//
// Haswell uses HSW_SFID_DATAPORT_DATA_CACHE_1.  For an align1 instruction
// the message type is HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP and bit 12 is
// set unless the execution size is 16; the SIMD4X2 message type is the
// other one assigned.  The remaining path uses
// GEN7_SFID_DATAPORT_DATA_CACHE with GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP and
// the same bit 12 rule.  Bit 13 is set whenever response_length is nonzero.
2590 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2591 struct brw_instruction
*insn
,
2593 unsigned bind_table_index
,
2594 unsigned msg_length
,
2595 unsigned response_length
,
2596 bool header_present
)
2598 if (p
->brw
->is_haswell
) {
2599 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2600 msg_length
, response_length
,
2601 header_present
, false);
2604 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2605 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2606 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2608 insn
->bits3
.gen7_dp
.msg_type
=
2609 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2611 insn
->bits3
.gen7_dp
.msg_type
=
2612 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2616 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2617 msg_length
, response_length
,
2618 header_present
, false);
2620 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2622 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2623 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2626 if (response_length
)
2627 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2629 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2630 insn
->bits3
.ud
|= atomic_op
<< 8;
// brw_untyped_atomic: emit a SEND performing an untyped atomic operation.
//
//   p                - compile context.
//   dest             - destination register, retyped to UD.
//   bind_table_index, msg_length, response_length - forwarded to
//                      brw_set_dp_untyped_atomic_message.
//   mrf and atomic_op are also consumed by the body: mrf, retyped to UD,
//   is src0 and atomic_op is forwarded with the other descriptor fields.
//
// src1 of the SEND is an immediate 0.  The header_present argument passed
// on is true exactly when the new instruction is in align1 access mode.
// Unlike the neighbouring emitters this one allocates the instruction with
// brw_next_insn.
2634 brw_untyped_atomic(struct brw_compile
*p
,
2635 struct brw_reg dest
,
2638 unsigned bind_table_index
,
2639 unsigned msg_length
,
2640 unsigned response_length
) {
2641 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2643 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2644 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2645 brw_set_src1(p
, insn
, brw_imm_d(0));
2646 brw_set_dp_untyped_atomic_message(
2647 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2648 insn
->header
.access_mode
== BRW_ALIGN_1
);
// brw_set_dp_untyped_surface_read_message: fill in the message descriptor
// of insn for an untyped surface read.
//
//   p                - compile context; p->brw->is_haswell picks the SFID
//                      and message type.
//   insn             - instruction whose descriptor is written; its
//                      execution size and access mode are read.
//   bind_table_index - stored in bits3.gen7_dp.binding_table_index.
//   msg_length, response_length, header_present - forwarded to
//                      brw_set_message_descriptor.
//
// dispatch_width is 16 when the execution size is BRW_EXECUTE_16 and 8
// otherwise.  num_channels is response_length divided by the number of
// registers one channel occupies (dispatch_width / 8).  For an align1
// instruction bits 12 and up get 1 for a 16-wide dispatch, and 2 is the
// other value used.  The 4-bit mask placed at bit 8 has a bit set for each
// channel index at or above num_channels, for example 0xe when
// num_channels is 1.
2652 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2653 struct brw_instruction
*insn
,
2654 unsigned bind_table_index
,
2655 unsigned msg_length
,
2656 unsigned response_length
,
2657 bool header_present
)
2659 const unsigned dispatch_width
=
2660 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2661 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2663 if (p
->brw
->is_haswell
) {
2664 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2665 msg_length
, response_length
,
2666 header_present
, false);
2668 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2670 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2671 msg_length
, response_length
,
2672 header_present
, false);
2674 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2677 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2678 if (dispatch_width
== 16)
2679 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2681 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2684 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2686 /* Set mask of 32-bit channels to drop. */
2687 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
// brw_untyped_surface_read: emit a SEND performing an untyped surface read.
//
//   p                - compile context.
//   dest             - destination register, retyped to UD.
//   bind_table_index, msg_length, response_length - forwarded to
//                      brw_set_dp_untyped_surface_read_message.
//   mrf is also consumed by the body: retyped to UD, it is src0.
//
// The header_present argument passed on is true exactly when the new
// instruction is in align1 access mode.
2691 brw_untyped_surface_read(struct brw_compile
*p
,
2692 struct brw_reg dest
,
2694 unsigned bind_table_index
,
2695 unsigned msg_length
,
2696 unsigned response_length
)
2698 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2700 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2701 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2702 brw_set_dp_untyped_surface_read_message(
2703 p
, insn
, bind_table_index
, msg_length
, response_length
,
2704 insn
->header
.access_mode
== BRW_ALIGN_1
);
2708 * This instruction is generated as a single-channel align1 instruction by
2709 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2711 * We can't use the typed atomic op in the FS because that has the execution
2712 * mask ANDed with the pixel mask, but we just want to write the one dword for
2715 * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
2716 * one u32. So we use the same untyped atomic write message as the pixel
2719 * The untyped atomic operation requires a BUFFER surface type with RAW
2720 * format, and is only accessible through the legacy DATA_CACHE dataport
// brw_shader_time_add: emit an untyped atomic add (BRW_AOP_ADD) SEND
// against the surface at binding table index surf_index.
//
//   p          - compile context; p->brw supplies the generation.
//   payload    - message payload register; its register file is used to
//                build src0.
//   surf_index - binding table index handed to
//                brw_set_dp_untyped_atomic_message.
//
// Requires gen >= 7 (checked with assert only).  The SEND is allocated in
// align1 mode with the execution mask disabled, and the previous
// instruction state is restored right after the allocation.  The message
// has length 2, expects no response and carries no header.
2723 void brw_shader_time_add(struct brw_compile
*p
,
2724 struct brw_reg payload
,
2725 uint32_t surf_index
)
2727 struct brw_context
*brw
= p
->brw
;
2728 assert(brw
->gen
>= 7);
2730 brw_push_insn_state(p
);
2731 brw_set_access_mode(p
, BRW_ALIGN_1
);
2732 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2733 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2734 brw_pop_insn_state(p
);
2736 /* We use brw_vec1_reg and unmasked because we want to increment the given
2739 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2741 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2743 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2744 2 /* message length */,
2745 0 /* response length */,
2746 false /* header present */);