2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
299 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
302 struct brw_context
*brw
= p
->brw
;
304 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
305 assert(reg
.nr
< 128);
307 gen7_convert_mrf_to_grf(p
, ®
);
309 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
310 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
317 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
320 validate_reg(insn
, reg
);
322 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
323 insn
->bits1
.da1
.src0_reg_type
=
324 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
325 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
326 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
327 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
329 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
330 insn
->bits3
.ud
= reg
.dw1
.ud
;
332 /* Required to set some fields in src1 as well:
334 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
335 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
339 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
340 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
341 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
342 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
345 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
346 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
350 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
352 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
353 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
356 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
360 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
361 if (reg
.width
== BRW_WIDTH_1
&&
362 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
363 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
364 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
365 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
368 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
369 insn
->bits2
.da1
.src0_width
= reg
.width
;
370 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
374 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
375 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
376 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
377 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
382 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
383 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
385 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
391 void brw_set_src1(struct brw_compile
*p
,
392 struct brw_instruction
*insn
,
395 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
397 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
398 assert(reg
.nr
< 128);
400 gen7_convert_mrf_to_grf(p
, ®
);
402 validate_reg(insn
, reg
);
404 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
405 insn
->bits1
.da1
.src1_reg_type
=
406 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
407 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
408 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
410 /* Only src1 can be immediate in two-argument instructions.
412 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
414 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
415 insn
->bits3
.ud
= reg
.dw1
.ud
;
418 /* This is a hardware restriction, which may or may not be lifted
421 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
422 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
424 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
425 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
426 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
429 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
430 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
433 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
434 if (reg
.width
== BRW_WIDTH_1
&&
435 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
436 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
437 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
438 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
441 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
442 insn
->bits3
.da1
.src1_width
= reg
.width
;
443 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
447 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
448 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
449 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
450 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
452 /* This is an oddity of the fact we're using the same
453 * descriptions for registers in align_16 as align_1:
455 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
456 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
458 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
464 * Set the Message Descriptor and Extended Message Descriptor fields
467 * \note This zeroes out the Function Control bits, so it must be called
468 * \b before filling out any message-specific data. Callers can
469 * choose not to fill in irrelevant bits; they will be zero.
472 brw_set_message_descriptor(struct brw_compile
*p
,
473 struct brw_instruction
*inst
,
474 enum brw_message_target sfid
,
476 unsigned response_length
,
480 struct brw_context
*brw
= p
->brw
;
482 brw_set_src1(p
, inst
, brw_imm_d(0));
485 inst
->bits3
.generic_gen5
.header_present
= header_present
;
486 inst
->bits3
.generic_gen5
.response_length
= response_length
;
487 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
488 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
491 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
492 inst
->header
.destreg__conditionalmod
= sfid
;
494 /* Set Extended Message Descriptor (ex_desc) */
495 inst
->bits2
.send_gen5
.sfid
= sfid
;
496 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
499 inst
->bits3
.generic
.response_length
= response_length
;
500 inst
->bits3
.generic
.msg_length
= msg_length
;
501 inst
->bits3
.generic
.msg_target
= sfid
;
502 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
506 static void brw_set_math_message( struct brw_compile
*p
,
507 struct brw_instruction
*insn
,
509 unsigned integer_type
,
513 struct brw_context
*brw
= p
->brw
;
515 unsigned response_length
;
517 /* Infer message length from the function */
519 case BRW_MATH_FUNCTION_POW
:
520 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
521 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
522 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
530 /* Infer response length from the function */
532 case BRW_MATH_FUNCTION_SINCOS
:
533 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
542 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
543 msg_length
, response_length
, false, false);
545 insn
->bits3
.math_gen5
.function
= function
;
546 insn
->bits3
.math_gen5
.int_type
= integer_type
;
547 insn
->bits3
.math_gen5
.precision
= low_precision
;
548 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
549 insn
->bits3
.math_gen5
.data_type
= dataType
;
550 insn
->bits3
.math_gen5
.snapshot
= 0;
552 insn
->bits3
.math
.function
= function
;
553 insn
->bits3
.math
.int_type
= integer_type
;
554 insn
->bits3
.math
.precision
= low_precision
;
555 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
556 insn
->bits3
.math
.data_type
= dataType
;
558 insn
->header
.saturate
= 0;
562 static void brw_set_ff_sync_message(struct brw_compile
*p
,
563 struct brw_instruction
*insn
,
565 unsigned response_length
,
568 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
569 1, response_length
, true, end_of_thread
);
570 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
571 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
572 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
573 insn
->bits3
.urb_gen5
.allocate
= allocate
;
574 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
575 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
578 static void brw_set_urb_message( struct brw_compile
*p
,
579 struct brw_instruction
*insn
,
580 enum brw_urb_write_flags flags
,
582 unsigned response_length
,
584 unsigned swizzle_control
)
586 struct brw_context
*brw
= p
->brw
;
588 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
589 msg_length
, response_length
, true,
590 flags
& BRW_URB_WRITE_EOT
);
592 if (flags
& BRW_URB_WRITE_OWORD
) {
593 assert(msg_length
== 2); /* header + one OWORD of data */
594 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
596 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
598 insn
->bits3
.urb_gen7
.offset
= offset
;
599 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
600 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
601 insn
->bits3
.urb_gen7
.per_slot_offset
=
602 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
603 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
604 } else if (brw
->gen
>= 5) {
605 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
606 insn
->bits3
.urb_gen5
.offset
= offset
;
607 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
608 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
609 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
610 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
612 insn
->bits3
.urb
.opcode
= 0; /* ? */
613 insn
->bits3
.urb
.offset
= offset
;
614 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
615 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
616 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
617 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
622 brw_set_dp_write_message(struct brw_compile
*p
,
623 struct brw_instruction
*insn
,
624 unsigned binding_table_index
,
625 unsigned msg_control
,
629 unsigned last_render_target
,
630 unsigned response_length
,
631 unsigned end_of_thread
,
632 unsigned send_commit_msg
)
634 struct brw_context
*brw
= p
->brw
;
638 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
639 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
640 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
642 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
643 } else if (brw
->gen
== 6) {
644 /* Use the render cache for all write messages. */
645 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
647 sfid
= BRW_SFID_DATAPORT_WRITE
;
650 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
651 header_present
, end_of_thread
);
654 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
655 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
656 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
657 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
658 } else if (brw
->gen
== 6) {
659 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
660 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
661 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
662 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
663 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
664 } else if (brw
->gen
== 5) {
665 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
666 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
667 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
668 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
669 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
671 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
672 insn
->bits3
.dp_write
.msg_control
= msg_control
;
673 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
674 insn
->bits3
.dp_write
.msg_type
= msg_type
;
675 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
680 brw_set_dp_read_message(struct brw_compile
*p
,
681 struct brw_instruction
*insn
,
682 unsigned binding_table_index
,
683 unsigned msg_control
,
685 unsigned target_cache
,
688 unsigned response_length
)
690 struct brw_context
*brw
= p
->brw
;
694 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
695 } else if (brw
->gen
== 6) {
696 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
697 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
699 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
701 sfid
= BRW_SFID_DATAPORT_READ
;
704 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
705 header_present
, false);
708 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
709 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
710 insn
->bits3
.gen7_dp
.last_render_target
= 0;
711 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
712 } else if (brw
->gen
== 6) {
713 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
714 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
715 insn
->bits3
.gen6_dp
.last_render_target
= 0;
716 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
717 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
718 } else if (brw
->gen
== 5) {
719 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
720 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
721 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
722 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
723 } else if (brw
->is_g4x
) {
724 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
725 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
726 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
727 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
729 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
730 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
731 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
732 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
737 brw_set_sampler_message(struct brw_compile
*p
,
738 struct brw_instruction
*insn
,
739 unsigned binding_table_index
,
742 unsigned response_length
,
744 unsigned header_present
,
746 unsigned return_format
)
748 struct brw_context
*brw
= p
->brw
;
750 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
751 response_length
, header_present
, false);
754 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
755 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
756 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
757 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
758 } else if (brw
->gen
>= 5) {
759 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
760 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
761 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
762 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
763 } else if (brw
->is_g4x
) {
764 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
765 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
766 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
768 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
769 insn
->bits3
.sampler
.sampler
= sampler
;
770 insn
->bits3
.sampler
.msg_type
= msg_type
;
771 insn
->bits3
.sampler
.return_format
= return_format
;
776 #define next_insn brw_next_insn
777 struct brw_instruction
*
778 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
780 struct brw_instruction
*insn
;
782 if (p
->nr_insn
+ 1 > p
->store_size
) {
784 printf("incresing the store size to %d\n", p
->store_size
<< 1);
786 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
787 struct brw_instruction
, p
->store_size
);
789 assert(!"realloc eu store memeory failed");
792 p
->next_insn_offset
+= 16;
793 insn
= &p
->store
[p
->nr_insn
++];
794 memcpy(insn
, p
->current
, sizeof(*insn
));
796 /* Reset this one-shot flag:
799 if (p
->current
->header
.destreg__conditionalmod
) {
800 p
->current
->header
.destreg__conditionalmod
= 0;
801 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
804 insn
->header
.opcode
= opcode
;
808 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
813 struct brw_instruction
*insn
= next_insn(p
, opcode
);
814 brw_set_dest(p
, insn
, dest
);
815 brw_set_src0(p
, insn
, src
);
819 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
823 struct brw_reg src1
)
825 struct brw_instruction
*insn
= next_insn(p
, opcode
);
826 brw_set_dest(p
, insn
, dest
);
827 brw_set_src0(p
, insn
, src0
);
828 brw_set_src1(p
, insn
, src1
);
833 get_3src_subreg_nr(struct brw_reg reg
)
835 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
836 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
837 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
839 return reg
.subnr
/ 4;
843 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
850 struct brw_context
*brw
= p
->brw
;
851 struct brw_instruction
*insn
= next_insn(p
, opcode
);
853 gen7_convert_mrf_to_grf(p
, &dest
);
855 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
857 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
858 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
859 assert(dest
.nr
< 128);
860 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
861 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
862 dest
.type
== BRW_REGISTER_TYPE_D
||
863 dest
.type
== BRW_REGISTER_TYPE_UD
);
864 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
865 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
866 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
867 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
868 guess_execution_size(p
, insn
, dest
);
870 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
871 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
872 assert(src0
.nr
< 128);
873 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
874 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
875 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
876 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
877 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
878 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
880 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
881 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
882 assert(src1
.nr
< 128);
883 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
884 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
885 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
886 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
887 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
888 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
889 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
891 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
892 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
893 assert(src2
.nr
< 128);
894 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
895 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
896 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
897 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
898 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
899 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
902 /* Set both the source and destination types based on dest.type,
903 * ignoring the source register types. The MAD and LRP emitters ensure
904 * that all four types are float. The BFE and BFI2 emitters, however,
905 * may send us mixed D and UD types and want us to ignore that and use
906 * the destination type.
909 case BRW_REGISTER_TYPE_F
:
910 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
911 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
913 case BRW_REGISTER_TYPE_D
:
914 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
915 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
917 case BRW_REGISTER_TYPE_UD
:
918 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
919 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                                  \
struct brw_instruction *brw_##OP(struct brw_compile *p,           \
                                 struct brw_reg dest,             \
                                 struct brw_reg src0)             \
{                                                                 \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);               \
}

#define ALU2(OP)                                                  \
struct brw_instruction *brw_##OP(struct brw_compile *p,           \
                                 struct brw_reg dest,             \
                                 struct brw_reg src0,             \
                                 struct brw_reg src1)             \
{                                                                 \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);         \
}

#define ALU3(OP)                                                  \
struct brw_instruction *brw_##OP(struct brw_compile *p,           \
                                 struct brw_reg dest,             \
                                 struct brw_reg src0,             \
                                 struct brw_reg src1,             \
                                 struct brw_reg src2)             \
{                                                                 \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}

/* Like ALU3, but asserts that all four operands are float. */
#define ALU3F(OP)                                                 \
struct brw_instruction *brw_##OP(struct brw_compile *p,           \
                                 struct brw_reg dest,             \
                                 struct brw_reg src0,             \
                                 struct brw_reg src1,             \
                                 struct brw_reg src2)             \
{                                                                 \
   assert(dest.type == BRW_REGISTER_TYPE_F);                      \
   assert(src0.type == BRW_REGISTER_TYPE_F);                      \
   assert(src1.type == BRW_REGISTER_TYPE_F);                      \
   assert(src2.type == BRW_REGISTER_TYPE_F);                      \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                   \
void brw_##OP(struct brw_compile *p,                                \
              struct brw_reg dest,                                  \
              struct brw_reg src)                                   \
{                                                                   \
   struct brw_instruction *rnd, *add;                               \
   rnd = next_insn(p, BRW_OPCODE_##OP);                             \
   brw_set_dest(p, rnd, dest);                                      \
   brw_set_src0(p, rnd, src);                                       \
                                                                    \
   if (p->brw->gen < 6) {                                           \
      /* turn on round-increments */                                \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;         \
   }                                                                \
}
/* Emit an ADD instruction.
 *
 * The Gen EU does not allow mixing float and dword-integer sources in a
 * single ADD, so assert that if either source is float (or an immediate
 * vector-float) the other is not D/UD.  See the PRM "add" description.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
/* Emit an AVG (integer average) instruction.
 *
 * AVG is only defined for integer types; all three operands must share
 * one of the byte/word/dword integer types.
 */
struct brw_instruction *brw_AVG(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      /* Float and other non-integer types are not legal for AVG. */
      assert(!"Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
/* Emit a MUL instruction.
 *
 * Enforces the ISA restrictions on MUL: dword-integer sources may not
 * produce a float destination, float and dword-integer sources may not be
 * mixed, and neither source may be the accumulator register.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   /* MUL may not read its sources from the accumulator. */
   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
1117 void brw_NOP(struct brw_compile
*p
)
1119 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1120 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1121 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1122 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1129 /***********************************************************************
1130 * Comparisons, if/else/endif
/***********************************************************************
 * Comparisons, if/else/endif
 */

/* Emit a JMPI (jump indexed) instruction.
 *
 * JMPI is a scalar control-flow instruction: execution size 1, no
 * compression, and the execution mask disabled.  The default predicate
 * state for subsequently emitted instructions is also cleared.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
/* Push an IF/ELSE instruction onto the if-stack.
 *
 * Stores the instruction's index (not a pointer, since p->store may be
 * reallocated) and grows the stack array when the new depth reaches the
 * current capacity.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}
1162 static struct brw_instruction
*
1163 pop_if_stack(struct brw_compile
*p
)
1165 p
->if_stack_depth
--;
1166 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1170 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1172 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1173 p
->loop_stack_array_size
*= 2;
1174 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1175 p
->loop_stack_array_size
);
1176 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1177 p
->loop_stack_array_size
);
1180 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1181 p
->loop_stack_depth
++;
1182 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1185 static struct brw_instruction
*
1186 get_inner_do_insn(struct brw_compile
*p
)
1188 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1191 /* EU takes the value from the flag register and pushes it onto some
1192 * sort of a stack (presumably merging with any flag value already on
1193 * the stack). Within an if block, the flags at the top of the stack
1194 * control execution on each channel of the unit, eg. on each of the
1195 * 16 pixel values in our wm programs.
1197 * When the matching 'else' instruction is reached (presumably by
1198 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1201 * When the matching 'endif' instruction is reached, the flags are
1202 * popped off. If the stack is now empty, normal execution resumes.
/* Emit an IF instruction and push it onto the if-stack for later patching.
 *
 * The operand encoding differs per generation: pre-gen6 IF operates on the
 * instruction pointer; gen6 uses the branch_gen6 jump-count field; gen7+
 * uses JIP/UIP in bits3 (all jump fields are patched later by
 * patch_IF_ELSE / brw_ENDIF).
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumed the current predicate; reset the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;

   return insn;
}
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   /* Execution size mirrors the compile's compression state (SIMD16 vs
    * SIMD8).
    */
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;  /* patched later */
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The comparison function rides in the conditional-modifier field. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);

   return insn;
}
/*
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* IP offsets are in bytes; each instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
/*
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
/* Emit an ELSE instruction and push it onto the if-stack.
 *
 * Like brw_IF, the jump fields are left zero here and patched in
 * brw_ENDIF via patch_IF_ELSE.  Operand encoding is per-generation.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
/* Close the innermost IF/ELSE block: pop the if-stack and either emit an
 * ENDIF (patching the IF/ELSE jump targets) or, in pre-gen6 SPF mode,
 * rewrite the IF/ELSE as IP-relative ADDs and emit nothing.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/* Emit a BREAK instruction.
 *
 * Jump targets are filled in later (pre-gen6 by brw_patch_break_cont;
 * gen6+ by the caller's UIP/JIP pass).  Pre-gen6 the pop_count records
 * how many nested IF levels must be popped when the break is taken.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
/* Emit a gen6 CONTINUE instruction.  Jump targets are patched later.
 *
 * NOTE(review): the first brw_set_dest/brw_set_src0 pair (null regs) is
 * immediately overwritten by the brw_ip_reg() pair below — looks like
 * dead code inherited from a copy/paste; confirm before removing.
 */
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}
/* Emit a pre-gen6 CONTINUE instruction.
 *
 * The jump count is left zero and patched by brw_patch_break_cont when the
 * enclosing WHILE is emitted; pop_count records the nested-IF depth to pop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
/* Emit a HALT instruction (gen6+).  UIP and JIP live in src1 and are
 * patched later by the caller.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1593 * The DO/WHILE is just an unterminated loop -- break or continue are
1594 * used for control within the loop. We have a few ways they can be
1597 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1598 * jip and no DO instruction.
1600 * For non-uniform control flow pre-gen6, there's a DO instruction to
1601 * push the mask, and a WHILE to jump back, and BREAK to get out and
1604 * For gen6, there's no more mask stack, so no need for DO. WHILE
1605 * just points back to the first instruction of the loop.
/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; record the position of the first loop
       * instruction so WHILE can jump back to it.
       */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1636 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1639 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1640 * nesting, since it can always just point to the end of the block/current loop.
/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   int br = (brw->gen == 5) ? 2 : 1;  /* gen5 counts 64-bit chunks */

   /* Walk backward from the WHILE to the matching DO. */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         /* BREAK jumps just past the WHILE (out of the loop). */
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         /* CONTINUE jumps to the WHILE itself. */
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
/* Emit the WHILE that closes the innermost DO, per-generation:
 * gen7+ uses JIP, gen6 uses the branch_gen6 jump count, pre-gen6 either a
 * plain IP ADD (SPF mode) or a real WHILE whose BREAK/CONT targets are
 * then back-patched.  Pops the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   unsigned br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the first loop instruction. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF: the loop back-edge is just an IP-relative ADD. */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         /* Fix up any BREAK/CONT inside this loop now that we know where
          * the WHILE landed.
          */
         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
/* Patch a previously emitted forward JMPI (at index jmp_insn_idx) so that
 * it lands on the current end of the instruction stream.  The immediate
 * jump distance is in instruction units, doubled on gen5+ where counts
 * are per 64-bit chunk.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   unsigned jmpi = 1;

   if (brw->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
1749 /* To integrate with the above, it makes sense that the comparison
1750 * instruction should populate the flag register. It might be simpler
1751 * just to use the flag reg for most WM tasks?
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* guess_execution_size(insn, src0); */

   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1811 /***********************************************************************
1812 * Helpers for the various SEND message types:
/** Extended math function, float[8].
 *
 * Gen6+: emitted as a native MATH instruction, with the function code in
 * the CondModifier/ThreadCtrl fields.  Pre-gen6: emitted as a SEND to the
 * extended-math shared function through message register msg_reg_nr.
 *
 * NOTE(review): the `function`, `src` and `data_type` parameter lines were
 * lost in extraction and are reconstructed here from the body's uses --
 * confirm against the header declaration.
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned data_type,
               unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
         assert(!src.negate);
      }

      /* INT DIV operates on integers; everything else wants float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}
/** Extended math function, float[8].
 *
 * Two-source variant (gen6+ only in this form): POW and the INT DIV
 * family.  NOTE(review): the `unsigned function` parameter line was lost
 * in extraction and is reconstructed -- confirm against the header.
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT DIV operates on integers; everything else wants float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src1.negate);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1932 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1933 * using a constant offset per channel.
1935 * The offset must be aligned to oword size (16 bytes). Used for
1936 * register spilling.
/* Write a block of OWORDs (half a GRF each) from the scratch buffer into
 * memory, using a constant offset per channel.  Offset must be
 * oword-aligned (16 bytes).  Used for register spilling.
 *
 * NOTE(review): the `mrf`, `num_regs` and `offset` parameter lines and the
 * mlen/offset-scaling setup were lost in extraction and are reconstructed
 * from the body's uses -- confirm against the header declaration.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
2042 * Read a block of owords (half a GRF each) from the scratch buffer
2043 * using a constant index per channel.
2045 * Offset must be aligned to oword size (16 bytes). Used for register
/* Read a block of owords (half a GRF each) from the scratch buffer using a
 * constant index per channel.  Offset must be oword-aligned (16 bytes).
 * Used for register spilling.
 *
 * NOTE(review): some parameter and setup lines were lost in extraction and
 * are reconstructed from the body's uses -- confirm against the header.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF with g0.2 = offset (see the
    * matching comment in brw_oword_block_write_scratch).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest); /* UW? */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}
2117 gen7_block_read_scratch(struct brw_compile
*p
,
2118 struct brw_reg dest
,
2122 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2124 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2126 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2127 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2129 brw_set_dest(p
, insn
, dest
);
2131 /* The HW requires that the header is present; this is to get the g0.5
2134 bool header_present
= true;
2135 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2137 brw_set_message_descriptor(p
, insn
,
2138 GEN7_SFID_DATAPORT_DATA_CACHE
,
2139 1, /* mlen: just g0 */
2144 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2146 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2147 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2149 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2150 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2151 * is 32 bytes, which happens to be the size of a register.
2154 assert(offset
< (1 << 12));
2155 insn
->bits3
.ud
|= offset
;
2159 * Read a float[4] vector from the data port Data Cache (const buffer).
2160 * Location (in buffer) should be a multiple of 16.
2161 * Used for fetching shader constants.
2163 void brw_oword_block_read(struct brw_compile
*p
,
2164 struct brw_reg dest
,
2167 uint32_t bind_table_index
)
2169 struct brw_context
*brw
= p
->brw
;
2171 /* On newer hardware, offset is in units of owords. */
2175 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2177 brw_push_insn_state(p
);
2178 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2179 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2180 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2182 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2184 /* set message header global offset field (reg 0, element 2) */
2186 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2188 2), BRW_REGISTER_TYPE_UD
),
2189 brw_imm_ud(offset
));
2191 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2192 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2194 /* cast dest to a uword[8] vector */
2195 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2197 brw_set_dest(p
, insn
, dest
);
2198 if (brw
->gen
>= 6) {
2199 brw_set_src0(p
, insn
, mrf
);
2201 brw_set_src0(p
, insn
, brw_null_reg());
2204 brw_set_dp_read_message(p
,
2207 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2208 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2209 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2211 true, /* header_present */
2212 1); /* response_length (1 reg, 2 owords!) */
2214 brw_pop_insn_state(p
);
2218 void brw_fb_WRITE(struct brw_compile
*p
,
2220 unsigned msg_reg_nr
,
2221 struct brw_reg src0
,
2222 unsigned msg_control
,
2223 unsigned binding_table_index
,
2224 unsigned msg_length
,
2225 unsigned response_length
,
2227 bool header_present
)
2229 struct brw_context
*brw
= p
->brw
;
2230 struct brw_instruction
*insn
;
2232 struct brw_reg dest
;
2234 if (dispatch_width
== 16)
2235 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2237 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2239 if (brw
->gen
>= 6) {
2240 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2242 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2244 /* The execution mask is ignored for render target writes. */
2245 insn
->header
.predicate_control
= 0;
2246 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2248 if (brw
->gen
>= 6) {
2249 /* headerless version, just submit color payload */
2250 src0
= brw_message_reg(msg_reg_nr
);
2252 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2254 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2256 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2259 brw_set_dest(p
, insn
, dest
);
2260 brw_set_src0(p
, insn
, src0
);
2261 brw_set_dp_write_message(p
,
2263 binding_table_index
,
2268 eot
, /* last render target write */
2271 0 /* send_commit_msg */);
2276 * Texture sample instruction.
2277 * Note: the msg_type plus msg_length values determine exactly what kind
2278 * of sampling operation is performed. See volume 4, page 161 of docs.
2280 void brw_SAMPLE(struct brw_compile
*p
,
2281 struct brw_reg dest
,
2282 unsigned msg_reg_nr
,
2283 struct brw_reg src0
,
2284 unsigned binding_table_index
,
2287 unsigned response_length
,
2288 unsigned msg_length
,
2289 unsigned header_present
,
2291 unsigned return_format
)
2293 struct brw_context
*brw
= p
->brw
;
2294 struct brw_instruction
*insn
;
2296 if (msg_reg_nr
!= -1)
2297 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2299 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2300 insn
->header
.predicate_control
= 0; /* XXX */
2302 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2304 * "Instruction compression is not allowed for this instruction (that
2305 * is, send). The hardware behavior is undefined if this instruction is
2306 * set as compressed. However, compress control can be set to "SecHalf"
2307 * to affect the EMask generation."
2309 * No similar wording is found in later PRMs, but there are examples
2310 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2311 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2312 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2314 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2315 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2318 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2320 brw_set_dest(p
, insn
, dest
);
2321 brw_set_src0(p
, insn
, src0
);
2322 brw_set_sampler_message(p
, insn
,
2323 binding_table_index
,
2333 /* All these variables are pretty confusing - we might be better off
2334 * using bitmasks and macros for this, in the old style. Or perhaps
2335 * just having the caller instantiate the fields in dword3 itself.
2337 void brw_urb_WRITE(struct brw_compile
*p
,
2338 struct brw_reg dest
,
2339 unsigned msg_reg_nr
,
2340 struct brw_reg src0
,
2341 enum brw_urb_write_flags flags
,
2342 unsigned msg_length
,
2343 unsigned response_length
,
2347 struct brw_context
*brw
= p
->brw
;
2348 struct brw_instruction
*insn
;
2350 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2352 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2353 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2354 brw_push_insn_state(p
);
2355 brw_set_access_mode(p
, BRW_ALIGN_1
);
2356 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2357 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2358 BRW_REGISTER_TYPE_UD
),
2359 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2360 brw_imm_ud(0xff00));
2361 brw_pop_insn_state(p
);
2364 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2366 assert(msg_length
< BRW_MAX_MRF
);
2368 brw_set_dest(p
, insn
, dest
);
2369 brw_set_src0(p
, insn
, src0
);
2370 brw_set_src1(p
, insn
, brw_imm_d(0));
2373 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2375 brw_set_urb_message(p
,
2385 next_ip(struct brw_compile
*p
, int ip
)
2387 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2389 if (insn
->header
.cmpt_control
)
2396 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2399 void *store
= p
->store
;
2401 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2402 struct brw_instruction
*insn
= store
+ ip
;
2404 switch (insn
->header
.opcode
) {
2405 case BRW_OPCODE_ENDIF
:
2406 case BRW_OPCODE_ELSE
:
2407 case BRW_OPCODE_WHILE
:
2408 case BRW_OPCODE_HALT
:
2416 /* There is no DO instruction on gen6, so to find the end of the loop
2417 * we have to see if the loop is jumping back before our start
2421 brw_find_loop_end(struct brw_compile
*p
, int start
)
2423 struct brw_context
*brw
= p
->brw
;
2426 void *store
= p
->store
;
2428 /* Always start after the instruction (such as a WHILE) we're trying to fix
2431 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2432 struct brw_instruction
*insn
= store
+ ip
;
2434 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2435 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2436 : insn
->bits3
.break_cont
.jip
;
2437 if (ip
+ jip
* scale
<= start
)
2441 assert(!"not reached");
2445 /* After program generation, go back and update the UIP and JIP of
2446 * BREAK, CONT, and HALT instructions to their correct locations.
2449 brw_set_uip_jip(struct brw_compile
*p
)
2451 struct brw_context
*brw
= p
->brw
;
2454 void *store
= p
->store
;
2459 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2460 struct brw_instruction
*insn
= store
+ ip
;
2462 if (insn
->header
.cmpt_control
) {
2463 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2464 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2465 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2466 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2470 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2471 switch (insn
->header
.opcode
) {
2472 case BRW_OPCODE_BREAK
:
2473 assert(block_end_ip
!= 0);
2474 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2475 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2476 insn
->bits3
.break_cont
.uip
=
2477 (brw_find_loop_end(p
, ip
) - ip
+
2478 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2480 case BRW_OPCODE_CONTINUE
:
2481 assert(block_end_ip
!= 0);
2482 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2483 insn
->bits3
.break_cont
.uip
=
2484 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2486 assert(insn
->bits3
.break_cont
.uip
!= 0);
2487 assert(insn
->bits3
.break_cont
.jip
!= 0);
2490 case BRW_OPCODE_ENDIF
:
2491 if (block_end_ip
== 0)
2492 insn
->bits3
.break_cont
.jip
= 2;
2494 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2497 case BRW_OPCODE_HALT
:
2498 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2500 * "In case of the halt instruction not inside any conditional
2501 * code block, the value of <JIP> and <UIP> should be the
2502 * same. In case of the halt instruction inside conditional code
2503 * block, the <UIP> should be the end of the program, and the
2504 * <JIP> should be end of the most inner conditional code block."
2506 * The uip will have already been set by whoever set up the
2509 if (block_end_ip
== 0) {
2510 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2512 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2514 assert(insn
->bits3
.break_cont
.uip
!= 0);
2515 assert(insn
->bits3
.break_cont
.jip
!= 0);
2521 void brw_ff_sync(struct brw_compile
*p
,
2522 struct brw_reg dest
,
2523 unsigned msg_reg_nr
,
2524 struct brw_reg src0
,
2526 unsigned response_length
,
2529 struct brw_context
*brw
= p
->brw
;
2530 struct brw_instruction
*insn
;
2532 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2534 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2535 brw_set_dest(p
, insn
, dest
);
2536 brw_set_src0(p
, insn
, src0
);
2537 brw_set_src1(p
, insn
, brw_imm_d(0));
2540 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2542 brw_set_ff_sync_message(p
,
2550 * Emit the SEND instruction necessary to generate stream output data on Gen6
2551 * (for transform feedback).
2553 * If send_commit_msg is true, this is the last piece of stream output data
2554 * from this thread, so send the data as a committed write. According to the
2555 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2557 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2558 * writes are complete by sending the final write as a committed write."
2561 brw_svb_write(struct brw_compile
*p
,
2562 struct brw_reg dest
,
2563 unsigned msg_reg_nr
,
2564 struct brw_reg src0
,
2565 unsigned binding_table_index
,
2566 bool send_commit_msg
)
2568 struct brw_instruction
*insn
;
2570 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2572 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2573 brw_set_dest(p
, insn
, dest
);
2574 brw_set_src0(p
, insn
, src0
);
2575 brw_set_src1(p
, insn
, brw_imm_d(0));
2576 brw_set_dp_write_message(p
, insn
,
2577 binding_table_index
,
2578 0, /* msg_control: ignored */
2579 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2581 true, /* header_present */
2582 0, /* last_render_target: ignored */
2583 send_commit_msg
, /* response_length */
2584 0, /* end_of_thread */
2585 send_commit_msg
); /* send_commit_msg */
2589 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2590 struct brw_instruction
*insn
,
2592 unsigned bind_table_index
,
2593 unsigned msg_length
,
2594 unsigned response_length
,
2595 bool header_present
)
2597 if (p
->brw
->is_haswell
) {
2598 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2599 msg_length
, response_length
,
2600 header_present
, false);
2603 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2604 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2605 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2607 insn
->bits3
.gen7_dp
.msg_type
=
2608 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2610 insn
->bits3
.gen7_dp
.msg_type
=
2611 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2615 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2616 msg_length
, response_length
,
2617 header_present
, false);
2619 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2621 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2622 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2625 if (response_length
)
2626 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2628 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2629 insn
->bits3
.ud
|= atomic_op
<< 8;
2633 brw_untyped_atomic(struct brw_compile
*p
,
2634 struct brw_reg dest
,
2637 unsigned bind_table_index
,
2638 unsigned msg_length
,
2639 unsigned response_length
) {
2640 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2642 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2643 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2644 brw_set_src1(p
, insn
, brw_imm_d(0));
2645 brw_set_dp_untyped_atomic_message(
2646 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2647 insn
->header
.access_mode
== BRW_ALIGN_1
);
2651 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2652 struct brw_instruction
*insn
,
2653 unsigned bind_table_index
,
2654 unsigned msg_length
,
2655 unsigned response_length
,
2656 bool header_present
)
2658 const unsigned dispatch_width
=
2659 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2660 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2662 if (p
->brw
->is_haswell
) {
2663 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2664 msg_length
, response_length
,
2665 header_present
, false);
2667 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2669 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2670 msg_length
, response_length
,
2671 header_present
, false);
2673 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2676 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2677 if (dispatch_width
== 16)
2678 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2680 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2683 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2685 /* Set mask of 32-bit channels to drop. */
2686 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2690 brw_untyped_surface_read(struct brw_compile
*p
,
2691 struct brw_reg dest
,
2693 unsigned bind_table_index
,
2694 unsigned msg_length
,
2695 unsigned response_length
)
2697 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2699 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2700 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2701 brw_set_dp_untyped_surface_read_message(
2702 p
, insn
, bind_table_index
, msg_length
, response_length
,
2703 insn
->header
.access_mode
== BRW_ALIGN_1
);
2707 * This instruction is generated as a single-channel align1 instruction by
2708 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2710 * We can't use the typed atomic op in the FS because that has the execution
2711 * mask ANDed with the pixel mask, but we just want to write the one dword for
2714 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2715 * one u32. So we use the same untyped atomic write message as the pixel
2718 * The untyped atomic operation requires a BUFFER surface type with RAW
2719 * format, and is only accessible through the legacy DATA_CACHE dataport
2722 void brw_shader_time_add(struct brw_compile
*p
,
2723 struct brw_reg payload
,
2724 uint32_t surf_index
)
2726 struct brw_context
*brw
= p
->brw
;
2727 assert(brw
->gen
>= 7);
2729 brw_push_insn_state(p
);
2730 brw_set_access_mode(p
, BRW_ALIGN_1
);
2731 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2732 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2733 brw_pop_insn_state(p
);
2735 /* We use brw_vec1_reg and unmasked because we want to increment the given
2738 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2740 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2742 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2743 2 /* message length */,
2744 0 /* response length */,
2745 false /* header present */);