2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 if (file
== BRW_IMMEDIATE_VALUE
) {
112 const static int imm_hw_types
[] = {
113 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
114 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
115 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
116 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
117 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
118 [BRW_REGISTER_TYPE_UB
] = -1,
119 [BRW_REGISTER_TYPE_B
] = -1,
120 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
121 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
122 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
123 [BRW_REGISTER_TYPE_DF
] = GEN8_HW_REG_IMM_TYPE_DF
,
124 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_IMM_TYPE_HF
,
125 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
126 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
128 assert(type
< ARRAY_SIZE(imm_hw_types
));
129 assert(imm_hw_types
[type
] != -1);
130 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_DF
);
131 return imm_hw_types
[type
];
133 /* Non-immediate registers */
134 const static int hw_types
[] = {
135 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
136 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
137 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
138 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
139 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
140 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
141 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
142 [BRW_REGISTER_TYPE_UV
] = -1,
143 [BRW_REGISTER_TYPE_VF
] = -1,
144 [BRW_REGISTER_TYPE_V
] = -1,
145 [BRW_REGISTER_TYPE_DF
] = GEN7_HW_REG_NON_IMM_TYPE_DF
,
146 [BRW_REGISTER_TYPE_HF
] = GEN8_HW_REG_NON_IMM_TYPE_HF
,
147 [BRW_REGISTER_TYPE_UQ
] = GEN8_HW_REG_TYPE_UQ
,
148 [BRW_REGISTER_TYPE_Q
] = GEN8_HW_REG_TYPE_Q
,
150 assert(type
< ARRAY_SIZE(hw_types
));
151 assert(hw_types
[type
] != -1);
152 assert(brw
->gen
>= 7 || type
< BRW_REGISTER_TYPE_DF
);
153 assert(brw
->gen
>= 8 || type
< BRW_REGISTER_TYPE_HF
);
154 return hw_types
[type
];
159 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
162 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
163 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
164 assert(dest
.nr
< 128);
166 gen7_convert_mrf_to_grf(p
, &dest
);
168 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
169 insn
->bits1
.da1
.dest_reg_type
=
170 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
171 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
173 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
174 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
176 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
177 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
178 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
179 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
180 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
183 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
184 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
185 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
186 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
187 assert(dest
.dw1
.bits
.writemask
!= 0);
189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
190 * Although Dst.HorzStride is a don't care for Align16, HW needs
191 * this to be programmed as "01".
193 insn
->bits1
.da16
.dest_horiz_stride
= 1;
197 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
199 /* These are different sizes in align1 vs align16:
201 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
202 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
203 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
204 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
205 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
208 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
209 /* even ignored in da16, still need to set as '01' */
210 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
214 /* NEW: Set the execution size based on dest.width and
215 * insn->compression_control:
217 guess_execution_size(p
, insn
, dest
);
220 extern int reg_type_size
[];
223 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
225 int hstride_for_reg
[] = {0, 1, 2, 4};
226 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
227 int width_for_reg
[] = {1, 2, 4, 8, 16};
228 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
229 int width
, hstride
, vstride
, execsize
;
231 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
233 * mean the destination has to be 128-bit aligned and the
234 * destination horiz stride has to be a word.
236 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
237 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
238 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
244 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
245 reg
.file
== BRW_ARF_NULL
)
248 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
249 hstride
= hstride_for_reg
[reg
.hstride
];
251 if (reg
.vstride
== 0xf) {
254 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
255 vstride
= vstride_for_reg
[reg
.vstride
];
258 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
259 width
= width_for_reg
[reg
.width
];
261 assert(insn
->header
.execution_size
>= 0 &&
262 insn
->header
.execution_size
< Elements(execsize_for_reg
));
263 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
265 /* Restrictions from 3.3.10: Register Region Restrictions. */
267 assert(execsize
>= width
);
270 if (execsize
== width
&& hstride
!= 0) {
271 assert(vstride
== -1 || vstride
== width
* hstride
);
275 if (execsize
== width
&& hstride
== 0) {
276 /* no restriction on vstride. */
281 assert(hstride
== 0);
285 if (execsize
== 1 && width
== 1) {
286 assert(hstride
== 0);
287 assert(vstride
== 0);
291 if (vstride
== 0 && hstride
== 0) {
295 /* 10. Check destination issues. */
299 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
302 struct brw_context
*brw
= p
->brw
;
304 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
305 assert(reg
.nr
< 128);
307 gen7_convert_mrf_to_grf(p
, ®
);
309 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
310 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
311 /* Any source modifiers or regions will be ignored, since this just
312 * identifies the MRF/GRF to start reading the message contents from.
313 * Check for some likely failures.
317 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
320 validate_reg(insn
, reg
);
322 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
323 insn
->bits1
.da1
.src0_reg_type
=
324 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
325 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
326 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
327 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
329 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
330 insn
->bits3
.ud
= reg
.dw1
.ud
;
332 /* Required to set some fields in src1 as well:
334 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
335 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
339 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
340 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
341 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
342 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
345 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
346 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
350 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
352 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
353 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
356 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
360 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
361 if (reg
.width
== BRW_WIDTH_1
&&
362 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
363 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
364 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
365 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
368 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
369 insn
->bits2
.da1
.src0_width
= reg
.width
;
370 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
374 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
375 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
376 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
377 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
379 /* This is an oddity of the fact we're using the same
380 * descriptions for registers in align_16 as align_1:
382 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
383 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
385 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
391 void brw_set_src1(struct brw_compile
*p
,
392 struct brw_instruction
*insn
,
395 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
397 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
398 assert(reg
.nr
< 128);
400 gen7_convert_mrf_to_grf(p
, ®
);
402 validate_reg(insn
, reg
);
404 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
405 insn
->bits1
.da1
.src1_reg_type
=
406 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
407 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
408 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
410 /* Only src1 can be immediate in two-argument instructions.
412 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
414 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
415 insn
->bits3
.ud
= reg
.dw1
.ud
;
418 /* This is a hardware restriction, which may or may not be lifted
421 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
422 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
424 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
425 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
426 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
429 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
430 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
433 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
434 if (reg
.width
== BRW_WIDTH_1
&&
435 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
436 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
437 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
438 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
441 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
442 insn
->bits3
.da1
.src1_width
= reg
.width
;
443 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
447 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
448 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
449 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
450 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
452 /* This is an oddity of the fact we're using the same
453 * descriptions for registers in align_16 as align_1:
455 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
456 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
458 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
464 * Set the Message Descriptor and Extended Message Descriptor fields
467 * \note This zeroes out the Function Control bits, so it must be called
468 * \b before filling out any message-specific data. Callers can
469 * choose not to fill in irrelevant bits; they will be zero.
472 brw_set_message_descriptor(struct brw_compile
*p
,
473 struct brw_instruction
*inst
,
474 enum brw_message_target sfid
,
476 unsigned response_length
,
480 struct brw_context
*brw
= p
->brw
;
482 brw_set_src1(p
, inst
, brw_imm_d(0));
485 inst
->bits3
.generic_gen5
.header_present
= header_present
;
486 inst
->bits3
.generic_gen5
.response_length
= response_length
;
487 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
488 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
491 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
492 inst
->header
.destreg__conditionalmod
= sfid
;
494 /* Set Extended Message Descriptor (ex_desc) */
495 inst
->bits2
.send_gen5
.sfid
= sfid
;
496 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
499 inst
->bits3
.generic
.response_length
= response_length
;
500 inst
->bits3
.generic
.msg_length
= msg_length
;
501 inst
->bits3
.generic
.msg_target
= sfid
;
502 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
506 static void brw_set_math_message( struct brw_compile
*p
,
507 struct brw_instruction
*insn
,
509 unsigned integer_type
,
513 struct brw_context
*brw
= p
->brw
;
515 unsigned response_length
;
517 /* Infer message length from the function */
519 case BRW_MATH_FUNCTION_POW
:
520 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
521 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
522 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
530 /* Infer response length from the function */
532 case BRW_MATH_FUNCTION_SINCOS
:
533 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
542 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
543 msg_length
, response_length
, false, false);
545 insn
->bits3
.math_gen5
.function
= function
;
546 insn
->bits3
.math_gen5
.int_type
= integer_type
;
547 insn
->bits3
.math_gen5
.precision
= low_precision
;
548 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
549 insn
->bits3
.math_gen5
.data_type
= dataType
;
550 insn
->bits3
.math_gen5
.snapshot
= 0;
552 insn
->bits3
.math
.function
= function
;
553 insn
->bits3
.math
.int_type
= integer_type
;
554 insn
->bits3
.math
.precision
= low_precision
;
555 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
556 insn
->bits3
.math
.data_type
= dataType
;
558 insn
->header
.saturate
= 0;
562 static void brw_set_ff_sync_message(struct brw_compile
*p
,
563 struct brw_instruction
*insn
,
565 unsigned response_length
,
568 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
569 1, response_length
, true, end_of_thread
);
570 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
571 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
572 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
573 insn
->bits3
.urb_gen5
.allocate
= allocate
;
574 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
575 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
578 static void brw_set_urb_message( struct brw_compile
*p
,
579 struct brw_instruction
*insn
,
580 enum brw_urb_write_flags flags
,
582 unsigned response_length
,
584 unsigned swizzle_control
)
586 struct brw_context
*brw
= p
->brw
;
588 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
589 msg_length
, response_length
, true,
590 flags
& BRW_URB_WRITE_EOT
);
592 if (flags
& BRW_URB_WRITE_OWORD
) {
593 assert(msg_length
== 2); /* header + one OWORD of data */
594 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
596 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
598 insn
->bits3
.urb_gen7
.offset
= offset
;
599 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
600 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
601 insn
->bits3
.urb_gen7
.per_slot_offset
=
602 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
603 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
604 } else if (brw
->gen
>= 5) {
605 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
606 insn
->bits3
.urb_gen5
.offset
= offset
;
607 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
608 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
609 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
610 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
612 insn
->bits3
.urb
.opcode
= 0; /* ? */
613 insn
->bits3
.urb
.offset
= offset
;
614 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
615 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
616 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
617 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
622 brw_set_dp_write_message(struct brw_compile
*p
,
623 struct brw_instruction
*insn
,
624 unsigned binding_table_index
,
625 unsigned msg_control
,
629 unsigned last_render_target
,
630 unsigned response_length
,
631 unsigned end_of_thread
,
632 unsigned send_commit_msg
)
634 struct brw_context
*brw
= p
->brw
;
638 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
639 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
640 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
642 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
643 } else if (brw
->gen
== 6) {
644 /* Use the render cache for all write messages. */
645 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
647 sfid
= BRW_SFID_DATAPORT_WRITE
;
650 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
651 header_present
, end_of_thread
);
654 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
655 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
656 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
657 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
658 } else if (brw
->gen
== 6) {
659 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
660 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
661 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
662 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
663 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
664 } else if (brw
->gen
== 5) {
665 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
666 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
667 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
668 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
669 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
671 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
672 insn
->bits3
.dp_write
.msg_control
= msg_control
;
673 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
674 insn
->bits3
.dp_write
.msg_type
= msg_type
;
675 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
680 brw_set_dp_read_message(struct brw_compile
*p
,
681 struct brw_instruction
*insn
,
682 unsigned binding_table_index
,
683 unsigned msg_control
,
685 unsigned target_cache
,
688 unsigned response_length
)
690 struct brw_context
*brw
= p
->brw
;
694 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
695 } else if (brw
->gen
== 6) {
696 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
697 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
699 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
701 sfid
= BRW_SFID_DATAPORT_READ
;
704 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
705 header_present
, false);
708 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
709 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
710 insn
->bits3
.gen7_dp
.last_render_target
= 0;
711 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
712 } else if (brw
->gen
== 6) {
713 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
714 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
715 insn
->bits3
.gen6_dp
.last_render_target
= 0;
716 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
717 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
718 } else if (brw
->gen
== 5) {
719 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
720 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
721 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
722 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
723 } else if (brw
->is_g4x
) {
724 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
725 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
726 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
727 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
729 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
730 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
731 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
732 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
737 brw_set_sampler_message(struct brw_compile
*p
,
738 struct brw_instruction
*insn
,
739 unsigned binding_table_index
,
742 unsigned response_length
,
744 unsigned header_present
,
746 unsigned return_format
)
748 struct brw_context
*brw
= p
->brw
;
750 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
751 response_length
, header_present
, false);
754 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
755 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
756 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
757 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
758 } else if (brw
->gen
>= 5) {
759 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
760 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
761 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
762 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
763 } else if (brw
->is_g4x
) {
764 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
765 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
766 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
768 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
769 insn
->bits3
.sampler
.sampler
= sampler
;
770 insn
->bits3
.sampler
.msg_type
= msg_type
;
771 insn
->bits3
.sampler
.return_format
= return_format
;
776 #define next_insn brw_next_insn
777 struct brw_instruction
*
778 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
780 struct brw_instruction
*insn
;
782 if (p
->nr_insn
+ 1 > p
->store_size
) {
784 fprintf(stderr
, "incresing the store size to %d\n",
788 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
789 struct brw_instruction
, p
->store_size
);
791 assert(!"realloc eu store memeory failed");
794 p
->next_insn_offset
+= 16;
795 insn
= &p
->store
[p
->nr_insn
++];
796 memcpy(insn
, p
->current
, sizeof(*insn
));
798 /* Reset this one-shot flag:
801 if (p
->current
->header
.destreg__conditionalmod
) {
802 p
->current
->header
.destreg__conditionalmod
= 0;
803 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
806 insn
->header
.opcode
= opcode
;
810 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
815 struct brw_instruction
*insn
= next_insn(p
, opcode
);
816 brw_set_dest(p
, insn
, dest
);
817 brw_set_src0(p
, insn
, src
);
821 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
825 struct brw_reg src1
)
827 struct brw_instruction
*insn
= next_insn(p
, opcode
);
828 brw_set_dest(p
, insn
, dest
);
829 brw_set_src0(p
, insn
, src0
);
830 brw_set_src1(p
, insn
, src1
);
835 get_3src_subreg_nr(struct brw_reg reg
)
837 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
838 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
839 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
841 return reg
.subnr
/ 4;
845 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
852 struct brw_context
*brw
= p
->brw
;
853 struct brw_instruction
*insn
= next_insn(p
, opcode
);
855 gen7_convert_mrf_to_grf(p
, &dest
);
857 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
859 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
860 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
861 assert(dest
.nr
< 128);
862 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
863 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
864 dest
.type
== BRW_REGISTER_TYPE_D
||
865 dest
.type
== BRW_REGISTER_TYPE_UD
);
866 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
867 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
868 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
869 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
870 guess_execution_size(p
, insn
, dest
);
872 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
873 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
874 assert(src0
.nr
< 128);
875 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
876 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
877 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
878 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
879 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
880 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
882 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
883 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
884 assert(src1
.nr
< 128);
885 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
886 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
887 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
888 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
889 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
890 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
891 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
893 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
894 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
895 assert(src2
.nr
< 128);
896 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
897 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
898 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
899 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
900 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
901 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
904 /* Set both the source and destination types based on dest.type,
905 * ignoring the source register types. The MAD and LRP emitters ensure
906 * that all four types are float. The BFE and BFI2 emitters, however,
907 * may send us mixed D and UD types and want us to ignore that and use
908 * the destination type.
911 case BRW_REGISTER_TYPE_F
:
912 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
913 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
915 case BRW_REGISTER_TYPE_D
:
916 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
917 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
919 case BRW_REGISTER_TYPE_UD
:
920 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
921 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0)				\
{								\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}

#define ALU2(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1)				\
{								\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

#define ALU3(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1,				\
	      struct brw_reg src2)				\
{								\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* ALU3 variant that insists all four operands are float. */
#define ALU3F(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1,				\
	      struct brw_reg src2)				\
{								\
   assert(dest.type == BRW_REGISTER_TYPE_F);			\
   assert(src0.type == BRW_REGISTER_TYPE_F);			\
   assert(src1.type == BRW_REGISTER_TYPE_F);			\
   assert(src2.type == BRW_REGISTER_TYPE_F);			\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							\
void brw_##OP(struct brw_compile *p,					\
	      struct brw_reg dest,					\
	      struct brw_reg src)					\
{									\
   struct brw_instruction *rnd, *add;					\
   rnd = next_insn(p, BRW_OPCODE_##OP);					\
   brw_set_dest(p, rnd, dest);						\
   brw_set_src0(p, rnd, src);						\
									\
   if (p->brw->gen < 6) {						\
      /* turn on round-increments */					\
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		\
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			\
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		\
   }									\
}
1038 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1039 struct brw_reg dest
,
1040 struct brw_reg src0
,
1041 struct brw_reg src1
)
1044 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1045 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1046 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1047 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1048 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1051 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1052 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1053 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1054 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1055 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1058 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1061 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1062 struct brw_reg dest
,
1063 struct brw_reg src0
,
1064 struct brw_reg src1
)
1066 assert(dest
.type
== src0
.type
);
1067 assert(src0
.type
== src1
.type
);
1068 switch (src0
.type
) {
1069 case BRW_REGISTER_TYPE_B
:
1070 case BRW_REGISTER_TYPE_UB
:
1071 case BRW_REGISTER_TYPE_W
:
1072 case BRW_REGISTER_TYPE_UW
:
1073 case BRW_REGISTER_TYPE_D
:
1074 case BRW_REGISTER_TYPE_UD
:
1077 assert(!"Bad type for brw_AVG");
1080 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1083 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1084 struct brw_reg dest
,
1085 struct brw_reg src0
,
1086 struct brw_reg src1
)
1089 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1090 src0
.type
== BRW_REGISTER_TYPE_UD
||
1091 src1
.type
== BRW_REGISTER_TYPE_D
||
1092 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1093 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1096 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1097 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1098 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1099 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1100 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1103 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1104 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1105 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1106 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1107 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1110 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1111 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1112 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1113 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1115 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1119 void brw_NOP(struct brw_compile
*p
)
1121 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1122 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1123 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1124 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1131 /***********************************************************************
1132 * Comparisons, if/else/endif
1135 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1136 struct brw_reg dest
,
1137 struct brw_reg src0
,
1138 struct brw_reg src1
)
1140 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1142 insn
->header
.execution_size
= 1;
1143 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1144 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1146 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1152 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1154 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1156 p
->if_stack_depth
++;
1157 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1158 p
->if_stack_array_size
*= 2;
1159 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1160 p
->if_stack_array_size
);
1164 static struct brw_instruction
*
1165 pop_if_stack(struct brw_compile
*p
)
1167 p
->if_stack_depth
--;
1168 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1172 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1174 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1175 p
->loop_stack_array_size
*= 2;
1176 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1177 p
->loop_stack_array_size
);
1178 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1179 p
->loop_stack_array_size
);
1182 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1183 p
->loop_stack_depth
++;
1184 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1187 static struct brw_instruction
*
1188 get_inner_do_insn(struct brw_compile
*p
)
1190 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1193 /* EU takes the value from the flag register and pushes it onto some
1194 * sort of a stack (presumably merging with any flag value already on
1195 * the stack). Within an if block, the flags at the top of the stack
1196 * control execution on each channel of the unit, eg. on each of the
1197 * 16 pixel values in our wm programs.
1199 * When the matching 'else' instruction is reached (presumably by
1200 * countdown of the instruction count patched in by our ELSE/ENDIF
1201 * functions), the relevent flags are inverted.
1203 * When the matching 'endif' instruction is reached, the flags are
1204 * popped off. If the stack is now empty, normal execution resumes.
1206 struct brw_instruction
*
1207 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1209 struct brw_context
*brw
= p
->brw
;
1210 struct brw_instruction
*insn
;
1212 insn
= next_insn(p
, BRW_OPCODE_IF
);
1214 /* Override the defaults for this instruction:
1217 brw_set_dest(p
, insn
, brw_ip_reg());
1218 brw_set_src0(p
, insn
, brw_ip_reg());
1219 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1220 } else if (brw
->gen
== 6) {
1221 brw_set_dest(p
, insn
, brw_imm_w(0));
1222 insn
->bits1
.branch_gen6
.jump_count
= 0;
1223 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1224 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1226 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1227 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1228 brw_set_src1(p
, insn
, brw_imm_ud(0));
1229 insn
->bits3
.break_cont
.jip
= 0;
1230 insn
->bits3
.break_cont
.uip
= 0;
1233 insn
->header
.execution_size
= execute_size
;
1234 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1235 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1236 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1237 if (!p
->single_program_flow
)
1238 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1240 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1242 push_if_stack(p
, insn
);
1243 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1247 /* This function is only used for gen6-style IF instructions with an
1248 * embedded comparison (conditional modifier). It is not used on gen7.
1250 struct brw_instruction
*
1251 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1252 struct brw_reg src0
, struct brw_reg src1
)
1254 struct brw_instruction
*insn
;
1256 insn
= next_insn(p
, BRW_OPCODE_IF
);
1258 brw_set_dest(p
, insn
, brw_imm_w(0));
1259 if (p
->compressed
) {
1260 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1262 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1264 insn
->bits1
.branch_gen6
.jump_count
= 0;
1265 brw_set_src0(p
, insn
, src0
);
1266 brw_set_src1(p
, insn
, src1
);
1268 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1269 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1270 insn
->header
.destreg__conditionalmod
= conditional
;
1272 if (!p
->single_program_flow
)
1273 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1275 push_if_stack(p
, insn
);
1280 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1283 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1284 struct brw_instruction
*if_inst
,
1285 struct brw_instruction
*else_inst
)
1287 /* The next instruction (where the ENDIF would be, if it existed) */
1288 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1290 assert(p
->single_program_flow
);
1291 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1292 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1293 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1295 /* Convert IF to an ADD instruction that moves the instruction pointer
1296 * to the first instruction of the ELSE block. If there is no ELSE
1297 * block, point to where ENDIF would be. Reverse the predicate.
1299 * There's no need to execute an ENDIF since we don't need to do any
1300 * stack operations, and if we're currently executing, we just want to
1301 * continue normally.
1303 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1304 if_inst
->header
.predicate_inverse
= 1;
1306 if (else_inst
!= NULL
) {
1307 /* Convert ELSE to an ADD instruction that points where the ENDIF
1310 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1312 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1313 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1315 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1320 * Patch IF and ELSE instructions with appropriate jump targets.
1323 patch_IF_ELSE(struct brw_compile
*p
,
1324 struct brw_instruction
*if_inst
,
1325 struct brw_instruction
*else_inst
,
1326 struct brw_instruction
*endif_inst
)
1328 struct brw_context
*brw
= p
->brw
;
1330 /* We shouldn't be patching IF and ELSE instructions in single program flow
1331 * mode when gen < 6, because in single program flow mode on those
1332 * platforms, we convert flow control instructions to conditional ADDs that
1333 * operate on IP (see brw_ENDIF).
1335 * However, on Gen6, writing to IP doesn't work in single program flow mode
1336 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1337 * not be updated by non-flow control instructions."). And on later
1338 * platforms, there is no significant benefit to converting control flow
1339 * instructions to conditional ADDs. So we do patch IF and ELSE
1340 * instructions in single program flow mode on those platforms.
1343 assert(!p
->single_program_flow
);
1345 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1346 assert(endif_inst
!= NULL
);
1347 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1350 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1351 * requires 2 chunks.
1356 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1357 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1359 if (else_inst
== NULL
) {
1360 /* Patch IF -> ENDIF */
1362 /* Turn it into an IFF, which means no mask stack operations for
1363 * all-false and jumping past the ENDIF.
1365 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1366 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1367 if_inst
->bits3
.if_else
.pop_count
= 0;
1368 if_inst
->bits3
.if_else
.pad0
= 0;
1369 } else if (brw
->gen
== 6) {
1370 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1371 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1373 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1374 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1377 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1379 /* Patch IF -> ELSE */
1381 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1382 if_inst
->bits3
.if_else
.pop_count
= 0;
1383 if_inst
->bits3
.if_else
.pad0
= 0;
1384 } else if (brw
->gen
== 6) {
1385 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1388 /* Patch ELSE -> ENDIF */
1390 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1393 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1394 else_inst
->bits3
.if_else
.pop_count
= 1;
1395 else_inst
->bits3
.if_else
.pad0
= 0;
1396 } else if (brw
->gen
== 6) {
1397 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1398 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1400 /* The IF instruction's JIP should point just past the ELSE */
1401 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1402 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1403 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1404 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1410 brw_ELSE(struct brw_compile
*p
)
1412 struct brw_context
*brw
= p
->brw
;
1413 struct brw_instruction
*insn
;
1415 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1418 brw_set_dest(p
, insn
, brw_ip_reg());
1419 brw_set_src0(p
, insn
, brw_ip_reg());
1420 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1421 } else if (brw
->gen
== 6) {
1422 brw_set_dest(p
, insn
, brw_imm_w(0));
1423 insn
->bits1
.branch_gen6
.jump_count
= 0;
1424 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1425 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1427 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1428 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1429 brw_set_src1(p
, insn
, brw_imm_ud(0));
1430 insn
->bits3
.break_cont
.jip
= 0;
1431 insn
->bits3
.break_cont
.uip
= 0;
1434 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1435 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1436 if (!p
->single_program_flow
)
1437 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1439 push_if_stack(p
, insn
);
1443 brw_ENDIF(struct brw_compile
*p
)
1445 struct brw_context
*brw
= p
->brw
;
1446 struct brw_instruction
*insn
= NULL
;
1447 struct brw_instruction
*else_inst
= NULL
;
1448 struct brw_instruction
*if_inst
= NULL
;
1449 struct brw_instruction
*tmp
;
1450 bool emit_endif
= true;
1452 /* In single program flow mode, we can express IF and ELSE instructions
1453 * equivalently as ADD instructions that operate on IP. On platforms prior
1454 * to Gen6, flow control instructions cause an implied thread switch, so
1455 * this is a significant savings.
1457 * However, on Gen6, writing to IP doesn't work in single program flow mode
1458 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1459 * not be updated by non-flow control instructions."). And on later
1460 * platforms, there is no significant benefit to converting control flow
1461 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1464 if (brw
->gen
< 6 && p
->single_program_flow
)
1468 * A single next_insn() may change the base adress of instruction store
1469 * memory(p->store), so call it first before referencing the instruction
1470 * store pointer from an index
1473 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1475 /* Pop the IF and (optional) ELSE instructions from the stack */
1476 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1477 tmp
= pop_if_stack(p
);
1478 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1480 tmp
= pop_if_stack(p
);
1485 /* ENDIF is useless; don't bother emitting it. */
1486 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1491 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1492 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1493 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1494 } else if (brw
->gen
== 6) {
1495 brw_set_dest(p
, insn
, brw_imm_w(0));
1496 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1497 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1499 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1500 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1501 brw_set_src1(p
, insn
, brw_imm_ud(0));
1504 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1505 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1506 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1508 /* Also pop item off the stack in the endif instruction: */
1510 insn
->bits3
.if_else
.jump_count
= 0;
1511 insn
->bits3
.if_else
.pop_count
= 1;
1512 insn
->bits3
.if_else
.pad0
= 0;
1513 } else if (brw
->gen
== 6) {
1514 insn
->bits1
.branch_gen6
.jump_count
= 2;
1516 insn
->bits3
.break_cont
.jip
= 2;
1518 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1521 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1523 struct brw_context
*brw
= p
->brw
;
1524 struct brw_instruction
*insn
;
1526 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1527 if (brw
->gen
>= 6) {
1528 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1529 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1530 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1532 brw_set_dest(p
, insn
, brw_ip_reg());
1533 brw_set_src0(p
, insn
, brw_ip_reg());
1534 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1535 insn
->bits3
.if_else
.pad0
= 0;
1536 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1538 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1539 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1544 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1546 struct brw_instruction
*insn
;
1548 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1549 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1550 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1551 brw_set_dest(p
, insn
, brw_ip_reg());
1552 brw_set_src0(p
, insn
, brw_ip_reg());
1553 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1555 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1556 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1560 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1562 struct brw_instruction
*insn
;
1563 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1564 brw_set_dest(p
, insn
, brw_ip_reg());
1565 brw_set_src0(p
, insn
, brw_ip_reg());
1566 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1567 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1568 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1569 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1570 insn
->bits3
.if_else
.pad0
= 0;
1571 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1575 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1577 struct brw_instruction
*insn
;
1579 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1580 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1581 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1582 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1584 if (p
->compressed
) {
1585 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1587 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1588 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1595 * The DO/WHILE is just an unterminated loop -- break or continue are
1596 * used for control within the loop. We have a few ways they can be
1599 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1600 * jip and no DO instruction.
1602 * For non-uniform control flow pre-gen6, there's a DO instruction to
1603 * push the mask, and a WHILE to jump back, and BREAK to get out and
1606 * For gen6, there's no more mask stack, so no need for DO. WHILE
1607 * just points back to the first instruction of the loop.
1609 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1611 struct brw_context
*brw
= p
->brw
;
1613 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1614 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1615 return &p
->store
[p
->nr_insn
];
1617 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1619 push_loop_stack(p
, insn
);
1621 /* Override the defaults for this instruction:
1623 brw_set_dest(p
, insn
, brw_null_reg());
1624 brw_set_src0(p
, insn
, brw_null_reg());
1625 brw_set_src1(p
, insn
, brw_null_reg());
1627 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1628 insn
->header
.execution_size
= execute_size
;
1629 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1630 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1631 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1638 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1641 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1642 * nesting, since it can always just point to the end of the block/current loop.
1645 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1647 struct brw_context
*brw
= p
->brw
;
1648 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1649 struct brw_instruction
*inst
;
1650 int br
= (brw
->gen
== 5) ? 2 : 1;
1652 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1653 /* If the jump count is != 0, that means that this instruction has already
1654 * been patched because it's part of a loop inside of the one we're
1657 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1658 inst
->bits3
.if_else
.jump_count
== 0) {
1659 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1660 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1661 inst
->bits3
.if_else
.jump_count
== 0) {
1662 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1667 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1669 struct brw_context
*brw
= p
->brw
;
1670 struct brw_instruction
*insn
, *do_insn
;
1676 if (brw
->gen
>= 7) {
1677 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1678 do_insn
= get_inner_do_insn(p
);
1680 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1681 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1682 brw_set_src1(p
, insn
, brw_imm_ud(0));
1683 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1685 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1686 } else if (brw
->gen
== 6) {
1687 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1688 do_insn
= get_inner_do_insn(p
);
1690 brw_set_dest(p
, insn
, brw_imm_w(0));
1691 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1692 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1693 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1695 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1697 if (p
->single_program_flow
) {
1698 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1699 do_insn
= get_inner_do_insn(p
);
1701 brw_set_dest(p
, insn
, brw_ip_reg());
1702 brw_set_src0(p
, insn
, brw_ip_reg());
1703 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1704 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1706 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1707 do_insn
= get_inner_do_insn(p
);
1709 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1711 brw_set_dest(p
, insn
, brw_ip_reg());
1712 brw_set_src0(p
, insn
, brw_ip_reg());
1713 brw_set_src1(p
, insn
, brw_imm_d(0));
1715 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1716 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1717 insn
->bits3
.if_else
.pop_count
= 0;
1718 insn
->bits3
.if_else
.pad0
= 0;
1720 brw_patch_break_cont(p
, insn
);
1723 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1724 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1726 p
->loop_stack_depth
--;
1734 void brw_land_fwd_jump(struct brw_compile
*p
, int jmp_insn_idx
)
1736 struct brw_context
*brw
= p
->brw
;
1737 struct brw_instruction
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1743 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1744 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1746 jmp_insn
->bits3
.ud
= jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1);
1751 /* To integrate with the above, it makes sense that the comparison
1752 * instruction should populate the flag register. It might be simpler
1753 * just to use the flag reg for most WM tasks?
1755 void brw_CMP(struct brw_compile
*p
,
1756 struct brw_reg dest
,
1757 unsigned conditional
,
1758 struct brw_reg src0
,
1759 struct brw_reg src1
)
1761 struct brw_context
*brw
= p
->brw
;
1762 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1764 insn
->header
.destreg__conditionalmod
= conditional
;
1765 brw_set_dest(p
, insn
, dest
);
1766 brw_set_src0(p
, insn
, src0
);
1767 brw_set_src1(p
, insn
, src1
);
1769 /* guess_execution_size(insn, src0); */
1772 /* Make it so that future instructions will use the computed flag
1773 * value until brw_set_predicate_control_flag_value() is called
1776 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1778 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1779 p
->flag_value
= 0xff;
1782 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1784 * "Any CMP instruction with a null destination must use a {switch}."
1786 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1787 * mentioned on their work-arounds pages.
1789 if (brw
->gen
== 7) {
1790 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1791 dest
.nr
== BRW_ARF_NULL
) {
1792 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1797 /* Issue 'wait' instruction for n1, host could program MMIO
1798 to wake up thread. */
1799 void brw_WAIT (struct brw_compile
*p
)
1801 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1802 struct brw_reg src
= brw_notification_1_reg();
1804 brw_set_dest(p
, insn
, src
);
1805 brw_set_src0(p
, insn
, src
);
1806 brw_set_src1(p
, insn
, brw_null_reg());
1807 insn
->header
.execution_size
= 0; /* must */
1808 insn
->header
.predicate_control
= 0;
1809 insn
->header
.compression_control
= 0;
1813 /***********************************************************************
1814 * Helpers for the various SEND message types:
1817 /** Extended math function, float[8].
1819 void brw_math( struct brw_compile
*p
,
1820 struct brw_reg dest
,
1822 unsigned msg_reg_nr
,
1825 unsigned precision
)
1827 struct brw_context
*brw
= p
->brw
;
1829 if (brw
->gen
>= 6) {
1830 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1832 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1833 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1834 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1836 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1838 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1840 /* Source modifiers are ignored for extended math instructions on Gen6. */
1841 if (brw
->gen
== 6) {
1842 assert(!src
.negate
);
1846 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1847 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1848 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1849 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1851 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1854 /* Math is the same ISA format as other opcodes, except that CondModifier
1855 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1857 insn
->header
.destreg__conditionalmod
= function
;
1859 brw_set_dest(p
, insn
, dest
);
1860 brw_set_src0(p
, insn
, src
);
1861 brw_set_src1(p
, insn
, brw_null_reg());
1863 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1865 /* Example code doesn't set predicate_control for send
1868 insn
->header
.predicate_control
= 0;
1869 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1871 brw_set_dest(p
, insn
, dest
);
1872 brw_set_src0(p
, insn
, src
);
1873 brw_set_math_message(p
,
1876 src
.type
== BRW_REGISTER_TYPE_D
,
1882 /** Extended math function, float[8].
1884 void brw_math2(struct brw_compile
*p
,
1885 struct brw_reg dest
,
1887 struct brw_reg src0
,
1888 struct brw_reg src1
)
1890 struct brw_context
*brw
= p
->brw
;
1891 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1893 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1894 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1895 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1896 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1898 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1899 if (brw
->gen
== 6) {
1900 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1901 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1904 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1905 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1906 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1907 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1908 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1910 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1911 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1914 /* Source modifiers are ignored for extended math instructions on Gen6. */
1915 if (brw
->gen
== 6) {
1916 assert(!src0
.negate
);
1918 assert(!src1
.negate
);
1922 /* Math is the same ISA format as other opcodes, except that CondModifier
1923 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1925 insn
->header
.destreg__conditionalmod
= function
;
1927 brw_set_dest(p
, insn
, dest
);
1928 brw_set_src0(p
, insn
, src0
);
1929 brw_set_src1(p
, insn
, src1
);
1934 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1935 * using a constant offset per channel.
1937 * The offset must be aligned to oword size (16 bytes). Used for
1938 * register spilling.
1940 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1945 struct brw_context
*brw
= p
->brw
;
1946 uint32_t msg_control
, msg_type
;
1952 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1954 if (num_regs
== 1) {
1955 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1958 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1962 /* Set up the message header. This is g0, with g0.2 filled with
1963 * the offset. We don't want to leave our offset around in g0 or
1964 * it'll screw up texture samples, so set it up inside the message
1968 brw_push_insn_state(p
);
1969 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1970 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1972 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1974 /* set message header global offset field (reg 0, element 2) */
1976 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1978 2), BRW_REGISTER_TYPE_UD
),
1979 brw_imm_ud(offset
));
1981 brw_pop_insn_state(p
);
1985 struct brw_reg dest
;
1986 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1987 int send_commit_msg
;
1988 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1989 BRW_REGISTER_TYPE_UW
);
1991 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1992 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1993 src_header
= vec16(src_header
);
1995 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1996 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1998 /* Until gen6, writes followed by reads from the same location
1999 * are not guaranteed to be ordered unless write_commit is set.
2000 * If set, then a no-op write is issued to the destination
2001 * register to set a dependency, and a read from the destination
2002 * can be used to ensure the ordering.
2004 * For gen6, only writes between different threads need ordering
2005 * protection. Our use of DP writes is all about register
2006 * spilling within a thread.
2008 if (brw
->gen
>= 6) {
2009 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2010 send_commit_msg
= 0;
2013 send_commit_msg
= 1;
2016 brw_set_dest(p
, insn
, dest
);
2017 if (brw
->gen
>= 6) {
2018 brw_set_src0(p
, insn
, mrf
);
2020 brw_set_src0(p
, insn
, brw_null_reg());
2024 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2026 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2028 brw_set_dp_write_message(p
,
2030 255, /* binding table index (255=stateless) */
2034 true, /* header_present */
2035 0, /* not a render target */
2036 send_commit_msg
, /* response_length */
2044 * Read a block of owords (half a GRF each) from the scratch buffer
2045 * using a constant index per channel.
2047 * Offset must be aligned to oword size (16 bytes). Used for register
2051 brw_oword_block_read_scratch(struct brw_compile
*p
,
2052 struct brw_reg dest
,
2057 struct brw_context
*brw
= p
->brw
;
2058 uint32_t msg_control
;
2064 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2065 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2067 if (num_regs
== 1) {
2068 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2071 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2076 brw_push_insn_state(p
);
2077 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2078 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2080 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2082 /* set message header global offset field (reg 0, element 2) */
2084 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2086 2), BRW_REGISTER_TYPE_UD
),
2087 brw_imm_ud(offset
));
2089 brw_pop_insn_state(p
);
2093 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2095 assert(insn
->header
.predicate_control
== 0);
2096 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2097 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2099 brw_set_dest(p
, insn
, dest
); /* UW? */
2100 if (brw
->gen
>= 6) {
2101 brw_set_src0(p
, insn
, mrf
);
2103 brw_set_src0(p
, insn
, brw_null_reg());
2106 brw_set_dp_read_message(p
,
2108 255, /* binding table index (255=stateless) */
2110 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2111 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2113 true, /* header_present */
2119 gen7_block_read_scratch(struct brw_compile
*p
,
2120 struct brw_reg dest
,
2124 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2126 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2128 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2129 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2131 brw_set_dest(p
, insn
, dest
);
2133 /* The HW requires that the header is present; this is to get the g0.5
2136 bool header_present
= true;
2137 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2139 brw_set_message_descriptor(p
, insn
,
2140 GEN7_SFID_DATAPORT_DATA_CACHE
,
2141 1, /* mlen: just g0 */
2146 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2148 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2149 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2151 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2152 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2153 * is 32 bytes, which happens to be the size of a register.
2156 assert(offset
< (1 << 12));
2157 insn
->bits3
.ud
|= offset
;
2161 * Read a float[4] vector from the data port Data Cache (const buffer).
2162 * Location (in buffer) should be a multiple of 16.
2163 * Used for fetching shader constants.
2165 void brw_oword_block_read(struct brw_compile
*p
,
2166 struct brw_reg dest
,
2169 uint32_t bind_table_index
)
2171 struct brw_context
*brw
= p
->brw
;
2173 /* On newer hardware, offset is in units of owords. */
2177 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2179 brw_push_insn_state(p
);
2180 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2181 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2182 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2184 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2186 /* set message header global offset field (reg 0, element 2) */
2188 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2190 2), BRW_REGISTER_TYPE_UD
),
2191 brw_imm_ud(offset
));
2193 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2194 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2196 /* cast dest to a uword[8] vector */
2197 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2199 brw_set_dest(p
, insn
, dest
);
2200 if (brw
->gen
>= 6) {
2201 brw_set_src0(p
, insn
, mrf
);
2203 brw_set_src0(p
, insn
, brw_null_reg());
2206 brw_set_dp_read_message(p
,
2209 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2210 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2211 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2213 true, /* header_present */
2214 1); /* response_length (1 reg, 2 owords!) */
2216 brw_pop_insn_state(p
);
2220 void brw_fb_WRITE(struct brw_compile
*p
,
2222 unsigned msg_reg_nr
,
2223 struct brw_reg src0
,
2224 unsigned msg_control
,
2225 unsigned binding_table_index
,
2226 unsigned msg_length
,
2227 unsigned response_length
,
2229 bool header_present
)
2231 struct brw_context
*brw
= p
->brw
;
2232 struct brw_instruction
*insn
;
2234 struct brw_reg dest
;
2236 if (dispatch_width
== 16)
2237 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2239 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2241 if (brw
->gen
>= 6) {
2242 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2244 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2246 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2248 if (brw
->gen
>= 6) {
2249 /* headerless version, just submit color payload */
2250 src0
= brw_message_reg(msg_reg_nr
);
2252 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2254 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2256 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2259 brw_set_dest(p
, insn
, dest
);
2260 brw_set_src0(p
, insn
, src0
);
2261 brw_set_dp_write_message(p
,
2263 binding_table_index
,
2268 eot
, /* last render target write */
2271 0 /* send_commit_msg */);
2276 * Texture sample instruction.
2277 * Note: the msg_type plus msg_length values determine exactly what kind
2278 * of sampling operation is performed. See volume 4, page 161 of docs.
2280 void brw_SAMPLE(struct brw_compile
*p
,
2281 struct brw_reg dest
,
2282 unsigned msg_reg_nr
,
2283 struct brw_reg src0
,
2284 unsigned binding_table_index
,
2287 unsigned response_length
,
2288 unsigned msg_length
,
2289 unsigned header_present
,
2291 unsigned return_format
)
2293 struct brw_context
*brw
= p
->brw
;
2294 struct brw_instruction
*insn
;
2296 if (msg_reg_nr
!= -1)
2297 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2299 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2300 insn
->header
.predicate_control
= 0; /* XXX */
2302 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2304 * "Instruction compression is not allowed for this instruction (that
2305 * is, send). The hardware behavior is undefined if this instruction is
2306 * set as compressed. However, compress control can be set to "SecHalf"
2307 * to affect the EMask generation."
2309 * No similar wording is found in later PRMs, but there are examples
2310 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2311 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2312 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2314 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2315 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2318 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2320 brw_set_dest(p
, insn
, dest
);
2321 brw_set_src0(p
, insn
, src0
);
2322 brw_set_sampler_message(p
, insn
,
2323 binding_table_index
,
2333 /* All these variables are pretty confusing - we might be better off
2334 * using bitmasks and macros for this, in the old style. Or perhaps
2335 * just having the caller instantiate the fields in dword3 itself.
2337 void brw_urb_WRITE(struct brw_compile
*p
,
2338 struct brw_reg dest
,
2339 unsigned msg_reg_nr
,
2340 struct brw_reg src0
,
2341 enum brw_urb_write_flags flags
,
2342 unsigned msg_length
,
2343 unsigned response_length
,
2347 struct brw_context
*brw
= p
->brw
;
2348 struct brw_instruction
*insn
;
2350 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2352 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2353 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2354 brw_push_insn_state(p
);
2355 brw_set_access_mode(p
, BRW_ALIGN_1
);
2356 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2357 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2358 BRW_REGISTER_TYPE_UD
),
2359 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2360 brw_imm_ud(0xff00));
2361 brw_pop_insn_state(p
);
2364 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2366 assert(msg_length
< BRW_MAX_MRF
);
2368 brw_set_dest(p
, insn
, dest
);
2369 brw_set_src0(p
, insn
, src0
);
2370 brw_set_src1(p
, insn
, brw_imm_d(0));
2373 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2375 brw_set_urb_message(p
,
2385 next_ip(struct brw_compile
*p
, int ip
)
2387 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2389 if (insn
->header
.cmpt_control
)
2396 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2399 void *store
= p
->store
;
2401 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2402 struct brw_instruction
*insn
= store
+ ip
;
2404 switch (insn
->header
.opcode
) {
2405 case BRW_OPCODE_ENDIF
:
2406 case BRW_OPCODE_ELSE
:
2407 case BRW_OPCODE_WHILE
:
2408 case BRW_OPCODE_HALT
:
2416 /* There is no DO instruction on gen6, so to find the end of the loop
2417 * we have to see if the loop is jumping back before our start
2421 brw_find_loop_end(struct brw_compile
*p
, int start
)
2423 struct brw_context
*brw
= p
->brw
;
2426 void *store
= p
->store
;
2428 /* Always start after the instruction (such as a WHILE) we're trying to fix
2431 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2432 struct brw_instruction
*insn
= store
+ ip
;
2434 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2435 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2436 : insn
->bits3
.break_cont
.jip
;
2437 if (ip
+ jip
* scale
<= start
)
2441 assert(!"not reached");
2445 /* After program generation, go back and update the UIP and JIP of
2446 * BREAK, CONT, and HALT instructions to their correct locations.
2449 brw_set_uip_jip(struct brw_compile
*p
)
2451 struct brw_context
*brw
= p
->brw
;
2454 void *store
= p
->store
;
2459 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2460 struct brw_instruction
*insn
= store
+ ip
;
2462 if (insn
->header
.cmpt_control
) {
2463 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2464 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2465 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2466 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2470 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2471 switch (insn
->header
.opcode
) {
2472 case BRW_OPCODE_BREAK
:
2473 assert(block_end_ip
!= 0);
2474 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2475 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2476 insn
->bits3
.break_cont
.uip
=
2477 (brw_find_loop_end(p
, ip
) - ip
+
2478 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2480 case BRW_OPCODE_CONTINUE
:
2481 assert(block_end_ip
!= 0);
2482 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2483 insn
->bits3
.break_cont
.uip
=
2484 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2486 assert(insn
->bits3
.break_cont
.uip
!= 0);
2487 assert(insn
->bits3
.break_cont
.jip
!= 0);
2490 case BRW_OPCODE_ENDIF
:
2491 if (block_end_ip
== 0)
2492 insn
->bits3
.break_cont
.jip
= 2;
2494 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2497 case BRW_OPCODE_HALT
:
2498 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2500 * "In case of the halt instruction not inside any conditional
2501 * code block, the value of <JIP> and <UIP> should be the
2502 * same. In case of the halt instruction inside conditional code
2503 * block, the <UIP> should be the end of the program, and the
2504 * <JIP> should be end of the most inner conditional code block."
2506 * The uip will have already been set by whoever set up the
2509 if (block_end_ip
== 0) {
2510 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2512 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2514 assert(insn
->bits3
.break_cont
.uip
!= 0);
2515 assert(insn
->bits3
.break_cont
.jip
!= 0);
2521 void brw_ff_sync(struct brw_compile
*p
,
2522 struct brw_reg dest
,
2523 unsigned msg_reg_nr
,
2524 struct brw_reg src0
,
2526 unsigned response_length
,
2529 struct brw_context
*brw
= p
->brw
;
2530 struct brw_instruction
*insn
;
2532 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2534 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2535 brw_set_dest(p
, insn
, dest
);
2536 brw_set_src0(p
, insn
, src0
);
2537 brw_set_src1(p
, insn
, brw_imm_d(0));
2540 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2542 brw_set_ff_sync_message(p
,
2550 * Emit the SEND instruction necessary to generate stream output data on Gen6
2551 * (for transform feedback).
2553 * If send_commit_msg is true, this is the last piece of stream output data
2554 * from this thread, so send the data as a committed write. According to the
2555 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2557 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2558 * writes are complete by sending the final write as a committed write."
2561 brw_svb_write(struct brw_compile
*p
,
2562 struct brw_reg dest
,
2563 unsigned msg_reg_nr
,
2564 struct brw_reg src0
,
2565 unsigned binding_table_index
,
2566 bool send_commit_msg
)
2568 struct brw_instruction
*insn
;
2570 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2572 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2573 brw_set_dest(p
, insn
, dest
);
2574 brw_set_src0(p
, insn
, src0
);
2575 brw_set_src1(p
, insn
, brw_imm_d(0));
2576 brw_set_dp_write_message(p
, insn
,
2577 binding_table_index
,
2578 0, /* msg_control: ignored */
2579 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2581 true, /* header_present */
2582 0, /* last_render_target: ignored */
2583 send_commit_msg
, /* response_length */
2584 0, /* end_of_thread */
2585 send_commit_msg
); /* send_commit_msg */
2589 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2590 struct brw_instruction
*insn
,
2592 unsigned bind_table_index
,
2593 unsigned msg_length
,
2594 unsigned response_length
,
2595 bool header_present
)
2597 if (p
->brw
->is_haswell
) {
2598 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2599 msg_length
, response_length
,
2600 header_present
, false);
2603 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2604 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2605 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2607 insn
->bits3
.gen7_dp
.msg_type
=
2608 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2610 insn
->bits3
.gen7_dp
.msg_type
=
2611 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2615 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2616 msg_length
, response_length
,
2617 header_present
, false);
2619 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2621 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2622 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2625 if (response_length
)
2626 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2628 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2629 insn
->bits3
.ud
|= atomic_op
<< 8;
2633 brw_untyped_atomic(struct brw_compile
*p
,
2634 struct brw_reg dest
,
2637 unsigned bind_table_index
,
2638 unsigned msg_length
,
2639 unsigned response_length
) {
2640 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2642 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2643 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2644 brw_set_src1(p
, insn
, brw_imm_d(0));
2645 brw_set_dp_untyped_atomic_message(
2646 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2647 insn
->header
.access_mode
== BRW_ALIGN_1
);
2651 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2652 struct brw_instruction
*insn
,
2653 unsigned bind_table_index
,
2654 unsigned msg_length
,
2655 unsigned response_length
,
2656 bool header_present
)
2658 const unsigned dispatch_width
=
2659 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2660 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2662 if (p
->brw
->is_haswell
) {
2663 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2664 msg_length
, response_length
,
2665 header_present
, false);
2667 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2669 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2670 msg_length
, response_length
,
2671 header_present
, false);
2673 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2676 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2677 if (dispatch_width
== 16)
2678 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2680 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2683 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2685 /* Set mask of 32-bit channels to drop. */
2686 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2690 brw_untyped_surface_read(struct brw_compile
*p
,
2691 struct brw_reg dest
,
2693 unsigned bind_table_index
,
2694 unsigned msg_length
,
2695 unsigned response_length
)
2697 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2699 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2700 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2701 brw_set_dp_untyped_surface_read_message(
2702 p
, insn
, bind_table_index
, msg_length
, response_length
,
2703 insn
->header
.access_mode
== BRW_ALIGN_1
);
2707 * This instruction is generated as a single-channel align1 instruction by
2708 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2710 * We can't use the typed atomic op in the FS because that has the execution
2711 * mask ANDed with the pixel mask, but we just want to write the one dword for
2714 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2715 * one u32. So we use the same untyped atomic write message as the pixel
2718 * The untyped atomic operation requires a BUFFER surface type with RAW
2719 * format, and is only accessible through the legacy DATA_CACHE dataport
2722 void brw_shader_time_add(struct brw_compile
*p
,
2723 struct brw_reg payload
,
2724 uint32_t surf_index
)
2726 struct brw_context
*brw
= p
->brw
;
2727 assert(brw
->gen
>= 7);
2729 brw_push_insn_state(p
);
2730 brw_set_access_mode(p
, BRW_ALIGN_1
);
2731 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2732 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2733 brw_pop_insn_state(p
);
2735 /* We use brw_vec1_reg and unmasked because we want to increment the given
2738 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2740 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2742 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2743 2 /* message length */,
2744 0 /* response length */,
2745 false /* header present */);