2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
103 * Convert a brw_reg_type enumeration value into the hardware representation.
105 * The hardware encoding may depend on whether the value is an immediate.
108 brw_reg_type_to_hw_type(const struct brw_context
*brw
,
109 enum brw_reg_type type
, unsigned file
)
111 bool imm
= file
== BRW_IMMEDIATE_VALUE
;
113 if (file
== BRW_IMMEDIATE_VALUE
) {
114 const static int imm_hw_types
[] = {
115 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
116 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
117 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
118 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
119 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
120 [BRW_REGISTER_TYPE_UB
] = -1,
121 [BRW_REGISTER_TYPE_B
] = -1,
122 [BRW_REGISTER_TYPE_UV
] = BRW_HW_REG_IMM_TYPE_UV
,
123 [BRW_REGISTER_TYPE_VF
] = BRW_HW_REG_IMM_TYPE_VF
,
124 [BRW_REGISTER_TYPE_V
] = BRW_HW_REG_IMM_TYPE_V
,
126 assert(type
< ARRAY_SIZE(imm_hw_types
));
127 assert(imm_hw_types
[type
] != -1);
128 return imm_hw_types
[type
];
130 /* Non-immediate registers */
131 const static int hw_types
[] = {
132 [BRW_REGISTER_TYPE_UD
] = BRW_HW_REG_TYPE_UD
,
133 [BRW_REGISTER_TYPE_D
] = BRW_HW_REG_TYPE_D
,
134 [BRW_REGISTER_TYPE_UW
] = BRW_HW_REG_TYPE_UW
,
135 [BRW_REGISTER_TYPE_W
] = BRW_HW_REG_TYPE_W
,
136 [BRW_REGISTER_TYPE_UB
] = BRW_HW_REG_NON_IMM_TYPE_UB
,
137 [BRW_REGISTER_TYPE_B
] = BRW_HW_REG_NON_IMM_TYPE_B
,
138 [BRW_REGISTER_TYPE_F
] = BRW_HW_REG_TYPE_F
,
139 [BRW_REGISTER_TYPE_UV
] = -1,
140 [BRW_REGISTER_TYPE_VF
] = -1,
141 [BRW_REGISTER_TYPE_V
] = -1,
143 assert(type
< ARRAY_SIZE(hw_types
));
144 assert(hw_types
[type
] != -1);
145 return hw_types
[type
];
150 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
153 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
154 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
155 assert(dest
.nr
< 128);
157 gen7_convert_mrf_to_grf(p
, &dest
);
159 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
160 insn
->bits1
.da1
.dest_reg_type
=
161 brw_reg_type_to_hw_type(p
->brw
, dest
.type
, dest
.file
);
162 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
164 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
165 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
167 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
168 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
169 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
170 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
171 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
174 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
175 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
176 if (dest
.file
== BRW_GENERAL_REGISTER_FILE
||
177 dest
.file
== BRW_MESSAGE_REGISTER_FILE
) {
178 assert(dest
.dw1
.bits
.writemask
!= 0);
180 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
181 * Although Dst.HorzStride is a don't care for Align16, HW needs
182 * this to be programmed as "01".
184 insn
->bits1
.da16
.dest_horiz_stride
= 1;
188 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
190 /* These are different sizes in align1 vs align16:
192 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
193 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
194 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
195 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
196 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
199 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
200 /* even ignored in da16, still need to set as '01' */
201 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
205 /* NEW: Set the execution size based on dest.width and
206 * insn->compression_control:
208 guess_execution_size(p
, insn
, dest
);
211 extern int reg_type_size
[];
214 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
216 int hstride_for_reg
[] = {0, 1, 2, 4};
217 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
218 int width_for_reg
[] = {1, 2, 4, 8, 16};
219 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
220 int width
, hstride
, vstride
, execsize
;
222 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
223 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
224 * mean the destination has to be 128-bit aligned and the
225 * destination horiz stride has to be a word.
227 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
228 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
229 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
235 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
236 reg
.file
== BRW_ARF_NULL
)
239 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
240 hstride
= hstride_for_reg
[reg
.hstride
];
242 if (reg
.vstride
== 0xf) {
245 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
246 vstride
= vstride_for_reg
[reg
.vstride
];
249 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
250 width
= width_for_reg
[reg
.width
];
252 assert(insn
->header
.execution_size
>= 0 &&
253 insn
->header
.execution_size
< Elements(execsize_for_reg
));
254 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
256 /* Restrictions from 3.3.10: Register Region Restrictions. */
258 assert(execsize
>= width
);
261 if (execsize
== width
&& hstride
!= 0) {
262 assert(vstride
== -1 || vstride
== width
* hstride
);
266 if (execsize
== width
&& hstride
== 0) {
267 /* no restriction on vstride. */
272 assert(hstride
== 0);
276 if (execsize
== 1 && width
== 1) {
277 assert(hstride
== 0);
278 assert(vstride
== 0);
282 if (vstride
== 0 && hstride
== 0) {
286 /* 10. Check destination issues. */
290 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
293 struct brw_context
*brw
= p
->brw
;
295 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
296 assert(reg
.nr
< 128);
298 gen7_convert_mrf_to_grf(p
, ®
);
300 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
301 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
302 /* Any source modifiers or regions will be ignored, since this just
303 * identifies the MRF/GRF to start reading the message contents from.
304 * Check for some likely failures.
308 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
311 validate_reg(insn
, reg
);
313 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
314 insn
->bits1
.da1
.src0_reg_type
=
315 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
);
316 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
317 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
318 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
320 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
321 insn
->bits3
.ud
= reg
.dw1
.ud
;
323 /* Required to set some fields in src1 as well:
325 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
326 insn
->bits1
.da1
.src1_reg_type
= insn
->bits1
.da1
.src0_reg_type
;
330 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
331 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
332 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
333 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
336 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
337 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
341 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
347 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
351 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
352 if (reg
.width
== BRW_WIDTH_1
&&
353 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
354 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
355 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
356 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
359 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
360 insn
->bits2
.da1
.src0_width
= reg
.width
;
361 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
365 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
366 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
367 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
368 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
370 /* This is an oddity of the fact we're using the same
371 * descriptions for registers in align_16 as align_1:
373 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
374 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
376 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
382 void brw_set_src1(struct brw_compile
*p
,
383 struct brw_instruction
*insn
,
386 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
388 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
389 assert(reg
.nr
< 128);
391 gen7_convert_mrf_to_grf(p
, ®
);
393 validate_reg(insn
, reg
);
395 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
396 insn
->bits1
.da1
.src1_reg_type
=
397 brw_reg_type_to_hw_type(p
->brw
, reg
.type
, reg
.file
);
398 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
399 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
401 /* Only src1 can be immediate in two-argument instructions.
403 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
405 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
406 insn
->bits3
.ud
= reg
.dw1
.ud
;
409 /* This is a hardware restriction, which may or may not be lifted
412 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
413 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
415 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
416 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
417 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
420 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
421 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
424 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
425 if (reg
.width
== BRW_WIDTH_1
&&
426 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
427 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
428 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
429 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
432 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
433 insn
->bits3
.da1
.src1_width
= reg
.width
;
434 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
438 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
439 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
440 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
441 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
443 /* This is an oddity of the fact we're using the same
444 * descriptions for registers in align_16 as align_1:
446 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
447 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
449 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
455 * Set the Message Descriptor and Extended Message Descriptor fields
458 * \note This zeroes out the Function Control bits, so it must be called
459 * \b before filling out any message-specific data. Callers can
460 * choose not to fill in irrelevant bits; they will be zero.
463 brw_set_message_descriptor(struct brw_compile
*p
,
464 struct brw_instruction
*inst
,
465 enum brw_message_target sfid
,
467 unsigned response_length
,
471 struct brw_context
*brw
= p
->brw
;
473 brw_set_src1(p
, inst
, brw_imm_d(0));
476 inst
->bits3
.generic_gen5
.header_present
= header_present
;
477 inst
->bits3
.generic_gen5
.response_length
= response_length
;
478 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
479 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
482 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
483 inst
->header
.destreg__conditionalmod
= sfid
;
485 /* Set Extended Message Descriptor (ex_desc) */
486 inst
->bits2
.send_gen5
.sfid
= sfid
;
487 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
490 inst
->bits3
.generic
.response_length
= response_length
;
491 inst
->bits3
.generic
.msg_length
= msg_length
;
492 inst
->bits3
.generic
.msg_target
= sfid
;
493 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
497 static void brw_set_math_message( struct brw_compile
*p
,
498 struct brw_instruction
*insn
,
500 unsigned integer_type
,
504 struct brw_context
*brw
= p
->brw
;
506 unsigned response_length
;
508 /* Infer message length from the function */
510 case BRW_MATH_FUNCTION_POW
:
511 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
512 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
513 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
521 /* Infer response length from the function */
523 case BRW_MATH_FUNCTION_SINCOS
:
524 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
533 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
534 msg_length
, response_length
, false, false);
536 insn
->bits3
.math_gen5
.function
= function
;
537 insn
->bits3
.math_gen5
.int_type
= integer_type
;
538 insn
->bits3
.math_gen5
.precision
= low_precision
;
539 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
540 insn
->bits3
.math_gen5
.data_type
= dataType
;
541 insn
->bits3
.math_gen5
.snapshot
= 0;
543 insn
->bits3
.math
.function
= function
;
544 insn
->bits3
.math
.int_type
= integer_type
;
545 insn
->bits3
.math
.precision
= low_precision
;
546 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
547 insn
->bits3
.math
.data_type
= dataType
;
549 insn
->header
.saturate
= 0;
553 static void brw_set_ff_sync_message(struct brw_compile
*p
,
554 struct brw_instruction
*insn
,
556 unsigned response_length
,
559 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
560 1, response_length
, true, end_of_thread
);
561 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
562 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
563 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
564 insn
->bits3
.urb_gen5
.allocate
= allocate
;
565 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
566 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
569 static void brw_set_urb_message( struct brw_compile
*p
,
570 struct brw_instruction
*insn
,
571 enum brw_urb_write_flags flags
,
573 unsigned response_length
,
575 unsigned swizzle_control
)
577 struct brw_context
*brw
= p
->brw
;
579 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
580 msg_length
, response_length
, true,
581 flags
& BRW_URB_WRITE_EOT
);
583 if (flags
& BRW_URB_WRITE_OWORD
) {
584 assert(msg_length
== 2); /* header + one OWORD of data */
585 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
587 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
589 insn
->bits3
.urb_gen7
.offset
= offset
;
590 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
591 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
592 insn
->bits3
.urb_gen7
.per_slot_offset
=
593 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
594 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
595 } else if (brw
->gen
>= 5) {
596 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
597 insn
->bits3
.urb_gen5
.offset
= offset
;
598 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
599 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
600 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
601 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
603 insn
->bits3
.urb
.opcode
= 0; /* ? */
604 insn
->bits3
.urb
.offset
= offset
;
605 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
606 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
607 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
608 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
613 brw_set_dp_write_message(struct brw_compile
*p
,
614 struct brw_instruction
*insn
,
615 unsigned binding_table_index
,
616 unsigned msg_control
,
620 unsigned last_render_target
,
621 unsigned response_length
,
622 unsigned end_of_thread
,
623 unsigned send_commit_msg
)
625 struct brw_context
*brw
= p
->brw
;
629 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
630 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
631 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
633 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
634 } else if (brw
->gen
== 6) {
635 /* Use the render cache for all write messages. */
636 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
638 sfid
= BRW_SFID_DATAPORT_WRITE
;
641 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
642 header_present
, end_of_thread
);
645 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
646 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
647 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
648 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
649 } else if (brw
->gen
== 6) {
650 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
651 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
652 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
653 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
654 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
655 } else if (brw
->gen
== 5) {
656 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
657 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
658 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
659 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
660 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
662 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
663 insn
->bits3
.dp_write
.msg_control
= msg_control
;
664 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
665 insn
->bits3
.dp_write
.msg_type
= msg_type
;
666 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
671 brw_set_dp_read_message(struct brw_compile
*p
,
672 struct brw_instruction
*insn
,
673 unsigned binding_table_index
,
674 unsigned msg_control
,
676 unsigned target_cache
,
679 unsigned response_length
)
681 struct brw_context
*brw
= p
->brw
;
685 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
686 } else if (brw
->gen
== 6) {
687 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
688 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
690 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
692 sfid
= BRW_SFID_DATAPORT_READ
;
695 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
696 header_present
, false);
699 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
700 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
701 insn
->bits3
.gen7_dp
.last_render_target
= 0;
702 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
703 } else if (brw
->gen
== 6) {
704 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
705 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
706 insn
->bits3
.gen6_dp
.last_render_target
= 0;
707 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
708 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
709 } else if (brw
->gen
== 5) {
710 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
711 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
712 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
713 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
714 } else if (brw
->is_g4x
) {
715 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
716 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
717 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
718 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
720 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
721 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
722 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
723 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
728 brw_set_sampler_message(struct brw_compile
*p
,
729 struct brw_instruction
*insn
,
730 unsigned binding_table_index
,
733 unsigned response_length
,
735 unsigned header_present
,
737 unsigned return_format
)
739 struct brw_context
*brw
= p
->brw
;
741 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
742 response_length
, header_present
, false);
745 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
746 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
747 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
748 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
749 } else if (brw
->gen
>= 5) {
750 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
751 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
752 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
753 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
754 } else if (brw
->is_g4x
) {
755 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
756 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
757 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
759 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
760 insn
->bits3
.sampler
.sampler
= sampler
;
761 insn
->bits3
.sampler
.msg_type
= msg_type
;
762 insn
->bits3
.sampler
.return_format
= return_format
;
767 #define next_insn brw_next_insn
768 struct brw_instruction
*
769 brw_next_insn(struct brw_compile
*p
, unsigned opcode
)
771 struct brw_instruction
*insn
;
773 if (p
->nr_insn
+ 1 > p
->store_size
) {
775 printf("incresing the store size to %d\n", p
->store_size
<< 1);
777 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
778 struct brw_instruction
, p
->store_size
);
780 assert(!"realloc eu store memeory failed");
783 p
->next_insn_offset
+= 16;
784 insn
= &p
->store
[p
->nr_insn
++];
785 memcpy(insn
, p
->current
, sizeof(*insn
));
787 /* Reset this one-shot flag:
790 if (p
->current
->header
.destreg__conditionalmod
) {
791 p
->current
->header
.destreg__conditionalmod
= 0;
792 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
795 insn
->header
.opcode
= opcode
;
799 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
804 struct brw_instruction
*insn
= next_insn(p
, opcode
);
805 brw_set_dest(p
, insn
, dest
);
806 brw_set_src0(p
, insn
, src
);
810 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
814 struct brw_reg src1
)
816 struct brw_instruction
*insn
= next_insn(p
, opcode
);
817 brw_set_dest(p
, insn
, dest
);
818 brw_set_src0(p
, insn
, src0
);
819 brw_set_src1(p
, insn
, src1
);
824 get_3src_subreg_nr(struct brw_reg reg
)
826 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
827 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
828 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
830 return reg
.subnr
/ 4;
834 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
841 struct brw_context
*brw
= p
->brw
;
842 struct brw_instruction
*insn
= next_insn(p
, opcode
);
844 gen7_convert_mrf_to_grf(p
, &dest
);
846 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
848 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
849 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
850 assert(dest
.nr
< 128);
851 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
852 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
853 dest
.type
== BRW_REGISTER_TYPE_D
||
854 dest
.type
== BRW_REGISTER_TYPE_UD
);
855 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
856 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
857 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
858 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
859 guess_execution_size(p
, insn
, dest
);
861 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
862 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
863 assert(src0
.nr
< 128);
864 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
865 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
866 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
867 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
868 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
869 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
871 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
872 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
873 assert(src1
.nr
< 128);
874 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
875 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
876 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
877 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
878 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
879 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
880 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
882 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
883 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
884 assert(src2
.nr
< 128);
885 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
886 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
887 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
888 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
889 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
890 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
893 /* Set both the source and destination types based on dest.type,
894 * ignoring the source register types. The MAD and LRP emitters ensure
895 * that all four types are float. The BFE and BFI2 emitters, however,
896 * may send us mixed D and UD types and want us to ignore that and use
897 * the destination type.
900 case BRW_REGISTER_TYPE_F
:
901 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
902 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
904 case BRW_REGISTER_TYPE_D
:
905 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
906 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
908 case BRW_REGISTER_TYPE_UD
:
909 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
910 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
919 /***********************************************************************
920 * Convenience routines.
923 struct brw_instruction *brw_##OP(struct brw_compile *p, \
924 struct brw_reg dest, \
925 struct brw_reg src0) \
927 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
931 struct brw_instruction *brw_##OP(struct brw_compile *p, \
932 struct brw_reg dest, \
933 struct brw_reg src0, \
934 struct brw_reg src1) \
936 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
940 struct brw_instruction *brw_##OP(struct brw_compile *p, \
941 struct brw_reg dest, \
942 struct brw_reg src0, \
943 struct brw_reg src1, \
944 struct brw_reg src2) \
946 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
950 struct brw_instruction *brw_##OP(struct brw_compile *p, \
951 struct brw_reg dest, \
952 struct brw_reg src0, \
953 struct brw_reg src1, \
954 struct brw_reg src2) \
956 assert(dest.type == BRW_REGISTER_TYPE_F); \
957 assert(src0.type == BRW_REGISTER_TYPE_F); \
958 assert(src1.type == BRW_REGISTER_TYPE_F); \
959 assert(src2.type == BRW_REGISTER_TYPE_F); \
960 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
963 /* Rounding operations (other than RNDD) require two instructions - the first
964 * stores a rounded value (possibly the wrong way) in the dest register, but
965 * also sets a per-channel "increment bit" in the flag register. A predicated
966 * add of 1.0 fixes dest to contain the desired result.
968 * Sandybridge and later appear to round correctly without an ADD.
971 void brw_##OP(struct brw_compile *p, \
972 struct brw_reg dest, \
973 struct brw_reg src) \
975 struct brw_instruction *rnd, *add; \
976 rnd = next_insn(p, BRW_OPCODE_##OP); \
977 brw_set_dest(p, rnd, dest); \
978 brw_set_src0(p, rnd, src); \
980 if (p->brw->gen < 6) { \
981 /* turn on round-increments */ \
982 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
983 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
984 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
1027 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
1028 struct brw_reg dest
,
1029 struct brw_reg src0
,
1030 struct brw_reg src1
)
1033 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1034 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1035 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1036 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1037 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1040 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1041 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1042 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1043 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1044 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1047 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
1050 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1051 struct brw_reg dest
,
1052 struct brw_reg src0
,
1053 struct brw_reg src1
)
1055 assert(dest
.type
== src0
.type
);
1056 assert(src0
.type
== src1
.type
);
1057 switch (src0
.type
) {
1058 case BRW_REGISTER_TYPE_B
:
1059 case BRW_REGISTER_TYPE_UB
:
1060 case BRW_REGISTER_TYPE_W
:
1061 case BRW_REGISTER_TYPE_UW
:
1062 case BRW_REGISTER_TYPE_D
:
1063 case BRW_REGISTER_TYPE_UD
:
1066 assert(!"Bad type for brw_AVG");
1069 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1072 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1073 struct brw_reg dest
,
1074 struct brw_reg src0
,
1075 struct brw_reg src1
)
1078 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1079 src0
.type
== BRW_REGISTER_TYPE_UD
||
1080 src1
.type
== BRW_REGISTER_TYPE_D
||
1081 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1082 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1085 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1086 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1087 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1088 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1089 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1092 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1093 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1094 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1095 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1096 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1099 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1100 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1101 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1102 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1104 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1108 void brw_NOP(struct brw_compile
*p
)
1110 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1111 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1112 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1113 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1120 /***********************************************************************
1121 * Comparisons, if/else/endif
1124 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1125 struct brw_reg dest
,
1126 struct brw_reg src0
,
1127 struct brw_reg src1
)
1129 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1131 insn
->header
.execution_size
= 1;
1132 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1133 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1135 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1141 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1143 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1145 p
->if_stack_depth
++;
1146 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1147 p
->if_stack_array_size
*= 2;
1148 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1149 p
->if_stack_array_size
);
1153 static struct brw_instruction
*
1154 pop_if_stack(struct brw_compile
*p
)
1156 p
->if_stack_depth
--;
1157 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1161 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1163 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1164 p
->loop_stack_array_size
*= 2;
1165 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1166 p
->loop_stack_array_size
);
1167 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1168 p
->loop_stack_array_size
);
1171 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1172 p
->loop_stack_depth
++;
1173 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1176 static struct brw_instruction
*
1177 get_inner_do_insn(struct brw_compile
*p
)
1179 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1182 /* EU takes the value from the flag register and pushes it onto some
1183 * sort of a stack (presumably merging with any flag value already on
1184 * the stack). Within an if block, the flags at the top of the stack
1185 * control execution on each channel of the unit, eg. on each of the
1186 * 16 pixel values in our wm programs.
1188 * When the matching 'else' instruction is reached (presumably by
1189 * countdown of the instruction count patched in by our ELSE/ENDIF
1190 * functions), the relevant flags are inverted.
1192 * When the matching 'endif' instruction is reached, the flags are
1193 * popped off. If the stack is now empty, normal execution resumes.
1195 struct brw_instruction
*
1196 brw_IF(struct brw_compile
*p
, unsigned execute_size
)
1198 struct brw_context
*brw
= p
->brw
;
1199 struct brw_instruction
*insn
;
1201 insn
= next_insn(p
, BRW_OPCODE_IF
);
1203 /* Override the defaults for this instruction:
1206 brw_set_dest(p
, insn
, brw_ip_reg());
1207 brw_set_src0(p
, insn
, brw_ip_reg());
1208 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1209 } else if (brw
->gen
== 6) {
1210 brw_set_dest(p
, insn
, brw_imm_w(0));
1211 insn
->bits1
.branch_gen6
.jump_count
= 0;
1212 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1213 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1215 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1216 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1217 brw_set_src1(p
, insn
, brw_imm_ud(0));
1218 insn
->bits3
.break_cont
.jip
= 0;
1219 insn
->bits3
.break_cont
.uip
= 0;
1222 insn
->header
.execution_size
= execute_size
;
1223 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1224 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1225 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1226 if (!p
->single_program_flow
)
1227 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1229 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1231 push_if_stack(p
, insn
);
1232 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1236 /* This function is only used for gen6-style IF instructions with an
1237 * embedded comparison (conditional modifier). It is not used on gen7.
1239 struct brw_instruction
*
1240 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1241 struct brw_reg src0
, struct brw_reg src1
)
1243 struct brw_instruction
*insn
;
1245 insn
= next_insn(p
, BRW_OPCODE_IF
);
1247 brw_set_dest(p
, insn
, brw_imm_w(0));
1248 if (p
->compressed
) {
1249 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1251 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1253 insn
->bits1
.branch_gen6
.jump_count
= 0;
1254 brw_set_src0(p
, insn
, src0
);
1255 brw_set_src1(p
, insn
, src1
);
1257 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1258 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1259 insn
->header
.destreg__conditionalmod
= conditional
;
1261 if (!p
->single_program_flow
)
1262 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1264 push_if_stack(p
, insn
);
1269 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1272 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1273 struct brw_instruction
*if_inst
,
1274 struct brw_instruction
*else_inst
)
1276 /* The next instruction (where the ENDIF would be, if it existed) */
1277 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1279 assert(p
->single_program_flow
);
1280 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1281 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1282 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1284 /* Convert IF to an ADD instruction that moves the instruction pointer
1285 * to the first instruction of the ELSE block. If there is no ELSE
1286 * block, point to where ENDIF would be. Reverse the predicate.
1288 * There's no need to execute an ENDIF since we don't need to do any
1289 * stack operations, and if we're currently executing, we just want to
1290 * continue normally.
1292 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1293 if_inst
->header
.predicate_inverse
= 1;
1295 if (else_inst
!= NULL
) {
1296 /* Convert ELSE to an ADD instruction that points where the ENDIF
1299 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1301 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1302 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1304 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1309 * Patch IF and ELSE instructions with appropriate jump targets.
/* patch_IF_ELSE: once the matching ENDIF is known, fill in the jump
 * fields of an IF and (optionally) its ELSE.  The IF's execution size is
 * copied to the ENDIF and to the ELSE.  Distances are instruction-pointer
 * differences scaled by `br`; which field receives them (bits3.if_else,
 * bits1.branch_gen6 or bits3.break_cont) depends on brw->gen.
 * NOTE(review): the declaration of `br` and several branch conditions are
 * not visible in this view of the file -- confirm against the full source.
 */
1312 patch_IF_ELSE(struct brw_compile
*p
,
1313 struct brw_instruction
*if_inst
,
1314 struct brw_instruction
*else_inst
,
1315 struct brw_instruction
*endif_inst
)
1317 struct brw_context
*brw
= p
->brw
;
1319 /* We shouldn't be patching IF and ELSE instructions in single program flow
1320 * mode when gen < 6, because in single program flow mode on those
1321 * platforms, we convert flow control instructions to conditional ADDs that
1322 * operate on IP (see brw_ENDIF).
1324 * However, on Gen6, writing to IP doesn't work in single program flow mode
1325 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1326 * not be updated by non-flow control instructions."). And on later
1327 * platforms, there is no significant benefit to converting control flow
1328 * instructions to conditional ADDs. So we do patch IF and ELSE
1329 * instructions in single program flow mode on those platforms.
1332 assert(!p
->single_program_flow
);
1334 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1335 assert(endif_inst
!= NULL
);
1336 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1339 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1340 * requires 2 chunks.
1345 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1346 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1348 if (else_inst
== NULL
) {
1349 /* Patch IF -> ENDIF */
1351 /* Turn it into an IFF, which means no mask stack operations for
1352 * all-false and jumping past the ENDIF.
1354 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1355 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1356 if_inst
->bits3
.if_else
.pop_count
= 0;
1357 if_inst
->bits3
.if_else
.pad0
= 0;
1358 } else if (brw
->gen
== 6) {
1359 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1360 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1362 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1363 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1366 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1368 /* Patch IF -> ELSE */
1370 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1371 if_inst
->bits3
.if_else
.pop_count
= 0;
1372 if_inst
->bits3
.if_else
.pad0
= 0;
1373 } else if (brw
->gen
== 6) {
1374 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1377 /* Patch ELSE -> ENDIF */
1379 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1382 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1383 else_inst
->bits3
.if_else
.pop_count
= 1;
1384 else_inst
->bits3
.if_else
.pad0
= 0;
1385 } else if (brw
->gen
== 6) {
1386 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1387 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1389 /* The IF instruction's JIP should point just past the ELSE */
1390 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1391 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1392 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1393 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
/* brw_ELSE: emit an ELSE instruction with all jump fields cleared, using
 * a generation-dependent operand encoding (IP register operands, a gen6
 * branch_gen6 form, or a break_cont JIP/UIP form).  Compression is
 * disabled, the mask is enabled, and a thread switch is requested unless
 * single_program_flow is set.  The instruction is pushed on the if stack.
 * NOTE(review): the return type and the first branch condition are not
 * visible in this view of the file.
 */
1399 brw_ELSE(struct brw_compile
*p
)
1401 struct brw_context
*brw
= p
->brw
;
1402 struct brw_instruction
*insn
;
1404 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1407 brw_set_dest(p
, insn
, brw_ip_reg());
1408 brw_set_src0(p
, insn
, brw_ip_reg());
1409 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1410 } else if (brw
->gen
== 6) {
1411 brw_set_dest(p
, insn
, brw_imm_w(0));
1412 insn
->bits1
.branch_gen6
.jump_count
= 0;
1413 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1414 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1416 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1417 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1418 brw_set_src1(p
, insn
, brw_imm_ud(0));
1419 insn
->bits3
.break_cont
.jip
= 0;
1420 insn
->bits3
.break_cont
.uip
= 0;
1423 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1424 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1425 if (!p
->single_program_flow
)
1426 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1428 push_if_stack(p
, insn
);
/* brw_ENDIF: close the innermost IF/ELSE construct.  Decrements the IF
 * depth of the current loop, pops the top of the if stack (popping a
 * second entry when the first one is an ELSE), and then either rewrites
 * the IF/ELSE as ADDs via convert_IF_ELSE_to_ADD() or sets up the ENDIF
 * operands for the current generation and calls patch_IF_ELSE() to fill
 * in the jump targets.
 * NOTE(review): the return type, the assignments to if_inst / else_inst /
 * emit_endif and several branch conditions are not visible in this view
 * of the file -- confirm against the full source.
 */
1432 brw_ENDIF(struct brw_compile
*p
)
1434 struct brw_context
*brw
= p
->brw
;
1435 struct brw_instruction
*insn
= NULL
;
1436 struct brw_instruction
*else_inst
= NULL
;
1437 struct brw_instruction
*if_inst
= NULL
;
1438 struct brw_instruction
*tmp
;
1439 bool emit_endif
= true;
1441 /* In single program flow mode, we can express IF and ELSE instructions
1442 * equivalently as ADD instructions that operate on IP. On platforms prior
1443 * to Gen6, flow control instructions cause an implied thread switch, so
1444 * this is a significant savings.
1446 * However, on Gen6, writing to IP doesn't work in single program flow mode
1447 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1448 * not be updated by non-flow control instructions."). And on later
1449 * platforms, there is no significant benefit to converting control flow
1450 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1453 if (brw
->gen
< 6 && p
->single_program_flow
)
1457 * A single next_insn() may change the base adress of instruction store
1458 * memory(p->store), so call it first before referencing the instruction
1459 * store pointer from an index
1462 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1464 /* Pop the IF and (optional) ELSE instructions from the stack */
1465 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1466 tmp
= pop_if_stack(p
);
1467 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1469 tmp
= pop_if_stack(p
);
1474 /* ENDIF is useless; don't bother emitting it. */
1475 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1480 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1481 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1482 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1483 } else if (brw
->gen
== 6) {
1484 brw_set_dest(p
, insn
, brw_imm_w(0));
1485 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1486 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1488 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1489 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1490 brw_set_src1(p
, insn
, brw_imm_ud(0));
1493 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1494 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1495 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1497 /* Also pop item off the stack in the endif instruction: */
1499 insn
->bits3
.if_else
.jump_count
= 0;
1500 insn
->bits3
.if_else
.pop_count
= 1;
1501 insn
->bits3
.if_else
.pad0
= 0;
1502 } else if (brw
->gen
== 6) {
1503 insn
->bits1
.branch_gen6
.jump_count
= 2;
1505 insn
->bits3
.break_cont
.jip
= 2;
1507 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1510 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1512 struct brw_context
*brw
= p
->brw
;
1513 struct brw_instruction
*insn
;
1515 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1516 if (brw
->gen
>= 6) {
1517 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1518 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1519 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1521 brw_set_dest(p
, insn
, brw_ip_reg());
1522 brw_set_src0(p
, insn
, brw_ip_reg());
1523 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1524 insn
->bits3
.if_else
.pad0
= 0;
1525 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1527 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1528 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1533 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1535 struct brw_instruction
*insn
;
1537 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1538 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1539 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1540 brw_set_dest(p
, insn
, brw_ip_reg());
1541 brw_set_src0(p
, insn
, brw_ip_reg());
1542 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1544 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1545 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1549 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1551 struct brw_instruction
*insn
;
1552 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1553 brw_set_dest(p
, insn
, brw_ip_reg());
1554 brw_set_src0(p
, insn
, brw_ip_reg());
1555 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1556 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1557 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1558 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1559 insn
->bits3
.if_else
.pad0
= 0;
1560 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1564 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1566 struct brw_instruction
*insn
;
1568 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1569 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1570 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1571 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1573 if (p
->compressed
) {
1574 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1576 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1577 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1584 * The DO/WHILE is just an unterminated loop -- break or continue are
1585 * used for control within the loop. We have a few ways they can be
1588 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1589 * jip and no DO instruction.
1591 * For non-uniform control flow pre-gen6, there's a DO instruction to
1592 * push the mask, and a WHILE to jump back, and BREAK to get out and
1595 * For gen6, there's no more mask stack, so no need for DO. WHILE
1596 * just points back to the first instruction of the loop.
1598 struct brw_instruction
*brw_DO(struct brw_compile
*p
, unsigned execute_size
)
1600 struct brw_context
*brw
= p
->brw
;
1602 if (brw
->gen
>= 6 || p
->single_program_flow
) {
1603 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1604 return &p
->store
[p
->nr_insn
];
1606 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1608 push_loop_stack(p
, insn
);
1610 /* Override the defaults for this instruction:
1612 brw_set_dest(p
, insn
, brw_null_reg());
1613 brw_set_src0(p
, insn
, brw_null_reg());
1614 brw_set_src1(p
, insn
, brw_null_reg());
1616 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1617 insn
->header
.execution_size
= execute_size
;
1618 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1619 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1620 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1627 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1630 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1631 * nesting, since it can always just point to the end of the block/current loop.
1634 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1636 struct brw_context
*brw
= p
->brw
;
1637 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1638 struct brw_instruction
*inst
;
1639 int br
= (brw
->gen
== 5) ? 2 : 1;
1641 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1642 /* If the jump count is != 0, that means that this instruction has already
1643 * been patched because it's part of a loop inside of the one we're
1646 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1647 inst
->bits3
.if_else
.jump_count
== 0) {
1648 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1649 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1650 inst
->bits3
.if_else
.jump_count
== 0) {
1651 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
/* brw_WHILE: close the innermost loop opened by brw_DO().  Depending on
 * brw->gen and single_program_flow this emits either a WHILE whose jump
 * field points back at the recorded loop start, or an ADD on the IP
 * register with a byte offset of 16 per instruction.  In the if_else
 * WHILE form, pending BREAK/CONTINUE instructions are patched via
 * brw_patch_break_cont().  Finally the default predicate control is reset
 * and the loop stack is popped.
 * NOTE(review): the declaration of `br`, some branch lines and the return
 * statement are not visible in this view of the file.
 */
1656 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1658 struct brw_context
*brw
= p
->brw
;
1659 struct brw_instruction
*insn
, *do_insn
;
1665 if (brw
->gen
>= 7) {
1666 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1667 do_insn
= get_inner_do_insn(p
);
1669 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1670 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1671 brw_set_src1(p
, insn
, brw_imm_ud(0));
1672 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1674 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1675 } else if (brw
->gen
== 6) {
1676 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1677 do_insn
= get_inner_do_insn(p
);
1679 brw_set_dest(p
, insn
, brw_imm_w(0));
1680 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1681 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1682 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1684 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1686 if (p
->single_program_flow
) {
1687 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1688 do_insn
= get_inner_do_insn(p
);
1690 brw_set_dest(p
, insn
, brw_ip_reg());
1691 brw_set_src0(p
, insn
, brw_ip_reg());
1692 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1693 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1695 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1696 do_insn
= get_inner_do_insn(p
);
1698 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1700 brw_set_dest(p
, insn
, brw_ip_reg());
1701 brw_set_src0(p
, insn
, brw_ip_reg());
1702 brw_set_src1(p
, insn
, brw_imm_d(0));
1704 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1705 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1706 insn
->bits3
.if_else
.pop_count
= 0;
1707 insn
->bits3
.if_else
.pad0
= 0;
1709 brw_patch_break_cont(p
, insn
);
1712 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1713 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1715 p
->loop_stack_depth
--;
/* brw_land_fwd_jump: make a previously emitted forward JMPI (located at
 * index jmp_insn_idx in p->store) land on the next instruction to be
 * emitted.  The jump immediate in bits3.ud is set to the number of
 * instructions between the JMPI and the current end of the program,
 * scaled by `jmpi`.  Debug builds check that the target really is a JMPI
 * with an immediate src1.
 * NOTE(review): the declaration of `jmpi` is not visible in this view of
 * the file.
 */
1723 void brw_land_fwd_jump(struct brw_compile
*p
, int jmp_insn_idx
)
1725 struct brw_context
*brw
= p
->brw
;
1726 struct brw_instruction
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1732 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1733 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1735 jmp_insn
->bits3
.ud
= jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1);
1740 /* To integrate with the above, it makes sense that the comparison
1741 * instruction should populate the flag register. It might be simpler
1742 * just to use the flag reg for most WM tasks?
/* brw_CMP: emit a CMP of src0 against src1 with the given conditional
 * modifier, writing to dest.  When dest is in the architecture register
 * file (see NOTE), the compile state's default predicate control is
 * switched to BRW_PREDICATE_NORMAL and flag_value is set to 0xff.  On
 * gen7, a CMP whose destination is the null register is marked with
 * BRW_THREAD_SWITCH.
 * NOTE(review): the second half of the first dest.file condition is not
 * visible in this view of the file.
 */
1744 void brw_CMP(struct brw_compile
*p
,
1745 struct brw_reg dest
,
1746 unsigned conditional
,
1747 struct brw_reg src0
,
1748 struct brw_reg src1
)
1750 struct brw_context
*brw
= p
->brw
;
1751 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1753 insn
->header
.destreg__conditionalmod
= conditional
;
1754 brw_set_dest(p
, insn
, dest
);
1755 brw_set_src0(p
, insn
, src0
);
1756 brw_set_src1(p
, insn
, src1
);
1758 /* guess_execution_size(insn, src0); */
1761 /* Make it so that future instructions will use the computed flag
1762 * value until brw_set_predicate_control_flag_value() is called
1765 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1767 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1768 p
->flag_value
= 0xff;
1771 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1773 * "Any CMP instruction with a null destination must use a {switch}."
1775 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1776 * mentioned on their work-arounds pages.
1778 if (brw
->gen
== 7) {
1779 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1780 dest
.nr
== BRW_ARF_NULL
) {
1781 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1786 /* Issue 'wait' instruction for n1, host could program MMIO
1787 to wake up thread. */
1788 void brw_WAIT (struct brw_compile
*p
)
1790 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1791 struct brw_reg src
= brw_notification_1_reg();
1793 brw_set_dest(p
, insn
, src
);
1794 brw_set_src0(p
, insn
, src
);
1795 brw_set_src1(p
, insn
, brw_null_reg());
1796 insn
->header
.execution_size
= 0; /* must */
1797 insn
->header
.predicate_control
= 0;
1798 insn
->header
.compression_control
= 0;
1802 /***********************************************************************
1803 * Helpers for the various SEND message types:
1806 /** Extended math function, float[8].
/* brw_math: emit a one-source extended math operation.  On gen6+ this is
 * a MATH instruction with `function` stored in the conditional-modifier
 * field; debug builds check register files, unit horizontal stride, the
 * source type (non-float for the integer-divide functions, float
 * otherwise) and, on gen6, the absence of a negate modifier.  On earlier
 * generations a SEND is emitted instead, with msg_reg_nr stored in the
 * conditional-modifier field and the message set up by
 * brw_set_math_message().
 * NOTE(review): several parameters (including `function` and `src`) and
 * some call arguments are not visible in this view of the file.
 */
1808 void brw_math( struct brw_compile
*p
,
1809 struct brw_reg dest
,
1811 unsigned msg_reg_nr
,
1814 unsigned precision
)
1816 struct brw_context
*brw
= p
->brw
;
1818 if (brw
->gen
>= 6) {
1819 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1821 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1822 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1823 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1825 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1827 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1829 /* Source modifiers are ignored for extended math instructions on Gen6. */
1830 if (brw
->gen
== 6) {
1831 assert(!src
.negate
);
1835 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1836 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1837 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1838 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1840 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1843 /* Math is the same ISA format as other opcodes, except that CondModifier
1844 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1846 insn
->header
.destreg__conditionalmod
= function
;
1848 brw_set_dest(p
, insn
, dest
);
1849 brw_set_src0(p
, insn
, src
);
1850 brw_set_src1(p
, insn
, brw_null_reg());
1852 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1854 /* Example code doesn't set predicate_control for send
1857 insn
->header
.predicate_control
= 0;
1858 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1860 brw_set_dest(p
, insn
, dest
);
1861 brw_set_src0(p
, insn
, src
);
1862 brw_set_math_message(p
,
1865 src
.type
== BRW_REGISTER_TYPE_D
,
1871 /** Extended math function, float[8].
/* brw_math2: emit a two-source MATH instruction with `function` stored in
 * the conditional-modifier field.  Debug builds check that dest is a GRF
 * (or an MRF on gen7+), that both sources are GRFs, that dest has unit
 * horizontal stride (and, on gen6, both sources too), that source types
 * are non-float for the integer-divide functions and float otherwise,
 * and that on gen6 neither source is negated.
 * NOTE(review): the `function` parameter declaration is not visible in
 * this view of the file.
 */
1873 void brw_math2(struct brw_compile
*p
,
1874 struct brw_reg dest
,
1876 struct brw_reg src0
,
1877 struct brw_reg src1
)
1879 struct brw_context
*brw
= p
->brw
;
1880 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1882 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1883 (brw
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1884 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1885 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1887 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1888 if (brw
->gen
== 6) {
1889 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1890 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1893 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1894 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1895 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1896 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1897 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1899 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1900 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1903 /* Source modifiers are ignored for extended math instructions on Gen6. */
1904 if (brw
->gen
== 6) {
1905 assert(!src0
.negate
);
1907 assert(!src1
.negate
);
1911 /* Math is the same ISA format as other opcodes, except that CondModifier
1912 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1914 insn
->header
.destreg__conditionalmod
= function
;
1916 brw_set_dest(p
, insn
, dest
);
1917 brw_set_src0(p
, insn
, src0
);
1918 brw_set_src1(p
, insn
, src1
);
1923 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1924 * using a constant offset per channel.
1926 * The offset must be aligned to oword size (16 bytes). Used for
1927 * register spilling.
/* brw_oword_block_write_scratch: emit the instructions for an OWORD block
 * write.  First, with masking and compression disabled, g0 is copied into
 * the message register `mrf` and `offset` is written to element 2 of that
 * header.  Then a SEND is emitted with mrf.nr in the conditional-modifier
 * field and a data-port write message using binding table index 255
 * (stateless) with the header present.  On gen6+ the destination is a
 * null register and no commit message is requested; otherwise
 * send_commit_msg is 1 and is passed as the response length.
 * NOTE(review): the mrf / num_regs / offset parameter declarations and
 * several other lines are not visible in this view of the file.
 */
1929 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1934 struct brw_context
*brw
= p
->brw
;
1935 uint32_t msg_control
, msg_type
;
1941 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1943 if (num_regs
== 1) {
1944 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1947 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1951 /* Set up the message header. This is g0, with g0.2 filled with
1952 * the offset. We don't want to leave our offset around in g0 or
1953 * it'll screw up texture samples, so set it up inside the message
1957 brw_push_insn_state(p
);
1958 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1959 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1961 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1963 /* set message header global offset field (reg 0, element 2) */
1965 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1967 2), BRW_REGISTER_TYPE_UD
),
1968 brw_imm_ud(offset
));
1970 brw_pop_insn_state(p
);
1974 struct brw_reg dest
;
1975 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1976 int send_commit_msg
;
1977 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1978 BRW_REGISTER_TYPE_UW
);
1980 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1981 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1982 src_header
= vec16(src_header
);
1984 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1985 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1987 /* Until gen6, writes followed by reads from the same location
1988 * are not guaranteed to be ordered unless write_commit is set.
1989 * If set, then a no-op write is issued to the destination
1990 * register to set a dependency, and a read from the destination
1991 * can be used to ensure the ordering.
1993 * For gen6, only writes between different threads need ordering
1994 * protection. Our use of DP writes is all about register
1995 * spilling within a thread.
1997 if (brw
->gen
>= 6) {
1998 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1999 send_commit_msg
= 0;
2002 send_commit_msg
= 1;
2005 brw_set_dest(p
, insn
, dest
);
2006 if (brw
->gen
>= 6) {
2007 brw_set_src0(p
, insn
, mrf
);
2009 brw_set_src0(p
, insn
, brw_null_reg());
2013 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2015 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
2017 brw_set_dp_write_message(p
,
2019 255, /* binding table index (255=stateless) */
2023 true, /* header_present */
2024 0, /* not a render target */
2025 send_commit_msg
, /* response_length */
2033 * Read a block of owords (half a GRF each) from the scratch buffer
2034 * using a constant index per channel.
2036 * Offset must be aligned to oword size (16 bytes). Used for register
/* brw_oword_block_read_scratch: emit the instructions for an OWORD block
 * read into `dest`.  First, with masking and compression disabled, g0 is
 * copied into the message register `mrf` and `offset` is written to
 * element 2 of that header.  Then an uncompressed SEND is emitted with
 * mrf.nr in the conditional-modifier field and a data-port read message
 * using binding table index 255 (stateless), the OWORD block read message
 * type, the render cache as target, and the header present.
 * NOTE(review): the return type, the mrf / num_regs / offset parameter
 * declarations and several other lines are not visible in this view of
 * the file.
 */
2040 brw_oword_block_read_scratch(struct brw_compile
*p
,
2041 struct brw_reg dest
,
2046 struct brw_context
*brw
= p
->brw
;
2047 uint32_t msg_control
;
2053 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2054 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2056 if (num_regs
== 1) {
2057 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2060 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2065 brw_push_insn_state(p
);
2066 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2067 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2069 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2071 /* set message header global offset field (reg 0, element 2) */
2073 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2075 2), BRW_REGISTER_TYPE_UD
),
2076 brw_imm_ud(offset
));
2078 brw_pop_insn_state(p
);
2082 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2084 assert(insn
->header
.predicate_control
== 0);
2085 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2086 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2088 brw_set_dest(p
, insn
, dest
); /* UW? */
2089 if (brw
->gen
>= 6) {
2090 brw_set_src0(p
, insn
, mrf
);
2092 brw_set_src0(p
, insn
, brw_null_reg());
2095 brw_set_dp_read_message(p
,
2097 255, /* binding table index (255=stateless) */
2099 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2100 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2102 true, /* header_present */
2108 gen7_block_read_scratch(struct brw_compile
*p
,
2109 struct brw_reg dest
,
2113 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2115 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2117 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
2118 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2120 brw_set_dest(p
, insn
, dest
);
2122 /* The HW requires that the header is present; this is to get the g0.5
2125 bool header_present
= true;
2126 brw_set_src0(p
, insn
, brw_vec8_grf(0, 0));
2128 brw_set_message_descriptor(p
, insn
,
2129 GEN7_SFID_DATAPORT_DATA_CACHE
,
2130 1, /* mlen: just g0 */
2135 insn
->bits3
.ud
|= GEN7_DATAPORT_SCRATCH_READ
;
2137 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4);
2138 insn
->bits3
.ud
|= (num_regs
- 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT
;
2140 /* According to the docs, offset is "A 12-bit HWord offset into the memory
2141 * Immediate Memory buffer as specified by binding table 0xFF." An HWORD
2142 * is 32 bytes, which happens to be the size of a register.
2145 assert(offset
< (1 << 12));
2146 insn
->bits3
.ud
|= offset
;
2150 * Read a float[4] vector from the data port Data Cache (const buffer).
2151 * Location (in buffer) should be a multiple of 16.
2152 * Used for fetching shader constants.
2154 void brw_oword_block_read(struct brw_compile
*p
,
2155 struct brw_reg dest
,
2158 uint32_t bind_table_index
)
2160 struct brw_context
*brw
= p
->brw
;
2162 /* On newer hardware, offset is in units of owords. */
2166 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2168 brw_push_insn_state(p
);
2169 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2170 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2171 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2173 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2175 /* set message header global offset field (reg 0, element 2) */
2177 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2179 2), BRW_REGISTER_TYPE_UD
),
2180 brw_imm_ud(offset
));
2182 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2183 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2185 /* cast dest to a uword[8] vector */
2186 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2188 brw_set_dest(p
, insn
, dest
);
2189 if (brw
->gen
>= 6) {
2190 brw_set_src0(p
, insn
, mrf
);
2192 brw_set_src0(p
, insn
, brw_null_reg());
2195 brw_set_dp_read_message(p
,
2198 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2199 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2200 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2202 true, /* header_present */
2203 1); /* response_length (1 reg, 2 owords!) */
2205 brw_pop_insn_state(p
);
2209 void brw_fb_WRITE(struct brw_compile
*p
,
2211 unsigned msg_reg_nr
,
2212 struct brw_reg src0
,
2213 unsigned msg_control
,
2214 unsigned binding_table_index
,
2215 unsigned msg_length
,
2216 unsigned response_length
,
2218 bool header_present
)
2220 struct brw_context
*brw
= p
->brw
;
2221 struct brw_instruction
*insn
;
2223 struct brw_reg dest
;
2225 if (dispatch_width
== 16)
2226 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2228 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2230 if (brw
->gen
>= 6) {
2231 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2233 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2235 /* The execution mask is ignored for render target writes. */
2236 insn
->header
.predicate_control
= 0;
2237 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2239 if (brw
->gen
>= 6) {
2240 /* headerless version, just submit color payload */
2241 src0
= brw_message_reg(msg_reg_nr
);
2243 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2245 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2247 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2250 brw_set_dest(p
, insn
, dest
);
2251 brw_set_src0(p
, insn
, src0
);
2252 brw_set_dp_write_message(p
,
2254 binding_table_index
,
2259 eot
, /* last render target write */
2262 0 /* send_commit_msg */);
2267 * Texture sample instruction.
2268 * Note: the msg_type plus msg_length values determine exactly what kind
2269 * of sampling operation is performed. See volume 4, page 161 of docs.
2271 void brw_SAMPLE(struct brw_compile
*p
,
2272 struct brw_reg dest
,
2273 unsigned msg_reg_nr
,
2274 struct brw_reg src0
,
2275 unsigned binding_table_index
,
2278 unsigned response_length
,
2279 unsigned msg_length
,
2280 unsigned header_present
,
2282 unsigned return_format
)
2284 struct brw_context
*brw
= p
->brw
;
2285 struct brw_instruction
*insn
;
2287 if (msg_reg_nr
!= -1)
2288 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2290 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2291 insn
->header
.predicate_control
= 0; /* XXX */
2293 /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2295 * "Instruction compression is not allowed for this instruction (that
2296 * is, send). The hardware behavior is undefined if this instruction is
2297 * set as compressed. However, compress control can be set to "SecHalf"
2298 * to affect the EMask generation."
2300 * No similar wording is found in later PRMs, but there are examples
2301 * utilizing send with SecHalf. More importantly, SIMD8 sampler messages
2302 * are allowed in SIMD16 mode and they could not work without SecHalf. For
2303 * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2305 if (insn
->header
.compression_control
!= BRW_COMPRESSION_2NDHALF
)
2306 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2309 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2311 brw_set_dest(p
, insn
, dest
);
2312 brw_set_src0(p
, insn
, src0
);
2313 brw_set_sampler_message(p
, insn
,
2314 binding_table_index
,
2324 /* All these variables are pretty confusing - we might be better off
2325 * using bitmasks and macros for this, in the old style. Or perhaps
2326 * just having the caller instantiate the fields in dword3 itself.
2328 void brw_urb_WRITE(struct brw_compile
*p
,
2329 struct brw_reg dest
,
2330 unsigned msg_reg_nr
,
2331 struct brw_reg src0
,
2332 enum brw_urb_write_flags flags
,
2333 unsigned msg_length
,
2334 unsigned response_length
,
2338 struct brw_context
*brw
= p
->brw
;
2339 struct brw_instruction
*insn
;
2341 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2343 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2344 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2345 brw_push_insn_state(p
);
2346 brw_set_access_mode(p
, BRW_ALIGN_1
);
2347 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2348 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2349 BRW_REGISTER_TYPE_UD
),
2350 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2351 brw_imm_ud(0xff00));
2352 brw_pop_insn_state(p
);
2355 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2357 assert(msg_length
< BRW_MAX_MRF
);
2359 brw_set_dest(p
, insn
, dest
);
2360 brw_set_src0(p
, insn
, src0
);
2361 brw_set_src1(p
, insn
, brw_imm_d(0));
2364 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2366 brw_set_urb_message(p
,
2376 next_ip(struct brw_compile
*p
, int ip
)
2378 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2380 if (insn
->header
.cmpt_control
)
2387 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2390 void *store
= p
->store
;
2392 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2393 struct brw_instruction
*insn
= store
+ ip
;
2395 switch (insn
->header
.opcode
) {
2396 case BRW_OPCODE_ENDIF
:
2397 case BRW_OPCODE_ELSE
:
2398 case BRW_OPCODE_WHILE
:
2399 case BRW_OPCODE_HALT
:
2407 /* There is no DO instruction on gen6, so to find the end of the loop
2408 * we have to see if the loop is jumping back before our start
2412 brw_find_loop_end(struct brw_compile
*p
, int start
)
2414 struct brw_context
*brw
= p
->brw
;
2417 void *store
= p
->store
;
2419 /* Always start after the instruction (such as a WHILE) we're trying to fix
2422 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2423 struct brw_instruction
*insn
= store
+ ip
;
2425 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2426 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2427 : insn
->bits3
.break_cont
.jip
;
2428 if (ip
+ jip
* scale
<= start
)
2432 assert(!"not reached");
2436 /* After program generation, go back and update the UIP and JIP of
2437 * BREAK, CONT, and HALT instructions to their correct locations.
2440 brw_set_uip_jip(struct brw_compile
*p
)
2442 struct brw_context
*brw
= p
->brw
;
2445 void *store
= p
->store
;
2450 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2451 struct brw_instruction
*insn
= store
+ ip
;
2453 if (insn
->header
.cmpt_control
) {
2454 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2455 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2456 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2457 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2461 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2462 switch (insn
->header
.opcode
) {
2463 case BRW_OPCODE_BREAK
:
2464 assert(block_end_ip
!= 0);
2465 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2466 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2467 insn
->bits3
.break_cont
.uip
=
2468 (brw_find_loop_end(p
, ip
) - ip
+
2469 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2471 case BRW_OPCODE_CONTINUE
:
2472 assert(block_end_ip
!= 0);
2473 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2474 insn
->bits3
.break_cont
.uip
=
2475 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2477 assert(insn
->bits3
.break_cont
.uip
!= 0);
2478 assert(insn
->bits3
.break_cont
.jip
!= 0);
2481 case BRW_OPCODE_ENDIF
:
2482 if (block_end_ip
== 0)
2483 insn
->bits3
.break_cont
.jip
= 2;
2485 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2488 case BRW_OPCODE_HALT
:
2489 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2491 * "In case of the halt instruction not inside any conditional
2492 * code block, the value of <JIP> and <UIP> should be the
2493 * same. In case of the halt instruction inside conditional code
2494 * block, the <UIP> should be the end of the program, and the
2495 * <JIP> should be end of the most inner conditional code block."
2497 * The uip will have already been set by whoever set up the
2500 if (block_end_ip
== 0) {
2501 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2503 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2505 assert(insn
->bits3
.break_cont
.uip
!= 0);
2506 assert(insn
->bits3
.break_cont
.jip
!= 0);
2512 void brw_ff_sync(struct brw_compile
*p
,
2513 struct brw_reg dest
,
2514 unsigned msg_reg_nr
,
2515 struct brw_reg src0
,
2517 unsigned response_length
,
2520 struct brw_context
*brw
= p
->brw
;
2521 struct brw_instruction
*insn
;
2523 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2525 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2526 brw_set_dest(p
, insn
, dest
);
2527 brw_set_src0(p
, insn
, src0
);
2528 brw_set_src1(p
, insn
, brw_imm_d(0));
2531 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2533 brw_set_ff_sync_message(p
,
2541 * Emit the SEND instruction necessary to generate stream output data on Gen6
2542 * (for transform feedback).
2544 * If send_commit_msg is true, this is the last piece of stream output data
2545 * from this thread, so send the data as a committed write. According to the
2546 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2548 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2549 * writes are complete by sending the final write as a committed write."
2552 brw_svb_write(struct brw_compile
*p
,
2553 struct brw_reg dest
,
2554 unsigned msg_reg_nr
,
2555 struct brw_reg src0
,
2556 unsigned binding_table_index
,
2557 bool send_commit_msg
)
2559 struct brw_instruction
*insn
;
2561 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2563 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2564 brw_set_dest(p
, insn
, dest
);
2565 brw_set_src0(p
, insn
, src0
);
2566 brw_set_src1(p
, insn
, brw_imm_d(0));
2567 brw_set_dp_write_message(p
, insn
,
2568 binding_table_index
,
2569 0, /* msg_control: ignored */
2570 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2572 true, /* header_present */
2573 0, /* last_render_target: ignored */
2574 send_commit_msg
, /* response_length */
2575 0, /* end_of_thread */
2576 send_commit_msg
); /* send_commit_msg */
2580 brw_set_dp_untyped_atomic_message(struct brw_compile
*p
,
2581 struct brw_instruction
*insn
,
2583 unsigned bind_table_index
,
2584 unsigned msg_length
,
2585 unsigned response_length
,
2586 bool header_present
)
2588 if (p
->brw
->is_haswell
) {
2589 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2590 msg_length
, response_length
,
2591 header_present
, false);
2594 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2595 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2596 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2598 insn
->bits3
.gen7_dp
.msg_type
=
2599 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2601 insn
->bits3
.gen7_dp
.msg_type
=
2602 HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
2606 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2607 msg_length
, response_length
,
2608 header_present
, false);
2610 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2612 if (insn
->header
.execution_size
!= BRW_EXECUTE_16
)
2613 insn
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2616 if (response_length
)
2617 insn
->bits3
.ud
|= 1 << 13; /* Return data expected */
2619 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2620 insn
->bits3
.ud
|= atomic_op
<< 8;
2624 brw_untyped_atomic(struct brw_compile
*p
,
2625 struct brw_reg dest
,
2628 unsigned bind_table_index
,
2629 unsigned msg_length
,
2630 unsigned response_length
) {
2631 struct brw_instruction
*insn
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2633 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2634 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2635 brw_set_src1(p
, insn
, brw_imm_d(0));
2636 brw_set_dp_untyped_atomic_message(
2637 p
, insn
, atomic_op
, bind_table_index
, msg_length
, response_length
,
2638 insn
->header
.access_mode
== BRW_ALIGN_1
);
2642 brw_set_dp_untyped_surface_read_message(struct brw_compile
*p
,
2643 struct brw_instruction
*insn
,
2644 unsigned bind_table_index
,
2645 unsigned msg_length
,
2646 unsigned response_length
,
2647 bool header_present
)
2649 const unsigned dispatch_width
=
2650 (insn
->header
.execution_size
== BRW_EXECUTE_16
? 16 : 8);
2651 const unsigned num_channels
= response_length
/ (dispatch_width
/ 8);
2653 if (p
->brw
->is_haswell
) {
2654 brw_set_message_descriptor(p
, insn
, HSW_SFID_DATAPORT_DATA_CACHE_1
,
2655 msg_length
, response_length
,
2656 header_present
, false);
2658 insn
->bits3
.gen7_dp
.msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
2660 brw_set_message_descriptor(p
, insn
, GEN7_SFID_DATAPORT_DATA_CACHE
,
2661 msg_length
, response_length
,
2662 header_present
, false);
2664 insn
->bits3
.gen7_dp
.msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
2667 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
2668 if (dispatch_width
== 16)
2669 insn
->bits3
.ud
|= 1 << 12; /* SIMD16 mode */
2671 insn
->bits3
.ud
|= 2 << 12; /* SIMD8 mode */
2674 insn
->bits3
.gen7_dp
.binding_table_index
= bind_table_index
;
2676 /* Set mask of 32-bit channels to drop. */
2677 insn
->bits3
.ud
|= (0xf & (0xf << num_channels
)) << 8;
2681 brw_untyped_surface_read(struct brw_compile
*p
,
2682 struct brw_reg dest
,
2684 unsigned bind_table_index
,
2685 unsigned msg_length
,
2686 unsigned response_length
)
2688 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2690 brw_set_dest(p
, insn
, retype(dest
, BRW_REGISTER_TYPE_UD
));
2691 brw_set_src0(p
, insn
, retype(mrf
, BRW_REGISTER_TYPE_UD
));
2692 brw_set_dp_untyped_surface_read_message(
2693 p
, insn
, bind_table_index
, msg_length
, response_length
,
2694 insn
->header
.access_mode
== BRW_ALIGN_1
);
2698 * This instruction is generated as a single-channel align1 instruction by
2699 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2701 * We can't use the typed atomic op in the FS because that has the execution
2702 * mask ANDed with the pixel mask, but we just want to write the one dword for
2705 * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
2706 * one u32. So we use the same untyped atomic write message as the pixel
2709 * The untyped atomic operation requires a BUFFER surface type with RAW
2710 * format, and is only accessible through the legacy DATA_CACHE dataport
2713 void brw_shader_time_add(struct brw_compile
*p
,
2714 struct brw_reg payload
,
2715 uint32_t surf_index
)
2717 struct brw_context
*brw
= p
->brw
;
2718 assert(brw
->gen
>= 7);
2720 brw_push_insn_state(p
);
2721 brw_set_access_mode(p
, BRW_ALIGN_1
);
2722 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2723 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2724 brw_pop_insn_state(p
);
2726 /* We use brw_vec1_reg and unmasked because we want to increment the given
2729 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2731 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2733 brw_set_dp_untyped_atomic_message(p
, send
, BRW_AOP_ADD
, surf_index
,
2734 2 /* message length */,
2735 0 /* response length */,
2736 false /* header present */);