2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the BSpec / ISA Reference / send - [DevIVB+]:
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct intel_context
*intel
= &p
->brw
->intel
;
96 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
104 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
107 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
108 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
109 assert(dest
.nr
< 128);
111 gen7_convert_mrf_to_grf(p
, &dest
);
113 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
114 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
115 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
117 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
118 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
120 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
121 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
122 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
123 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
124 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
127 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
128 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
129 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
130 * Although Dst.HorzStride is a don't care for Align16, HW needs
131 * this to be programmed as "01".
133 insn
->bits1
.da16
.dest_horiz_stride
= 1;
137 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
139 /* These are different sizes in align1 vs align16:
141 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
142 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
143 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
144 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
145 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
148 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
149 /* even ignored in da16, still need to set as '01' */
150 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
154 /* NEW: Set the execution size based on dest.width and
155 * insn->compression_control:
157 guess_execution_size(p
, insn
, dest
);
160 extern int reg_type_size
[];
163 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
165 int hstride_for_reg
[] = {0, 1, 2, 4};
166 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
167 int width_for_reg
[] = {1, 2, 4, 8, 16};
168 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
169 int width
, hstride
, vstride
, execsize
;
171 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
172 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
173 * mean the destination has to be 128-bit aligned and the
174 * destination horiz stride has to be a word.
176 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
177 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
178 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
184 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
185 reg
.file
== BRW_ARF_NULL
)
188 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
189 hstride
= hstride_for_reg
[reg
.hstride
];
191 if (reg
.vstride
== 0xf) {
194 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
195 vstride
= vstride_for_reg
[reg
.vstride
];
198 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
199 width
= width_for_reg
[reg
.width
];
201 assert(insn
->header
.execution_size
>= 0 &&
202 insn
->header
.execution_size
< Elements(execsize_for_reg
));
203 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
205 /* Restrictions from 3.3.10: Register Region Restrictions. */
207 assert(execsize
>= width
);
210 if (execsize
== width
&& hstride
!= 0) {
211 assert(vstride
== -1 || vstride
== width
* hstride
);
215 if (execsize
== width
&& hstride
== 0) {
216 /* no restriction on vstride. */
221 assert(hstride
== 0);
225 if (execsize
== 1 && width
== 1) {
226 assert(hstride
== 0);
227 assert(vstride
== 0);
231 if (vstride
== 0 && hstride
== 0) {
235 /* 10. Check destination issues. */
239 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
242 struct brw_context
*brw
= p
->brw
;
243 struct intel_context
*intel
= &brw
->intel
;
245 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
246 assert(reg
.nr
< 128);
248 gen7_convert_mrf_to_grf(p
, ®
);
250 if (intel
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
251 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
252 /* Any source modifiers or regions will be ignored, since this just
253 * identifies the MRF/GRF to start reading the message contents from.
254 * Check for some likely failures.
258 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
261 validate_reg(insn
, reg
);
263 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
264 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
265 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
266 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
267 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
269 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
270 insn
->bits3
.ud
= reg
.dw1
.ud
;
272 /* Required to set some fields in src1 as well:
274 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
275 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
279 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
280 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
281 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
282 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
285 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
286 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
290 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
292 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
293 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
296 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
300 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
301 if (reg
.width
== BRW_WIDTH_1
&&
302 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
303 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
304 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
305 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
308 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
309 insn
->bits2
.da1
.src0_width
= reg
.width
;
310 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
314 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
315 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
316 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
317 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
319 /* This is an oddity of the fact we're using the same
320 * descriptions for registers in align_16 as align_1:
322 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
323 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
325 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
331 void brw_set_src1(struct brw_compile
*p
,
332 struct brw_instruction
*insn
,
335 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
337 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
338 assert(reg
.nr
< 128);
340 gen7_convert_mrf_to_grf(p
, ®
);
342 validate_reg(insn
, reg
);
344 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
345 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
346 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
347 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
349 /* Only src1 can be immediate in two-argument instructions.
351 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
353 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
354 insn
->bits3
.ud
= reg
.dw1
.ud
;
357 /* This is a hardware restriction, which may or may not be lifted
360 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
361 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
363 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
364 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
365 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
368 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
369 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
372 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
373 if (reg
.width
== BRW_WIDTH_1
&&
374 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
375 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
376 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
377 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
380 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
381 insn
->bits3
.da1
.src1_width
= reg
.width
;
382 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
386 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
387 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
388 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
389 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
391 /* This is an oddity of the fact we're using the same
392 * descriptions for registers in align_16 as align_1:
394 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
395 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
397 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
403 * Set the Message Descriptor and Extended Message Descriptor fields
406 * \note This zeroes out the Function Control bits, so it must be called
407 * \b before filling out any message-specific data. Callers can
408 * choose not to fill in irrelevant bits; they will be zero.
411 brw_set_message_descriptor(struct brw_compile
*p
,
412 struct brw_instruction
*inst
,
413 enum brw_message_target sfid
,
415 unsigned response_length
,
419 struct intel_context
*intel
= &p
->brw
->intel
;
421 brw_set_src1(p
, inst
, brw_imm_d(0));
423 if (intel
->gen
>= 5) {
424 inst
->bits3
.generic_gen5
.header_present
= header_present
;
425 inst
->bits3
.generic_gen5
.response_length
= response_length
;
426 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
427 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
429 if (intel
->gen
>= 6) {
430 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
431 inst
->header
.destreg__conditionalmod
= sfid
;
433 /* Set Extended Message Descriptor (ex_desc) */
434 inst
->bits2
.send_gen5
.sfid
= sfid
;
435 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
438 inst
->bits3
.generic
.response_length
= response_length
;
439 inst
->bits3
.generic
.msg_length
= msg_length
;
440 inst
->bits3
.generic
.msg_target
= sfid
;
441 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
445 static void brw_set_math_message( struct brw_compile
*p
,
446 struct brw_instruction
*insn
,
452 struct brw_context
*brw
= p
->brw
;
453 struct intel_context
*intel
= &brw
->intel
;
455 unsigned response_length
;
457 /* Infer message length from the function */
459 case BRW_MATH_FUNCTION_POW
:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
461 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
462 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
470 /* Infer response length from the function */
472 case BRW_MATH_FUNCTION_SINCOS
:
473 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
482 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
483 msg_length
, response_length
, false, false);
484 if (intel
->gen
== 5) {
485 insn
->bits3
.math_gen5
.function
= function
;
486 insn
->bits3
.math_gen5
.int_type
= integer_type
;
487 insn
->bits3
.math_gen5
.precision
= low_precision
;
488 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
489 insn
->bits3
.math_gen5
.data_type
= dataType
;
490 insn
->bits3
.math_gen5
.snapshot
= 0;
492 insn
->bits3
.math
.function
= function
;
493 insn
->bits3
.math
.int_type
= integer_type
;
494 insn
->bits3
.math
.precision
= low_precision
;
495 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
496 insn
->bits3
.math
.data_type
= dataType
;
498 insn
->header
.saturate
= 0;
502 static void brw_set_ff_sync_message(struct brw_compile
*p
,
503 struct brw_instruction
*insn
,
505 GLuint response_length
,
508 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
509 1, response_length
, true, end_of_thread
);
510 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
511 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
512 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
513 insn
->bits3
.urb_gen5
.allocate
= allocate
;
514 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
515 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
518 static void brw_set_urb_message( struct brw_compile
*p
,
519 struct brw_instruction
*insn
,
523 GLuint response_length
,
527 GLuint swizzle_control
)
529 struct brw_context
*brw
= p
->brw
;
530 struct intel_context
*intel
= &brw
->intel
;
532 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
533 msg_length
, response_length
, true, end_of_thread
);
534 if (intel
->gen
== 7) {
535 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
536 insn
->bits3
.urb_gen7
.offset
= offset
;
537 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
538 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
539 /* per_slot_offset = 0 makes it ignore offsets in message header */
540 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
541 insn
->bits3
.urb_gen7
.complete
= complete
;
542 } else if (intel
->gen
>= 5) {
543 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
544 insn
->bits3
.urb_gen5
.offset
= offset
;
545 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
546 insn
->bits3
.urb_gen5
.allocate
= allocate
;
547 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
548 insn
->bits3
.urb_gen5
.complete
= complete
;
550 insn
->bits3
.urb
.opcode
= 0; /* ? */
551 insn
->bits3
.urb
.offset
= offset
;
552 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
553 insn
->bits3
.urb
.allocate
= allocate
;
554 insn
->bits3
.urb
.used
= used
; /* ? */
555 insn
->bits3
.urb
.complete
= complete
;
560 brw_set_dp_write_message(struct brw_compile
*p
,
561 struct brw_instruction
*insn
,
562 GLuint binding_table_index
,
567 GLuint last_render_target
,
568 GLuint response_length
,
569 GLuint end_of_thread
,
570 GLuint send_commit_msg
)
572 struct brw_context
*brw
= p
->brw
;
573 struct intel_context
*intel
= &brw
->intel
;
576 if (intel
->gen
>= 7) {
577 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
578 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
579 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
581 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
582 } else if (intel
->gen
== 6) {
583 /* Use the render cache for all write messages. */
584 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
586 sfid
= BRW_SFID_DATAPORT_WRITE
;
589 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
590 header_present
, end_of_thread
);
592 if (intel
->gen
>= 7) {
593 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
594 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
595 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
596 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
597 } else if (intel
->gen
== 6) {
598 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
599 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
600 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
601 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
602 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
603 } else if (intel
->gen
== 5) {
604 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
605 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
606 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
607 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
608 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
610 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
611 insn
->bits3
.dp_write
.msg_control
= msg_control
;
612 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
613 insn
->bits3
.dp_write
.msg_type
= msg_type
;
614 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
619 brw_set_dp_read_message(struct brw_compile
*p
,
620 struct brw_instruction
*insn
,
621 GLuint binding_table_index
,
627 GLuint response_length
)
629 struct brw_context
*brw
= p
->brw
;
630 struct intel_context
*intel
= &brw
->intel
;
633 if (intel
->gen
>= 7) {
634 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
635 } else if (intel
->gen
== 6) {
636 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
637 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
639 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
641 sfid
= BRW_SFID_DATAPORT_READ
;
644 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
645 header_present
, false);
647 if (intel
->gen
>= 7) {
648 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
649 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
650 insn
->bits3
.gen7_dp
.last_render_target
= 0;
651 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
652 } else if (intel
->gen
== 6) {
653 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
654 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
655 insn
->bits3
.gen6_dp
.last_render_target
= 0;
656 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
657 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
658 } else if (intel
->gen
== 5) {
659 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
660 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
661 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
662 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
663 } else if (intel
->is_g4x
) {
664 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
665 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
666 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
667 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
669 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
670 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
671 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
672 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
677 brw_set_sampler_message(struct brw_compile
*p
,
678 struct brw_instruction
*insn
,
679 GLuint binding_table_index
,
682 GLuint response_length
,
684 GLuint header_present
,
686 GLuint return_format
)
688 struct brw_context
*brw
= p
->brw
;
689 struct intel_context
*intel
= &brw
->intel
;
691 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
692 response_length
, header_present
, false);
694 if (intel
->gen
>= 7) {
695 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
696 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
697 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
698 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
699 } else if (intel
->gen
>= 5) {
700 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
701 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
702 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
703 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
704 } else if (intel
->is_g4x
) {
705 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
706 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
707 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
709 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
710 insn
->bits3
.sampler
.sampler
= sampler
;
711 insn
->bits3
.sampler
.msg_type
= msg_type
;
712 insn
->bits3
.sampler
.return_format
= return_format
;
717 #define next_insn brw_next_insn
718 struct brw_instruction
*
719 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
721 struct brw_instruction
*insn
;
723 if (p
->nr_insn
+ 1 > p
->store_size
) {
725 printf("incresing the store size to %d\n", p
->store_size
<< 1);
727 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
728 struct brw_instruction
, p
->store_size
);
730 assert(!"realloc eu store memeory failed");
733 p
->next_insn_offset
+= 16;
734 insn
= &p
->store
[p
->nr_insn
++];
735 memcpy(insn
, p
->current
, sizeof(*insn
));
737 /* Reset this one-shot flag:
740 if (p
->current
->header
.destreg__conditionalmod
) {
741 p
->current
->header
.destreg__conditionalmod
= 0;
742 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
745 insn
->header
.opcode
= opcode
;
749 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
754 struct brw_instruction
*insn
= next_insn(p
, opcode
);
755 brw_set_dest(p
, insn
, dest
);
756 brw_set_src0(p
, insn
, src
);
760 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
764 struct brw_reg src1
)
766 struct brw_instruction
*insn
= next_insn(p
, opcode
);
767 brw_set_dest(p
, insn
, dest
);
768 brw_set_src0(p
, insn
, src0
);
769 brw_set_src1(p
, insn
, src1
);
774 get_3src_subreg_nr(struct brw_reg reg
)
776 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
777 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
778 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
780 return reg
.subnr
/ 4;
784 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
791 struct brw_instruction
*insn
= next_insn(p
, opcode
);
793 gen7_convert_mrf_to_grf(p
, &dest
);
795 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
797 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
798 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
799 assert(dest
.nr
< 128);
800 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
801 assert(dest
.type
== BRW_REGISTER_TYPE_F
);
802 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
803 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
804 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
805 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
806 guess_execution_size(p
, insn
, dest
);
808 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
809 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
810 assert(src0
.nr
< 128);
811 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
812 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
813 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
814 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
815 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
816 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
817 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
819 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
820 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
821 assert(src1
.nr
< 128);
822 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
823 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
824 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
825 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
826 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
827 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
828 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
829 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
831 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
832 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
833 assert(src2
.nr
< 128);
834 assert(src2
.type
== BRW_REGISTER_TYPE_F
);
835 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
836 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
837 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
838 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
839 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
840 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
846 /***********************************************************************
847 * Convenience routines.
850 struct brw_instruction *brw_##OP(struct brw_compile *p, \
851 struct brw_reg dest, \
852 struct brw_reg src0) \
854 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
858 struct brw_instruction *brw_##OP(struct brw_compile *p, \
859 struct brw_reg dest, \
860 struct brw_reg src0, \
861 struct brw_reg src1) \
863 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
867 struct brw_instruction *brw_##OP(struct brw_compile *p, \
868 struct brw_reg dest, \
869 struct brw_reg src0, \
870 struct brw_reg src1, \
871 struct brw_reg src2) \
873 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
876 /* Rounding operations (other than RNDD) require two instructions - the first
877 * stores a rounded value (possibly the wrong way) in the dest register, but
878 * also sets a per-channel "increment bit" in the flag register. A predicated
879 * add of 1.0 fixes dest to contain the desired result.
881 * Sandybridge and later appear to round correctly without an ADD.
884 void brw_##OP(struct brw_compile *p, \
885 struct brw_reg dest, \
886 struct brw_reg src) \
888 struct brw_instruction *rnd, *add; \
889 rnd = next_insn(p, BRW_OPCODE_##OP); \
890 brw_set_dest(p, rnd, dest); \
891 brw_set_src0(p, rnd, src); \
893 if (p->brw->intel.gen < 6) { \
894 /* turn on round-increments */ \
895 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
896 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
897 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
933 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
939 if (src0
.type
== BRW_REGISTER_TYPE_F
||
940 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
941 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
942 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
943 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
946 if (src1
.type
== BRW_REGISTER_TYPE_F
||
947 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
948 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
949 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
950 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
953 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
956 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
961 assert(dest
.type
== src0
.type
);
962 assert(src0
.type
== src1
.type
);
964 case BRW_REGISTER_TYPE_B
:
965 case BRW_REGISTER_TYPE_UB
:
966 case BRW_REGISTER_TYPE_W
:
967 case BRW_REGISTER_TYPE_UW
:
968 case BRW_REGISTER_TYPE_D
:
969 case BRW_REGISTER_TYPE_UD
:
972 assert(!"Bad type for brw_AVG");
975 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
978 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
984 if (src0
.type
== BRW_REGISTER_TYPE_D
||
985 src0
.type
== BRW_REGISTER_TYPE_UD
||
986 src1
.type
== BRW_REGISTER_TYPE_D
||
987 src1
.type
== BRW_REGISTER_TYPE_UD
) {
988 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
991 if (src0
.type
== BRW_REGISTER_TYPE_F
||
992 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
993 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
994 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
995 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
998 if (src1
.type
== BRW_REGISTER_TYPE_F
||
999 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1000 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1001 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1002 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1005 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1006 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1007 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1008 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1010 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1014 void brw_NOP(struct brw_compile
*p
)
1016 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1017 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1018 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1019 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1026 /***********************************************************************
1027 * Comparisons, if/else/endif
1030 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1031 struct brw_reg dest
,
1032 struct brw_reg src0
,
1033 struct brw_reg src1
)
1035 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1037 insn
->header
.execution_size
= 1;
1038 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1039 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1041 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1047 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1049 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1051 p
->if_stack_depth
++;
1052 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1053 p
->if_stack_array_size
*= 2;
1054 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1055 p
->if_stack_array_size
);
1059 static struct brw_instruction
*
1060 pop_if_stack(struct brw_compile
*p
)
1062 p
->if_stack_depth
--;
1063 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1067 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1069 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1070 p
->loop_stack_array_size
*= 2;
1071 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1072 p
->loop_stack_array_size
);
1073 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1074 p
->loop_stack_array_size
);
1077 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1078 p
->loop_stack_depth
++;
1079 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1082 static struct brw_instruction
*
1083 get_inner_do_insn(struct brw_compile
*p
)
1085 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1088 /* EU takes the value from the flag register and pushes it onto some
1089 * sort of a stack (presumably merging with any flag value already on
1090 * the stack). Within an if block, the flags at the top of the stack
1091 * control execution on each channel of the unit, eg. on each of the
1092 * 16 pixel values in our wm programs.
1094 * When the matching 'else' instruction is reached (presumably by
1095 * countdown of the instruction count patched in by our ELSE/ENDIF
 1096 * functions), the relevant flags are inverted.
1098 * When the matching 'endif' instruction is reached, the flags are
1099 * popped off. If the stack is now empty, normal execution resumes.
1101 struct brw_instruction
*
1102 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
1104 struct intel_context
*intel
= &p
->brw
->intel
;
1105 struct brw_instruction
*insn
;
1107 insn
= next_insn(p
, BRW_OPCODE_IF
);
1109 /* Override the defaults for this instruction:
1111 if (intel
->gen
< 6) {
1112 brw_set_dest(p
, insn
, brw_ip_reg());
1113 brw_set_src0(p
, insn
, brw_ip_reg());
1114 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1115 } else if (intel
->gen
== 6) {
1116 brw_set_dest(p
, insn
, brw_imm_w(0));
1117 insn
->bits1
.branch_gen6
.jump_count
= 0;
1118 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1119 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1121 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1122 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1123 brw_set_src1(p
, insn
, brw_imm_ud(0));
1124 insn
->bits3
.break_cont
.jip
= 0;
1125 insn
->bits3
.break_cont
.uip
= 0;
1128 insn
->header
.execution_size
= execute_size
;
1129 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1130 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1131 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1132 if (!p
->single_program_flow
)
1133 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1135 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1137 push_if_stack(p
, insn
);
1138 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1142 /* This function is only used for gen6-style IF instructions with an
1143 * embedded comparison (conditional modifier). It is not used on gen7.
1145 struct brw_instruction
*
1146 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1147 struct brw_reg src0
, struct brw_reg src1
)
1149 struct brw_instruction
*insn
;
1151 insn
= next_insn(p
, BRW_OPCODE_IF
);
1153 brw_set_dest(p
, insn
, brw_imm_w(0));
1154 if (p
->compressed
) {
1155 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1157 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1159 insn
->bits1
.branch_gen6
.jump_count
= 0;
1160 brw_set_src0(p
, insn
, src0
);
1161 brw_set_src1(p
, insn
, src1
);
1163 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1164 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1165 insn
->header
.destreg__conditionalmod
= conditional
;
1167 if (!p
->single_program_flow
)
1168 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1170 push_if_stack(p
, insn
);
1175 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1178 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1179 struct brw_instruction
*if_inst
,
1180 struct brw_instruction
*else_inst
)
1182 /* The next instruction (where the ENDIF would be, if it existed) */
1183 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1185 assert(p
->single_program_flow
);
1186 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1187 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1188 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1190 /* Convert IF to an ADD instruction that moves the instruction pointer
1191 * to the first instruction of the ELSE block. If there is no ELSE
1192 * block, point to where ENDIF would be. Reverse the predicate.
1194 * There's no need to execute an ENDIF since we don't need to do any
1195 * stack operations, and if we're currently executing, we just want to
1196 * continue normally.
1198 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1199 if_inst
->header
.predicate_inverse
= 1;
1201 if (else_inst
!= NULL
) {
1202 /* Convert ELSE to an ADD instruction that points where the ENDIF
1205 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1207 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1208 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1210 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1215 * Patch IF and ELSE instructions with appropriate jump targets.
1218 patch_IF_ELSE(struct brw_compile
*p
,
1219 struct brw_instruction
*if_inst
,
1220 struct brw_instruction
*else_inst
,
1221 struct brw_instruction
*endif_inst
)
1223 struct intel_context
*intel
= &p
->brw
->intel
;
1225 /* We shouldn't be patching IF and ELSE instructions in single program flow
1226 * mode when gen < 6, because in single program flow mode on those
1227 * platforms, we convert flow control instructions to conditional ADDs that
1228 * operate on IP (see brw_ENDIF).
1230 * However, on Gen6, writing to IP doesn't work in single program flow mode
1231 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1232 * not be updated by non-flow control instructions."). And on later
1233 * platforms, there is no significant benefit to converting control flow
1234 * instructions to conditional ADDs. So we do patch IF and ELSE
1235 * instructions in single program flow mode on those platforms.
1238 assert(!p
->single_program_flow
);
1240 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1241 assert(endif_inst
!= NULL
);
1242 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1245 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1246 * requires 2 chunks.
1248 if (intel
->gen
>= 5)
1251 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1252 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1254 if (else_inst
== NULL
) {
1255 /* Patch IF -> ENDIF */
1256 if (intel
->gen
< 6) {
1257 /* Turn it into an IFF, which means no mask stack operations for
1258 * all-false and jumping past the ENDIF.
1260 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1261 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1262 if_inst
->bits3
.if_else
.pop_count
= 0;
1263 if_inst
->bits3
.if_else
.pad0
= 0;
1264 } else if (intel
->gen
== 6) {
1265 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1266 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1268 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1269 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1272 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1274 /* Patch IF -> ELSE */
1275 if (intel
->gen
< 6) {
1276 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1277 if_inst
->bits3
.if_else
.pop_count
= 0;
1278 if_inst
->bits3
.if_else
.pad0
= 0;
1279 } else if (intel
->gen
== 6) {
1280 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1283 /* Patch ELSE -> ENDIF */
1284 if (intel
->gen
< 6) {
1285 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1288 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1289 else_inst
->bits3
.if_else
.pop_count
= 1;
1290 else_inst
->bits3
.if_else
.pad0
= 0;
1291 } else if (intel
->gen
== 6) {
1292 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1293 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1295 /* The IF instruction's JIP should point just past the ELSE */
1296 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1297 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1298 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1299 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1305 brw_ELSE(struct brw_compile
*p
)
1307 struct intel_context
*intel
= &p
->brw
->intel
;
1308 struct brw_instruction
*insn
;
1310 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1312 if (intel
->gen
< 6) {
1313 brw_set_dest(p
, insn
, brw_ip_reg());
1314 brw_set_src0(p
, insn
, brw_ip_reg());
1315 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1316 } else if (intel
->gen
== 6) {
1317 brw_set_dest(p
, insn
, brw_imm_w(0));
1318 insn
->bits1
.branch_gen6
.jump_count
= 0;
1319 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1320 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1322 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1323 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1324 brw_set_src1(p
, insn
, brw_imm_ud(0));
1325 insn
->bits3
.break_cont
.jip
= 0;
1326 insn
->bits3
.break_cont
.uip
= 0;
1329 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1330 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1331 if (!p
->single_program_flow
)
1332 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1334 push_if_stack(p
, insn
);
1338 brw_ENDIF(struct brw_compile
*p
)
1340 struct intel_context
*intel
= &p
->brw
->intel
;
1341 struct brw_instruction
*insn
= NULL
;
1342 struct brw_instruction
*else_inst
= NULL
;
1343 struct brw_instruction
*if_inst
= NULL
;
1344 struct brw_instruction
*tmp
;
1345 bool emit_endif
= true;
1347 /* In single program flow mode, we can express IF and ELSE instructions
1348 * equivalently as ADD instructions that operate on IP. On platforms prior
1349 * to Gen6, flow control instructions cause an implied thread switch, so
1350 * this is a significant savings.
1352 * However, on Gen6, writing to IP doesn't work in single program flow mode
1353 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1354 * not be updated by non-flow control instructions."). And on later
1355 * platforms, there is no significant benefit to converting control flow
1356 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1359 if (intel
->gen
< 6 && p
->single_program_flow
)
1363 * A single next_insn() may change the base adress of instruction store
1364 * memory(p->store), so call it first before referencing the instruction
1365 * store pointer from an index
1368 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1370 /* Pop the IF and (optional) ELSE instructions from the stack */
1371 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1372 tmp
= pop_if_stack(p
);
1373 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1375 tmp
= pop_if_stack(p
);
1380 /* ENDIF is useless; don't bother emitting it. */
1381 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1385 if (intel
->gen
< 6) {
1386 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1387 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1388 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1389 } else if (intel
->gen
== 6) {
1390 brw_set_dest(p
, insn
, brw_imm_w(0));
1391 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1392 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1394 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1395 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1396 brw_set_src1(p
, insn
, brw_imm_ud(0));
1399 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1400 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1401 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1403 /* Also pop item off the stack in the endif instruction: */
1404 if (intel
->gen
< 6) {
1405 insn
->bits3
.if_else
.jump_count
= 0;
1406 insn
->bits3
.if_else
.pop_count
= 1;
1407 insn
->bits3
.if_else
.pad0
= 0;
1408 } else if (intel
->gen
== 6) {
1409 insn
->bits1
.branch_gen6
.jump_count
= 2;
1411 insn
->bits3
.break_cont
.jip
= 2;
1413 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1416 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1418 struct intel_context
*intel
= &p
->brw
->intel
;
1419 struct brw_instruction
*insn
;
1421 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1422 if (intel
->gen
>= 6) {
1423 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1424 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1425 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1427 brw_set_dest(p
, insn
, brw_ip_reg());
1428 brw_set_src0(p
, insn
, brw_ip_reg());
1429 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1430 insn
->bits3
.if_else
.pad0
= 0;
1431 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1433 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1434 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1439 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1441 struct brw_instruction
*insn
;
1443 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1444 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1445 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1446 brw_set_dest(p
, insn
, brw_ip_reg());
1447 brw_set_src0(p
, insn
, brw_ip_reg());
1448 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1450 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1451 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1455 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1457 struct brw_instruction
*insn
;
1458 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1459 brw_set_dest(p
, insn
, brw_ip_reg());
1460 brw_set_src0(p
, insn
, brw_ip_reg());
1461 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1462 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1463 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1464 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1465 insn
->bits3
.if_else
.pad0
= 0;
1466 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1470 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1472 struct brw_instruction
*insn
;
1474 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1475 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1476 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1477 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1479 if (p
->compressed
) {
1480 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1482 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1483 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1490 * The DO/WHILE is just an unterminated loop -- break or continue are
1491 * used for control within the loop. We have a few ways they can be
1494 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1495 * jip and no DO instruction.
1497 * For non-uniform control flow pre-gen6, there's a DO instruction to
1498 * push the mask, and a WHILE to jump back, and BREAK to get out and
1501 * For gen6, there's no more mask stack, so no need for DO. WHILE
1502 * just points back to the first instruction of the loop.
1504 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1506 struct intel_context
*intel
= &p
->brw
->intel
;
1508 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1509 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1510 return &p
->store
[p
->nr_insn
];
1512 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1514 push_loop_stack(p
, insn
);
1516 /* Override the defaults for this instruction:
1518 brw_set_dest(p
, insn
, brw_null_reg());
1519 brw_set_src0(p
, insn
, brw_null_reg());
1520 brw_set_src1(p
, insn
, brw_null_reg());
1522 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1523 insn
->header
.execution_size
= execute_size
;
1524 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1525 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1526 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1533 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1536 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1537 * nesting, since it can always just point to the end of the block/current loop.
1540 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1542 struct intel_context
*intel
= &p
->brw
->intel
;
1543 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1544 struct brw_instruction
*inst
;
1545 int br
= (intel
->gen
== 5) ? 2 : 1;
1547 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1548 /* If the jump count is != 0, that means that this instruction has already
1549 * been patched because it's part of a loop inside of the one we're
1552 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1553 inst
->bits3
.if_else
.jump_count
== 0) {
1554 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1555 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1556 inst
->bits3
.if_else
.jump_count
== 0) {
1557 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1562 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1564 struct intel_context
*intel
= &p
->brw
->intel
;
1565 struct brw_instruction
*insn
, *do_insn
;
1568 if (intel
->gen
>= 5)
1571 if (intel
->gen
>= 7) {
1572 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1573 do_insn
= get_inner_do_insn(p
);
1575 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1576 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1577 brw_set_src1(p
, insn
, brw_imm_ud(0));
1578 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1580 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1581 } else if (intel
->gen
== 6) {
1582 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1583 do_insn
= get_inner_do_insn(p
);
1585 brw_set_dest(p
, insn
, brw_imm_w(0));
1586 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1587 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1588 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1590 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1592 if (p
->single_program_flow
) {
1593 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1594 do_insn
= get_inner_do_insn(p
);
1596 brw_set_dest(p
, insn
, brw_ip_reg());
1597 brw_set_src0(p
, insn
, brw_ip_reg());
1598 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1599 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1601 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1602 do_insn
= get_inner_do_insn(p
);
1604 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1606 brw_set_dest(p
, insn
, brw_ip_reg());
1607 brw_set_src0(p
, insn
, brw_ip_reg());
1608 brw_set_src1(p
, insn
, brw_imm_d(0));
1610 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1611 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1612 insn
->bits3
.if_else
.pop_count
= 0;
1613 insn
->bits3
.if_else
.pad0
= 0;
1615 brw_patch_break_cont(p
, insn
);
1618 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1619 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1621 p
->loop_stack_depth
--;
1629 void brw_land_fwd_jump(struct brw_compile
*p
, int jmp_insn_idx
)
1631 struct intel_context
*intel
= &p
->brw
->intel
;
1632 struct brw_instruction
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1635 if (intel
->gen
>= 5)
1638 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1639 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1641 jmp_insn
->bits3
.ud
= jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1);
1646 /* To integrate with the above, it makes sense that the comparison
1647 * instruction should populate the flag register. It might be simpler
1648 * just to use the flag reg for most WM tasks?
1650 void brw_CMP(struct brw_compile
*p
,
1651 struct brw_reg dest
,
1653 struct brw_reg src0
,
1654 struct brw_reg src1
)
1656 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1658 insn
->header
.destreg__conditionalmod
= conditional
;
1659 brw_set_dest(p
, insn
, dest
);
1660 brw_set_src0(p
, insn
, src0
);
1661 brw_set_src1(p
, insn
, src1
);
1663 /* guess_execution_size(insn, src0); */
1666 /* Make it so that future instructions will use the computed flag
1667 * value until brw_set_predicate_control_flag_value() is called
1670 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1672 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1673 p
->flag_value
= 0xff;
1677 /* Issue 'wait' instruction for n1, host could program MMIO
1678 to wake up thread. */
1679 void brw_WAIT (struct brw_compile
*p
)
1681 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1682 struct brw_reg src
= brw_notification_1_reg();
1684 brw_set_dest(p
, insn
, src
);
1685 brw_set_src0(p
, insn
, src
);
1686 brw_set_src1(p
, insn
, brw_null_reg());
1687 insn
->header
.execution_size
= 0; /* must */
1688 insn
->header
.predicate_control
= 0;
1689 insn
->header
.compression_control
= 0;
1693 /***********************************************************************
1694 * Helpers for the various SEND message types:
1697 /** Extended math function, float[8].
1699 void brw_math( struct brw_compile
*p
,
1700 struct brw_reg dest
,
1707 struct intel_context
*intel
= &p
->brw
->intel
;
1709 if (intel
->gen
>= 6) {
1710 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1712 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1713 (intel
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1714 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1716 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1717 if (intel
->gen
== 6)
1718 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1720 /* Source modifiers are ignored for extended math instructions on Gen6. */
1721 if (intel
->gen
== 6) {
1722 assert(!src
.negate
);
1726 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1727 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1728 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1729 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1731 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1734 /* Math is the same ISA format as other opcodes, except that CondModifier
1735 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1737 insn
->header
.destreg__conditionalmod
= function
;
1739 brw_set_dest(p
, insn
, dest
);
1740 brw_set_src0(p
, insn
, src
);
1741 brw_set_src1(p
, insn
, brw_null_reg());
1743 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1745 /* Example code doesn't set predicate_control for send
1748 insn
->header
.predicate_control
= 0;
1749 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1751 brw_set_dest(p
, insn
, dest
);
1752 brw_set_src0(p
, insn
, src
);
1753 brw_set_math_message(p
,
1756 src
.type
== BRW_REGISTER_TYPE_D
,
1762 /** Extended math function, float[8].
1764 void brw_math2(struct brw_compile
*p
,
1765 struct brw_reg dest
,
1767 struct brw_reg src0
,
1768 struct brw_reg src1
)
1770 struct intel_context
*intel
= &p
->brw
->intel
;
1771 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1773 assert(intel
->gen
>= 6);
1777 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1778 (intel
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1779 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1780 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1782 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1783 if (intel
->gen
== 6) {
1784 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1785 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1788 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1789 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1790 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1791 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1792 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1794 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1795 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1798 /* Source modifiers are ignored for extended math instructions on Gen6. */
1799 if (intel
->gen
== 6) {
1800 assert(!src0
.negate
);
1802 assert(!src1
.negate
);
1806 /* Math is the same ISA format as other opcodes, except that CondModifier
1807 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1809 insn
->header
.destreg__conditionalmod
= function
;
1811 brw_set_dest(p
, insn
, dest
);
1812 brw_set_src0(p
, insn
, src0
);
1813 brw_set_src1(p
, insn
, src1
);
1818 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1819 * using a constant offset per channel.
1821 * The offset must be aligned to oword size (16 bytes). Used for
1822 * register spilling.
1824 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1829 struct intel_context
*intel
= &p
->brw
->intel
;
1830 uint32_t msg_control
, msg_type
;
1833 if (intel
->gen
>= 6)
1836 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1838 if (num_regs
== 1) {
1839 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1842 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1846 /* Set up the message header. This is g0, with g0.2 filled with
1847 * the offset. We don't want to leave our offset around in g0 or
1848 * it'll screw up texture samples, so set it up inside the message
1852 brw_push_insn_state(p
);
1853 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1854 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1856 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1858 /* set message header global offset field (reg 0, element 2) */
1860 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1862 2), BRW_REGISTER_TYPE_UD
),
1863 brw_imm_ud(offset
));
1865 brw_pop_insn_state(p
);
1869 struct brw_reg dest
;
1870 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1871 int send_commit_msg
;
1872 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1873 BRW_REGISTER_TYPE_UW
);
1875 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1876 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1877 src_header
= vec16(src_header
);
1879 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1880 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1882 /* Until gen6, writes followed by reads from the same location
1883 * are not guaranteed to be ordered unless write_commit is set.
1884 * If set, then a no-op write is issued to the destination
1885 * register to set a dependency, and a read from the destination
1886 * can be used to ensure the ordering.
1888 * For gen6, only writes between different threads need ordering
1889 * protection. Our use of DP writes is all about register
1890 * spilling within a thread.
1892 if (intel
->gen
>= 6) {
1893 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1894 send_commit_msg
= 0;
1897 send_commit_msg
= 1;
1900 brw_set_dest(p
, insn
, dest
);
1901 if (intel
->gen
>= 6) {
1902 brw_set_src0(p
, insn
, mrf
);
1904 brw_set_src0(p
, insn
, brw_null_reg());
1907 if (intel
->gen
>= 6)
1908 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1910 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1912 brw_set_dp_write_message(p
,
1914 255, /* binding table index (255=stateless) */
1918 true, /* header_present */
1919 0, /* not a render target */
1920 send_commit_msg
, /* response_length */
1928 * Read a block of owords (half a GRF each) from the scratch buffer
1929 * using a constant index per channel.
1931 * Offset must be aligned to oword size (16 bytes). Used for register
1935 brw_oword_block_read_scratch(struct brw_compile
*p
,
1936 struct brw_reg dest
,
1941 struct intel_context
*intel
= &p
->brw
->intel
;
1942 uint32_t msg_control
;
1945 if (intel
->gen
>= 6)
1948 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1949 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1951 if (num_regs
== 1) {
1952 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1955 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1960 brw_push_insn_state(p
);
1961 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1962 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1964 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1966 /* set message header global offset field (reg 0, element 2) */
1968 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1970 2), BRW_REGISTER_TYPE_UD
),
1971 brw_imm_ud(offset
));
1973 brw_pop_insn_state(p
);
1977 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1979 assert(insn
->header
.predicate_control
== 0);
1980 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1981 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1983 brw_set_dest(p
, insn
, dest
); /* UW? */
1984 if (intel
->gen
>= 6) {
1985 brw_set_src0(p
, insn
, mrf
);
1987 brw_set_src0(p
, insn
, brw_null_reg());
1990 brw_set_dp_read_message(p
,
1992 255, /* binding table index (255=stateless) */
1994 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1995 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1997 true, /* header_present */
2003 * Read a float[4] vector from the data port Data Cache (const buffer).
2004 * Location (in buffer) should be a multiple of 16.
2005 * Used for fetching shader constants.
2007 void brw_oword_block_read(struct brw_compile
*p
,
2008 struct brw_reg dest
,
2011 uint32_t bind_table_index
)
2013 struct intel_context
*intel
= &p
->brw
->intel
;
2015 /* On newer hardware, offset is in units of owords. */
2016 if (intel
->gen
>= 6)
2019 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2021 brw_push_insn_state(p
);
2022 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2023 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2024 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2026 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2028 /* set message header global offset field (reg 0, element 2) */
2030 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2032 2), BRW_REGISTER_TYPE_UD
),
2033 brw_imm_ud(offset
));
2035 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2036 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2038 /* cast dest to a uword[8] vector */
2039 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2041 brw_set_dest(p
, insn
, dest
);
2042 if (intel
->gen
>= 6) {
2043 brw_set_src0(p
, insn
, mrf
);
2045 brw_set_src0(p
, insn
, brw_null_reg());
2048 brw_set_dp_read_message(p
,
2051 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2052 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2053 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2055 true, /* header_present */
2056 1); /* response_length (1 reg, 2 owords!) */
2058 brw_pop_insn_state(p
);
2062 void brw_fb_WRITE(struct brw_compile
*p
,
2065 struct brw_reg src0
,
2067 GLuint binding_table_index
,
2069 GLuint response_length
,
2071 bool header_present
)
2073 struct intel_context
*intel
= &p
->brw
->intel
;
2074 struct brw_instruction
*insn
;
2076 struct brw_reg dest
;
2078 if (dispatch_width
== 16)
2079 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2081 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2083 if (intel
->gen
>= 6) {
2084 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2086 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2088 /* The execution mask is ignored for render target writes. */
2089 insn
->header
.predicate_control
= 0;
2090 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2092 if (intel
->gen
>= 6) {
2093 /* headerless version, just submit color payload */
2094 src0
= brw_message_reg(msg_reg_nr
);
2096 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2098 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2100 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2103 brw_set_dest(p
, insn
, dest
);
2104 brw_set_src0(p
, insn
, src0
);
2105 brw_set_dp_write_message(p
,
2107 binding_table_index
,
2112 eot
, /* last render target write */
2115 0 /* send_commit_msg */);
2120 * Texture sample instruction.
2121 * Note: the msg_type plus msg_length values determine exactly what kind
2122 * of sampling operation is performed. See volume 4, page 161 of docs.
2124 void brw_SAMPLE(struct brw_compile
*p
,
2125 struct brw_reg dest
,
2127 struct brw_reg src0
,
2128 GLuint binding_table_index
,
2131 GLuint response_length
,
2133 GLuint header_present
,
2135 GLuint return_format
)
2137 struct intel_context
*intel
= &p
->brw
->intel
;
2138 struct brw_instruction
*insn
;
2140 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2142 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2143 insn
->header
.predicate_control
= 0; /* XXX */
2144 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2146 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2148 brw_set_dest(p
, insn
, dest
);
2149 brw_set_src0(p
, insn
, src0
);
2150 brw_set_sampler_message(p
, insn
,
2151 binding_table_index
,
2161 /* All these variables are pretty confusing - we might be better off
2162 * using bitmasks and macros for this, in the old style. Or perhaps
2163 * just having the caller instantiate the fields in dword3 itself.
2165 void brw_urb_WRITE(struct brw_compile
*p
,
2166 struct brw_reg dest
,
2168 struct brw_reg src0
,
2172 GLuint response_length
,
2174 bool writes_complete
,
2178 struct intel_context
*intel
= &p
->brw
->intel
;
2179 struct brw_instruction
*insn
;
2181 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2183 if (intel
->gen
== 7) {
2184 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2185 brw_push_insn_state(p
);
2186 brw_set_access_mode(p
, BRW_ALIGN_1
);
2187 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2188 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2189 BRW_REGISTER_TYPE_UD
),
2190 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2191 brw_imm_ud(0xff00));
2192 brw_pop_insn_state(p
);
2195 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2197 assert(msg_length
< BRW_MAX_MRF
);
2199 brw_set_dest(p
, insn
, dest
);
2200 brw_set_src0(p
, insn
, src0
);
2201 brw_set_src1(p
, insn
, brw_imm_d(0));
2204 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2206 brw_set_urb_message(p
,
2219 next_ip(struct brw_compile
*p
, int ip
)
2221 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2223 if (insn
->header
.cmpt_control
)
2230 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2233 void *store
= p
->store
;
2235 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2236 struct brw_instruction
*insn
= store
+ ip
;
2238 switch (insn
->header
.opcode
) {
2239 case BRW_OPCODE_ENDIF
:
2240 case BRW_OPCODE_ELSE
:
2241 case BRW_OPCODE_WHILE
:
2242 case BRW_OPCODE_HALT
:
2250 /* There is no DO instruction on gen6, so to find the end of the loop
2251 * we have to see if the loop is jumping back before our start
2255 brw_find_loop_end(struct brw_compile
*p
, int start
)
2257 struct intel_context
*intel
= &p
->brw
->intel
;
2260 void *store
= p
->store
;
2262 /* Always start after the instruction (such as a WHILE) we're trying to fix
2265 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2266 struct brw_instruction
*insn
= store
+ ip
;
2268 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2269 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2270 : insn
->bits3
.break_cont
.jip
;
2271 if (ip
+ jip
* scale
<= start
)
2275 assert(!"not reached");
2279 /* After program generation, go back and update the UIP and JIP of
2280 * BREAK, CONT, and HALT instructions to their correct locations.
2283 brw_set_uip_jip(struct brw_compile
*p
)
2285 struct intel_context
*intel
= &p
->brw
->intel
;
2288 void *store
= p
->store
;
2293 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2294 struct brw_instruction
*insn
= store
+ ip
;
2296 if (insn
->header
.cmpt_control
) {
2297 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2298 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2299 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2300 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2304 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2305 switch (insn
->header
.opcode
) {
2306 case BRW_OPCODE_BREAK
:
2307 assert(block_end_ip
!= 0);
2308 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2309 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2310 insn
->bits3
.break_cont
.uip
=
2311 (brw_find_loop_end(p
, ip
) - ip
+
2312 (intel
->gen
== 6 ? 16 : 0)) / scale
;
2314 case BRW_OPCODE_CONTINUE
:
2315 assert(block_end_ip
!= 0);
2316 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2317 insn
->bits3
.break_cont
.uip
=
2318 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2320 assert(insn
->bits3
.break_cont
.uip
!= 0);
2321 assert(insn
->bits3
.break_cont
.jip
!= 0);
2324 case BRW_OPCODE_ENDIF
:
2325 if (block_end_ip
== 0)
2326 insn
->bits3
.break_cont
.jip
= 2;
2328 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2331 case BRW_OPCODE_HALT
:
2332 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2334 * "In case of the halt instruction not inside any conditional
2335 * code block, the value of <JIP> and <UIP> should be the
2336 * same. In case of the halt instruction inside conditional code
2337 * block, the <UIP> should be the end of the program, and the
2338 * <JIP> should be end of the most inner conditional code block."
2340 * The uip will have already been set by whoever set up the
2343 if (block_end_ip
== 0) {
2344 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2346 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2348 assert(insn
->bits3
.break_cont
.uip
!= 0);
2349 assert(insn
->bits3
.break_cont
.jip
!= 0);
2355 void brw_ff_sync(struct brw_compile
*p
,
2356 struct brw_reg dest
,
2358 struct brw_reg src0
,
2360 GLuint response_length
,
2363 struct intel_context
*intel
= &p
->brw
->intel
;
2364 struct brw_instruction
*insn
;
2366 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2368 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2369 brw_set_dest(p
, insn
, dest
);
2370 brw_set_src0(p
, insn
, src0
);
2371 brw_set_src1(p
, insn
, brw_imm_d(0));
2374 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2376 brw_set_ff_sync_message(p
,
2384 * Emit the SEND instruction necessary to generate stream output data on Gen6
2385 * (for transform feedback).
2387 * If send_commit_msg is true, this is the last piece of stream output data
2388 * from this thread, so send the data as a committed write. According to the
2389 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2391 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2392 * writes are complete by sending the final write as a committed write."
2395 brw_svb_write(struct brw_compile
*p
,
2396 struct brw_reg dest
,
2398 struct brw_reg src0
,
2399 GLuint binding_table_index
,
2400 bool send_commit_msg
)
2402 struct brw_instruction
*insn
;
2404 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2406 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2407 brw_set_dest(p
, insn
, dest
);
2408 brw_set_src0(p
, insn
, src0
);
2409 brw_set_src1(p
, insn
, brw_imm_d(0));
2410 brw_set_dp_write_message(p
, insn
,
2411 binding_table_index
,
2412 0, /* msg_control: ignored */
2413 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2415 true, /* header_present */
2416 0, /* last_render_target: ignored */
2417 send_commit_msg
, /* response_length */
2418 0, /* end_of_thread */
2419 send_commit_msg
); /* send_commit_msg */
2423 * This instruction is generated as a single-channel align1 instruction by
2424 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2426 * We can't use the typed atomic op in the FS because that has the execution
2427 * mask ANDed with the pixel mask, but we just want to write the one dword for
2430 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2431 * one u32. So we use the same untyped atomic write message as the pixel
2434 * The untyped atomic operation requires a BUFFER surface type with RAW
2435 * format, and is only accessible through the legacy DATA_CACHE dataport
2438 void brw_shader_time_add(struct brw_compile
*p
,
2440 uint32_t surf_index
)
2442 struct intel_context
*intel
= &p
->brw
->intel
;
2443 assert(intel
->gen
>= 7);
2445 brw_push_insn_state(p
);
2446 brw_set_access_mode(p
, BRW_ALIGN_1
);
2447 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2448 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2449 brw_pop_insn_state(p
);
2451 /* We use brw_vec1_reg and unmasked because we want to increment the given
2454 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2456 brw_set_src0(p
, send
, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2459 uint32_t sfid
, msg_type
;
2460 if (intel
->is_haswell
) {
2461 sfid
= HSW_SFID_DATAPORT_DATA_CACHE_1
;
2462 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2464 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
2465 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2468 bool header_present
= false;
2470 uint32_t mlen
= 2; /* offset, value */
2472 brw_set_message_descriptor(p
, send
, sfid
, mlen
, rlen
, header_present
, eot
);
2474 send
->bits3
.ud
|= msg_type
<< 14;
2475 send
->bits3
.ud
|= 0 << 13; /* no return data */
2476 send
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2477 send
->bits3
.ud
|= BRW_AOP_ADD
<< 8;
2478 send
->bits3
.ud
|= surf_index
<< 0;