2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 *
 * On return, *src is rewritten to point at the message register so the
 * caller encodes the SEND against the MRF.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  GLuint msg_reg_nr)
{
   struct intel_context *intel = &p->brw->intel;
   /* NOTE(review): pre-gen6 early return reconstructed from the header
    * comment (the implied move exists in hardware there) — verify against
    * the original file.
    */
   if (intel->gen < 6)
      return;

   /* Already a message register: nothing to resolve. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      /* Copy the operand into the MRF with masking/compression disabled so
       * every channel of the message payload is written.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
	      retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
/* Rewrite an MRF reference to the GRF range that emulates MRFs on Gen7
 * (which has no real message register file).
 */
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
   /* From the BSpec / ISA Reference / send - [DevIVB+]:
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   struct intel_context *intel = &p->brw->intel;
   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}
/* Encode the destination operand of @insn from @dest: register file, type,
 * addressing mode, register/subregister numbers and horizontal stride, for
 * both align1 and align16 access modes.  Also derives the instruction's
 * execution size from the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   gen7_convert_mrf_to_grf(p, &dest);

   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
	 /* A zero horizontal stride is not a valid destination encoding;
	  * promote it to stride 1.
	  */
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
      } else {
	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
	  *    this to be programmed as "01".
	  */
	 insn->bits1.da16.dest_horiz_stride = 1;
      }
   } else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
      } else {
	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
	 /* even ignored in da16, still need to set as '01' */
	 insn->bits1.ia16.dest_horiz_stride = 1;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(p, insn, dest);
}
160 extern int reg_type_size
[];
163 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
165 int hstride_for_reg
[] = {0, 1, 2, 4};
166 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
167 int width_for_reg
[] = {1, 2, 4, 8, 16};
168 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
169 int width
, hstride
, vstride
, execsize
;
171 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
172 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
173 * mean the destination has to be 128-bit aligned and the
174 * destination horiz stride has to be a word.
176 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
177 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
178 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
184 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
185 reg
.file
== BRW_ARF_NULL
)
188 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
189 hstride
= hstride_for_reg
[reg
.hstride
];
191 if (reg
.vstride
== 0xf) {
194 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
195 vstride
= vstride_for_reg
[reg
.vstride
];
198 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
199 width
= width_for_reg
[reg
.width
];
201 assert(insn
->header
.execution_size
>= 0 &&
202 insn
->header
.execution_size
< Elements(execsize_for_reg
));
203 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
205 /* Restrictions from 3.3.10: Register Region Restrictions. */
207 assert(execsize
>= width
);
210 if (execsize
== width
&& hstride
!= 0) {
211 assert(vstride
== -1 || vstride
== width
* hstride
);
215 if (execsize
== width
&& hstride
== 0) {
216 /* no restriction on vstride. */
221 assert(hstride
== 0);
225 if (execsize
== 1 && width
== 1) {
226 assert(hstride
== 0);
227 assert(vstride
== 0);
231 if (vstride
== 0 && hstride
== 0) {
235 /* 10. Check destination issues. */
239 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
242 struct brw_context
*brw
= p
->brw
;
243 struct intel_context
*intel
= &brw
->intel
;
245 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
246 assert(reg
.nr
< 128);
248 gen7_convert_mrf_to_grf(p
, ®
);
250 if (intel
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
251 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
252 /* Any source modifiers or regions will be ignored, since this just
253 * identifies the MRF/GRF to start reading the message contents from.
254 * Check for some likely failures.
258 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
261 validate_reg(insn
, reg
);
263 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
264 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
265 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
266 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
267 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
269 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
270 insn
->bits3
.ud
= reg
.dw1
.ud
;
272 /* Required to set some fields in src1 as well:
274 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
275 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
279 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
280 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
281 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
282 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
285 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
286 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
290 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
292 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
293 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
296 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
300 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
301 if (reg
.width
== BRW_WIDTH_1
&&
302 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
303 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
304 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
305 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
308 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
309 insn
->bits2
.da1
.src0_width
= reg
.width
;
310 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
314 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
315 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
316 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
317 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
319 /* This is an oddity of the fact we're using the same
320 * descriptions for registers in align_16 as align_1:
322 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
323 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
325 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
331 void brw_set_src1(struct brw_compile
*p
,
332 struct brw_instruction
*insn
,
335 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
337 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
338 assert(reg
.nr
< 128);
340 gen7_convert_mrf_to_grf(p
, ®
);
342 validate_reg(insn
, reg
);
344 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
345 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
346 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
347 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
349 /* Only src1 can be immediate in two-argument instructions.
351 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
353 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
354 insn
->bits3
.ud
= reg
.dw1
.ud
;
357 /* This is a hardware restriction, which may or may not be lifted
360 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
361 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
363 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
364 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
365 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
368 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
369 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
372 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
373 if (reg
.width
== BRW_WIDTH_1
&&
374 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
375 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
376 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
377 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
380 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
381 insn
->bits3
.da1
.src1_width
= reg
.width
;
382 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
386 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
387 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
388 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
389 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
391 /* This is an oddity of the fact we're using the same
392 * descriptions for registers in align_16 as align_1:
394 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
395 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
397 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
/**
 * Set the Message Descriptor and Extended Message Descriptor fields
 * for SEND messages.
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* Zero the whole descriptor (src1) before setting individual fields. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
	 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
	 inst->header.destreg__conditionalmod = sfid;
      } else {
	 /* Set Extended Message Descriptor (ex_desc) */
	 inst->bits2.send_gen5.sfid = sfid;
	 inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}
/* Fill in the extended-math (SFID math) message descriptor of @insn.
 * Message and response lengths are inferred from the math function.
 *
 * NOTE(review): the switch bodies (msg_length / response_length values)
 * were reconstructed — POW and the INT_DIV variants take a two-register
 * payload, SINCOS and QUOTIENT_AND_REMAINDER return two registers; verify
 * against the original file.
 */
static void brw_set_math_message( struct brw_compile *p,
				  struct brw_instruction *insn,
				  GLuint function,
				  GLuint integer_type,
				  bool low_precision,
				  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
			      msg_length, response_length, false, false);
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = insn->header.saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = insn->header.saturate;
      insn->bits3.math.data_type = dataType;
   }
   /* Saturation is handled by the message itself, not the ALU. */
   insn->header.saturate = 0;
}
/* Fill in a Gen5 URB FF_SYNC message descriptor: a one-register message to
 * the URB shared function used by the geometry pipeline for thread
 * synchronization.  Fields unused by FF_SYNC are explicitly zeroed.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    GLuint response_length,
				    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
/* Fill in a URB write message descriptor, dispatching on hardware
 * generation (gen7 / gen5-6 / gen4 descriptor layouts).
 *
 * NOTE(review): parameter list reconstructed from the fields written below
 * (allocate, used, msg_length, response_length, end_of_thread, complete,
 * offset, swizzle_control) — verify order against the original file.
 */
static void brw_set_urb_message( struct brw_compile *p,
				 struct brw_instruction *insn,
				 bool allocate,
				 bool used,
				 GLuint msg_length,
				 GLuint response_length,
				 bool end_of_thread,
				 bool complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
			      msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7 URB writes cannot use the transpose swizzle. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
   }
}
/* Fill in a data-port write message descriptor.  Chooses the shared
 * function (SFID) per generation — gen7 splits render-target writes off to
 * the render cache while other writes go to the data cache; gen6 routes
 * all writes through the render cache — then fills the per-generation
 * descriptor layout.
 */
static void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 GLuint binding_table_index,
			 GLuint msg_control,
			 GLuint msg_type,
			 GLuint msg_length,
			 GLuint header_present,
			 GLuint last_render_target,
			 GLuint response_length,
			 GLuint end_of_thread,
			 GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}
/* Fill in a data-port read message descriptor.  Chooses the SFID per
 * generation (gen7: data cache; gen6: render or sampler cache depending on
 * target_cache; earlier: the dedicated read port) and fills the
 * per-generation descriptor layout.  Read messages never set end-of-thread.
 */
static void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			bool header_present,
			GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
			      header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}
/* Fill in a sampler (texture fetch) message descriptor for the chosen
 * per-generation layout; return_format is only encoded on the original
 * gen4 layout.
 */
void brw_set_sampler_message(struct brw_compile *p,
                             struct brw_instruction *insn,
                             GLuint binding_table_index,
                             GLuint sampler,
                             GLuint msg_type,
                             GLuint response_length,
                             GLuint msg_length,
                             GLuint header_present,
                             GLuint simd_mode,
                             GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
			      response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}
717 #define next_insn brw_next_insn
718 struct brw_instruction
*
719 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
721 struct brw_instruction
*insn
;
723 if (p
->nr_insn
+ 1 > p
->store_size
) {
725 printf("incresing the store size to %d\n", p
->store_size
<< 1);
727 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
728 struct brw_instruction
, p
->store_size
);
730 assert(!"realloc eu store memeory failed");
733 p
->next_insn_offset
+= 16;
734 insn
= &p
->store
[p
->nr_insn
++];
735 memcpy(insn
, p
->current
, sizeof(*insn
));
737 /* Reset this one-shot flag:
740 if (p
->current
->header
.destreg__conditionalmod
) {
741 p
->current
->header
.destreg__conditionalmod
= 0;
742 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
745 insn
->header
.opcode
= opcode
;
749 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
754 struct brw_instruction
*insn
= next_insn(p
, opcode
);
755 brw_set_dest(p
, insn
, dest
);
756 brw_set_src0(p
, insn
, src
);
760 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
764 struct brw_reg src1
)
766 struct brw_instruction
*insn
= next_insn(p
, opcode
);
767 brw_set_dest(p
, insn
, dest
);
768 brw_set_src0(p
, insn
, src0
);
769 brw_set_src1(p
, insn
, src1
);
774 get_3src_subreg_nr(struct brw_reg reg
)
776 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
777 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
778 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
780 return reg
.subnr
/ 4;
/* Emit a three-source (da3src) ALU instruction such as MAD/LRP/BFE.
 * Three-source encoding is align16-only, GRF-only for sources, and packs
 * its own operand fields (swizzle, rep_ctrl, subreg) rather than the usual
 * region description.
 */
static struct brw_instruction *brw_alu3(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1,
					struct brw_reg src2)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(insn->header.access_mode == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
	  dest.type == BRW_REGISTER_TYPE_D ||
	  dest.type == BRW_REGISTER_TYPE_UD);
   /* dest_reg_file is a single bit here: GRF (0) or MRF (1). */
   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
   insn->bits1.da3src.dest_reg_nr = dest.nr;
   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
   guess_execution_size(p, insn, dest);

   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
   insn->bits2.da3src.src0_reg_nr = src0.nr;
   insn->bits1.da3src.src0_abs = src0.abs;
   insn->bits1.da3src.src0_negate = src0.negate;
   /* rep_ctrl broadcasts a scalar (vstride 0) operand to all channels. */
   insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
   /* src1's subreg number straddles the bits2/bits3 dword boundary. */
   insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
   insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src1_reg_nr = src1.nr;
   insn->bits1.da3src.src1_abs = src1.abs;
   insn->bits1.da3src.src1_negate = src1.negate;

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
   insn->bits3.da3src.src2_reg_nr = src2.nr;
   insn->bits1.da3src.src2_abs = src2.abs;
   insn->bits1.da3src.src2_negate = src2.negate;

   if (intel->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
         break;
      case BRW_REGISTER_TYPE_D:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
         break;
      case BRW_REGISTER_TYPE_UD:
         insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
         insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
         break;
      }
   }

   return insn;
}
/***********************************************************************
 * Convenience routines.
 *
 * NOTE(review): the #define header lines of these macros were lost in
 * extraction and have been reconstructed from their bodies — verify the
 * macro names and continuation layout against the original file.
 */
/* One-source ALU emitter: brw_MOV, brw_NOT, ... */
#define ALU1(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0)		\
{								\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}

/* Two-source ALU emitter: brw_AND, brw_OR, ... */
#define ALU2(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0,		\
				 struct brw_reg src1)		\
{								\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Three-source ALU emitter (any integer/float type). */
#define ALU3(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0,		\
				 struct brw_reg src1,		\
				 struct brw_reg src2)		\
{								\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Three-source ALU emitter restricted to float operands. */
#define ALU3F(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0,		\
				 struct brw_reg src1,		\
				 struct brw_reg src2)		\
{								\
   assert(dest.type == BRW_REGISTER_TYPE_F);			\
   assert(src0.type == BRW_REGISTER_TYPE_F);			\
   assert(src1.type == BRW_REGISTER_TYPE_F);			\
   assert(src2.type == BRW_REGISTER_TYPE_F);			\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)						      \
void brw_##OP(struct brw_compile *p,				      \
	      struct brw_reg dest,				      \
	      struct brw_reg src)				      \
{								      \
   struct brw_instruction *rnd, *add;				      \
   rnd = next_insn(p, BRW_OPCODE_##OP);				      \
   brw_set_dest(p, rnd, dest);					      \
   brw_set_src0(p, rnd, src);					      \
								      \
   if (p->brw->intel.gen < 6) {					      \
      /* turn on round-increments */				      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;	      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));		      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;	      \
   }								      \
}
/* Emit an ADD, asserting that float and signed/unsigned-dword operands are
 * not mixed (the hardware cannot combine them in one instruction).
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
/* Emit an AVG (integer average).  AVG is defined only for matching integer
 * operand types; anything else trips the assertion.
 */
struct brw_instruction *brw_AVG(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      assert(!"Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
/* Emit a MUL, asserting the hardware's multiply restrictions: an integer
 * multiply cannot write a float destination, float and dword operands may
 * not mix, and neither source may be the accumulator.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
/* Emit a NOP.  The operands are irrelevant to the hardware but are encoded
 * as g0/immediate 0 so the instruction validates cleanly.
 */
void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_ud(0x0));
}
1070 /***********************************************************************
1071 * Comparisons, if/else/endif
1074 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1075 struct brw_reg dest
,
1076 struct brw_reg src0
,
1077 struct brw_reg src1
)
1079 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1081 insn
->header
.execution_size
= 1;
1082 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1083 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1085 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1091 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1093 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1095 p
->if_stack_depth
++;
1096 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1097 p
->if_stack_array_size
*= 2;
1098 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1099 p
->if_stack_array_size
);
1103 static struct brw_instruction
*
1104 pop_if_stack(struct brw_compile
*p
)
1106 p
->if_stack_depth
--;
1107 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1111 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1113 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1114 p
->loop_stack_array_size
*= 2;
1115 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1116 p
->loop_stack_array_size
);
1117 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1118 p
->loop_stack_array_size
);
1121 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1122 p
->loop_stack_depth
++;
1123 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1126 static struct brw_instruction
*
1127 get_inner_do_insn(struct brw_compile
*p
)
1129 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1132 /* EU takes the value from the flag register and pushes it onto some
1133 * sort of a stack (presumably merging with any flag value already on
1134 * the stack). Within an if block, the flags at the top of the stack
1135 * control execution on each channel of the unit, eg. on each of the
1136 * 16 pixel values in our wm programs.
1138 * When the matching 'else' instruction is reached (presumably by
1139 * countdown of the instruction count patched in by our ELSE/ENDIF
1140 * functions), the relevent flags are inverted.
1142 * When the matching 'endif' instruction is reached, the flags are
1143 * popped off. If the stack is now empty, normal execution resumes.
1145 struct brw_instruction
*
1146 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
1148 struct intel_context
*intel
= &p
->brw
->intel
;
1149 struct brw_instruction
*insn
;
1151 insn
= next_insn(p
, BRW_OPCODE_IF
);
1153 /* Override the defaults for this instruction:
1155 if (intel
->gen
< 6) {
1156 brw_set_dest(p
, insn
, brw_ip_reg());
1157 brw_set_src0(p
, insn
, brw_ip_reg());
1158 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1159 } else if (intel
->gen
== 6) {
1160 brw_set_dest(p
, insn
, brw_imm_w(0));
1161 insn
->bits1
.branch_gen6
.jump_count
= 0;
1162 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1163 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1165 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1166 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
1167 brw_set_src1(p
, insn
, brw_imm_ud(0));
1168 insn
->bits3
.break_cont
.jip
= 0;
1169 insn
->bits3
.break_cont
.uip
= 0;
1172 insn
->header
.execution_size
= execute_size
;
1173 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1174 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1175 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1176 if (!p
->single_program_flow
)
1177 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1179 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1181 push_if_stack(p
, insn
);
1182 p
->if_depth_in_loop
[p
->loop_stack_depth
]++;
1186 /* This function is only used for gen6-style IF instructions with an
1187 * embedded comparison (conditional modifier). It is not used on gen7.
1189 struct brw_instruction
*
1190 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1191 struct brw_reg src0
, struct brw_reg src1
)
1193 struct brw_instruction
*insn
;
1195 insn
= next_insn(p
, BRW_OPCODE_IF
);
1197 brw_set_dest(p
, insn
, brw_imm_w(0));
1198 if (p
->compressed
) {
1199 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1201 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1203 insn
->bits1
.branch_gen6
.jump_count
= 0;
1204 brw_set_src0(p
, insn
, src0
);
1205 brw_set_src1(p
, insn
, src1
);
1207 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1208 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1209 insn
->header
.destreg__conditionalmod
= conditional
;
1211 if (!p
->single_program_flow
)
1212 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1214 push_if_stack(p
, insn
);
1219 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1222 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1223 struct brw_instruction
*if_inst
,
1224 struct brw_instruction
*else_inst
)
1226 /* The next instruction (where the ENDIF would be, if it existed) */
1227 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1229 assert(p
->single_program_flow
);
1230 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1231 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1232 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1234 /* Convert IF to an ADD instruction that moves the instruction pointer
1235 * to the first instruction of the ELSE block. If there is no ELSE
1236 * block, point to where ENDIF would be. Reverse the predicate.
1238 * There's no need to execute an ENDIF since we don't need to do any
1239 * stack operations, and if we're currently executing, we just want to
1240 * continue normally.
1242 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1243 if_inst
->header
.predicate_inverse
= 1;
1245 if (else_inst
!= NULL
) {
1246 /* Convert ELSE to an ADD instruction that points where the ENDIF
1249 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1251 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1252 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1254 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1259 * Patch IF and ELSE instructions with appropriate jump targets.
1262 patch_IF_ELSE(struct brw_compile
*p
,
1263 struct brw_instruction
*if_inst
,
1264 struct brw_instruction
*else_inst
,
1265 struct brw_instruction
*endif_inst
)
1267 struct intel_context
*intel
= &p
->brw
->intel
;
1269 /* We shouldn't be patching IF and ELSE instructions in single program flow
1270 * mode when gen < 6, because in single program flow mode on those
1271 * platforms, we convert flow control instructions to conditional ADDs that
1272 * operate on IP (see brw_ENDIF).
1274 * However, on Gen6, writing to IP doesn't work in single program flow mode
1275 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1276 * not be updated by non-flow control instructions."). And on later
1277 * platforms, there is no significant benefit to converting control flow
1278 * instructions to conditional ADDs. So we do patch IF and ELSE
1279 * instructions in single program flow mode on those platforms.
1282 assert(!p
->single_program_flow
);
1284 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1285 assert(endif_inst
!= NULL
);
1286 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1289 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1290 * requires 2 chunks.
1292 if (intel
->gen
>= 5)
1295 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1296 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1298 if (else_inst
== NULL
) {
1299 /* Patch IF -> ENDIF */
1300 if (intel
->gen
< 6) {
1301 /* Turn it into an IFF, which means no mask stack operations for
1302 * all-false and jumping past the ENDIF.
1304 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1305 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1306 if_inst
->bits3
.if_else
.pop_count
= 0;
1307 if_inst
->bits3
.if_else
.pad0
= 0;
1308 } else if (intel
->gen
== 6) {
1309 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1310 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1312 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1313 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1316 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1318 /* Patch IF -> ELSE */
1319 if (intel
->gen
< 6) {
1320 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1321 if_inst
->bits3
.if_else
.pop_count
= 0;
1322 if_inst
->bits3
.if_else
.pad0
= 0;
1323 } else if (intel
->gen
== 6) {
1324 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1327 /* Patch ELSE -> ENDIF */
1328 if (intel
->gen
< 6) {
1329 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1332 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1333 else_inst
->bits3
.if_else
.pop_count
= 1;
1334 else_inst
->bits3
.if_else
.pad0
= 0;
1335 } else if (intel
->gen
== 6) {
1336 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1337 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1339 /* The IF instruction's JIP should point just past the ELSE */
1340 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1341 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1342 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1343 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1349 brw_ELSE(struct brw_compile
*p
)
1351 struct intel_context
*intel
= &p
->brw
->intel
;
1352 struct brw_instruction
*insn
;
1354 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1356 if (intel
->gen
< 6) {
1357 brw_set_dest(p
, insn
, brw_ip_reg());
1358 brw_set_src0(p
, insn
, brw_ip_reg());
1359 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1360 } else if (intel
->gen
== 6) {
1361 brw_set_dest(p
, insn
, brw_imm_w(0));
1362 insn
->bits1
.branch_gen6
.jump_count
= 0;
1363 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1364 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1366 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1367 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1368 brw_set_src1(p
, insn
, brw_imm_ud(0));
1369 insn
->bits3
.break_cont
.jip
= 0;
1370 insn
->bits3
.break_cont
.uip
= 0;
1373 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1374 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1375 if (!p
->single_program_flow
)
1376 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1378 push_if_stack(p
, insn
);
1382 brw_ENDIF(struct brw_compile
*p
)
1384 struct intel_context
*intel
= &p
->brw
->intel
;
1385 struct brw_instruction
*insn
= NULL
;
1386 struct brw_instruction
*else_inst
= NULL
;
1387 struct brw_instruction
*if_inst
= NULL
;
1388 struct brw_instruction
*tmp
;
1389 bool emit_endif
= true;
1391 /* In single program flow mode, we can express IF and ELSE instructions
1392 * equivalently as ADD instructions that operate on IP. On platforms prior
1393 * to Gen6, flow control instructions cause an implied thread switch, so
1394 * this is a significant savings.
1396 * However, on Gen6, writing to IP doesn't work in single program flow mode
1397 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1398 * not be updated by non-flow control instructions."). And on later
1399 * platforms, there is no significant benefit to converting control flow
1400 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1403 if (intel
->gen
< 6 && p
->single_program_flow
)
1407 * A single next_insn() may change the base adress of instruction store
1408 * memory(p->store), so call it first before referencing the instruction
1409 * store pointer from an index
1412 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1414 /* Pop the IF and (optional) ELSE instructions from the stack */
1415 p
->if_depth_in_loop
[p
->loop_stack_depth
]--;
1416 tmp
= pop_if_stack(p
);
1417 if (tmp
->header
.opcode
== BRW_OPCODE_ELSE
) {
1419 tmp
= pop_if_stack(p
);
1424 /* ENDIF is useless; don't bother emitting it. */
1425 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1429 if (intel
->gen
< 6) {
1430 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1431 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1432 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1433 } else if (intel
->gen
== 6) {
1434 brw_set_dest(p
, insn
, brw_imm_w(0));
1435 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1436 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1438 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1439 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1440 brw_set_src1(p
, insn
, brw_imm_ud(0));
1443 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1444 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1445 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1447 /* Also pop item off the stack in the endif instruction: */
1448 if (intel
->gen
< 6) {
1449 insn
->bits3
.if_else
.jump_count
= 0;
1450 insn
->bits3
.if_else
.pop_count
= 1;
1451 insn
->bits3
.if_else
.pad0
= 0;
1452 } else if (intel
->gen
== 6) {
1453 insn
->bits1
.branch_gen6
.jump_count
= 2;
1455 insn
->bits3
.break_cont
.jip
= 2;
1457 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1460 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
1462 struct intel_context
*intel
= &p
->brw
->intel
;
1463 struct brw_instruction
*insn
;
1465 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1466 if (intel
->gen
>= 6) {
1467 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1468 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1469 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1471 brw_set_dest(p
, insn
, brw_ip_reg());
1472 brw_set_src0(p
, insn
, brw_ip_reg());
1473 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1474 insn
->bits3
.if_else
.pad0
= 0;
1475 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1477 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1478 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1483 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1485 struct brw_instruction
*insn
;
1487 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1488 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1489 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1490 brw_set_dest(p
, insn
, brw_ip_reg());
1491 brw_set_src0(p
, insn
, brw_ip_reg());
1492 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1494 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1495 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1499 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1501 struct brw_instruction
*insn
;
1502 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1503 brw_set_dest(p
, insn
, brw_ip_reg());
1504 brw_set_src0(p
, insn
, brw_ip_reg());
1505 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1506 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1507 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1508 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1509 insn
->bits3
.if_else
.pad0
= 0;
1510 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
1514 struct brw_instruction
*gen6_HALT(struct brw_compile
*p
)
1516 struct brw_instruction
*insn
;
1518 insn
= next_insn(p
, BRW_OPCODE_HALT
);
1519 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1520 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1521 brw_set_src1(p
, insn
, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1523 if (p
->compressed
) {
1524 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1526 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1527 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1534 * The DO/WHILE is just an unterminated loop -- break or continue are
1535 * used for control within the loop. We have a few ways they can be
1538 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1539 * jip and no DO instruction.
1541 * For non-uniform control flow pre-gen6, there's a DO instruction to
1542 * push the mask, and a WHILE to jump back, and BREAK to get out and
1545 * For gen6, there's no more mask stack, so no need for DO. WHILE
1546 * just points back to the first instruction of the loop.
1548 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1550 struct intel_context
*intel
= &p
->brw
->intel
;
1552 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1553 push_loop_stack(p
, &p
->store
[p
->nr_insn
]);
1554 return &p
->store
[p
->nr_insn
];
1556 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1558 push_loop_stack(p
, insn
);
1560 /* Override the defaults for this instruction:
1562 brw_set_dest(p
, insn
, brw_null_reg());
1563 brw_set_src0(p
, insn
, brw_null_reg());
1564 brw_set_src1(p
, insn
, brw_null_reg());
1566 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1567 insn
->header
.execution_size
= execute_size
;
1568 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1569 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1570 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1577 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1580 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1581 * nesting, since it can always just point to the end of the block/current loop.
1584 brw_patch_break_cont(struct brw_compile
*p
, struct brw_instruction
*while_inst
)
1586 struct intel_context
*intel
= &p
->brw
->intel
;
1587 struct brw_instruction
*do_inst
= get_inner_do_insn(p
);
1588 struct brw_instruction
*inst
;
1589 int br
= (intel
->gen
== 5) ? 2 : 1;
1591 for (inst
= while_inst
- 1; inst
!= do_inst
; inst
--) {
1592 /* If the jump count is != 0, that means that this instruction has already
1593 * been patched because it's part of a loop inside of the one we're
1596 if (inst
->header
.opcode
== BRW_OPCODE_BREAK
&&
1597 inst
->bits3
.if_else
.jump_count
== 0) {
1598 inst
->bits3
.if_else
.jump_count
= br
* ((while_inst
- inst
) + 1);
1599 } else if (inst
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
1600 inst
->bits3
.if_else
.jump_count
== 0) {
1601 inst
->bits3
.if_else
.jump_count
= br
* (while_inst
- inst
);
1606 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
)
1608 struct intel_context
*intel
= &p
->brw
->intel
;
1609 struct brw_instruction
*insn
, *do_insn
;
1612 if (intel
->gen
>= 5)
1615 if (intel
->gen
>= 7) {
1616 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1617 do_insn
= get_inner_do_insn(p
);
1619 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1620 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1621 brw_set_src1(p
, insn
, brw_imm_ud(0));
1622 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1624 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1625 } else if (intel
->gen
== 6) {
1626 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1627 do_insn
= get_inner_do_insn(p
);
1629 brw_set_dest(p
, insn
, brw_imm_w(0));
1630 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1631 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1632 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1634 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1636 if (p
->single_program_flow
) {
1637 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1638 do_insn
= get_inner_do_insn(p
);
1640 brw_set_dest(p
, insn
, brw_ip_reg());
1641 brw_set_src0(p
, insn
, brw_ip_reg());
1642 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1643 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1645 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1646 do_insn
= get_inner_do_insn(p
);
1648 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1650 brw_set_dest(p
, insn
, brw_ip_reg());
1651 brw_set_src0(p
, insn
, brw_ip_reg());
1652 brw_set_src1(p
, insn
, brw_imm_d(0));
1654 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1655 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1656 insn
->bits3
.if_else
.pop_count
= 0;
1657 insn
->bits3
.if_else
.pad0
= 0;
1659 brw_patch_break_cont(p
, insn
);
1662 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1663 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1665 p
->loop_stack_depth
--;
1673 void brw_land_fwd_jump(struct brw_compile
*p
, int jmp_insn_idx
)
1675 struct intel_context
*intel
= &p
->brw
->intel
;
1676 struct brw_instruction
*jmp_insn
= &p
->store
[jmp_insn_idx
];
1679 if (intel
->gen
>= 5)
1682 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1683 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1685 jmp_insn
->bits3
.ud
= jmpi
* (p
->nr_insn
- jmp_insn_idx
- 1);
1690 /* To integrate with the above, it makes sense that the comparison
1691 * instruction should populate the flag register. It might be simpler
1692 * just to use the flag reg for most WM tasks?
1694 void brw_CMP(struct brw_compile
*p
,
1695 struct brw_reg dest
,
1697 struct brw_reg src0
,
1698 struct brw_reg src1
)
1700 struct intel_context
*intel
= &p
->brw
->intel
;
1701 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1703 insn
->header
.destreg__conditionalmod
= conditional
;
1704 brw_set_dest(p
, insn
, dest
);
1705 brw_set_src0(p
, insn
, src0
);
1706 brw_set_src1(p
, insn
, src1
);
1708 /* guess_execution_size(insn, src0); */
1711 /* Make it so that future instructions will use the computed flag
1712 * value until brw_set_predicate_control_flag_value() is called
1715 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1717 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1718 p
->flag_value
= 0xff;
1721 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1723 * "Any CMP instruction with a null destination must use a {switch}."
1725 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1726 * mentioned on their work-arounds pages.
1728 if (intel
->gen
== 7) {
1729 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1730 dest
.nr
== BRW_ARF_NULL
) {
1731 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1736 /* Issue 'wait' instruction for n1, host could program MMIO
1737 to wake up thread. */
1738 void brw_WAIT (struct brw_compile
*p
)
1740 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1741 struct brw_reg src
= brw_notification_1_reg();
1743 brw_set_dest(p
, insn
, src
);
1744 brw_set_src0(p
, insn
, src
);
1745 brw_set_src1(p
, insn
, brw_null_reg());
1746 insn
->header
.execution_size
= 0; /* must */
1747 insn
->header
.predicate_control
= 0;
1748 insn
->header
.compression_control
= 0;
1752 /***********************************************************************
1753 * Helpers for the various SEND message types:
1756 /** Extended math function, float[8].
1758 void brw_math( struct brw_compile
*p
,
1759 struct brw_reg dest
,
1766 struct intel_context
*intel
= &p
->brw
->intel
;
1768 if (intel
->gen
>= 6) {
1769 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1771 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1772 (intel
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1773 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1775 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1776 if (intel
->gen
== 6)
1777 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1779 /* Source modifiers are ignored for extended math instructions on Gen6. */
1780 if (intel
->gen
== 6) {
1781 assert(!src
.negate
);
1785 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1786 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1787 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1788 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1790 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1793 /* Math is the same ISA format as other opcodes, except that CondModifier
1794 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1796 insn
->header
.destreg__conditionalmod
= function
;
1798 brw_set_dest(p
, insn
, dest
);
1799 brw_set_src0(p
, insn
, src
);
1800 brw_set_src1(p
, insn
, brw_null_reg());
1802 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1804 /* Example code doesn't set predicate_control for send
1807 insn
->header
.predicate_control
= 0;
1808 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1810 brw_set_dest(p
, insn
, dest
);
1811 brw_set_src0(p
, insn
, src
);
1812 brw_set_math_message(p
,
1815 src
.type
== BRW_REGISTER_TYPE_D
,
1821 /** Extended math function, float[8].
1823 void brw_math2(struct brw_compile
*p
,
1824 struct brw_reg dest
,
1826 struct brw_reg src0
,
1827 struct brw_reg src1
)
1829 struct intel_context
*intel
= &p
->brw
->intel
;
1830 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1832 assert(intel
->gen
>= 6);
1836 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
1837 (intel
->gen
>= 7 && dest
.file
== BRW_MESSAGE_REGISTER_FILE
));
1838 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1839 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1841 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1842 if (intel
->gen
== 6) {
1843 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1844 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1847 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1848 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1849 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1850 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1851 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1853 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1854 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1857 /* Source modifiers are ignored for extended math instructions on Gen6. */
1858 if (intel
->gen
== 6) {
1859 assert(!src0
.negate
);
1861 assert(!src1
.negate
);
1865 /* Math is the same ISA format as other opcodes, except that CondModifier
1866 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1868 insn
->header
.destreg__conditionalmod
= function
;
1870 brw_set_dest(p
, insn
, dest
);
1871 brw_set_src0(p
, insn
, src0
);
1872 brw_set_src1(p
, insn
, src1
);
1877 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1878 * using a constant offset per channel.
1880 * The offset must be aligned to oword size (16 bytes). Used for
1881 * register spilling.
1883 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1888 struct intel_context
*intel
= &p
->brw
->intel
;
1889 uint32_t msg_control
, msg_type
;
1892 if (intel
->gen
>= 6)
1895 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1897 if (num_regs
== 1) {
1898 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1901 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1905 /* Set up the message header. This is g0, with g0.2 filled with
1906 * the offset. We don't want to leave our offset around in g0 or
1907 * it'll screw up texture samples, so set it up inside the message
1911 brw_push_insn_state(p
);
1912 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1913 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1915 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1917 /* set message header global offset field (reg 0, element 2) */
1919 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1921 2), BRW_REGISTER_TYPE_UD
),
1922 brw_imm_ud(offset
));
1924 brw_pop_insn_state(p
);
1928 struct brw_reg dest
;
1929 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1930 int send_commit_msg
;
1931 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1932 BRW_REGISTER_TYPE_UW
);
1934 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1935 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1936 src_header
= vec16(src_header
);
1938 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1939 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1941 /* Until gen6, writes followed by reads from the same location
1942 * are not guaranteed to be ordered unless write_commit is set.
1943 * If set, then a no-op write is issued to the destination
1944 * register to set a dependency, and a read from the destination
1945 * can be used to ensure the ordering.
1947 * For gen6, only writes between different threads need ordering
1948 * protection. Our use of DP writes is all about register
1949 * spilling within a thread.
1951 if (intel
->gen
>= 6) {
1952 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1953 send_commit_msg
= 0;
1956 send_commit_msg
= 1;
1959 brw_set_dest(p
, insn
, dest
);
1960 if (intel
->gen
>= 6) {
1961 brw_set_src0(p
, insn
, mrf
);
1963 brw_set_src0(p
, insn
, brw_null_reg());
1966 if (intel
->gen
>= 6)
1967 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1969 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1971 brw_set_dp_write_message(p
,
1973 255, /* binding table index (255=stateless) */
1977 true, /* header_present */
1978 0, /* not a render target */
1979 send_commit_msg
, /* response_length */
1987 * Read a block of owords (half a GRF each) from the scratch buffer
1988 * using a constant index per channel.
1990 * Offset must be aligned to oword size (16 bytes). Used for register
1994 brw_oword_block_read_scratch(struct brw_compile
*p
,
1995 struct brw_reg dest
,
2000 struct intel_context
*intel
= &p
->brw
->intel
;
2001 uint32_t msg_control
;
2004 if (intel
->gen
>= 6)
2007 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2008 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
2010 if (num_regs
== 1) {
2011 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
2014 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
2019 brw_push_insn_state(p
);
2020 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2021 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2023 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2025 /* set message header global offset field (reg 0, element 2) */
2027 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2029 2), BRW_REGISTER_TYPE_UD
),
2030 brw_imm_ud(offset
));
2032 brw_pop_insn_state(p
);
2036 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2038 assert(insn
->header
.predicate_control
== 0);
2039 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2040 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2042 brw_set_dest(p
, insn
, dest
); /* UW? */
2043 if (intel
->gen
>= 6) {
2044 brw_set_src0(p
, insn
, mrf
);
2046 brw_set_src0(p
, insn
, brw_null_reg());
2049 brw_set_dp_read_message(p
,
2051 255, /* binding table index (255=stateless) */
2053 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
2054 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
2056 true, /* header_present */
2062 * Read a float[4] vector from the data port Data Cache (const buffer).
2063 * Location (in buffer) should be a multiple of 16.
2064 * Used for fetching shader constants.
2066 void brw_oword_block_read(struct brw_compile
*p
,
2067 struct brw_reg dest
,
2070 uint32_t bind_table_index
)
2072 struct intel_context
*intel
= &p
->brw
->intel
;
2074 /* On newer hardware, offset is in units of owords. */
2075 if (intel
->gen
>= 6)
2078 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
2080 brw_push_insn_state(p
);
2081 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2082 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2083 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2085 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
2087 /* set message header global offset field (reg 0, element 2) */
2089 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
2091 2), BRW_REGISTER_TYPE_UD
),
2092 brw_imm_ud(offset
));
2094 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2095 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
2097 /* cast dest to a uword[8] vector */
2098 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
2100 brw_set_dest(p
, insn
, dest
);
2101 if (intel
->gen
>= 6) {
2102 brw_set_src0(p
, insn
, mrf
);
2104 brw_set_src0(p
, insn
, brw_null_reg());
2107 brw_set_dp_read_message(p
,
2110 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
2111 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
2112 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2114 true, /* header_present */
2115 1); /* response_length (1 reg, 2 owords!) */
2117 brw_pop_insn_state(p
);
2121 void brw_fb_WRITE(struct brw_compile
*p
,
2124 struct brw_reg src0
,
2126 GLuint binding_table_index
,
2128 GLuint response_length
,
2130 bool header_present
)
2132 struct intel_context
*intel
= &p
->brw
->intel
;
2133 struct brw_instruction
*insn
;
2135 struct brw_reg dest
;
2137 if (dispatch_width
== 16)
2138 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2140 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2142 if (intel
->gen
>= 6) {
2143 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2145 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2147 /* The execution mask is ignored for render target writes. */
2148 insn
->header
.predicate_control
= 0;
2149 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2151 if (intel
->gen
>= 6) {
2152 /* headerless version, just submit color payload */
2153 src0
= brw_message_reg(msg_reg_nr
);
2155 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2157 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2159 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2162 brw_set_dest(p
, insn
, dest
);
2163 brw_set_src0(p
, insn
, src0
);
2164 brw_set_dp_write_message(p
,
2166 binding_table_index
,
2171 eot
, /* last render target write */
2174 0 /* send_commit_msg */);
2179 * Texture sample instruction.
2180 * Note: the msg_type plus msg_length values determine exactly what kind
2181 * of sampling operation is performed. See volume 4, page 161 of docs.
2183 void brw_SAMPLE(struct brw_compile
*p
,
2184 struct brw_reg dest
,
2186 struct brw_reg src0
,
2187 GLuint binding_table_index
,
2190 GLuint response_length
,
2192 GLuint header_present
,
2194 GLuint return_format
)
2196 struct intel_context
*intel
= &p
->brw
->intel
;
2197 struct brw_instruction
*insn
;
2199 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2201 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2202 insn
->header
.predicate_control
= 0; /* XXX */
2203 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2205 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2207 brw_set_dest(p
, insn
, dest
);
2208 brw_set_src0(p
, insn
, src0
);
2209 brw_set_sampler_message(p
, insn
,
2210 binding_table_index
,
2220 /* All these variables are pretty confusing - we might be better off
2221 * using bitmasks and macros for this, in the old style. Or perhaps
2222 * just having the caller instantiate the fields in dword3 itself.
2224 void brw_urb_WRITE(struct brw_compile
*p
,
2225 struct brw_reg dest
,
2227 struct brw_reg src0
,
2231 GLuint response_length
,
2233 bool writes_complete
,
2237 struct intel_context
*intel
= &p
->brw
->intel
;
2238 struct brw_instruction
*insn
;
2240 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2242 if (intel
->gen
== 7) {
2243 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2244 brw_push_insn_state(p
);
2245 brw_set_access_mode(p
, BRW_ALIGN_1
);
2246 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2247 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2248 BRW_REGISTER_TYPE_UD
),
2249 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2250 brw_imm_ud(0xff00));
2251 brw_pop_insn_state(p
);
2254 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2256 assert(msg_length
< BRW_MAX_MRF
);
2258 brw_set_dest(p
, insn
, dest
);
2259 brw_set_src0(p
, insn
, src0
);
2260 brw_set_src1(p
, insn
, brw_imm_d(0));
2263 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2265 brw_set_urb_message(p
,
2278 next_ip(struct brw_compile
*p
, int ip
)
2280 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2282 if (insn
->header
.cmpt_control
)
2289 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2292 void *store
= p
->store
;
2294 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2295 struct brw_instruction
*insn
= store
+ ip
;
2297 switch (insn
->header
.opcode
) {
2298 case BRW_OPCODE_ENDIF
:
2299 case BRW_OPCODE_ELSE
:
2300 case BRW_OPCODE_WHILE
:
2301 case BRW_OPCODE_HALT
:
2309 /* There is no DO instruction on gen6, so to find the end of the loop
2310 * we have to see if the loop is jumping back before our start
2314 brw_find_loop_end(struct brw_compile
*p
, int start
)
2316 struct intel_context
*intel
= &p
->brw
->intel
;
2319 void *store
= p
->store
;
2321 /* Always start after the instruction (such as a WHILE) we're trying to fix
2324 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2325 struct brw_instruction
*insn
= store
+ ip
;
2327 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2328 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2329 : insn
->bits3
.break_cont
.jip
;
2330 if (ip
+ jip
* scale
<= start
)
2334 assert(!"not reached");
2338 /* After program generation, go back and update the UIP and JIP of
2339 * BREAK, CONT, and HALT instructions to their correct locations.
2342 brw_set_uip_jip(struct brw_compile
*p
)
2344 struct intel_context
*intel
= &p
->brw
->intel
;
2347 void *store
= p
->store
;
2352 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2353 struct brw_instruction
*insn
= store
+ ip
;
2355 if (insn
->header
.cmpt_control
) {
2356 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2357 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2358 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2359 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2363 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2364 switch (insn
->header
.opcode
) {
2365 case BRW_OPCODE_BREAK
:
2366 assert(block_end_ip
!= 0);
2367 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2368 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2369 insn
->bits3
.break_cont
.uip
=
2370 (brw_find_loop_end(p
, ip
) - ip
+
2371 (intel
->gen
== 6 ? 16 : 0)) / scale
;
2373 case BRW_OPCODE_CONTINUE
:
2374 assert(block_end_ip
!= 0);
2375 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2376 insn
->bits3
.break_cont
.uip
=
2377 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2379 assert(insn
->bits3
.break_cont
.uip
!= 0);
2380 assert(insn
->bits3
.break_cont
.jip
!= 0);
2383 case BRW_OPCODE_ENDIF
:
2384 if (block_end_ip
== 0)
2385 insn
->bits3
.break_cont
.jip
= 2;
2387 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2390 case BRW_OPCODE_HALT
:
2391 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2393 * "In case of the halt instruction not inside any conditional
2394 * code block, the value of <JIP> and <UIP> should be the
2395 * same. In case of the halt instruction inside conditional code
2396 * block, the <UIP> should be the end of the program, and the
2397 * <JIP> should be end of the most inner conditional code block."
2399 * The uip will have already been set by whoever set up the
2402 if (block_end_ip
== 0) {
2403 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2405 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2407 assert(insn
->bits3
.break_cont
.uip
!= 0);
2408 assert(insn
->bits3
.break_cont
.jip
!= 0);
2414 void brw_ff_sync(struct brw_compile
*p
,
2415 struct brw_reg dest
,
2417 struct brw_reg src0
,
2419 GLuint response_length
,
2422 struct intel_context
*intel
= &p
->brw
->intel
;
2423 struct brw_instruction
*insn
;
2425 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2427 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2428 brw_set_dest(p
, insn
, dest
);
2429 brw_set_src0(p
, insn
, src0
);
2430 brw_set_src1(p
, insn
, brw_imm_d(0));
2433 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2435 brw_set_ff_sync_message(p
,
2443 * Emit the SEND instruction necessary to generate stream output data on Gen6
2444 * (for transform feedback).
2446 * If send_commit_msg is true, this is the last piece of stream output data
2447 * from this thread, so send the data as a committed write. According to the
2448 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2450 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2451 * writes are complete by sending the final write as a committed write."
2454 brw_svb_write(struct brw_compile
*p
,
2455 struct brw_reg dest
,
2457 struct brw_reg src0
,
2458 GLuint binding_table_index
,
2459 bool send_commit_msg
)
2461 struct brw_instruction
*insn
;
2463 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2465 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2466 brw_set_dest(p
, insn
, dest
);
2467 brw_set_src0(p
, insn
, src0
);
2468 brw_set_src1(p
, insn
, brw_imm_d(0));
2469 brw_set_dp_write_message(p
, insn
,
2470 binding_table_index
,
2471 0, /* msg_control: ignored */
2472 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2474 true, /* header_present */
2475 0, /* last_render_target: ignored */
2476 send_commit_msg
, /* response_length */
2477 0, /* end_of_thread */
2478 send_commit_msg
); /* send_commit_msg */
2482 * This instruction is generated as a single-channel align1 instruction by
2483 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2485 * We can't use the typed atomic op in the FS because that has the execution
2486 * mask ANDed with the pixel mask, but we just want to write the one dword for
2489 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2490 * one u32. So we use the same untyped atomic write message as the pixel
2493 * The untyped atomic operation requires a BUFFER surface type with RAW
2494 * format, and is only accessible through the legacy DATA_CACHE dataport
2497 void brw_shader_time_add(struct brw_compile
*p
,
2498 struct brw_reg payload
,
2499 uint32_t surf_index
)
2501 struct intel_context
*intel
= &p
->brw
->intel
;
2502 assert(intel
->gen
>= 7);
2504 brw_push_insn_state(p
);
2505 brw_set_access_mode(p
, BRW_ALIGN_1
);
2506 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2507 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2508 brw_pop_insn_state(p
);
2510 /* We use brw_vec1_reg and unmasked because we want to increment the given
2513 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2515 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2518 uint32_t sfid
, msg_type
;
2519 if (intel
->is_haswell
) {
2520 sfid
= HSW_SFID_DATAPORT_DATA_CACHE_1
;
2521 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2523 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
2524 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2527 bool header_present
= false;
2529 uint32_t mlen
= 2; /* offset, value */
2531 brw_set_message_descriptor(p
, send
, sfid
, mlen
, rlen
, header_present
, eot
);
2533 send
->bits3
.ud
|= msg_type
<< 14;
2534 send
->bits3
.ud
|= 0 << 13; /* no return data */
2535 send
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2536 send
->bits3
.ud
|= BRW_AOP_ADD
<< 8;
2537 send
->bits3
.ud
|= surf_index
<< 0;