2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
104 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
107 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
108 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
109 assert(dest
.nr
< 128);
111 gen7_convert_mrf_to_grf(p
, &dest
);
113 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
114 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
115 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
117 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
118 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
120 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
121 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
122 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
123 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
124 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
127 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
128 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
129 assert(dest
.dw1
.bits
.writemask
!= 0 ||
130 dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
);
131 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
132 * Although Dst.HorzStride is a don't care for Align16, HW needs
133 * this to be programmed as "01".
135 insn
->bits1
.da16
.dest_horiz_stride
= 1;
139 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
141 /* These are different sizes in align1 vs align16:
143 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
144 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
145 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
146 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
147 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
150 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
151 /* even ignored in da16, still need to set as '01' */
152 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
156 /* NEW: Set the execution size based on dest.width and
157 * insn->compression_control:
159 guess_execution_size(p
, insn
, dest
);
162 extern int reg_type_size
[];
165 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
167 int hstride_for_reg
[] = {0, 1, 2, 4};
168 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
169 int width_for_reg
[] = {1, 2, 4, 8, 16};
170 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
171 int width
, hstride
, vstride
, execsize
;
173 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
174 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
175 * mean the destination has to be 128-bit aligned and the
176 * destination horiz stride has to be a word.
178 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
179 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
180 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
186 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
187 reg
.file
== BRW_ARF_NULL
)
190 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
191 hstride
= hstride_for_reg
[reg
.hstride
];
193 if (reg
.vstride
== 0xf) {
196 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
197 vstride
= vstride_for_reg
[reg
.vstride
];
200 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
201 width
= width_for_reg
[reg
.width
];
203 assert(insn
->header
.execution_size
>= 0 &&
204 insn
->header
.execution_size
< Elements(execsize_for_reg
));
205 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
207 /* Restrictions from 3.3.10: Register Region Restrictions. */
209 assert(execsize
>= width
);
212 if (execsize
== width
&& hstride
!= 0) {
213 assert(vstride
== -1 || vstride
== width
* hstride
);
217 if (execsize
== width
&& hstride
== 0) {
218 /* no restriction on vstride. */
223 assert(hstride
== 0);
227 if (execsize
== 1 && width
== 1) {
228 assert(hstride
== 0);
229 assert(vstride
== 0);
233 if (vstride
== 0 && hstride
== 0) {
237 /* 10. Check destination issues. */
241 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
244 struct brw_context
*brw
= p
->brw
;
246 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
247 assert(reg
.nr
< 128);
249 gen7_convert_mrf_to_grf(p
, ®
);
251 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
252 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
253 /* Any source modifiers or regions will be ignored, since this just
254 * identifies the MRF/GRF to start reading the message contents from.
255 * Check for some likely failures.
259 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
262 validate_reg(insn
, reg
);
264 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
265 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
266 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
267 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
268 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
270 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
271 insn
->bits3
.ud
= reg
.dw1
.ud
;
273 /* Required to set some fields in src1 as well:
275 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
276 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
280 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
281 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
282 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
283 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
286 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
287 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
291 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
293 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
294 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
297 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
301 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
302 if (reg
.width
== BRW_WIDTH_1
&&
303 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
304 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
305 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
306 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
309 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
310 insn
->bits2
.da1
.src0_width
= reg
.width
;
311 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
315 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
316 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
317 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
318 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
320 /* This is an oddity of the fact we're using the same
321 * descriptions for registers in align_16 as align_1:
323 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
324 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
326 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
332 void brw_set_src1(struct brw_compile
*p
,
333 struct brw_instruction
*insn
,
336 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
338 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
339 assert(reg
.nr
< 128);
341 gen7_convert_mrf_to_grf(p
, ®
);
343 validate_reg(insn
, reg
);
345 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
346 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
347 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
348 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
350 /* Only src1 can be immediate in two-argument instructions.
352 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
354 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
355 insn
->bits3
.ud
= reg
.dw1
.ud
;
358 /* This is a hardware restriction, which may or may not be lifted
361 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
362 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
364 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
365 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
366 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
369 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
370 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
373 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
374 if (reg
.width
== BRW_WIDTH_1
&&
375 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
376 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
377 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
378 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
381 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
382 insn
->bits3
.da1
.src1_width
= reg
.width
;
383 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
387 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
388 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
389 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
390 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
392 /* This is an oddity of the fact we're using the same
393 * descriptions for registers in align_16 as align_1:
395 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
396 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
398 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
404 * Set the Message Descriptor and Extended Message Descriptor fields
407 * \note This zeroes out the Function Control bits, so it must be called
408 * \b before filling out any message-specific data. Callers can
409 * choose not to fill in irrelevant bits; they will be zero.
412 brw_set_message_descriptor(struct brw_compile
*p
,
413 struct brw_instruction
*inst
,
414 enum brw_message_target sfid
,
416 unsigned response_length
,
420 struct brw_context
*brw
= p
->brw
;
422 brw_set_src1(p
, inst
, brw_imm_d(0));
425 inst
->bits3
.generic_gen5
.header_present
= header_present
;
426 inst
->bits3
.generic_gen5
.response_length
= response_length
;
427 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
428 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
431 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
432 inst
->header
.destreg__conditionalmod
= sfid
;
434 /* Set Extended Message Descriptor (ex_desc) */
435 inst
->bits2
.send_gen5
.sfid
= sfid
;
436 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
439 inst
->bits3
.generic
.response_length
= response_length
;
440 inst
->bits3
.generic
.msg_length
= msg_length
;
441 inst
->bits3
.generic
.msg_target
= sfid
;
442 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
446 static void brw_set_math_message( struct brw_compile
*p
,
447 struct brw_instruction
*insn
,
453 struct brw_context
*brw
= p
->brw
;
455 unsigned response_length
;
457 /* Infer message length from the function */
459 case BRW_MATH_FUNCTION_POW
:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
461 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
462 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
470 /* Infer response length from the function */
472 case BRW_MATH_FUNCTION_SINCOS
:
473 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
482 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
483 msg_length
, response_length
, false, false);
485 insn
->bits3
.math_gen5
.function
= function
;
486 insn
->bits3
.math_gen5
.int_type
= integer_type
;
487 insn
->bits3
.math_gen5
.precision
= low_precision
;
488 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
489 insn
->bits3
.math_gen5
.data_type
= dataType
;
490 insn
->bits3
.math_gen5
.snapshot
= 0;
492 insn
->bits3
.math
.function
= function
;
493 insn
->bits3
.math
.int_type
= integer_type
;
494 insn
->bits3
.math
.precision
= low_precision
;
495 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
496 insn
->bits3
.math
.data_type
= dataType
;
498 insn
->header
.saturate
= 0;
502 static void brw_set_ff_sync_message(struct brw_compile
*p
,
503 struct brw_instruction
*insn
,
505 GLuint response_length
,
508 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
509 1, response_length
, true, end_of_thread
);
510 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
511 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
512 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
513 insn
->bits3
.urb_gen5
.allocate
= allocate
;
514 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
515 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
518 static void brw_set_urb_message( struct brw_compile
*p
,
519 struct brw_instruction
*insn
,
520 enum brw_urb_write_flags flags
,
522 GLuint response_length
,
524 GLuint swizzle_control
)
526 struct brw_context
*brw
= p
->brw
;
528 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
529 msg_length
, response_length
, true,
530 flags
& BRW_URB_WRITE_EOT
);
532 if (flags
& BRW_URB_WRITE_OWORD
) {
533 assert(msg_length
== 2); /* header + one OWORD of data */
534 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_OWORD
;
536 insn
->bits3
.urb_gen7
.opcode
= BRW_URB_OPCODE_WRITE_HWORD
;
538 insn
->bits3
.urb_gen7
.offset
= offset
;
539 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
540 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
541 insn
->bits3
.urb_gen7
.per_slot_offset
=
542 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
543 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
544 } else if (brw
->gen
>= 5) {
545 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
546 insn
->bits3
.urb_gen5
.offset
= offset
;
547 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
548 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
549 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
550 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
552 insn
->bits3
.urb
.opcode
= 0; /* ? */
553 insn
->bits3
.urb
.offset
= offset
;
554 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
555 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
556 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
557 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
562 brw_set_dp_write_message(struct brw_compile
*p
,
563 struct brw_instruction
*insn
,
564 GLuint binding_table_index
,
569 GLuint last_render_target
,
570 GLuint response_length
,
571 GLuint end_of_thread
,
572 GLuint send_commit_msg
)
574 struct brw_context
*brw
= p
->brw
;
578 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
579 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
580 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
582 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
583 } else if (brw
->gen
== 6) {
584 /* Use the render cache for all write messages. */
585 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
587 sfid
= BRW_SFID_DATAPORT_WRITE
;
590 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
591 header_present
, end_of_thread
);
594 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
595 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
596 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
597 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
598 } else if (brw
->gen
== 6) {
599 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
600 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
601 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
602 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
603 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
604 } else if (brw
->gen
== 5) {
605 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
606 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
607 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
608 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
609 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
611 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
612 insn
->bits3
.dp_write
.msg_control
= msg_control
;
613 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
614 insn
->bits3
.dp_write
.msg_type
= msg_type
;
615 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
620 brw_set_dp_read_message(struct brw_compile
*p
,
621 struct brw_instruction
*insn
,
622 GLuint binding_table_index
,
628 GLuint response_length
)
630 struct brw_context
*brw
= p
->brw
;
634 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
635 } else if (brw
->gen
== 6) {
636 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
637 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
639 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
641 sfid
= BRW_SFID_DATAPORT_READ
;
644 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
645 header_present
, false);
648 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
649 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
650 insn
->bits3
.gen7_dp
.last_render_target
= 0;
651 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
652 } else if (brw
->gen
== 6) {
653 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
654 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
655 insn
->bits3
.gen6_dp
.last_render_target
= 0;
656 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
657 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
658 } else if (brw
->gen
== 5) {
659 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
660 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
661 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
662 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
663 } else if (brw
->is_g4x
) {
664 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
665 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
666 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
667 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
669 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
670 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
671 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
672 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
677 brw_set_sampler_message(struct brw_compile
*p
,
678 struct brw_instruction
*insn
,
679 GLuint binding_table_index
,
682 GLuint response_length
,
684 GLuint header_present
,
686 GLuint return_format
)
688 struct brw_context
*brw
= p
->brw
;
690 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
691 response_length
, header_present
, false);
694 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
695 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
696 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
697 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
698 } else if (brw
->gen
>= 5) {
699 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
700 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
701 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
702 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
703 } else if (brw
->is_g4x
) {
704 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
705 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
706 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
708 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
709 insn
->bits3
.sampler
.sampler
= sampler
;
710 insn
->bits3
.sampler
.msg_type
= msg_type
;
711 insn
->bits3
.sampler
.return_format
= return_format
;
716 #define next_insn brw_next_insn
717 struct brw_instruction
*
718 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
720 struct brw_instruction
*insn
;
722 if (p
->nr_insn
+ 1 > p
->store_size
) {
724 printf("incresing the store size to %d\n", p
->store_size
<< 1);
726 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
727 struct brw_instruction
, p
->store_size
);
729 assert(!"realloc eu store memeory failed");
732 p
->next_insn_offset
+= 16;
733 insn
= &p
->store
[p
->nr_insn
++];
734 memcpy(insn
, p
->current
, sizeof(*insn
));
736 /* Reset this one-shot flag:
739 if (p
->current
->header
.destreg__conditionalmod
) {
740 p
->current
->header
.destreg__conditionalmod
= 0;
741 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
744 insn
->header
.opcode
= opcode
;
748 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
753 struct brw_instruction
*insn
= next_insn(p
, opcode
);
754 brw_set_dest(p
, insn
, dest
);
755 brw_set_src0(p
, insn
, src
);
759 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
763 struct brw_reg src1
)
765 struct brw_instruction
*insn
= next_insn(p
, opcode
);
766 brw_set_dest(p
, insn
, dest
);
767 brw_set_src0(p
, insn
, src0
);
768 brw_set_src1(p
, insn
, src1
);
773 get_3src_subreg_nr(struct brw_reg reg
)
775 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
776 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
777 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
779 return reg
.subnr
/ 4;
783 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
790 struct brw_context
*brw
= p
->brw
;
791 struct brw_instruction
*insn
= next_insn(p
, opcode
);
793 gen7_convert_mrf_to_grf(p
, &dest
);
795 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
797 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
798 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
799 assert(dest
.nr
< 128);
800 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
801 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
802 dest
.type
== BRW_REGISTER_TYPE_D
||
803 dest
.type
== BRW_REGISTER_TYPE_UD
);
804 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
805 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
806 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
807 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
808 guess_execution_size(p
, insn
, dest
);
810 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
811 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
812 assert(src0
.nr
< 128);
813 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
814 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
815 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
816 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
817 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
818 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
820 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
821 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
822 assert(src1
.nr
< 128);
823 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
824 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
825 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
826 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
827 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
828 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
829 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
831 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
832 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
833 assert(src2
.nr
< 128);
834 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
835 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
836 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
837 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
838 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
839 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
842 /* Set both the source and destination types based on dest.type,
843 * ignoring the source register types. The MAD and LRP emitters ensure
844 * that all four types are float. The BFE and BFI2 emitters, however,
845 * may send us mixed D and UD types and want us to ignore that and use
846 * the destination type.
849 case BRW_REGISTER_TYPE_F
:
850 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
851 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
853 case BRW_REGISTER_TYPE_D
:
854 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
855 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
857 case BRW_REGISTER_TYPE_UD
:
858 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
859 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
868 /***********************************************************************
869 * Convenience routines.
872 struct brw_instruction *brw_##OP(struct brw_compile *p, \
873 struct brw_reg dest, \
874 struct brw_reg src0) \
876 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
880 struct brw_instruction *brw_##OP(struct brw_compile *p, \
881 struct brw_reg dest, \
882 struct brw_reg src0, \
883 struct brw_reg src1) \
885 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
889 struct brw_instruction *brw_##OP(struct brw_compile *p, \
890 struct brw_reg dest, \
891 struct brw_reg src0, \
892 struct brw_reg src1, \
893 struct brw_reg src2) \
895 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
899 struct brw_instruction *brw_##OP(struct brw_compile *p, \
900 struct brw_reg dest, \
901 struct brw_reg src0, \
902 struct brw_reg src1, \
903 struct brw_reg src2) \
905 assert(dest.type == BRW_REGISTER_TYPE_F); \
906 assert(src0.type == BRW_REGISTER_TYPE_F); \
907 assert(src1.type == BRW_REGISTER_TYPE_F); \
908 assert(src2.type == BRW_REGISTER_TYPE_F); \
909 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
912 /* Rounding operations (other than RNDD) require two instructions - the first
913 * stores a rounded value (possibly the wrong way) in the dest register, but
914 * also sets a per-channel "increment bit" in the flag register. A predicated
915 * add of 1.0 fixes dest to contain the desired result.
917 * Sandybridge and later appear to round correctly without an ADD.
920 void brw_##OP(struct brw_compile *p, \
921 struct brw_reg dest, \
922 struct brw_reg src) \
924 struct brw_instruction *rnd, *add; \
925 rnd = next_insn(p, BRW_OPCODE_##OP); \
926 brw_set_dest(p, rnd, dest); \
927 brw_set_src0(p, rnd, src); \
929 if (p->brw->gen < 6) { \
930 /* turn on round-increments */ \
931 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
932 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
933 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
974 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
980 if (src0
.type
== BRW_REGISTER_TYPE_F
||
981 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
982 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
983 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
984 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
987 if (src1
.type
== BRW_REGISTER_TYPE_F
||
988 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
989 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
990 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
991 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
994 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
997 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
1000 struct brw_reg src1
)
1002 assert(dest
.type
== src0
.type
);
1003 assert(src0
.type
== src1
.type
);
1004 switch (src0
.type
) {
1005 case BRW_REGISTER_TYPE_B
:
1006 case BRW_REGISTER_TYPE_UB
:
1007 case BRW_REGISTER_TYPE_W
:
1008 case BRW_REGISTER_TYPE_UW
:
1009 case BRW_REGISTER_TYPE_D
:
1010 case BRW_REGISTER_TYPE_UD
:
1013 assert(!"Bad type for brw_AVG");
1016 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1019 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1020 struct brw_reg dest
,
1021 struct brw_reg src0
,
1022 struct brw_reg src1
)
1025 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1026 src0
.type
== BRW_REGISTER_TYPE_UD
||
1027 src1
.type
== BRW_REGISTER_TYPE_D
||
1028 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1029 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1032 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1033 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1034 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1035 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1036 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1039 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1040 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1041 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1042 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1043 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1046 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1047 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1048 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1049 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1051 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1055 void brw_NOP(struct brw_compile
*p
)
1057 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1058 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1059 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1060 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
/***********************************************************************
 * Comparisons, if/else/endif
 */

/* Emit a JMPI (jump immediate) instruction.
 *
 * JMPI is forced to execution size 1 with compression off and the
 * execution mask disabled; any predication left on the compile state is
 * cleared afterwards so it doesn't leak into following instructions.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}
/* Push an IF/ELSE instruction onto the if stack.
 *
 * The stack records store-relative indices rather than raw pointers
 * because p->store can be reallocated by next_insn() (see the note in
 * brw_ENDIF).  The backing array is doubled when it fills up.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}
1100 static struct brw_instruction
*
1101 pop_if_stack(struct brw_compile
*p
)
1103 p
->if_stack_depth
--;
1104 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1108 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1110 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1111 p
->loop_stack_array_size
*= 2;
1112 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1113 p
->loop_stack_array_size
);
1114 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1115 p
->loop_stack_array_size
);
1118 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1119 p
->loop_stack_depth
++;
1120 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1123 static struct brw_instruction
*
1124 get_inner_do_insn(struct brw_compile
*p
)
1126 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1129 /* EU takes the value from the flag register and pushes it onto some
1130 * sort of a stack (presumably merging with any flag value already on
1131 * the stack). Within an if block, the flags at the top of the stack
1132 * control execution on each channel of the unit, eg. on each of the
1133 * 16 pixel values in our wm programs.
1135 * When the matching 'else' instruction is reached (presumably by
1136 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
1139 * When the matching 'endif' instruction is reached, the flags are
1140 * popped off. If the stack is now empty, normal execution resumes.
/* Emit an IF instruction and push it on the if stack.
 *
 * The operand encoding is generation-specific: pre-gen6 uses IP for
 * dest/src0 with an immediate jump field, gen6 uses an immediate word
 * destination with a branch_gen6 jump count, and gen7+ uses null
 * operands with JIP/UIP fields (patched later by brw_ENDIF).
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* JIP/UIP are filled in later by patch_IF_ELSE(). */
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Don't let the predicate that steered this IF leak onto later
    * instructions.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 *
 * The comparison operands go in src0/src1 and the condition in the
 * destreg__conditionalmod field; the jump count is patched later.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
/*
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Offsets are in bytes; one instruction is 16 bytes. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
/*
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br * (endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br * (endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
/* Emit an ELSE instruction and push it on the if stack.
 *
 * Like brw_IF, the operand encoding depends on the generation; the jump
 * fields are zeroed here and patched later by brw_ENDIF/patch_IF_ELSE.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}
/* Close the innermost IF/ELSE block: pop the corresponding entries off
 * the if stack, emit the ENDIF (unless SPF mode on pre-gen6 turns the
 * whole construct into ADDs), and patch the jump fields of the IF/ELSE.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/* Emit a BREAK instruction.
 *
 * On pre-gen6 the pop_count records how many nested IF levels inside
 * the current loop must be popped when the break is taken; on gen6+ the
 * jump fields are resolved later (see brw_patch_break_cont and the
 * brw_set_uip_jip reference in brw_patch_break_cont's comment).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
/* Emit a gen6 CONTINUE instruction (jump fields filled in later). */
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   /* NOTE(review): the null-reg dest/src0 written here are immediately
    * overwritten by the ip_reg dest/src0 below; the first pair looks
    * redundant — confirm before removing.
    */
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}
1496 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
1498 struct brw_instruction
*insn
;
1499 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1500 brw_set_dest(p
, insn
, brw_ip_reg());
1501 brw_set_src0(p
, insn
, brw_ip_reg());
1502 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1503 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1504 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1505 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1506 insn
->bits3
.if_else
.pad0
= 0;
1507 insn
->bits3
.if_else
.pop_count
= p
->if_depth_in_loop
[p
->loop_stack_depth
];
/* Emit a gen6+ HALT instruction; its UIP/JIP (in src1) are patched
 * later by the caller.
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1531 * The DO/WHILE is just an unterminated loop -- break or continue are
1532 * used for control within the loop. We have a few ways they can be
1535 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1536 * jip and no DO instruction.
1538 * For non-uniform control flow pre-gen6, there's a DO instruction to
1539 * push the mask, and a WHILE to jump back, and BREAK to get out and
1542 * For gen6, there's no more mask stack, so no need for DO. WHILE
1543 * just points back to the first instruction of the loop.
/* Open a loop.
 *
 * On gen6+ (and in SPF mode) no DO instruction is emitted at all; the
 * address of the next instruction slot is pushed so WHILE can jump back
 * to the loop top.  Pre-gen6, an actual DO instruction is emitted to
 * push the execution mask.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
/*
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* jump counts are in 64-bit chunks; gen5 instructions are 128 bits */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from just before the WHILE to the matching DO. */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         /* BREAK jumps just past the WHILE (out of the loop). */
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         /* CONTINUE jumps to the WHILE itself (next iteration). */
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
/* Close the innermost loop with a WHILE instruction (or, in pre-gen6
 * SPF mode, an IP-relative ADD), then patch any BREAK/CONT inside the
 * loop (pre-gen6 only) and pop the loop stack.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks; one 128-bit instruction is two
    * chunks on gen5+ (see the comment in patch_IF_ELSE).
    */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Negative JIP: jump back to the loop top. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF mode: the loop back-edge is just an IP-relative ADD. */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
/* Resolve a forward JMPI: point the immediate-operand JMPI at store
 * index jmp_insn_idx to the current end of the instruction stream.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   GLuint jmpi = 1;

   /* Jump distance is counted in 64-bit chunks on gen5+ (cf. the 'br'
    * scaling in patch_IF_ELSE).
    */
   if (brw->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* Distance is relative to the instruction following the JMPI. */
   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */

   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* WAIT uses the notification register as both dest and src0. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}
1749 /***********************************************************************
1750 * Helpers for the various SEND message types:
/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH opcode; earlier generations send a
 * message to the shared math function unit.  The integer-divide
 * functions take integer sources, all others take float.
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               GLuint msg_reg_nr,
               struct brw_reg src,
               GLuint data_type,
               GLuint precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}
/** Extended math function, float[8].
 *
 * Two-source variant of brw_math (e.g. POW, INT_DIV); gen6+ only code
 * path — both sources must be GRFs, and the integer-divide functions
 * take integer sources while all others take float.
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message takes the offset in oword units. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * fills.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message takes the offset in oword units. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 with g0.2 = offset. */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);     /* UW? */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: g0 with the global offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
/* Emit a render-target (framebuffer) write SEND/SENDC.
 *
 * On gen6+ a SENDC with a headerless color payload taken from
 * msg_reg_nr is used; pre-gen6 uses SEND with the message register
 * number in destreg__conditionalmod.  The destination is the null
 * register (UW), sized to the dispatch width.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;
   GLuint msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (brw->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}
2172 * Texture sample instruction.
2173 * Note: the msg_type plus msg_length values determine exactly what kind
2174 * of sampling operation is performed. See volume 4, page 161 of docs.
2176 void brw_SAMPLE(struct brw_compile
*p
,
2177 struct brw_reg dest
,
2179 struct brw_reg src0
,
2180 GLuint binding_table_index
,
2183 GLuint response_length
,
2185 GLuint header_present
,
2187 GLuint return_format
)
2189 struct brw_context
*brw
= p
->brw
;
2190 struct brw_instruction
*insn
;
2192 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2194 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2195 insn
->header
.predicate_control
= 0; /* XXX */
2196 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2198 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2200 brw_set_dest(p
, insn
, dest
);
2201 brw_set_src0(p
, insn
, src0
);
2202 brw_set_sampler_message(p
, insn
,
2203 binding_table_index
,
2213 /* All these variables are pretty confusing - we might be better off
2214 * using bitmasks and macros for this, in the old style. Or perhaps
2215 * just having the caller instantiate the fields in dword3 itself.
2217 void brw_urb_WRITE(struct brw_compile
*p
,
2218 struct brw_reg dest
,
2220 struct brw_reg src0
,
2221 enum brw_urb_write_flags flags
,
2223 GLuint response_length
,
2227 struct brw_context
*brw
= p
->brw
;
2228 struct brw_instruction
*insn
;
2230 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2232 if (brw
->gen
== 7 && !(flags
& BRW_URB_WRITE_USE_CHANNEL_MASKS
)) {
2233 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2234 brw_push_insn_state(p
);
2235 brw_set_access_mode(p
, BRW_ALIGN_1
);
2236 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2237 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2238 BRW_REGISTER_TYPE_UD
),
2239 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2240 brw_imm_ud(0xff00));
2241 brw_pop_insn_state(p
);
2244 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2246 assert(msg_length
< BRW_MAX_MRF
);
2248 brw_set_dest(p
, insn
, dest
);
2249 brw_set_src0(p
, insn
, src0
);
2250 brw_set_src1(p
, insn
, brw_imm_d(0));
2253 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2255 brw_set_urb_message(p
,
2265 next_ip(struct brw_compile
*p
, int ip
)
2267 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2269 if (insn
->header
.cmpt_control
)
2276 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2279 void *store
= p
->store
;
2281 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2282 struct brw_instruction
*insn
= store
+ ip
;
2284 switch (insn
->header
.opcode
) {
2285 case BRW_OPCODE_ENDIF
:
2286 case BRW_OPCODE_ELSE
:
2287 case BRW_OPCODE_WHILE
:
2288 case BRW_OPCODE_HALT
:
2296 /* There is no DO instruction on gen6, so to find the end of the loop
2297 * we have to see if the loop is jumping back before our start
2301 brw_find_loop_end(struct brw_compile
*p
, int start
)
2303 struct brw_context
*brw
= p
->brw
;
2306 void *store
= p
->store
;
2308 /* Always start after the instruction (such as a WHILE) we're trying to fix
2311 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2312 struct brw_instruction
*insn
= store
+ ip
;
2314 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2315 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2316 : insn
->bits3
.break_cont
.jip
;
2317 if (ip
+ jip
* scale
<= start
)
2321 assert(!"not reached");
2325 /* After program generation, go back and update the UIP and JIP of
2326 * BREAK, CONT, and HALT instructions to their correct locations.
2329 brw_set_uip_jip(struct brw_compile
*p
)
2331 struct brw_context
*brw
= p
->brw
;
2334 void *store
= p
->store
;
2339 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2340 struct brw_instruction
*insn
= store
+ ip
;
2342 if (insn
->header
.cmpt_control
) {
2343 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2344 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2345 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2346 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2350 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2351 switch (insn
->header
.opcode
) {
2352 case BRW_OPCODE_BREAK
:
2353 assert(block_end_ip
!= 0);
2354 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2355 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2356 insn
->bits3
.break_cont
.uip
=
2357 (brw_find_loop_end(p
, ip
) - ip
+
2358 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2360 case BRW_OPCODE_CONTINUE
:
2361 assert(block_end_ip
!= 0);
2362 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2363 insn
->bits3
.break_cont
.uip
=
2364 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2366 assert(insn
->bits3
.break_cont
.uip
!= 0);
2367 assert(insn
->bits3
.break_cont
.jip
!= 0);
2370 case BRW_OPCODE_ENDIF
:
2371 if (block_end_ip
== 0)
2372 insn
->bits3
.break_cont
.jip
= 2;
2374 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2377 case BRW_OPCODE_HALT
:
2378 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2380 * "In case of the halt instruction not inside any conditional
2381 * code block, the value of <JIP> and <UIP> should be the
2382 * same. In case of the halt instruction inside conditional code
2383 * block, the <UIP> should be the end of the program, and the
2384 * <JIP> should be end of the most inner conditional code block."
2386 * The uip will have already been set by whoever set up the
2389 if (block_end_ip
== 0) {
2390 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2392 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2394 assert(insn
->bits3
.break_cont
.uip
!= 0);
2395 assert(insn
->bits3
.break_cont
.jip
!= 0);
2401 void brw_ff_sync(struct brw_compile
*p
,
2402 struct brw_reg dest
,
2404 struct brw_reg src0
,
2406 GLuint response_length
,
2409 struct brw_context
*brw
= p
->brw
;
2410 struct brw_instruction
*insn
;
2412 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2414 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2415 brw_set_dest(p
, insn
, dest
);
2416 brw_set_src0(p
, insn
, src0
);
2417 brw_set_src1(p
, insn
, brw_imm_d(0));
2420 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2422 brw_set_ff_sync_message(p
,
2430 * Emit the SEND instruction necessary to generate stream output data on Gen6
2431 * (for transform feedback).
2433 * If send_commit_msg is true, this is the last piece of stream output data
2434 * from this thread, so send the data as a committed write. According to the
2435 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2437 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2438 * writes are complete by sending the final write as a committed write."
2441 brw_svb_write(struct brw_compile
*p
,
2442 struct brw_reg dest
,
2444 struct brw_reg src0
,
2445 GLuint binding_table_index
,
2446 bool send_commit_msg
)
2448 struct brw_instruction
*insn
;
2450 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2452 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2453 brw_set_dest(p
, insn
, dest
);
2454 brw_set_src0(p
, insn
, src0
);
2455 brw_set_src1(p
, insn
, brw_imm_d(0));
2456 brw_set_dp_write_message(p
, insn
,
2457 binding_table_index
,
2458 0, /* msg_control: ignored */
2459 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2461 true, /* header_present */
2462 0, /* last_render_target: ignored */
2463 send_commit_msg
, /* response_length */
2464 0, /* end_of_thread */
2465 send_commit_msg
); /* send_commit_msg */
2469 * This instruction is generated as a single-channel align1 instruction by
2470 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2472 * We can't use the typed atomic op in the FS because that has the execution
2473 * mask ANDed with the pixel mask, but we just want to write the one dword for
2476 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
2477 * one u32. So we use the same untyped atomic write message as the pixel
2480 * The untyped atomic operation requires a BUFFER surface type with RAW
2481 * format, and is only accessible through the legacy DATA_CACHE dataport
2484 void brw_shader_time_add(struct brw_compile
*p
,
2485 struct brw_reg payload
,
2486 uint32_t surf_index
)
2488 struct brw_context
*brw
= p
->brw
;
2489 assert(brw
->gen
>= 7);
2491 brw_push_insn_state(p
);
2492 brw_set_access_mode(p
, BRW_ALIGN_1
);
2493 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2494 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2495 brw_pop_insn_state(p
);
2497 /* We use brw_vec1_reg and unmasked because we want to increment the given
2500 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2502 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
2505 uint32_t sfid
, msg_type
;
2506 if (brw
->is_haswell
) {
2507 sfid
= HSW_SFID_DATAPORT_DATA_CACHE_1
;
2508 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2510 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
2511 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2514 bool header_present
= false;
2516 uint32_t mlen
= 2; /* offset, value */
2518 brw_set_message_descriptor(p
, send
, sfid
, mlen
, rlen
, header_present
, eot
);
2520 send
->bits3
.ud
|= msg_type
<< 14;
2521 send
->bits3
.ud
|= 0 << 13; /* no return data */
2522 send
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2523 send
->bits3
.ud
|= BRW_AOP_ADD
<< 8;
2524 send
->bits3
.ud
|= surf_index
<< 0;