2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct brw_context
*brw
= p
->brw
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
88 * "The send with EOT should use register space R112-R127 for <src>. This is
89 * to enable loading of a new thread into the same slot while the message
90 * with EOT for current thread is pending dispatch."
92 * Since we're pretending to have 16 MRFs anyway, we may as well use the
93 * registers required for messages with EOT.
95 struct brw_context
*brw
= p
->brw
;
96 if (brw
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
97 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
98 reg
->nr
+= GEN7_MRF_HACK_START
;
104 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
107 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
108 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
109 assert(dest
.nr
< 128);
111 gen7_convert_mrf_to_grf(p
, &dest
);
113 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
114 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
115 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
117 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
118 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
120 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
121 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
122 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
123 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
124 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
127 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
128 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
129 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
130 * Although Dst.HorzStride is a don't care for Align16, HW needs
131 * this to be programmed as "01".
133 insn
->bits1
.da16
.dest_horiz_stride
= 1;
137 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
139 /* These are different sizes in align1 vs align16:
141 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
142 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
143 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
144 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
145 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
148 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
149 /* even ignored in da16, still need to set as '01' */
150 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
154 /* NEW: Set the execution size based on dest.width and
155 * insn->compression_control:
157 guess_execution_size(p
, insn
, dest
);
160 extern int reg_type_size
[];
163 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
165 int hstride_for_reg
[] = {0, 1, 2, 4};
166 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
167 int width_for_reg
[] = {1, 2, 4, 8, 16};
168 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
169 int width
, hstride
, vstride
, execsize
;
171 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
172 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
173 * mean the destination has to be 128-bit aligned and the
174 * destination horiz stride has to be a word.
176 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
177 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
178 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
184 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
185 reg
.file
== BRW_ARF_NULL
)
188 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
189 hstride
= hstride_for_reg
[reg
.hstride
];
191 if (reg
.vstride
== 0xf) {
194 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
195 vstride
= vstride_for_reg
[reg
.vstride
];
198 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
199 width
= width_for_reg
[reg
.width
];
201 assert(insn
->header
.execution_size
>= 0 &&
202 insn
->header
.execution_size
< Elements(execsize_for_reg
));
203 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
205 /* Restrictions from 3.3.10: Register Region Restrictions. */
207 assert(execsize
>= width
);
210 if (execsize
== width
&& hstride
!= 0) {
211 assert(vstride
== -1 || vstride
== width
* hstride
);
215 if (execsize
== width
&& hstride
== 0) {
216 /* no restriction on vstride. */
221 assert(hstride
== 0);
225 if (execsize
== 1 && width
== 1) {
226 assert(hstride
== 0);
227 assert(vstride
== 0);
231 if (vstride
== 0 && hstride
== 0) {
235 /* 10. Check destination issues. */
239 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
242 struct brw_context
*brw
= p
->brw
;
244 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
245 assert(reg
.nr
< 128);
247 gen7_convert_mrf_to_grf(p
, ®
);
249 if (brw
->gen
>= 6 && (insn
->header
.opcode
== BRW_OPCODE_SEND
||
250 insn
->header
.opcode
== BRW_OPCODE_SENDC
)) {
251 /* Any source modifiers or regions will be ignored, since this just
252 * identifies the MRF/GRF to start reading the message contents from.
253 * Check for some likely failures.
257 assert(reg
.address_mode
== BRW_ADDRESS_DIRECT
);
260 validate_reg(insn
, reg
);
262 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
263 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
264 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
265 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
266 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
268 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
269 insn
->bits3
.ud
= reg
.dw1
.ud
;
271 /* Required to set some fields in src1 as well:
273 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
274 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
278 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
279 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
280 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
281 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
284 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
285 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
289 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
291 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
292 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
295 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
299 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
300 if (reg
.width
== BRW_WIDTH_1
&&
301 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
302 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
303 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
304 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
307 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
308 insn
->bits2
.da1
.src0_width
= reg
.width
;
309 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
313 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
314 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
315 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
316 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
318 /* This is an oddity of the fact we're using the same
319 * descriptions for registers in align_16 as align_1:
321 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
322 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
324 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
330 void brw_set_src1(struct brw_compile
*p
,
331 struct brw_instruction
*insn
,
334 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
336 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
337 assert(reg
.nr
< 128);
339 gen7_convert_mrf_to_grf(p
, ®
);
341 validate_reg(insn
, reg
);
343 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
344 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
345 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
346 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
348 /* Only src1 can be immediate in two-argument instructions.
350 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
352 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
353 insn
->bits3
.ud
= reg
.dw1
.ud
;
356 /* This is a hardware restriction, which may or may not be lifted
359 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
360 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
362 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
363 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
364 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
367 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
368 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
371 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
372 if (reg
.width
== BRW_WIDTH_1
&&
373 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
374 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
375 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
376 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
379 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
380 insn
->bits3
.da1
.src1_width
= reg
.width
;
381 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
385 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
386 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
387 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
388 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
390 /* This is an oddity of the fact we're using the same
391 * descriptions for registers in align_16 as align_1:
393 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
394 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
396 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
402 * Set the Message Descriptor and Extended Message Descriptor fields
405 * \note This zeroes out the Function Control bits, so it must be called
406 * \b before filling out any message-specific data. Callers can
407 * choose not to fill in irrelevant bits; they will be zero.
410 brw_set_message_descriptor(struct brw_compile
*p
,
411 struct brw_instruction
*inst
,
412 enum brw_message_target sfid
,
414 unsigned response_length
,
418 struct brw_context
*brw
= p
->brw
;
420 brw_set_src1(p
, inst
, brw_imm_d(0));
423 inst
->bits3
.generic_gen5
.header_present
= header_present
;
424 inst
->bits3
.generic_gen5
.response_length
= response_length
;
425 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
426 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
429 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
430 inst
->header
.destreg__conditionalmod
= sfid
;
432 /* Set Extended Message Descriptor (ex_desc) */
433 inst
->bits2
.send_gen5
.sfid
= sfid
;
434 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
437 inst
->bits3
.generic
.response_length
= response_length
;
438 inst
->bits3
.generic
.msg_length
= msg_length
;
439 inst
->bits3
.generic
.msg_target
= sfid
;
440 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
444 static void brw_set_math_message( struct brw_compile
*p
,
445 struct brw_instruction
*insn
,
451 struct brw_context
*brw
= p
->brw
;
453 unsigned response_length
;
455 /* Infer message length from the function */
457 case BRW_MATH_FUNCTION_POW
:
458 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
459 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
460 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
468 /* Infer response length from the function */
470 case BRW_MATH_FUNCTION_SINCOS
:
471 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
480 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
481 msg_length
, response_length
, false, false);
483 insn
->bits3
.math_gen5
.function
= function
;
484 insn
->bits3
.math_gen5
.int_type
= integer_type
;
485 insn
->bits3
.math_gen5
.precision
= low_precision
;
486 insn
->bits3
.math_gen5
.saturate
= insn
->header
.saturate
;
487 insn
->bits3
.math_gen5
.data_type
= dataType
;
488 insn
->bits3
.math_gen5
.snapshot
= 0;
490 insn
->bits3
.math
.function
= function
;
491 insn
->bits3
.math
.int_type
= integer_type
;
492 insn
->bits3
.math
.precision
= low_precision
;
493 insn
->bits3
.math
.saturate
= insn
->header
.saturate
;
494 insn
->bits3
.math
.data_type
= dataType
;
496 insn
->header
.saturate
= 0;
500 static void brw_set_ff_sync_message(struct brw_compile
*p
,
501 struct brw_instruction
*insn
,
503 GLuint response_length
,
506 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
507 1, response_length
, true, end_of_thread
);
508 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
509 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
510 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
511 insn
->bits3
.urb_gen5
.allocate
= allocate
;
512 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
513 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
516 static void brw_set_urb_message( struct brw_compile
*p
,
517 struct brw_instruction
*insn
,
518 enum brw_urb_write_flags flags
,
520 GLuint response_length
,
522 GLuint swizzle_control
)
524 struct brw_context
*brw
= p
->brw
;
526 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
527 msg_length
, response_length
, true,
528 flags
& BRW_URB_WRITE_EOT
);
530 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
531 insn
->bits3
.urb_gen7
.offset
= offset
;
532 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
533 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
534 insn
->bits3
.urb_gen7
.per_slot_offset
=
535 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0;
536 insn
->bits3
.urb_gen7
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
537 } else if (brw
->gen
>= 5) {
538 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
539 insn
->bits3
.urb_gen5
.offset
= offset
;
540 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
541 insn
->bits3
.urb_gen5
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
542 insn
->bits3
.urb_gen5
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
543 insn
->bits3
.urb_gen5
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
545 insn
->bits3
.urb
.opcode
= 0; /* ? */
546 insn
->bits3
.urb
.offset
= offset
;
547 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
548 insn
->bits3
.urb
.allocate
= flags
& BRW_URB_WRITE_ALLOCATE
? 1 : 0;
549 insn
->bits3
.urb
.used
= flags
& BRW_URB_WRITE_UNUSED
? 0 : 1;
550 insn
->bits3
.urb
.complete
= flags
& BRW_URB_WRITE_COMPLETE
? 1 : 0;
555 brw_set_dp_write_message(struct brw_compile
*p
,
556 struct brw_instruction
*insn
,
557 GLuint binding_table_index
,
562 GLuint last_render_target
,
563 GLuint response_length
,
564 GLuint end_of_thread
,
565 GLuint send_commit_msg
)
567 struct brw_context
*brw
= p
->brw
;
571 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
572 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
573 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
575 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
576 } else if (brw
->gen
== 6) {
577 /* Use the render cache for all write messages. */
578 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
580 sfid
= BRW_SFID_DATAPORT_WRITE
;
583 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
584 header_present
, end_of_thread
);
587 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
588 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
589 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
590 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
591 } else if (brw
->gen
== 6) {
592 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
593 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
594 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
595 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
596 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
597 } else if (brw
->gen
== 5) {
598 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
599 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
600 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
601 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
602 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
604 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
605 insn
->bits3
.dp_write
.msg_control
= msg_control
;
606 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
607 insn
->bits3
.dp_write
.msg_type
= msg_type
;
608 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
613 brw_set_dp_read_message(struct brw_compile
*p
,
614 struct brw_instruction
*insn
,
615 GLuint binding_table_index
,
621 GLuint response_length
)
623 struct brw_context
*brw
= p
->brw
;
627 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
628 } else if (brw
->gen
== 6) {
629 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
630 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
632 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
634 sfid
= BRW_SFID_DATAPORT_READ
;
637 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
638 header_present
, false);
641 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
642 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
643 insn
->bits3
.gen7_dp
.last_render_target
= 0;
644 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
645 } else if (brw
->gen
== 6) {
646 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
647 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
648 insn
->bits3
.gen6_dp
.last_render_target
= 0;
649 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
650 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
651 } else if (brw
->gen
== 5) {
652 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
653 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
654 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
655 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
656 } else if (brw
->is_g4x
) {
657 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
658 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
659 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
660 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
662 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
663 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
664 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
665 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
670 brw_set_sampler_message(struct brw_compile
*p
,
671 struct brw_instruction
*insn
,
672 GLuint binding_table_index
,
675 GLuint response_length
,
677 GLuint header_present
,
679 GLuint return_format
)
681 struct brw_context
*brw
= p
->brw
;
683 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
684 response_length
, header_present
, false);
687 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
688 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
689 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
690 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
691 } else if (brw
->gen
>= 5) {
692 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
693 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
694 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
695 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
696 } else if (brw
->is_g4x
) {
697 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
698 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
699 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
701 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
702 insn
->bits3
.sampler
.sampler
= sampler
;
703 insn
->bits3
.sampler
.msg_type
= msg_type
;
704 insn
->bits3
.sampler
.return_format
= return_format
;
709 #define next_insn brw_next_insn
710 struct brw_instruction
*
711 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
713 struct brw_instruction
*insn
;
715 if (p
->nr_insn
+ 1 > p
->store_size
) {
717 printf("incresing the store size to %d\n", p
->store_size
<< 1);
719 p
->store
= reralloc(p
->mem_ctx
, p
->store
,
720 struct brw_instruction
, p
->store_size
);
722 assert(!"realloc eu store memeory failed");
725 p
->next_insn_offset
+= 16;
726 insn
= &p
->store
[p
->nr_insn
++];
727 memcpy(insn
, p
->current
, sizeof(*insn
));
729 /* Reset this one-shot flag:
732 if (p
->current
->header
.destreg__conditionalmod
) {
733 p
->current
->header
.destreg__conditionalmod
= 0;
734 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
737 insn
->header
.opcode
= opcode
;
741 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
746 struct brw_instruction
*insn
= next_insn(p
, opcode
);
747 brw_set_dest(p
, insn
, dest
);
748 brw_set_src0(p
, insn
, src
);
752 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
756 struct brw_reg src1
)
758 struct brw_instruction
*insn
= next_insn(p
, opcode
);
759 brw_set_dest(p
, insn
, dest
);
760 brw_set_src0(p
, insn
, src0
);
761 brw_set_src1(p
, insn
, src1
);
766 get_3src_subreg_nr(struct brw_reg reg
)
768 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
769 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
770 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
772 return reg
.subnr
/ 4;
776 static struct brw_instruction
*brw_alu3(struct brw_compile
*p
,
783 struct brw_context
*brw
= p
->brw
;
784 struct brw_instruction
*insn
= next_insn(p
, opcode
);
786 gen7_convert_mrf_to_grf(p
, &dest
);
788 assert(insn
->header
.access_mode
== BRW_ALIGN_16
);
790 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
||
791 dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
792 assert(dest
.nr
< 128);
793 assert(dest
.address_mode
== BRW_ADDRESS_DIRECT
);
794 assert(dest
.type
== BRW_REGISTER_TYPE_F
||
795 dest
.type
== BRW_REGISTER_TYPE_D
||
796 dest
.type
== BRW_REGISTER_TYPE_UD
);
797 insn
->bits1
.da3src
.dest_reg_file
= (dest
.file
== BRW_MESSAGE_REGISTER_FILE
);
798 insn
->bits1
.da3src
.dest_reg_nr
= dest
.nr
;
799 insn
->bits1
.da3src
.dest_subreg_nr
= dest
.subnr
/ 16;
800 insn
->bits1
.da3src
.dest_writemask
= dest
.dw1
.bits
.writemask
;
801 guess_execution_size(p
, insn
, dest
);
803 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
804 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
805 assert(src0
.nr
< 128);
806 insn
->bits2
.da3src
.src0_swizzle
= src0
.dw1
.bits
.swizzle
;
807 insn
->bits2
.da3src
.src0_subreg_nr
= get_3src_subreg_nr(src0
);
808 insn
->bits2
.da3src
.src0_reg_nr
= src0
.nr
;
809 insn
->bits1
.da3src
.src0_abs
= src0
.abs
;
810 insn
->bits1
.da3src
.src0_negate
= src0
.negate
;
811 insn
->bits2
.da3src
.src0_rep_ctrl
= src0
.vstride
== BRW_VERTICAL_STRIDE_0
;
813 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
814 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
815 assert(src1
.nr
< 128);
816 insn
->bits2
.da3src
.src1_swizzle
= src1
.dw1
.bits
.swizzle
;
817 insn
->bits2
.da3src
.src1_subreg_nr_low
= get_3src_subreg_nr(src1
) & 0x3;
818 insn
->bits3
.da3src
.src1_subreg_nr_high
= get_3src_subreg_nr(src1
) >> 2;
819 insn
->bits2
.da3src
.src1_rep_ctrl
= src1
.vstride
== BRW_VERTICAL_STRIDE_0
;
820 insn
->bits3
.da3src
.src1_reg_nr
= src1
.nr
;
821 insn
->bits1
.da3src
.src1_abs
= src1
.abs
;
822 insn
->bits1
.da3src
.src1_negate
= src1
.negate
;
824 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
825 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
826 assert(src2
.nr
< 128);
827 insn
->bits3
.da3src
.src2_swizzle
= src2
.dw1
.bits
.swizzle
;
828 insn
->bits3
.da3src
.src2_subreg_nr
= get_3src_subreg_nr(src2
);
829 insn
->bits3
.da3src
.src2_rep_ctrl
= src2
.vstride
== BRW_VERTICAL_STRIDE_0
;
830 insn
->bits3
.da3src
.src2_reg_nr
= src2
.nr
;
831 insn
->bits1
.da3src
.src2_abs
= src2
.abs
;
832 insn
->bits1
.da3src
.src2_negate
= src2
.negate
;
835 /* Set both the source and destination types based on dest.type,
836 * ignoring the source register types. The MAD and LRP emitters ensure
837 * that all four types are float. The BFE and BFI2 emitters, however,
838 * may send us mixed D and UD types and want us to ignore that and use
839 * the destination type.
842 case BRW_REGISTER_TYPE_F
:
843 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_F
;
844 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_F
;
846 case BRW_REGISTER_TYPE_D
:
847 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_D
;
848 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_D
;
850 case BRW_REGISTER_TYPE_UD
:
851 insn
->bits1
.da3src
.src_type
= BRW_3SRC_TYPE_UD
;
852 insn
->bits1
.da3src
.dst_type
= BRW_3SRC_TYPE_UD
;
861 /***********************************************************************
862 * Convenience routines.
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}
967 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
973 if (src0
.type
== BRW_REGISTER_TYPE_F
||
974 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
975 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
976 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
977 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
980 if (src1
.type
== BRW_REGISTER_TYPE_F
||
981 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
982 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
983 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
984 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
987 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
990 struct brw_instruction
*brw_AVG(struct brw_compile
*p
,
995 assert(dest
.type
== src0
.type
);
996 assert(src0
.type
== src1
.type
);
998 case BRW_REGISTER_TYPE_B
:
999 case BRW_REGISTER_TYPE_UB
:
1000 case BRW_REGISTER_TYPE_W
:
1001 case BRW_REGISTER_TYPE_UW
:
1002 case BRW_REGISTER_TYPE_D
:
1003 case BRW_REGISTER_TYPE_UD
:
1006 assert(!"Bad type for brw_AVG");
1009 return brw_alu2(p
, BRW_OPCODE_AVG
, dest
, src0
, src1
);
1012 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
1013 struct brw_reg dest
,
1014 struct brw_reg src0
,
1015 struct brw_reg src1
)
1018 if (src0
.type
== BRW_REGISTER_TYPE_D
||
1019 src0
.type
== BRW_REGISTER_TYPE_UD
||
1020 src1
.type
== BRW_REGISTER_TYPE_D
||
1021 src1
.type
== BRW_REGISTER_TYPE_UD
) {
1022 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
1025 if (src0
.type
== BRW_REGISTER_TYPE_F
||
1026 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
1027 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
1028 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
1029 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
1032 if (src1
.type
== BRW_REGISTER_TYPE_F
||
1033 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
1034 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
1035 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
1036 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
1039 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1040 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
1041 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1042 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
1044 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
1048 void brw_NOP(struct brw_compile
*p
)
1050 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
1051 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1052 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1053 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
1060 /***********************************************************************
1061 * Comparisons, if/else/endif
1064 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
1065 struct brw_reg dest
,
1066 struct brw_reg src0
,
1067 struct brw_reg src1
)
1069 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
1071 insn
->header
.execution_size
= 1;
1072 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1073 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1075 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1081 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1083 p
->if_stack
[p
->if_stack_depth
] = inst
- p
->store
;
1085 p
->if_stack_depth
++;
1086 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
1087 p
->if_stack_array_size
*= 2;
1088 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, int,
1089 p
->if_stack_array_size
);
1093 static struct brw_instruction
*
1094 pop_if_stack(struct brw_compile
*p
)
1096 p
->if_stack_depth
--;
1097 return &p
->store
[p
->if_stack
[p
->if_stack_depth
]];
1101 push_loop_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
1103 if (p
->loop_stack_array_size
< p
->loop_stack_depth
) {
1104 p
->loop_stack_array_size
*= 2;
1105 p
->loop_stack
= reralloc(p
->mem_ctx
, p
->loop_stack
, int,
1106 p
->loop_stack_array_size
);
1107 p
->if_depth_in_loop
= reralloc(p
->mem_ctx
, p
->if_depth_in_loop
, int,
1108 p
->loop_stack_array_size
);
1111 p
->loop_stack
[p
->loop_stack_depth
] = inst
- p
->store
;
1112 p
->loop_stack_depth
++;
1113 p
->if_depth_in_loop
[p
->loop_stack_depth
] = 0;
1116 static struct brw_instruction
*
1117 get_inner_do_insn(struct brw_compile
*p
)
1119 return &p
->store
[p
->loop_stack
[p
->loop_stack_depth
- 1]];
1122 /* EU takes the value from the flag register and pushes it onto some
1123 * sort of a stack (presumably merging with any flag value already on
1124 * the stack). Within an if block, the flags at the top of the stack
1125 * control execution on each channel of the unit, eg. on each of the
1126 * 16 pixel values in our wm programs.
1128 * When the matching 'else' instruction is reached (presumably by
1129 * countdown of the instruction count patched in by our ELSE/ENDIF
1130 * functions), the relevent flags are inverted.
1132 * When the matching 'endif' instruction is reached, the flags are
1133 * popped off. If the stack is now empty, normal execution resumes.
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevent flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: the operand encoding
    * of IF differs per generation (IP-relative pre-gen6, jump_count on
    * gen6, JIP/UIP on gen7+).
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      /* Patched later by patch_IF_ELSE() once the jump target is known. */
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* JIP/UIP patched later by patch_IF_ELSE(). */
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The IF consumed the predicate; clear it for following instructions. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   /* Track IF nesting within the current loop for BREAK/CONT pop counts. */
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   /* Patched later once the matching ENDIF position is known. */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* For MATH/IF-with-compare, this field carries the conditional modifier. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.  IP offsets are in bytes; one instruction is 16 bytes.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}
/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}
/* Emit an ELSE; jump targets are filled in later by patch_IF_ELSE()
 * (or the whole thing is converted to an ADD in SPF mode by brw_ENDIF).
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* Per-generation operand encoding, mirroring brw_IF(). */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Pushed on top of the matching IF; popped together by brw_ENDIF(). */
   push_if_stack(p, insn);
}
/* Close the innermost IF/ELSE: either emit an ENDIF and patch the jump
 * targets, or (pre-gen6 SPF mode) rewrite the IF/ELSE into ADDs on IP.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base adress of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation ENDIF operand encoding. */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/* Emit a BREAK.  On pre-gen6 the instruction carries a pop_count so the
 * EU unwinds the IFs entered inside the loop; on gen6+ the jump targets
 * are patched later (brw_patch_break_cont / brw_set_uip_jip).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      /* Pop as many mask-stack entries as IFs are open inside this loop. */
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}
1473 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1475 struct brw_instruction
*insn
;
1477 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1478 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1479 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1480 brw_set_dest(p
, insn
, brw_ip_reg());
1481 brw_set_src0(p
, insn
, brw_ip_reg());
1482 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1484 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1485 insn
->header
.execution_size
= BRW_EXECUTE_8
;
/* Emit a pre-gen6 CONTINUE.  The jump_count is patched afterwards by
 * brw_patch_break_cont(); pop_count unwinds IFs open inside the loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}
/* Emit a HALT (gen6+).  src1 carries UIP and JIP, which are patched
 * later (see brw_set_uip_jip callers).
 */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}
1524 * The DO/WHILE is just an unterminated loop -- break or continue are
1525 * used for control within the loop. We have a few ways they can be
1528 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1529 * jip and no DO instruction.
1531 * For non-uniform control flow pre-gen6, there's a DO instruction to
1532 * push the mask, and a WHILE to jump back, and BREAK to get out and
1535 * For gen6, there's no more mask stack, so no need for DO. WHILE
1536 * just points back to the first instruction of the loop.
/* Open a loop.  On gen6+ (and in SPF mode) no DO instruction is emitted;
 * the loop top is just recorded.  Pre-gen6 emits an actual DO to push
 * the execution mask.  See the block comment above for the scheme.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction needed; just remember where the loop starts. */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}
1567 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1570 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1571 * nesting, since it can always just point to the end of the block/current loop.
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Gen5 counts jumps in 64-bit chunks: 2 per 128-bit instruction. */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, patching every
    * unpatched BREAK/CONTINUE to jump relative to the WHILE.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * looking at.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps to just past the WHILE (out of the loop). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONTINUE jumps to the WHILE itself (next iteration test). */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}
/* Close the innermost loop by emitting its WHILE (or, in pre-gen6 SPF
 * mode, an ADD on IP) and patching any BREAK/CONTINUE inside it.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* Gen5+ count jumps in 64-bit chunks: 2 per 128-bit instruction. */
   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump to the top of the loop. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the WHILE is just a backwards ADD on IP (byte offset). */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 /* Resolve BREAK/CONTINUE emitted inside this loop. */
	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}
/* Resolve a forward JMPI emitted earlier at store index jmp_insn_idx so
 * that it lands on the current end of the program.  The jump distance is
 * in 64-bit chunks on gen5+ (2 per instruction), hence the multiplier.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   GLuint jmpi = 1;

   if (brw->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* -1: the jump is relative to the instruction after the JMPI. */
   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
1680 /* To integrate with the above, it makes sense that the comparison
1681 * instruction should populate the flag register. It might be simpler
1682 * just to use the flag reg for most WM tasks?
/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* guess_execution_size(insn, src0); */

   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (brw->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
	  dest.nr == BRW_ARF_NULL) {
	 insn->header.thread_control = BRW_THREAD_SWITCH;
      }
   }
}
1726 /* Issue 'wait' instruction for n1, host could program MMIO
1727 to wake up thread. */
1728 void brw_WAIT (struct brw_compile
*p
)
1730 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1731 struct brw_reg src
= brw_notification_1_reg();
1733 brw_set_dest(p
, insn
, src
);
1734 brw_set_src0(p
, insn
, src
);
1735 brw_set_src1(p
, insn
, brw_null_reg());
1736 insn
->header
.execution_size
= 0; /* must */
1737 insn
->header
.predicate_control
= 0;
1738 insn
->header
.compression_control
= 0;
1742 /***********************************************************************
1743 * Helpers for the various SEND message types:
1746 /** Extended math function, float[8].
/* Extended math function, float[8].  On gen6+ this is a native MATH
 * opcode; earlier generations send a message to the shared math unit.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* INT DIV variants take integer sources; everything else is float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   data_type);
   }
}
1811 /** Extended math function, float[8].
/* Two-source extended math (gen6+ only; e.g. POW, INT DIV). */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* INT DIV variants take integer sources; everything else is float. */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1863 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1864 * using a constant offset per channel.
1866 * The offset must be aligned to oword size (16 bytes). Used for
1867 * register spilling.
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* NOTE(review): on gen6+ the message offset is in owords — confirm
    * the exact divisor against brw_set_dp_write_message users.
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}
1973 * Read a block of owords (half a GRF each) from the scratch buffer
1974 * using a constant index per channel.
1976 * Offset must be aligned to oword size (16 bytes). Used for register
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * unspilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* NOTE(review): on gen6+ the message offset is in owords — confirm. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: g0 with the scratch offset
    * placed in element 2.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      true, /* header_present */
			      rlen);
   }
}
2048 * Read a float[4] vector from the data port Data Cache (const buffer).
2049 * Location (in buffer) should be a multiple of 16.
2050 * Used for fetching shader constants.
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* cast dest to a uword[8] vector */
      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      bind_table_index,
			      BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			      BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			      1, /* msg_length */
			      true, /* header_present */
			      1); /* response_length (1 reg, 2 owords!) */
   }

   brw_pop_insn_state(p);
}
/* Emit a render-target write (SEND/SENDC to the render cache dataport).
 * On gen6+ the message is headerless and the color payload starts at
 * msg_reg_nr; pre-gen6 the MRF base goes in destreg__conditionalmod.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;
   GLuint msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (brw->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    eot, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}
2165 * Texture sample instruction.
2166 * Note: the msg_type plus msg_length values determine exactly what kind
2167 * of sampling operation is performed. See volume 4, page 161 of docs.
/* Emit a sampler SEND. On Gen6+ the operand is first moved into the
 * message register via gen6_resolve_implied_move().
 * NOTE(review): `msg_reg_nr` and the tail arguments of
 * brw_set_sampler_message() are missing from this view (extraction gap). */
2169 void brw_SAMPLE(struct brw_compile
*p
,
2170 struct brw_reg dest
,
2172 struct brw_reg src0
,
2173 GLuint binding_table_index
,
2176 GLuint response_length
,
2178 GLuint header_present
,
2180 GLuint return_format
)
2182 struct brw_context
*brw
= p
->brw
;
2183 struct brw_instruction
*insn
;
2185 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2187 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2188 insn
->header
.predicate_control
= 0; /* XXX */
2189 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2191 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2193 brw_set_dest(p
, insn
, dest
);
2194 brw_set_src0(p
, insn
, src0
);
2195 brw_set_sampler_message(p
, insn
,
2196 binding_table_index
,
2206 /* All these variables are pretty confusing - we might be better off
2207 * using bitmasks and macros for this, in the old style. Or perhaps
2208 * just having the caller instantiate the fields in dword3 itself.
/* Emit a URB write SEND. NOTE(review): `msg_reg_nr`, `msg_length`, and the
 * tail arguments of brw_set_urb_message() are missing from this view
 * (extraction gap) — confirm against the full source. */
2210 void brw_urb_WRITE(struct brw_compile
*p
,
2211 struct brw_reg dest
,
2213 struct brw_reg src0
,
2214 enum brw_urb_write_flags flags
,
2216 GLuint response_length
,
2220 struct brw_context
*brw
= p
->brw
;
2221 struct brw_instruction
*insn
;
2223 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2225 if (brw
->gen
== 7) {
2226 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
/* OR 0xff00 into dword 5 of the header, mask-disabled and Align1 so it
 * applies to all channels. */
2227 brw_push_insn_state(p
);
2228 brw_set_access_mode(p
, BRW_ALIGN_1
);
2229 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2230 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2231 BRW_REGISTER_TYPE_UD
),
2232 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2233 brw_imm_ud(0xff00));
2234 brw_pop_insn_state(p
);
2237 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2239 assert(msg_length
< BRW_MAX_MRF
);
2241 brw_set_dest(p
, insn
, dest
);
2242 brw_set_src0(p
, insn
, src0
);
2243 brw_set_src1(p
, insn
, brw_imm_d(0));
2246 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2248 brw_set_urb_message(p
,
/* Step `ip` (a byte offset into p->store) past one instruction.
 * Compacted instructions (cmpt_control set) are half-size; the return
 * statements are not visible in this view (extraction gap), but the
 * branch distinguishes compacted from full-size encodings. */
2258 next_ip(struct brw_compile
*p
, int ip
)
2260 struct brw_instruction
*insn
= (void *)p
->store
+ ip
;
2262 if (insn
->header
.cmpt_control
)
/* Scan forward from `start` through the instruction store for the next
 * control-flow block terminator (ENDIF, ELSE, WHILE, or HALT).
 * NOTE(review): the case bodies / return statements are missing from this
 * view (extraction gap); based on the callers below, this presumably
 * returns the matching ip, or 0 if none is found — confirm. */
2269 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2272 void *store
= p
->store
;
2274 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2275 struct brw_instruction
*insn
= store
+ ip
;
2277 switch (insn
->header
.opcode
) {
2278 case BRW_OPCODE_ENDIF
:
2279 case BRW_OPCODE_ELSE
:
2280 case BRW_OPCODE_WHILE
:
2281 case BRW_OPCODE_HALT
:
2289 /* There is no DO instruction on gen6, so to find the end of the loop
2290 * we have to see if the loop is jumping back before our start
/* Find the WHILE that closes the loop containing `start` by scanning
 * forward for a WHILE whose backward jump lands at or before `start`.
 * The branch-offset field differs by generation: Gen6 keeps it in
 * bits1.branch_gen6.jump_count, later gens in bits3.break_cont.jip. */
2294 brw_find_loop_end(struct brw_compile
*p
, int start
)
2296 struct brw_context
*brw
= p
->brw
;
2299 void *store
= p
->store
;
2301 /* Always start after the instruction (such as a WHILE) we're trying to fix
2304 for (ip
= next_ip(p
, start
); ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2305 struct brw_instruction
*insn
= store
+ ip
;
2307 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2308 int jip
= brw
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2309 : insn
->bits3
.break_cont
.jip
;
/* ip + jip*scale is the WHILE's branch target in byte offsets; if it
 * jumps back to or before `start`, this WHILE closes our loop. */
2310 if (ip
+ jip
* scale
<= start
)
/* Every loop must be closed by a WHILE, so falling out of the scan
 * is a compiler bug. */
2314 assert(!"not reached");
2318 /* After program generation, go back and update the UIP and JIP of
2319 * BREAK, CONT, and HALT instructions to their correct locations.
/* Post-pass over the whole instruction store: patch the JIP/UIP branch
 * offsets of BREAK, CONTINUE, ENDIF, and HALT instructions now that the
 * final instruction layout is known. Offsets are stored in units of
 * `scale` (declaration not visible in this view — extraction gap). */
2322 brw_set_uip_jip(struct brw_compile
*p
)
2324 struct brw_context
*brw
= p
->brw
;
2327 void *store
= p
->store
;
2332 for (ip
= 0; ip
< p
->next_insn_offset
; ip
= next_ip(p
, ip
)) {
2333 struct brw_instruction
*insn
= store
+ ip
;
2335 if (insn
->header
.cmpt_control
) {
2336 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2337 assert(insn
->header
.opcode
!= BRW_OPCODE_BREAK
&&
2338 insn
->header
.opcode
!= BRW_OPCODE_CONTINUE
&&
2339 insn
->header
.opcode
!= BRW_OPCODE_HALT
);
2343 int block_end_ip
= brw_find_next_block_end(p
, ip
);
2344 switch (insn
->header
.opcode
) {
2345 case BRW_OPCODE_BREAK
:
2346 assert(block_end_ip
!= 0);
/* JIP: jump to the end of the innermost enclosing block. */
2347 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2348 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2349 insn
->bits3
.break_cont
.uip
=
2350 (brw_find_loop_end(p
, ip
) - ip
+
2351 (brw
->gen
== 6 ? 16 : 0)) / scale
;
2353 case BRW_OPCODE_CONTINUE
:
2354 assert(block_end_ip
!= 0);
2355 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
/* UIP: back to the loop's WHILE. */
2356 insn
->bits3
.break_cont
.uip
=
2357 (brw_find_loop_end(p
, ip
) - ip
) / scale
;
2359 assert(insn
->bits3
.break_cont
.uip
!= 0);
2360 assert(insn
->bits3
.break_cont
.jip
!= 0);
2363 case BRW_OPCODE_ENDIF
:
/* No following block terminator: fall through to the next
 * instruction (offset 2 in `scale` units). */
2364 if (block_end_ip
== 0)
2365 insn
->bits3
.break_cont
.jip
= 2;
2367 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2370 case BRW_OPCODE_HALT
:
2371 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2373 * "In case of the halt instruction not inside any conditional
2374 * code block, the value of <JIP> and <UIP> should be the
2375 * same. In case of the halt instruction inside conditional code
2376 * block, the <UIP> should be the end of the program, and the
2377 * <JIP> should be end of the most inner conditional code block."
2379 * The uip will have already been set by whoever set up the
2382 if (block_end_ip
== 0) {
2383 insn
->bits3
.break_cont
.jip
= insn
->bits3
.break_cont
.uip
;
2385 insn
->bits3
.break_cont
.jip
= (block_end_ip
- ip
) / scale
;
2387 assert(insn
->bits3
.break_cont
.uip
!= 0);
2388 assert(insn
->bits3
.break_cont
.jip
!= 0);
/* Emit an FF_SYNC message SEND (GS/clip thread URB handle allocation).
 * NOTE(review): `msg_reg_nr` and the arguments to
 * brw_set_ff_sync_message() are missing from this view (extraction gap). */
2394 void brw_ff_sync(struct brw_compile
*p
,
2395 struct brw_reg dest
,
2397 struct brw_reg src0
,
2399 GLuint response_length
,
2402 struct brw_context
*brw
= p
->brw
;
2403 struct brw_instruction
*insn
;
2405 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2407 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2408 brw_set_dest(p
, insn
, dest
);
2409 brw_set_src0(p
, insn
, src0
);
2410 brw_set_src1(p
, insn
, brw_imm_d(0));
2413 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2415 brw_set_ff_sync_message(p
,
2423 * Emit the SEND instruction necessary to generate stream output data on Gen6
2424 * (for transform feedback).
2426 * If send_commit_msg is true, this is the last piece of stream output data
2427 * from this thread, so send the data as a committed write. According to the
2428 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2430 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2431 * writes are complete by sending the final write as a committed write."
/* Emit a Gen6 streamed-vertex-buffer write (transform feedback) SEND.
 * When `send_commit_msg` is set, the write is committed and a one-register
 * response is requested (same flag doubles as response_length below).
 * NOTE(review): `msg_reg_nr` and one dp-write argument line are missing
 * from this view (extraction gap). */
2434 brw_svb_write(struct brw_compile
*p
,
2435 struct brw_reg dest
,
2437 struct brw_reg src0
,
2438 GLuint binding_table_index
,
2439 bool send_commit_msg
)
2441 struct brw_instruction
*insn
;
2443 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2445 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2446 brw_set_dest(p
, insn
, dest
);
2447 brw_set_src0(p
, insn
, src0
);
2448 brw_set_src1(p
, insn
, brw_imm_d(0));
2449 brw_set_dp_write_message(p
, insn
,
2450 binding_table_index
,
2451 0, /* msg_control: ignored */
2452 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2454 true, /* header_present */
2455 0, /* last_render_target: ignored */
2456 send_commit_msg
, /* response_length */
2457 0, /* end_of_thread */
2458 send_commit_msg
); /* send_commit_msg */
2462 * This instruction is generated as a single-channel align1 instruction by
2463 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
2465 * We can't use the typed atomic op in the FS because that has the execution
2466 * mask ANDed with the pixel mask, but we just want to write the one dword for
2469 * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
2470 * one u32. So we use the same untyped atomic write message as the pixel
2473 * The untyped atomic operation requires a BUFFER surface type with RAW
2474 * format, and is only accessible through the legacy DATA_CACHE dataport
/* Emit a Gen7+ untyped atomic ADD against `surf_index` for
 * INTEL_DEBUG=shader_time accumulation. The message descriptor is
 * assembled by hand in bits3 below.
 * NOTE(review): the tail arguments of brw_set_dest()/brw_set_src0() and
 * the `rlen`/`eot` declarations are missing from this view (extraction
 * gap) — confirm against the full source. */
2477 void brw_shader_time_add(struct brw_compile
*p
,
2478 struct brw_reg payload
,
2479 uint32_t surf_index
)
2481 struct brw_context
*brw
= p
->brw
;
2482 assert(brw
->gen
>= 7);
/* Align1 + mask-disable so the single-channel atomic always executes. */
2484 brw_push_insn_state(p
);
2485 brw_set_access_mode(p
, BRW_ALIGN_1
);
2486 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2487 struct brw_instruction
*send
= brw_next_insn(p
, BRW_OPCODE_SEND
);
2488 brw_pop_insn_state(p
);
2490 /* We use brw_vec1_reg and unmasked because we want to increment the given
2493 brw_set_dest(p
, send
, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
2495 brw_set_src0(p
, send
, brw_vec1_reg(payload
.file
,
/* Haswell moved untyped atomics to data-cache port 1 with its own SFID
 * and message-type encodings. */
2498 uint32_t sfid
, msg_type
;
2499 if (brw
->is_haswell
) {
2500 sfid
= HSW_SFID_DATAPORT_DATA_CACHE_1
;
2501 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
2503 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
2504 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
2507 bool header_present
= false;
2509 uint32_t mlen
= 2; /* offset, value */
2511 brw_set_message_descriptor(p
, send
, sfid
, mlen
, rlen
, header_present
, eot
);
/* Hand-pack the extended descriptor fields of the atomic message. */
2513 send
->bits3
.ud
|= msg_type
<< 14;
2514 send
->bits3
.ud
|= 0 << 13; /* no return data */
2515 send
->bits3
.ud
|= 1 << 12; /* SIMD8 mode */
2516 send
->bits3
.ud
|= BRW_AOP_ADD
<< 8;
2517 send
->bits3
.ud
|= surf_index
<< 0;