/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
== BRW_MESSAGE_REGISTER_FILE
)
73 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
81 *src
= brw_message_reg(msg_reg_nr
);
85 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
87 struct intel_context
*intel
= &p
->brw
->intel
;
88 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
89 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
96 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
99 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
100 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
101 assert(dest
.nr
< 128);
103 gen7_convert_mrf_to_grf(p
, &dest
);
105 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
106 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
107 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
109 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
110 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
112 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
113 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
114 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
115 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
116 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
119 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
120 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
121 /* even ignored in da16, still need to set as '01' */
122 insn
->bits1
.da16
.dest_horiz_stride
= 1;
126 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
128 /* These are different sizes in align1 vs align16:
130 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
131 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
132 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
133 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
134 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
137 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
138 /* even ignored in da16, still need to set as '01' */
139 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
143 /* NEW: Set the execution size based on dest.width and
144 * insn->compression_control:
146 guess_execution_size(p
, insn
, dest
);
149 extern int reg_type_size
[];
152 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
154 int hstride_for_reg
[] = {0, 1, 2, 4};
155 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
156 int width_for_reg
[] = {1, 2, 4, 8, 16};
157 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
158 int width
, hstride
, vstride
, execsize
;
160 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
161 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
162 * mean the destination has to be 128-bit aligned and the
163 * destination horiz stride has to be a word.
165 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
166 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
167 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
173 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
174 reg
.file
== BRW_ARF_NULL
)
177 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
178 hstride
= hstride_for_reg
[reg
.hstride
];
180 if (reg
.vstride
== 0xf) {
183 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
184 vstride
= vstride_for_reg
[reg
.vstride
];
187 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
188 width
= width_for_reg
[reg
.width
];
190 assert(insn
->header
.execution_size
>= 0 &&
191 insn
->header
.execution_size
< Elements(execsize_for_reg
));
192 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
194 /* Restrictions from 3.3.10: Register Region Restrictions. */
196 assert(execsize
>= width
);
199 if (execsize
== width
&& hstride
!= 0) {
200 assert(vstride
== -1 || vstride
== width
* hstride
);
204 if (execsize
== width
&& hstride
== 0) {
205 /* no restriction on vstride. */
210 assert(hstride
== 0);
214 if (execsize
== 1 && width
== 1) {
215 assert(hstride
== 0);
216 assert(vstride
== 0);
220 if (vstride
== 0 && hstride
== 0) {
224 /* 10. Check destination issues. */
228 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
231 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
232 assert(reg
.nr
< 128);
234 gen7_convert_mrf_to_grf(p
, ®
);
236 validate_reg(insn
, reg
);
238 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
239 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
240 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
241 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
242 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
244 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
245 insn
->bits3
.ud
= reg
.dw1
.ud
;
247 /* Required to set some fields in src1 as well:
249 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
250 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
254 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
255 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
256 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
257 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
260 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
261 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
265 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
267 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
268 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
271 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
275 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
276 if (reg
.width
== BRW_WIDTH_1
&&
277 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
278 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
279 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
280 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
283 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
284 insn
->bits2
.da1
.src0_width
= reg
.width
;
285 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
289 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
290 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
291 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
292 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
294 /* This is an oddity of the fact we're using the same
295 * descriptions for registers in align_16 as align_1:
297 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
298 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
300 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
306 void brw_set_src1(struct brw_compile
*p
,
307 struct brw_instruction
*insn
,
310 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
312 assert(reg
.nr
< 128);
314 gen7_convert_mrf_to_grf(p
, ®
);
316 validate_reg(insn
, reg
);
318 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
319 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
320 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
321 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
323 /* Only src1 can be immediate in two-argument instructions.
325 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
327 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
328 insn
->bits3
.ud
= reg
.dw1
.ud
;
331 /* This is a hardware restriction, which may or may not be lifted
334 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
335 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
337 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
338 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
339 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
342 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
343 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
346 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
347 if (reg
.width
== BRW_WIDTH_1
&&
348 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
349 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
350 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
351 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
354 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
355 insn
->bits3
.da1
.src1_width
= reg
.width
;
356 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
360 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
361 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
362 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
363 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
365 /* This is an oddity of the fact we're using the same
366 * descriptions for registers in align_16 as align_1:
368 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
369 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
371 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
377 * Set the Message Descriptor and Extended Message Descriptor fields
380 * \note This zeroes out the Function Control bits, so it must be called
381 * \b before filling out any message-specific data. Callers can
382 * choose not to fill in irrelevant bits; they will be zero.
385 brw_set_message_descriptor(struct brw_compile
*p
,
386 struct brw_instruction
*inst
,
387 enum brw_message_target sfid
,
389 unsigned response_length
,
393 struct intel_context
*intel
= &p
->brw
->intel
;
395 brw_set_src1(p
, inst
, brw_imm_d(0));
397 if (intel
->gen
>= 5) {
398 inst
->bits3
.generic_gen5
.header_present
= header_present
;
399 inst
->bits3
.generic_gen5
.response_length
= response_length
;
400 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
401 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
403 if (intel
->gen
>= 6) {
404 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
405 inst
->header
.destreg__conditionalmod
= sfid
;
407 /* Set Extended Message Descriptor (ex_desc) */
408 inst
->bits2
.send_gen5
.sfid
= sfid
;
409 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
412 inst
->bits3
.generic
.response_length
= response_length
;
413 inst
->bits3
.generic
.msg_length
= msg_length
;
414 inst
->bits3
.generic
.msg_target
= sfid
;
415 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
419 static void brw_set_math_message( struct brw_compile
*p
,
420 struct brw_instruction
*insn
,
427 struct brw_context
*brw
= p
->brw
;
428 struct intel_context
*intel
= &brw
->intel
;
430 unsigned response_length
;
432 /* Infer message length from the function */
434 case BRW_MATH_FUNCTION_POW
:
435 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
436 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
437 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
445 /* Infer response length from the function */
447 case BRW_MATH_FUNCTION_SINCOS
:
448 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
456 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
457 msg_length
, response_length
, false, false);
458 if (intel
->gen
== 5) {
459 insn
->bits3
.math_gen5
.function
= function
;
460 insn
->bits3
.math_gen5
.int_type
= integer_type
;
461 insn
->bits3
.math_gen5
.precision
= low_precision
;
462 insn
->bits3
.math_gen5
.saturate
= saturate
;
463 insn
->bits3
.math_gen5
.data_type
= dataType
;
464 insn
->bits3
.math_gen5
.snapshot
= 0;
466 insn
->bits3
.math
.function
= function
;
467 insn
->bits3
.math
.int_type
= integer_type
;
468 insn
->bits3
.math
.precision
= low_precision
;
469 insn
->bits3
.math
.saturate
= saturate
;
470 insn
->bits3
.math
.data_type
= dataType
;
475 static void brw_set_ff_sync_message(struct brw_compile
*p
,
476 struct brw_instruction
*insn
,
478 GLuint response_length
,
481 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
482 1, response_length
, true, end_of_thread
);
483 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
484 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
485 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
486 insn
->bits3
.urb_gen5
.allocate
= allocate
;
487 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
488 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
491 static void brw_set_urb_message( struct brw_compile
*p
,
492 struct brw_instruction
*insn
,
496 GLuint response_length
,
500 GLuint swizzle_control
)
502 struct brw_context
*brw
= p
->brw
;
503 struct intel_context
*intel
= &brw
->intel
;
505 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
506 msg_length
, response_length
, true, end_of_thread
);
507 if (intel
->gen
== 7) {
508 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
509 insn
->bits3
.urb_gen7
.offset
= offset
;
510 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
511 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
512 /* per_slot_offset = 0 makes it ignore offsets in message header */
513 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
514 insn
->bits3
.urb_gen7
.complete
= complete
;
515 } else if (intel
->gen
>= 5) {
516 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
517 insn
->bits3
.urb_gen5
.offset
= offset
;
518 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
519 insn
->bits3
.urb_gen5
.allocate
= allocate
;
520 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
521 insn
->bits3
.urb_gen5
.complete
= complete
;
523 insn
->bits3
.urb
.opcode
= 0; /* ? */
524 insn
->bits3
.urb
.offset
= offset
;
525 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
526 insn
->bits3
.urb
.allocate
= allocate
;
527 insn
->bits3
.urb
.used
= used
; /* ? */
528 insn
->bits3
.urb
.complete
= complete
;
533 brw_set_dp_write_message(struct brw_compile
*p
,
534 struct brw_instruction
*insn
,
535 GLuint binding_table_index
,
540 GLuint last_render_target
,
541 GLuint response_length
,
542 GLuint end_of_thread
,
543 GLuint send_commit_msg
)
545 struct brw_context
*brw
= p
->brw
;
546 struct intel_context
*intel
= &brw
->intel
;
549 if (intel
->gen
>= 7) {
550 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
551 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
552 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
554 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
555 } else if (intel
->gen
== 6) {
556 /* Use the render cache for all write messages. */
557 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
559 sfid
= BRW_SFID_DATAPORT_WRITE
;
562 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
563 header_present
, end_of_thread
);
565 if (intel
->gen
>= 7) {
566 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
567 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
568 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
569 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
570 } else if (intel
->gen
== 6) {
571 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
572 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
573 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
574 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
575 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
576 } else if (intel
->gen
== 5) {
577 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
578 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
579 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
580 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
581 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
583 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
584 insn
->bits3
.dp_write
.msg_control
= msg_control
;
585 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
586 insn
->bits3
.dp_write
.msg_type
= msg_type
;
587 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
592 brw_set_dp_read_message(struct brw_compile
*p
,
593 struct brw_instruction
*insn
,
594 GLuint binding_table_index
,
599 GLuint response_length
)
601 struct brw_context
*brw
= p
->brw
;
602 struct intel_context
*intel
= &brw
->intel
;
605 if (intel
->gen
>= 7) {
606 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
607 } else if (intel
->gen
== 6) {
608 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
609 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
611 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
613 sfid
= BRW_SFID_DATAPORT_READ
;
616 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
619 if (intel
->gen
>= 7) {
620 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
621 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
622 insn
->bits3
.gen7_dp
.last_render_target
= 0;
623 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
624 } else if (intel
->gen
== 6) {
625 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
626 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
627 insn
->bits3
.gen6_dp
.last_render_target
= 0;
628 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
629 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
630 } else if (intel
->gen
== 5) {
631 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
632 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
633 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
634 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
635 } else if (intel
->is_g4x
) {
636 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
637 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
638 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
639 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
641 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
642 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
643 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
644 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
648 static void brw_set_sampler_message(struct brw_compile
*p
,
649 struct brw_instruction
*insn
,
650 GLuint binding_table_index
,
653 GLuint response_length
,
655 GLuint header_present
,
657 GLuint return_format
)
659 struct brw_context
*brw
= p
->brw
;
660 struct intel_context
*intel
= &brw
->intel
;
662 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
663 response_length
, header_present
, false);
665 if (intel
->gen
>= 7) {
666 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
667 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
668 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
669 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
670 } else if (intel
->gen
>= 5) {
671 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
672 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
673 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
674 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
675 } else if (intel
->is_g4x
) {
676 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
677 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
678 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
680 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
681 insn
->bits3
.sampler
.sampler
= sampler
;
682 insn
->bits3
.sampler
.msg_type
= msg_type
;
683 insn
->bits3
.sampler
.return_format
= return_format
;
688 #define next_insn brw_next_insn
689 struct brw_instruction
*
690 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
692 struct brw_instruction
*insn
;
694 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
696 insn
= &p
->store
[p
->nr_insn
++];
697 memcpy(insn
, p
->current
, sizeof(*insn
));
699 /* Reset this one-shot flag:
702 if (p
->current
->header
.destreg__conditionalmod
) {
703 p
->current
->header
.destreg__conditionalmod
= 0;
704 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
707 insn
->header
.opcode
= opcode
;
711 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
716 struct brw_instruction
*insn
= next_insn(p
, opcode
);
717 brw_set_dest(p
, insn
, dest
);
718 brw_set_src0(p
, insn
, src
);
722 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
726 struct brw_reg src1
)
728 struct brw_instruction
*insn
= next_insn(p
, opcode
);
729 brw_set_dest(p
, insn
, dest
);
730 brw_set_src0(p
, insn
, src0
);
731 brw_set_src1(p
, insn
, src1
);
/***********************************************************************
 * Convenience routines.
 */

/* Define brw_<OP>() as a one-source wrapper around brw_alu1(). */
#define ALU1(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0)           \
{                                                               \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);             \
}

/* Define brw_<OP>() as a two-source wrapper around brw_alu2(). */
#define ALU2(OP)                                                \
struct brw_instruction *brw_##OP(struct brw_compile *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1)           \
{                                                               \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                       \
void brw_##OP(struct brw_compile *p,                                    \
              struct brw_reg dest,                                      \
              struct brw_reg src)                                       \
{                                                                       \
   struct brw_instruction *rnd, *add;                                   \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                 \
   brw_set_dest(p, rnd, dest);                                          \
   brw_set_src0(p, rnd, src);                                           \
                                                                        \
   if (p->brw->intel.gen < 6) {                                         \
      /* turn on round-increments */                                    \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;          \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                    \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;             \
   }                                                                    \
}
810 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
816 if (src0
.type
== BRW_REGISTER_TYPE_F
||
817 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
818 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
819 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
820 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
823 if (src1
.type
== BRW_REGISTER_TYPE_F
||
824 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
825 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
826 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
827 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
830 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
833 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
839 if (src0
.type
== BRW_REGISTER_TYPE_D
||
840 src0
.type
== BRW_REGISTER_TYPE_UD
||
841 src1
.type
== BRW_REGISTER_TYPE_D
||
842 src1
.type
== BRW_REGISTER_TYPE_UD
) {
843 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
846 if (src0
.type
== BRW_REGISTER_TYPE_F
||
847 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
848 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
849 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
850 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
853 if (src1
.type
== BRW_REGISTER_TYPE_F
||
854 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
855 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
856 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
857 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
860 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
861 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
862 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
863 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
865 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
869 void brw_NOP(struct brw_compile
*p
)
871 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
872 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
873 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
874 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
881 /***********************************************************************
882 * Comparisons, if/else/endif
885 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
890 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
892 insn
->header
.execution_size
= 1;
893 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
894 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
896 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
902 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
904 p
->if_stack
[p
->if_stack_depth
] = inst
;
907 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
908 p
->if_stack_array_size
*= 2;
909 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
910 p
->if_stack_array_size
);
914 /* EU takes the value from the flag register and pushes it onto some
915 * sort of a stack (presumably merging with any flag value already on
916 * the stack). Within an if block, the flags at the top of the stack
917 * control execution on each channel of the unit, eg. on each of the
918 * 16 pixel values in our wm programs.
920 * When the matching 'else' instruction is reached (presumably by
921 * countdown of the instruction count patched in by our ELSE/ENDIF
922 * functions), the relevent flags are inverted.
924 * When the matching 'endif' instruction is reached, the flags are
925 * popped off. If the stack is now empty, normal execution resumes.
927 struct brw_instruction
*
928 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
930 struct intel_context
*intel
= &p
->brw
->intel
;
931 struct brw_instruction
*insn
;
933 insn
= next_insn(p
, BRW_OPCODE_IF
);
935 /* Override the defaults for this instruction:
937 if (intel
->gen
< 6) {
938 brw_set_dest(p
, insn
, brw_ip_reg());
939 brw_set_src0(p
, insn
, brw_ip_reg());
940 brw_set_src1(p
, insn
, brw_imm_d(0x0));
941 } else if (intel
->gen
== 6) {
942 brw_set_dest(p
, insn
, brw_imm_w(0));
943 insn
->bits1
.branch_gen6
.jump_count
= 0;
944 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
945 brw_set_src1(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
947 brw_set_dest(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
948 brw_set_src0(p
, insn
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
949 brw_set_src1(p
, insn
, brw_imm_ud(0));
950 insn
->bits3
.break_cont
.jip
= 0;
951 insn
->bits3
.break_cont
.uip
= 0;
954 insn
->header
.execution_size
= execute_size
;
955 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
956 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
957 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
958 if (!p
->single_program_flow
)
959 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
961 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
963 push_if_stack(p
, insn
);
967 /* This function is only used for gen6-style IF instructions with an
968 * embedded comparison (conditional modifier). It is not used on gen7.
970 struct brw_instruction
*
971 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
972 struct brw_reg src0
, struct brw_reg src1
)
974 struct brw_instruction
*insn
;
976 insn
= next_insn(p
, BRW_OPCODE_IF
);
978 brw_set_dest(p
, insn
, brw_imm_w(0));
980 insn
->header
.execution_size
= BRW_EXECUTE_16
;
982 insn
->header
.execution_size
= BRW_EXECUTE_8
;
984 insn
->bits1
.branch_gen6
.jump_count
= 0;
985 brw_set_src0(p
, insn
, src0
);
986 brw_set_src1(p
, insn
, src1
);
988 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
989 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
990 insn
->header
.destreg__conditionalmod
= conditional
;
992 if (!p
->single_program_flow
)
993 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
995 push_if_stack(p
, insn
);
1000 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1003 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1004 struct brw_instruction
*if_inst
,
1005 struct brw_instruction
*else_inst
)
1007 /* The next instruction (where the ENDIF would be, if it existed) */
1008 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1010 assert(p
->single_program_flow
);
1011 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1012 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1013 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1015 /* Convert IF to an ADD instruction that moves the instruction pointer
1016 * to the first instruction of the ELSE block. If there is no ELSE
1017 * block, point to where ENDIF would be. Reverse the predicate.
1019 * There's no need to execute an ENDIF since we don't need to do any
1020 * stack operations, and if we're currently executing, we just want to
1021 * continue normally.
1023 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1024 if_inst
->header
.predicate_inverse
= 1;
1026 if (else_inst
!= NULL
) {
1027 /* Convert ELSE to an ADD instruction that points where the ENDIF
1030 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1032 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1033 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1035 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1040 * Patch IF and ELSE instructions with appropriate jump targets.
1043 patch_IF_ELSE(struct brw_compile
*p
,
1044 struct brw_instruction
*if_inst
,
1045 struct brw_instruction
*else_inst
,
1046 struct brw_instruction
*endif_inst
)
1048 struct intel_context
*intel
= &p
->brw
->intel
;
1050 /* We shouldn't be patching IF and ELSE instructions in single program flow
1051 * mode when gen < 6, because in single program flow mode on those
1052 * platforms, we convert flow control instructions to conditional ADDs that
1053 * operate on IP (see brw_ENDIF).
1055 * However, on Gen6, writing to IP doesn't work in single program flow mode
1056 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1057 * not be updated by non-flow control instructions."). And on later
1058 * platforms, there is no significant benefit to converting control flow
1059 * instructions to conditional ADDs. So we do patch IF and ELSE
1060 * instructions in single program flow mode on those platforms.
1063 assert(!p
->single_program_flow
);
1065 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1066 assert(endif_inst
!= NULL
);
1067 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1070 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1071 * requires 2 chunks.
1073 if (intel
->gen
>= 5)
1076 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1077 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1079 if (else_inst
== NULL
) {
1080 /* Patch IF -> ENDIF */
1081 if (intel
->gen
< 6) {
1082 /* Turn it into an IFF, which means no mask stack operations for
1083 * all-false and jumping past the ENDIF.
1085 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1086 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1087 if_inst
->bits3
.if_else
.pop_count
= 0;
1088 if_inst
->bits3
.if_else
.pad0
= 0;
1089 } else if (intel
->gen
== 6) {
1090 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1091 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1093 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1094 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1097 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1099 /* Patch IF -> ELSE */
1100 if (intel
->gen
< 6) {
1101 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1102 if_inst
->bits3
.if_else
.pop_count
= 0;
1103 if_inst
->bits3
.if_else
.pad0
= 0;
1104 } else if (intel
->gen
== 6) {
1105 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1108 /* Patch ELSE -> ENDIF */
1109 if (intel
->gen
< 6) {
1110 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1113 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1114 else_inst
->bits3
.if_else
.pop_count
= 1;
1115 else_inst
->bits3
.if_else
.pad0
= 0;
1116 } else if (intel
->gen
== 6) {
1117 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1118 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1120 /* The IF instruction's JIP should point just past the ELSE */
1121 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1122 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1123 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1124 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1130 brw_ELSE(struct brw_compile
*p
)
1132 struct intel_context
*intel
= &p
->brw
->intel
;
1133 struct brw_instruction
*insn
;
1135 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1137 if (intel
->gen
< 6) {
1138 brw_set_dest(p
, insn
, brw_ip_reg());
1139 brw_set_src0(p
, insn
, brw_ip_reg());
1140 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1141 } else if (intel
->gen
== 6) {
1142 brw_set_dest(p
, insn
, brw_imm_w(0));
1143 insn
->bits1
.branch_gen6
.jump_count
= 0;
1144 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1145 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1147 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1148 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1149 brw_set_src1(p
, insn
, brw_imm_ud(0));
1150 insn
->bits3
.break_cont
.jip
= 0;
1151 insn
->bits3
.break_cont
.uip
= 0;
1154 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1155 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1156 if (!p
->single_program_flow
)
1157 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1159 push_if_stack(p
, insn
);
1163 brw_ENDIF(struct brw_compile
*p
)
1165 struct intel_context
*intel
= &p
->brw
->intel
;
1166 struct brw_instruction
*insn
;
1167 struct brw_instruction
*else_inst
= NULL
;
1168 struct brw_instruction
*if_inst
= NULL
;
1170 /* Pop the IF and (optional) ELSE instructions from the stack */
1171 p
->if_stack_depth
--;
1172 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1173 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1174 p
->if_stack_depth
--;
1176 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1178 /* In single program flow mode, we can express IF and ELSE instructions
1179 * equivalently as ADD instructions that operate on IP. On platforms prior
1180 * to Gen6, flow control instructions cause an implied thread switch, so
1181 * this is a significant savings.
1183 * However, on Gen6, writing to IP doesn't work in single program flow mode
1184 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1185 * not be updated by non-flow control instructions."). And on later
1186 * platforms, there is no significant benefit to converting control flow
1187 * instructions to conditional ADDs. So we only do this trick on Gen4 and
1190 if (intel
->gen
< 6 && p
->single_program_flow
) {
1191 /* ENDIF is useless; don't bother emitting it. */
1192 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1196 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1198 if (intel
->gen
< 6) {
1199 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1200 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1201 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1202 } else if (intel
->gen
== 6) {
1203 brw_set_dest(p
, insn
, brw_imm_w(0));
1204 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1205 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1207 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1208 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1209 brw_set_src1(p
, insn
, brw_imm_ud(0));
1212 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1213 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1214 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1216 /* Also pop item off the stack in the endif instruction: */
1217 if (intel
->gen
< 6) {
1218 insn
->bits3
.if_else
.jump_count
= 0;
1219 insn
->bits3
.if_else
.pop_count
= 1;
1220 insn
->bits3
.if_else
.pad0
= 0;
1221 } else if (intel
->gen
== 6) {
1222 insn
->bits1
.branch_gen6
.jump_count
= 2;
1224 insn
->bits3
.break_cont
.jip
= 2;
1226 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1229 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1231 struct intel_context
*intel
= &p
->brw
->intel
;
1232 struct brw_instruction
*insn
;
1234 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1235 if (intel
->gen
>= 6) {
1236 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1237 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1238 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1240 brw_set_dest(p
, insn
, brw_ip_reg());
1241 brw_set_src0(p
, insn
, brw_ip_reg());
1242 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1243 insn
->bits3
.if_else
.pad0
= 0;
1244 insn
->bits3
.if_else
.pop_count
= pop_count
;
1246 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1247 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1252 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
)
1254 struct brw_instruction
*insn
;
1256 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1257 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1258 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1259 brw_set_dest(p
, insn
, brw_ip_reg());
1260 brw_set_src0(p
, insn
, brw_ip_reg());
1261 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1263 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1264 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1268 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1270 struct brw_instruction
*insn
;
1271 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1272 brw_set_dest(p
, insn
, brw_ip_reg());
1273 brw_set_src0(p
, insn
, brw_ip_reg());
1274 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1275 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1276 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1277 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1278 insn
->bits3
.if_else
.pad0
= 0;
1279 insn
->bits3
.if_else
.pop_count
= pop_count
;
1285 * The DO/WHILE is just an unterminated loop -- break or continue are
1286 * used for control within the loop. We have a few ways they can be
1289 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1290 * jip and no DO instruction.
1292 * For non-uniform control flow pre-gen6, there's a DO instruction to
1293 * push the mask, and a WHILE to jump back, and BREAK to get out and
1296 * For gen6, there's no more mask stack, so no need for DO. WHILE
1297 * just points back to the first instruction of the loop.
1299 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1301 struct intel_context
*intel
= &p
->brw
->intel
;
1303 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1304 return &p
->store
[p
->nr_insn
];
1306 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1308 /* Override the defaults for this instruction:
1310 brw_set_dest(p
, insn
, brw_null_reg());
1311 brw_set_src0(p
, insn
, brw_null_reg());
1312 brw_set_src1(p
, insn
, brw_null_reg());
1314 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1315 insn
->header
.execution_size
= execute_size
;
1316 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1317 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1318 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1326 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1327 struct brw_instruction
*do_insn
)
1329 struct intel_context
*intel
= &p
->brw
->intel
;
1330 struct brw_instruction
*insn
;
1333 if (intel
->gen
>= 5)
1336 if (intel
->gen
>= 7) {
1337 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1339 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1340 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1341 brw_set_src1(p
, insn
, brw_imm_ud(0));
1342 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1344 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1345 } else if (intel
->gen
== 6) {
1346 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1348 brw_set_dest(p
, insn
, brw_imm_w(0));
1349 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1350 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1351 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1353 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1355 if (p
->single_program_flow
) {
1356 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1358 brw_set_dest(p
, insn
, brw_ip_reg());
1359 brw_set_src0(p
, insn
, brw_ip_reg());
1360 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1361 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1363 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1365 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1367 brw_set_dest(p
, insn
, brw_ip_reg());
1368 brw_set_src0(p
, insn
, brw_ip_reg());
1369 brw_set_src1(p
, insn
, brw_imm_d(0));
1371 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1372 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1373 insn
->bits3
.if_else
.pop_count
= 0;
1374 insn
->bits3
.if_else
.pad0
= 0;
1377 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1378 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1386 void brw_land_fwd_jump(struct brw_compile
*p
,
1387 struct brw_instruction
*jmp_insn
)
1389 struct intel_context
*intel
= &p
->brw
->intel
;
1390 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1393 if (intel
->gen
>= 5)
1396 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1397 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1399 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1404 /* To integrate with the above, it makes sense that the comparison
1405 * instruction should populate the flag register. It might be simpler
1406 * just to use the flag reg for most WM tasks?
1408 void brw_CMP(struct brw_compile
*p
,
1409 struct brw_reg dest
,
1411 struct brw_reg src0
,
1412 struct brw_reg src1
)
1414 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1416 insn
->header
.destreg__conditionalmod
= conditional
;
1417 brw_set_dest(p
, insn
, dest
);
1418 brw_set_src0(p
, insn
, src0
);
1419 brw_set_src1(p
, insn
, src1
);
1421 /* guess_execution_size(insn, src0); */
1424 /* Make it so that future instructions will use the computed flag
1425 * value until brw_set_predicate_control_flag_value() is called
1428 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1430 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1431 p
->flag_value
= 0xff;
1435 /* Issue 'wait' instruction for n1, host could program MMIO
1436 to wake up thread. */
1437 void brw_WAIT (struct brw_compile
*p
)
1439 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1440 struct brw_reg src
= brw_notification_1_reg();
1442 brw_set_dest(p
, insn
, src
);
1443 brw_set_src0(p
, insn
, src
);
1444 brw_set_src1(p
, insn
, brw_null_reg());
1445 insn
->header
.execution_size
= 0; /* must */
1446 insn
->header
.predicate_control
= 0;
1447 insn
->header
.compression_control
= 0;
1451 /***********************************************************************
1452 * Helpers for the various SEND message types:
1455 /** Extended math function, float[8].
1457 void brw_math( struct brw_compile
*p
,
1458 struct brw_reg dest
,
1466 struct intel_context
*intel
= &p
->brw
->intel
;
1468 if (intel
->gen
>= 6) {
1469 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1471 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1472 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1474 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1475 if (intel
->gen
== 6)
1476 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1478 /* Source modifiers are ignored for extended math instructions on Gen6. */
1479 if (intel
->gen
== 6) {
1480 assert(!src
.negate
);
1484 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1485 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1486 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1487 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1489 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1492 /* Math is the same ISA format as other opcodes, except that CondModifier
1493 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1495 insn
->header
.destreg__conditionalmod
= function
;
1496 insn
->header
.saturate
= saturate
;
1498 brw_set_dest(p
, insn
, dest
);
1499 brw_set_src0(p
, insn
, src
);
1500 brw_set_src1(p
, insn
, brw_null_reg());
1502 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1504 /* Example code doesn't set predicate_control for send
1507 insn
->header
.predicate_control
= 0;
1508 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1510 brw_set_dest(p
, insn
, dest
);
1511 brw_set_src0(p
, insn
, src
);
1512 brw_set_math_message(p
,
1515 src
.type
== BRW_REGISTER_TYPE_D
,
1522 /** Extended math function with two source operands (gen6+ only), float[8].
1524 void brw_math2(struct brw_compile
*p
,
1525 struct brw_reg dest
,
1527 struct brw_reg src0
,
1528 struct brw_reg src1
)
1530 struct intel_context
*intel
= &p
->brw
->intel
;
1531 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1533 assert(intel
->gen
>= 6);
1537 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1538 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1539 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1541 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1542 if (intel
->gen
== 6) {
1543 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1544 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1547 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1548 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1549 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1550 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1551 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1553 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1554 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1557 /* Source modifiers are ignored for extended math instructions on Gen6. */
1558 if (intel
->gen
== 6) {
1559 assert(!src0
.negate
);
1561 assert(!src1
.negate
);
1565 /* Math is the same ISA format as other opcodes, except that CondModifier
1566 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1568 insn
->header
.destreg__conditionalmod
= function
;
1570 brw_set_dest(p
, insn
, dest
);
1571 brw_set_src0(p
, insn
, src0
);
1572 brw_set_src1(p
, insn
, src1
);
1576 * Extended math function, float[16].
1577 * Use 2 send instructions.
1579 void brw_math_16( struct brw_compile
*p
,
1580 struct brw_reg dest
,
1587 struct intel_context
*intel
= &p
->brw
->intel
;
1588 struct brw_instruction
*insn
;
1590 if (intel
->gen
>= 6) {
1591 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1593 /* Math is the same ISA format as other opcodes, except that CondModifier
1594 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1596 insn
->header
.destreg__conditionalmod
= function
;
1597 insn
->header
.saturate
= saturate
;
1599 /* Source modifiers are ignored for extended math instructions. */
1600 assert(!src
.negate
);
1603 brw_set_dest(p
, insn
, dest
);
1604 brw_set_src0(p
, insn
, src
);
1605 brw_set_src1(p
, insn
, brw_null_reg());
1609 /* First instruction:
1611 brw_push_insn_state(p
);
1612 brw_set_predicate_control_flag_value(p
, 0xff);
1613 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1615 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1616 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1618 brw_set_dest(p
, insn
, dest
);
1619 brw_set_src0(p
, insn
, src
);
1620 brw_set_math_message(p
,
1623 BRW_MATH_INTEGER_UNSIGNED
,
1626 BRW_MATH_DATA_VECTOR
);
1628 /* Second instruction:
1630 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1631 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1632 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1634 brw_set_dest(p
, insn
, offset(dest
,1));
1635 brw_set_src0(p
, insn
, src
);
1636 brw_set_math_message(p
,
1639 BRW_MATH_INTEGER_UNSIGNED
,
1642 BRW_MATH_DATA_VECTOR
);
1644 brw_pop_insn_state(p
);
1649 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1650 * using a constant offset per channel.
1652 * The offset must be aligned to oword size (16 bytes). Used for
1653 * register spilling.
1655 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1660 struct intel_context
*intel
= &p
->brw
->intel
;
1661 uint32_t msg_control
, msg_type
;
1664 if (intel
->gen
>= 6)
1667 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1669 if (num_regs
== 1) {
1670 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1673 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1677 /* Set up the message header. This is g0, with g0.2 filled with
1678 * the offset. We don't want to leave our offset around in g0 or
1679 * it'll screw up texture samples, so set it up inside the message
1683 brw_push_insn_state(p
);
1684 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1685 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1687 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1689 /* set message header global offset field (reg 0, element 2) */
1691 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1693 2), BRW_REGISTER_TYPE_UD
),
1694 brw_imm_ud(offset
));
1696 brw_pop_insn_state(p
);
1700 struct brw_reg dest
;
1701 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1702 int send_commit_msg
;
1703 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1704 BRW_REGISTER_TYPE_UW
);
1706 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1707 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1708 src_header
= vec16(src_header
);
1710 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1711 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1713 /* Until gen6, writes followed by reads from the same location
1714 * are not guaranteed to be ordered unless write_commit is set.
1715 * If set, then a no-op write is issued to the destination
1716 * register to set a dependency, and a read from the destination
1717 * can be used to ensure the ordering.
1719 * For gen6, only writes between different threads need ordering
1720 * protection. Our use of DP writes is all about register
1721 * spilling within a thread.
1723 if (intel
->gen
>= 6) {
1724 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1725 send_commit_msg
= 0;
1728 send_commit_msg
= 1;
1731 brw_set_dest(p
, insn
, dest
);
1732 if (intel
->gen
>= 6) {
1733 brw_set_src0(p
, insn
, mrf
);
1735 brw_set_src0(p
, insn
, brw_null_reg());
1738 if (intel
->gen
>= 6)
1739 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1741 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1743 brw_set_dp_write_message(p
,
1745 255, /* binding table index (255=stateless) */
1749 true, /* header_present */
1750 0, /* not a render target */
1751 send_commit_msg
, /* response_length */
1759 * Read a block of owords (half a GRF each) from the scratch buffer
1760 * using a constant index per channel.
1762 * Offset must be aligned to oword size (16 bytes). Used for register
1766 brw_oword_block_read_scratch(struct brw_compile
*p
,
1767 struct brw_reg dest
,
1772 struct intel_context
*intel
= &p
->brw
->intel
;
1773 uint32_t msg_control
;
1776 if (intel
->gen
>= 6)
1779 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1780 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1782 if (num_regs
== 1) {
1783 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1786 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1791 brw_push_insn_state(p
);
1792 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1793 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1795 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1797 /* set message header global offset field (reg 0, element 2) */
1799 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1801 2), BRW_REGISTER_TYPE_UD
),
1802 brw_imm_ud(offset
));
1804 brw_pop_insn_state(p
);
1808 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1810 assert(insn
->header
.predicate_control
== 0);
1811 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1812 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1814 brw_set_dest(p
, insn
, dest
); /* UW? */
1815 if (intel
->gen
>= 6) {
1816 brw_set_src0(p
, insn
, mrf
);
1818 brw_set_src0(p
, insn
, brw_null_reg());
1821 brw_set_dp_read_message(p
,
1823 255, /* binding table index (255=stateless) */
1825 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1826 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1833 * Read a float[4] vector from the data port Data Cache (const buffer).
1834 * Location (in buffer) should be a multiple of 16.
1835 * Used for fetching shader constants.
1837 void brw_oword_block_read(struct brw_compile
*p
,
1838 struct brw_reg dest
,
1841 uint32_t bind_table_index
)
1843 struct intel_context
*intel
= &p
->brw
->intel
;
1845 /* On newer hardware, offset is in units of owords. */
1846 if (intel
->gen
>= 6)
1849 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1851 brw_push_insn_state(p
);
1852 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1853 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1854 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1856 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1858 /* set message header global offset field (reg 0, element 2) */
1860 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1862 2), BRW_REGISTER_TYPE_UD
),
1863 brw_imm_ud(offset
));
1865 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1866 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1868 /* cast dest to a uword[8] vector */
1869 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1871 brw_set_dest(p
, insn
, dest
);
1872 if (intel
->gen
>= 6) {
1873 brw_set_src0(p
, insn
, mrf
);
1875 brw_set_src0(p
, insn
, brw_null_reg());
1878 brw_set_dp_read_message(p
,
1881 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1882 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1883 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1885 1); /* response_length (1 reg, 2 owords!) */
1887 brw_pop_insn_state(p
);
1891 * Read a set of dwords from the data port Data Cache (const buffer).
1893 * Location (in buffer) appears as UD offsets in the register after
1894 * the provided mrf header reg.
1896 void brw_dword_scattered_read(struct brw_compile
*p
,
1897 struct brw_reg dest
,
1899 uint32_t bind_table_index
)
1901 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1903 brw_push_insn_state(p
);
1904 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1905 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1906 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1907 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1908 brw_pop_insn_state(p
);
1910 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1911 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1913 /* cast dest to a uword[8] vector */
1914 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1916 brw_set_dest(p
, insn
, dest
);
1917 brw_set_src0(p
, insn
, brw_null_reg());
1919 brw_set_dp_read_message(p
,
1922 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1923 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1924 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1926 1); /* response_length */
1932 * Read float[4] constant(s) from VS constant buffer.
1933 * For relative addressing, two float[4] constants will be read into 'dest'.
1934 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1936 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1937 struct brw_reg dest
,
1939 GLuint bind_table_index
)
1941 struct intel_context
*intel
= &p
->brw
->intel
;
1942 struct brw_instruction
*insn
;
1943 GLuint msg_reg_nr
= 1;
1945 if (intel
->gen
>= 6)
1948 /* Setup MRF[1] with location/offset into const buffer */
1949 brw_push_insn_state(p
);
1950 brw_set_access_mode(p
, BRW_ALIGN_1
);
1951 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1952 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1953 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1954 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1955 BRW_REGISTER_TYPE_UD
),
1956 brw_imm_ud(location
));
1957 brw_pop_insn_state(p
);
1959 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1961 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1962 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1963 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1964 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1966 brw_set_dest(p
, insn
, dest
);
1967 if (intel
->gen
>= 6) {
1968 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1970 brw_set_src0(p
, insn
, brw_null_reg());
1973 brw_set_dp_read_message(p
,
1977 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1978 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1980 1); /* response_length (1 Oword) */
1984 * Read a float[4] constant per vertex from VS constant buffer, with
1985 * relative addressing.
1987 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1988 struct brw_reg dest
,
1989 struct brw_reg addr_reg
,
1991 GLuint bind_table_index
)
1993 struct intel_context
*intel
= &p
->brw
->intel
;
1994 struct brw_reg src
= brw_vec8_grf(0, 0);
1997 /* Setup MRF[1] with offset into const buffer */
1998 brw_push_insn_state(p
);
1999 brw_set_access_mode(p
, BRW_ALIGN_1
);
2000 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2001 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2002 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2004 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2007 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
2008 addr_reg
, brw_imm_d(offset
));
2009 brw_pop_insn_state(p
);
2011 gen6_resolve_implied_move(p
, &src
, 0);
2012 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2014 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
2015 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2016 insn
->header
.destreg__conditionalmod
= 0;
2017 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
2019 brw_set_dest(p
, insn
, dest
);
2020 brw_set_src0(p
, insn
, src
);
2022 if (intel
->gen
>= 6)
2023 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2024 else if (intel
->gen
== 5 || intel
->is_g4x
)
2025 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2027 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2029 brw_set_dp_read_message(p
,
2032 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2034 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2036 1); /* response_length */
2041 void brw_fb_WRITE(struct brw_compile
*p
,
2044 struct brw_reg src0
,
2045 GLuint binding_table_index
,
2047 GLuint response_length
,
2049 bool header_present
)
2051 struct intel_context
*intel
= &p
->brw
->intel
;
2052 struct brw_instruction
*insn
;
2053 GLuint msg_control
, msg_type
;
2054 struct brw_reg dest
;
2056 if (dispatch_width
== 16)
2057 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2059 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2061 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2062 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2064 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2066 /* The execution mask is ignored for render target writes. */
2067 insn
->header
.predicate_control
= 0;
2068 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2070 if (intel
->gen
>= 6) {
2071 /* headerless version, just submit color payload */
2072 src0
= brw_message_reg(msg_reg_nr
);
2074 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2076 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2078 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2081 if (dispatch_width
== 16)
2082 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2084 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2086 brw_set_dest(p
, insn
, dest
);
2087 brw_set_src0(p
, insn
, src0
);
2088 brw_set_dp_write_message(p
,
2090 binding_table_index
,
2095 1, /* last render target write */
2098 0 /* send_commit_msg */);
2103 * Texture sample instruction.
2104 * Note: the msg_type plus msg_length values determine exactly what kind
2105 * of sampling operation is performed. See volume 4, page 161 of docs.
/*
 * brw_SAMPLE: emit a SEND instruction that carries a sampler message.
 *
 * What the code below shows, in order:
 *  - In the `writemask != WRITEMASK_XYZW` branch it declares dst_offset,
 *    newmask and len, walks the four writemask bits with two loops that
 *    share the index i, and compares newmask against writemask.
 *  - A message header is built in m1 = brw_message_reg(msg_reg_nr): with
 *    compression off and mask control disabled, g0 is copied into m1 and
 *    (~newmask & WRITEMASK_XYZW) << 12 is stored in UD element 2 of m1.
 *    src0 is then replaced by the null register, dest is advanced by
 *    dst_offset, and response_length is assigned len * 2.
 *  - gen6_resolve_implied_move() is called, a BRW_OPCODE_SEND is
 *    obtained from next_insn(), predicate control is zeroed, compression
 *    is set to BRW_COMPRESSION_NONE, msg_reg_nr is stored in
 *    header.destreg__conditionalmod, and dest, src0 and the sampler
 *    message are set on the instruction.
 *  - A MOV of the register at dest + (response_length - 1) onto itself
 *    is emitted with compression disabled.
 *
 * NOTE(review): writemask and msg_reg_nr are used below but do not
 * appear in the parameter list as shown here. The loop bodies, the
 * updates of need_stall and dispatch_16, and the guards around the
 * header construction, the response_length assignment and the trailing
 * MOV are likewise not shown -- confirm the exact nesting against the
 * complete function before relying on this summary.
 */
2107 void brw_SAMPLE(struct brw_compile
*p
,
2108 struct brw_reg dest
,
2110 struct brw_reg src0
,
2111 GLuint binding_table_index
,
2115 GLuint response_length
,
2117 GLuint header_present
,
2119 GLuint return_format
)
2121 struct intel_context
*intel
= &p
->brw
->intel
;
2122 bool need_stall
= 0;
2124 if (writemask
== 0) {
2125 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2129 /* Hardware doesn't do destination dependency checking on send
2130 * instructions properly. Add a workaround which generates the
2131 * dependency by other means. In practice it seems like this bug
2132 * only crops up for texture samples, and only where registers are
2133 * written by the send and then written again later without being
2134 * read in between. Luckily for us, we already track that
2135 * information and use it to modify the writemask for the
2136 * instruction, so that is a guide for whether a workaround is
2139 if (writemask
!= WRITEMASK_XYZW
) {
2140 GLuint dst_offset
= 0;
2141 GLuint i
, newmask
= 0, len
= 0;
2143 for (i
= 0; i
< 4; i
++) {
2144 if (writemask
& (1<<i
))
2148 for (; i
< 4; i
++) {
2149 if (!(writemask
& (1<<i
)))
2155 if (newmask
!= writemask
) {
2157 /* printf("need stall %x %x\n", newmask , writemask); */
2160 bool dispatch_16
= false;
2162 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2164 guess_execution_size(p
, p
->current
, dest
);
2165 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2168 newmask
= ~newmask
& WRITEMASK_XYZW
;
/* The header is written with compression off and mask control disabled;
 * the saved instruction state is restored by the pop further down.
 */
2170 brw_push_insn_state(p
);
2172 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2173 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2175 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2176 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
/* Store the complemented mask, shifted to bit 12, in UD element 2 of m1. */
2177 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2179 brw_pop_insn_state(p
);
2181 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2182 dest
= offset(dest
, dst_offset
);
2184 /* For 16-wide dispatch, masked channels are skipped in the
2185 * response. For 8-wide, masked channels still take up slots,
2186 * and are just not written to.
2189 response_length
= len
* 2;
2194 struct brw_instruction
*insn
;
2196 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2198 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2199 insn
->header
.predicate_control
= 0; /* XXX */
2200 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2202 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2204 brw_set_dest(p
, insn
, dest
);
2205 brw_set_src0(p
, insn
, src0
);
2206 brw_set_sampler_message(p
, insn
,
2207 binding_table_index
,
2218 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2220 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2222 brw_push_insn_state(p
);
2223 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2224 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2225 retype(reg
, BRW_REGISTER_TYPE_UD
));
2226 brw_pop_insn_state(p
);
2231 /* All these variables are pretty confusing - we might be better off
2232 * using bitmasks and macros for this, in the old style. Or perhaps
2233 * just having the caller instantiate the fields in dword3 itself.
/*
 * brw_urb_WRITE: emit a SEND instruction carrying a URB message.
 *
 * Steps shown below:
 *  - gen6_resolve_implied_move() is called on src0 with msg_reg_nr.
 *  - When intel->gen == 7, an OR is emitted in Align1 access mode (inside
 *    a push/pop of the instruction state) that writes g0.5 | 0xff00, as
 *    UD, into element 5 of message register msg_reg_nr.
 *  - A BRW_OPCODE_SEND is obtained from next_insn(); dest, src0 and an
 *    immediate-zero src1 are set, msg_reg_nr is stored in
 *    header.destreg__conditionalmod, and brw_set_urb_message() is called.
 *
 * msg_length is asserted to be below BRW_MAX_MRF.
 *
 * NOTE(review): msg_reg_nr and msg_length are used below but do not
 * appear in the parameter list as shown here, and the arguments passed
 * to brw_set_urb_message() after `p` are not shown -- confirm against
 * the complete function.
 */
2235 void brw_urb_WRITE(struct brw_compile
*p
,
2236 struct brw_reg dest
,
2238 struct brw_reg src0
,
2242 GLuint response_length
,
2244 bool writes_complete
,
2248 struct intel_context
*intel
= &p
->brw
->intel
;
2249 struct brw_instruction
*insn
;
2251 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2253 if (intel
->gen
== 7) {
2254 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2255 brw_push_insn_state(p
);
2256 brw_set_access_mode(p
, BRW_ALIGN_1
);
2257 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2258 BRW_REGISTER_TYPE_UD
),
2259 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2260 brw_imm_ud(0xff00));
2261 brw_pop_insn_state(p
);
2264 insn
= next_insn(p
, BRW_OPCODE_SEND
);
/* Sanity check on the message length against the MRF limit constant. */
2266 assert(msg_length
< BRW_MAX_MRF
);
2268 brw_set_dest(p
, insn
, dest
);
2269 brw_set_src0(p
, insn
, src0
);
2270 brw_set_src1(p
, insn
, brw_imm_d(0));
/* Record the message register number in the SEND header. */
2273 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2275 brw_set_urb_message(p
,
/*
 * brw_find_next_block_end: scan the instruction store forward from
 * start + 1 up to p->nr_insn, looking at each instruction's opcode.
 * ENDIF, ELSE and WHILE share one switch arm; if the loop runs to
 * completion the "not reached" assertion fires.
 *
 * NOTE(review): the statement executed for a matching opcode and the
 * function's return statements are not shown here -- presumably the
 * index of the first matching instruction is returned (callers compute
 * `brw_find_next_block_end(p, ip) - ip`); confirm.
 */
2288 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2292 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2293 struct brw_instruction
*insn
= &p
->store
[ip
];
2295 switch (insn
->header
.opcode
) {
2296 case BRW_OPCODE_ENDIF
:
2297 case BRW_OPCODE_ELSE
:
2298 case BRW_OPCODE_WHILE
:
2302 assert(!"not reached");
2306 /* There is no DO instruction on gen6, so to find the end of the loop
2307 * we have to see if the loop is jumping back before our start
/*
 * brw_find_loop_end: scan forward from start + 1 up to p->nr_insn for a
 * WHILE instruction whose backward jump lands at or before `start`.
 *
 * For each WHILE the jump distance is read from
 * bits1.branch_gen6.jump_count when intel->gen == 6 and from
 * bits3.break_cont.jip otherwise; the test is `ip + jip / br <= start`.
 * If no instruction satisfies it, the "not reached" assertion fires.
 *
 * NOTE(review): the declarations of ip and br, the statement executed
 * when the test succeeds, and the return statements are not shown here
 * -- presumably the index of the matching WHILE is returned; confirm.
 */
2311 brw_find_loop_end(struct brw_compile
*p
, int start
)
2313 struct intel_context
*intel
= &p
->brw
->intel
;
2317 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2318 struct brw_instruction
*insn
= &p
->store
[ip
];
2320 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
/* The field holding the jump distance depends on the hardware generation. */
2321 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2322 : insn
->bits3
.break_cont
.jip
;
/* jip is divided by br to convert it to an instruction-index delta. */
2323 if (ip
+ jip
/ br
<= start
)
2327 assert(!"not reached");
2331 /* After program generation, go back and update the UIP and JIP of
2332 * BREAK and CONT instructions to their correct locations.
/*
 * brw_set_uip_jip: walk every instruction in p->store and fill in the
 * bits3.break_cont.jip and .uip fields of BREAK and CONTINUE
 * instructions.
 *
 *  - BREAK:    jip = br * (brw_find_next_block_end(p, ip) - ip)
 *              uip = br * (brw_find_loop_end(p, ip) - ip + adj),
 *              where adj is 1 when intel->gen == 6 and 0 otherwise.
 *  - CONTINUE: jip = br * (brw_find_next_block_end(p, ip) - ip)
 *              uip = br * (brw_find_loop_end(p, ip) - ip)
 *              followed by assertions that uip and jip are non-zero.
 *
 * NOTE(review): the declarations of ip and br, any early exit before
 * the loop, and the statements that end each switch arm are not shown
 * here -- confirm against the complete function.
 */
2335 brw_set_uip_jip(struct brw_compile
*p
)
2337 struct intel_context
*intel
= &p
->brw
->intel
;
2344 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2345 struct brw_instruction
*insn
= &p
->store
[ip
];
2347 switch (insn
->header
.opcode
) {
2348 case BRW_OPCODE_BREAK
:
2349 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2350 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2351 insn
->bits3
.break_cont
.uip
=
2352 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
/* CONTINUE uses the same JIP formula; its UIP has no gen6 adjustment. */
2354 case BRW_OPCODE_CONTINUE
:
2355 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2356 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
/* Both distances are asserted to be non-zero. */
2358 assert(insn
->bits3
.break_cont
.uip
!= 0);
2359 assert(insn
->bits3
.break_cont
.jip
!= 0);
/*
 * brw_ff_sync: emit a SEND instruction carrying an FF_SYNC message.
 *
 * Steps shown below: gen6_resolve_implied_move() is called on src0 with
 * msg_reg_nr; a BRW_OPCODE_SEND is obtained from next_insn(); dest, src0
 * and an immediate-zero src1 are set; msg_reg_nr is stored in
 * header.destreg__conditionalmod; and brw_set_ff_sync_message() is
 * called.
 *
 * NOTE(review): msg_reg_nr is used below but does not appear in the
 * parameter list as shown here, and neither the use of `intel` nor the
 * arguments passed to brw_set_ff_sync_message() after `p` are shown --
 * confirm against the complete function.
 */
2365 void brw_ff_sync(struct brw_compile
*p
,
2366 struct brw_reg dest
,
2368 struct brw_reg src0
,
2370 GLuint response_length
,
2373 struct intel_context
*intel
= &p
->brw
->intel
;
2374 struct brw_instruction
*insn
;
2376 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2378 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2379 brw_set_dest(p
, insn
, dest
);
2380 brw_set_src0(p
, insn
, src0
);
2381 brw_set_src1(p
, insn
, brw_imm_d(0));
/* Record the message register number in the SEND header. */
2384 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2386 brw_set_ff_sync_message(p
,
2394 * Emit the SEND instruction necessary to generate stream output data on Gen6
2395 * (for transform feedback).
2397 * If send_commit_msg is true, this is the last piece of stream output data
2398 * from this thread, so send the data as a committed write. According to the
2399 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2401 * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2402 * writes are complete by sending the final write as a committed write."
2405 brw_svb_write(struct brw_compile
*p
,
2406 struct brw_reg dest
,
2408 struct brw_reg src0
,
2409 GLuint binding_table_index
,
2410 bool send_commit_msg
)
2412 struct brw_instruction
*insn
;
2414 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2416 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2417 brw_set_dest(p
, insn
, dest
);
2418 brw_set_src0(p
, insn
, src0
);
2419 brw_set_src1(p
, insn
, brw_imm_d(0));
2420 brw_set_dp_write_message(p
, insn
,
2421 binding_table_index
,
2422 0, /* msg_control: ignored */
2423 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE
,
2425 true, /* header_present */
2426 0, /* last_render_target: ignored */
2427 send_commit_msg
, /* response_length */
2428 0, /* end_of_thread */
2429 send_commit_msg
); /* send_commit_msg */