2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
146 extern int reg_type_size
[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
374 * Set the Message Descriptor and Extended Message Descriptor fields
377 * \note This zeroes out the Function Control bits, so it must be called
378 * \b before filling out any message-specific data. Callers can
379 * choose not to fill in irrelevant bits; they will be zero.
382 brw_set_message_descriptor(struct brw_compile
*p
,
383 struct brw_instruction
*inst
,
384 enum brw_message_target sfid
,
386 unsigned response_length
,
390 struct intel_context
*intel
= &p
->brw
->intel
;
392 brw_set_src1(p
, inst
, brw_imm_d(0));
394 if (intel
->gen
>= 5) {
395 inst
->bits3
.generic_gen5
.header_present
= header_present
;
396 inst
->bits3
.generic_gen5
.response_length
= response_length
;
397 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
398 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
400 if (intel
->gen
>= 6) {
401 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
402 inst
->header
.destreg__conditionalmod
= sfid
;
404 /* Set Extended Message Descriptor (ex_desc) */
405 inst
->bits2
.send_gen5
.sfid
= sfid
;
406 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
409 inst
->bits3
.generic
.response_length
= response_length
;
410 inst
->bits3
.generic
.msg_length
= msg_length
;
411 inst
->bits3
.generic
.msg_target
= sfid
;
412 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
416 static void brw_set_math_message( struct brw_compile
*p
,
417 struct brw_instruction
*insn
,
424 struct brw_context
*brw
= p
->brw
;
425 struct intel_context
*intel
= &brw
->intel
;
427 unsigned response_length
;
429 /* Infer message length from the function */
431 case BRW_MATH_FUNCTION_POW
:
432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
442 /* Infer response length from the function */
444 case BRW_MATH_FUNCTION_SINCOS
:
445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
453 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
454 msg_length
, response_length
, false, false);
455 if (intel
->gen
== 5) {
456 insn
->bits3
.math_gen5
.function
= function
;
457 insn
->bits3
.math_gen5
.int_type
= integer_type
;
458 insn
->bits3
.math_gen5
.precision
= low_precision
;
459 insn
->bits3
.math_gen5
.saturate
= saturate
;
460 insn
->bits3
.math_gen5
.data_type
= dataType
;
461 insn
->bits3
.math_gen5
.snapshot
= 0;
463 insn
->bits3
.math
.function
= function
;
464 insn
->bits3
.math
.int_type
= integer_type
;
465 insn
->bits3
.math
.precision
= low_precision
;
466 insn
->bits3
.math
.saturate
= saturate
;
467 insn
->bits3
.math
.data_type
= dataType
;
472 static void brw_set_ff_sync_message(struct brw_compile
*p
,
473 struct brw_instruction
*insn
,
475 GLuint response_length
,
478 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
479 1, response_length
, true, end_of_thread
);
480 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
481 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
482 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
483 insn
->bits3
.urb_gen5
.allocate
= allocate
;
484 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
485 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
488 static void brw_set_urb_message( struct brw_compile
*p
,
489 struct brw_instruction
*insn
,
493 GLuint response_length
,
497 GLuint swizzle_control
)
499 struct brw_context
*brw
= p
->brw
;
500 struct intel_context
*intel
= &brw
->intel
;
502 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
503 msg_length
, response_length
, true, end_of_thread
);
504 if (intel
->gen
== 7) {
505 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
506 insn
->bits3
.urb_gen7
.offset
= offset
;
507 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
508 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
509 /* per_slot_offset = 0 makes it ignore offsets in message header */
510 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
511 insn
->bits3
.urb_gen7
.complete
= complete
;
512 } else if (intel
->gen
>= 5) {
513 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
514 insn
->bits3
.urb_gen5
.offset
= offset
;
515 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
516 insn
->bits3
.urb_gen5
.allocate
= allocate
;
517 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
518 insn
->bits3
.urb_gen5
.complete
= complete
;
520 insn
->bits3
.urb
.opcode
= 0; /* ? */
521 insn
->bits3
.urb
.offset
= offset
;
522 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
523 insn
->bits3
.urb
.allocate
= allocate
;
524 insn
->bits3
.urb
.used
= used
; /* ? */
525 insn
->bits3
.urb
.complete
= complete
;
530 brw_set_dp_write_message(struct brw_compile
*p
,
531 struct brw_instruction
*insn
,
532 GLuint binding_table_index
,
537 GLuint last_render_target
,
538 GLuint response_length
,
539 GLuint end_of_thread
,
540 GLuint send_commit_msg
)
542 struct brw_context
*brw
= p
->brw
;
543 struct intel_context
*intel
= &brw
->intel
;
546 if (intel
->gen
>= 7) {
547 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
548 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
549 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
551 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
552 } else if (intel
->gen
== 6) {
553 /* Use the render cache for all write messages. */
554 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
556 sfid
= BRW_SFID_DATAPORT_WRITE
;
559 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
560 header_present
, end_of_thread
);
562 if (intel
->gen
>= 7) {
563 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
564 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
565 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
566 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
567 } else if (intel
->gen
== 6) {
568 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
569 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
570 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
571 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
572 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
573 } else if (intel
->gen
== 5) {
574 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
575 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
576 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
577 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
578 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
580 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
581 insn
->bits3
.dp_write
.msg_control
= msg_control
;
582 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
583 insn
->bits3
.dp_write
.msg_type
= msg_type
;
584 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
589 brw_set_dp_read_message(struct brw_compile
*p
,
590 struct brw_instruction
*insn
,
591 GLuint binding_table_index
,
596 GLuint response_length
)
598 struct brw_context
*brw
= p
->brw
;
599 struct intel_context
*intel
= &brw
->intel
;
602 if (intel
->gen
>= 7) {
603 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
604 } else if (intel
->gen
== 6) {
605 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
606 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
608 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
610 sfid
= BRW_SFID_DATAPORT_READ
;
613 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
616 if (intel
->gen
>= 7) {
617 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
618 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
619 insn
->bits3
.gen7_dp
.last_render_target
= 0;
620 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
621 } else if (intel
->gen
== 6) {
622 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
623 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
624 insn
->bits3
.gen6_dp
.last_render_target
= 0;
625 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
626 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
627 } else if (intel
->gen
== 5) {
628 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
629 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
630 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
631 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
632 } else if (intel
->is_g4x
) {
633 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
634 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
635 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
636 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
638 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
639 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
640 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
641 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
645 static void brw_set_sampler_message(struct brw_compile
*p
,
646 struct brw_instruction
*insn
,
647 GLuint binding_table_index
,
650 GLuint response_length
,
652 GLuint header_present
,
654 GLuint return_format
)
656 struct brw_context
*brw
= p
->brw
;
657 struct intel_context
*intel
= &brw
->intel
;
659 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
660 response_length
, header_present
, false);
662 if (intel
->gen
>= 7) {
663 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
664 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
665 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
666 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
667 } else if (intel
->gen
>= 5) {
668 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
669 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
670 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
671 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
672 } else if (intel
->is_g4x
) {
673 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
674 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
675 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
677 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
678 insn
->bits3
.sampler
.sampler
= sampler
;
679 insn
->bits3
.sampler
.msg_type
= msg_type
;
680 insn
->bits3
.sampler
.return_format
= return_format
;
685 #define next_insn brw_next_insn
686 struct brw_instruction
*
687 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
689 struct brw_instruction
*insn
;
691 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
693 insn
= &p
->store
[p
->nr_insn
++];
694 memcpy(insn
, p
->current
, sizeof(*insn
));
696 /* Reset this one-shot flag:
699 if (p
->current
->header
.destreg__conditionalmod
) {
700 p
->current
->header
.destreg__conditionalmod
= 0;
701 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
704 insn
->header
.opcode
= opcode
;
708 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
713 struct brw_instruction
*insn
= next_insn(p
, opcode
);
714 brw_set_dest(p
, insn
, dest
);
715 brw_set_src0(p
, insn
, src
);
719 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
723 struct brw_reg src1
)
725 struct brw_instruction
*insn
= next_insn(p
, opcode
);
726 brw_set_dest(p
, insn
, dest
);
727 brw_set_src0(p
, insn
, src0
);
728 brw_set_src1(p
, insn
, src1
);
733 /***********************************************************************
734 * Convenience routines.
737 struct brw_instruction *brw_##OP(struct brw_compile *p, \
738 struct brw_reg dest, \
739 struct brw_reg src0) \
741 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
745 struct brw_instruction *brw_##OP(struct brw_compile *p, \
746 struct brw_reg dest, \
747 struct brw_reg src0, \
748 struct brw_reg src1) \
750 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
753 /* Rounding operations (other than RNDD) require two instructions - the first
754 * stores a rounded value (possibly the wrong way) in the dest register, but
755 * also sets a per-channel "increment bit" in the flag register. A predicated
756 * add of 1.0 fixes dest to contain the desired result.
758 * Sandybridge and later appear to round correctly without an ADD.
761 void brw_##OP(struct brw_compile *p, \
762 struct brw_reg dest, \
763 struct brw_reg src) \
765 struct brw_instruction *rnd, *add; \
766 rnd = next_insn(p, BRW_OPCODE_##OP); \
767 brw_set_dest(p, rnd, dest); \
768 brw_set_src0(p, rnd, src); \
770 if (p->brw->intel.gen < 6) { \
771 /* turn on round-increments */ \
772 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
773 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
774 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
807 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
813 if (src0
.type
== BRW_REGISTER_TYPE_F
||
814 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
815 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
816 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
817 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
820 if (src1
.type
== BRW_REGISTER_TYPE_F
||
821 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
822 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
823 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
824 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
827 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
830 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
836 if (src0
.type
== BRW_REGISTER_TYPE_D
||
837 src0
.type
== BRW_REGISTER_TYPE_UD
||
838 src1
.type
== BRW_REGISTER_TYPE_D
||
839 src1
.type
== BRW_REGISTER_TYPE_UD
) {
840 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
843 if (src0
.type
== BRW_REGISTER_TYPE_F
||
844 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
845 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
846 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
847 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
850 if (src1
.type
== BRW_REGISTER_TYPE_F
||
851 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
852 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
853 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
854 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
857 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
858 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
859 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
860 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
862 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
866 void brw_NOP(struct brw_compile
*p
)
868 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
869 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
870 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
871 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
878 /***********************************************************************
879 * Comparisons, if/else/endif
882 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
887 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
889 insn
->header
.execution_size
= 1;
890 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
891 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
893 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
899 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
901 p
->if_stack
[p
->if_stack_depth
] = inst
;
904 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
905 p
->if_stack_array_size
*= 2;
906 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
907 p
->if_stack_array_size
);
911 /* EU takes the value from the flag register and pushes it onto some
912 * sort of a stack (presumably merging with any flag value already on
913 * the stack). Within an if block, the flags at the top of the stack
914 * control execution on each channel of the unit, eg. on each of the
915 * 16 pixel values in our wm programs.
917 * When the matching 'else' instruction is reached (presumably by
918 * countdown of the instruction count patched in by our ELSE/ENDIF
919 * functions), the relevent flags are inverted.
921 * When the matching 'endif' instruction is reached, the flags are
922 * popped off. If the stack is now empty, normal execution resumes.
924 struct brw_instruction
*
925 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
927 struct intel_context
*intel
= &p
->brw
->intel
;
928 struct brw_instruction
*insn
;
930 insn
= next_insn(p
, BRW_OPCODE_IF
);
932 /* Override the defaults for this instruction:
934 if (intel
->gen
< 6) {
935 brw_set_dest(p
, insn
, brw_ip_reg());
936 brw_set_src0(p
, insn
, brw_ip_reg());
937 brw_set_src1(p
, insn
, brw_imm_d(0x0));
938 } else if (intel
->gen
== 6) {
939 brw_set_dest(p
, insn
, brw_imm_w(0));
940 insn
->bits1
.branch_gen6
.jump_count
= 0;
941 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
942 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
944 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
945 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
946 brw_set_src1(p
, insn
, brw_imm_ud(0));
947 insn
->bits3
.break_cont
.jip
= 0;
948 insn
->bits3
.break_cont
.uip
= 0;
951 insn
->header
.execution_size
= execute_size
;
952 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
953 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
954 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
955 if (!p
->single_program_flow
)
956 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
958 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
960 push_if_stack(p
, insn
);
964 /* This function is only used for gen6-style IF instructions with an
965 * embedded comparison (conditional modifier). It is not used on gen7.
967 struct brw_instruction
*
968 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
969 struct brw_reg src0
, struct brw_reg src1
)
971 struct brw_instruction
*insn
;
973 insn
= next_insn(p
, BRW_OPCODE_IF
);
975 brw_set_dest(p
, insn
, brw_imm_w(0));
977 insn
->header
.execution_size
= BRW_EXECUTE_16
;
979 insn
->header
.execution_size
= BRW_EXECUTE_8
;
981 insn
->bits1
.branch_gen6
.jump_count
= 0;
982 brw_set_src0(p
, insn
, src0
);
983 brw_set_src1(p
, insn
, src1
);
985 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
986 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
987 insn
->header
.destreg__conditionalmod
= conditional
;
989 if (!p
->single_program_flow
)
990 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
992 push_if_stack(p
, insn
);
997 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1000 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1001 struct brw_instruction
*if_inst
,
1002 struct brw_instruction
*else_inst
)
1004 /* The next instruction (where the ENDIF would be, if it existed) */
1005 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1007 assert(p
->single_program_flow
);
1008 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1009 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1010 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1012 /* Convert IF to an ADD instruction that moves the instruction pointer
1013 * to the first instruction of the ELSE block. If there is no ELSE
1014 * block, point to where ENDIF would be. Reverse the predicate.
1016 * There's no need to execute an ENDIF since we don't need to do any
1017 * stack operations, and if we're currently executing, we just want to
1018 * continue normally.
1020 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1021 if_inst
->header
.predicate_inverse
= 1;
1023 if (else_inst
!= NULL
) {
1024 /* Convert ELSE to an ADD instruction that points where the ENDIF
1027 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1029 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1030 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1032 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1037 * Patch IF and ELSE instructions with appropriate jump targets.
1040 patch_IF_ELSE(struct brw_compile
*p
,
1041 struct brw_instruction
*if_inst
,
1042 struct brw_instruction
*else_inst
,
1043 struct brw_instruction
*endif_inst
)
1045 struct intel_context
*intel
= &p
->brw
->intel
;
1047 assert(!p
->single_program_flow
);
1048 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1049 assert(endif_inst
!= NULL
);
1050 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1053 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1054 * requires 2 chunks.
1056 if (intel
->gen
>= 5)
1059 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1060 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1062 if (else_inst
== NULL
) {
1063 /* Patch IF -> ENDIF */
1064 if (intel
->gen
< 6) {
1065 /* Turn it into an IFF, which means no mask stack operations for
1066 * all-false and jumping past the ENDIF.
1068 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1069 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1070 if_inst
->bits3
.if_else
.pop_count
= 0;
1071 if_inst
->bits3
.if_else
.pad0
= 0;
1072 } else if (intel
->gen
== 6) {
1073 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1074 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1076 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1077 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1080 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1082 /* Patch IF -> ELSE */
1083 if (intel
->gen
< 6) {
1084 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1085 if_inst
->bits3
.if_else
.pop_count
= 0;
1086 if_inst
->bits3
.if_else
.pad0
= 0;
1087 } else if (intel
->gen
== 6) {
1088 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1091 /* Patch ELSE -> ENDIF */
1092 if (intel
->gen
< 6) {
1093 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1096 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1097 else_inst
->bits3
.if_else
.pop_count
= 1;
1098 else_inst
->bits3
.if_else
.pad0
= 0;
1099 } else if (intel
->gen
== 6) {
1100 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1101 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1103 /* The IF instruction's JIP should point just past the ELSE */
1104 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1105 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1106 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1107 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1113 brw_ELSE(struct brw_compile
*p
)
1115 struct intel_context
*intel
= &p
->brw
->intel
;
1116 struct brw_instruction
*insn
;
1118 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1120 if (intel
->gen
< 6) {
1121 brw_set_dest(p
, insn
, brw_ip_reg());
1122 brw_set_src0(p
, insn
, brw_ip_reg());
1123 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1124 } else if (intel
->gen
== 6) {
1125 brw_set_dest(p
, insn
, brw_imm_w(0));
1126 insn
->bits1
.branch_gen6
.jump_count
= 0;
1127 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1128 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1130 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1131 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1132 brw_set_src1(p
, insn
, brw_imm_ud(0));
1133 insn
->bits3
.break_cont
.jip
= 0;
1134 insn
->bits3
.break_cont
.uip
= 0;
1137 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1138 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1139 if (!p
->single_program_flow
)
1140 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1142 push_if_stack(p
, insn
);
1146 brw_ENDIF(struct brw_compile
*p
)
1148 struct intel_context
*intel
= &p
->brw
->intel
;
1149 struct brw_instruction
*insn
;
1150 struct brw_instruction
*else_inst
= NULL
;
1151 struct brw_instruction
*if_inst
= NULL
;
1153 /* Pop the IF and (optional) ELSE instructions from the stack */
1154 p
->if_stack_depth
--;
1155 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1156 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1157 p
->if_stack_depth
--;
1159 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1161 if (p
->single_program_flow
) {
1162 /* ENDIF is useless; don't bother emitting it. */
1163 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1167 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1169 if (intel
->gen
< 6) {
1170 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1171 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1172 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1173 } else if (intel
->gen
== 6) {
1174 brw_set_dest(p
, insn
, brw_imm_w(0));
1175 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1176 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1178 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1179 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1180 brw_set_src1(p
, insn
, brw_imm_ud(0));
1183 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1184 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1185 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1187 /* Also pop item off the stack in the endif instruction: */
1188 if (intel
->gen
< 6) {
1189 insn
->bits3
.if_else
.jump_count
= 0;
1190 insn
->bits3
.if_else
.pop_count
= 1;
1191 insn
->bits3
.if_else
.pad0
= 0;
1192 } else if (intel
->gen
== 6) {
1193 insn
->bits1
.branch_gen6
.jump_count
= 2;
1195 insn
->bits3
.break_cont
.jip
= 2;
1197 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1200 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1202 struct intel_context
*intel
= &p
->brw
->intel
;
1203 struct brw_instruction
*insn
;
1205 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1206 if (intel
->gen
>= 6) {
1207 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1208 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1209 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1211 brw_set_dest(p
, insn
, brw_ip_reg());
1212 brw_set_src0(p
, insn
, brw_ip_reg());
1213 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1214 insn
->bits3
.if_else
.pad0
= 0;
1215 insn
->bits3
.if_else
.pop_count
= pop_count
;
1217 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1218 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1223 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1224 struct brw_instruction
*do_insn
)
1226 struct brw_instruction
*insn
;
1228 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1229 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1230 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1231 brw_set_dest(p
, insn
, brw_ip_reg());
1232 brw_set_src0(p
, insn
, brw_ip_reg());
1233 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1235 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1236 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1240 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1242 struct brw_instruction
*insn
;
1243 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1244 brw_set_dest(p
, insn
, brw_ip_reg());
1245 brw_set_src0(p
, insn
, brw_ip_reg());
1246 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1247 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1248 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1249 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1250 insn
->bits3
.if_else
.pad0
= 0;
1251 insn
->bits3
.if_else
.pop_count
= pop_count
;
1257 * The DO/WHILE is just an unterminated loop -- break or continue are
1258 * used for control within the loop. We have a few ways they can be
1261 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1262 * jip and no DO instruction.
1264 * For non-uniform control flow pre-gen6, there's a DO instruction to
1265 * push the mask, and a WHILE to jump back, and BREAK to get out and
1268 * For gen6, there's no more mask stack, so no need for DO. WHILE
1269 * just points back to the first instruction of the loop.
1271 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1273 struct intel_context
*intel
= &p
->brw
->intel
;
1275 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1276 return &p
->store
[p
->nr_insn
];
1278 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1280 /* Override the defaults for this instruction:
1282 brw_set_dest(p
, insn
, brw_null_reg());
1283 brw_set_src0(p
, insn
, brw_null_reg());
1284 brw_set_src1(p
, insn
, brw_null_reg());
1286 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1287 insn
->header
.execution_size
= execute_size
;
1288 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1289 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1290 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1298 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1299 struct brw_instruction
*do_insn
)
1301 struct intel_context
*intel
= &p
->brw
->intel
;
1302 struct brw_instruction
*insn
;
1305 if (intel
->gen
>= 5)
1308 if (intel
->gen
>= 7) {
1309 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1311 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1312 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1313 brw_set_src1(p
, insn
, brw_imm_ud(0));
1314 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1316 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1317 } else if (intel
->gen
== 6) {
1318 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1320 brw_set_dest(p
, insn
, brw_imm_w(0));
1321 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1322 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1323 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1325 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1327 if (p
->single_program_flow
) {
1328 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1330 brw_set_dest(p
, insn
, brw_ip_reg());
1331 brw_set_src0(p
, insn
, brw_ip_reg());
1332 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1333 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1335 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1337 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1339 brw_set_dest(p
, insn
, brw_ip_reg());
1340 brw_set_src0(p
, insn
, brw_ip_reg());
1341 brw_set_src1(p
, insn
, brw_imm_d(0));
1343 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1344 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1345 insn
->bits3
.if_else
.pop_count
= 0;
1346 insn
->bits3
.if_else
.pad0
= 0;
1349 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1350 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1358 void brw_land_fwd_jump(struct brw_compile
*p
,
1359 struct brw_instruction
*jmp_insn
)
1361 struct intel_context
*intel
= &p
->brw
->intel
;
1362 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1365 if (intel
->gen
>= 5)
1368 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1369 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1371 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1376 /* To integrate with the above, it makes sense that the comparison
1377 * instruction should populate the flag register. It might be simpler
1378 * just to use the flag reg for most WM tasks?
1380 void brw_CMP(struct brw_compile
*p
,
1381 struct brw_reg dest
,
1383 struct brw_reg src0
,
1384 struct brw_reg src1
)
1386 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1388 insn
->header
.destreg__conditionalmod
= conditional
;
1389 brw_set_dest(p
, insn
, dest
);
1390 brw_set_src0(p
, insn
, src0
);
1391 brw_set_src1(p
, insn
, src1
);
1393 /* guess_execution_size(insn, src0); */
1396 /* Make it so that future instructions will use the computed flag
1397 * value until brw_set_predicate_control_flag_value() is called
1400 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1402 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1403 p
->flag_value
= 0xff;
1407 /* Issue 'wait' instruction for n1, host could program MMIO
1408 to wake up thread. */
1409 void brw_WAIT (struct brw_compile
*p
)
1411 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1412 struct brw_reg src
= brw_notification_1_reg();
1414 brw_set_dest(p
, insn
, src
);
1415 brw_set_src0(p
, insn
, src
);
1416 brw_set_src1(p
, insn
, brw_null_reg());
1417 insn
->header
.execution_size
= 0; /* must */
1418 insn
->header
.predicate_control
= 0;
1419 insn
->header
.compression_control
= 0;
1423 /***********************************************************************
1424 * Helpers for the various SEND message types:
1427 /** Extended math function, float[8].
1429 void brw_math( struct brw_compile
*p
,
1430 struct brw_reg dest
,
1438 struct intel_context
*intel
= &p
->brw
->intel
;
1440 if (intel
->gen
>= 6) {
1441 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1443 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1444 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1446 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1447 if (intel
->gen
== 6)
1448 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1450 /* Source modifiers are ignored for extended math instructions on Gen6. */
1451 if (intel
->gen
== 6) {
1452 assert(!src
.negate
);
1456 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1457 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1458 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1459 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1461 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1464 /* Math is the same ISA format as other opcodes, except that CondModifier
1465 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1467 insn
->header
.destreg__conditionalmod
= function
;
1468 insn
->header
.saturate
= saturate
;
1470 brw_set_dest(p
, insn
, dest
);
1471 brw_set_src0(p
, insn
, src
);
1472 brw_set_src1(p
, insn
, brw_null_reg());
1474 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1476 /* Example code doesn't set predicate_control for send
1479 insn
->header
.predicate_control
= 0;
1480 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1482 brw_set_dest(p
, insn
, dest
);
1483 brw_set_src0(p
, insn
, src
);
1484 brw_set_math_message(p
,
1487 src
.type
== BRW_REGISTER_TYPE_D
,
1494 /** Extended math function, float[8].
1496 void brw_math2(struct brw_compile
*p
,
1497 struct brw_reg dest
,
1499 struct brw_reg src0
,
1500 struct brw_reg src1
)
1502 struct intel_context
*intel
= &p
->brw
->intel
;
1503 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1505 assert(intel
->gen
>= 6);
1509 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1510 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1511 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1513 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1514 if (intel
->gen
== 6) {
1515 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1516 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1519 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1520 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1521 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1522 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1523 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1525 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1526 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1529 /* Source modifiers are ignored for extended math instructions on Gen6. */
1530 if (intel
->gen
== 6) {
1531 assert(!src0
.negate
);
1533 assert(!src1
.negate
);
1537 /* Math is the same ISA format as other opcodes, except that CondModifier
1538 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1540 insn
->header
.destreg__conditionalmod
= function
;
1542 brw_set_dest(p
, insn
, dest
);
1543 brw_set_src0(p
, insn
, src0
);
1544 brw_set_src1(p
, insn
, src1
);
1548 * Extended math function, float[16].
1549 * Use 2 send instructions.
1551 void brw_math_16( struct brw_compile
*p
,
1552 struct brw_reg dest
,
1559 struct intel_context
*intel
= &p
->brw
->intel
;
1560 struct brw_instruction
*insn
;
1562 if (intel
->gen
>= 6) {
1563 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1565 /* Math is the same ISA format as other opcodes, except that CondModifier
1566 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1568 insn
->header
.destreg__conditionalmod
= function
;
1569 insn
->header
.saturate
= saturate
;
1571 /* Source modifiers are ignored for extended math instructions. */
1572 assert(!src
.negate
);
1575 brw_set_dest(p
, insn
, dest
);
1576 brw_set_src0(p
, insn
, src
);
1577 brw_set_src1(p
, insn
, brw_null_reg());
1581 /* First instruction:
1583 brw_push_insn_state(p
);
1584 brw_set_predicate_control_flag_value(p
, 0xff);
1585 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1587 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1588 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1590 brw_set_dest(p
, insn
, dest
);
1591 brw_set_src0(p
, insn
, src
);
1592 brw_set_math_message(p
,
1595 BRW_MATH_INTEGER_UNSIGNED
,
1598 BRW_MATH_DATA_VECTOR
);
1600 /* Second instruction:
1602 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1603 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1604 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1606 brw_set_dest(p
, insn
, offset(dest
,1));
1607 brw_set_src0(p
, insn
, src
);
1608 brw_set_math_message(p
,
1611 BRW_MATH_INTEGER_UNSIGNED
,
1614 BRW_MATH_DATA_VECTOR
);
1616 brw_pop_insn_state(p
);
1621 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1622 * using a constant offset per channel.
1624 * The offset must be aligned to oword size (16 bytes). Used for
1625 * register spilling.
1627 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1632 struct intel_context
*intel
= &p
->brw
->intel
;
1633 uint32_t msg_control
, msg_type
;
1636 if (intel
->gen
>= 6)
1639 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1641 if (num_regs
== 1) {
1642 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1645 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1649 /* Set up the message header. This is g0, with g0.2 filled with
1650 * the offset. We don't want to leave our offset around in g0 or
1651 * it'll screw up texture samples, so set it up inside the message
1655 brw_push_insn_state(p
);
1656 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1657 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1659 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1661 /* set message header global offset field (reg 0, element 2) */
1663 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1665 2), BRW_REGISTER_TYPE_UD
),
1666 brw_imm_ud(offset
));
1668 brw_pop_insn_state(p
);
1672 struct brw_reg dest
;
1673 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1674 int send_commit_msg
;
1675 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1676 BRW_REGISTER_TYPE_UW
);
1678 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1679 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1680 src_header
= vec16(src_header
);
1682 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1683 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1685 /* Until gen6, writes followed by reads from the same location
1686 * are not guaranteed to be ordered unless write_commit is set.
1687 * If set, then a no-op write is issued to the destination
1688 * register to set a dependency, and a read from the destination
1689 * can be used to ensure the ordering.
1691 * For gen6, only writes between different threads need ordering
1692 * protection. Our use of DP writes is all about register
1693 * spilling within a thread.
1695 if (intel
->gen
>= 6) {
1696 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1697 send_commit_msg
= 0;
1700 send_commit_msg
= 1;
1703 brw_set_dest(p
, insn
, dest
);
1704 if (intel
->gen
>= 6) {
1705 brw_set_src0(p
, insn
, mrf
);
1707 brw_set_src0(p
, insn
, brw_null_reg());
1710 if (intel
->gen
>= 6)
1711 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1713 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1715 brw_set_dp_write_message(p
,
1717 255, /* binding table index (255=stateless) */
1721 true, /* header_present */
1722 0, /* not a render target */
1723 send_commit_msg
, /* response_length */
1731 * Read a block of owords (half a GRF each) from the scratch buffer
1732 * using a constant index per channel.
1734 * Offset must be aligned to oword size (16 bytes). Used for register
1738 brw_oword_block_read_scratch(struct brw_compile
*p
,
1739 struct brw_reg dest
,
1744 struct intel_context
*intel
= &p
->brw
->intel
;
1745 uint32_t msg_control
;
1748 if (intel
->gen
>= 6)
1751 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1752 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1754 if (num_regs
== 1) {
1755 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1758 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1763 brw_push_insn_state(p
);
1764 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1765 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1767 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1769 /* set message header global offset field (reg 0, element 2) */
1771 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1773 2), BRW_REGISTER_TYPE_UD
),
1774 brw_imm_ud(offset
));
1776 brw_pop_insn_state(p
);
1780 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1782 assert(insn
->header
.predicate_control
== 0);
1783 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1784 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1786 brw_set_dest(p
, insn
, dest
); /* UW? */
1787 if (intel
->gen
>= 6) {
1788 brw_set_src0(p
, insn
, mrf
);
1790 brw_set_src0(p
, insn
, brw_null_reg());
1793 brw_set_dp_read_message(p
,
1795 255, /* binding table index (255=stateless) */
1797 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1798 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1805 * Read a float[4] vector from the data port Data Cache (const buffer).
1806 * Location (in buffer) should be a multiple of 16.
1807 * Used for fetching shader constants.
1809 void brw_oword_block_read(struct brw_compile
*p
,
1810 struct brw_reg dest
,
1813 uint32_t bind_table_index
)
1815 struct intel_context
*intel
= &p
->brw
->intel
;
1817 /* On newer hardware, offset is in units of owords. */
1818 if (intel
->gen
>= 6)
1821 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1823 brw_push_insn_state(p
);
1824 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1825 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1826 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1828 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1830 /* set message header global offset field (reg 0, element 2) */
1832 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1834 2), BRW_REGISTER_TYPE_UD
),
1835 brw_imm_ud(offset
));
1837 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1838 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1840 /* cast dest to a uword[8] vector */
1841 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1843 brw_set_dest(p
, insn
, dest
);
1844 if (intel
->gen
>= 6) {
1845 brw_set_src0(p
, insn
, mrf
);
1847 brw_set_src0(p
, insn
, brw_null_reg());
1850 brw_set_dp_read_message(p
,
1853 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1854 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1855 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1857 1); /* response_length (1 reg, 2 owords!) */
1859 brw_pop_insn_state(p
);
1863 * Read a set of dwords from the data port Data Cache (const buffer).
1865 * Location (in buffer) appears as UD offsets in the register after
1866 * the provided mrf header reg.
1868 void brw_dword_scattered_read(struct brw_compile
*p
,
1869 struct brw_reg dest
,
1871 uint32_t bind_table_index
)
1873 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1875 brw_push_insn_state(p
);
1876 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1877 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1878 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1879 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1880 brw_pop_insn_state(p
);
1882 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1883 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1885 /* cast dest to a uword[8] vector */
1886 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1888 brw_set_dest(p
, insn
, dest
);
1889 brw_set_src0(p
, insn
, brw_null_reg());
1891 brw_set_dp_read_message(p
,
1894 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1895 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1896 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1898 1); /* response_length */
1904 * Read float[4] constant(s) from VS constant buffer.
1905 * For relative addressing, two float[4] constants will be read into 'dest'.
1906 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1908 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1909 struct brw_reg dest
,
1911 GLuint bind_table_index
)
1913 struct intel_context
*intel
= &p
->brw
->intel
;
1914 struct brw_instruction
*insn
;
1915 GLuint msg_reg_nr
= 1;
1917 if (intel
->gen
>= 6)
1920 /* Setup MRF[1] with location/offset into const buffer */
1921 brw_push_insn_state(p
);
1922 brw_set_access_mode(p
, BRW_ALIGN_1
);
1923 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1924 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1925 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1926 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1927 BRW_REGISTER_TYPE_UD
),
1928 brw_imm_ud(location
));
1929 brw_pop_insn_state(p
);
1931 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1933 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1934 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1935 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1936 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1938 brw_set_dest(p
, insn
, dest
);
1939 if (intel
->gen
>= 6) {
1940 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1942 brw_set_src0(p
, insn
, brw_null_reg());
1945 brw_set_dp_read_message(p
,
1949 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1950 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1952 1); /* response_length (1 Oword) */
1956 * Read a float[4] constant per vertex from VS constant buffer, with
1957 * relative addressing.
1959 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1960 struct brw_reg dest
,
1961 struct brw_reg addr_reg
,
1963 GLuint bind_table_index
)
1965 struct intel_context
*intel
= &p
->brw
->intel
;
1966 struct brw_reg src
= brw_vec8_grf(0, 0);
1969 /* Setup MRF[1] with offset into const buffer */
1970 brw_push_insn_state(p
);
1971 brw_set_access_mode(p
, BRW_ALIGN_1
);
1972 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1973 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1974 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1976 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1979 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1980 addr_reg
, brw_imm_d(offset
));
1981 brw_pop_insn_state(p
);
1983 gen6_resolve_implied_move(p
, &src
, 0);
1984 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1986 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1987 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1988 insn
->header
.destreg__conditionalmod
= 0;
1989 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1991 brw_set_dest(p
, insn
, dest
);
1992 brw_set_src0(p
, insn
, src
);
1994 if (intel
->gen
>= 6)
1995 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1996 else if (intel
->gen
== 5 || intel
->is_g4x
)
1997 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1999 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2001 brw_set_dp_read_message(p
,
2004 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2006 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2008 1); /* response_length */
2013 void brw_fb_WRITE(struct brw_compile
*p
,
2016 struct brw_reg src0
,
2017 GLuint binding_table_index
,
2019 GLuint response_length
,
2021 bool header_present
)
2023 struct intel_context
*intel
= &p
->brw
->intel
;
2024 struct brw_instruction
*insn
;
2025 GLuint msg_control
, msg_type
;
2026 struct brw_reg dest
;
2028 if (dispatch_width
== 16)
2029 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2031 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2033 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2034 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2036 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2038 /* The execution mask is ignored for render target writes. */
2039 insn
->header
.predicate_control
= 0;
2040 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2042 if (intel
->gen
>= 6) {
2043 /* headerless version, just submit color payload */
2044 src0
= brw_message_reg(msg_reg_nr
);
2046 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2048 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2050 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2053 if (dispatch_width
== 16)
2054 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2056 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2058 brw_set_dest(p
, insn
, dest
);
2059 brw_set_src0(p
, insn
, src0
);
2060 brw_set_dp_write_message(p
,
2062 binding_table_index
,
2067 1, /* last render target write */
2070 0 /* send_commit_msg */);
2075 * Texture sample instruction.
2076 * Note: the msg_type plus msg_length values determine exactly what kind
2077 * of sampling operation is performed. See volume 4, page 161 of docs.
2079 void brw_SAMPLE(struct brw_compile
*p
,
2080 struct brw_reg dest
,
2082 struct brw_reg src0
,
2083 GLuint binding_table_index
,
2087 GLuint response_length
,
2089 GLuint header_present
,
2091 GLuint return_format
)
2093 struct intel_context
*intel
= &p
->brw
->intel
;
2094 bool need_stall
= 0;
2096 if (writemask
== 0) {
2097 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2101 /* Hardware doesn't do destination dependency checking on send
2102 * instructions properly. Add a workaround which generates the
2103 * dependency by other means. In practice it seems like this bug
2104 * only crops up for texture samples, and only where registers are
2105 * written by the send and then written again later without being
2106 * read in between. Luckily for us, we already track that
2107 * information and use it to modify the writemask for the
2108 * instruction, so that is a guide for whether a workaround is
2111 if (writemask
!= WRITEMASK_XYZW
) {
2112 GLuint dst_offset
= 0;
2113 GLuint i
, newmask
= 0, len
= 0;
2115 for (i
= 0; i
< 4; i
++) {
2116 if (writemask
& (1<<i
))
2120 for (; i
< 4; i
++) {
2121 if (!(writemask
& (1<<i
)))
2127 if (newmask
!= writemask
) {
2129 /* printf("need stall %x %x\n", newmask , writemask); */
2132 bool dispatch_16
= false;
2134 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2136 guess_execution_size(p
, p
->current
, dest
);
2137 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2140 newmask
= ~newmask
& WRITEMASK_XYZW
;
2142 brw_push_insn_state(p
);
2144 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2145 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2147 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2148 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2149 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2151 brw_pop_insn_state(p
);
2153 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2154 dest
= offset(dest
, dst_offset
);
2156 /* For 16-wide dispatch, masked channels are skipped in the
2157 * response. For 8-wide, masked channels still take up slots,
2158 * and are just not written to.
2161 response_length
= len
* 2;
2166 struct brw_instruction
*insn
;
2168 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2170 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2171 insn
->header
.predicate_control
= 0; /* XXX */
2172 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2174 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2176 brw_set_dest(p
, insn
, dest
);
2177 brw_set_src0(p
, insn
, src0
);
2178 brw_set_sampler_message(p
, insn
,
2179 binding_table_index
,
2190 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2192 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2194 brw_push_insn_state(p
);
2195 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2196 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2197 retype(reg
, BRW_REGISTER_TYPE_UD
));
2198 brw_pop_insn_state(p
);
2203 /* All these variables are pretty confusing - we might be better off
2204 * using bitmasks and macros for this, in the old style. Or perhaps
2205 * just having the caller instantiate the fields in dword3 itself. */
/* brw_urb_WRITE: emit a SEND instruction carrying a URB write message.
 *
 * Visible parameters: p (compile context), dest and src0 (registers
 * handed to brw_set_dest / brw_set_src0), response_length and
 * writes_complete.
 * NOTE(review): msg_reg_nr and msg_length are used below but their
 * declarations, along with the rest of the parameter list, are missing
 * from this excerpt; the argument list of the final
 * brw_set_urb_message() call is cut off as well.  Confirm both against
 * the complete source before relying on this description.
 *
 * Visible steps, in order:
 *   1. gen6_resolve_implied_move() is called on src0 with msg_reg_nr.
 *   2. When intel->gen == 7, dword 5 of g0 is ORed with 0xff00 into
 *      dword 5 of message register msg_reg_nr, in Align1 access mode,
 *      between a push/pop of the instruction state.
 *   3. A SEND is allocated with next_insn(); dest, src0 and an
 *      immediate-zero src1 are set on it, and msg_reg_nr is stored in
 *      header.destreg__conditionalmod.
 *   4. brw_set_urb_message() is called to fill in the message.
 */
2207 void brw_urb_WRITE(struct brw_compile
*p
,
2208 struct brw_reg dest
,
2210 struct brw_reg src0
,
2214 GLuint response_length
,
2216 bool writes_complete
,
2220 struct intel_context
*intel
= &p
->brw
->intel
;
2221 struct brw_instruction
*insn
;
2223 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2225 if (intel
->gen
== 7) {
2226 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2227 brw_push_insn_state(p
);
2228 brw_set_access_mode(p
, BRW_ALIGN_1
);
2229 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2230 BRW_REGISTER_TYPE_UD
),
2231 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2232 brw_imm_ud(0xff00));
2233 brw_pop_insn_state(p
);
2236 insn
= next_insn(p
, BRW_OPCODE_SEND
);
/* Sanity check on the message length before the operands are set. */
2238 assert(msg_length
< BRW_MAX_MRF
);
2240 brw_set_dest(p
, insn
, dest
);
2241 brw_set_src0(p
, insn
, src0
);
2242 brw_set_src1(p
, insn
, brw_imm_d(0));
/* The message register number is recorded in the destreg/conditionalmod
 * header field of the SEND.
 */
2245 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2247 brw_set_urb_message(p
,
/* brw_find_next_block_end: scan forward through p->store, beginning at
 * index start + 1 and stopping before p->nr_insn, inspecting the opcode
 * of every instruction.  ENDIF, ELSE and WHILE are the opcodes the
 * switch singles out.
 *
 * NOTE(review): the return type, the declaration of ip, the statement
 * executed for a matching opcode and the return statements are missing
 * from this excerpt.  brw_set_uip_jip() subtracts its own instruction
 * index from the result, so the value is presumably the index of the
 * matching instruction -- confirm against the complete source.
 */
2260 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2264 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2265 struct brw_instruction
*insn
= &p
->store
[ip
];
2267 switch (insn
->header
.opcode
) {
2268 case BRW_OPCODE_ENDIF
:
2269 case BRW_OPCODE_ELSE
:
2270 case BRW_OPCODE_WHILE
:
/* Always-false assertion; it appears after the scan, presumably flagging
 * the case where no block-ending opcode was found -- confirm.
 */
2274 assert(!"not reached");
2278 /* There is no DO instruction on gen6, so to find the end of the loop
2279 * we have to see if the loop is jumping back before our start instruction. */
/* brw_find_loop_end: scan forward through p->store, beginning at index
 * start + 1 and stopping before p->nr_insn, looking at WHILE
 * instructions only.  For each WHILE the stored jump distance is read
 * and the test "ip + jip / br <= start" checks whether that WHILE jumps
 * back to or before the starting instruction.
 *
 * NOTE(review): the return type, the declarations of ip and br, the
 * statement taken when the test succeeds and the return statements are
 * missing from this excerpt.  brw_set_uip_jip() multiplies instruction
 * distances by br when writing the jump fields, so dividing by br here
 * presumably converts the stored value back to an instruction count --
 * confirm against the complete source.
 */
2283 brw_find_loop_end(struct brw_compile
*p
, int start
)
2285 struct intel_context
*intel
= &p
->brw
->intel
;
2289 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2290 struct brw_instruction
*insn
= &p
->store
[ip
];
2292 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
/* The jump distance is read from a different field depending on the
 * hardware generation: bits1.branch_gen6.jump_count when gen == 6,
 * bits3.break_cont.jip otherwise.
 */
2293 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2294 : insn
->bits3
.break_cont
.jip
;
2295 if (ip
+ jip
/ br
<= start
)
/* Always-false assertion; it appears after the scan, presumably flagging
 * the case where no enclosing WHILE was found -- confirm.
 */
2299 assert(!"not reached");
2303 /* After program generation, go back and update the UIP and JIP of
2304 * BREAK and CONT instructions to their correct locations. */
/* brw_set_uip_jip: walk every instruction in p->store (indices 0 up to
 * p->nr_insn) and fill in the bits3.break_cont.jip and .uip fields of
 * BREAK and CONTINUE instructions.
 *
 * Both fields are written as br * (target index - current index):
 *   - jip uses the index returned by brw_find_next_block_end();
 *   - uip uses the index returned by brw_find_loop_end(); for BREAK on
 *     gen 6 one extra instruction is added to the distance.
 *
 * NOTE(review): the return type, the declarations of ip and br, any
 * early-exit check, the break statements and the closing braces are
 * missing from this excerpt; confirm against the complete source.
 */
2307 brw_set_uip_jip(struct brw_compile
*p
)
2309 struct intel_context
*intel
= &p
->brw
->intel
;
2316 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2317 struct brw_instruction
*insn
= &p
->store
[ip
];
2319 switch (insn
->header
.opcode
) {
2320 case BRW_OPCODE_BREAK
:
2321 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2322 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2323 insn
->bits3
.break_cont
.uip
=
2324 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2326 case BRW_OPCODE_CONTINUE
:
2327 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2328 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
/* In the visible lines these checks follow the CONTINUE assignments:
 * both computed jump distances must be non-zero.
 */
2330 assert(insn
->bits3
.break_cont
.uip
!= 0);
2331 assert(insn
->bits3
.break_cont
.jip
!= 0);
2337 void brw_ff_sync(struct brw_compile
*p
,
2338 struct brw_reg dest
,
2340 struct brw_reg src0
,
2342 GLuint response_length
,
2345 struct intel_context
*intel
= &p
->brw
->intel
;
2346 struct brw_instruction
*insn
;
2348 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2350 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2351 brw_set_dest(p
, insn
, dest
);
2352 brw_set_src0(p
, insn
, src0
);
2353 brw_set_src1(p
, insn
, brw_imm_d(0));
2356 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2358 brw_set_ff_sync_message(p
,