/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
/* Byte size of each hardware register type, indexed by the encoded
 * dest/src type field; defined in a sibling file.
 */
extern int reg_type_size[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
374 * Set the Message Descriptor and Extended Message Descriptor fields
377 * \note This zeroes out the Function Control bits, so it must be called
378 * \b before filling out any message-specific data. Callers can
379 * choose not to fill in irrelevant bits; they will be zero.
382 brw_set_message_descriptor(struct brw_compile
*p
,
383 struct brw_instruction
*inst
,
384 enum brw_message_target sfid
,
386 unsigned response_length
,
390 struct intel_context
*intel
= &p
->brw
->intel
;
392 brw_set_src1(p
, inst
, brw_imm_d(0));
394 if (intel
->gen
>= 5) {
395 inst
->bits3
.generic_gen5
.header_present
= header_present
;
396 inst
->bits3
.generic_gen5
.response_length
= response_length
;
397 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
398 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
400 if (intel
->gen
>= 6) {
401 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
402 inst
->header
.destreg__conditionalmod
= sfid
;
404 /* Set Extended Message Descriptor (ex_desc) */
405 inst
->bits2
.send_gen5
.sfid
= sfid
;
406 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
409 inst
->bits3
.generic
.response_length
= response_length
;
410 inst
->bits3
.generic
.msg_length
= msg_length
;
411 inst
->bits3
.generic
.msg_target
= sfid
;
412 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
416 static void brw_set_math_message( struct brw_compile
*p
,
417 struct brw_instruction
*insn
,
424 struct brw_context
*brw
= p
->brw
;
425 struct intel_context
*intel
= &brw
->intel
;
427 unsigned response_length
;
429 /* Infer message length from the function */
431 case BRW_MATH_FUNCTION_POW
:
432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
442 /* Infer response length from the function */
444 case BRW_MATH_FUNCTION_SINCOS
:
445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
453 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
454 msg_length
, response_length
, false, false);
455 if (intel
->gen
== 5) {
456 insn
->bits3
.math_gen5
.function
= function
;
457 insn
->bits3
.math_gen5
.int_type
= integer_type
;
458 insn
->bits3
.math_gen5
.precision
= low_precision
;
459 insn
->bits3
.math_gen5
.saturate
= saturate
;
460 insn
->bits3
.math_gen5
.data_type
= dataType
;
461 insn
->bits3
.math_gen5
.snapshot
= 0;
463 insn
->bits3
.math
.function
= function
;
464 insn
->bits3
.math
.int_type
= integer_type
;
465 insn
->bits3
.math
.precision
= low_precision
;
466 insn
->bits3
.math
.saturate
= saturate
;
467 insn
->bits3
.math
.data_type
= dataType
;
472 static void brw_set_ff_sync_message(struct brw_compile
*p
,
473 struct brw_instruction
*insn
,
475 GLuint response_length
,
478 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
479 1, response_length
, true, end_of_thread
);
480 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
481 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
482 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
483 insn
->bits3
.urb_gen5
.allocate
= allocate
;
484 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
485 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
488 static void brw_set_urb_message( struct brw_compile
*p
,
489 struct brw_instruction
*insn
,
493 GLuint response_length
,
497 GLuint swizzle_control
)
499 struct brw_context
*brw
= p
->brw
;
500 struct intel_context
*intel
= &brw
->intel
;
502 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
503 msg_length
, response_length
, true, end_of_thread
);
504 if (intel
->gen
== 7) {
505 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
506 insn
->bits3
.urb_gen7
.offset
= offset
;
507 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
508 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
509 /* per_slot_offset = 0 makes it ignore offsets in message header */
510 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
511 insn
->bits3
.urb_gen7
.complete
= complete
;
512 } else if (intel
->gen
>= 5) {
513 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
514 insn
->bits3
.urb_gen5
.offset
= offset
;
515 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
516 insn
->bits3
.urb_gen5
.allocate
= allocate
;
517 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
518 insn
->bits3
.urb_gen5
.complete
= complete
;
520 insn
->bits3
.urb
.opcode
= 0; /* ? */
521 insn
->bits3
.urb
.offset
= offset
;
522 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
523 insn
->bits3
.urb
.allocate
= allocate
;
524 insn
->bits3
.urb
.used
= used
; /* ? */
525 insn
->bits3
.urb
.complete
= complete
;
530 brw_set_dp_write_message(struct brw_compile
*p
,
531 struct brw_instruction
*insn
,
532 GLuint binding_table_index
,
537 GLuint last_render_target
,
538 GLuint response_length
,
539 GLuint end_of_thread
,
540 GLuint send_commit_msg
)
542 struct brw_context
*brw
= p
->brw
;
543 struct intel_context
*intel
= &brw
->intel
;
546 if (intel
->gen
>= 7) {
547 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
548 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
549 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
551 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
552 } else if (intel
->gen
== 6) {
553 /* Use the render cache for all write messages. */
554 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
556 sfid
= BRW_SFID_DATAPORT_WRITE
;
559 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
560 header_present
, end_of_thread
);
562 if (intel
->gen
>= 7) {
563 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
564 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
565 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
566 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
567 } else if (intel
->gen
== 6) {
568 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
569 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
570 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
571 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
572 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
573 } else if (intel
->gen
== 5) {
574 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
575 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
576 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
577 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
578 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
580 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
581 insn
->bits3
.dp_write
.msg_control
= msg_control
;
582 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
583 insn
->bits3
.dp_write
.msg_type
= msg_type
;
584 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
589 brw_set_dp_read_message(struct brw_compile
*p
,
590 struct brw_instruction
*insn
,
591 GLuint binding_table_index
,
596 GLuint response_length
)
598 struct brw_context
*brw
= p
->brw
;
599 struct intel_context
*intel
= &brw
->intel
;
602 if (intel
->gen
>= 7) {
603 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
604 } else if (intel
->gen
== 6) {
605 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
606 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
608 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
610 sfid
= BRW_SFID_DATAPORT_READ
;
613 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
616 if (intel
->gen
>= 7) {
617 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
618 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
619 insn
->bits3
.gen7_dp
.last_render_target
= 0;
620 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
621 } else if (intel
->gen
== 6) {
622 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
623 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
624 insn
->bits3
.gen6_dp
.last_render_target
= 0;
625 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
626 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
627 } else if (intel
->gen
== 5) {
628 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
629 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
630 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
631 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
632 } else if (intel
->is_g4x
) {
633 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
634 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
635 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
636 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
638 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
639 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
640 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
641 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
645 static void brw_set_sampler_message(struct brw_compile
*p
,
646 struct brw_instruction
*insn
,
647 GLuint binding_table_index
,
650 GLuint response_length
,
652 GLuint header_present
,
655 struct brw_context
*brw
= p
->brw
;
656 struct intel_context
*intel
= &brw
->intel
;
658 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
659 response_length
, header_present
, false);
661 if (intel
->gen
>= 7) {
662 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
663 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
664 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
665 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
666 } else if (intel
->gen
>= 5) {
667 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
668 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
669 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
670 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
671 } else if (intel
->is_g4x
) {
672 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
673 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
674 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
676 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
677 insn
->bits3
.sampler
.sampler
= sampler
;
678 insn
->bits3
.sampler
.msg_type
= msg_type
;
679 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
684 #define next_insn brw_next_insn
685 struct brw_instruction
*
686 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
688 struct brw_instruction
*insn
;
690 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
692 insn
= &p
->store
[p
->nr_insn
++];
693 memcpy(insn
, p
->current
, sizeof(*insn
));
695 /* Reset this one-shot flag:
698 if (p
->current
->header
.destreg__conditionalmod
) {
699 p
->current
->header
.destreg__conditionalmod
= 0;
700 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
703 insn
->header
.opcode
= opcode
;
707 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
712 struct brw_instruction
*insn
= next_insn(p
, opcode
);
713 brw_set_dest(p
, insn
, dest
);
714 brw_set_src0(p
, insn
, src
);
718 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
722 struct brw_reg src1
)
724 struct brw_instruction
*insn
= next_insn(p
, opcode
);
725 brw_set_dest(p
, insn
, dest
);
726 brw_set_src0(p
, insn
, src0
);
727 brw_set_src1(p
, insn
, src1
);
/***********************************************************************
 * Convenience routines.
 */
/* Expand to a public one-source emitter (brw_<OP>) over brw_alu1. */
#define ALU1(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0)				\
{								\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}

/* Expand to a public two-source emitter (brw_<OP>) over brw_alu2. */
#define ALU2(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
	      struct brw_reg dest,				\
	      struct brw_reg src0,				\
	      struct brw_reg src1)				\
{								\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							\
void brw_##OP(struct brw_compile *p,					\
	      struct brw_reg dest,					\
	      struct brw_reg src)					\
{									\
   struct brw_instruction *rnd, *add;					\
   rnd = next_insn(p, BRW_OPCODE_##OP);					\
   brw_set_dest(p, rnd, dest);						\
   brw_set_src0(p, rnd, src);						\
									\
   if (p->brw->intel.gen < 6) {						\
      /* turn on round-increments */					\
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		\
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			\
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		\
   }									\
}
806 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
812 if (src0
.type
== BRW_REGISTER_TYPE_F
||
813 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
814 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
815 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
816 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
819 if (src1
.type
== BRW_REGISTER_TYPE_F
||
820 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
821 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
822 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
823 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
826 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
829 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
835 if (src0
.type
== BRW_REGISTER_TYPE_D
||
836 src0
.type
== BRW_REGISTER_TYPE_UD
||
837 src1
.type
== BRW_REGISTER_TYPE_D
||
838 src1
.type
== BRW_REGISTER_TYPE_UD
) {
839 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
842 if (src0
.type
== BRW_REGISTER_TYPE_F
||
843 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
844 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
845 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
846 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
849 if (src1
.type
== BRW_REGISTER_TYPE_F
||
850 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
851 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
852 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
853 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
856 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
857 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
858 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
859 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
861 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
865 void brw_NOP(struct brw_compile
*p
)
867 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
868 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
869 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
870 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
877 /***********************************************************************
878 * Comparisons, if/else/endif
881 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
886 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
888 insn
->header
.execution_size
= 1;
889 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
890 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
892 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
898 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
900 p
->if_stack
[p
->if_stack_depth
] = inst
;
903 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
904 p
->if_stack_array_size
*= 2;
905 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
906 p
->if_stack_array_size
);
910 /* EU takes the value from the flag register and pushes it onto some
911 * sort of a stack (presumably merging with any flag value already on
912 * the stack). Within an if block, the flags at the top of the stack
913 * control execution on each channel of the unit, eg. on each of the
914 * 16 pixel values in our wm programs.
916 * When the matching 'else' instruction is reached (presumably by
917 * countdown of the instruction count patched in by our ELSE/ENDIF
918 * functions), the relevent flags are inverted.
920 * When the matching 'endif' instruction is reached, the flags are
921 * popped off. If the stack is now empty, normal execution resumes.
923 struct brw_instruction
*
924 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
926 struct intel_context
*intel
= &p
->brw
->intel
;
927 struct brw_instruction
*insn
;
929 insn
= next_insn(p
, BRW_OPCODE_IF
);
931 /* Override the defaults for this instruction:
933 if (intel
->gen
< 6) {
934 brw_set_dest(p
, insn
, brw_ip_reg());
935 brw_set_src0(p
, insn
, brw_ip_reg());
936 brw_set_src1(p
, insn
, brw_imm_d(0x0));
937 } else if (intel
->gen
== 6) {
938 brw_set_dest(p
, insn
, brw_imm_w(0));
939 insn
->bits1
.branch_gen6
.jump_count
= 0;
940 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
941 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
943 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
944 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
945 brw_set_src1(p
, insn
, brw_imm_ud(0));
946 insn
->bits3
.break_cont
.jip
= 0;
947 insn
->bits3
.break_cont
.uip
= 0;
950 insn
->header
.execution_size
= execute_size
;
951 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
952 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
953 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
954 if (!p
->single_program_flow
)
955 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
957 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
959 push_if_stack(p
, insn
);
963 /* This function is only used for gen6-style IF instructions with an
964 * embedded comparison (conditional modifier). It is not used on gen7.
966 struct brw_instruction
*
967 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
968 struct brw_reg src0
, struct brw_reg src1
)
970 struct brw_instruction
*insn
;
972 insn
= next_insn(p
, BRW_OPCODE_IF
);
974 brw_set_dest(p
, insn
, brw_imm_w(0));
976 insn
->header
.execution_size
= BRW_EXECUTE_16
;
978 insn
->header
.execution_size
= BRW_EXECUTE_8
;
980 insn
->bits1
.branch_gen6
.jump_count
= 0;
981 brw_set_src0(p
, insn
, src0
);
982 brw_set_src1(p
, insn
, src1
);
984 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
985 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
986 insn
->header
.destreg__conditionalmod
= conditional
;
988 if (!p
->single_program_flow
)
989 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
991 push_if_stack(p
, insn
);
996 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
999 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1000 struct brw_instruction
*if_inst
,
1001 struct brw_instruction
*else_inst
)
1003 /* The next instruction (where the ENDIF would be, if it existed) */
1004 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1006 assert(p
->single_program_flow
);
1007 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1008 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1009 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1011 /* Convert IF to an ADD instruction that moves the instruction pointer
1012 * to the first instruction of the ELSE block. If there is no ELSE
1013 * block, point to where ENDIF would be. Reverse the predicate.
1015 * There's no need to execute an ENDIF since we don't need to do any
1016 * stack operations, and if we're currently executing, we just want to
1017 * continue normally.
1019 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1020 if_inst
->header
.predicate_inverse
= 1;
1022 if (else_inst
!= NULL
) {
1023 /* Convert ELSE to an ADD instruction that points where the ENDIF
1026 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1028 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1029 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1031 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1036 * Patch IF and ELSE instructions with appropriate jump targets.
1039 patch_IF_ELSE(struct brw_compile
*p
,
1040 struct brw_instruction
*if_inst
,
1041 struct brw_instruction
*else_inst
,
1042 struct brw_instruction
*endif_inst
)
1044 struct intel_context
*intel
= &p
->brw
->intel
;
1046 assert(!p
->single_program_flow
);
1047 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1048 assert(endif_inst
!= NULL
);
1049 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1052 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1053 * requires 2 chunks.
1055 if (intel
->gen
>= 5)
1058 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1059 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1061 if (else_inst
== NULL
) {
1062 /* Patch IF -> ENDIF */
1063 if (intel
->gen
< 6) {
1064 /* Turn it into an IFF, which means no mask stack operations for
1065 * all-false and jumping past the ENDIF.
1067 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1068 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1069 if_inst
->bits3
.if_else
.pop_count
= 0;
1070 if_inst
->bits3
.if_else
.pad0
= 0;
1071 } else if (intel
->gen
== 6) {
1072 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1073 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1075 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1076 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1079 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1081 /* Patch IF -> ELSE */
1082 if (intel
->gen
< 6) {
1083 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1084 if_inst
->bits3
.if_else
.pop_count
= 0;
1085 if_inst
->bits3
.if_else
.pad0
= 0;
1086 } else if (intel
->gen
== 6) {
1087 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1090 /* Patch ELSE -> ENDIF */
1091 if (intel
->gen
< 6) {
1092 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1095 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1096 else_inst
->bits3
.if_else
.pop_count
= 1;
1097 else_inst
->bits3
.if_else
.pad0
= 0;
1098 } else if (intel
->gen
== 6) {
1099 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1100 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1102 /* The IF instruction's JIP should point just past the ELSE */
1103 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1104 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1105 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1106 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1112 brw_ELSE(struct brw_compile
*p
)
1114 struct intel_context
*intel
= &p
->brw
->intel
;
1115 struct brw_instruction
*insn
;
1117 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1119 if (intel
->gen
< 6) {
1120 brw_set_dest(p
, insn
, brw_ip_reg());
1121 brw_set_src0(p
, insn
, brw_ip_reg());
1122 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1123 } else if (intel
->gen
== 6) {
1124 brw_set_dest(p
, insn
, brw_imm_w(0));
1125 insn
->bits1
.branch_gen6
.jump_count
= 0;
1126 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1127 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1129 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1130 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1131 brw_set_src1(p
, insn
, brw_imm_ud(0));
1132 insn
->bits3
.break_cont
.jip
= 0;
1133 insn
->bits3
.break_cont
.uip
= 0;
1136 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1137 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1138 if (!p
->single_program_flow
)
1139 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1141 push_if_stack(p
, insn
);
1145 brw_ENDIF(struct brw_compile
*p
)
1147 struct intel_context
*intel
= &p
->brw
->intel
;
1148 struct brw_instruction
*insn
;
1149 struct brw_instruction
*else_inst
= NULL
;
1150 struct brw_instruction
*if_inst
= NULL
;
1152 /* Pop the IF and (optional) ELSE instructions from the stack */
1153 p
->if_stack_depth
--;
1154 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1155 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1156 p
->if_stack_depth
--;
1158 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1160 if (p
->single_program_flow
) {
1161 /* ENDIF is useless; don't bother emitting it. */
1162 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1166 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1168 if (intel
->gen
< 6) {
1169 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1170 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1171 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1172 } else if (intel
->gen
== 6) {
1173 brw_set_dest(p
, insn
, brw_imm_w(0));
1174 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1175 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1177 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1178 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1179 brw_set_src1(p
, insn
, brw_imm_ud(0));
1182 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1183 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1184 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1186 /* Also pop item off the stack in the endif instruction: */
1187 if (intel
->gen
< 6) {
1188 insn
->bits3
.if_else
.jump_count
= 0;
1189 insn
->bits3
.if_else
.pop_count
= 1;
1190 insn
->bits3
.if_else
.pad0
= 0;
1191 } else if (intel
->gen
== 6) {
1192 insn
->bits1
.branch_gen6
.jump_count
= 2;
1194 insn
->bits3
.break_cont
.jip
= 2;
1196 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1199 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1201 struct intel_context
*intel
= &p
->brw
->intel
;
1202 struct brw_instruction
*insn
;
1204 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1205 if (intel
->gen
>= 6) {
1206 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1207 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1208 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1210 brw_set_dest(p
, insn
, brw_ip_reg());
1211 brw_set_src0(p
, insn
, brw_ip_reg());
1212 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1213 insn
->bits3
.if_else
.pad0
= 0;
1214 insn
->bits3
.if_else
.pop_count
= pop_count
;
1216 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1217 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1222 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1223 struct brw_instruction
*do_insn
)
1225 struct brw_instruction
*insn
;
1227 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1228 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1229 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1230 brw_set_dest(p
, insn
, brw_ip_reg());
1231 brw_set_src0(p
, insn
, brw_ip_reg());
1232 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1234 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1235 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1239 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1241 struct brw_instruction
*insn
;
1242 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1243 brw_set_dest(p
, insn
, brw_ip_reg());
1244 brw_set_src0(p
, insn
, brw_ip_reg());
1245 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1246 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1247 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1248 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1249 insn
->bits3
.if_else
.pad0
= 0;
1250 insn
->bits3
.if_else
.pop_count
= pop_count
;
1256 * The DO/WHILE is just an unterminated loop -- break or continue are
1257 * used for control within the loop. We have a few ways they can be
1260 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1261 * jip and no DO instruction.
1263 * For non-uniform control flow pre-gen6, there's a DO instruction to
1264 * push the mask, and a WHILE to jump back, and BREAK to get out and
1267 * For gen6, there's no more mask stack, so no need for DO. WHILE
1268 * just points back to the first instruction of the loop.
1270 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1272 struct intel_context
*intel
= &p
->brw
->intel
;
1274 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1275 return &p
->store
[p
->nr_insn
];
1277 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1279 /* Override the defaults for this instruction:
1281 brw_set_dest(p
, insn
, brw_null_reg());
1282 brw_set_src0(p
, insn
, brw_null_reg());
1283 brw_set_src1(p
, insn
, brw_null_reg());
1285 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1286 insn
->header
.execution_size
= execute_size
;
1287 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1288 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1289 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1297 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1298 struct brw_instruction
*do_insn
)
1300 struct intel_context
*intel
= &p
->brw
->intel
;
1301 struct brw_instruction
*insn
;
1304 if (intel
->gen
>= 5)
1307 if (intel
->gen
>= 7) {
1308 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1310 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1311 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1312 brw_set_src1(p
, insn
, brw_imm_ud(0));
1313 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1315 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1316 } else if (intel
->gen
== 6) {
1317 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1319 brw_set_dest(p
, insn
, brw_imm_w(0));
1320 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1321 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1322 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1324 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1326 if (p
->single_program_flow
) {
1327 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1329 brw_set_dest(p
, insn
, brw_ip_reg());
1330 brw_set_src0(p
, insn
, brw_ip_reg());
1331 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1332 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1334 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1336 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1338 brw_set_dest(p
, insn
, brw_ip_reg());
1339 brw_set_src0(p
, insn
, brw_ip_reg());
1340 brw_set_src1(p
, insn
, brw_imm_d(0));
1342 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1343 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1344 insn
->bits3
.if_else
.pop_count
= 0;
1345 insn
->bits3
.if_else
.pad0
= 0;
1348 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1349 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1357 void brw_land_fwd_jump(struct brw_compile
*p
,
1358 struct brw_instruction
*jmp_insn
)
1360 struct intel_context
*intel
= &p
->brw
->intel
;
1361 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1364 if (intel
->gen
>= 5)
1367 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1368 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1370 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1375 /* To integrate with the above, it makes sense that the comparison
1376 * instruction should populate the flag register. It might be simpler
1377 * just to use the flag reg for most WM tasks?
1379 void brw_CMP(struct brw_compile
*p
,
1380 struct brw_reg dest
,
1382 struct brw_reg src0
,
1383 struct brw_reg src1
)
1385 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1387 insn
->header
.destreg__conditionalmod
= conditional
;
1388 brw_set_dest(p
, insn
, dest
);
1389 brw_set_src0(p
, insn
, src0
);
1390 brw_set_src1(p
, insn
, src1
);
1392 /* guess_execution_size(insn, src0); */
1395 /* Make it so that future instructions will use the computed flag
1396 * value until brw_set_predicate_control_flag_value() is called
1399 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1401 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1402 p
->flag_value
= 0xff;
1406 /* Issue 'wait' instruction for n1, host could program MMIO
1407 to wake up thread. */
1408 void brw_WAIT (struct brw_compile
*p
)
1410 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1411 struct brw_reg src
= brw_notification_1_reg();
1413 brw_set_dest(p
, insn
, src
);
1414 brw_set_src0(p
, insn
, src
);
1415 brw_set_src1(p
, insn
, brw_null_reg());
1416 insn
->header
.execution_size
= 0; /* must */
1417 insn
->header
.predicate_control
= 0;
1418 insn
->header
.compression_control
= 0;
1422 /***********************************************************************
1423 * Helpers for the various SEND message types:
1426 /** Extended math function, float[8].
1428 void brw_math( struct brw_compile
*p
,
1429 struct brw_reg dest
,
1437 struct intel_context
*intel
= &p
->brw
->intel
;
1439 if (intel
->gen
>= 6) {
1440 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1442 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1443 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1445 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1446 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1448 /* Source modifiers are ignored for extended math instructions. */
1449 assert(!src
.negate
);
1452 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1453 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1454 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1455 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1457 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1460 /* Math is the same ISA format as other opcodes, except that CondModifier
1461 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1463 insn
->header
.destreg__conditionalmod
= function
;
1464 insn
->header
.saturate
= saturate
;
1466 brw_set_dest(p
, insn
, dest
);
1467 brw_set_src0(p
, insn
, src
);
1468 brw_set_src1(p
, insn
, brw_null_reg());
1470 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1472 /* Example code doesn't set predicate_control for send
1475 insn
->header
.predicate_control
= 0;
1476 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1478 brw_set_dest(p
, insn
, dest
);
1479 brw_set_src0(p
, insn
, src
);
1480 brw_set_math_message(p
,
1483 src
.type
== BRW_REGISTER_TYPE_D
,
1490 /** Extended math function, float[8].
1492 void brw_math2(struct brw_compile
*p
,
1493 struct brw_reg dest
,
1495 struct brw_reg src0
,
1496 struct brw_reg src1
)
1498 struct intel_context
*intel
= &p
->brw
->intel
;
1499 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1501 assert(intel
->gen
>= 6);
1505 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1506 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1507 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1509 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1510 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1511 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1513 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1514 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1515 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1516 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1517 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1519 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1520 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1523 /* Source modifiers are ignored for extended math instructions. */
1524 assert(!src0
.negate
);
1526 assert(!src1
.negate
);
1529 /* Math is the same ISA format as other opcodes, except that CondModifier
1530 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1532 insn
->header
.destreg__conditionalmod
= function
;
1534 brw_set_dest(p
, insn
, dest
);
1535 brw_set_src0(p
, insn
, src0
);
1536 brw_set_src1(p
, insn
, src1
);
1540 * Extended math function, float[16].
1541 * Use 2 send instructions.
1543 void brw_math_16( struct brw_compile
*p
,
1544 struct brw_reg dest
,
1551 struct intel_context
*intel
= &p
->brw
->intel
;
1552 struct brw_instruction
*insn
;
1554 if (intel
->gen
>= 6) {
1555 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1557 /* Math is the same ISA format as other opcodes, except that CondModifier
1558 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1560 insn
->header
.destreg__conditionalmod
= function
;
1561 insn
->header
.saturate
= saturate
;
1563 /* Source modifiers are ignored for extended math instructions. */
1564 assert(!src
.negate
);
1567 brw_set_dest(p
, insn
, dest
);
1568 brw_set_src0(p
, insn
, src
);
1569 brw_set_src1(p
, insn
, brw_null_reg());
1573 /* First instruction:
1575 brw_push_insn_state(p
);
1576 brw_set_predicate_control_flag_value(p
, 0xff);
1577 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1579 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1580 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1582 brw_set_dest(p
, insn
, dest
);
1583 brw_set_src0(p
, insn
, src
);
1584 brw_set_math_message(p
,
1587 BRW_MATH_INTEGER_UNSIGNED
,
1590 BRW_MATH_DATA_VECTOR
);
1592 /* Second instruction:
1594 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1595 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1596 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1598 brw_set_dest(p
, insn
, offset(dest
,1));
1599 brw_set_src0(p
, insn
, src
);
1600 brw_set_math_message(p
,
1603 BRW_MATH_INTEGER_UNSIGNED
,
1606 BRW_MATH_DATA_VECTOR
);
1608 brw_pop_insn_state(p
);
1613 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1614 * using a constant offset per channel.
1616 * The offset must be aligned to oword size (16 bytes). Used for
1617 * register spilling.
1619 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1624 struct intel_context
*intel
= &p
->brw
->intel
;
1625 uint32_t msg_control
, msg_type
;
1628 if (intel
->gen
>= 6)
1631 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1633 if (num_regs
== 1) {
1634 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1637 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1641 /* Set up the message header. This is g0, with g0.2 filled with
1642 * the offset. We don't want to leave our offset around in g0 or
1643 * it'll screw up texture samples, so set it up inside the message
1647 brw_push_insn_state(p
);
1648 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1649 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1651 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1653 /* set message header global offset field (reg 0, element 2) */
1655 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1657 2), BRW_REGISTER_TYPE_UD
),
1658 brw_imm_ud(offset
));
1660 brw_pop_insn_state(p
);
1664 struct brw_reg dest
;
1665 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1666 int send_commit_msg
;
1667 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1668 BRW_REGISTER_TYPE_UW
);
1670 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1671 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1672 src_header
= vec16(src_header
);
1674 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1675 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1677 /* Until gen6, writes followed by reads from the same location
1678 * are not guaranteed to be ordered unless write_commit is set.
1679 * If set, then a no-op write is issued to the destination
1680 * register to set a dependency, and a read from the destination
1681 * can be used to ensure the ordering.
1683 * For gen6, only writes between different threads need ordering
1684 * protection. Our use of DP writes is all about register
1685 * spilling within a thread.
1687 if (intel
->gen
>= 6) {
1688 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1689 send_commit_msg
= 0;
1692 send_commit_msg
= 1;
1695 brw_set_dest(p
, insn
, dest
);
1696 if (intel
->gen
>= 6) {
1697 brw_set_src0(p
, insn
, mrf
);
1699 brw_set_src0(p
, insn
, brw_null_reg());
1702 if (intel
->gen
>= 6)
1703 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1705 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1707 brw_set_dp_write_message(p
,
1709 255, /* binding table index (255=stateless) */
1713 true, /* header_present */
1714 0, /* not a render target */
1715 send_commit_msg
, /* response_length */
1723 * Read a block of owords (half a GRF each) from the scratch buffer
1724 * using a constant index per channel.
1726 * Offset must be aligned to oword size (16 bytes). Used for register
1730 brw_oword_block_read_scratch(struct brw_compile
*p
,
1731 struct brw_reg dest
,
1736 struct intel_context
*intel
= &p
->brw
->intel
;
1737 uint32_t msg_control
;
1740 if (intel
->gen
>= 6)
1743 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1744 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1746 if (num_regs
== 1) {
1747 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1750 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1755 brw_push_insn_state(p
);
1756 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1757 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1759 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1761 /* set message header global offset field (reg 0, element 2) */
1763 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1765 2), BRW_REGISTER_TYPE_UD
),
1766 brw_imm_ud(offset
));
1768 brw_pop_insn_state(p
);
1772 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1774 assert(insn
->header
.predicate_control
== 0);
1775 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1776 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1778 brw_set_dest(p
, insn
, dest
); /* UW? */
1779 if (intel
->gen
>= 6) {
1780 brw_set_src0(p
, insn
, mrf
);
1782 brw_set_src0(p
, insn
, brw_null_reg());
1785 brw_set_dp_read_message(p
,
1787 255, /* binding table index (255=stateless) */
1789 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1790 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1797 * Read a float[4] vector from the data port Data Cache (const buffer).
1798 * Location (in buffer) should be a multiple of 16.
1799 * Used for fetching shader constants.
1801 void brw_oword_block_read(struct brw_compile
*p
,
1802 struct brw_reg dest
,
1805 uint32_t bind_table_index
)
1807 struct intel_context
*intel
= &p
->brw
->intel
;
1809 /* On newer hardware, offset is in units of owords. */
1810 if (intel
->gen
>= 6)
1813 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1815 brw_push_insn_state(p
);
1816 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1817 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1818 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1820 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1822 /* set message header global offset field (reg 0, element 2) */
1824 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1826 2), BRW_REGISTER_TYPE_UD
),
1827 brw_imm_ud(offset
));
1829 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1830 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1832 /* cast dest to a uword[8] vector */
1833 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1835 brw_set_dest(p
, insn
, dest
);
1836 if (intel
->gen
>= 6) {
1837 brw_set_src0(p
, insn
, mrf
);
1839 brw_set_src0(p
, insn
, brw_null_reg());
1842 brw_set_dp_read_message(p
,
1845 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1846 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1847 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1849 1); /* response_length (1 reg, 2 owords!) */
1851 brw_pop_insn_state(p
);
1855 * Read a set of dwords from the data port Data Cache (const buffer).
1857 * Location (in buffer) appears as UD offsets in the register after
1858 * the provided mrf header reg.
1860 void brw_dword_scattered_read(struct brw_compile
*p
,
1861 struct brw_reg dest
,
1863 uint32_t bind_table_index
)
1865 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1867 brw_push_insn_state(p
);
1868 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1869 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1870 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1871 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1872 brw_pop_insn_state(p
);
1874 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1875 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1877 /* cast dest to a uword[8] vector */
1878 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1880 brw_set_dest(p
, insn
, dest
);
1881 brw_set_src0(p
, insn
, brw_null_reg());
1883 brw_set_dp_read_message(p
,
1886 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1887 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1888 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1890 1); /* response_length */
1896 * Read float[4] constant(s) from VS constant buffer.
1897 * For relative addressing, two float[4] constants will be read into 'dest'.
1898 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1900 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1901 struct brw_reg dest
,
1903 GLuint bind_table_index
)
1905 struct intel_context
*intel
= &p
->brw
->intel
;
1906 struct brw_instruction
*insn
;
1907 GLuint msg_reg_nr
= 1;
1909 if (intel
->gen
>= 6)
1912 /* Setup MRF[1] with location/offset into const buffer */
1913 brw_push_insn_state(p
);
1914 brw_set_access_mode(p
, BRW_ALIGN_1
);
1915 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1916 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1917 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1918 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1919 BRW_REGISTER_TYPE_UD
),
1920 brw_imm_ud(location
));
1921 brw_pop_insn_state(p
);
1923 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1925 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1926 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1927 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1928 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1930 brw_set_dest(p
, insn
, dest
);
1931 if (intel
->gen
>= 6) {
1932 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1934 brw_set_src0(p
, insn
, brw_null_reg());
1937 brw_set_dp_read_message(p
,
1941 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1942 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1944 1); /* response_length (1 Oword) */
1948 * Read a float[4] constant per vertex from VS constant buffer, with
1949 * relative addressing.
1951 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1952 struct brw_reg dest
,
1953 struct brw_reg addr_reg
,
1955 GLuint bind_table_index
)
1957 struct intel_context
*intel
= &p
->brw
->intel
;
1958 struct brw_reg src
= brw_vec8_grf(0, 0);
1961 /* Setup MRF[1] with offset into const buffer */
1962 brw_push_insn_state(p
);
1963 brw_set_access_mode(p
, BRW_ALIGN_1
);
1964 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1965 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1966 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1968 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1971 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1972 addr_reg
, brw_imm_d(offset
));
1973 brw_pop_insn_state(p
);
1975 gen6_resolve_implied_move(p
, &src
, 0);
1976 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1978 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1979 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1980 insn
->header
.destreg__conditionalmod
= 0;
1981 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1983 brw_set_dest(p
, insn
, dest
);
1984 brw_set_src0(p
, insn
, src
);
1986 if (intel
->gen
>= 6)
1987 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1988 else if (intel
->gen
== 5 || intel
->is_g4x
)
1989 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1991 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1993 brw_set_dp_read_message(p
,
1996 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1998 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2000 1); /* response_length */
2005 void brw_fb_WRITE(struct brw_compile
*p
,
2008 struct brw_reg src0
,
2009 GLuint binding_table_index
,
2011 GLuint response_length
,
2013 bool header_present
)
2015 struct intel_context
*intel
= &p
->brw
->intel
;
2016 struct brw_instruction
*insn
;
2017 GLuint msg_control
, msg_type
;
2018 struct brw_reg dest
;
2020 if (dispatch_width
== 16)
2021 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2023 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2025 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2026 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2028 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2030 /* The execution mask is ignored for render target writes. */
2031 insn
->header
.predicate_control
= 0;
2032 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2034 if (intel
->gen
>= 6) {
2035 /* headerless version, just submit color payload */
2036 src0
= brw_message_reg(msg_reg_nr
);
2038 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2040 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2042 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2045 if (dispatch_width
== 16)
2046 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2048 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2050 brw_set_dest(p
, insn
, dest
);
2051 brw_set_src0(p
, insn
, src0
);
2052 brw_set_dp_write_message(p
,
2054 binding_table_index
,
2059 1, /* last render target write */
2062 0 /* send_commit_msg */);
2067 * Texture sample instruction.
2068 * Note: the msg_type plus msg_length values determine exactly what kind
2069 * of sampling operation is performed. See volume 4, page 161 of docs.
2071 void brw_SAMPLE(struct brw_compile
*p
,
2072 struct brw_reg dest
,
2074 struct brw_reg src0
,
2075 GLuint binding_table_index
,
2079 GLuint response_length
,
2081 GLuint header_present
,
2084 struct intel_context
*intel
= &p
->brw
->intel
;
2085 bool need_stall
= 0;
2087 if (writemask
== 0) {
2088 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2092 /* Hardware doesn't do destination dependency checking on send
2093 * instructions properly. Add a workaround which generates the
2094 * dependency by other means. In practice it seems like this bug
2095 * only crops up for texture samples, and only where registers are
2096 * written by the send and then written again later without being
2097 * read in between. Luckily for us, we already track that
2098 * information and use it to modify the writemask for the
2099 * instruction, so that is a guide for whether a workaround is
2102 if (writemask
!= WRITEMASK_XYZW
) {
2103 GLuint dst_offset
= 0;
2104 GLuint i
, newmask
= 0, len
= 0;
2106 for (i
= 0; i
< 4; i
++) {
2107 if (writemask
& (1<<i
))
2111 for (; i
< 4; i
++) {
2112 if (!(writemask
& (1<<i
)))
2118 if (newmask
!= writemask
) {
2120 /* printf("need stall %x %x\n", newmask , writemask); */
2123 bool dispatch_16
= false;
2125 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2127 guess_execution_size(p
, p
->current
, dest
);
2128 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2131 newmask
= ~newmask
& WRITEMASK_XYZW
;
2133 brw_push_insn_state(p
);
2135 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2136 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2138 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2139 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2140 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2142 brw_pop_insn_state(p
);
2144 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2145 dest
= offset(dest
, dst_offset
);
2147 /* For 16-wide dispatch, masked channels are skipped in the
2148 * response. For 8-wide, masked channels still take up slots,
2149 * and are just not written to.
2152 response_length
= len
* 2;
2157 struct brw_instruction
*insn
;
2159 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2161 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2162 insn
->header
.predicate_control
= 0; /* XXX */
2163 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2165 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2167 brw_set_dest(p
, insn
, dest
);
2168 brw_set_src0(p
, insn
, src0
);
2169 brw_set_sampler_message(p
, insn
,
2170 binding_table_index
,
2180 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2182 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2184 brw_push_insn_state(p
);
2185 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2186 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2187 retype(reg
, BRW_REGISTER_TYPE_UD
));
2188 brw_pop_insn_state(p
);
2193 /* All these variables are pretty confusing - we might be better off
2194 * using bitmasks and macros for this, in the old style. Or perhaps
2195 * just having the caller instantiate the fields in dword3 itself.
2197 void brw_urb_WRITE(struct brw_compile
*p
,
2198 struct brw_reg dest
,
2200 struct brw_reg src0
,
2204 GLuint response_length
,
2206 bool writes_complete
,
2210 struct intel_context
*intel
= &p
->brw
->intel
;
2211 struct brw_instruction
*insn
;
2213 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2215 if (intel
->gen
== 7) {
2216 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2217 brw_push_insn_state(p
);
2218 brw_set_access_mode(p
, BRW_ALIGN_1
);
2219 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2220 BRW_REGISTER_TYPE_UD
),
2221 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2222 brw_imm_ud(0xff00));
2223 brw_pop_insn_state(p
);
2226 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2228 assert(msg_length
< BRW_MAX_MRF
);
2230 brw_set_dest(p
, insn
, dest
);
2231 brw_set_src0(p
, insn
, src0
);
2232 brw_set_src1(p
, insn
, brw_imm_d(0));
2235 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2237 brw_set_urb_message(p
,
2250 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2254 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2255 struct brw_instruction
*insn
= &p
->store
[ip
];
2257 switch (insn
->header
.opcode
) {
2258 case BRW_OPCODE_ENDIF
:
2259 case BRW_OPCODE_ELSE
:
2260 case BRW_OPCODE_WHILE
:
2264 assert(!"not reached");
2268 /* There is no DO instruction on gen6, so to find the end of the loop
2269 * we have to see if the loop is jumping back before our start
2273 brw_find_loop_end(struct brw_compile
*p
, int start
)
2275 struct intel_context
*intel
= &p
->brw
->intel
;
2279 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2280 struct brw_instruction
*insn
= &p
->store
[ip
];
2282 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2283 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2284 : insn
->bits3
.break_cont
.jip
;
2285 if (ip
+ jip
/ br
<= start
)
2289 assert(!"not reached");
2293 /* After program generation, go back and update the UIP and JIP of
2294 * BREAK and CONT instructions to their correct locations.
2297 brw_set_uip_jip(struct brw_compile
*p
)
2299 struct intel_context
*intel
= &p
->brw
->intel
;
2306 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2307 struct brw_instruction
*insn
= &p
->store
[ip
];
2309 switch (insn
->header
.opcode
) {
2310 case BRW_OPCODE_BREAK
:
2311 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2312 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2313 insn
->bits3
.break_cont
.uip
=
2314 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2316 case BRW_OPCODE_CONTINUE
:
2317 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2318 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
2320 assert(insn
->bits3
.break_cont
.uip
!= 0);
2321 assert(insn
->bits3
.break_cont
.jip
!= 0);
2327 void brw_ff_sync(struct brw_compile
*p
,
2328 struct brw_reg dest
,
2330 struct brw_reg src0
,
2332 GLuint response_length
,
2335 struct intel_context
*intel
= &p
->brw
->intel
;
2336 struct brw_instruction
*insn
;
2338 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2340 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2341 brw_set_dest(p
, insn
, dest
);
2342 brw_set_src0(p
, insn
, src0
);
2343 brw_set_src1(p
, insn
, brw_imm_d(0));
2346 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2348 brw_set_ff_sync_message(p
,