2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
146 extern int reg_type_size
[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
374 * Set the Message Descriptor and Extended Message Descriptor fields
377 * \note This zeroes out the Function Control bits, so it must be called
378 * \b before filling out any message-specific data. Callers can
379 * choose not to fill in irrelevant bits; they will be zero.
382 brw_set_message_descriptor(struct brw_compile
*p
,
383 struct brw_instruction
*inst
,
384 enum brw_message_target sfid
,
386 unsigned response_length
,
390 struct intel_context
*intel
= &p
->brw
->intel
;
392 brw_set_src1(p
, inst
, brw_imm_d(0));
394 if (intel
->gen
>= 5) {
395 inst
->bits3
.generic_gen5
.header_present
= header_present
;
396 inst
->bits3
.generic_gen5
.response_length
= response_length
;
397 inst
->bits3
.generic_gen5
.msg_length
= msg_length
;
398 inst
->bits3
.generic_gen5
.end_of_thread
= end_of_thread
;
400 if (intel
->gen
>= 6) {
401 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
402 inst
->header
.destreg__conditionalmod
= sfid
;
404 /* Set Extended Message Descriptor (ex_desc) */
405 inst
->bits2
.send_gen5
.sfid
= sfid
;
406 inst
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
409 inst
->bits3
.generic
.response_length
= response_length
;
410 inst
->bits3
.generic
.msg_length
= msg_length
;
411 inst
->bits3
.generic
.msg_target
= sfid
;
412 inst
->bits3
.generic
.end_of_thread
= end_of_thread
;
416 static void brw_set_math_message( struct brw_compile
*p
,
417 struct brw_instruction
*insn
,
424 struct brw_context
*brw
= p
->brw
;
425 struct intel_context
*intel
= &brw
->intel
;
427 unsigned response_length
;
429 /* Infer message length from the function */
431 case BRW_MATH_FUNCTION_POW
:
432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
442 /* Infer response length from the function */
444 case BRW_MATH_FUNCTION_SINCOS
:
445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
453 brw_set_message_descriptor(p
, insn
, BRW_SFID_MATH
,
454 msg_length
, response_length
, false, false);
455 if (intel
->gen
== 5) {
456 insn
->bits3
.math_gen5
.function
= function
;
457 insn
->bits3
.math_gen5
.int_type
= integer_type
;
458 insn
->bits3
.math_gen5
.precision
= low_precision
;
459 insn
->bits3
.math_gen5
.saturate
= saturate
;
460 insn
->bits3
.math_gen5
.data_type
= dataType
;
461 insn
->bits3
.math_gen5
.snapshot
= 0;
463 insn
->bits3
.math
.function
= function
;
464 insn
->bits3
.math
.int_type
= integer_type
;
465 insn
->bits3
.math
.precision
= low_precision
;
466 insn
->bits3
.math
.saturate
= saturate
;
467 insn
->bits3
.math
.data_type
= dataType
;
472 static void brw_set_ff_sync_message(struct brw_compile
*p
,
473 struct brw_instruction
*insn
,
475 GLuint response_length
,
478 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
479 1, response_length
, true, end_of_thread
);
480 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
481 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
482 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
483 insn
->bits3
.urb_gen5
.allocate
= allocate
;
484 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
485 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
488 static void brw_set_urb_message( struct brw_compile
*p
,
489 struct brw_instruction
*insn
,
493 GLuint response_length
,
497 GLuint swizzle_control
)
499 struct brw_context
*brw
= p
->brw
;
500 struct intel_context
*intel
= &brw
->intel
;
502 brw_set_message_descriptor(p
, insn
, BRW_SFID_URB
,
503 msg_length
, response_length
, true, end_of_thread
);
504 if (intel
->gen
== 7) {
505 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
506 insn
->bits3
.urb_gen7
.offset
= offset
;
507 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
508 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
509 /* per_slot_offset = 0 makes it ignore offsets in message header */
510 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
511 insn
->bits3
.urb_gen7
.complete
= complete
;
512 } else if (intel
->gen
>= 5) {
513 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
514 insn
->bits3
.urb_gen5
.offset
= offset
;
515 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
516 insn
->bits3
.urb_gen5
.allocate
= allocate
;
517 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
518 insn
->bits3
.urb_gen5
.complete
= complete
;
520 insn
->bits3
.urb
.opcode
= 0; /* ? */
521 insn
->bits3
.urb
.offset
= offset
;
522 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
523 insn
->bits3
.urb
.allocate
= allocate
;
524 insn
->bits3
.urb
.used
= used
; /* ? */
525 insn
->bits3
.urb
.complete
= complete
;
530 brw_set_dp_write_message(struct brw_compile
*p
,
531 struct brw_instruction
*insn
,
532 GLuint binding_table_index
,
537 GLuint last_render_target
,
538 GLuint response_length
,
539 GLuint end_of_thread
,
540 GLuint send_commit_msg
)
542 struct brw_context
*brw
= p
->brw
;
543 struct intel_context
*intel
= &brw
->intel
;
546 if (intel
->gen
>= 7) {
547 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
548 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
549 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
551 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
552 } else if (intel
->gen
== 6) {
553 /* Use the render cache for all write messages. */
554 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
556 sfid
= BRW_SFID_DATAPORT_WRITE
;
559 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
560 header_present
, end_of_thread
);
562 if (intel
->gen
>= 7) {
563 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
564 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
565 insn
->bits3
.gen7_dp
.last_render_target
= last_render_target
;
566 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
567 } else if (intel
->gen
== 6) {
568 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
569 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
570 insn
->bits3
.gen6_dp
.last_render_target
= last_render_target
;
571 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
572 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
573 } else if (intel
->gen
== 5) {
574 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
575 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
576 insn
->bits3
.dp_write_gen5
.last_render_target
= last_render_target
;
577 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
578 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
580 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
581 insn
->bits3
.dp_write
.msg_control
= msg_control
;
582 insn
->bits3
.dp_write
.last_render_target
= last_render_target
;
583 insn
->bits3
.dp_write
.msg_type
= msg_type
;
584 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
589 brw_set_dp_read_message(struct brw_compile
*p
,
590 struct brw_instruction
*insn
,
591 GLuint binding_table_index
,
596 GLuint response_length
)
598 struct brw_context
*brw
= p
->brw
;
599 struct intel_context
*intel
= &brw
->intel
;
602 if (intel
->gen
>= 7) {
603 sfid
= GEN7_SFID_DATAPORT_DATA_CACHE
;
604 } else if (intel
->gen
== 6) {
605 if (target_cache
== BRW_DATAPORT_READ_TARGET_RENDER_CACHE
)
606 sfid
= GEN6_SFID_DATAPORT_RENDER_CACHE
;
608 sfid
= GEN6_SFID_DATAPORT_SAMPLER_CACHE
;
610 sfid
= BRW_SFID_DATAPORT_READ
;
613 brw_set_message_descriptor(p
, insn
, sfid
, msg_length
, response_length
,
616 if (intel
->gen
>= 7) {
617 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
618 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
619 insn
->bits3
.gen7_dp
.last_render_target
= 0;
620 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
621 } else if (intel
->gen
== 6) {
622 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
623 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
624 insn
->bits3
.gen6_dp
.last_render_target
= 0;
625 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
626 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
627 } else if (intel
->gen
== 5) {
628 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
629 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
630 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
631 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
632 } else if (intel
->is_g4x
) {
633 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
634 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
635 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
636 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
638 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
639 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
640 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
641 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
645 static void brw_set_sampler_message(struct brw_compile
*p
,
646 struct brw_instruction
*insn
,
647 GLuint binding_table_index
,
650 GLuint response_length
,
652 GLuint header_present
,
655 struct brw_context
*brw
= p
->brw
;
656 struct intel_context
*intel
= &brw
->intel
;
658 brw_set_message_descriptor(p
, insn
, BRW_SFID_SAMPLER
, msg_length
,
659 response_length
, header_present
, false);
661 if (intel
->gen
>= 7) {
662 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
663 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
664 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
665 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
666 } else if (intel
->gen
>= 5) {
667 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
668 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
669 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
670 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
671 } else if (intel
->is_g4x
) {
672 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
673 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
674 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
676 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
677 insn
->bits3
.sampler
.sampler
= sampler
;
678 insn
->bits3
.sampler
.msg_type
= msg_type
;
679 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
684 #define next_insn brw_next_insn
685 struct brw_instruction
*
686 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
688 struct brw_instruction
*insn
;
690 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
692 insn
= &p
->store
[p
->nr_insn
++];
693 memcpy(insn
, p
->current
, sizeof(*insn
));
695 /* Reset this one-shot flag:
698 if (p
->current
->header
.destreg__conditionalmod
) {
699 p
->current
->header
.destreg__conditionalmod
= 0;
700 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
703 insn
->header
.opcode
= opcode
;
707 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
712 struct brw_instruction
*insn
= next_insn(p
, opcode
);
713 brw_set_dest(p
, insn
, dest
);
714 brw_set_src0(p
, insn
, src
);
718 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
722 struct brw_reg src1
)
724 struct brw_instruction
*insn
= next_insn(p
, opcode
);
725 brw_set_dest(p
, insn
, dest
);
726 brw_set_src0(p
, insn
, src0
);
727 brw_set_src1(p
, insn
, src1
);
732 /***********************************************************************
733 * Convenience routines.
736 struct brw_instruction *brw_##OP(struct brw_compile *p, \
737 struct brw_reg dest, \
738 struct brw_reg src0) \
740 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
744 struct brw_instruction *brw_##OP(struct brw_compile *p, \
745 struct brw_reg dest, \
746 struct brw_reg src0, \
747 struct brw_reg src1) \
749 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
752 /* Rounding operations (other than RNDD) require two instructions - the first
753 * stores a rounded value (possibly the wrong way) in the dest register, but
754 * also sets a per-channel "increment bit" in the flag register. A predicated
755 * add of 1.0 fixes dest to contain the desired result.
757 * Sandybridge and later appear to round correctly without an ADD.
760 void brw_##OP(struct brw_compile *p, \
761 struct brw_reg dest, \
762 struct brw_reg src) \
764 struct brw_instruction *rnd, *add; \
765 rnd = next_insn(p, BRW_OPCODE_##OP); \
766 brw_set_dest(p, rnd, dest); \
767 brw_set_src0(p, rnd, src); \
769 if (p->brw->intel.gen < 6) { \
770 /* turn on round-increments */ \
771 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
772 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
773 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
806 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
812 if (src0
.type
== BRW_REGISTER_TYPE_F
||
813 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
814 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
815 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
816 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
819 if (src1
.type
== BRW_REGISTER_TYPE_F
||
820 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
821 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
822 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
823 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
826 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
829 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
835 if (src0
.type
== BRW_REGISTER_TYPE_D
||
836 src0
.type
== BRW_REGISTER_TYPE_UD
||
837 src1
.type
== BRW_REGISTER_TYPE_D
||
838 src1
.type
== BRW_REGISTER_TYPE_UD
) {
839 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
842 if (src0
.type
== BRW_REGISTER_TYPE_F
||
843 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
844 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
845 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
846 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
849 if (src1
.type
== BRW_REGISTER_TYPE_F
||
850 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
851 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
852 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
853 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
856 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
857 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
858 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
859 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
861 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
865 void brw_NOP(struct brw_compile
*p
)
867 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
868 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
869 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
870 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
877 /***********************************************************************
878 * Comparisons, if/else/endif
881 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
886 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
888 insn
->header
.execution_size
= 1;
889 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
890 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
892 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
898 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
900 p
->if_stack
[p
->if_stack_depth
] = inst
;
903 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
904 p
->if_stack_array_size
*= 2;
905 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
906 p
->if_stack_array_size
);
910 /* EU takes the value from the flag register and pushes it onto some
911 * sort of a stack (presumably merging with any flag value already on
912 * the stack). Within an if block, the flags at the top of the stack
913 * control execution on each channel of the unit, eg. on each of the
914 * 16 pixel values in our wm programs.
916 * When the matching 'else' instruction is reached (presumably by
917 * countdown of the instruction count patched in by our ELSE/ENDIF
918 * functions), the relevent flags are inverted.
920 * When the matching 'endif' instruction is reached, the flags are
921 * popped off. If the stack is now empty, normal execution resumes.
923 struct brw_instruction
*
924 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
926 struct intel_context
*intel
= &p
->brw
->intel
;
927 struct brw_instruction
*insn
;
929 insn
= next_insn(p
, BRW_OPCODE_IF
);
931 /* Override the defaults for this instruction:
933 if (intel
->gen
< 6) {
934 brw_set_dest(p
, insn
, brw_ip_reg());
935 brw_set_src0(p
, insn
, brw_ip_reg());
936 brw_set_src1(p
, insn
, brw_imm_d(0x0));
937 } else if (intel
->gen
== 6) {
938 brw_set_dest(p
, insn
, brw_imm_w(0));
939 insn
->bits1
.branch_gen6
.jump_count
= 0;
940 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
941 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
943 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
944 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
945 brw_set_src1(p
, insn
, brw_imm_ud(0));
946 insn
->bits3
.break_cont
.jip
= 0;
947 insn
->bits3
.break_cont
.uip
= 0;
950 insn
->header
.execution_size
= execute_size
;
951 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
952 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
953 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
954 if (!p
->single_program_flow
)
955 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
957 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
959 push_if_stack(p
, insn
);
963 /* This function is only used for gen6-style IF instructions with an
964 * embedded comparison (conditional modifier). It is not used on gen7.
966 struct brw_instruction
*
967 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
968 struct brw_reg src0
, struct brw_reg src1
)
970 struct brw_instruction
*insn
;
972 insn
= next_insn(p
, BRW_OPCODE_IF
);
974 brw_set_dest(p
, insn
, brw_imm_w(0));
976 insn
->header
.execution_size
= BRW_EXECUTE_16
;
978 insn
->header
.execution_size
= BRW_EXECUTE_8
;
980 insn
->bits1
.branch_gen6
.jump_count
= 0;
981 brw_set_src0(p
, insn
, src0
);
982 brw_set_src1(p
, insn
, src1
);
984 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
985 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
986 insn
->header
.destreg__conditionalmod
= conditional
;
988 if (!p
->single_program_flow
)
989 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
991 push_if_stack(p
, insn
);
996 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
999 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1000 struct brw_instruction
*if_inst
,
1001 struct brw_instruction
*else_inst
)
1003 /* The next instruction (where the ENDIF would be, if it existed) */
1004 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1006 assert(p
->single_program_flow
);
1007 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1008 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1009 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1011 /* Convert IF to an ADD instruction that moves the instruction pointer
1012 * to the first instruction of the ELSE block. If there is no ELSE
1013 * block, point to where ENDIF would be. Reverse the predicate.
1015 * There's no need to execute an ENDIF since we don't need to do any
1016 * stack operations, and if we're currently executing, we just want to
1017 * continue normally.
1019 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1020 if_inst
->header
.predicate_inverse
= 1;
1022 if (else_inst
!= NULL
) {
1023 /* Convert ELSE to an ADD instruction that points where the ENDIF
1026 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1028 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1029 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1031 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1036 * Patch IF and ELSE instructions with appropriate jump targets.
1039 patch_IF_ELSE(struct brw_compile
*p
,
1040 struct brw_instruction
*if_inst
,
1041 struct brw_instruction
*else_inst
,
1042 struct brw_instruction
*endif_inst
)
1044 struct intel_context
*intel
= &p
->brw
->intel
;
1046 assert(!p
->single_program_flow
);
1047 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1048 assert(endif_inst
!= NULL
);
1049 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1052 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1053 * requires 2 chunks.
1055 if (intel
->gen
>= 5)
1058 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1059 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1061 if (else_inst
== NULL
) {
1062 /* Patch IF -> ENDIF */
1063 if (intel
->gen
< 6) {
1064 /* Turn it into an IFF, which means no mask stack operations for
1065 * all-false and jumping past the ENDIF.
1067 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1068 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1069 if_inst
->bits3
.if_else
.pop_count
= 0;
1070 if_inst
->bits3
.if_else
.pad0
= 0;
1071 } else if (intel
->gen
== 6) {
1072 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1073 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1075 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1076 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1079 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1081 /* Patch IF -> ELSE */
1082 if (intel
->gen
< 6) {
1083 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1084 if_inst
->bits3
.if_else
.pop_count
= 0;
1085 if_inst
->bits3
.if_else
.pad0
= 0;
1086 } else if (intel
->gen
== 6) {
1087 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1090 /* Patch ELSE -> ENDIF */
1091 if (intel
->gen
< 6) {
1092 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1095 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1096 else_inst
->bits3
.if_else
.pop_count
= 1;
1097 else_inst
->bits3
.if_else
.pad0
= 0;
1098 } else if (intel
->gen
== 6) {
1099 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1100 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1102 /* The IF instruction's JIP should point just past the ELSE */
1103 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1104 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1105 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1106 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1112 brw_ELSE(struct brw_compile
*p
)
1114 struct intel_context
*intel
= &p
->brw
->intel
;
1115 struct brw_instruction
*insn
;
1117 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1119 if (intel
->gen
< 6) {
1120 brw_set_dest(p
, insn
, brw_ip_reg());
1121 brw_set_src0(p
, insn
, brw_ip_reg());
1122 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1123 } else if (intel
->gen
== 6) {
1124 brw_set_dest(p
, insn
, brw_imm_w(0));
1125 insn
->bits1
.branch_gen6
.jump_count
= 0;
1126 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1127 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1129 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1130 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1131 brw_set_src1(p
, insn
, brw_imm_ud(0));
1132 insn
->bits3
.break_cont
.jip
= 0;
1133 insn
->bits3
.break_cont
.uip
= 0;
1136 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1137 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1138 if (!p
->single_program_flow
)
1139 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1141 push_if_stack(p
, insn
);
1145 brw_ENDIF(struct brw_compile
*p
)
1147 struct intel_context
*intel
= &p
->brw
->intel
;
1148 struct brw_instruction
*insn
;
1149 struct brw_instruction
*else_inst
= NULL
;
1150 struct brw_instruction
*if_inst
= NULL
;
1152 /* Pop the IF and (optional) ELSE instructions from the stack */
1153 p
->if_stack_depth
--;
1154 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1155 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1156 p
->if_stack_depth
--;
1158 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1160 if (p
->single_program_flow
) {
1161 /* ENDIF is useless; don't bother emitting it. */
1162 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1166 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1168 if (intel
->gen
< 6) {
1169 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1170 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1171 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1172 } else if (intel
->gen
== 6) {
1173 brw_set_dest(p
, insn
, brw_imm_w(0));
1174 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1175 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1177 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1178 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1179 brw_set_src1(p
, insn
, brw_imm_ud(0));
1182 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1183 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1184 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1186 /* Also pop item off the stack in the endif instruction: */
1187 if (intel
->gen
< 6) {
1188 insn
->bits3
.if_else
.jump_count
= 0;
1189 insn
->bits3
.if_else
.pop_count
= 1;
1190 insn
->bits3
.if_else
.pad0
= 0;
1191 } else if (intel
->gen
== 6) {
1192 insn
->bits1
.branch_gen6
.jump_count
= 2;
1194 insn
->bits3
.break_cont
.jip
= 2;
1196 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1199 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1201 struct intel_context
*intel
= &p
->brw
->intel
;
1202 struct brw_instruction
*insn
;
1204 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1205 if (intel
->gen
>= 6) {
1206 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1207 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1208 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1210 brw_set_dest(p
, insn
, brw_ip_reg());
1211 brw_set_src0(p
, insn
, brw_ip_reg());
1212 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1213 insn
->bits3
.if_else
.pad0
= 0;
1214 insn
->bits3
.if_else
.pop_count
= pop_count
;
1216 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1217 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1222 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1223 struct brw_instruction
*do_insn
)
1225 struct brw_instruction
*insn
;
1227 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1228 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1229 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1230 brw_set_dest(p
, insn
, brw_ip_reg());
1231 brw_set_src0(p
, insn
, brw_ip_reg());
1232 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1234 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1235 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1239 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1241 struct brw_instruction
*insn
;
1242 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1243 brw_set_dest(p
, insn
, brw_ip_reg());
1244 brw_set_src0(p
, insn
, brw_ip_reg());
1245 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1246 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1247 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1248 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1249 insn
->bits3
.if_else
.pad0
= 0;
1250 insn
->bits3
.if_else
.pop_count
= pop_count
;
1256 * The DO/WHILE is just an unterminated loop -- break or continue are
1257 * used for control within the loop. We have a few ways they can be
1260 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1261 * jip and no DO instruction.
1263 * For non-uniform control flow pre-gen6, there's a DO instruction to
1264 * push the mask, and a WHILE to jump back, and BREAK to get out and
1267 * For gen6, there's no more mask stack, so no need for DO. WHILE
1268 * just points back to the first instruction of the loop.
1270 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1272 struct intel_context
*intel
= &p
->brw
->intel
;
1274 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1275 return &p
->store
[p
->nr_insn
];
1277 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1279 /* Override the defaults for this instruction:
1281 brw_set_dest(p
, insn
, brw_null_reg());
1282 brw_set_src0(p
, insn
, brw_null_reg());
1283 brw_set_src1(p
, insn
, brw_null_reg());
1285 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1286 insn
->header
.execution_size
= execute_size
;
1287 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1288 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1289 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1297 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1298 struct brw_instruction
*do_insn
)
1300 struct intel_context
*intel
= &p
->brw
->intel
;
1301 struct brw_instruction
*insn
;
1304 if (intel
->gen
>= 5)
1307 if (intel
->gen
>= 7) {
1308 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1310 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1311 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1312 brw_set_src1(p
, insn
, brw_imm_ud(0));
1313 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1315 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1316 } else if (intel
->gen
== 6) {
1317 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1319 brw_set_dest(p
, insn
, brw_imm_w(0));
1320 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1321 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1322 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1324 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1326 if (p
->single_program_flow
) {
1327 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1329 brw_set_dest(p
, insn
, brw_ip_reg());
1330 brw_set_src0(p
, insn
, brw_ip_reg());
1331 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1332 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1334 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1336 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1338 brw_set_dest(p
, insn
, brw_ip_reg());
1339 brw_set_src0(p
, insn
, brw_ip_reg());
1340 brw_set_src1(p
, insn
, brw_imm_d(0));
1342 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1343 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1344 insn
->bits3
.if_else
.pop_count
= 0;
1345 insn
->bits3
.if_else
.pad0
= 0;
1348 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1349 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1357 void brw_land_fwd_jump(struct brw_compile
*p
,
1358 struct brw_instruction
*jmp_insn
)
1360 struct intel_context
*intel
= &p
->brw
->intel
;
1361 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1364 if (intel
->gen
>= 5)
1367 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1368 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1370 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1375 /* To integrate with the above, it makes sense that the comparison
1376 * instruction should populate the flag register. It might be simpler
1377 * just to use the flag reg for most WM tasks?
1379 void brw_CMP(struct brw_compile
*p
,
1380 struct brw_reg dest
,
1382 struct brw_reg src0
,
1383 struct brw_reg src1
)
1385 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1387 insn
->header
.destreg__conditionalmod
= conditional
;
1388 brw_set_dest(p
, insn
, dest
);
1389 brw_set_src0(p
, insn
, src0
);
1390 brw_set_src1(p
, insn
, src1
);
1392 /* guess_execution_size(insn, src0); */
1395 /* Make it so that future instructions will use the computed flag
1396 * value until brw_set_predicate_control_flag_value() is called
1399 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1401 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1402 p
->flag_value
= 0xff;
1406 /* Issue 'wait' instruction for n1, host could program MMIO
1407 to wake up thread. */
1408 void brw_WAIT (struct brw_compile
*p
)
1410 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1411 struct brw_reg src
= brw_notification_1_reg();
1413 brw_set_dest(p
, insn
, src
);
1414 brw_set_src0(p
, insn
, src
);
1415 brw_set_src1(p
, insn
, brw_null_reg());
1416 insn
->header
.execution_size
= 0; /* must */
1417 insn
->header
.predicate_control
= 0;
1418 insn
->header
.compression_control
= 0;
1422 /***********************************************************************
1423 * Helpers for the various SEND message types:
1426 /** Extended math function, float[8].
1428 void brw_math( struct brw_compile
*p
,
1429 struct brw_reg dest
,
1437 struct intel_context
*intel
= &p
->brw
->intel
;
1439 if (intel
->gen
>= 6) {
1440 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1442 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1443 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1445 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1446 if (intel
->gen
== 6)
1447 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1449 /* Source modifiers are ignored for extended math instructions on Gen6. */
1450 if (intel
->gen
== 6) {
1451 assert(!src
.negate
);
1455 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1456 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1457 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1458 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1460 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1463 /* Math is the same ISA format as other opcodes, except that CondModifier
1464 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1466 insn
->header
.destreg__conditionalmod
= function
;
1467 insn
->header
.saturate
= saturate
;
1469 brw_set_dest(p
, insn
, dest
);
1470 brw_set_src0(p
, insn
, src
);
1471 brw_set_src1(p
, insn
, brw_null_reg());
1473 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1475 /* Example code doesn't set predicate_control for send
1478 insn
->header
.predicate_control
= 0;
1479 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1481 brw_set_dest(p
, insn
, dest
);
1482 brw_set_src0(p
, insn
, src
);
1483 brw_set_math_message(p
,
1486 src
.type
== BRW_REGISTER_TYPE_D
,
1493 /** Extended math function, float[8].
1495 void brw_math2(struct brw_compile
*p
,
1496 struct brw_reg dest
,
1498 struct brw_reg src0
,
1499 struct brw_reg src1
)
1501 struct intel_context
*intel
= &p
->brw
->intel
;
1502 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1504 assert(intel
->gen
>= 6);
1508 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1509 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1510 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1512 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1513 if (intel
->gen
== 6) {
1514 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1515 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1518 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1519 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1520 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1521 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1522 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1524 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1525 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1528 /* Source modifiers are ignored for extended math instructions on Gen6. */
1529 if (intel
->gen
== 6) {
1530 assert(!src0
.negate
);
1532 assert(!src1
.negate
);
1536 /* Math is the same ISA format as other opcodes, except that CondModifier
1537 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1539 insn
->header
.destreg__conditionalmod
= function
;
1541 brw_set_dest(p
, insn
, dest
);
1542 brw_set_src0(p
, insn
, src0
);
1543 brw_set_src1(p
, insn
, src1
);
1547 * Extended math function, float[16].
1548 * Use 2 send instructions.
1550 void brw_math_16( struct brw_compile
*p
,
1551 struct brw_reg dest
,
1558 struct intel_context
*intel
= &p
->brw
->intel
;
1559 struct brw_instruction
*insn
;
1561 if (intel
->gen
>= 6) {
1562 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1564 /* Math is the same ISA format as other opcodes, except that CondModifier
1565 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1567 insn
->header
.destreg__conditionalmod
= function
;
1568 insn
->header
.saturate
= saturate
;
1570 /* Source modifiers are ignored for extended math instructions. */
1571 assert(!src
.negate
);
1574 brw_set_dest(p
, insn
, dest
);
1575 brw_set_src0(p
, insn
, src
);
1576 brw_set_src1(p
, insn
, brw_null_reg());
1580 /* First instruction:
1582 brw_push_insn_state(p
);
1583 brw_set_predicate_control_flag_value(p
, 0xff);
1584 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1586 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1587 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1589 brw_set_dest(p
, insn
, dest
);
1590 brw_set_src0(p
, insn
, src
);
1591 brw_set_math_message(p
,
1594 BRW_MATH_INTEGER_UNSIGNED
,
1597 BRW_MATH_DATA_VECTOR
);
1599 /* Second instruction:
1601 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1602 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1603 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1605 brw_set_dest(p
, insn
, offset(dest
,1));
1606 brw_set_src0(p
, insn
, src
);
1607 brw_set_math_message(p
,
1610 BRW_MATH_INTEGER_UNSIGNED
,
1613 BRW_MATH_DATA_VECTOR
);
1615 brw_pop_insn_state(p
);
1620 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1621 * using a constant offset per channel.
1623 * The offset must be aligned to oword size (16 bytes). Used for
1624 * register spilling.
1626 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1631 struct intel_context
*intel
= &p
->brw
->intel
;
1632 uint32_t msg_control
, msg_type
;
1635 if (intel
->gen
>= 6)
1638 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1640 if (num_regs
== 1) {
1641 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1644 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1648 /* Set up the message header. This is g0, with g0.2 filled with
1649 * the offset. We don't want to leave our offset around in g0 or
1650 * it'll screw up texture samples, so set it up inside the message
1654 brw_push_insn_state(p
);
1655 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1656 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1658 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1660 /* set message header global offset field (reg 0, element 2) */
1662 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1664 2), BRW_REGISTER_TYPE_UD
),
1665 brw_imm_ud(offset
));
1667 brw_pop_insn_state(p
);
1671 struct brw_reg dest
;
1672 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1673 int send_commit_msg
;
1674 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1675 BRW_REGISTER_TYPE_UW
);
1677 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1678 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1679 src_header
= vec16(src_header
);
1681 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1682 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1684 /* Until gen6, writes followed by reads from the same location
1685 * are not guaranteed to be ordered unless write_commit is set.
1686 * If set, then a no-op write is issued to the destination
1687 * register to set a dependency, and a read from the destination
1688 * can be used to ensure the ordering.
1690 * For gen6, only writes between different threads need ordering
1691 * protection. Our use of DP writes is all about register
1692 * spilling within a thread.
1694 if (intel
->gen
>= 6) {
1695 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1696 send_commit_msg
= 0;
1699 send_commit_msg
= 1;
1702 brw_set_dest(p
, insn
, dest
);
1703 if (intel
->gen
>= 6) {
1704 brw_set_src0(p
, insn
, mrf
);
1706 brw_set_src0(p
, insn
, brw_null_reg());
1709 if (intel
->gen
>= 6)
1710 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1712 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1714 brw_set_dp_write_message(p
,
1716 255, /* binding table index (255=stateless) */
1720 true, /* header_present */
1721 0, /* not a render target */
1722 send_commit_msg
, /* response_length */
1730 * Read a block of owords (half a GRF each) from the scratch buffer
1731 * using a constant index per channel.
1733 * Offset must be aligned to oword size (16 bytes). Used for register
1737 brw_oword_block_read_scratch(struct brw_compile
*p
,
1738 struct brw_reg dest
,
1743 struct intel_context
*intel
= &p
->brw
->intel
;
1744 uint32_t msg_control
;
1747 if (intel
->gen
>= 6)
1750 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1751 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1753 if (num_regs
== 1) {
1754 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1757 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1762 brw_push_insn_state(p
);
1763 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1764 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1766 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1768 /* set message header global offset field (reg 0, element 2) */
1770 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1772 2), BRW_REGISTER_TYPE_UD
),
1773 brw_imm_ud(offset
));
1775 brw_pop_insn_state(p
);
1779 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1781 assert(insn
->header
.predicate_control
== 0);
1782 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1783 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1785 brw_set_dest(p
, insn
, dest
); /* UW? */
1786 if (intel
->gen
>= 6) {
1787 brw_set_src0(p
, insn
, mrf
);
1789 brw_set_src0(p
, insn
, brw_null_reg());
1792 brw_set_dp_read_message(p
,
1794 255, /* binding table index (255=stateless) */
1796 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1797 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1804 * Read a float[4] vector from the data port Data Cache (const buffer).
1805 * Location (in buffer) should be a multiple of 16.
1806 * Used for fetching shader constants.
1808 void brw_oword_block_read(struct brw_compile
*p
,
1809 struct brw_reg dest
,
1812 uint32_t bind_table_index
)
1814 struct intel_context
*intel
= &p
->brw
->intel
;
1816 /* On newer hardware, offset is in units of owords. */
1817 if (intel
->gen
>= 6)
1820 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1822 brw_push_insn_state(p
);
1823 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1824 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1825 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1827 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1829 /* set message header global offset field (reg 0, element 2) */
1831 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1833 2), BRW_REGISTER_TYPE_UD
),
1834 brw_imm_ud(offset
));
1836 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1837 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1839 /* cast dest to a uword[8] vector */
1840 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1842 brw_set_dest(p
, insn
, dest
);
1843 if (intel
->gen
>= 6) {
1844 brw_set_src0(p
, insn
, mrf
);
1846 brw_set_src0(p
, insn
, brw_null_reg());
1849 brw_set_dp_read_message(p
,
1852 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1853 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1854 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1856 1); /* response_length (1 reg, 2 owords!) */
1858 brw_pop_insn_state(p
);
1862 * Read a set of dwords from the data port Data Cache (const buffer).
1864 * Location (in buffer) appears as UD offsets in the register after
1865 * the provided mrf header reg.
1867 void brw_dword_scattered_read(struct brw_compile
*p
,
1868 struct brw_reg dest
,
1870 uint32_t bind_table_index
)
1872 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1874 brw_push_insn_state(p
);
1875 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1876 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1877 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1878 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1879 brw_pop_insn_state(p
);
1881 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1882 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1884 /* cast dest to a uword[8] vector */
1885 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1887 brw_set_dest(p
, insn
, dest
);
1888 brw_set_src0(p
, insn
, brw_null_reg());
1890 brw_set_dp_read_message(p
,
1893 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1894 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1895 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1897 1); /* response_length */
1903 * Read float[4] constant(s) from VS constant buffer.
1904 * For relative addressing, two float[4] constants will be read into 'dest'.
1905 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1907 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1908 struct brw_reg dest
,
1910 GLuint bind_table_index
)
1912 struct intel_context
*intel
= &p
->brw
->intel
;
1913 struct brw_instruction
*insn
;
1914 GLuint msg_reg_nr
= 1;
1916 if (intel
->gen
>= 6)
1919 /* Setup MRF[1] with location/offset into const buffer */
1920 brw_push_insn_state(p
);
1921 brw_set_access_mode(p
, BRW_ALIGN_1
);
1922 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1923 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1924 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1925 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1926 BRW_REGISTER_TYPE_UD
),
1927 brw_imm_ud(location
));
1928 brw_pop_insn_state(p
);
1930 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1932 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1933 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1934 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1935 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1937 brw_set_dest(p
, insn
, dest
);
1938 if (intel
->gen
>= 6) {
1939 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1941 brw_set_src0(p
, insn
, brw_null_reg());
1944 brw_set_dp_read_message(p
,
1948 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1949 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1951 1); /* response_length (1 Oword) */
1955 * Read a float[4] constant per vertex from VS constant buffer, with
1956 * relative addressing.
1958 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1959 struct brw_reg dest
,
1960 struct brw_reg addr_reg
,
1962 GLuint bind_table_index
)
1964 struct intel_context
*intel
= &p
->brw
->intel
;
1965 struct brw_reg src
= brw_vec8_grf(0, 0);
1968 /* Setup MRF[1] with offset into const buffer */
1969 brw_push_insn_state(p
);
1970 brw_set_access_mode(p
, BRW_ALIGN_1
);
1971 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1972 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1973 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1975 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1978 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1979 addr_reg
, brw_imm_d(offset
));
1980 brw_pop_insn_state(p
);
1982 gen6_resolve_implied_move(p
, &src
, 0);
1983 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1985 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1986 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1987 insn
->header
.destreg__conditionalmod
= 0;
1988 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1990 brw_set_dest(p
, insn
, dest
);
1991 brw_set_src0(p
, insn
, src
);
1993 if (intel
->gen
>= 6)
1994 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1995 else if (intel
->gen
== 5 || intel
->is_g4x
)
1996 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1998 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2000 brw_set_dp_read_message(p
,
2003 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2005 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2007 1); /* response_length */
2012 void brw_fb_WRITE(struct brw_compile
*p
,
2015 struct brw_reg src0
,
2016 GLuint binding_table_index
,
2018 GLuint response_length
,
2020 bool header_present
)
2022 struct intel_context
*intel
= &p
->brw
->intel
;
2023 struct brw_instruction
*insn
;
2024 GLuint msg_control
, msg_type
;
2025 struct brw_reg dest
;
2027 if (dispatch_width
== 16)
2028 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2030 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2032 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2033 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2035 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2037 /* The execution mask is ignored for render target writes. */
2038 insn
->header
.predicate_control
= 0;
2039 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2041 if (intel
->gen
>= 6) {
2042 /* headerless version, just submit color payload */
2043 src0
= brw_message_reg(msg_reg_nr
);
2045 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2047 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2049 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2052 if (dispatch_width
== 16)
2053 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2055 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2057 brw_set_dest(p
, insn
, dest
);
2058 brw_set_src0(p
, insn
, src0
);
2059 brw_set_dp_write_message(p
,
2061 binding_table_index
,
2066 1, /* last render target write */
2069 0 /* send_commit_msg */);
2074 * Texture sample instruction.
2075 * Note: the msg_type plus msg_length values determine exactly what kind
2076 * of sampling operation is performed. See volume 4, page 161 of docs.
2078 void brw_SAMPLE(struct brw_compile
*p
,
2079 struct brw_reg dest
,
2081 struct brw_reg src0
,
2082 GLuint binding_table_index
,
2086 GLuint response_length
,
2088 GLuint header_present
,
2091 struct intel_context
*intel
= &p
->brw
->intel
;
2092 bool need_stall
= 0;
2094 if (writemask
== 0) {
2095 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2099 /* Hardware doesn't do destination dependency checking on send
2100 * instructions properly. Add a workaround which generates the
2101 * dependency by other means. In practice it seems like this bug
2102 * only crops up for texture samples, and only where registers are
2103 * written by the send and then written again later without being
2104 * read in between. Luckily for us, we already track that
2105 * information and use it to modify the writemask for the
2106 * instruction, so that is a guide for whether a workaround is
2109 if (writemask
!= WRITEMASK_XYZW
) {
2110 GLuint dst_offset
= 0;
2111 GLuint i
, newmask
= 0, len
= 0;
2113 for (i
= 0; i
< 4; i
++) {
2114 if (writemask
& (1<<i
))
2118 for (; i
< 4; i
++) {
2119 if (!(writemask
& (1<<i
)))
2125 if (newmask
!= writemask
) {
2127 /* printf("need stall %x %x\n", newmask , writemask); */
2130 bool dispatch_16
= false;
2132 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2134 guess_execution_size(p
, p
->current
, dest
);
2135 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2138 newmask
= ~newmask
& WRITEMASK_XYZW
;
2140 brw_push_insn_state(p
);
2142 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2143 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2145 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2146 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2147 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2149 brw_pop_insn_state(p
);
2151 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2152 dest
= offset(dest
, dst_offset
);
2154 /* For 16-wide dispatch, masked channels are skipped in the
2155 * response. For 8-wide, masked channels still take up slots,
2156 * and are just not written to.
2159 response_length
= len
* 2;
2164 struct brw_instruction
*insn
;
2166 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2168 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2169 insn
->header
.predicate_control
= 0; /* XXX */
2170 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2172 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2174 brw_set_dest(p
, insn
, dest
);
2175 brw_set_src0(p
, insn
, src0
);
2176 brw_set_sampler_message(p
, insn
,
2177 binding_table_index
,
2187 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2189 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2191 brw_push_insn_state(p
);
2192 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2193 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2194 retype(reg
, BRW_REGISTER_TYPE_UD
));
2195 brw_pop_insn_state(p
);
2200 /* All these variables are pretty confusing - we might be better off
2201 * using bitmasks and macros for this, in the old style. Or perhaps
2202 * just having the caller instantiate the fields in dword3 itself.
/* brw_urb_WRITE: emit a SEND instruction that writes the message payload
 * (starting at message register msg_reg_nr, sourced from src0) to the
 * Unified Return Buffer; dest receives any writeback.
 *
 * NOTE(review): this view is a lossy extraction -- several parameter lines
 * of the signature and the argument tail of the brw_set_urb_message() call
 * are missing here; confirm the full signature against the complete file.
 */
2204 void brw_urb_WRITE(struct brw_compile
*p
,
2205 struct brw_reg dest
,
2207 struct brw_reg src0
,
2211 GLuint response_length
,
2213 bool writes_complete
,
2217 struct intel_context
*intel
= &p
->brw
->intel
;
2218 struct brw_instruction
*insn
;
/* On Sandybridge+ SEND no longer accepts a non-MRF source; this helper
 * performs the formerly-implicit move of src0 into the message register
 * (see the comment near the top of this file).
 */
2220 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2222 if (intel
->gen
== 7) {
2223 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
/* OR 0xff00 into dword 5 of the message header (m0.5), with compression
 * state saved/restored and Align1 access mode forced for the scalar OR.
 */
2224 brw_push_insn_state(p
);
2225 brw_set_access_mode(p
, BRW_ALIGN_1
);
2226 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2227 BRW_REGISTER_TYPE_UD
),
2228 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2229 brw_imm_ud(0xff00));
2230 brw_pop_insn_state(p
);
/* Allocate the SEND instruction itself. */
2233 insn
= next_insn(p
, BRW_OPCODE_SEND
);
/* The message must fit in the MRF file. */
2235 assert(msg_length
< BRW_MAX_MRF
);
2237 brw_set_dest(p
, insn
, dest
);
2238 brw_set_src0(p
, insn
, src0
);
2239 brw_set_src1(p
, insn
, brw_imm_d(0));
/* For SEND, the destreg/conditionalmod field carries the base message
 * register number rather than a condition modifier.
 */
2242 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
/* Fill in the URB-write message descriptor (argument list truncated in
 * this extraction -- see full file).
 */
2244 brw_set_urb_message(p
,
/* Scan forward through the instruction store from index 'start' and find
 * the next control-flow block terminator: ENDIF, ELSE or WHILE.
 *
 * NOTE(review): the return type line and the 'return' statements are
 * missing from this extraction; presumably the function returns the loop
 * index 'ip' when a terminator opcode is found -- confirm against the
 * full file.  The trailing assert documents that a terminator is always
 * expected to exist.
 */
2257 brw_find_next_block_end(struct brw_compile
*p
, int start
)
/* Walk every instruction after 'start'. */
2261 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2262 struct brw_instruction
*insn
= &p
->store
[ip
];
2264 switch (insn
->header
.opcode
) {
2265 case BRW_OPCODE_ENDIF
:
2266 case BRW_OPCODE_ELSE
:
2267 case BRW_OPCODE_WHILE
:
/* Falling off the end means the program had no block terminator after
 * 'start', which should be impossible for well-formed control flow.
 */
2271 assert(!"not reached");
2275 /* There is no DO instruction on gen6, so to find the end of the loop
2276 * we have to see if the loop is jumping back before our start
/* Finds the WHILE instruction that closes the loop containing 'start':
 * the first subsequent WHILE whose backward branch target is at or before
 * 'start'.
 *
 * NOTE(review): 'br' (the branch-offset scale divisor) and the 'return'
 * statements are not visible in this extraction -- presumably the found
 * index 'ip' is returned; confirm against the full file.
 */
2280 brw_find_loop_end(struct brw_compile
*p
, int start
)
2282 struct intel_context
*intel
= &p
->brw
->intel
;
2286 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2287 struct brw_instruction
*insn
= &p
->store
[ip
];
2289 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
/* The backward jump offset lives in different instruction fields per
 * generation: gen6 uses bits1.branch_gen6.jump_count, later gens use
 * bits3.break_cont.jip.
 */
2290 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2291 : insn
->bits3
.break_cont
.jip
;
/* jip is negative for a backward branch; if the scaled target is at or
 * before 'start', this WHILE closes our loop.
 */
2292 if (ip
+ jip
/ br
<= start
)
/* Every BREAK/CONT handed to this function must be inside some loop. */
2296 assert(!"not reached");
2300 /* After program generation, go back and update the UIP and JIP of
2301 * BREAK and CONT instructions to their correct locations.
/* Patch pass over the whole instruction store: for each BREAK/CONTINUE,
 * compute JIP (offset to the next block end, via brw_find_next_block_end)
 * and UIP (offset to the enclosing loop's WHILE, via brw_find_loop_end),
 * both scaled by 'br'.
 *
 * NOTE(review): 'br' is defined on lines dropped from this extraction
 * (presumably the per-generation branch-offset scale factor), and the
 * 'break' statements between cases are also missing -- confirm against
 * the full file.
 */
2304 brw_set_uip_jip(struct brw_compile
*p
)
2306 struct intel_context
*intel
= &p
->brw
->intel
;
2313 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2314 struct brw_instruction
*insn
= &p
->store
[ip
];
2316 switch (insn
->header
.opcode
) {
2317 case BRW_OPCODE_BREAK
:
2318 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2319 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2320 insn
->bits3
.break_cont
.uip
=
2321 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2323 case BRW_OPCODE_CONTINUE
:
2324 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2325 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
/* Both offsets must be forward (non-zero) jumps after patching. */
2327 assert(insn
->bits3
.break_cont
.uip
!= 0);
2328 assert(insn
->bits3
.break_cont
.jip
!= 0);
2334 void brw_ff_sync(struct brw_compile
*p
,
2335 struct brw_reg dest
,
2337 struct brw_reg src0
,
2339 GLuint response_length
,
2342 struct intel_context
*intel
= &p
->brw
->intel
;
2343 struct brw_instruction
*insn
;
2345 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2347 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2348 brw_set_dest(p
, insn
, dest
);
2349 brw_set_src0(p
, insn
, src0
);
2350 brw_set_src1(p
, insn
, brw_imm_d(0));
2353 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2355 brw_set_ff_sync_message(p
,