2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
146 extern int reg_type_size
[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
375 static void brw_set_math_message( struct brw_compile
*p
,
376 struct brw_instruction
*insn
,
383 struct brw_context
*brw
= p
->brw
;
384 struct intel_context
*intel
= &brw
->intel
;
386 unsigned response_length
;
388 /* Infer message length from the function */
390 case BRW_MATH_FUNCTION_POW
:
391 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
392 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
393 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
401 /* Infer response length from the function */
403 case BRW_MATH_FUNCTION_SINCOS
:
404 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
412 brw_set_src1(p
, insn
, brw_imm_d(0));
413 if (intel
->gen
== 5) {
414 insn
->bits3
.math_gen5
.function
= function
;
415 insn
->bits3
.math_gen5
.int_type
= integer_type
;
416 insn
->bits3
.math_gen5
.precision
= low_precision
;
417 insn
->bits3
.math_gen5
.saturate
= saturate
;
418 insn
->bits3
.math_gen5
.data_type
= dataType
;
419 insn
->bits3
.math_gen5
.snapshot
= 0;
420 insn
->bits3
.math_gen5
.header_present
= 0;
421 insn
->bits3
.math_gen5
.response_length
= response_length
;
422 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
423 insn
->bits3
.math_gen5
.end_of_thread
= 0;
424 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
425 insn
->bits2
.send_gen5
.end_of_thread
= 0;
427 insn
->bits3
.math
.function
= function
;
428 insn
->bits3
.math
.int_type
= integer_type
;
429 insn
->bits3
.math
.precision
= low_precision
;
430 insn
->bits3
.math
.saturate
= saturate
;
431 insn
->bits3
.math
.data_type
= dataType
;
432 insn
->bits3
.math
.response_length
= response_length
;
433 insn
->bits3
.math
.msg_length
= msg_length
;
434 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
435 insn
->bits3
.math
.end_of_thread
= 0;
440 static void brw_set_ff_sync_message(struct brw_compile
*p
,
441 struct brw_instruction
*insn
,
443 GLuint response_length
,
446 struct brw_context
*brw
= p
->brw
;
447 struct intel_context
*intel
= &brw
->intel
;
448 brw_set_src1(p
, insn
, brw_imm_d(0));
450 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
451 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
452 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
453 insn
->bits3
.urb_gen5
.allocate
= allocate
;
454 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
455 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
456 insn
->bits3
.urb_gen5
.header_present
= 1;
457 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
458 insn
->bits3
.urb_gen5
.msg_length
= 1;
459 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
460 if (intel
->gen
>= 6) {
461 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
463 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
464 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
468 static void brw_set_urb_message( struct brw_compile
*p
,
469 struct brw_instruction
*insn
,
473 GLuint response_length
,
477 GLuint swizzle_control
)
479 struct brw_context
*brw
= p
->brw
;
480 struct intel_context
*intel
= &brw
->intel
;
481 brw_set_src1(p
, insn
, brw_imm_d(0));
483 if (intel
->gen
== 7) {
484 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
485 insn
->bits3
.urb_gen7
.offset
= offset
;
486 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
487 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
488 /* per_slot_offset = 0 makes it ignore offsets in message header */
489 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
490 insn
->bits3
.urb_gen7
.complete
= complete
;
491 insn
->bits3
.urb_gen7
.header_present
= 1;
492 insn
->bits3
.urb_gen7
.response_length
= response_length
;
493 insn
->bits3
.urb_gen7
.msg_length
= msg_length
;
494 insn
->bits3
.urb_gen7
.end_of_thread
= end_of_thread
;
495 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
496 } else if (intel
->gen
>= 5) {
497 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
498 insn
->bits3
.urb_gen5
.offset
= offset
;
499 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
500 insn
->bits3
.urb_gen5
.allocate
= allocate
;
501 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
502 insn
->bits3
.urb_gen5
.complete
= complete
;
503 insn
->bits3
.urb_gen5
.header_present
= 1;
504 insn
->bits3
.urb_gen5
.response_length
= response_length
;
505 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
506 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
507 if (intel
->gen
>= 6) {
508 /* For SNB, the SFID bits moved to the condmod bits, and
509 * EOT stayed in bits3 above. Does the EOT bit setting
510 * below on Ironlake even do anything?
512 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
514 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
515 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
518 insn
->bits3
.urb
.opcode
= 0; /* ? */
519 insn
->bits3
.urb
.offset
= offset
;
520 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
521 insn
->bits3
.urb
.allocate
= allocate
;
522 insn
->bits3
.urb
.used
= used
; /* ? */
523 insn
->bits3
.urb
.complete
= complete
;
524 insn
->bits3
.urb
.response_length
= response_length
;
525 insn
->bits3
.urb
.msg_length
= msg_length
;
526 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
527 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
532 brw_set_dp_write_message(struct brw_compile
*p
,
533 struct brw_instruction
*insn
,
534 GLuint binding_table_index
,
539 GLuint pixel_scoreboard_clear
,
540 GLuint response_length
,
541 GLuint end_of_thread
,
542 GLuint send_commit_msg
)
544 struct brw_context
*brw
= p
->brw
;
545 struct intel_context
*intel
= &brw
->intel
;
546 brw_set_src1(p
, insn
, brw_imm_ud(0));
548 if (intel
->gen
>= 7) {
549 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
550 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
551 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
552 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
553 insn
->bits3
.gen7_dp
.header_present
= header_present
;
554 insn
->bits3
.gen7_dp
.response_length
= response_length
;
555 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
556 insn
->bits3
.gen7_dp
.end_of_thread
= end_of_thread
;
558 /* We always use the render cache for write messages */
559 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
560 } else if (intel
->gen
== 6) {
561 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
562 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
563 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
564 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
565 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
566 insn
->bits3
.gen6_dp
.header_present
= header_present
;
567 insn
->bits3
.gen6_dp
.response_length
= response_length
;
568 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
569 insn
->bits3
.gen6_dp
.end_of_thread
= end_of_thread
;
571 /* We always use the render cache for write messages */
572 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
573 } else if (intel
->gen
== 5) {
574 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
575 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
576 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
577 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
578 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
579 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
580 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
581 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
582 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
583 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
584 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
586 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
587 insn
->bits3
.dp_write
.msg_control
= msg_control
;
588 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
589 insn
->bits3
.dp_write
.msg_type
= msg_type
;
590 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
591 insn
->bits3
.dp_write
.response_length
= response_length
;
592 insn
->bits3
.dp_write
.msg_length
= msg_length
;
593 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
594 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
599 brw_set_dp_read_message(struct brw_compile
*p
,
600 struct brw_instruction
*insn
,
601 GLuint binding_table_index
,
606 GLuint response_length
)
608 struct brw_context
*brw
= p
->brw
;
609 struct intel_context
*intel
= &brw
->intel
;
610 brw_set_src1(p
, insn
, brw_imm_d(0));
612 if (intel
->gen
>= 7) {
613 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
614 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
615 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= 0;
616 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
617 insn
->bits3
.gen7_dp
.header_present
= 1;
618 insn
->bits3
.gen7_dp
.response_length
= response_length
;
619 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
620 insn
->bits3
.gen7_dp
.end_of_thread
= 0;
621 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_CONST_CACHE
;
622 } else if (intel
->gen
== 6) {
623 uint32_t target_function
;
625 if (target_cache
== BRW_DATAPORT_READ_TARGET_DATA_CACHE
)
626 target_function
= GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE
;
628 target_function
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
630 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
631 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
632 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= 0;
633 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
634 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
635 insn
->bits3
.gen6_dp
.header_present
= 1;
636 insn
->bits3
.gen6_dp
.response_length
= response_length
;
637 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
638 insn
->bits3
.gen6_dp
.end_of_thread
= 0;
639 insn
->header
.destreg__conditionalmod
= target_function
;
640 } else if (intel
->gen
== 5) {
641 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
642 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
643 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
644 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
645 insn
->bits3
.dp_read_gen5
.header_present
= 1;
646 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
647 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
648 insn
->bits3
.dp_read_gen5
.pad1
= 0;
649 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
650 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
651 insn
->bits2
.send_gen5
.end_of_thread
= 0;
652 } else if (intel
->is_g4x
) {
653 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
654 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
655 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
656 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
657 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
658 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
659 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
660 insn
->bits3
.dp_read_g4x
.pad1
= 0;
661 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
663 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
664 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
665 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
666 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
667 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
668 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
669 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
670 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
671 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
675 static void brw_set_sampler_message(struct brw_compile
*p
,
676 struct brw_instruction
*insn
,
677 GLuint binding_table_index
,
680 GLuint response_length
,
683 GLuint header_present
,
686 struct brw_context
*brw
= p
->brw
;
687 struct intel_context
*intel
= &brw
->intel
;
689 brw_set_src1(p
, insn
, brw_imm_d(0));
691 if (intel
->gen
>= 7) {
692 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
693 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
694 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
695 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
696 insn
->bits3
.sampler_gen7
.header_present
= header_present
;
697 insn
->bits3
.sampler_gen7
.response_length
= response_length
;
698 insn
->bits3
.sampler_gen7
.msg_length
= msg_length
;
699 insn
->bits3
.sampler_gen7
.end_of_thread
= eot
;
700 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
701 } else if (intel
->gen
>= 5) {
702 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
703 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
704 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
705 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
706 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
707 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
708 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
709 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
711 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
713 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
714 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
716 } else if (intel
->is_g4x
) {
717 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
718 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
719 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
720 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
721 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
722 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
723 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
725 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
726 insn
->bits3
.sampler
.sampler
= sampler
;
727 insn
->bits3
.sampler
.msg_type
= msg_type
;
728 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
729 insn
->bits3
.sampler
.response_length
= response_length
;
730 insn
->bits3
.sampler
.msg_length
= msg_length
;
731 insn
->bits3
.sampler
.end_of_thread
= eot
;
732 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
737 #define next_insn brw_next_insn
738 struct brw_instruction
*
739 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
741 struct brw_instruction
*insn
;
743 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
745 insn
= &p
->store
[p
->nr_insn
++];
746 memcpy(insn
, p
->current
, sizeof(*insn
));
748 /* Reset this one-shot flag:
751 if (p
->current
->header
.destreg__conditionalmod
) {
752 p
->current
->header
.destreg__conditionalmod
= 0;
753 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
756 insn
->header
.opcode
= opcode
;
760 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
765 struct brw_instruction
*insn
= next_insn(p
, opcode
);
766 brw_set_dest(p
, insn
, dest
);
767 brw_set_src0(p
, insn
, src
);
771 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
775 struct brw_reg src1
)
777 struct brw_instruction
*insn
= next_insn(p
, opcode
);
778 brw_set_dest(p
, insn
, dest
);
779 brw_set_src0(p
, insn
, src0
);
780 brw_set_src1(p
, insn
, src1
);
785 /***********************************************************************
786 * Convenience routines.
789 struct brw_instruction *brw_##OP(struct brw_compile *p, \
790 struct brw_reg dest, \
791 struct brw_reg src0) \
793 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
797 struct brw_instruction *brw_##OP(struct brw_compile *p, \
798 struct brw_reg dest, \
799 struct brw_reg src0, \
800 struct brw_reg src1) \
802 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
805 /* Rounding operations (other than RNDD) require two instructions - the first
806 * stores a rounded value (possibly the wrong way) in the dest register, but
807 * also sets a per-channel "increment bit" in the flag register. A predicated
808 * add of 1.0 fixes dest to contain the desired result.
810 * Sandybridge and later appear to round correctly without an ADD.
813 void brw_##OP(struct brw_compile *p, \
814 struct brw_reg dest, \
815 struct brw_reg src) \
817 struct brw_instruction *rnd, *add; \
818 rnd = next_insn(p, BRW_OPCODE_##OP); \
819 brw_set_dest(p, rnd, dest); \
820 brw_set_src0(p, rnd, src); \
822 if (p->brw->intel.gen < 6) { \
823 /* turn on round-increments */ \
824 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
825 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
826 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
859 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
865 if (src0
.type
== BRW_REGISTER_TYPE_F
||
866 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
867 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
868 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
869 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
872 if (src1
.type
== BRW_REGISTER_TYPE_F
||
873 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
874 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
875 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
876 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
879 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
882 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
888 if (src0
.type
== BRW_REGISTER_TYPE_D
||
889 src0
.type
== BRW_REGISTER_TYPE_UD
||
890 src1
.type
== BRW_REGISTER_TYPE_D
||
891 src1
.type
== BRW_REGISTER_TYPE_UD
) {
892 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
895 if (src0
.type
== BRW_REGISTER_TYPE_F
||
896 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
897 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
898 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
899 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
902 if (src1
.type
== BRW_REGISTER_TYPE_F
||
903 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
904 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
905 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
906 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
909 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
910 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
911 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
912 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
914 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
918 void brw_NOP(struct brw_compile
*p
)
920 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
921 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
922 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
923 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
930 /***********************************************************************
931 * Comparisons, if/else/endif
934 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
939 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
941 insn
->header
.execution_size
= 1;
942 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
943 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
945 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
951 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
953 p
->if_stack
[p
->if_stack_depth
] = inst
;
956 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
957 p
->if_stack_array_size
*= 2;
958 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
959 p
->if_stack_array_size
);
963 /* EU takes the value from the flag register and pushes it onto some
964 * sort of a stack (presumably merging with any flag value already on
965 * the stack). Within an if block, the flags at the top of the stack
966 * control execution on each channel of the unit, eg. on each of the
967 * 16 pixel values in our wm programs.
969 * When the matching 'else' instruction is reached (presumably by
970 * countdown of the instruction count patched in by our ELSE/ENDIF
971 functions), the relevant flags are inverted.
973 * When the matching 'endif' instruction is reached, the flags are
974 * popped off. If the stack is now empty, normal execution resumes.
976 struct brw_instruction
*
977 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
979 struct intel_context
*intel
= &p
->brw
->intel
;
980 struct brw_instruction
*insn
;
982 insn
= next_insn(p
, BRW_OPCODE_IF
);
984 /* Override the defaults for this instruction:
986 if (intel
->gen
< 6) {
987 brw_set_dest(p
, insn
, brw_ip_reg());
988 brw_set_src0(p
, insn
, brw_ip_reg());
989 brw_set_src1(p
, insn
, brw_imm_d(0x0));
990 } else if (intel
->gen
== 6) {
991 brw_set_dest(p
, insn
, brw_imm_w(0));
992 insn
->bits1
.branch_gen6
.jump_count
= 0;
993 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
994 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
996 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
997 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
998 brw_set_src1(p
, insn
, brw_imm_ud(0));
999 insn
->bits3
.break_cont
.jip
= 0;
1000 insn
->bits3
.break_cont
.uip
= 0;
1003 insn
->header
.execution_size
= execute_size
;
1004 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1005 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1006 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1007 if (!p
->single_program_flow
)
1008 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1010 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1012 push_if_stack(p
, insn
);
1016 /* This function is only used for gen6-style IF instructions with an
1017 * embedded comparison (conditional modifier). It is not used on gen7.
1019 struct brw_instruction
*
1020 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1021 struct brw_reg src0
, struct brw_reg src1
)
1023 struct brw_instruction
*insn
;
1025 insn
= next_insn(p
, BRW_OPCODE_IF
);
1027 brw_set_dest(p
, insn
, brw_imm_w(0));
1028 if (p
->compressed
) {
1029 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1031 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1033 insn
->bits1
.branch_gen6
.jump_count
= 0;
1034 brw_set_src0(p
, insn
, src0
);
1035 brw_set_src1(p
, insn
, src1
);
1037 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1038 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1039 insn
->header
.destreg__conditionalmod
= conditional
;
1041 if (!p
->single_program_flow
)
1042 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1044 push_if_stack(p
, insn
);
1049 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1052 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1053 struct brw_instruction
*if_inst
,
1054 struct brw_instruction
*else_inst
)
1056 /* The next instruction (where the ENDIF would be, if it existed) */
1057 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1059 assert(p
->single_program_flow
);
1060 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1061 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1062 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1064 /* Convert IF to an ADD instruction that moves the instruction pointer
1065 * to the first instruction of the ELSE block. If there is no ELSE
1066 * block, point to where ENDIF would be. Reverse the predicate.
1068 * There's no need to execute an ENDIF since we don't need to do any
1069 * stack operations, and if we're currently executing, we just want to
1070 * continue normally.
1072 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1073 if_inst
->header
.predicate_inverse
= 1;
1075 if (else_inst
!= NULL
) {
1076 /* Convert ELSE to an ADD instruction that points where the ENDIF
1079 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1081 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1082 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1084 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1089 * Patch IF and ELSE instructions with appropriate jump targets.
1092 patch_IF_ELSE(struct brw_compile
*p
,
1093 struct brw_instruction
*if_inst
,
1094 struct brw_instruction
*else_inst
,
1095 struct brw_instruction
*endif_inst
)
1097 struct intel_context
*intel
= &p
->brw
->intel
;
1099 assert(!p
->single_program_flow
);
1100 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1101 assert(endif_inst
!= NULL
);
1102 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1105 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1106 * requires 2 chunks.
1108 if (intel
->gen
>= 5)
1111 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1112 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1114 if (else_inst
== NULL
) {
1115 /* Patch IF -> ENDIF */
1116 if (intel
->gen
< 6) {
1117 /* Turn it into an IFF, which means no mask stack operations for
1118 * all-false and jumping past the ENDIF.
1120 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1121 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1122 if_inst
->bits3
.if_else
.pop_count
= 0;
1123 if_inst
->bits3
.if_else
.pad0
= 0;
1124 } else if (intel
->gen
== 6) {
1125 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1126 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1128 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1129 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1132 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1134 /* Patch IF -> ELSE */
1135 if (intel
->gen
< 6) {
1136 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1137 if_inst
->bits3
.if_else
.pop_count
= 0;
1138 if_inst
->bits3
.if_else
.pad0
= 0;
1139 } else if (intel
->gen
== 6) {
1140 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1143 /* Patch ELSE -> ENDIF */
1144 if (intel
->gen
< 6) {
1145 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1148 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1149 else_inst
->bits3
.if_else
.pop_count
= 1;
1150 else_inst
->bits3
.if_else
.pad0
= 0;
1151 } else if (intel
->gen
== 6) {
1152 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1153 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1155 /* The IF instruction's JIP should point just past the ELSE */
1156 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1157 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1158 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1159 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1165 brw_ELSE(struct brw_compile
*p
)
1167 struct intel_context
*intel
= &p
->brw
->intel
;
1168 struct brw_instruction
*insn
;
1170 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1172 if (intel
->gen
< 6) {
1173 brw_set_dest(p
, insn
, brw_ip_reg());
1174 brw_set_src0(p
, insn
, brw_ip_reg());
1175 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1176 } else if (intel
->gen
== 6) {
1177 brw_set_dest(p
, insn
, brw_imm_w(0));
1178 insn
->bits1
.branch_gen6
.jump_count
= 0;
1179 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1180 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1182 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1183 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1184 brw_set_src1(p
, insn
, brw_imm_ud(0));
1185 insn
->bits3
.break_cont
.jip
= 0;
1186 insn
->bits3
.break_cont
.uip
= 0;
1189 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1190 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1191 if (!p
->single_program_flow
)
1192 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1194 push_if_stack(p
, insn
);
1198 brw_ENDIF(struct brw_compile
*p
)
1200 struct intel_context
*intel
= &p
->brw
->intel
;
1201 struct brw_instruction
*insn
;
1202 struct brw_instruction
*else_inst
= NULL
;
1203 struct brw_instruction
*if_inst
= NULL
;
1205 /* Pop the IF and (optional) ELSE instructions from the stack */
1206 p
->if_stack_depth
--;
1207 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1208 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1209 p
->if_stack_depth
--;
1211 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1213 if (p
->single_program_flow
) {
1214 /* ENDIF is useless; don't bother emitting it. */
1215 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1219 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1221 if (intel
->gen
< 6) {
1222 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1223 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1224 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1225 } else if (intel
->gen
== 6) {
1226 brw_set_dest(p
, insn
, brw_imm_w(0));
1227 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1228 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1230 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1231 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1232 brw_set_src1(p
, insn
, brw_imm_ud(0));
1235 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1236 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1237 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1239 /* Also pop item off the stack in the endif instruction: */
1240 if (intel
->gen
< 6) {
1241 insn
->bits3
.if_else
.jump_count
= 0;
1242 insn
->bits3
.if_else
.pop_count
= 1;
1243 insn
->bits3
.if_else
.pad0
= 0;
1244 } else if (intel
->gen
== 6) {
1245 insn
->bits1
.branch_gen6
.jump_count
= 2;
1247 insn
->bits3
.break_cont
.jip
= 2;
1249 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1252 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1254 struct intel_context
*intel
= &p
->brw
->intel
;
1255 struct brw_instruction
*insn
;
1257 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1258 if (intel
->gen
>= 6) {
1259 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1260 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1261 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1263 brw_set_dest(p
, insn
, brw_ip_reg());
1264 brw_set_src0(p
, insn
, brw_ip_reg());
1265 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1266 insn
->bits3
.if_else
.pad0
= 0;
1267 insn
->bits3
.if_else
.pop_count
= pop_count
;
1269 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1270 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1275 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1276 struct brw_instruction
*do_insn
)
1278 struct brw_instruction
*insn
;
1280 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1281 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1282 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1283 brw_set_dest(p
, insn
, brw_ip_reg());
1284 brw_set_src0(p
, insn
, brw_ip_reg());
1285 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1287 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1288 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1292 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1294 struct brw_instruction
*insn
;
1295 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1296 brw_set_dest(p
, insn
, brw_ip_reg());
1297 brw_set_src0(p
, insn
, brw_ip_reg());
1298 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1299 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1300 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1301 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1302 insn
->bits3
.if_else
.pad0
= 0;
1303 insn
->bits3
.if_else
.pop_count
= pop_count
;
1309 * The DO/WHILE is just an unterminated loop -- break or continue are
1310 * used for control within the loop. We have a few ways they can be
1313 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1314 * jip and no DO instruction.
1316 * For non-uniform control flow pre-gen6, there's a DO instruction to
1317 * push the mask, and a WHILE to jump back, and BREAK to get out and
1320 * For gen6, there's no more mask stack, so no need for DO. WHILE
1321 * just points back to the first instruction of the loop.
1323 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1325 struct intel_context
*intel
= &p
->brw
->intel
;
1327 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1328 return &p
->store
[p
->nr_insn
];
1330 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1332 /* Override the defaults for this instruction:
1334 brw_set_dest(p
, insn
, brw_null_reg());
1335 brw_set_src0(p
, insn
, brw_null_reg());
1336 brw_set_src1(p
, insn
, brw_null_reg());
1338 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1339 insn
->header
.execution_size
= execute_size
;
1340 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1341 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1342 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1350 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1351 struct brw_instruction
*do_insn
)
1353 struct intel_context
*intel
= &p
->brw
->intel
;
1354 struct brw_instruction
*insn
;
1357 if (intel
->gen
>= 5)
1360 if (intel
->gen
>= 7) {
1361 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1363 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1364 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1365 brw_set_src1(p
, insn
, brw_imm_ud(0));
1366 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1368 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1369 } else if (intel
->gen
== 6) {
1370 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1372 brw_set_dest(p
, insn
, brw_imm_w(0));
1373 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1374 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1375 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1377 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1379 if (p
->single_program_flow
) {
1380 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1382 brw_set_dest(p
, insn
, brw_ip_reg());
1383 brw_set_src0(p
, insn
, brw_ip_reg());
1384 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1385 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1387 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1389 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1391 brw_set_dest(p
, insn
, brw_ip_reg());
1392 brw_set_src0(p
, insn
, brw_ip_reg());
1393 brw_set_src1(p
, insn
, brw_imm_d(0));
1395 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1396 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1397 insn
->bits3
.if_else
.pop_count
= 0;
1398 insn
->bits3
.if_else
.pad0
= 0;
1401 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1402 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1410 void brw_land_fwd_jump(struct brw_compile
*p
,
1411 struct brw_instruction
*jmp_insn
)
1413 struct intel_context
*intel
= &p
->brw
->intel
;
1414 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1417 if (intel
->gen
>= 5)
1420 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1421 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1423 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1428 /* To integrate with the above, it makes sense that the comparison
1429 * instruction should populate the flag register. It might be simpler
1430 * just to use the flag reg for most WM tasks?
1432 void brw_CMP(struct brw_compile
*p
,
1433 struct brw_reg dest
,
1435 struct brw_reg src0
,
1436 struct brw_reg src1
)
1438 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1440 insn
->header
.destreg__conditionalmod
= conditional
;
1441 brw_set_dest(p
, insn
, dest
);
1442 brw_set_src0(p
, insn
, src0
);
1443 brw_set_src1(p
, insn
, src1
);
1445 /* guess_execution_size(insn, src0); */
1448 /* Make it so that future instructions will use the computed flag
1449 * value until brw_set_predicate_control_flag_value() is called
1452 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1454 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1455 p
->flag_value
= 0xff;
1459 /* Issue 'wait' instruction for n1, host could program MMIO
1460 to wake up thread. */
1461 void brw_WAIT (struct brw_compile
*p
)
1463 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1464 struct brw_reg src
= brw_notification_1_reg();
1466 brw_set_dest(p
, insn
, src
);
1467 brw_set_src0(p
, insn
, src
);
1468 brw_set_src1(p
, insn
, brw_null_reg());
1469 insn
->header
.execution_size
= 0; /* must */
1470 insn
->header
.predicate_control
= 0;
1471 insn
->header
.compression_control
= 0;
1475 /***********************************************************************
1476 * Helpers for the various SEND message types:
1479 /** Extended math function, float[8].
1481 void brw_math( struct brw_compile
*p
,
1482 struct brw_reg dest
,
1490 struct intel_context
*intel
= &p
->brw
->intel
;
1492 if (intel
->gen
>= 6) {
1493 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1495 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1496 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1498 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1499 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1501 /* Source modifiers are ignored for extended math instructions. */
1502 assert(!src
.negate
);
1505 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1506 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1507 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1508 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1510 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1513 /* Math is the same ISA format as other opcodes, except that CondModifier
1514 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1516 insn
->header
.destreg__conditionalmod
= function
;
1517 insn
->header
.saturate
= saturate
;
1519 brw_set_dest(p
, insn
, dest
);
1520 brw_set_src0(p
, insn
, src
);
1521 brw_set_src1(p
, insn
, brw_null_reg());
1523 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1525 /* Example code doesn't set predicate_control for send
1528 insn
->header
.predicate_control
= 0;
1529 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1531 brw_set_dest(p
, insn
, dest
);
1532 brw_set_src0(p
, insn
, src
);
1533 brw_set_math_message(p
,
1536 src
.type
== BRW_REGISTER_TYPE_D
,
1543 /** Extended math function, float[8].
1545 void brw_math2(struct brw_compile
*p
,
1546 struct brw_reg dest
,
1548 struct brw_reg src0
,
1549 struct brw_reg src1
)
1551 struct intel_context
*intel
= &p
->brw
->intel
;
1552 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1554 assert(intel
->gen
>= 6);
1558 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1559 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1560 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1562 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1563 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1564 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1566 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1567 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1568 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1569 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1570 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1572 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1573 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1576 /* Source modifiers are ignored for extended math instructions. */
1577 assert(!src0
.negate
);
1579 assert(!src1
.negate
);
1582 /* Math is the same ISA format as other opcodes, except that CondModifier
1583 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1585 insn
->header
.destreg__conditionalmod
= function
;
1587 brw_set_dest(p
, insn
, dest
);
1588 brw_set_src0(p
, insn
, src0
);
1589 brw_set_src1(p
, insn
, src1
);
1593 * Extended math function, float[16].
1594 * Use 2 send instructions.
1596 void brw_math_16( struct brw_compile
*p
,
1597 struct brw_reg dest
,
1604 struct intel_context
*intel
= &p
->brw
->intel
;
1605 struct brw_instruction
*insn
;
1607 if (intel
->gen
>= 6) {
1608 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1610 /* Math is the same ISA format as other opcodes, except that CondModifier
1611 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1613 insn
->header
.destreg__conditionalmod
= function
;
1614 insn
->header
.saturate
= saturate
;
1616 /* Source modifiers are ignored for extended math instructions. */
1617 assert(!src
.negate
);
1620 brw_set_dest(p
, insn
, dest
);
1621 brw_set_src0(p
, insn
, src
);
1622 brw_set_src1(p
, insn
, brw_null_reg());
1626 /* First instruction:
1628 brw_push_insn_state(p
);
1629 brw_set_predicate_control_flag_value(p
, 0xff);
1630 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1632 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1633 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1635 brw_set_dest(p
, insn
, dest
);
1636 brw_set_src0(p
, insn
, src
);
1637 brw_set_math_message(p
,
1640 BRW_MATH_INTEGER_UNSIGNED
,
1643 BRW_MATH_DATA_VECTOR
);
1645 /* Second instruction:
1647 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1648 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1649 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1651 brw_set_dest(p
, insn
, offset(dest
,1));
1652 brw_set_src0(p
, insn
, src
);
1653 brw_set_math_message(p
,
1656 BRW_MATH_INTEGER_UNSIGNED
,
1659 BRW_MATH_DATA_VECTOR
);
1661 brw_pop_insn_state(p
);
1666 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1667 * using a constant offset per channel.
1669 * The offset must be aligned to oword size (16 bytes). Used for
1670 * register spilling.
1672 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1677 struct intel_context
*intel
= &p
->brw
->intel
;
1678 uint32_t msg_control
, msg_type
;
1681 if (intel
->gen
>= 6)
1684 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1686 if (num_regs
== 1) {
1687 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1690 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1694 /* Set up the message header. This is g0, with g0.2 filled with
1695 * the offset. We don't want to leave our offset around in g0 or
1696 * it'll screw up texture samples, so set it up inside the message
1700 brw_push_insn_state(p
);
1701 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1702 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1704 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1706 /* set message header global offset field (reg 0, element 2) */
1708 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1710 2), BRW_REGISTER_TYPE_UD
),
1711 brw_imm_ud(offset
));
1713 brw_pop_insn_state(p
);
1717 struct brw_reg dest
;
1718 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1719 int send_commit_msg
;
1720 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1721 BRW_REGISTER_TYPE_UW
);
1723 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1724 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1725 src_header
= vec16(src_header
);
1727 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1728 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1730 /* Until gen6, writes followed by reads from the same location
1731 * are not guaranteed to be ordered unless write_commit is set.
1732 * If set, then a no-op write is issued to the destination
1733 * register to set a dependency, and a read from the destination
1734 * can be used to ensure the ordering.
1736 * For gen6, only writes between different threads need ordering
1737 * protection. Our use of DP writes is all about register
1738 * spilling within a thread.
1740 if (intel
->gen
>= 6) {
1741 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1742 send_commit_msg
= 0;
1745 send_commit_msg
= 1;
1748 brw_set_dest(p
, insn
, dest
);
1749 if (intel
->gen
>= 6) {
1750 brw_set_src0(p
, insn
, mrf
);
1752 brw_set_src0(p
, insn
, brw_null_reg());
1755 if (intel
->gen
>= 6)
1756 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1758 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1760 brw_set_dp_write_message(p
,
1762 255, /* binding table index (255=stateless) */
1766 true, /* header_present */
1767 0, /* pixel scoreboard */
1768 send_commit_msg
, /* response_length */
1776 * Read a block of owords (half a GRF each) from the scratch buffer
1777 * using a constant index per channel.
1779 * Offset must be aligned to oword size (16 bytes). Used for register
1783 brw_oword_block_read_scratch(struct brw_compile
*p
,
1784 struct brw_reg dest
,
1789 struct intel_context
*intel
= &p
->brw
->intel
;
1790 uint32_t msg_control
;
1793 if (intel
->gen
>= 6)
1796 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1797 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1799 if (num_regs
== 1) {
1800 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1803 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1808 brw_push_insn_state(p
);
1809 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1810 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1812 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1814 /* set message header global offset field (reg 0, element 2) */
1816 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1818 2), BRW_REGISTER_TYPE_UD
),
1819 brw_imm_ud(offset
));
1821 brw_pop_insn_state(p
);
1825 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1827 assert(insn
->header
.predicate_control
== 0);
1828 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1829 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1831 brw_set_dest(p
, insn
, dest
); /* UW? */
1832 if (intel
->gen
>= 6) {
1833 brw_set_src0(p
, insn
, mrf
);
1835 brw_set_src0(p
, insn
, brw_null_reg());
1838 brw_set_dp_read_message(p
,
1840 255, /* binding table index (255=stateless) */
1842 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1843 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1850 * Read a float[4] vector from the data port Data Cache (const buffer).
1851 * Location (in buffer) should be a multiple of 16.
1852 * Used for fetching shader constants.
1854 void brw_oword_block_read(struct brw_compile
*p
,
1855 struct brw_reg dest
,
1858 uint32_t bind_table_index
)
1860 struct intel_context
*intel
= &p
->brw
->intel
;
1862 /* On newer hardware, offset is in units of owords. */
1863 if (intel
->gen
>= 6)
1866 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1868 brw_push_insn_state(p
);
1869 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1870 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1871 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1873 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1875 /* set message header global offset field (reg 0, element 2) */
1877 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1879 2), BRW_REGISTER_TYPE_UD
),
1880 brw_imm_ud(offset
));
1882 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1883 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1885 /* cast dest to a uword[8] vector */
1886 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1888 brw_set_dest(p
, insn
, dest
);
1889 if (intel
->gen
>= 6) {
1890 brw_set_src0(p
, insn
, mrf
);
1892 brw_set_src0(p
, insn
, brw_null_reg());
1895 brw_set_dp_read_message(p
,
1898 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1899 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1900 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1902 1); /* response_length (1 reg, 2 owords!) */
1904 brw_pop_insn_state(p
);
1908 * Read a set of dwords from the data port Data Cache (const buffer).
1910 * Location (in buffer) appears as UD offsets in the register after
1911 * the provided mrf header reg.
1913 void brw_dword_scattered_read(struct brw_compile
*p
,
1914 struct brw_reg dest
,
1916 uint32_t bind_table_index
)
1918 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1920 brw_push_insn_state(p
);
1921 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1922 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1923 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1924 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1925 brw_pop_insn_state(p
);
1927 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1928 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1930 /* cast dest to a uword[8] vector */
1931 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1933 brw_set_dest(p
, insn
, dest
);
1934 brw_set_src0(p
, insn
, brw_null_reg());
1936 brw_set_dp_read_message(p
,
1939 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1940 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1941 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1943 1); /* response_length */
1949 * Read float[4] constant(s) from VS constant buffer.
1950 * For relative addressing, two float[4] constants will be read into 'dest'.
1951 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1953 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1954 struct brw_reg dest
,
1956 GLuint bind_table_index
)
1958 struct intel_context
*intel
= &p
->brw
->intel
;
1959 struct brw_instruction
*insn
;
1960 GLuint msg_reg_nr
= 1;
1962 if (intel
->gen
>= 6)
1965 /* Setup MRF[1] with location/offset into const buffer */
1966 brw_push_insn_state(p
);
1967 brw_set_access_mode(p
, BRW_ALIGN_1
);
1968 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1969 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1970 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1971 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1972 BRW_REGISTER_TYPE_UD
),
1973 brw_imm_ud(location
));
1974 brw_pop_insn_state(p
);
1976 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1978 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1979 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1980 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1981 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1983 brw_set_dest(p
, insn
, dest
);
1984 if (intel
->gen
>= 6) {
1985 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1987 brw_set_src0(p
, insn
, brw_null_reg());
1990 brw_set_dp_read_message(p
,
1994 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1995 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1997 1); /* response_length (1 Oword) */
2001 * Read a float[4] constant per vertex from VS constant buffer, with
2002 * relative addressing.
2004 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
2005 struct brw_reg dest
,
2006 struct brw_reg addr_reg
,
2008 GLuint bind_table_index
)
2010 struct intel_context
*intel
= &p
->brw
->intel
;
2011 struct brw_reg src
= brw_vec8_grf(0, 0);
2014 /* Setup MRF[1] with offset into const buffer */
2015 brw_push_insn_state(p
);
2016 brw_set_access_mode(p
, BRW_ALIGN_1
);
2017 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2018 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2019 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2021 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2024 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
2025 addr_reg
, brw_imm_d(offset
));
2026 brw_pop_insn_state(p
);
2028 gen6_resolve_implied_move(p
, &src
, 0);
2029 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2031 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
2032 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2033 insn
->header
.destreg__conditionalmod
= 0;
2034 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
2036 brw_set_dest(p
, insn
, dest
);
2037 brw_set_src0(p
, insn
, src
);
2039 if (intel
->gen
>= 6)
2040 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2041 else if (intel
->gen
== 5 || intel
->is_g4x
)
2042 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2044 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2046 brw_set_dp_read_message(p
,
2049 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2051 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2053 1); /* response_length */
2058 void brw_fb_WRITE(struct brw_compile
*p
,
2061 struct brw_reg src0
,
2062 GLuint binding_table_index
,
2064 GLuint response_length
,
2066 bool header_present
)
2068 struct intel_context
*intel
= &p
->brw
->intel
;
2069 struct brw_instruction
*insn
;
2070 GLuint msg_control
, msg_type
;
2071 struct brw_reg dest
;
2073 if (dispatch_width
== 16)
2074 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2076 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2078 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2079 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2081 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2083 /* The execution mask is ignored for render target writes. */
2084 insn
->header
.predicate_control
= 0;
2085 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2087 if (intel
->gen
>= 6) {
2088 /* headerless version, just submit color payload */
2089 src0
= brw_message_reg(msg_reg_nr
);
2091 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2093 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2095 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2098 if (dispatch_width
== 16)
2099 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2101 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2103 brw_set_dest(p
, insn
, dest
);
2104 brw_set_src0(p
, insn
, src0
);
2105 brw_set_dp_write_message(p
,
2107 binding_table_index
,
2112 1, /* pixel scoreboard */
2115 0 /* send_commit_msg */);
2120 * Texture sample instruction.
2121 * Note: the msg_type plus msg_length values determine exactly what kind
2122 * of sampling operation is performed. See volume 4, page 161 of docs.
2124 void brw_SAMPLE(struct brw_compile
*p
,
2125 struct brw_reg dest
,
2127 struct brw_reg src0
,
2128 GLuint binding_table_index
,
2132 GLuint response_length
,
2135 GLuint header_present
,
2138 struct intel_context
*intel
= &p
->brw
->intel
;
2139 bool need_stall
= 0;
2141 if (writemask
== 0) {
2142 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2146 /* Hardware doesn't do destination dependency checking on send
2147 * instructions properly. Add a workaround which generates the
2148 * dependency by other means. In practice it seems like this bug
2149 * only crops up for texture samples, and only where registers are
2150 * written by the send and then written again later without being
2151 * read in between. Luckily for us, we already track that
2152 * information and use it to modify the writemask for the
2153 * instruction, so that is a guide for whether a workaround is
2156 if (writemask
!= WRITEMASK_XYZW
) {
2157 GLuint dst_offset
= 0;
2158 GLuint i
, newmask
= 0, len
= 0;
2160 for (i
= 0; i
< 4; i
++) {
2161 if (writemask
& (1<<i
))
2165 for (; i
< 4; i
++) {
2166 if (!(writemask
& (1<<i
)))
2172 if (newmask
!= writemask
) {
2174 /* printf("need stall %x %x\n", newmask , writemask); */
2177 bool dispatch_16
= false;
2179 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2181 guess_execution_size(p
, p
->current
, dest
);
2182 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2185 newmask
= ~newmask
& WRITEMASK_XYZW
;
2187 brw_push_insn_state(p
);
2189 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2190 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2192 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2193 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2194 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2196 brw_pop_insn_state(p
);
2198 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2199 dest
= offset(dest
, dst_offset
);
2201 /* For 16-wide dispatch, masked channels are skipped in the
2202 * response. For 8-wide, masked channels still take up slots,
2203 * and are just not written to.
2206 response_length
= len
* 2;
2211 struct brw_instruction
*insn
;
2213 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2215 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2216 insn
->header
.predicate_control
= 0; /* XXX */
2217 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2219 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2221 brw_set_dest(p
, insn
, dest
);
2222 brw_set_src0(p
, insn
, src0
);
2223 brw_set_sampler_message(p
, insn
,
2224 binding_table_index
,
2235 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2237 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2239 brw_push_insn_state(p
);
2240 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2241 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2242 retype(reg
, BRW_REGISTER_TYPE_UD
));
2243 brw_pop_insn_state(p
);
2248 /* All these variables are pretty confusing - we might be better off
2249 * using bitmasks and macros for this, in the old style. Or perhaps
2250 * just having the caller instantiate the fields in dword3 itself.
2252 void brw_urb_WRITE(struct brw_compile
*p
,
2253 struct brw_reg dest
,
2255 struct brw_reg src0
,
2259 GLuint response_length
,
2261 bool writes_complete
,
2265 struct intel_context
*intel
= &p
->brw
->intel
;
2266 struct brw_instruction
*insn
;
2268 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2270 if (intel
->gen
== 7) {
2271 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2272 brw_push_insn_state(p
);
2273 brw_set_access_mode(p
, BRW_ALIGN_1
);
2274 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2275 BRW_REGISTER_TYPE_UD
),
2276 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2277 brw_imm_ud(0xff00));
2278 brw_pop_insn_state(p
);
2281 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2283 assert(msg_length
< BRW_MAX_MRF
);
2285 brw_set_dest(p
, insn
, dest
);
2286 brw_set_src0(p
, insn
, src0
);
2287 brw_set_src1(p
, insn
, brw_imm_d(0));
2290 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2292 brw_set_urb_message(p
,
2305 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2309 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2310 struct brw_instruction
*insn
= &p
->store
[ip
];
2312 switch (insn
->header
.opcode
) {
2313 case BRW_OPCODE_ENDIF
:
2314 case BRW_OPCODE_ELSE
:
2315 case BRW_OPCODE_WHILE
:
2319 assert(!"not reached");
2323 /* There is no DO instruction on gen6, so to find the end of the loop
2324 * we have to see if the loop is jumping back before our start
2328 brw_find_loop_end(struct brw_compile
*p
, int start
)
2330 struct intel_context
*intel
= &p
->brw
->intel
;
2334 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2335 struct brw_instruction
*insn
= &p
->store
[ip
];
2337 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2338 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2339 : insn
->bits3
.break_cont
.jip
;
2340 if (ip
+ jip
/ br
<= start
)
2344 assert(!"not reached");
2348 /* After program generation, go back and update the UIP and JIP of
2349 * BREAK and CONT instructions to their correct locations.
2352 brw_set_uip_jip(struct brw_compile
*p
)
2354 struct intel_context
*intel
= &p
->brw
->intel
;
2361 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2362 struct brw_instruction
*insn
= &p
->store
[ip
];
2364 switch (insn
->header
.opcode
) {
2365 case BRW_OPCODE_BREAK
:
2366 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2367 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2368 insn
->bits3
.break_cont
.uip
=
2369 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2371 case BRW_OPCODE_CONTINUE
:
2372 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2373 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
2375 assert(insn
->bits3
.break_cont
.uip
!= 0);
2376 assert(insn
->bits3
.break_cont
.jip
!= 0);
2382 void brw_ff_sync(struct brw_compile
*p
,
2383 struct brw_reg dest
,
2385 struct brw_reg src0
,
2387 GLuint response_length
,
2390 struct intel_context
*intel
= &p
->brw
->intel
;
2391 struct brw_instruction
*insn
;
2393 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2395 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2396 brw_set_dest(p
, insn
, dest
);
2397 brw_set_src0(p
, insn
, src0
);
2398 brw_set_src1(p
, insn
, brw_imm_d(0));
2401 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2403 brw_set_ff_sync_message(p
,