2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
146 extern int reg_type_size
[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
375 static void brw_set_math_message( struct brw_compile
*p
,
376 struct brw_instruction
*insn
,
383 struct brw_context
*brw
= p
->brw
;
384 struct intel_context
*intel
= &brw
->intel
;
386 unsigned response_length
;
388 /* Infer message length from the function */
390 case BRW_MATH_FUNCTION_POW
:
391 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
:
392 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER
:
393 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
401 /* Infer response length from the function */
403 case BRW_MATH_FUNCTION_SINCOS
:
404 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
:
412 brw_set_src1(p
, insn
, brw_imm_d(0));
413 if (intel
->gen
== 5) {
414 insn
->bits3
.math_gen5
.function
= function
;
415 insn
->bits3
.math_gen5
.int_type
= integer_type
;
416 insn
->bits3
.math_gen5
.precision
= low_precision
;
417 insn
->bits3
.math_gen5
.saturate
= saturate
;
418 insn
->bits3
.math_gen5
.data_type
= dataType
;
419 insn
->bits3
.math_gen5
.snapshot
= 0;
420 insn
->bits3
.math_gen5
.header_present
= 0;
421 insn
->bits3
.math_gen5
.response_length
= response_length
;
422 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
423 insn
->bits3
.math_gen5
.end_of_thread
= 0;
424 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
425 insn
->bits2
.send_gen5
.end_of_thread
= 0;
427 insn
->bits3
.math
.function
= function
;
428 insn
->bits3
.math
.int_type
= integer_type
;
429 insn
->bits3
.math
.precision
= low_precision
;
430 insn
->bits3
.math
.saturate
= saturate
;
431 insn
->bits3
.math
.data_type
= dataType
;
432 insn
->bits3
.math
.response_length
= response_length
;
433 insn
->bits3
.math
.msg_length
= msg_length
;
434 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
435 insn
->bits3
.math
.end_of_thread
= 0;
440 static void brw_set_ff_sync_message(struct brw_compile
*p
,
441 struct brw_instruction
*insn
,
443 GLuint response_length
,
446 struct brw_context
*brw
= p
->brw
;
447 struct intel_context
*intel
= &brw
->intel
;
448 brw_set_src1(p
, insn
, brw_imm_d(0));
450 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
451 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
452 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
453 insn
->bits3
.urb_gen5
.allocate
= allocate
;
454 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
455 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
456 insn
->bits3
.urb_gen5
.header_present
= 1;
457 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
458 insn
->bits3
.urb_gen5
.msg_length
= 1;
459 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
460 if (intel
->gen
>= 6) {
461 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
463 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
464 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
468 static void brw_set_urb_message( struct brw_compile
*p
,
469 struct brw_instruction
*insn
,
473 GLuint response_length
,
477 GLuint swizzle_control
)
479 struct brw_context
*brw
= p
->brw
;
480 struct intel_context
*intel
= &brw
->intel
;
481 brw_set_src1(p
, insn
, brw_imm_d(0));
483 if (intel
->gen
== 7) {
484 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
485 insn
->bits3
.urb_gen7
.offset
= offset
;
486 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
487 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
488 /* per_slot_offset = 0 makes it ignore offsets in message header */
489 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
490 insn
->bits3
.urb_gen7
.complete
= complete
;
491 insn
->bits3
.urb_gen7
.header_present
= 1;
492 insn
->bits3
.urb_gen7
.response_length
= response_length
;
493 insn
->bits3
.urb_gen7
.msg_length
= msg_length
;
494 insn
->bits3
.urb_gen7
.end_of_thread
= end_of_thread
;
495 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
496 } else if (intel
->gen
>= 5) {
497 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
498 insn
->bits3
.urb_gen5
.offset
= offset
;
499 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
500 insn
->bits3
.urb_gen5
.allocate
= allocate
;
501 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
502 insn
->bits3
.urb_gen5
.complete
= complete
;
503 insn
->bits3
.urb_gen5
.header_present
= 1;
504 insn
->bits3
.urb_gen5
.response_length
= response_length
;
505 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
506 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
507 if (intel
->gen
>= 6) {
508 /* For SNB, the SFID bits moved to the condmod bits, and
509 * EOT stayed in bits3 above. Does the EOT bit setting
510 * below on Ironlake even do anything?
512 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
514 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
515 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
518 insn
->bits3
.urb
.opcode
= 0; /* ? */
519 insn
->bits3
.urb
.offset
= offset
;
520 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
521 insn
->bits3
.urb
.allocate
= allocate
;
522 insn
->bits3
.urb
.used
= used
; /* ? */
523 insn
->bits3
.urb
.complete
= complete
;
524 insn
->bits3
.urb
.response_length
= response_length
;
525 insn
->bits3
.urb
.msg_length
= msg_length
;
526 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
527 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
532 brw_set_dp_write_message(struct brw_compile
*p
,
533 struct brw_instruction
*insn
,
534 GLuint binding_table_index
,
539 GLuint pixel_scoreboard_clear
,
540 GLuint response_length
,
541 GLuint end_of_thread
,
542 GLuint send_commit_msg
)
544 struct brw_context
*brw
= p
->brw
;
545 struct intel_context
*intel
= &brw
->intel
;
546 brw_set_src1(p
, insn
, brw_imm_ud(0));
548 if (intel
->gen
>= 7) {
549 /* Use the Render Cache for RT writes; otherwise use the Data Cache */
550 unsigned sfid
= GEN7_MESSAGE_TARGET_DP_DATA_CACHE
;
551 if (msg_type
== GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
)
552 sfid
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
554 insn
->header
.destreg__conditionalmod
= sfid
;
556 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
557 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
558 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
559 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
560 insn
->bits3
.gen7_dp
.header_present
= header_present
;
561 insn
->bits3
.gen7_dp
.response_length
= response_length
;
562 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
563 insn
->bits3
.gen7_dp
.end_of_thread
= end_of_thread
;
564 } else if (intel
->gen
== 6) {
565 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
566 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
567 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
568 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
569 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
570 insn
->bits3
.gen6_dp
.header_present
= header_present
;
571 insn
->bits3
.gen6_dp
.response_length
= response_length
;
572 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
573 insn
->bits3
.gen6_dp
.end_of_thread
= end_of_thread
;
575 /* We always use the render cache for write messages */
576 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
577 } else if (intel
->gen
== 5) {
578 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
579 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
580 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
581 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
582 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
583 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
584 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
585 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
586 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
587 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
588 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
590 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
591 insn
->bits3
.dp_write
.msg_control
= msg_control
;
592 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
593 insn
->bits3
.dp_write
.msg_type
= msg_type
;
594 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
595 insn
->bits3
.dp_write
.response_length
= response_length
;
596 insn
->bits3
.dp_write
.msg_length
= msg_length
;
597 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
598 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
603 brw_set_dp_read_message(struct brw_compile
*p
,
604 struct brw_instruction
*insn
,
605 GLuint binding_table_index
,
610 GLuint response_length
)
612 struct brw_context
*brw
= p
->brw
;
613 struct intel_context
*intel
= &brw
->intel
;
614 brw_set_src1(p
, insn
, brw_imm_d(0));
616 if (intel
->gen
>= 7) {
617 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
618 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
619 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= 0;
620 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
621 insn
->bits3
.gen7_dp
.header_present
= 1;
622 insn
->bits3
.gen7_dp
.response_length
= response_length
;
623 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
624 insn
->bits3
.gen7_dp
.end_of_thread
= 0;
625 insn
->header
.destreg__conditionalmod
= GEN7_MESSAGE_TARGET_DP_DATA_CACHE
;
626 } else if (intel
->gen
== 6) {
627 uint32_t target_function
;
629 if (target_cache
== BRW_DATAPORT_READ_TARGET_DATA_CACHE
)
630 target_function
= GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE
;
632 target_function
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
634 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
635 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
636 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= 0;
637 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
638 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
639 insn
->bits3
.gen6_dp
.header_present
= 1;
640 insn
->bits3
.gen6_dp
.response_length
= response_length
;
641 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
642 insn
->bits3
.gen6_dp
.end_of_thread
= 0;
643 insn
->header
.destreg__conditionalmod
= target_function
;
644 } else if (intel
->gen
== 5) {
645 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
646 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
647 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
648 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
649 insn
->bits3
.dp_read_gen5
.header_present
= 1;
650 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
651 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
652 insn
->bits3
.dp_read_gen5
.pad1
= 0;
653 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
654 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
655 insn
->bits2
.send_gen5
.end_of_thread
= 0;
656 } else if (intel
->is_g4x
) {
657 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
658 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
659 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
660 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
661 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
662 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
663 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
664 insn
->bits3
.dp_read_g4x
.pad1
= 0;
665 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
667 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
668 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
669 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
670 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
671 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
672 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
673 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
674 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
675 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
679 static void brw_set_sampler_message(struct brw_compile
*p
,
680 struct brw_instruction
*insn
,
681 GLuint binding_table_index
,
684 GLuint response_length
,
687 GLuint header_present
,
690 struct brw_context
*brw
= p
->brw
;
691 struct intel_context
*intel
= &brw
->intel
;
693 brw_set_src1(p
, insn
, brw_imm_d(0));
695 if (intel
->gen
>= 7) {
696 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
697 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
698 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
699 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
700 insn
->bits3
.sampler_gen7
.header_present
= header_present
;
701 insn
->bits3
.sampler_gen7
.response_length
= response_length
;
702 insn
->bits3
.sampler_gen7
.msg_length
= msg_length
;
703 insn
->bits3
.sampler_gen7
.end_of_thread
= eot
;
704 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
705 } else if (intel
->gen
>= 5) {
706 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
707 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
708 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
709 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
710 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
711 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
712 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
713 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
715 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
717 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
718 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
720 } else if (intel
->is_g4x
) {
721 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
722 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
723 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
724 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
725 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
726 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
727 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
729 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
730 insn
->bits3
.sampler
.sampler
= sampler
;
731 insn
->bits3
.sampler
.msg_type
= msg_type
;
732 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
733 insn
->bits3
.sampler
.response_length
= response_length
;
734 insn
->bits3
.sampler
.msg_length
= msg_length
;
735 insn
->bits3
.sampler
.end_of_thread
= eot
;
736 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
741 #define next_insn brw_next_insn
742 struct brw_instruction
*
743 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
745 struct brw_instruction
*insn
;
747 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
749 insn
= &p
->store
[p
->nr_insn
++];
750 memcpy(insn
, p
->current
, sizeof(*insn
));
752 /* Reset this one-shot flag:
755 if (p
->current
->header
.destreg__conditionalmod
) {
756 p
->current
->header
.destreg__conditionalmod
= 0;
757 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
760 insn
->header
.opcode
= opcode
;
764 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
769 struct brw_instruction
*insn
= next_insn(p
, opcode
);
770 brw_set_dest(p
, insn
, dest
);
771 brw_set_src0(p
, insn
, src
);
775 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
779 struct brw_reg src1
)
781 struct brw_instruction
*insn
= next_insn(p
, opcode
);
782 brw_set_dest(p
, insn
, dest
);
783 brw_set_src0(p
, insn
, src0
);
784 brw_set_src1(p
, insn
, src1
);
789 /***********************************************************************
790 * Convenience routines.
793 struct brw_instruction *brw_##OP(struct brw_compile *p, \
794 struct brw_reg dest, \
795 struct brw_reg src0) \
797 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
801 struct brw_instruction *brw_##OP(struct brw_compile *p, \
802 struct brw_reg dest, \
803 struct brw_reg src0, \
804 struct brw_reg src1) \
806 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
809 /* Rounding operations (other than RNDD) require two instructions - the first
810 * stores a rounded value (possibly the wrong way) in the dest register, but
811 * also sets a per-channel "increment bit" in the flag register. A predicated
812 * add of 1.0 fixes dest to contain the desired result.
814 * Sandybridge and later appear to round correctly without an ADD.
817 void brw_##OP(struct brw_compile *p, \
818 struct brw_reg dest, \
819 struct brw_reg src) \
821 struct brw_instruction *rnd, *add; \
822 rnd = next_insn(p, BRW_OPCODE_##OP); \
823 brw_set_dest(p, rnd, dest); \
824 brw_set_src0(p, rnd, src); \
826 if (p->brw->intel.gen < 6) { \
827 /* turn on round-increments */ \
828 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
829 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
830 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
863 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
869 if (src0
.type
== BRW_REGISTER_TYPE_F
||
870 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
871 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
872 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
873 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
876 if (src1
.type
== BRW_REGISTER_TYPE_F
||
877 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
878 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
879 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
880 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
883 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
886 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
892 if (src0
.type
== BRW_REGISTER_TYPE_D
||
893 src0
.type
== BRW_REGISTER_TYPE_UD
||
894 src1
.type
== BRW_REGISTER_TYPE_D
||
895 src1
.type
== BRW_REGISTER_TYPE_UD
) {
896 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
899 if (src0
.type
== BRW_REGISTER_TYPE_F
||
900 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
901 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
902 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
903 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
906 if (src1
.type
== BRW_REGISTER_TYPE_F
||
907 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
908 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
909 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
910 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
913 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
914 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
915 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
916 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
918 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
922 void brw_NOP(struct brw_compile
*p
)
924 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
925 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
926 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
927 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
934 /***********************************************************************
935 * Comparisons, if/else/endif
938 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
943 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
945 insn
->header
.execution_size
= 1;
946 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
947 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
949 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
955 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
957 p
->if_stack
[p
->if_stack_depth
] = inst
;
960 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
961 p
->if_stack_array_size
*= 2;
962 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
963 p
->if_stack_array_size
);
967 /* EU takes the value from the flag register and pushes it onto some
968 * sort of a stack (presumably merging with any flag value already on
969 * the stack). Within an if block, the flags at the top of the stack
970 * control execution on each channel of the unit, eg. on each of the
971 * 16 pixel values in our wm programs.
973 * When the matching 'else' instruction is reached (presumably by
974 * countdown of the instruction count patched in by our ELSE/ENDIF
975 * functions), the relevant flags are inverted.
977 * When the matching 'endif' instruction is reached, the flags are
978 * popped off. If the stack is now empty, normal execution resumes.
980 struct brw_instruction
*
981 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
983 struct intel_context
*intel
= &p
->brw
->intel
;
984 struct brw_instruction
*insn
;
986 insn
= next_insn(p
, BRW_OPCODE_IF
);
988 /* Override the defaults for this instruction:
990 if (intel
->gen
< 6) {
991 brw_set_dest(p
, insn
, brw_ip_reg());
992 brw_set_src0(p
, insn
, brw_ip_reg());
993 brw_set_src1(p
, insn
, brw_imm_d(0x0));
994 } else if (intel
->gen
== 6) {
995 brw_set_dest(p
, insn
, brw_imm_w(0));
996 insn
->bits1
.branch_gen6
.jump_count
= 0;
997 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
998 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1000 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1001 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1002 brw_set_src1(p
, insn
, brw_imm_ud(0));
1003 insn
->bits3
.break_cont
.jip
= 0;
1004 insn
->bits3
.break_cont
.uip
= 0;
1007 insn
->header
.execution_size
= execute_size
;
1008 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1009 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1010 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1011 if (!p
->single_program_flow
)
1012 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1014 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1016 push_if_stack(p
, insn
);
1020 /* This function is only used for gen6-style IF instructions with an
1021 * embedded comparison (conditional modifier). It is not used on gen7.
1023 struct brw_instruction
*
1024 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
1025 struct brw_reg src0
, struct brw_reg src1
)
1027 struct brw_instruction
*insn
;
1029 insn
= next_insn(p
, BRW_OPCODE_IF
);
1031 brw_set_dest(p
, insn
, brw_imm_w(0));
1032 if (p
->compressed
) {
1033 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1035 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1037 insn
->bits1
.branch_gen6
.jump_count
= 0;
1038 brw_set_src0(p
, insn
, src0
);
1039 brw_set_src1(p
, insn
, src1
);
1041 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1042 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1043 insn
->header
.destreg__conditionalmod
= conditional
;
1045 if (!p
->single_program_flow
)
1046 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1048 push_if_stack(p
, insn
);
1053 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1056 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1057 struct brw_instruction
*if_inst
,
1058 struct brw_instruction
*else_inst
)
1060 /* The next instruction (where the ENDIF would be, if it existed) */
1061 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1063 assert(p
->single_program_flow
);
1064 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1065 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1066 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1068 /* Convert IF to an ADD instruction that moves the instruction pointer
1069 * to the first instruction of the ELSE block. If there is no ELSE
1070 * block, point to where ENDIF would be. Reverse the predicate.
1072 * There's no need to execute an ENDIF since we don't need to do any
1073 * stack operations, and if we're currently executing, we just want to
1074 * continue normally.
1076 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1077 if_inst
->header
.predicate_inverse
= 1;
1079 if (else_inst
!= NULL
) {
1080 /* Convert ELSE to an ADD instruction that points where the ENDIF
1083 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1085 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1086 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1088 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1093 * Patch IF and ELSE instructions with appropriate jump targets.
1096 patch_IF_ELSE(struct brw_compile
*p
,
1097 struct brw_instruction
*if_inst
,
1098 struct brw_instruction
*else_inst
,
1099 struct brw_instruction
*endif_inst
)
1101 struct intel_context
*intel
= &p
->brw
->intel
;
1103 assert(!p
->single_program_flow
);
1104 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1105 assert(endif_inst
!= NULL
);
1106 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1109 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1110 * requires 2 chunks.
1112 if (intel
->gen
>= 5)
1115 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1116 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1118 if (else_inst
== NULL
) {
1119 /* Patch IF -> ENDIF */
1120 if (intel
->gen
< 6) {
1121 /* Turn it into an IFF, which means no mask stack operations for
1122 * all-false and jumping past the ENDIF.
1124 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1125 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1126 if_inst
->bits3
.if_else
.pop_count
= 0;
1127 if_inst
->bits3
.if_else
.pad0
= 0;
1128 } else if (intel
->gen
== 6) {
1129 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1130 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1132 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1133 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1136 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1138 /* Patch IF -> ELSE */
1139 if (intel
->gen
< 6) {
1140 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1141 if_inst
->bits3
.if_else
.pop_count
= 0;
1142 if_inst
->bits3
.if_else
.pad0
= 0;
1143 } else if (intel
->gen
== 6) {
1144 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1147 /* Patch ELSE -> ENDIF */
1148 if (intel
->gen
< 6) {
1149 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1152 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1153 else_inst
->bits3
.if_else
.pop_count
= 1;
1154 else_inst
->bits3
.if_else
.pad0
= 0;
1155 } else if (intel
->gen
== 6) {
1156 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1157 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1159 /* The IF instruction's JIP should point just past the ELSE */
1160 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1161 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1162 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1163 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1169 brw_ELSE(struct brw_compile
*p
)
1171 struct intel_context
*intel
= &p
->brw
->intel
;
1172 struct brw_instruction
*insn
;
1174 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1176 if (intel
->gen
< 6) {
1177 brw_set_dest(p
, insn
, brw_ip_reg());
1178 brw_set_src0(p
, insn
, brw_ip_reg());
1179 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1180 } else if (intel
->gen
== 6) {
1181 brw_set_dest(p
, insn
, brw_imm_w(0));
1182 insn
->bits1
.branch_gen6
.jump_count
= 0;
1183 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1184 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1186 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1187 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1188 brw_set_src1(p
, insn
, brw_imm_ud(0));
1189 insn
->bits3
.break_cont
.jip
= 0;
1190 insn
->bits3
.break_cont
.uip
= 0;
1193 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1194 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1195 if (!p
->single_program_flow
)
1196 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1198 push_if_stack(p
, insn
);
1202 brw_ENDIF(struct brw_compile
*p
)
1204 struct intel_context
*intel
= &p
->brw
->intel
;
1205 struct brw_instruction
*insn
;
1206 struct brw_instruction
*else_inst
= NULL
;
1207 struct brw_instruction
*if_inst
= NULL
;
1209 /* Pop the IF and (optional) ELSE instructions from the stack */
1210 p
->if_stack_depth
--;
1211 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1212 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1213 p
->if_stack_depth
--;
1215 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1217 if (p
->single_program_flow
) {
1218 /* ENDIF is useless; don't bother emitting it. */
1219 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1223 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1225 if (intel
->gen
< 6) {
1226 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1227 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1228 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1229 } else if (intel
->gen
== 6) {
1230 brw_set_dest(p
, insn
, brw_imm_w(0));
1231 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1232 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1234 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1235 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1236 brw_set_src1(p
, insn
, brw_imm_ud(0));
1239 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1240 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1241 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1243 /* Also pop item off the stack in the endif instruction: */
1244 if (intel
->gen
< 6) {
1245 insn
->bits3
.if_else
.jump_count
= 0;
1246 insn
->bits3
.if_else
.pop_count
= 1;
1247 insn
->bits3
.if_else
.pad0
= 0;
1248 } else if (intel
->gen
== 6) {
1249 insn
->bits1
.branch_gen6
.jump_count
= 2;
1251 insn
->bits3
.break_cont
.jip
= 2;
1253 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1256 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1258 struct intel_context
*intel
= &p
->brw
->intel
;
1259 struct brw_instruction
*insn
;
1261 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1262 if (intel
->gen
>= 6) {
1263 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1264 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1265 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1267 brw_set_dest(p
, insn
, brw_ip_reg());
1268 brw_set_src0(p
, insn
, brw_ip_reg());
1269 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1270 insn
->bits3
.if_else
.pad0
= 0;
1271 insn
->bits3
.if_else
.pop_count
= pop_count
;
1273 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1274 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1279 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1280 struct brw_instruction
*do_insn
)
1282 struct brw_instruction
*insn
;
1284 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1285 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1286 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1287 brw_set_dest(p
, insn
, brw_ip_reg());
1288 brw_set_src0(p
, insn
, brw_ip_reg());
1289 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1291 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1292 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1296 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1298 struct brw_instruction
*insn
;
1299 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1300 brw_set_dest(p
, insn
, brw_ip_reg());
1301 brw_set_src0(p
, insn
, brw_ip_reg());
1302 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1303 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1304 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1305 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1306 insn
->bits3
.if_else
.pad0
= 0;
1307 insn
->bits3
.if_else
.pop_count
= pop_count
;
1313 * The DO/WHILE is just an unterminated loop -- break or continue are
1314 * used for control within the loop. We have a few ways they can be
1317 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1318 * jip and no DO instruction.
1320 * For non-uniform control flow pre-gen6, there's a DO instruction to
1321 * push the mask, and a WHILE to jump back, and BREAK to get out and
1324 * For gen6, there's no more mask stack, so no need for DO. WHILE
1325 * just points back to the first instruction of the loop.
1327 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1329 struct intel_context
*intel
= &p
->brw
->intel
;
1331 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1332 return &p
->store
[p
->nr_insn
];
1334 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1336 /* Override the defaults for this instruction:
1338 brw_set_dest(p
, insn
, brw_null_reg());
1339 brw_set_src0(p
, insn
, brw_null_reg());
1340 brw_set_src1(p
, insn
, brw_null_reg());
1342 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1343 insn
->header
.execution_size
= execute_size
;
1344 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1345 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1346 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1354 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1355 struct brw_instruction
*do_insn
)
1357 struct intel_context
*intel
= &p
->brw
->intel
;
1358 struct brw_instruction
*insn
;
1361 if (intel
->gen
>= 5)
1364 if (intel
->gen
>= 7) {
1365 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1367 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1368 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1369 brw_set_src1(p
, insn
, brw_imm_ud(0));
1370 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1372 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1373 } else if (intel
->gen
== 6) {
1374 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1376 brw_set_dest(p
, insn
, brw_imm_w(0));
1377 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1378 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1379 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1381 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1383 if (p
->single_program_flow
) {
1384 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1386 brw_set_dest(p
, insn
, brw_ip_reg());
1387 brw_set_src0(p
, insn
, brw_ip_reg());
1388 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1389 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1391 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1393 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1395 brw_set_dest(p
, insn
, brw_ip_reg());
1396 brw_set_src0(p
, insn
, brw_ip_reg());
1397 brw_set_src1(p
, insn
, brw_imm_d(0));
1399 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1400 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1401 insn
->bits3
.if_else
.pop_count
= 0;
1402 insn
->bits3
.if_else
.pad0
= 0;
1405 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1406 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1414 void brw_land_fwd_jump(struct brw_compile
*p
,
1415 struct brw_instruction
*jmp_insn
)
1417 struct intel_context
*intel
= &p
->brw
->intel
;
1418 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1421 if (intel
->gen
>= 5)
1424 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1425 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1427 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1432 /* To integrate with the above, it makes sense that the comparison
1433 * instruction should populate the flag register. It might be simpler
1434 * just to use the flag reg for most WM tasks?
1436 void brw_CMP(struct brw_compile
*p
,
1437 struct brw_reg dest
,
1439 struct brw_reg src0
,
1440 struct brw_reg src1
)
1442 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1444 insn
->header
.destreg__conditionalmod
= conditional
;
1445 brw_set_dest(p
, insn
, dest
);
1446 brw_set_src0(p
, insn
, src0
);
1447 brw_set_src1(p
, insn
, src1
);
1449 /* guess_execution_size(insn, src0); */
1452 /* Make it so that future instructions will use the computed flag
1453 * value until brw_set_predicate_control_flag_value() is called
1456 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1458 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1459 p
->flag_value
= 0xff;
1463 /* Issue 'wait' instruction for n1, host could program MMIO
1464 to wake up thread. */
1465 void brw_WAIT (struct brw_compile
*p
)
1467 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1468 struct brw_reg src
= brw_notification_1_reg();
1470 brw_set_dest(p
, insn
, src
);
1471 brw_set_src0(p
, insn
, src
);
1472 brw_set_src1(p
, insn
, brw_null_reg());
1473 insn
->header
.execution_size
= 0; /* must */
1474 insn
->header
.predicate_control
= 0;
1475 insn
->header
.compression_control
= 0;
1479 /***********************************************************************
1480 * Helpers for the various SEND message types:
1483 /** Extended math function, float[8].
1485 void brw_math( struct brw_compile
*p
,
1486 struct brw_reg dest
,
1494 struct intel_context
*intel
= &p
->brw
->intel
;
1496 if (intel
->gen
>= 6) {
1497 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1499 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1500 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1502 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1503 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1505 /* Source modifiers are ignored for extended math instructions. */
1506 assert(!src
.negate
);
1509 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1510 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1511 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1512 assert(src
.type
!= BRW_REGISTER_TYPE_F
);
1514 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1517 /* Math is the same ISA format as other opcodes, except that CondModifier
1518 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1520 insn
->header
.destreg__conditionalmod
= function
;
1521 insn
->header
.saturate
= saturate
;
1523 brw_set_dest(p
, insn
, dest
);
1524 brw_set_src0(p
, insn
, src
);
1525 brw_set_src1(p
, insn
, brw_null_reg());
1527 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1529 /* Example code doesn't set predicate_control for send
1532 insn
->header
.predicate_control
= 0;
1533 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1535 brw_set_dest(p
, insn
, dest
);
1536 brw_set_src0(p
, insn
, src
);
1537 brw_set_math_message(p
,
1540 src
.type
== BRW_REGISTER_TYPE_D
,
1547 /** Extended math function, float[8].
1549 void brw_math2(struct brw_compile
*p
,
1550 struct brw_reg dest
,
1552 struct brw_reg src0
,
1553 struct brw_reg src1
)
1555 struct intel_context
*intel
= &p
->brw
->intel
;
1556 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1558 assert(intel
->gen
>= 6);
1562 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1563 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1564 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1566 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1567 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1568 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1570 if (function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
1571 function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
1572 function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1573 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
1574 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
1576 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1577 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1580 /* Source modifiers are ignored for extended math instructions. */
1581 assert(!src0
.negate
);
1583 assert(!src1
.negate
);
1586 /* Math is the same ISA format as other opcodes, except that CondModifier
1587 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1589 insn
->header
.destreg__conditionalmod
= function
;
1591 brw_set_dest(p
, insn
, dest
);
1592 brw_set_src0(p
, insn
, src0
);
1593 brw_set_src1(p
, insn
, src1
);
1597 * Extended math function, float[16].
1598 * Use 2 send instructions.
1600 void brw_math_16( struct brw_compile
*p
,
1601 struct brw_reg dest
,
1608 struct intel_context
*intel
= &p
->brw
->intel
;
1609 struct brw_instruction
*insn
;
1611 if (intel
->gen
>= 6) {
1612 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1614 /* Math is the same ISA format as other opcodes, except that CondModifier
1615 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1617 insn
->header
.destreg__conditionalmod
= function
;
1618 insn
->header
.saturate
= saturate
;
1620 /* Source modifiers are ignored for extended math instructions. */
1621 assert(!src
.negate
);
1624 brw_set_dest(p
, insn
, dest
);
1625 brw_set_src0(p
, insn
, src
);
1626 brw_set_src1(p
, insn
, brw_null_reg());
1630 /* First instruction:
1632 brw_push_insn_state(p
);
1633 brw_set_predicate_control_flag_value(p
, 0xff);
1634 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1636 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1637 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1639 brw_set_dest(p
, insn
, dest
);
1640 brw_set_src0(p
, insn
, src
);
1641 brw_set_math_message(p
,
1644 BRW_MATH_INTEGER_UNSIGNED
,
1647 BRW_MATH_DATA_VECTOR
);
1649 /* Second instruction:
1651 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1652 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1653 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1655 brw_set_dest(p
, insn
, offset(dest
,1));
1656 brw_set_src0(p
, insn
, src
);
1657 brw_set_math_message(p
,
1660 BRW_MATH_INTEGER_UNSIGNED
,
1663 BRW_MATH_DATA_VECTOR
);
1665 brw_pop_insn_state(p
);
1670 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1671 * using a constant offset per channel.
1673 * The offset must be aligned to oword size (16 bytes). Used for
1674 * register spilling.
1676 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1681 struct intel_context
*intel
= &p
->brw
->intel
;
1682 uint32_t msg_control
, msg_type
;
1685 if (intel
->gen
>= 6)
1688 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1690 if (num_regs
== 1) {
1691 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1694 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1698 /* Set up the message header. This is g0, with g0.2 filled with
1699 * the offset. We don't want to leave our offset around in g0 or
1700 * it'll screw up texture samples, so set it up inside the message
1704 brw_push_insn_state(p
);
1705 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1706 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1708 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1710 /* set message header global offset field (reg 0, element 2) */
1712 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1714 2), BRW_REGISTER_TYPE_UD
),
1715 brw_imm_ud(offset
));
1717 brw_pop_insn_state(p
);
1721 struct brw_reg dest
;
1722 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1723 int send_commit_msg
;
1724 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1725 BRW_REGISTER_TYPE_UW
);
1727 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1728 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1729 src_header
= vec16(src_header
);
1731 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1732 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1734 /* Until gen6, writes followed by reads from the same location
1735 * are not guaranteed to be ordered unless write_commit is set.
1736 * If set, then a no-op write is issued to the destination
1737 * register to set a dependency, and a read from the destination
1738 * can be used to ensure the ordering.
1740 * For gen6, only writes between different threads need ordering
1741 * protection. Our use of DP writes is all about register
1742 * spilling within a thread.
1744 if (intel
->gen
>= 6) {
1745 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1746 send_commit_msg
= 0;
1749 send_commit_msg
= 1;
1752 brw_set_dest(p
, insn
, dest
);
1753 if (intel
->gen
>= 6) {
1754 brw_set_src0(p
, insn
, mrf
);
1756 brw_set_src0(p
, insn
, brw_null_reg());
1759 if (intel
->gen
>= 6)
1760 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1762 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1764 brw_set_dp_write_message(p
,
1766 255, /* binding table index (255=stateless) */
1770 true, /* header_present */
1771 0, /* pixel scoreboard */
1772 send_commit_msg
, /* response_length */
1780 * Read a block of owords (half a GRF each) from the scratch buffer
1781 * using a constant index per channel.
1783 * Offset must be aligned to oword size (16 bytes). Used for register
1787 brw_oword_block_read_scratch(struct brw_compile
*p
,
1788 struct brw_reg dest
,
1793 struct intel_context
*intel
= &p
->brw
->intel
;
1794 uint32_t msg_control
;
1797 if (intel
->gen
>= 6)
1800 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1801 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1803 if (num_regs
== 1) {
1804 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1807 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1812 brw_push_insn_state(p
);
1813 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1814 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1816 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1818 /* set message header global offset field (reg 0, element 2) */
1820 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1822 2), BRW_REGISTER_TYPE_UD
),
1823 brw_imm_ud(offset
));
1825 brw_pop_insn_state(p
);
1829 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1831 assert(insn
->header
.predicate_control
== 0);
1832 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1833 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1835 brw_set_dest(p
, insn
, dest
); /* UW? */
1836 if (intel
->gen
>= 6) {
1837 brw_set_src0(p
, insn
, mrf
);
1839 brw_set_src0(p
, insn
, brw_null_reg());
1842 brw_set_dp_read_message(p
,
1844 255, /* binding table index (255=stateless) */
1846 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1847 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1854 * Read a float[4] vector from the data port Data Cache (const buffer).
1855 * Location (in buffer) should be a multiple of 16.
1856 * Used for fetching shader constants.
1858 void brw_oword_block_read(struct brw_compile
*p
,
1859 struct brw_reg dest
,
1862 uint32_t bind_table_index
)
1864 struct intel_context
*intel
= &p
->brw
->intel
;
1866 /* On newer hardware, offset is in units of owords. */
1867 if (intel
->gen
>= 6)
1870 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1872 brw_push_insn_state(p
);
1873 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1874 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1875 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1877 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1879 /* set message header global offset field (reg 0, element 2) */
1881 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1883 2), BRW_REGISTER_TYPE_UD
),
1884 brw_imm_ud(offset
));
1886 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1887 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1889 /* cast dest to a uword[8] vector */
1890 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1892 brw_set_dest(p
, insn
, dest
);
1893 if (intel
->gen
>= 6) {
1894 brw_set_src0(p
, insn
, mrf
);
1896 brw_set_src0(p
, insn
, brw_null_reg());
1899 brw_set_dp_read_message(p
,
1902 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1903 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1904 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1906 1); /* response_length (1 reg, 2 owords!) */
1908 brw_pop_insn_state(p
);
1912 * Read a set of dwords from the data port Data Cache (const buffer).
1914 * Location (in buffer) appears as UD offsets in the register after
1915 * the provided mrf header reg.
1917 void brw_dword_scattered_read(struct brw_compile
*p
,
1918 struct brw_reg dest
,
1920 uint32_t bind_table_index
)
1922 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1924 brw_push_insn_state(p
);
1925 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1926 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1927 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1928 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1929 brw_pop_insn_state(p
);
1931 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1932 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1934 /* cast dest to a uword[8] vector */
1935 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1937 brw_set_dest(p
, insn
, dest
);
1938 brw_set_src0(p
, insn
, brw_null_reg());
1940 brw_set_dp_read_message(p
,
1943 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1944 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1945 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1947 1); /* response_length */
1953 * Read float[4] constant(s) from VS constant buffer.
1954 * For relative addressing, two float[4] constants will be read into 'dest'.
1955 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1957 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1958 struct brw_reg dest
,
1960 GLuint bind_table_index
)
1962 struct intel_context
*intel
= &p
->brw
->intel
;
1963 struct brw_instruction
*insn
;
1964 GLuint msg_reg_nr
= 1;
1966 if (intel
->gen
>= 6)
1969 /* Setup MRF[1] with location/offset into const buffer */
1970 brw_push_insn_state(p
);
1971 brw_set_access_mode(p
, BRW_ALIGN_1
);
1972 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1973 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1974 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1975 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1976 BRW_REGISTER_TYPE_UD
),
1977 brw_imm_ud(location
));
1978 brw_pop_insn_state(p
);
1980 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1982 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1983 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1984 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1985 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1987 brw_set_dest(p
, insn
, dest
);
1988 if (intel
->gen
>= 6) {
1989 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1991 brw_set_src0(p
, insn
, brw_null_reg());
1994 brw_set_dp_read_message(p
,
1998 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1999 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2001 1); /* response_length (1 Oword) */
2005 * Read a float[4] constant per vertex from VS constant buffer, with
2006 * relative addressing.
2008 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
2009 struct brw_reg dest
,
2010 struct brw_reg addr_reg
,
2012 GLuint bind_table_index
)
2014 struct intel_context
*intel
= &p
->brw
->intel
;
2015 struct brw_reg src
= brw_vec8_grf(0, 0);
2018 /* Setup MRF[1] with offset into const buffer */
2019 brw_push_insn_state(p
);
2020 brw_set_access_mode(p
, BRW_ALIGN_1
);
2021 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2022 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2023 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2025 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2028 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
2029 addr_reg
, brw_imm_d(offset
));
2030 brw_pop_insn_state(p
);
2032 gen6_resolve_implied_move(p
, &src
, 0);
2033 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2035 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
2036 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2037 insn
->header
.destreg__conditionalmod
= 0;
2038 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
2040 brw_set_dest(p
, insn
, dest
);
2041 brw_set_src0(p
, insn
, src
);
2043 if (intel
->gen
>= 6)
2044 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2045 else if (intel
->gen
== 5 || intel
->is_g4x
)
2046 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2048 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2050 brw_set_dp_read_message(p
,
2053 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2055 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2057 1); /* response_length */
2062 void brw_fb_WRITE(struct brw_compile
*p
,
2065 struct brw_reg src0
,
2066 GLuint binding_table_index
,
2068 GLuint response_length
,
2070 bool header_present
)
2072 struct intel_context
*intel
= &p
->brw
->intel
;
2073 struct brw_instruction
*insn
;
2074 GLuint msg_control
, msg_type
;
2075 struct brw_reg dest
;
2077 if (dispatch_width
== 16)
2078 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2080 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2082 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2083 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2085 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2087 /* The execution mask is ignored for render target writes. */
2088 insn
->header
.predicate_control
= 0;
2089 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2091 if (intel
->gen
>= 6) {
2092 /* headerless version, just submit color payload */
2093 src0
= brw_message_reg(msg_reg_nr
);
2095 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2097 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2099 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
2102 if (dispatch_width
== 16)
2103 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2105 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2107 brw_set_dest(p
, insn
, dest
);
2108 brw_set_src0(p
, insn
, src0
);
2109 brw_set_dp_write_message(p
,
2111 binding_table_index
,
2116 1, /* pixel scoreboard */
2119 0 /* send_commit_msg */);
2124 * Texture sample instruction.
2125 * Note: the msg_type plus msg_length values determine exactly what kind
2126 * of sampling operation is performed. See volume 4, page 161 of docs.
2128 void brw_SAMPLE(struct brw_compile
*p
,
2129 struct brw_reg dest
,
2131 struct brw_reg src0
,
2132 GLuint binding_table_index
,
2136 GLuint response_length
,
2139 GLuint header_present
,
2142 struct intel_context
*intel
= &p
->brw
->intel
;
2143 bool need_stall
= 0;
2145 if (writemask
== 0) {
2146 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2150 /* Hardware doesn't do destination dependency checking on send
2151 * instructions properly. Add a workaround which generates the
2152 * dependency by other means. In practice it seems like this bug
2153 * only crops up for texture samples, and only where registers are
2154 * written by the send and then written again later without being
2155 * read in between. Luckily for us, we already track that
2156 * information and use it to modify the writemask for the
2157 * instruction, so that is a guide for whether a workaround is
2160 if (writemask
!= WRITEMASK_XYZW
) {
2161 GLuint dst_offset
= 0;
2162 GLuint i
, newmask
= 0, len
= 0;
2164 for (i
= 0; i
< 4; i
++) {
2165 if (writemask
& (1<<i
))
2169 for (; i
< 4; i
++) {
2170 if (!(writemask
& (1<<i
)))
2176 if (newmask
!= writemask
) {
2178 /* printf("need stall %x %x\n", newmask , writemask); */
2181 bool dispatch_16
= false;
2183 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
2185 guess_execution_size(p
, p
->current
, dest
);
2186 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2189 newmask
= ~newmask
& WRITEMASK_XYZW
;
2191 brw_push_insn_state(p
);
2193 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2194 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2196 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2197 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2198 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2200 brw_pop_insn_state(p
);
2202 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2203 dest
= offset(dest
, dst_offset
);
2205 /* For 16-wide dispatch, masked channels are skipped in the
2206 * response. For 8-wide, masked channels still take up slots,
2207 * and are just not written to.
2210 response_length
= len
* 2;
2215 struct brw_instruction
*insn
;
2217 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2219 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2220 insn
->header
.predicate_control
= 0; /* XXX */
2221 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2223 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2225 brw_set_dest(p
, insn
, dest
);
2226 brw_set_src0(p
, insn
, src0
);
2227 brw_set_sampler_message(p
, insn
,
2228 binding_table_index
,
2239 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2241 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2243 brw_push_insn_state(p
);
2244 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2245 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2246 retype(reg
, BRW_REGISTER_TYPE_UD
));
2247 brw_pop_insn_state(p
);
2252 /* All these variables are pretty confusing - we might be better off
2253 * using bitmasks and macros for this, in the old style. Or perhaps
2254 * just having the caller instantiate the fields in dword3 itself.
2256 void brw_urb_WRITE(struct brw_compile
*p
,
2257 struct brw_reg dest
,
2259 struct brw_reg src0
,
2263 GLuint response_length
,
2265 bool writes_complete
,
2269 struct intel_context
*intel
= &p
->brw
->intel
;
2270 struct brw_instruction
*insn
;
2272 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2274 if (intel
->gen
== 7) {
2275 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2276 brw_push_insn_state(p
);
2277 brw_set_access_mode(p
, BRW_ALIGN_1
);
2278 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2279 BRW_REGISTER_TYPE_UD
),
2280 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2281 brw_imm_ud(0xff00));
2282 brw_pop_insn_state(p
);
2285 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2287 assert(msg_length
< BRW_MAX_MRF
);
2289 brw_set_dest(p
, insn
, dest
);
2290 brw_set_src0(p
, insn
, src0
);
2291 brw_set_src1(p
, insn
, brw_imm_d(0));
2294 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2296 brw_set_urb_message(p
,
2309 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2313 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2314 struct brw_instruction
*insn
= &p
->store
[ip
];
2316 switch (insn
->header
.opcode
) {
2317 case BRW_OPCODE_ENDIF
:
2318 case BRW_OPCODE_ELSE
:
2319 case BRW_OPCODE_WHILE
:
2323 assert(!"not reached");
2327 /* There is no DO instruction on gen6, so to find the end of the loop
2328 * we have to see if the loop is jumping back before our start
2332 brw_find_loop_end(struct brw_compile
*p
, int start
)
2334 struct intel_context
*intel
= &p
->brw
->intel
;
2338 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2339 struct brw_instruction
*insn
= &p
->store
[ip
];
2341 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2342 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2343 : insn
->bits3
.break_cont
.jip
;
2344 if (ip
+ jip
/ br
<= start
)
2348 assert(!"not reached");
2352 /* After program generation, go back and update the UIP and JIP of
2353 * BREAK and CONT instructions to their correct locations.
2356 brw_set_uip_jip(struct brw_compile
*p
)
2358 struct intel_context
*intel
= &p
->brw
->intel
;
2365 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2366 struct brw_instruction
*insn
= &p
->store
[ip
];
2368 switch (insn
->header
.opcode
) {
2369 case BRW_OPCODE_BREAK
:
2370 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2371 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2372 insn
->bits3
.break_cont
.uip
=
2373 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2375 case BRW_OPCODE_CONTINUE
:
2376 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2377 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
2379 assert(insn
->bits3
.break_cont
.uip
!= 0);
2380 assert(insn
->bits3
.break_cont
.jip
!= 0);
2386 void brw_ff_sync(struct brw_compile
*p
,
2387 struct brw_reg dest
,
2389 struct brw_reg src0
,
2391 GLuint response_length
,
2394 struct intel_context
*intel
= &p
->brw
->intel
;
2395 struct brw_instruction
*insn
;
2397 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2399 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2400 brw_set_dest(p
, insn
, dest
);
2401 brw_set_src0(p
, insn
, src0
);
2402 brw_set_src1(p
, insn
, brw_imm_d(0));
2405 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2407 brw_set_ff_sync_message(p
,