2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
37 #include "../glsl/ralloc.h"
39 /***********************************************************************
40 * Internal helper for constructing instructions
43 static void guess_execution_size(struct brw_compile
*p
,
44 struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
48 insn
->header
.execution_size
= BRW_EXECUTE_16
;
50 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
58 * On Sandybridge, this is no longer the case. This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
62 gen6_resolve_implied_move(struct brw_compile
*p
,
66 struct intel_context
*intel
= &p
->brw
->intel
;
70 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
71 brw_push_insn_state(p
);
72 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
73 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
74 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
75 retype(*src
, BRW_REGISTER_TYPE_UD
));
76 brw_pop_insn_state(p
);
78 *src
= brw_message_reg(msg_reg_nr
);
82 gen7_convert_mrf_to_grf(struct brw_compile
*p
, struct brw_reg
*reg
)
84 struct intel_context
*intel
= &p
->brw
->intel
;
85 if (intel
->gen
== 7 && reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
86 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
93 brw_set_dest(struct brw_compile
*p
, struct brw_instruction
*insn
,
96 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
97 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
98 assert(dest
.nr
< 128);
100 gen7_convert_mrf_to_grf(p
, &dest
);
102 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
103 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
104 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
106 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
107 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
109 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
110 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
111 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
112 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
113 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
116 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
117 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
118 /* even ignored in da16, still need to set as '01' */
119 insn
->bits1
.da16
.dest_horiz_stride
= 1;
123 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
125 /* These are different sizes in align1 vs align16:
127 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
128 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
129 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
130 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
131 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
134 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
135 /* even ignored in da16, still need to set as '01' */
136 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
140 /* NEW: Set the execution size based on dest.width and
141 * insn->compression_control:
143 guess_execution_size(p
, insn
, dest
);
146 extern int reg_type_size
[];
149 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
151 int hstride_for_reg
[] = {0, 1, 2, 4};
152 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
153 int width_for_reg
[] = {1, 2, 4, 8, 16};
154 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
155 int width
, hstride
, vstride
, execsize
;
157 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
159 * mean the destination has to be 128-bit aligned and the
160 * destination horiz stride has to be a word.
162 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
163 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
164 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
170 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
171 reg
.file
== BRW_ARF_NULL
)
174 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
175 hstride
= hstride_for_reg
[reg
.hstride
];
177 if (reg
.vstride
== 0xf) {
180 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
181 vstride
= vstride_for_reg
[reg
.vstride
];
184 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
185 width
= width_for_reg
[reg
.width
];
187 assert(insn
->header
.execution_size
>= 0 &&
188 insn
->header
.execution_size
< Elements(execsize_for_reg
));
189 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
191 /* Restrictions from 3.3.10: Register Region Restrictions. */
193 assert(execsize
>= width
);
196 if (execsize
== width
&& hstride
!= 0) {
197 assert(vstride
== -1 || vstride
== width
* hstride
);
201 if (execsize
== width
&& hstride
== 0) {
202 /* no restriction on vstride. */
207 assert(hstride
== 0);
211 if (execsize
== 1 && width
== 1) {
212 assert(hstride
== 0);
213 assert(vstride
== 0);
217 if (vstride
== 0 && hstride
== 0) {
221 /* 10. Check destination issues. */
225 brw_set_src0(struct brw_compile
*p
, struct brw_instruction
*insn
,
228 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
229 assert(reg
.nr
< 128);
231 gen7_convert_mrf_to_grf(p
, ®
);
233 validate_reg(insn
, reg
);
235 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
236 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
237 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
238 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
239 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
241 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
242 insn
->bits3
.ud
= reg
.dw1
.ud
;
244 /* Required to set some fields in src1 as well:
246 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
247 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
251 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
252 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
253 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
254 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
257 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
258 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
262 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
264 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
265 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
268 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
272 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
273 if (reg
.width
== BRW_WIDTH_1
&&
274 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
275 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
276 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
277 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
280 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
281 insn
->bits2
.da1
.src0_width
= reg
.width
;
282 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
286 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
287 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
288 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
289 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
291 /* This is an oddity of the fact we're using the same
292 * descriptions for registers in align_16 as align_1:
294 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
295 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
297 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
303 void brw_set_src1(struct brw_compile
*p
,
304 struct brw_instruction
*insn
,
307 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
309 assert(reg
.nr
< 128);
311 gen7_convert_mrf_to_grf(p
, ®
);
313 validate_reg(insn
, reg
);
315 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
316 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
317 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
318 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
320 /* Only src1 can be immediate in two-argument instructions.
322 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
324 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
325 insn
->bits3
.ud
= reg
.dw1
.ud
;
328 /* This is a hardware restriction, which may or may not be lifted
331 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
334 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
335 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
336 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
339 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
340 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
343 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
344 if (reg
.width
== BRW_WIDTH_1
&&
345 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
346 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
347 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
348 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
351 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
352 insn
->bits3
.da1
.src1_width
= reg
.width
;
353 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
357 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
358 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
359 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
360 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
362 /* This is an oddity of the fact we're using the same
363 * descriptions for registers in align_16 as align_1:
365 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
366 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
368 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
375 static void brw_set_math_message( struct brw_compile
*p
,
376 struct brw_instruction
*insn
,
378 GLuint response_length
,
381 GLboolean low_precision
,
385 struct brw_context
*brw
= p
->brw
;
386 struct intel_context
*intel
= &brw
->intel
;
387 brw_set_src1(p
, insn
, brw_imm_d(0));
389 if (intel
->gen
== 5) {
390 insn
->bits3
.math_gen5
.function
= function
;
391 insn
->bits3
.math_gen5
.int_type
= integer_type
;
392 insn
->bits3
.math_gen5
.precision
= low_precision
;
393 insn
->bits3
.math_gen5
.saturate
= saturate
;
394 insn
->bits3
.math_gen5
.data_type
= dataType
;
395 insn
->bits3
.math_gen5
.snapshot
= 0;
396 insn
->bits3
.math_gen5
.header_present
= 0;
397 insn
->bits3
.math_gen5
.response_length
= response_length
;
398 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
399 insn
->bits3
.math_gen5
.end_of_thread
= 0;
400 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
401 insn
->bits2
.send_gen5
.end_of_thread
= 0;
403 insn
->bits3
.math
.function
= function
;
404 insn
->bits3
.math
.int_type
= integer_type
;
405 insn
->bits3
.math
.precision
= low_precision
;
406 insn
->bits3
.math
.saturate
= saturate
;
407 insn
->bits3
.math
.data_type
= dataType
;
408 insn
->bits3
.math
.response_length
= response_length
;
409 insn
->bits3
.math
.msg_length
= msg_length
;
410 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
411 insn
->bits3
.math
.end_of_thread
= 0;
416 static void brw_set_ff_sync_message(struct brw_compile
*p
,
417 struct brw_instruction
*insn
,
419 GLuint response_length
,
420 GLboolean end_of_thread
)
422 struct brw_context
*brw
= p
->brw
;
423 struct intel_context
*intel
= &brw
->intel
;
424 brw_set_src1(p
, insn
, brw_imm_d(0));
426 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
427 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
428 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
429 insn
->bits3
.urb_gen5
.allocate
= allocate
;
430 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
431 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
432 insn
->bits3
.urb_gen5
.header_present
= 1;
433 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
434 insn
->bits3
.urb_gen5
.msg_length
= 1;
435 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
436 if (intel
->gen
>= 6) {
437 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
439 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
440 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
444 static void brw_set_urb_message( struct brw_compile
*p
,
445 struct brw_instruction
*insn
,
449 GLuint response_length
,
450 GLboolean end_of_thread
,
453 GLuint swizzle_control
)
455 struct brw_context
*brw
= p
->brw
;
456 struct intel_context
*intel
= &brw
->intel
;
457 brw_set_src1(p
, insn
, brw_imm_d(0));
459 if (intel
->gen
== 7) {
460 insn
->bits3
.urb_gen7
.opcode
= 0; /* URB_WRITE_HWORD */
461 insn
->bits3
.urb_gen7
.offset
= offset
;
462 assert(swizzle_control
!= BRW_URB_SWIZZLE_TRANSPOSE
);
463 insn
->bits3
.urb_gen7
.swizzle_control
= swizzle_control
;
464 /* per_slot_offset = 0 makes it ignore offsets in message header */
465 insn
->bits3
.urb_gen7
.per_slot_offset
= 0;
466 insn
->bits3
.urb_gen7
.complete
= complete
;
467 insn
->bits3
.urb_gen7
.header_present
= 1;
468 insn
->bits3
.urb_gen7
.response_length
= response_length
;
469 insn
->bits3
.urb_gen7
.msg_length
= msg_length
;
470 insn
->bits3
.urb_gen7
.end_of_thread
= end_of_thread
;
471 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
472 } else if (intel
->gen
>= 5) {
473 insn
->bits3
.urb_gen5
.opcode
= 0; /* URB_WRITE */
474 insn
->bits3
.urb_gen5
.offset
= offset
;
475 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
476 insn
->bits3
.urb_gen5
.allocate
= allocate
;
477 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
478 insn
->bits3
.urb_gen5
.complete
= complete
;
479 insn
->bits3
.urb_gen5
.header_present
= 1;
480 insn
->bits3
.urb_gen5
.response_length
= response_length
;
481 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
482 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
483 if (intel
->gen
>= 6) {
484 /* For SNB, the SFID bits moved to the condmod bits, and
485 * EOT stayed in bits3 above. Does the EOT bit setting
486 * below on Ironlake even do anything?
488 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
490 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
491 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
494 insn
->bits3
.urb
.opcode
= 0; /* ? */
495 insn
->bits3
.urb
.offset
= offset
;
496 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
497 insn
->bits3
.urb
.allocate
= allocate
;
498 insn
->bits3
.urb
.used
= used
; /* ? */
499 insn
->bits3
.urb
.complete
= complete
;
500 insn
->bits3
.urb
.response_length
= response_length
;
501 insn
->bits3
.urb
.msg_length
= msg_length
;
502 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
503 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
508 brw_set_dp_write_message(struct brw_compile
*p
,
509 struct brw_instruction
*insn
,
510 GLuint binding_table_index
,
514 GLboolean header_present
,
515 GLuint pixel_scoreboard_clear
,
516 GLuint response_length
,
517 GLuint end_of_thread
,
518 GLuint send_commit_msg
)
520 struct brw_context
*brw
= p
->brw
;
521 struct intel_context
*intel
= &brw
->intel
;
522 brw_set_src1(p
, insn
, brw_imm_ud(0));
524 if (intel
->gen
>= 7) {
525 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
526 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
527 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
528 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
529 insn
->bits3
.gen7_dp
.header_present
= header_present
;
530 insn
->bits3
.gen7_dp
.response_length
= response_length
;
531 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
532 insn
->bits3
.gen7_dp
.end_of_thread
= end_of_thread
;
534 /* We always use the render cache for write messages */
535 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
536 } else if (intel
->gen
== 6) {
537 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
538 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
539 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
540 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
541 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
542 insn
->bits3
.gen6_dp
.header_present
= header_present
;
543 insn
->bits3
.gen6_dp
.response_length
= response_length
;
544 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
545 insn
->bits3
.gen6_dp
.end_of_thread
= end_of_thread
;
547 /* We always use the render cache for write messages */
548 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
549 } else if (intel
->gen
== 5) {
550 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
551 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
552 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
553 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
554 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
555 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
556 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
557 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
558 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
559 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
560 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
562 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
563 insn
->bits3
.dp_write
.msg_control
= msg_control
;
564 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
565 insn
->bits3
.dp_write
.msg_type
= msg_type
;
566 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
567 insn
->bits3
.dp_write
.response_length
= response_length
;
568 insn
->bits3
.dp_write
.msg_length
= msg_length
;
569 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
570 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
575 brw_set_dp_read_message(struct brw_compile
*p
,
576 struct brw_instruction
*insn
,
577 GLuint binding_table_index
,
582 GLuint response_length
)
584 struct brw_context
*brw
= p
->brw
;
585 struct intel_context
*intel
= &brw
->intel
;
586 brw_set_src1(p
, insn
, brw_imm_d(0));
588 if (intel
->gen
>= 7) {
589 insn
->bits3
.gen7_dp
.binding_table_index
= binding_table_index
;
590 insn
->bits3
.gen7_dp
.msg_control
= msg_control
;
591 insn
->bits3
.gen7_dp
.pixel_scoreboard_clear
= 0;
592 insn
->bits3
.gen7_dp
.msg_type
= msg_type
;
593 insn
->bits3
.gen7_dp
.header_present
= 1;
594 insn
->bits3
.gen7_dp
.response_length
= response_length
;
595 insn
->bits3
.gen7_dp
.msg_length
= msg_length
;
596 insn
->bits3
.gen7_dp
.end_of_thread
= 0;
597 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_CONST_CACHE
;
598 } else if (intel
->gen
== 6) {
599 uint32_t target_function
;
601 if (target_cache
== BRW_DATAPORT_READ_TARGET_DATA_CACHE
)
602 target_function
= GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE
;
604 target_function
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
606 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
607 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
608 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= 0;
609 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
610 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
611 insn
->bits3
.gen6_dp
.header_present
= 1;
612 insn
->bits3
.gen6_dp
.response_length
= response_length
;
613 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
614 insn
->bits3
.gen6_dp
.end_of_thread
= 0;
615 insn
->header
.destreg__conditionalmod
= target_function
;
616 } else if (intel
->gen
== 5) {
617 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
618 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
619 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
620 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
621 insn
->bits3
.dp_read_gen5
.header_present
= 1;
622 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
623 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
624 insn
->bits3
.dp_read_gen5
.pad1
= 0;
625 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
626 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
627 insn
->bits2
.send_gen5
.end_of_thread
= 0;
628 } else if (intel
->is_g4x
) {
629 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
630 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
631 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
632 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
633 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
634 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
635 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
636 insn
->bits3
.dp_read_g4x
.pad1
= 0;
637 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
639 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
640 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
641 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
642 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
643 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
644 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
645 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
646 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
647 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
651 static void brw_set_sampler_message(struct brw_compile
*p
,
652 struct brw_instruction
*insn
,
653 GLuint binding_table_index
,
656 GLuint response_length
,
659 GLuint header_present
,
662 struct brw_context
*brw
= p
->brw
;
663 struct intel_context
*intel
= &brw
->intel
;
665 brw_set_src1(p
, insn
, brw_imm_d(0));
667 if (intel
->gen
>= 7) {
668 insn
->bits3
.sampler_gen7
.binding_table_index
= binding_table_index
;
669 insn
->bits3
.sampler_gen7
.sampler
= sampler
;
670 insn
->bits3
.sampler_gen7
.msg_type
= msg_type
;
671 insn
->bits3
.sampler_gen7
.simd_mode
= simd_mode
;
672 insn
->bits3
.sampler_gen7
.header_present
= header_present
;
673 insn
->bits3
.sampler_gen7
.response_length
= response_length
;
674 insn
->bits3
.sampler_gen7
.msg_length
= msg_length
;
675 insn
->bits3
.sampler_gen7
.end_of_thread
= eot
;
676 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
677 } else if (intel
->gen
>= 5) {
678 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
679 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
680 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
681 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
682 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
683 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
684 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
685 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
687 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
689 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
690 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
692 } else if (intel
->is_g4x
) {
693 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
694 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
695 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
696 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
697 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
698 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
699 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
701 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
702 insn
->bits3
.sampler
.sampler
= sampler
;
703 insn
->bits3
.sampler
.msg_type
= msg_type
;
704 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
705 insn
->bits3
.sampler
.response_length
= response_length
;
706 insn
->bits3
.sampler
.msg_length
= msg_length
;
707 insn
->bits3
.sampler
.end_of_thread
= eot
;
708 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
713 #define next_insn brw_next_insn
714 struct brw_instruction
*
715 brw_next_insn(struct brw_compile
*p
, GLuint opcode
)
717 struct brw_instruction
*insn
;
719 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
721 insn
= &p
->store
[p
->nr_insn
++];
722 memcpy(insn
, p
->current
, sizeof(*insn
));
724 /* Reset this one-shot flag:
727 if (p
->current
->header
.destreg__conditionalmod
) {
728 p
->current
->header
.destreg__conditionalmod
= 0;
729 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
732 insn
->header
.opcode
= opcode
;
736 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
741 struct brw_instruction
*insn
= next_insn(p
, opcode
);
742 brw_set_dest(p
, insn
, dest
);
743 brw_set_src0(p
, insn
, src
);
747 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
751 struct brw_reg src1
)
753 struct brw_instruction
*insn
= next_insn(p
, opcode
);
754 brw_set_dest(p
, insn
, dest
);
755 brw_set_src0(p
, insn
, src0
);
756 brw_set_src1(p
, insn
, src1
);
/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, \
	      struct brw_reg dest, \
	      struct brw_reg src) \
{ \
   struct brw_instruction *rnd, *add; \
   rnd = next_insn(p, BRW_OPCODE_##OP); \
   brw_set_dest(p, rnd, dest); \
   brw_set_src0(p, rnd, src); \
 \
   if (p->brw->intel.gen < 6) { \
      /* turn on round-increments */ \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
      add->header.predicate_control = BRW_PREDICATE_NORMAL; \
   } \
}
835 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
841 if (src0
.type
== BRW_REGISTER_TYPE_F
||
842 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
843 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
844 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
845 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
848 if (src1
.type
== BRW_REGISTER_TYPE_F
||
849 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
850 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
851 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
852 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
855 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
858 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
864 if (src0
.type
== BRW_REGISTER_TYPE_D
||
865 src0
.type
== BRW_REGISTER_TYPE_UD
||
866 src1
.type
== BRW_REGISTER_TYPE_D
||
867 src1
.type
== BRW_REGISTER_TYPE_UD
) {
868 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
871 if (src0
.type
== BRW_REGISTER_TYPE_F
||
872 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
873 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
874 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
875 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
878 if (src1
.type
== BRW_REGISTER_TYPE_F
||
879 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
880 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
881 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
882 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
885 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
886 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
887 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
888 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
890 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
894 void brw_NOP(struct brw_compile
*p
)
896 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
897 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
898 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
899 brw_set_src1(p
, insn
, brw_imm_ud(0x0));
906 /***********************************************************************
907 * Comparisons, if/else/endif
910 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
915 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
917 insn
->header
.execution_size
= 1;
918 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
919 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
921 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
927 push_if_stack(struct brw_compile
*p
, struct brw_instruction
*inst
)
929 p
->if_stack
[p
->if_stack_depth
] = inst
;
932 if (p
->if_stack_array_size
<= p
->if_stack_depth
) {
933 p
->if_stack_array_size
*= 2;
934 p
->if_stack
= reralloc(p
->mem_ctx
, p
->if_stack
, struct brw_instruction
*,
935 p
->if_stack_array_size
);
939 /* EU takes the value from the flag register and pushes it onto some
940 * sort of a stack (presumably merging with any flag value already on
941 * the stack). Within an if block, the flags at the top of the stack
942 * control execution on each channel of the unit, eg. on each of the
943 * 16 pixel values in our wm programs.
945 * When the matching 'else' instruction is reached (presumably by
946 * countdown of the instruction count patched in by our ELSE/ENDIF
947 * functions), the relevent flags are inverted.
949 * When the matching 'endif' instruction is reached, the flags are
950 * popped off. If the stack is now empty, normal execution resumes.
952 struct brw_instruction
*
953 brw_IF(struct brw_compile
*p
, GLuint execute_size
)
955 struct intel_context
*intel
= &p
->brw
->intel
;
956 struct brw_instruction
*insn
;
958 insn
= next_insn(p
, BRW_OPCODE_IF
);
960 /* Override the defaults for this instruction:
962 if (intel
->gen
< 6) {
963 brw_set_dest(p
, insn
, brw_ip_reg());
964 brw_set_src0(p
, insn
, brw_ip_reg());
965 brw_set_src1(p
, insn
, brw_imm_d(0x0));
966 } else if (intel
->gen
== 6) {
967 brw_set_dest(p
, insn
, brw_imm_w(0));
968 insn
->bits1
.branch_gen6
.jump_count
= 0;
969 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
970 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
972 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
973 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
974 brw_set_src1(p
, insn
, brw_imm_ud(0));
975 insn
->bits3
.break_cont
.jip
= 0;
976 insn
->bits3
.break_cont
.uip
= 0;
979 insn
->header
.execution_size
= execute_size
;
980 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
981 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
982 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
983 if (!p
->single_program_flow
)
984 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
986 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
988 push_if_stack(p
, insn
);
992 /* This function is only used for gen6-style IF instructions with an
993 * embedded comparison (conditional modifier). It is not used on gen7.
995 struct brw_instruction
*
996 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
997 struct brw_reg src0
, struct brw_reg src1
)
999 struct brw_instruction
*insn
;
1001 insn
= next_insn(p
, BRW_OPCODE_IF
);
1003 brw_set_dest(p
, insn
, brw_imm_w(0));
1004 if (p
->compressed
) {
1005 insn
->header
.execution_size
= BRW_EXECUTE_16
;
1007 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1009 insn
->bits1
.branch_gen6
.jump_count
= 0;
1010 brw_set_src0(p
, insn
, src0
);
1011 brw_set_src1(p
, insn
, src1
);
1013 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
1014 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1015 insn
->header
.destreg__conditionalmod
= conditional
;
1017 if (!p
->single_program_flow
)
1018 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1020 push_if_stack(p
, insn
);
1025 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1028 convert_IF_ELSE_to_ADD(struct brw_compile
*p
,
1029 struct brw_instruction
*if_inst
,
1030 struct brw_instruction
*else_inst
)
1032 /* The next instruction (where the ENDIF would be, if it existed) */
1033 struct brw_instruction
*next_inst
= &p
->store
[p
->nr_insn
];
1035 assert(p
->single_program_flow
);
1036 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1037 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1038 assert(if_inst
->header
.execution_size
== BRW_EXECUTE_1
);
1040 /* Convert IF to an ADD instruction that moves the instruction pointer
1041 * to the first instruction of the ELSE block. If there is no ELSE
1042 * block, point to where ENDIF would be. Reverse the predicate.
1044 * There's no need to execute an ENDIF since we don't need to do any
1045 * stack operations, and if we're currently executing, we just want to
1046 * continue normally.
1048 if_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1049 if_inst
->header
.predicate_inverse
= 1;
1051 if (else_inst
!= NULL
) {
1052 /* Convert ELSE to an ADD instruction that points where the ENDIF
1055 else_inst
->header
.opcode
= BRW_OPCODE_ADD
;
1057 if_inst
->bits3
.ud
= (else_inst
- if_inst
+ 1) * 16;
1058 else_inst
->bits3
.ud
= (next_inst
- else_inst
) * 16;
1060 if_inst
->bits3
.ud
= (next_inst
- if_inst
) * 16;
1065 * Patch IF and ELSE instructions with appropriate jump targets.
1068 patch_IF_ELSE(struct brw_compile
*p
,
1069 struct brw_instruction
*if_inst
,
1070 struct brw_instruction
*else_inst
,
1071 struct brw_instruction
*endif_inst
)
1073 struct intel_context
*intel
= &p
->brw
->intel
;
1075 assert(!p
->single_program_flow
);
1076 assert(if_inst
!= NULL
&& if_inst
->header
.opcode
== BRW_OPCODE_IF
);
1077 assert(endif_inst
!= NULL
);
1078 assert(else_inst
== NULL
|| else_inst
->header
.opcode
== BRW_OPCODE_ELSE
);
1081 /* Jump count is for 64bit data chunk each, so one 128bit instruction
1082 * requires 2 chunks.
1084 if (intel
->gen
>= 5)
1087 assert(endif_inst
->header
.opcode
== BRW_OPCODE_ENDIF
);
1088 endif_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1090 if (else_inst
== NULL
) {
1091 /* Patch IF -> ENDIF */
1092 if (intel
->gen
< 6) {
1093 /* Turn it into an IFF, which means no mask stack operations for
1094 * all-false and jumping past the ENDIF.
1096 if_inst
->header
.opcode
= BRW_OPCODE_IFF
;
1097 if_inst
->bits3
.if_else
.jump_count
= br
* (endif_inst
- if_inst
+ 1);
1098 if_inst
->bits3
.if_else
.pop_count
= 0;
1099 if_inst
->bits3
.if_else
.pad0
= 0;
1100 } else if (intel
->gen
== 6) {
1101 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1102 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (endif_inst
- if_inst
);
1104 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1105 if_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- if_inst
);
1108 else_inst
->header
.execution_size
= if_inst
->header
.execution_size
;
1110 /* Patch IF -> ELSE */
1111 if (intel
->gen
< 6) {
1112 if_inst
->bits3
.if_else
.jump_count
= br
* (else_inst
- if_inst
);
1113 if_inst
->bits3
.if_else
.pop_count
= 0;
1114 if_inst
->bits3
.if_else
.pad0
= 0;
1115 } else if (intel
->gen
== 6) {
1116 if_inst
->bits1
.branch_gen6
.jump_count
= br
* (else_inst
- if_inst
+ 1);
1119 /* Patch ELSE -> ENDIF */
1120 if (intel
->gen
< 6) {
1121 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1124 else_inst
->bits3
.if_else
.jump_count
= br
*(endif_inst
- else_inst
+ 1);
1125 else_inst
->bits3
.if_else
.pop_count
= 1;
1126 else_inst
->bits3
.if_else
.pad0
= 0;
1127 } else if (intel
->gen
== 6) {
1128 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1129 else_inst
->bits1
.branch_gen6
.jump_count
= br
*(endif_inst
- else_inst
);
1131 /* The IF instruction's JIP should point just past the ELSE */
1132 if_inst
->bits3
.break_cont
.jip
= br
* (else_inst
- if_inst
+ 1);
1133 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1134 if_inst
->bits3
.break_cont
.uip
= br
* (endif_inst
- if_inst
);
1135 else_inst
->bits3
.break_cont
.jip
= br
* (endif_inst
- else_inst
);
1141 brw_ELSE(struct brw_compile
*p
)
1143 struct intel_context
*intel
= &p
->brw
->intel
;
1144 struct brw_instruction
*insn
;
1146 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
1148 if (intel
->gen
< 6) {
1149 brw_set_dest(p
, insn
, brw_ip_reg());
1150 brw_set_src0(p
, insn
, brw_ip_reg());
1151 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1152 } else if (intel
->gen
== 6) {
1153 brw_set_dest(p
, insn
, brw_imm_w(0));
1154 insn
->bits1
.branch_gen6
.jump_count
= 0;
1155 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1156 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1158 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1159 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1160 brw_set_src1(p
, insn
, brw_imm_ud(0));
1161 insn
->bits3
.break_cont
.jip
= 0;
1162 insn
->bits3
.break_cont
.uip
= 0;
1165 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1166 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1167 if (!p
->single_program_flow
)
1168 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1170 push_if_stack(p
, insn
);
1174 brw_ENDIF(struct brw_compile
*p
)
1176 struct intel_context
*intel
= &p
->brw
->intel
;
1177 struct brw_instruction
*insn
;
1178 struct brw_instruction
*else_inst
= NULL
;
1179 struct brw_instruction
*if_inst
= NULL
;
1181 /* Pop the IF and (optional) ELSE instructions from the stack */
1182 p
->if_stack_depth
--;
1183 if (p
->if_stack
[p
->if_stack_depth
]->header
.opcode
== BRW_OPCODE_ELSE
) {
1184 else_inst
= p
->if_stack
[p
->if_stack_depth
];
1185 p
->if_stack_depth
--;
1187 if_inst
= p
->if_stack
[p
->if_stack_depth
];
1189 if (p
->single_program_flow
) {
1190 /* ENDIF is useless; don't bother emitting it. */
1191 convert_IF_ELSE_to_ADD(p
, if_inst
, else_inst
);
1195 insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1197 if (intel
->gen
< 6) {
1198 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1199 brw_set_src0(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1200 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1201 } else if (intel
->gen
== 6) {
1202 brw_set_dest(p
, insn
, brw_imm_w(0));
1203 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1204 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1206 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1207 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1208 brw_set_src1(p
, insn
, brw_imm_ud(0));
1211 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1212 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1213 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1215 /* Also pop item off the stack in the endif instruction: */
1216 if (intel
->gen
< 6) {
1217 insn
->bits3
.if_else
.jump_count
= 0;
1218 insn
->bits3
.if_else
.pop_count
= 1;
1219 insn
->bits3
.if_else
.pad0
= 0;
1220 } else if (intel
->gen
== 6) {
1221 insn
->bits1
.branch_gen6
.jump_count
= 2;
1223 insn
->bits3
.break_cont
.jip
= 2;
1225 patch_IF_ELSE(p
, if_inst
, else_inst
, insn
);
1228 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1230 struct intel_context
*intel
= &p
->brw
->intel
;
1231 struct brw_instruction
*insn
;
1233 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1234 if (intel
->gen
>= 6) {
1235 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1236 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1237 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1239 brw_set_dest(p
, insn
, brw_ip_reg());
1240 brw_set_src0(p
, insn
, brw_ip_reg());
1241 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1242 insn
->bits3
.if_else
.pad0
= 0;
1243 insn
->bits3
.if_else
.pop_count
= pop_count
;
1245 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1246 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1251 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1252 struct brw_instruction
*do_insn
)
1254 struct brw_instruction
*insn
;
1256 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1257 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1258 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1259 brw_set_dest(p
, insn
, brw_ip_reg());
1260 brw_set_src0(p
, insn
, brw_ip_reg());
1261 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1263 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1264 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1268 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1270 struct brw_instruction
*insn
;
1271 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1272 brw_set_dest(p
, insn
, brw_ip_reg());
1273 brw_set_src0(p
, insn
, brw_ip_reg());
1274 brw_set_src1(p
, insn
, brw_imm_d(0x0));
1275 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1276 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1277 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1278 insn
->bits3
.if_else
.pad0
= 0;
1279 insn
->bits3
.if_else
.pop_count
= pop_count
;
1285 * The DO/WHILE is just an unterminated loop -- break or continue are
1286 * used for control within the loop. We have a few ways they can be
1289 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1290 * jip and no DO instruction.
1292 * For non-uniform control flow pre-gen6, there's a DO instruction to
1293 * push the mask, and a WHILE to jump back, and BREAK to get out and
1296 * For gen6, there's no more mask stack, so no need for DO. WHILE
1297 * just points back to the first instruction of the loop.
1299 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1301 struct intel_context
*intel
= &p
->brw
->intel
;
1303 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1304 return &p
->store
[p
->nr_insn
];
1306 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1308 /* Override the defaults for this instruction:
1310 brw_set_dest(p
, insn
, brw_null_reg());
1311 brw_set_src0(p
, insn
, brw_null_reg());
1312 brw_set_src1(p
, insn
, brw_null_reg());
1314 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1315 insn
->header
.execution_size
= execute_size
;
1316 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1317 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1318 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1326 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1327 struct brw_instruction
*do_insn
)
1329 struct intel_context
*intel
= &p
->brw
->intel
;
1330 struct brw_instruction
*insn
;
1333 if (intel
->gen
>= 5)
1336 if (intel
->gen
>= 7) {
1337 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1339 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1340 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1341 brw_set_src1(p
, insn
, brw_imm_ud(0));
1342 insn
->bits3
.break_cont
.jip
= br
* (do_insn
- insn
);
1344 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1345 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1346 } else if (intel
->gen
== 6) {
1347 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1349 brw_set_dest(p
, insn
, brw_imm_w(0));
1350 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1351 brw_set_src0(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1352 brw_set_src1(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1354 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1355 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1357 if (p
->single_program_flow
) {
1358 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1360 brw_set_dest(p
, insn
, brw_ip_reg());
1361 brw_set_src0(p
, insn
, brw_ip_reg());
1362 brw_set_src1(p
, insn
, brw_imm_d((do_insn
- insn
) * 16));
1363 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1365 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1367 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1369 brw_set_dest(p
, insn
, brw_ip_reg());
1370 brw_set_src0(p
, insn
, brw_ip_reg());
1371 brw_set_src1(p
, insn
, brw_imm_d(0));
1373 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1374 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1375 insn
->bits3
.if_else
.pop_count
= 0;
1376 insn
->bits3
.if_else
.pad0
= 0;
1379 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1380 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1388 void brw_land_fwd_jump(struct brw_compile
*p
,
1389 struct brw_instruction
*jmp_insn
)
1391 struct intel_context
*intel
= &p
->brw
->intel
;
1392 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1395 if (intel
->gen
>= 5)
1398 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1399 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1401 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1406 /* To integrate with the above, it makes sense that the comparison
1407 * instruction should populate the flag register. It might be simpler
1408 * just to use the flag reg for most WM tasks?
1410 void brw_CMP(struct brw_compile
*p
,
1411 struct brw_reg dest
,
1413 struct brw_reg src0
,
1414 struct brw_reg src1
)
1416 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1418 insn
->header
.destreg__conditionalmod
= conditional
;
1419 brw_set_dest(p
, insn
, dest
);
1420 brw_set_src0(p
, insn
, src0
);
1421 brw_set_src1(p
, insn
, src1
);
1423 /* guess_execution_size(insn, src0); */
1426 /* Make it so that future instructions will use the computed flag
1427 * value until brw_set_predicate_control_flag_value() is called
1430 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1432 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1433 p
->flag_value
= 0xff;
1437 /* Issue 'wait' instruction for n1, host could program MMIO
1438 to wake up thread. */
1439 void brw_WAIT (struct brw_compile
*p
)
1441 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1442 struct brw_reg src
= brw_notification_1_reg();
1444 brw_set_dest(p
, insn
, src
);
1445 brw_set_src0(p
, insn
, src
);
1446 brw_set_src1(p
, insn
, brw_null_reg());
1447 insn
->header
.execution_size
= 0; /* must */
1448 insn
->header
.predicate_control
= 0;
1449 insn
->header
.compression_control
= 0;
1453 /***********************************************************************
1454 * Helpers for the various SEND message types:
1457 /** Extended math function, float[8].
1459 void brw_math( struct brw_compile
*p
,
1460 struct brw_reg dest
,
1468 struct intel_context
*intel
= &p
->brw
->intel
;
1470 if (intel
->gen
>= 6) {
1471 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1473 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1474 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1476 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1477 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1479 /* Source modifiers are ignored for extended math instructions. */
1480 assert(!src
.negate
);
1483 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1484 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1485 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1488 /* Math is the same ISA format as other opcodes, except that CondModifier
1489 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1491 insn
->header
.destreg__conditionalmod
= function
;
1492 insn
->header
.saturate
= saturate
;
1494 brw_set_dest(p
, insn
, dest
);
1495 brw_set_src0(p
, insn
, src
);
1496 brw_set_src1(p
, insn
, brw_null_reg());
1498 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1499 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1500 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1501 /* Example code doesn't set predicate_control for send
1504 insn
->header
.predicate_control
= 0;
1505 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1507 brw_set_dest(p
, insn
, dest
);
1508 brw_set_src0(p
, insn
, src
);
1509 brw_set_math_message(p
,
1511 msg_length
, response_length
,
1513 BRW_MATH_INTEGER_UNSIGNED
,
1520 /** Extended math function, float[8].
1522 void brw_math2(struct brw_compile
*p
,
1523 struct brw_reg dest
,
1525 struct brw_reg src0
,
1526 struct brw_reg src1
)
1528 struct intel_context
*intel
= &p
->brw
->intel
;
1529 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1531 assert(intel
->gen
>= 6);
1535 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1536 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1537 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1539 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1540 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1541 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1543 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1544 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1545 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1546 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1549 /* Source modifiers are ignored for extended math instructions. */
1550 assert(!src0
.negate
);
1552 assert(!src1
.negate
);
1555 /* Math is the same ISA format as other opcodes, except that CondModifier
1556 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1558 insn
->header
.destreg__conditionalmod
= function
;
1560 brw_set_dest(p
, insn
, dest
);
1561 brw_set_src0(p
, insn
, src0
);
1562 brw_set_src1(p
, insn
, src1
);
1566 * Extended math function, float[16].
1567 * Use 2 send instructions.
1569 void brw_math_16( struct brw_compile
*p
,
1570 struct brw_reg dest
,
1577 struct intel_context
*intel
= &p
->brw
->intel
;
1578 struct brw_instruction
*insn
;
1579 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1580 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1582 if (intel
->gen
>= 6) {
1583 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1585 /* Math is the same ISA format as other opcodes, except that CondModifier
1586 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1588 insn
->header
.destreg__conditionalmod
= function
;
1589 insn
->header
.saturate
= saturate
;
1591 /* Source modifiers are ignored for extended math instructions. */
1592 assert(!src
.negate
);
1595 brw_set_dest(p
, insn
, dest
);
1596 brw_set_src0(p
, insn
, src
);
1597 brw_set_src1(p
, insn
, brw_null_reg());
1601 /* First instruction:
1603 brw_push_insn_state(p
);
1604 brw_set_predicate_control_flag_value(p
, 0xff);
1605 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1607 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1608 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1610 brw_set_dest(p
, insn
, dest
);
1611 brw_set_src0(p
, insn
, src
);
1612 brw_set_math_message(p
,
1614 msg_length
, response_length
,
1616 BRW_MATH_INTEGER_UNSIGNED
,
1619 BRW_MATH_DATA_VECTOR
);
1621 /* Second instruction:
1623 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1624 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1625 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1627 brw_set_dest(p
, insn
, offset(dest
,1));
1628 brw_set_src0(p
, insn
, src
);
1629 brw_set_math_message(p
,
1631 msg_length
, response_length
,
1633 BRW_MATH_INTEGER_UNSIGNED
,
1636 BRW_MATH_DATA_VECTOR
);
1638 brw_pop_insn_state(p
);
1643 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1644 * using a constant offset per channel.
1646 * The offset must be aligned to oword size (16 bytes). Used for
1647 * register spilling.
1649 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1654 struct intel_context
*intel
= &p
->brw
->intel
;
1655 uint32_t msg_control
, msg_type
;
1658 if (intel
->gen
>= 6)
1661 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1663 if (num_regs
== 1) {
1664 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1667 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1671 /* Set up the message header. This is g0, with g0.2 filled with
1672 * the offset. We don't want to leave our offset around in g0 or
1673 * it'll screw up texture samples, so set it up inside the message
1677 brw_push_insn_state(p
);
1678 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1679 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1681 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1683 /* set message header global offset field (reg 0, element 2) */
1685 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1687 2), BRW_REGISTER_TYPE_UD
),
1688 brw_imm_ud(offset
));
1690 brw_pop_insn_state(p
);
1694 struct brw_reg dest
;
1695 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1696 int send_commit_msg
;
1697 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1698 BRW_REGISTER_TYPE_UW
);
1700 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1701 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1702 src_header
= vec16(src_header
);
1704 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1705 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1707 /* Until gen6, writes followed by reads from the same location
1708 * are not guaranteed to be ordered unless write_commit is set.
1709 * If set, then a no-op write is issued to the destination
1710 * register to set a dependency, and a read from the destination
1711 * can be used to ensure the ordering.
1713 * For gen6, only writes between different threads need ordering
1714 * protection. Our use of DP writes is all about register
1715 * spilling within a thread.
1717 if (intel
->gen
>= 6) {
1718 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1719 send_commit_msg
= 0;
1722 send_commit_msg
= 1;
1725 brw_set_dest(p
, insn
, dest
);
1726 if (intel
->gen
>= 6) {
1727 brw_set_src0(p
, insn
, mrf
);
1729 brw_set_src0(p
, insn
, brw_null_reg());
1732 if (intel
->gen
>= 6)
1733 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1735 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1737 brw_set_dp_write_message(p
,
1739 255, /* binding table index (255=stateless) */
1743 GL_TRUE
, /* header_present */
1744 0, /* pixel scoreboard */
1745 send_commit_msg
, /* response_length */
1753 * Read a block of owords (half a GRF each) from the scratch buffer
1754 * using a constant index per channel.
1756 * Offset must be aligned to oword size (16 bytes). Used for register
1760 brw_oword_block_read_scratch(struct brw_compile
*p
,
1761 struct brw_reg dest
,
1766 struct intel_context
*intel
= &p
->brw
->intel
;
1767 uint32_t msg_control
;
1770 if (intel
->gen
>= 6)
1773 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1774 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1776 if (num_regs
== 1) {
1777 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1780 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1785 brw_push_insn_state(p
);
1786 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1787 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1789 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1791 /* set message header global offset field (reg 0, element 2) */
1793 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1795 2), BRW_REGISTER_TYPE_UD
),
1796 brw_imm_ud(offset
));
1798 brw_pop_insn_state(p
);
1802 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1804 assert(insn
->header
.predicate_control
== 0);
1805 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1806 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1808 brw_set_dest(p
, insn
, dest
); /* UW? */
1809 if (intel
->gen
>= 6) {
1810 brw_set_src0(p
, insn
, mrf
);
1812 brw_set_src0(p
, insn
, brw_null_reg());
1815 brw_set_dp_read_message(p
,
1817 255, /* binding table index (255=stateless) */
1819 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1820 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1827 * Read a float[4] vector from the data port Data Cache (const buffer).
1828 * Location (in buffer) should be a multiple of 16.
1829 * Used for fetching shader constants.
1831 void brw_oword_block_read(struct brw_compile
*p
,
1832 struct brw_reg dest
,
1835 uint32_t bind_table_index
)
1837 struct intel_context
*intel
= &p
->brw
->intel
;
1839 /* On newer hardware, offset is in units of owords. */
1840 if (intel
->gen
>= 6)
1843 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1845 brw_push_insn_state(p
);
1846 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1847 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1848 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1850 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1852 /* set message header global offset field (reg 0, element 2) */
1854 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1856 2), BRW_REGISTER_TYPE_UD
),
1857 brw_imm_ud(offset
));
1859 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1860 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1862 /* cast dest to a uword[8] vector */
1863 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1865 brw_set_dest(p
, insn
, dest
);
1866 if (intel
->gen
>= 6) {
1867 brw_set_src0(p
, insn
, mrf
);
1869 brw_set_src0(p
, insn
, brw_null_reg());
1872 brw_set_dp_read_message(p
,
1875 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1876 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1877 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1879 1); /* response_length (1 reg, 2 owords!) */
1881 brw_pop_insn_state(p
);
1885 * Read a set of dwords from the data port Data Cache (const buffer).
1887 * Location (in buffer) appears as UD offsets in the register after
1888 * the provided mrf header reg.
1890 void brw_dword_scattered_read(struct brw_compile
*p
,
1891 struct brw_reg dest
,
1893 uint32_t bind_table_index
)
1895 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1897 brw_push_insn_state(p
);
1898 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1899 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1900 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1901 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1902 brw_pop_insn_state(p
);
1904 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1905 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1907 /* cast dest to a uword[8] vector */
1908 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1910 brw_set_dest(p
, insn
, dest
);
1911 brw_set_src0(p
, insn
, brw_null_reg());
1913 brw_set_dp_read_message(p
,
1916 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1917 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1918 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1920 1); /* response_length */
1926 * Read float[4] constant(s) from VS constant buffer.
1927 * For relative addressing, two float[4] constants will be read into 'dest'.
1928 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1930 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1931 struct brw_reg dest
,
1933 GLuint bind_table_index
)
1935 struct intel_context
*intel
= &p
->brw
->intel
;
1936 struct brw_instruction
*insn
;
1937 GLuint msg_reg_nr
= 1;
1939 if (intel
->gen
>= 6)
1942 /* Setup MRF[1] with location/offset into const buffer */
1943 brw_push_insn_state(p
);
1944 brw_set_access_mode(p
, BRW_ALIGN_1
);
1945 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1946 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1947 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1948 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1949 BRW_REGISTER_TYPE_UD
),
1950 brw_imm_ud(location
));
1951 brw_pop_insn_state(p
);
1953 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1955 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1956 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1957 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1958 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1960 brw_set_dest(p
, insn
, dest
);
1961 if (intel
->gen
>= 6) {
1962 brw_set_src0(p
, insn
, brw_message_reg(msg_reg_nr
));
1964 brw_set_src0(p
, insn
, brw_null_reg());
1967 brw_set_dp_read_message(p
,
1971 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1972 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1974 1); /* response_length (1 Oword) */
1978 * Read a float[4] constant per vertex from VS constant buffer, with
1979 * relative addressing.
1981 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1982 struct brw_reg dest
,
1983 struct brw_reg addr_reg
,
1985 GLuint bind_table_index
)
1987 struct intel_context
*intel
= &p
->brw
->intel
;
1988 struct brw_reg src
= brw_vec8_grf(0, 0);
1991 /* Setup MRF[1] with offset into const buffer */
1992 brw_push_insn_state(p
);
1993 brw_set_access_mode(p
, BRW_ALIGN_1
);
1994 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1995 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1996 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1998 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
2001 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
2002 addr_reg
, brw_imm_d(offset
));
2003 brw_pop_insn_state(p
);
2005 gen6_resolve_implied_move(p
, &src
, 0);
2006 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
2008 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
2009 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2010 insn
->header
.destreg__conditionalmod
= 0;
2011 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
2013 brw_set_dest(p
, insn
, dest
);
2014 brw_set_src0(p
, insn
, src
);
2016 if (intel
->gen
>= 6)
2017 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2018 else if (intel
->gen
== 5 || intel
->is_g4x
)
2019 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2021 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
2023 brw_set_dp_read_message(p
,
2026 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
2028 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
2030 1); /* response_length */
/* Emit a data-port render-target-write (framebuffer write) SEND message.
 *
 * NOTE(review): this chunk is a partial extraction.  The parameter list
 * skips source lines (dispatch_width, msg_reg_nr, msg_length, eot are
 * referenced below but their declarations are not visible here), and the
 * brw_set_dp_write_message() call below is missing several argument
 * lines.  Confirm against the complete file before editing.
 */
2035 void brw_fb_WRITE(struct brw_compile
*p
,
2038 struct brw_reg src0
,
2039 GLuint binding_table_index
,
2041 GLuint response_length
,
2043 GLboolean header_present
)
2045 struct intel_context
*intel
= &p
->brw
->intel
;
2046 struct brw_instruction
*insn
;
2047 GLuint msg_control
, msg_type
;
2048 struct brw_reg dest
;
/* The write produces no register result: use a null destination sized to
 * the dispatch width (vec16 for SIMD16, vec8 otherwise).
 */
2050 if (dispatch_width
== 16)
2051 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
2053 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
/* Gen6+ writes to render target 0 use SENDC rather than SEND --
 * presumably for the dependency-checked send behavior; verify against
 * the hardware PRM.
 */
2055 if (intel
->gen
>= 6 && binding_table_index
== 0) {
2056 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
2058 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2060 /* The execution mask is ignored for render target writes. */
2061 insn
->header
.predicate_control
= 0;
2062 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2064 if (intel
->gen
>= 6) {
2065 /* headerless version, just submit color payload */
2066 src0
= brw_message_reg(msg_reg_nr
);
2068 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
/* Pre-gen6 path: the message register number is carried in the
 * instruction header's destreg field instead.
 */
2070 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2072 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
/* Select the single-source message variant matching the dispatch width. */
2075 if (dispatch_width
== 16)
2076 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
2078 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
2080 brw_set_dest(p
, insn
, dest
);
2081 brw_set_src0(p
, insn
, src0
);
/* NOTE(review): argument lines 2083 and 2085-2088/2090-2091 of this call
 * (insn, msg_control, msg_type, msg_length, header_present,
 * response_length, eot) are missing from the extracted text.
 */
2082 brw_set_dp_write_message(p
,
2084 binding_table_index
,
2089 1, /* pixel scoreboard */
2092 0 /* send_commit_msg */);
/* Emit a sampler SEND message.
 *
 * NOTE(review): partial extraction -- several parameter declarations
 * (e.g. writemask, msg_reg_nr, msg_length, sampler/msg_type inputs) and
 * multiple statement lines are missing from this chunk; review notes
 * below mark the visible gaps.  Confirm against the complete file.
 */
2097 * Texture sample instruction.
2098 * Note: the msg_type plus msg_length values determine exactly what kind
2099 * of sampling operation is performed. See volume 4, page 161 of docs.
2101 void brw_SAMPLE(struct brw_compile
*p
,
2102 struct brw_reg dest
,
2104 struct brw_reg src0
,
2105 GLuint binding_table_index
,
2109 GLuint response_length
,
2112 GLuint header_present
,
2115 struct intel_context
*intel
= &p
->brw
->intel
;
2116 GLboolean need_stall
= 0;
/* A zero writemask would make the send write nothing -- flagged here,
 * apparently tolerated (the diagnostic printf is commented out).
 */
2118 if (writemask
== 0) {
2119 /*printf("%s: zero writemask??\n", __FUNCTION__); */
2123 /* Hardware doesn't do destination dependency checking on send
2124 * instructions properly. Add a workaround which generates the
2125 * dependency by other means. In practice it seems like this bug
2126 * only crops up for texture samples, and only where registers are
2127 * written by the send and then written again later without being
2128 * read in between. Luckily for us, we already track that
2129 * information and use it to modify the writemask for the
2130 * instruction, so that is a guide for whether a workaround is
2133 if (writemask
!= WRITEMASK_XYZW
) {
2134 GLuint dst_offset
= 0;
2135 GLuint i
, newmask
= 0, len
= 0;
/* Scan the leading run of enabled channels...
 * NOTE(review): the loop bodies (lines ~2139-2141 and ~2144-2148,
 * which accumulate newmask/len/dst_offset) are missing here.
 */
2137 for (i
= 0; i
< 4; i
++) {
2138 if (writemask
& (1<<i
))
2142 for (; i
< 4; i
++) {
2143 if (!(writemask
& (1<<i
)))
/* A non-contiguous writemask cannot be expressed via the message's
 * channel-mask field alone -- fall back to a stall workaround.
 */
2149 if (newmask
!= writemask
) {
2151 /* printf("need stall %x %x\n", newmask , writemask); */
2154 GLboolean dispatch_16
= GL_FALSE
;
2156 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
/* Derive SIMD8 vs SIMD16 from the execution size guessed off dest. */
2158 guess_execution_size(p
, p
->current
, dest
);
2159 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
2160 dispatch_16
= GL_TRUE
;
/* Invert: the message header's mask field holds DISABLED channels. */
2162 newmask
= ~newmask
& WRITEMASK_XYZW
;
2164 brw_push_insn_state(p
);
2166 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2167 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
/* Build the message header in m1: copy r0, then place the channel
 * disable mask into header dword 2, bits 12..15.
 */
2169 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2170 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2171 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2173 brw_pop_insn_state(p
);
2175 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2176 dest
= offset(dest
, dst_offset
);
2178 /* For 16-wide dispatch, masked channels are skipped in the
2179 * response. For 8-wide, masked channels still take up slots,
2180 * and are just not written to.
2183 response_length
= len
* 2;
2188 struct brw_instruction
*insn
;
2190 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2192 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2193 insn
->header
.predicate_control
= 0; /* XXX */
2194 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2196 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2198 brw_set_dest(p
, insn
, dest
);
2199 brw_set_src0(p
, insn
, src0
);
/* NOTE(review): the remaining arguments of this call (lines ~2202-2211:
 * sampler, msg_type, response_length, msg_length, header_present, etc.)
 * are missing from the extracted text.
 */
2200 brw_set_sampler_message(p
, insn
,
2201 binding_table_index
,
/* Stall workaround: issue a dependent MOV on the last response register
 * so later writers wait for the sample to complete.
 */
2212 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2214 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2216 brw_push_insn_state(p
);
2217 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2218 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2219 retype(reg
, BRW_REGISTER_TYPE_UD
));
2220 brw_pop_insn_state(p
);
2225 /* All these variables are pretty confusing - we might be better off
2226 * using bitmasks and macros for this, in the old style. Or perhaps
2227 * just having the caller instantiate the fields in dword3 itself.
/* Emit a URB write SEND message.
 *
 * NOTE(review): partial extraction -- several parameters (e.g.
 * msg_reg_nr, msg_length, eot/offset/swizzle controls) are referenced
 * below but their declaration lines are missing, and the argument list
 * of brw_set_urb_message() (lines ~2267-2277) is missing entirely.
 */
2229 void brw_urb_WRITE(struct brw_compile
*p
,
2230 struct brw_reg dest
,
2232 struct brw_reg src0
,
2236 GLuint response_length
,
2238 GLboolean writes_complete
,
2242 struct intel_context
*intel
= &p
->brw
->intel
;
2243 struct brw_instruction
*insn
;
/* On gen6+, SEND cannot take a GRF source: move it into an MRF first. */
2245 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2247 if (intel
->gen
== 7) {
2248 /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2249 brw_OR(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 5),
2250 BRW_REGISTER_TYPE_UD
),
2251 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD
),
2252 brw_imm_ud(0xff00));
2255 insn
= next_insn(p
, BRW_OPCODE_SEND
);
/* The whole payload must fit in the message register file. */
2257 assert(msg_length
< BRW_MAX_MRF
);
2259 brw_set_dest(p
, insn
, dest
);
2260 brw_set_src0(p
, insn
, src0
);
2261 brw_set_src1(p
, insn
, brw_imm_d(0));
/* Record the base message register in the instruction header. */
2264 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2266 brw_set_urb_message(p
,
/* Scan forward from instruction index `start` for the end of the current
 * control-flow block: the next ENDIF, ELSE or WHILE instruction.
 *
 * NOTE(review): partial extraction -- the return-type line, the
 * declaration of `ip`, the `return` inside the matching cases, and the
 * fallback return after the assert are not visible in this chunk.
 */
2279 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2283 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2284 struct brw_instruction
*insn
= &p
->store
[ip
];
2286 switch (insn
->header
.opcode
) {
2287 case BRW_OPCODE_ENDIF
:
2288 case BRW_OPCODE_ELSE
:
2289 case BRW_OPCODE_WHILE
:
/* Falling off the end of the program without finding a block terminator
 * would be a code-generation bug.
 */
2293 assert(!"not reached");
2297 /* There is no DO instruction on gen6, so to find the end of the loop
2298 * we have to see if the loop is jumping back before our start
/* Locate the WHILE that closes the loop containing `start`: it is the
 * first WHILE whose backward jump target lies at or before `start`.
 *
 * NOTE(review): partial extraction -- the declarations of `ip` and `br`
 * (the per-gen branch-offset divisor) and the `return ip;` inside the
 * match are not visible in this chunk.
 */
2302 brw_find_loop_end(struct brw_compile
*p
, int start
)
2304 struct intel_context
*intel
= &p
->brw
->intel
;
2308 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2309 struct brw_instruction
*insn
= &p
->store
[ip
];
/* Gen6 keeps the WHILE's backward offset in bits1 jump_count; other
 * gens use the bits3 break_cont JIP field.
 */
2311 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2312 int jip
= intel
->gen
== 6 ? insn
->bits1
.branch_gen6
.jump_count
2313 : insn
->bits3
.break_cont
.jip
;
/* jip is negative here (a backward branch), so ip + jip/br is the
 * loop-top instruction index.
 */
2314 if (ip
+ jip
/ br
< start
)
2318 assert(!"not reached");
2322 /* After program generation, go back and update the UIP and JIP of
2323 * BREAK and CONT instructions to their correct locations.
/* Patch pass over the emitted instruction store: for each BREAK and
 * CONTINUE, fill in JIP (jump to end of current block) and UIP (jump to
 * end of enclosing loop) as instruction-relative offsets scaled by `br`.
 *
 * NOTE(review): partial extraction -- the declarations of `ip` and `br`,
 * the `break;` statements between cases, and any WHILE-handling case are
 * not visible in this chunk.
 */
2326 brw_set_uip_jip(struct brw_compile
*p
)
2328 struct intel_context
*intel
= &p
->brw
->intel
;
2335 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2336 struct brw_instruction
*insn
= &p
->store
[ip
];
2338 switch (insn
->header
.opcode
) {
2339 case BRW_OPCODE_BREAK
:
2340 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2341 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2342 insn
->bits3
.break_cont
.uip
=
2343 br
* (brw_find_loop_end(p
, ip
) - ip
+ (intel
->gen
== 6 ? 1 : 0));
2345 case BRW_OPCODE_CONTINUE
:
2346 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2347 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
);
/* Both offsets must have been resolved to something non-zero; a zero
 * offset would mean the target lookup failed.
 */
2349 assert(insn
->bits3
.break_cont
.uip
!= 0);
2350 assert(insn
->bits3
.break_cont
.jip
!= 0);
2356 void brw_ff_sync(struct brw_compile
*p
,
2357 struct brw_reg dest
,
2359 struct brw_reg src0
,
2361 GLuint response_length
,
2364 struct intel_context
*intel
= &p
->brw
->intel
;
2365 struct brw_instruction
*insn
;
2367 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2369 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2370 brw_set_dest(p
, insn
, dest
);
2371 brw_set_src0(p
, insn
, src0
);
2372 brw_set_src1(p
, insn
, brw_imm_d(0));
2375 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2377 brw_set_ff_sync_message(p
,