2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size(struct brw_compile
*p
,
45 struct brw_instruction
*insn
,
48 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
56 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
57 * registers, implicitly moving the operand to a message register.
59 * On Sandybridge, this is no longer the case. This function performs the
60 * explicit move; it should be called before emitting a SEND instruction.
63 gen6_resolve_implied_move(struct brw_compile
*p
,
67 struct intel_context
*intel
= &p
->brw
->intel
;
71 if (src
->file
== BRW_ARCHITECTURE_REGISTER_FILE
&& src
->nr
== BRW_ARF_NULL
)
74 brw_push_insn_state(p
);
75 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
76 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
77 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
78 retype(*src
, BRW_REGISTER_TYPE_UD
));
79 brw_pop_insn_state(p
);
80 *src
= brw_message_reg(msg_reg_nr
);
84 static void brw_set_dest(struct brw_compile
*p
,
85 struct brw_instruction
*insn
,
88 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
89 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
90 assert(dest
.nr
< 128);
92 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
93 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
94 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
96 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
97 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
99 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
100 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
101 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
102 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
103 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
106 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
107 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
108 /* even ignored in da16, still need to set as '01' */
109 insn
->bits1
.da16
.dest_horiz_stride
= 1;
113 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
115 /* These are different sizes in align1 vs align16:
117 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
118 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
119 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
120 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
121 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
124 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
125 /* even ignored in da16, still need to set as '01' */
126 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
130 /* NEW: Set the execution size based on dest.width and
131 * insn->compression_control:
133 guess_execution_size(p
, insn
, dest
);
136 extern int reg_type_size
[];
139 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
141 int hstride_for_reg
[] = {0, 1, 2, 4};
142 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
143 int width_for_reg
[] = {1, 2, 4, 8, 16};
144 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
145 int width
, hstride
, vstride
, execsize
;
147 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
148 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
149 * mean the destination has to be 128-bit aligned and the
150 * destination horiz stride has to be a word.
152 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
153 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
154 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
160 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
161 reg
.file
== BRW_ARF_NULL
)
164 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
165 hstride
= hstride_for_reg
[reg
.hstride
];
167 if (reg
.vstride
== 0xf) {
170 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
171 vstride
= vstride_for_reg
[reg
.vstride
];
174 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
175 width
= width_for_reg
[reg
.width
];
177 assert(insn
->header
.execution_size
>= 0 &&
178 insn
->header
.execution_size
< Elements(execsize_for_reg
));
179 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
181 /* Restrictions from 3.3.10: Register Region Restrictions. */
183 assert(execsize
>= width
);
186 if (execsize
== width
&& hstride
!= 0) {
187 assert(vstride
== -1 || vstride
== width
* hstride
);
191 if (execsize
== width
&& hstride
== 0) {
192 /* no restriction on vstride. */
197 assert(hstride
== 0);
201 if (execsize
== 1 && width
== 1) {
202 assert(hstride
== 0);
203 assert(vstride
== 0);
207 if (vstride
== 0 && hstride
== 0) {
211 /* 10. Check destination issues. */
214 static void brw_set_src0( struct brw_instruction
*insn
,
217 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
218 assert(reg
.nr
< 128);
220 validate_reg(insn
, reg
);
222 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
223 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
224 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
225 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
226 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
228 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
229 insn
->bits3
.ud
= reg
.dw1
.ud
;
231 /* Required to set some fields in src1 as well:
233 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
234 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
238 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
239 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
240 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
241 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
244 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
245 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
249 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
251 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
252 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
255 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
259 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
260 if (reg
.width
== BRW_WIDTH_1
&&
261 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
262 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
263 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
264 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
267 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
268 insn
->bits2
.da1
.src0_width
= reg
.width
;
269 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
273 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
274 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
275 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
276 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
278 /* This is an oddity of the fact we're using the same
279 * descriptions for registers in align_16 as align_1:
281 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
282 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
284 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
290 void brw_set_src1( struct brw_instruction
*insn
,
293 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
295 assert(reg
.nr
< 128);
297 validate_reg(insn
, reg
);
299 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
300 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
301 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
302 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
304 /* Only src1 can be immediate in two-argument instructions.
306 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
308 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
309 insn
->bits3
.ud
= reg
.dw1
.ud
;
312 /* This is a hardware restriction, which may or may not be lifted
315 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
316 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
318 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
319 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
320 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
323 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
324 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
327 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
328 if (reg
.width
== BRW_WIDTH_1
&&
329 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
330 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
331 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
332 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
335 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
336 insn
->bits3
.da1
.src1_width
= reg
.width
;
337 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
341 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
342 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
343 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
344 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
346 /* This is an oddity of the fact we're using the same
347 * descriptions for registers in align_16 as align_1:
349 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
350 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
352 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
359 static void brw_set_math_message( struct brw_context
*brw
,
360 struct brw_instruction
*insn
,
362 GLuint response_length
,
365 GLboolean low_precision
,
369 struct intel_context
*intel
= &brw
->intel
;
370 brw_set_src1(insn
, brw_imm_d(0));
372 if (intel
->gen
== 5) {
373 insn
->bits3
.math_gen5
.function
= function
;
374 insn
->bits3
.math_gen5
.int_type
= integer_type
;
375 insn
->bits3
.math_gen5
.precision
= low_precision
;
376 insn
->bits3
.math_gen5
.saturate
= saturate
;
377 insn
->bits3
.math_gen5
.data_type
= dataType
;
378 insn
->bits3
.math_gen5
.snapshot
= 0;
379 insn
->bits3
.math_gen5
.header_present
= 0;
380 insn
->bits3
.math_gen5
.response_length
= response_length
;
381 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
382 insn
->bits3
.math_gen5
.end_of_thread
= 0;
383 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
384 insn
->bits2
.send_gen5
.end_of_thread
= 0;
386 insn
->bits3
.math
.function
= function
;
387 insn
->bits3
.math
.int_type
= integer_type
;
388 insn
->bits3
.math
.precision
= low_precision
;
389 insn
->bits3
.math
.saturate
= saturate
;
390 insn
->bits3
.math
.data_type
= dataType
;
391 insn
->bits3
.math
.response_length
= response_length
;
392 insn
->bits3
.math
.msg_length
= msg_length
;
393 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
394 insn
->bits3
.math
.end_of_thread
= 0;
399 static void brw_set_ff_sync_message(struct brw_context
*brw
,
400 struct brw_instruction
*insn
,
402 GLuint response_length
,
403 GLboolean end_of_thread
)
405 struct intel_context
*intel
= &brw
->intel
;
406 brw_set_src1(insn
, brw_imm_d(0));
408 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
409 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
410 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
411 insn
->bits3
.urb_gen5
.allocate
= allocate
;
412 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
413 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
414 insn
->bits3
.urb_gen5
.header_present
= 1;
415 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
416 insn
->bits3
.urb_gen5
.msg_length
= 1;
417 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
418 if (intel
->gen
>= 6) {
419 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
421 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
422 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
426 static void brw_set_urb_message( struct brw_context
*brw
,
427 struct brw_instruction
*insn
,
431 GLuint response_length
,
432 GLboolean end_of_thread
,
435 GLuint swizzle_control
)
437 struct intel_context
*intel
= &brw
->intel
;
438 brw_set_src1(insn
, brw_imm_d(0));
440 if (intel
->gen
>= 5) {
441 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
442 insn
->bits3
.urb_gen5
.offset
= offset
;
443 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
444 insn
->bits3
.urb_gen5
.allocate
= allocate
;
445 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
446 insn
->bits3
.urb_gen5
.complete
= complete
;
447 insn
->bits3
.urb_gen5
.header_present
= 1;
448 insn
->bits3
.urb_gen5
.response_length
= response_length
;
449 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
450 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
451 if (intel
->gen
>= 6) {
452 /* For SNB, the SFID bits moved to the condmod bits, and
453 * EOT stayed in bits3 above. Does the EOT bit setting
454 * below on Ironlake even do anything?
456 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
458 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
459 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
462 insn
->bits3
.urb
.opcode
= 0; /* ? */
463 insn
->bits3
.urb
.offset
= offset
;
464 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
465 insn
->bits3
.urb
.allocate
= allocate
;
466 insn
->bits3
.urb
.used
= used
; /* ? */
467 insn
->bits3
.urb
.complete
= complete
;
468 insn
->bits3
.urb
.response_length
= response_length
;
469 insn
->bits3
.urb
.msg_length
= msg_length
;
470 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
471 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
475 static void brw_set_dp_write_message( struct brw_context
*brw
,
476 struct brw_instruction
*insn
,
477 GLuint binding_table_index
,
481 GLboolean header_present
,
482 GLuint pixel_scoreboard_clear
,
483 GLuint response_length
,
484 GLuint end_of_thread
,
485 GLuint send_commit_msg
)
487 struct intel_context
*intel
= &brw
->intel
;
488 brw_set_src1(insn
, brw_imm_ud(0));
490 if (intel
->gen
>= 6) {
491 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
492 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
493 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
494 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
495 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
496 insn
->bits3
.dp_render_cache
.header_present
= header_present
;
497 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
498 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
499 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
500 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
501 /* XXX really need below? */
502 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
503 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
504 } else if (intel
->gen
== 5) {
505 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
506 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
507 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
508 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
509 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
510 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
511 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
512 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
513 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
514 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
515 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
517 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
518 insn
->bits3
.dp_write
.msg_control
= msg_control
;
519 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
520 insn
->bits3
.dp_write
.msg_type
= msg_type
;
521 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
522 insn
->bits3
.dp_write
.response_length
= response_length
;
523 insn
->bits3
.dp_write
.msg_length
= msg_length
;
524 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
525 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
530 brw_set_dp_read_message(struct brw_context
*brw
,
531 struct brw_instruction
*insn
,
532 GLuint binding_table_index
,
537 GLuint response_length
)
539 struct intel_context
*intel
= &brw
->intel
;
540 brw_set_src1(insn
, brw_imm_d(0));
542 if (intel
->gen
>= 6) {
543 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
544 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
545 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= 0;
546 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
547 insn
->bits3
.dp_render_cache
.send_commit_msg
= 0;
548 insn
->bits3
.dp_render_cache
.header_present
= 1;
549 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
550 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
551 insn
->bits3
.dp_render_cache
.end_of_thread
= 0;
552 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
553 /* XXX really need below? */
554 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
555 insn
->bits2
.send_gen5
.end_of_thread
= 0;
556 } else if (intel
->gen
== 5) {
557 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
558 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
559 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
560 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
561 insn
->bits3
.dp_read_gen5
.header_present
= 1;
562 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
563 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
564 insn
->bits3
.dp_read_gen5
.pad1
= 0;
565 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
566 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
567 insn
->bits2
.send_gen5
.end_of_thread
= 0;
568 } else if (intel
->is_g4x
) {
569 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
570 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
571 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
572 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
573 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
574 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
575 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
576 insn
->bits3
.dp_read_g4x
.pad1
= 0;
577 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
579 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
580 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
581 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
582 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
583 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
584 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
585 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
586 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
587 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
591 static void brw_set_sampler_message(struct brw_context
*brw
,
592 struct brw_instruction
*insn
,
593 GLuint binding_table_index
,
596 GLuint response_length
,
599 GLuint header_present
,
602 struct intel_context
*intel
= &brw
->intel
;
604 brw_set_src1(insn
, brw_imm_d(0));
606 if (intel
->gen
>= 5) {
607 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
608 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
609 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
610 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
611 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
612 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
613 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
614 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
616 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
618 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
619 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
621 } else if (intel
->is_g4x
) {
622 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
623 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
624 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
625 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
626 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
627 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
628 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
630 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
631 insn
->bits3
.sampler
.sampler
= sampler
;
632 insn
->bits3
.sampler
.msg_type
= msg_type
;
633 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
634 insn
->bits3
.sampler
.response_length
= response_length
;
635 insn
->bits3
.sampler
.msg_length
= msg_length
;
636 insn
->bits3
.sampler
.end_of_thread
= eot
;
637 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
643 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
646 struct brw_instruction
*insn
;
648 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
650 insn
= &p
->store
[p
->nr_insn
++];
651 memcpy(insn
, p
->current
, sizeof(*insn
));
653 /* Reset this one-shot flag:
656 if (p
->current
->header
.destreg__conditionalmod
) {
657 p
->current
->header
.destreg__conditionalmod
= 0;
658 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
661 insn
->header
.opcode
= opcode
;
666 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
671 struct brw_instruction
*insn
= next_insn(p
, opcode
);
672 brw_set_dest(p
, insn
, dest
);
673 brw_set_src0(insn
, src
);
677 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
681 struct brw_reg src1
)
683 struct brw_instruction
*insn
= next_insn(p
, opcode
);
684 brw_set_dest(p
, insn
, dest
);
685 brw_set_src0(insn
, src0
);
686 brw_set_src1(insn
, src1
);
691 /***********************************************************************
692 * Convenience routines.
695 struct brw_instruction *brw_##OP(struct brw_compile *p, \
696 struct brw_reg dest, \
697 struct brw_reg src0) \
699 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
703 struct brw_instruction *brw_##OP(struct brw_compile *p, \
704 struct brw_reg dest, \
705 struct brw_reg src0, \
706 struct brw_reg src1) \
708 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
711 /* Rounding operations (other than RNDD) require two instructions - the first
712 * stores a rounded value (possibly the wrong way) in the dest register, but
713 * also sets a per-channel "increment bit" in the flag register. A predicated
714 * add of 1.0 fixes dest to contain the desired result.
717 void brw_##OP(struct brw_compile *p, \
718 struct brw_reg dest, \
719 struct brw_reg src) \
721 struct brw_instruction *rnd, *add; \
722 rnd = next_insn(p, BRW_OPCODE_##OP); \
723 brw_set_dest(p, rnd, dest); \
724 brw_set_src0(rnd, src); \
725 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
727 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
728 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
760 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
766 if (src0
.type
== BRW_REGISTER_TYPE_F
||
767 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
768 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
769 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
770 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
773 if (src1
.type
== BRW_REGISTER_TYPE_F
||
774 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
775 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
776 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
777 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
780 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
783 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
789 if (src0
.type
== BRW_REGISTER_TYPE_D
||
790 src0
.type
== BRW_REGISTER_TYPE_UD
||
791 src1
.type
== BRW_REGISTER_TYPE_D
||
792 src1
.type
== BRW_REGISTER_TYPE_UD
) {
793 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
796 if (src0
.type
== BRW_REGISTER_TYPE_F
||
797 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
798 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
799 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
800 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
803 if (src1
.type
== BRW_REGISTER_TYPE_F
||
804 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
805 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
806 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
807 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
810 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
811 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
812 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
813 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
815 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
819 void brw_NOP(struct brw_compile
*p
)
821 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
822 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
823 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
824 brw_set_src1(insn
, brw_imm_ud(0x0));
831 /***********************************************************************
832 * Comparisons, if/else/endif
835 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
840 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
842 insn
->header
.execution_size
= 1;
843 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
844 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
846 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
851 /* EU takes the value from the flag register and pushes it onto some
852 * sort of a stack (presumably merging with any flag value already on
853 * the stack). Within an if block, the flags at the top of the stack
854 * control execution on each channel of the unit, eg. on each of the
855 * 16 pixel values in our wm programs.
857 * When the matching 'else' instruction is reached (presumably by
858 * countdown of the instruction count patched in by our ELSE/ENDIF
859 * functions), the relevent flags are inverted.
861 * When the matching 'endif' instruction is reached, the flags are
862 * popped off. If the stack is now empty, normal execution resumes.
864 * No attempt is made to deal with stack overflow (14 elements?).
866 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
868 struct intel_context
*intel
= &p
->brw
->intel
;
869 struct brw_instruction
*insn
;
871 if (p
->single_program_flow
) {
872 assert(execute_size
== BRW_EXECUTE_1
);
874 insn
= next_insn(p
, BRW_OPCODE_ADD
);
875 insn
->header
.predicate_inverse
= 1;
877 insn
= next_insn(p
, BRW_OPCODE_IF
);
880 /* Override the defaults for this instruction:
882 if (intel
->gen
< 6) {
883 brw_set_dest(p
, insn
, brw_ip_reg());
884 brw_set_src0(insn
, brw_ip_reg());
885 brw_set_src1(insn
, brw_imm_d(0x0));
887 brw_set_dest(p
, insn
, brw_imm_w(0));
888 insn
->bits1
.branch_gen6
.jump_count
= 0;
889 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
890 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
893 insn
->header
.execution_size
= execute_size
;
894 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
895 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
896 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
897 if (!p
->single_program_flow
)
898 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
900 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
905 struct brw_instruction
*
906 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
907 struct brw_reg src0
, struct brw_reg src1
)
909 struct brw_instruction
*insn
;
911 insn
= next_insn(p
, BRW_OPCODE_IF
);
913 brw_set_dest(p
, insn
, brw_imm_w(0));
914 insn
->header
.execution_size
= BRW_EXECUTE_8
;
915 insn
->bits1
.branch_gen6
.jump_count
= 0;
916 brw_set_src0(insn
, src0
);
917 brw_set_src1(insn
, src1
);
919 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
920 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
921 insn
->header
.destreg__conditionalmod
= conditional
;
923 if (!p
->single_program_flow
)
924 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
929 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
930 struct brw_instruction
*if_insn
)
932 struct intel_context
*intel
= &p
->brw
->intel
;
933 struct brw_instruction
*insn
;
936 /* jump count is for 64bit data chunk each, so one 128bit
937 instruction requires 2 chunks. */
941 if (p
->single_program_flow
) {
942 insn
= next_insn(p
, BRW_OPCODE_ADD
);
944 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
947 if (intel
->gen
< 6) {
948 brw_set_dest(p
, insn
, brw_ip_reg());
949 brw_set_src0(insn
, brw_ip_reg());
950 brw_set_src1(insn
, brw_imm_d(0x0));
952 brw_set_dest(p
, insn
, brw_imm_w(0));
953 insn
->bits1
.branch_gen6
.jump_count
= 0;
954 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
955 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
958 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
959 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
960 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
961 if (!p
->single_program_flow
)
962 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
964 /* Patch the if instruction to point at this instruction.
966 if (p
->single_program_flow
) {
967 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
969 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
971 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
973 if (intel
->gen
< 6) {
974 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
975 if_insn
->bits3
.if_else
.pop_count
= 0;
976 if_insn
->bits3
.if_else
.pad0
= 0;
978 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
985 void brw_ENDIF(struct brw_compile
*p
,
986 struct brw_instruction
*patch_insn
)
988 struct intel_context
*intel
= &p
->brw
->intel
;
994 if (p
->single_program_flow
) {
995 /* In single program flow mode, there's no need to execute an ENDIF,
996 * since we don't need to do any stack operations, and if we're executing
997 * currently, we want to just continue executing.
999 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
1001 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
1003 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
1005 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1007 if (intel
->gen
< 6) {
1008 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1009 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1010 brw_set_src1(insn
, brw_imm_d(0x0));
1012 brw_set_dest(p
, insn
, brw_imm_w(0));
1013 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1014 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1017 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1018 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
1019 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1020 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1023 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
1025 assert(patch_insn
->bits1
.branch_gen6
.jump_count
== 0);
1027 /* Patch the if or else instructions to point at this or the next
1028 * instruction respectively.
1030 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
1031 if (intel
->gen
< 6) {
1032 /* Turn it into an IFF, which means no mask stack operations for
1033 * all-false and jumping past the ENDIF.
1035 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
1036 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1037 patch_insn
->bits3
.if_else
.pop_count
= 0;
1038 patch_insn
->bits3
.if_else
.pad0
= 0;
1040 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1041 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1044 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
1045 if (intel
->gen
< 6) {
1046 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1049 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1050 patch_insn
->bits3
.if_else
.pop_count
= 1;
1051 patch_insn
->bits3
.if_else
.pad0
= 0;
1053 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1054 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1058 /* Also pop item off the stack in the endif instruction:
1060 if (intel
->gen
< 6) {
1061 insn
->bits3
.if_else
.jump_count
= 0;
1062 insn
->bits3
.if_else
.pop_count
= 1;
1063 insn
->bits3
.if_else
.pad0
= 0;
1065 insn
->bits1
.branch_gen6
.jump_count
= 2;
1070 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1072 struct intel_context
*intel
= &p
->brw
->intel
;
1073 struct brw_instruction
*insn
;
1075 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1076 if (intel
->gen
>= 6) {
1077 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1078 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1079 brw_set_src1(insn
, brw_imm_d(0x0));
1081 brw_set_dest(p
, insn
, brw_ip_reg());
1082 brw_set_src0(insn
, brw_ip_reg());
1083 brw_set_src1(insn
, brw_imm_d(0x0));
1084 insn
->bits3
.if_else
.pad0
= 0;
1085 insn
->bits3
.if_else
.pop_count
= pop_count
;
1087 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1088 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1093 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1094 struct brw_instruction
*do_insn
)
1096 struct brw_instruction
*insn
;
1099 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1100 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1101 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1102 brw_set_dest(p
, insn
, brw_ip_reg());
1103 brw_set_src0(insn
, brw_ip_reg());
1104 brw_set_src1(insn
, brw_imm_d(0x0));
1106 insn
->bits3
.break_cont
.uip
= br
* (do_insn
- insn
);
1108 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1109 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1113 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1115 struct brw_instruction
*insn
;
1116 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1117 brw_set_dest(p
, insn
, brw_ip_reg());
1118 brw_set_src0(insn
, brw_ip_reg());
1119 brw_set_src1(insn
, brw_imm_d(0x0));
1120 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1121 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1122 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1123 insn
->bits3
.if_else
.pad0
= 0;
1124 insn
->bits3
.if_else
.pop_count
= pop_count
;
1130 * The DO/WHILE is just an unterminated loop -- break or continue are
1131 * used for control within the loop. We have a few ways they can be
1134 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1135 * jip and no DO instruction.
1137 * For non-uniform control flow pre-gen6, there's a DO instruction to
1138 * push the mask, and a WHILE to jump back, and BREAK to get out and
1141 * For gen6, there's no more mask stack, so no need for DO. WHILE
1142 * just points back to the first instruction of the loop.
1144 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1146 struct intel_context
*intel
= &p
->brw
->intel
;
1148 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1149 return &p
->store
[p
->nr_insn
];
1151 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1153 /* Override the defaults for this instruction:
1155 brw_set_dest(p
, insn
, brw_null_reg());
1156 brw_set_src0(insn
, brw_null_reg());
1157 brw_set_src1(insn
, brw_null_reg());
1159 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1160 insn
->header
.execution_size
= execute_size
;
1161 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1162 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1163 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1171 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1172 struct brw_instruction
*do_insn
)
1174 struct intel_context
*intel
= &p
->brw
->intel
;
1175 struct brw_instruction
*insn
;
1178 if (intel
->gen
>= 5)
1181 if (intel
->gen
>= 6) {
1182 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1184 brw_set_dest(p
, insn
, brw_imm_w(0));
1185 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1186 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1187 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1189 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1190 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1192 if (p
->single_program_flow
) {
1193 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1195 brw_set_dest(p
, insn
, brw_ip_reg());
1196 brw_set_src0(insn
, brw_ip_reg());
1197 brw_set_src1(insn
, brw_imm_d((do_insn
- insn
) * 16));
1198 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1200 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1202 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1204 brw_set_dest(p
, insn
, brw_ip_reg());
1205 brw_set_src0(insn
, brw_ip_reg());
1206 brw_set_src1(insn
, brw_imm_d(0));
1208 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1209 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1210 insn
->bits3
.if_else
.pop_count
= 0;
1211 insn
->bits3
.if_else
.pad0
= 0;
1214 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1215 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1223 void brw_land_fwd_jump(struct brw_compile
*p
,
1224 struct brw_instruction
*jmp_insn
)
1226 struct intel_context
*intel
= &p
->brw
->intel
;
1227 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1230 if (intel
->gen
>= 5)
1233 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1234 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1236 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1241 /* To integrate with the above, it makes sense that the comparison
1242 * instruction should populate the flag register. It might be simpler
1243 * just to use the flag reg for most WM tasks?
1245 void brw_CMP(struct brw_compile
*p
,
1246 struct brw_reg dest
,
1248 struct brw_reg src0
,
1249 struct brw_reg src1
)
1251 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1253 insn
->header
.destreg__conditionalmod
= conditional
;
1254 brw_set_dest(p
, insn
, dest
);
1255 brw_set_src0(insn
, src0
);
1256 brw_set_src1(insn
, src1
);
1258 /* guess_execution_size(insn, src0); */
1261 /* Make it so that future instructions will use the computed flag
1262 * value until brw_set_predicate_control_flag_value() is called
1265 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1267 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1268 p
->flag_value
= 0xff;
1272 /* Issue 'wait' instruction for n1, host could program MMIO
1273 to wake up thread. */
1274 void brw_WAIT (struct brw_compile
*p
)
1276 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1277 struct brw_reg src
= brw_notification_1_reg();
1279 brw_set_dest(p
, insn
, src
);
1280 brw_set_src0(insn
, src
);
1281 brw_set_src1(insn
, brw_null_reg());
1282 insn
->header
.execution_size
= 0; /* must */
1283 insn
->header
.predicate_control
= 0;
1284 insn
->header
.compression_control
= 0;
1288 /***********************************************************************
1289 * Helpers for the various SEND message types:
1292 /** Extended math function, float[8].
1294 void brw_math( struct brw_compile
*p
,
1295 struct brw_reg dest
,
1303 struct intel_context
*intel
= &p
->brw
->intel
;
1305 if (intel
->gen
>= 6) {
1306 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1308 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1309 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1311 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1312 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1314 /* Source modifiers are ignored for extended math instructions. */
1315 assert(!src
.negate
);
1318 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1319 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1320 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1323 /* Math is the same ISA format as other opcodes, except that CondModifier
1324 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1326 insn
->header
.destreg__conditionalmod
= function
;
1327 insn
->header
.saturate
= saturate
;
1329 brw_set_dest(p
, insn
, dest
);
1330 brw_set_src0(insn
, src
);
1331 brw_set_src1(insn
, brw_null_reg());
1333 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1334 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1335 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1336 /* Example code doesn't set predicate_control for send
1339 insn
->header
.predicate_control
= 0;
1340 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1342 brw_set_dest(p
, insn
, dest
);
1343 brw_set_src0(insn
, src
);
1344 brw_set_math_message(p
->brw
,
1346 msg_length
, response_length
,
1348 BRW_MATH_INTEGER_UNSIGNED
,
1355 /** Extended math function, float[8].
1357 void brw_math2(struct brw_compile
*p
,
1358 struct brw_reg dest
,
1360 struct brw_reg src0
,
1361 struct brw_reg src1
)
1363 struct intel_context
*intel
= &p
->brw
->intel
;
1364 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1366 assert(intel
->gen
>= 6);
1370 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1371 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1372 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1374 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1375 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1376 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1378 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1379 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1380 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1381 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1384 /* Source modifiers are ignored for extended math instructions. */
1385 assert(!src0
.negate
);
1387 assert(!src1
.negate
);
1390 /* Math is the same ISA format as other opcodes, except that CondModifier
1391 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1393 insn
->header
.destreg__conditionalmod
= function
;
1395 brw_set_dest(p
, insn
, dest
);
1396 brw_set_src0(insn
, src0
);
1397 brw_set_src1(insn
, src1
);
1401 * Extended math function, float[16].
1402 * Use 2 send instructions.
1404 void brw_math_16( struct brw_compile
*p
,
1405 struct brw_reg dest
,
1412 struct intel_context
*intel
= &p
->brw
->intel
;
1413 struct brw_instruction
*insn
;
1414 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1415 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1417 if (intel
->gen
>= 6) {
1418 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1420 /* Math is the same ISA format as other opcodes, except that CondModifier
1421 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1423 insn
->header
.destreg__conditionalmod
= function
;
1424 insn
->header
.saturate
= saturate
;
1426 /* Source modifiers are ignored for extended math instructions. */
1427 assert(!src
.negate
);
1430 brw_set_dest(p
, insn
, dest
);
1431 brw_set_src0(insn
, src
);
1432 brw_set_src1(insn
, brw_null_reg());
1436 /* First instruction:
1438 brw_push_insn_state(p
);
1439 brw_set_predicate_control_flag_value(p
, 0xff);
1440 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1442 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1443 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1445 brw_set_dest(p
, insn
, dest
);
1446 brw_set_src0(insn
, src
);
1447 brw_set_math_message(p
->brw
,
1449 msg_length
, response_length
,
1451 BRW_MATH_INTEGER_UNSIGNED
,
1454 BRW_MATH_DATA_VECTOR
);
1456 /* Second instruction:
1458 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1459 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1460 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1462 brw_set_dest(p
, insn
, offset(dest
,1));
1463 brw_set_src0(insn
, src
);
1464 brw_set_math_message(p
->brw
,
1466 msg_length
, response_length
,
1468 BRW_MATH_INTEGER_UNSIGNED
,
1471 BRW_MATH_DATA_VECTOR
);
1473 brw_pop_insn_state(p
);
1478 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1479 * using a constant offset per channel.
1481 * The offset must be aligned to oword size (16 bytes). Used for
1482 * register spilling.
1484 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1489 struct intel_context
*intel
= &p
->brw
->intel
;
1490 uint32_t msg_control
;
1493 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1495 if (num_regs
== 1) {
1496 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1499 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1503 /* Set up the message header. This is g0, with g0.2 filled with
1504 * the offset. We don't want to leave our offset around in g0 or
1505 * it'll screw up texture samples, so set it up inside the message
1509 brw_push_insn_state(p
);
1510 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1511 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1513 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1515 /* set message header global offset field (reg 0, element 2) */
1517 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1519 2), BRW_REGISTER_TYPE_UD
),
1520 brw_imm_ud(offset
));
1522 brw_pop_insn_state(p
);
1526 struct brw_reg dest
;
1527 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1528 int send_commit_msg
;
1529 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1530 BRW_REGISTER_TYPE_UW
);
1532 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1533 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1534 src_header
= vec16(src_header
);
1536 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1537 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1539 /* Until gen6, writes followed by reads from the same location
1540 * are not guaranteed to be ordered unless write_commit is set.
1541 * If set, then a no-op write is issued to the destination
1542 * register to set a dependency, and a read from the destination
1543 * can be used to ensure the ordering.
1545 * For gen6, only writes between different threads need ordering
1546 * protection. Our use of DP writes is all about register
1547 * spilling within a thread.
1549 if (intel
->gen
>= 6) {
1550 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1551 send_commit_msg
= 0;
1554 send_commit_msg
= 1;
1557 brw_set_dest(p
, insn
, dest
);
1558 brw_set_src0(insn
, brw_null_reg());
1560 brw_set_dp_write_message(p
->brw
,
1562 255, /* binding table index (255=stateless) */
1564 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1566 GL_TRUE
, /* header_present */
1567 0, /* pixel scoreboard */
1568 send_commit_msg
, /* response_length */
1576 * Read a block of owords (half a GRF each) from the scratch buffer
1577 * using a constant index per channel.
1579 * Offset must be aligned to oword size (16 bytes). Used for register
1583 brw_oword_block_read_scratch(struct brw_compile
*p
,
1584 struct brw_reg dest
,
1589 uint32_t msg_control
;
1592 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1593 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1595 if (num_regs
== 1) {
1596 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1599 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1604 brw_push_insn_state(p
);
1605 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1606 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1608 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1610 /* set message header global offset field (reg 0, element 2) */
1612 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1614 2), BRW_REGISTER_TYPE_UD
),
1615 brw_imm_ud(offset
));
1617 brw_pop_insn_state(p
);
1621 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1623 assert(insn
->header
.predicate_control
== 0);
1624 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1625 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1627 brw_set_dest(p
, insn
, dest
); /* UW? */
1628 brw_set_src0(insn
, brw_null_reg());
1630 brw_set_dp_read_message(p
->brw
,
1632 255, /* binding table index (255=stateless) */
1634 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1635 1, /* target cache (render/scratch) */
1642 * Read a float[4] vector from the data port Data Cache (const buffer).
1643 * Location (in buffer) should be a multiple of 16.
1644 * Used for fetching shader constants.
1646 void brw_oword_block_read(struct brw_compile
*p
,
1647 struct brw_reg dest
,
1650 uint32_t bind_table_index
)
1652 struct intel_context
*intel
= &p
->brw
->intel
;
1654 /* On newer hardware, offset is in units of owords. */
1655 if (intel
->gen
>= 6)
1658 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1660 brw_push_insn_state(p
);
1661 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1662 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1663 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1665 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1667 /* set message header global offset field (reg 0, element 2) */
1669 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1671 2), BRW_REGISTER_TYPE_UD
),
1672 brw_imm_ud(offset
));
1674 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1675 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1677 /* cast dest to a uword[8] vector */
1678 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1680 brw_set_dest(p
, insn
, dest
);
1681 if (intel
->gen
>= 6) {
1682 brw_set_src0(insn
, mrf
);
1684 brw_set_src0(insn
, brw_null_reg());
1687 brw_set_dp_read_message(p
->brw
,
1690 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1691 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1692 0, /* source cache = data cache */
1694 1); /* response_length (1 reg, 2 owords!) */
1696 brw_pop_insn_state(p
);
1700 * Read a set of dwords from the data port Data Cache (const buffer).
1702 * Location (in buffer) appears as UD offsets in the register after
1703 * the provided mrf header reg.
1705 void brw_dword_scattered_read(struct brw_compile
*p
,
1706 struct brw_reg dest
,
1708 uint32_t bind_table_index
)
1710 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1712 brw_push_insn_state(p
);
1713 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1714 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1715 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1716 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1717 brw_pop_insn_state(p
);
1719 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1720 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1722 /* cast dest to a uword[8] vector */
1723 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1725 brw_set_dest(p
, insn
, dest
);
1726 brw_set_src0(insn
, brw_null_reg());
1728 brw_set_dp_read_message(p
->brw
,
1731 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1732 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1733 0, /* source cache = data cache */
1735 1); /* response_length */
1741 * Read float[4] constant(s) from VS constant buffer.
1742 * For relative addressing, two float[4] constants will be read into 'dest'.
1743 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1745 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1746 struct brw_reg dest
,
1748 GLuint bind_table_index
)
1750 struct intel_context
*intel
= &p
->brw
->intel
;
1751 struct brw_instruction
*insn
;
1752 GLuint msg_reg_nr
= 1;
1754 if (intel
->gen
>= 6)
1757 /* Setup MRF[1] with location/offset into const buffer */
1758 brw_push_insn_state(p
);
1759 brw_set_access_mode(p
, BRW_ALIGN_1
);
1760 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1761 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1762 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1763 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1764 BRW_REGISTER_TYPE_UD
),
1765 brw_imm_ud(location
));
1766 brw_pop_insn_state(p
);
1768 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1770 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1771 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1772 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1773 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1775 brw_set_dest(p
, insn
, dest
);
1776 if (intel
->gen
>= 6) {
1777 brw_set_src0(insn
, brw_message_reg(msg_reg_nr
));
1779 brw_set_src0(insn
, brw_null_reg());
1782 brw_set_dp_read_message(p
->brw
,
1786 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1787 0, /* source cache = data cache */
1789 1); /* response_length (1 Oword) */
1793 * Read a float[4] constant per vertex from VS constant buffer, with
1794 * relative addressing.
1796 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1797 struct brw_reg dest
,
1798 struct brw_reg addr_reg
,
1800 GLuint bind_table_index
)
1802 struct intel_context
*intel
= &p
->brw
->intel
;
1805 /* Setup MRF[1] with offset into const buffer */
1806 brw_push_insn_state(p
);
1807 brw_set_access_mode(p
, BRW_ALIGN_1
);
1808 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1809 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1810 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1812 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1815 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1816 addr_reg
, brw_imm_d(offset
));
1817 brw_pop_insn_state(p
);
1819 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1821 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1822 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1823 insn
->header
.destreg__conditionalmod
= 0;
1824 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1826 brw_set_dest(p
, insn
, dest
);
1827 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1829 if (intel
->gen
== 6)
1830 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1831 else if (intel
->gen
== 5 || intel
->is_g4x
)
1832 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1834 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1836 brw_set_dp_read_message(p
->brw
,
1839 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1841 0, /* source cache = data cache */
1843 1); /* response_length */
1848 void brw_fb_WRITE(struct brw_compile
*p
,
1850 struct brw_reg dest
,
1852 struct brw_reg src0
,
1853 GLuint binding_table_index
,
1855 GLuint response_length
,
1857 GLboolean header_present
)
1859 struct intel_context
*intel
= &p
->brw
->intel
;
1860 struct brw_instruction
*insn
;
1861 GLuint msg_control
, msg_type
;
1863 if (intel
->gen
>= 6 && binding_table_index
== 0) {
1864 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
1866 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1868 /* The execution mask is ignored for render target writes. */
1869 insn
->header
.predicate_control
= 0;
1870 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1872 if (intel
->gen
>= 6) {
1873 /* headerless version, just submit color payload */
1874 src0
= brw_message_reg(msg_reg_nr
);
1876 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1878 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1880 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1883 if (dispatch_width
== 16)
1884 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1886 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1888 brw_set_dest(p
, insn
, dest
);
1889 brw_set_src0(insn
, src0
);
1890 brw_set_dp_write_message(p
->brw
,
1892 binding_table_index
,
1897 1, /* pixel scoreboard */
1900 0 /* send_commit_msg */);
1905 * Texture sample instruction.
1906 * Note: the msg_type plus msg_length values determine exactly what kind
1907 * of sampling operation is performed. See volume 4, page 161 of docs.
1909 void brw_SAMPLE(struct brw_compile
*p
,
1910 struct brw_reg dest
,
1912 struct brw_reg src0
,
1913 GLuint binding_table_index
,
1917 GLuint response_length
,
1920 GLuint header_present
,
1923 struct intel_context
*intel
= &p
->brw
->intel
;
1924 GLboolean need_stall
= 0;
1926 if (writemask
== 0) {
1927 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1931 /* Hardware doesn't do destination dependency checking on send
1932 * instructions properly. Add a workaround which generates the
1933 * dependency by other means. In practice it seems like this bug
1934 * only crops up for texture samples, and only where registers are
1935 * written by the send and then written again later without being
1936 * read in between. Luckily for us, we already track that
1937 * information and use it to modify the writemask for the
1938 * instruction, so that is a guide for whether a workaround is
1941 if (writemask
!= WRITEMASK_XYZW
) {
1942 GLuint dst_offset
= 0;
1943 GLuint i
, newmask
= 0, len
= 0;
1945 for (i
= 0; i
< 4; i
++) {
1946 if (writemask
& (1<<i
))
1950 for (; i
< 4; i
++) {
1951 if (!(writemask
& (1<<i
)))
1957 if (newmask
!= writemask
) {
1959 /* printf("need stall %x %x\n", newmask , writemask); */
1962 GLboolean dispatch_16
= GL_FALSE
;
1964 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1966 guess_execution_size(p
, p
->current
, dest
);
1967 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1968 dispatch_16
= GL_TRUE
;
1970 newmask
= ~newmask
& WRITEMASK_XYZW
;
1972 brw_push_insn_state(p
);
1974 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1975 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1977 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
1978 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
1979 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1981 brw_pop_insn_state(p
);
1983 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1984 dest
= offset(dest
, dst_offset
);
1986 /* For 16-wide dispatch, masked channels are skipped in the
1987 * response. For 8-wide, masked channels still take up slots,
1988 * and are just not written to.
1991 response_length
= len
* 2;
1996 struct brw_instruction
*insn
;
1998 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2000 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2001 insn
->header
.predicate_control
= 0; /* XXX */
2002 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2004 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2006 brw_set_dest(p
, insn
, dest
);
2007 brw_set_src0(insn
, src0
);
2008 brw_set_sampler_message(p
->brw
, insn
,
2009 binding_table_index
,
2020 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2022 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2024 brw_push_insn_state(p
);
2025 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2026 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2027 retype(reg
, BRW_REGISTER_TYPE_UD
));
2028 brw_pop_insn_state(p
);
2033 /* All these variables are pretty confusing - we might be better off
2034 * using bitmasks and macros for this, in the old style. Or perhaps
2035 * just having the caller instantiate the fields in dword3 itself.
2037 void brw_urb_WRITE(struct brw_compile
*p
,
2038 struct brw_reg dest
,
2040 struct brw_reg src0
,
2044 GLuint response_length
,
2046 GLboolean writes_complete
,
2050 struct intel_context
*intel
= &p
->brw
->intel
;
2051 struct brw_instruction
*insn
;
2053 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2055 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2057 assert(msg_length
< BRW_MAX_MRF
);
2059 brw_set_dest(p
, insn
, dest
);
2060 brw_set_src0(insn
, src0
);
2061 brw_set_src1(insn
, brw_imm_d(0));
2064 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2066 brw_set_urb_message(p
->brw
,
2079 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2083 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2084 struct brw_instruction
*insn
= &p
->store
[ip
];
2086 switch (insn
->header
.opcode
) {
2087 case BRW_OPCODE_ENDIF
:
2088 case BRW_OPCODE_ELSE
:
2089 case BRW_OPCODE_WHILE
:
2093 assert(!"not reached");
2097 /* There is no DO instruction on gen6, so to find the end of the loop
2098 * we have to see if the loop is jumping back before our start
2102 brw_find_loop_end(struct brw_compile
*p
, int start
)
2107 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2108 struct brw_instruction
*insn
= &p
->store
[ip
];
2110 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2111 if (ip
+ insn
->bits1
.branch_gen6
.jump_count
/ br
< start
)
2115 assert(!"not reached");
2119 /* After program generation, go back and update the UIP and JIP of
2120 * BREAK and CONT instructions to their correct locations.
2123 brw_set_uip_jip(struct brw_compile
*p
)
2125 struct intel_context
*intel
= &p
->brw
->intel
;
2132 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2133 struct brw_instruction
*insn
= &p
->store
[ip
];
2135 switch (insn
->header
.opcode
) {
2136 case BRW_OPCODE_BREAK
:
2137 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2138 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
+ 1);
2140 case BRW_OPCODE_CONTINUE
:
2141 /* JIP is set at CONTINUE emit time, since that's when we
2142 * know where the start of the loop is.
2144 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2145 assert(insn
->bits3
.break_cont
.uip
!= 0);
2146 assert(insn
->bits3
.break_cont
.jip
!= 0);
2152 void brw_ff_sync(struct brw_compile
*p
,
2153 struct brw_reg dest
,
2155 struct brw_reg src0
,
2157 GLuint response_length
,
2160 struct intel_context
*intel
= &p
->brw
->intel
;
2161 struct brw_instruction
*insn
;
2163 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2165 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2166 brw_set_dest(p
, insn
, dest
);
2167 brw_set_src0(insn
, src0
);
2168 brw_set_src1(insn
, brw_imm_d(0));
2171 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2173 brw_set_ff_sync_message(p
->brw
,