2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
/***********************************************************************
 * Internal helpers for constructing instructions
 */
44 static void guess_execution_size(struct brw_compile
*p
,
45 struct brw_instruction
*insn
,
48 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
63 gen6_resolve_implied_move(struct brw_compile
*p
,
67 struct intel_context
*intel
= &p
->brw
->intel
;
71 if (src
->file
!= BRW_ARCHITECTURE_REGISTER_FILE
|| src
->nr
!= BRW_ARF_NULL
) {
72 brw_push_insn_state(p
);
73 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
74 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
75 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
76 retype(*src
, BRW_REGISTER_TYPE_UD
));
77 brw_pop_insn_state(p
);
79 *src
= brw_message_reg(msg_reg_nr
);
83 static void brw_set_dest(struct brw_compile
*p
,
84 struct brw_instruction
*insn
,
87 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
88 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
89 assert(dest
.nr
< 128);
91 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
92 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
93 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
95 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
96 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
98 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
99 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
100 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
101 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
102 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
105 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
106 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
107 /* even ignored in da16, still need to set as '01' */
108 insn
->bits1
.da16
.dest_horiz_stride
= 1;
112 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
114 /* These are different sizes in align1 vs align16:
116 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
117 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
118 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
119 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
120 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
123 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
124 /* even ignored in da16, still need to set as '01' */
125 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
129 /* NEW: Set the execution size based on dest.width and
130 * insn->compression_control:
132 guess_execution_size(p
, insn
, dest
);
135 extern int reg_type_size
[];
138 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
140 int hstride_for_reg
[] = {0, 1, 2, 4};
141 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
142 int width_for_reg
[] = {1, 2, 4, 8, 16};
143 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
144 int width
, hstride
, vstride
, execsize
;
146 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
147 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
148 * mean the destination has to be 128-bit aligned and the
149 * destination horiz stride has to be a word.
151 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
152 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
153 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
159 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
160 reg
.file
== BRW_ARF_NULL
)
163 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
164 hstride
= hstride_for_reg
[reg
.hstride
];
166 if (reg
.vstride
== 0xf) {
169 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
170 vstride
= vstride_for_reg
[reg
.vstride
];
173 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
174 width
= width_for_reg
[reg
.width
];
176 assert(insn
->header
.execution_size
>= 0 &&
177 insn
->header
.execution_size
< Elements(execsize_for_reg
));
178 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
180 /* Restrictions from 3.3.10: Register Region Restrictions. */
182 assert(execsize
>= width
);
185 if (execsize
== width
&& hstride
!= 0) {
186 assert(vstride
== -1 || vstride
== width
* hstride
);
190 if (execsize
== width
&& hstride
== 0) {
191 /* no restriction on vstride. */
196 assert(hstride
== 0);
200 if (execsize
== 1 && width
== 1) {
201 assert(hstride
== 0);
202 assert(vstride
== 0);
206 if (vstride
== 0 && hstride
== 0) {
210 /* 10. Check destination issues. */
213 static void brw_set_src0( struct brw_instruction
*insn
,
216 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
217 assert(reg
.nr
< 128);
219 validate_reg(insn
, reg
);
221 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
222 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
223 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
224 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
225 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
227 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
228 insn
->bits3
.ud
= reg
.dw1
.ud
;
230 /* Required to set some fields in src1 as well:
232 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
233 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
237 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
238 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
239 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
240 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
243 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
244 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
248 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
250 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
251 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
254 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
258 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
259 if (reg
.width
== BRW_WIDTH_1
&&
260 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
261 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
262 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
263 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
266 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
267 insn
->bits2
.da1
.src0_width
= reg
.width
;
268 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
272 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
273 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
274 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
275 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
277 /* This is an oddity of the fact we're using the same
278 * descriptions for registers in align_16 as align_1:
280 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
281 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
283 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
289 void brw_set_src1( struct brw_instruction
*insn
,
292 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
294 assert(reg
.nr
< 128);
296 validate_reg(insn
, reg
);
298 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
299 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
300 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
301 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
303 /* Only src1 can be immediate in two-argument instructions.
305 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
307 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
308 insn
->bits3
.ud
= reg
.dw1
.ud
;
311 /* This is a hardware restriction, which may or may not be lifted
314 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
315 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
317 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
318 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
319 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
322 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
323 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
326 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
327 if (reg
.width
== BRW_WIDTH_1
&&
328 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
329 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
330 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
331 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
334 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
335 insn
->bits3
.da1
.src1_width
= reg
.width
;
336 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
340 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
341 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
342 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
343 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
345 /* This is an oddity of the fact we're using the same
346 * descriptions for registers in align_16 as align_1:
348 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
349 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
351 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
358 static void brw_set_math_message( struct brw_context
*brw
,
359 struct brw_instruction
*insn
,
361 GLuint response_length
,
364 GLboolean low_precision
,
368 struct intel_context
*intel
= &brw
->intel
;
369 brw_set_src1(insn
, brw_imm_d(0));
371 if (intel
->gen
== 5) {
372 insn
->bits3
.math_gen5
.function
= function
;
373 insn
->bits3
.math_gen5
.int_type
= integer_type
;
374 insn
->bits3
.math_gen5
.precision
= low_precision
;
375 insn
->bits3
.math_gen5
.saturate
= saturate
;
376 insn
->bits3
.math_gen5
.data_type
= dataType
;
377 insn
->bits3
.math_gen5
.snapshot
= 0;
378 insn
->bits3
.math_gen5
.header_present
= 0;
379 insn
->bits3
.math_gen5
.response_length
= response_length
;
380 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
381 insn
->bits3
.math_gen5
.end_of_thread
= 0;
382 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
383 insn
->bits2
.send_gen5
.end_of_thread
= 0;
385 insn
->bits3
.math
.function
= function
;
386 insn
->bits3
.math
.int_type
= integer_type
;
387 insn
->bits3
.math
.precision
= low_precision
;
388 insn
->bits3
.math
.saturate
= saturate
;
389 insn
->bits3
.math
.data_type
= dataType
;
390 insn
->bits3
.math
.response_length
= response_length
;
391 insn
->bits3
.math
.msg_length
= msg_length
;
392 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
393 insn
->bits3
.math
.end_of_thread
= 0;
398 static void brw_set_ff_sync_message(struct brw_context
*brw
,
399 struct brw_instruction
*insn
,
401 GLuint response_length
,
402 GLboolean end_of_thread
)
404 struct intel_context
*intel
= &brw
->intel
;
405 brw_set_src1(insn
, brw_imm_d(0));
407 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
408 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
409 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
410 insn
->bits3
.urb_gen5
.allocate
= allocate
;
411 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
412 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
413 insn
->bits3
.urb_gen5
.header_present
= 1;
414 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
415 insn
->bits3
.urb_gen5
.msg_length
= 1;
416 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
417 if (intel
->gen
>= 6) {
418 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
420 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
421 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
425 static void brw_set_urb_message( struct brw_context
*brw
,
426 struct brw_instruction
*insn
,
430 GLuint response_length
,
431 GLboolean end_of_thread
,
434 GLuint swizzle_control
)
436 struct intel_context
*intel
= &brw
->intel
;
437 brw_set_src1(insn
, brw_imm_d(0));
439 if (intel
->gen
>= 5) {
440 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
441 insn
->bits3
.urb_gen5
.offset
= offset
;
442 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
443 insn
->bits3
.urb_gen5
.allocate
= allocate
;
444 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
445 insn
->bits3
.urb_gen5
.complete
= complete
;
446 insn
->bits3
.urb_gen5
.header_present
= 1;
447 insn
->bits3
.urb_gen5
.response_length
= response_length
;
448 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
449 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
450 if (intel
->gen
>= 6) {
451 /* For SNB, the SFID bits moved to the condmod bits, and
452 * EOT stayed in bits3 above. Does the EOT bit setting
453 * below on Ironlake even do anything?
455 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
457 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
458 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
461 insn
->bits3
.urb
.opcode
= 0; /* ? */
462 insn
->bits3
.urb
.offset
= offset
;
463 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
464 insn
->bits3
.urb
.allocate
= allocate
;
465 insn
->bits3
.urb
.used
= used
; /* ? */
466 insn
->bits3
.urb
.complete
= complete
;
467 insn
->bits3
.urb
.response_length
= response_length
;
468 insn
->bits3
.urb
.msg_length
= msg_length
;
469 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
470 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
474 static void brw_set_dp_write_message( struct brw_context
*brw
,
475 struct brw_instruction
*insn
,
476 GLuint binding_table_index
,
480 GLboolean header_present
,
481 GLuint pixel_scoreboard_clear
,
482 GLuint response_length
,
483 GLuint end_of_thread
,
484 GLuint send_commit_msg
)
486 struct intel_context
*intel
= &brw
->intel
;
487 brw_set_src1(insn
, brw_imm_ud(0));
489 if (intel
->gen
>= 6) {
490 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
491 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
492 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
493 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
494 insn
->bits3
.gen6_dp
.send_commit_msg
= send_commit_msg
;
495 insn
->bits3
.gen6_dp
.header_present
= header_present
;
496 insn
->bits3
.gen6_dp
.response_length
= response_length
;
497 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
498 insn
->bits3
.gen6_dp
.end_of_thread
= end_of_thread
;
500 /* We always use the render cache for write messages */
501 insn
->header
.destreg__conditionalmod
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
502 } else if (intel
->gen
== 5) {
503 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
504 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
505 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
506 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
507 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
508 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
509 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
510 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
511 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
512 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
513 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
515 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
516 insn
->bits3
.dp_write
.msg_control
= msg_control
;
517 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
518 insn
->bits3
.dp_write
.msg_type
= msg_type
;
519 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
520 insn
->bits3
.dp_write
.response_length
= response_length
;
521 insn
->bits3
.dp_write
.msg_length
= msg_length
;
522 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
523 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
528 brw_set_dp_read_message(struct brw_context
*brw
,
529 struct brw_instruction
*insn
,
530 GLuint binding_table_index
,
535 GLuint response_length
)
537 struct intel_context
*intel
= &brw
->intel
;
538 brw_set_src1(insn
, brw_imm_d(0));
540 if (intel
->gen
>= 6) {
541 uint32_t target_function
;
543 if (target_cache
== BRW_DATAPORT_READ_TARGET_DATA_CACHE
)
544 target_function
= GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE
;
546 target_function
= GEN6_MESSAGE_TARGET_DP_RENDER_CACHE
;
548 insn
->bits3
.gen6_dp
.binding_table_index
= binding_table_index
;
549 insn
->bits3
.gen6_dp
.msg_control
= msg_control
;
550 insn
->bits3
.gen6_dp
.pixel_scoreboard_clear
= 0;
551 insn
->bits3
.gen6_dp
.msg_type
= msg_type
;
552 insn
->bits3
.gen6_dp
.send_commit_msg
= 0;
553 insn
->bits3
.gen6_dp
.header_present
= 1;
554 insn
->bits3
.gen6_dp
.response_length
= response_length
;
555 insn
->bits3
.gen6_dp
.msg_length
= msg_length
;
556 insn
->bits3
.gen6_dp
.end_of_thread
= 0;
557 insn
->header
.destreg__conditionalmod
= target_function
;
558 } else if (intel
->gen
== 5) {
559 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
560 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
561 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
562 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
563 insn
->bits3
.dp_read_gen5
.header_present
= 1;
564 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
565 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
566 insn
->bits3
.dp_read_gen5
.pad1
= 0;
567 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
568 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
569 insn
->bits2
.send_gen5
.end_of_thread
= 0;
570 } else if (intel
->is_g4x
) {
571 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
572 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
573 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
574 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
575 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
576 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
577 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
578 insn
->bits3
.dp_read_g4x
.pad1
= 0;
579 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
581 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
582 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
583 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
584 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
585 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
586 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
587 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
588 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
589 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
593 static void brw_set_sampler_message(struct brw_context
*brw
,
594 struct brw_instruction
*insn
,
595 GLuint binding_table_index
,
598 GLuint response_length
,
601 GLuint header_present
,
604 struct intel_context
*intel
= &brw
->intel
;
606 brw_set_src1(insn
, brw_imm_d(0));
608 if (intel
->gen
>= 5) {
609 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
610 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
611 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
612 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
613 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
614 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
615 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
616 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
618 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
620 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
621 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
623 } else if (intel
->is_g4x
) {
624 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
625 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
626 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
627 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
628 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
629 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
630 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
632 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
633 insn
->bits3
.sampler
.sampler
= sampler
;
634 insn
->bits3
.sampler
.msg_type
= msg_type
;
635 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
636 insn
->bits3
.sampler
.response_length
= response_length
;
637 insn
->bits3
.sampler
.msg_length
= msg_length
;
638 insn
->bits3
.sampler
.end_of_thread
= eot
;
639 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
645 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
648 struct brw_instruction
*insn
;
650 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
652 insn
= &p
->store
[p
->nr_insn
++];
653 memcpy(insn
, p
->current
, sizeof(*insn
));
655 /* Reset this one-shot flag:
658 if (p
->current
->header
.destreg__conditionalmod
) {
659 p
->current
->header
.destreg__conditionalmod
= 0;
660 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
663 insn
->header
.opcode
= opcode
;
668 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
673 struct brw_instruction
*insn
= next_insn(p
, opcode
);
674 brw_set_dest(p
, insn
, dest
);
675 brw_set_src0(insn
, src
);
679 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
683 struct brw_reg src1
)
685 struct brw_instruction
*insn
= next_insn(p
, opcode
);
686 brw_set_dest(p
, insn
, dest
);
687 brw_set_src0(insn
, src0
);
688 brw_set_src1(insn
, src1
);
693 /***********************************************************************
694 * Convenience routines.
697 struct brw_instruction *brw_##OP(struct brw_compile *p, \
698 struct brw_reg dest, \
699 struct brw_reg src0) \
701 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
705 struct brw_instruction *brw_##OP(struct brw_compile *p, \
706 struct brw_reg dest, \
707 struct brw_reg src0, \
708 struct brw_reg src1) \
710 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
713 /* Rounding operations (other than RNDD) require two instructions - the first
714 * stores a rounded value (possibly the wrong way) in the dest register, but
715 * also sets a per-channel "increment bit" in the flag register. A predicated
716 * add of 1.0 fixes dest to contain the desired result.
719 void brw_##OP(struct brw_compile *p, \
720 struct brw_reg dest, \
721 struct brw_reg src) \
723 struct brw_instruction *rnd, *add; \
724 rnd = next_insn(p, BRW_OPCODE_##OP); \
725 brw_set_dest(p, rnd, dest); \
726 brw_set_src0(rnd, src); \
727 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
729 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
730 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
762 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
768 if (src0
.type
== BRW_REGISTER_TYPE_F
||
769 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
770 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
771 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
772 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
775 if (src1
.type
== BRW_REGISTER_TYPE_F
||
776 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
777 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
778 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
779 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
782 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
785 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
791 if (src0
.type
== BRW_REGISTER_TYPE_D
||
792 src0
.type
== BRW_REGISTER_TYPE_UD
||
793 src1
.type
== BRW_REGISTER_TYPE_D
||
794 src1
.type
== BRW_REGISTER_TYPE_UD
) {
795 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
798 if (src0
.type
== BRW_REGISTER_TYPE_F
||
799 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
800 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
801 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
802 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
805 if (src1
.type
== BRW_REGISTER_TYPE_F
||
806 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
807 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
808 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
809 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
812 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
813 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
814 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
815 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
817 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
821 void brw_NOP(struct brw_compile
*p
)
823 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
824 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
825 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
826 brw_set_src1(insn
, brw_imm_ud(0x0));
/***********************************************************************
 * Comparisons, if/else/endif
 */
837 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
842 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
844 insn
->header
.execution_size
= 1;
845 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
846 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
848 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
868 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
870 struct intel_context
*intel
= &p
->brw
->intel
;
871 struct brw_instruction
*insn
;
873 if (p
->single_program_flow
) {
874 assert(execute_size
== BRW_EXECUTE_1
);
876 insn
= next_insn(p
, BRW_OPCODE_ADD
);
877 insn
->header
.predicate_inverse
= 1;
879 insn
= next_insn(p
, BRW_OPCODE_IF
);
882 /* Override the defaults for this instruction:
884 if (intel
->gen
< 6) {
885 brw_set_dest(p
, insn
, brw_ip_reg());
886 brw_set_src0(insn
, brw_ip_reg());
887 brw_set_src1(insn
, brw_imm_d(0x0));
889 brw_set_dest(p
, insn
, brw_imm_w(0));
890 insn
->bits1
.branch_gen6
.jump_count
= 0;
891 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
892 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
895 insn
->header
.execution_size
= execute_size
;
896 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
897 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
898 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
899 if (!p
->single_program_flow
)
900 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
902 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
907 struct brw_instruction
*
908 gen6_IF(struct brw_compile
*p
, uint32_t conditional
,
909 struct brw_reg src0
, struct brw_reg src1
)
911 struct brw_instruction
*insn
;
913 insn
= next_insn(p
, BRW_OPCODE_IF
);
915 brw_set_dest(p
, insn
, brw_imm_w(0));
916 insn
->header
.execution_size
= BRW_EXECUTE_8
;
917 insn
->bits1
.branch_gen6
.jump_count
= 0;
918 brw_set_src0(insn
, src0
);
919 brw_set_src1(insn
, src1
);
921 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
922 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
923 insn
->header
.destreg__conditionalmod
= conditional
;
925 if (!p
->single_program_flow
)
926 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
931 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
932 struct brw_instruction
*if_insn
)
934 struct intel_context
*intel
= &p
->brw
->intel
;
935 struct brw_instruction
*insn
;
938 /* jump count is for 64bit data chunk each, so one 128bit
939 instruction requires 2 chunks. */
943 if (p
->single_program_flow
) {
944 insn
= next_insn(p
, BRW_OPCODE_ADD
);
946 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
949 if (intel
->gen
< 6) {
950 brw_set_dest(p
, insn
, brw_ip_reg());
951 brw_set_src0(insn
, brw_ip_reg());
952 brw_set_src1(insn
, brw_imm_d(0x0));
954 brw_set_dest(p
, insn
, brw_imm_w(0));
955 insn
->bits1
.branch_gen6
.jump_count
= 0;
956 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
957 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
960 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
961 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
962 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
963 if (!p
->single_program_flow
)
964 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
966 /* Patch the if instruction to point at this instruction.
968 if (p
->single_program_flow
) {
969 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
971 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
973 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
975 if (intel
->gen
< 6) {
976 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
977 if_insn
->bits3
.if_else
.pop_count
= 0;
978 if_insn
->bits3
.if_else
.pad0
= 0;
980 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
987 void brw_ENDIF(struct brw_compile
*p
,
988 struct brw_instruction
*patch_insn
)
990 struct intel_context
*intel
= &p
->brw
->intel
;
996 if (p
->single_program_flow
) {
997 /* In single program flow mode, there's no need to execute an ENDIF,
998 * since we don't need to do any stack operations, and if we're executing
999 * currently, we want to just continue executing.
1001 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
1003 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
1005 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
1007 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
1009 if (intel
->gen
< 6) {
1010 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1011 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
1012 brw_set_src1(insn
, brw_imm_d(0x0));
1014 brw_set_dest(p
, insn
, brw_imm_w(0));
1015 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1016 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1019 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1020 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
1021 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
1022 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
1025 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
1027 assert(patch_insn
->bits1
.branch_gen6
.jump_count
== 0);
1029 /* Patch the if or else instructions to point at this or the next
1030 * instruction respectively.
1032 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
1033 if (intel
->gen
< 6) {
1034 /* Turn it into an IFF, which means no mask stack operations for
1035 * all-false and jumping past the ENDIF.
1037 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
1038 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1039 patch_insn
->bits3
.if_else
.pop_count
= 0;
1040 patch_insn
->bits3
.if_else
.pad0
= 0;
1042 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1043 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1046 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
1047 if (intel
->gen
< 6) {
1048 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1051 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1052 patch_insn
->bits3
.if_else
.pop_count
= 1;
1053 patch_insn
->bits3
.if_else
.pad0
= 0;
1055 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1056 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1060 /* Also pop item off the stack in the endif instruction:
1062 if (intel
->gen
< 6) {
1063 insn
->bits3
.if_else
.jump_count
= 0;
1064 insn
->bits3
.if_else
.pop_count
= 1;
1065 insn
->bits3
.if_else
.pad0
= 0;
1067 insn
->bits1
.branch_gen6
.jump_count
= 2;
1072 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1074 struct intel_context
*intel
= &p
->brw
->intel
;
1075 struct brw_instruction
*insn
;
1077 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1078 if (intel
->gen
>= 6) {
1079 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1080 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1081 brw_set_src1(insn
, brw_imm_d(0x0));
1083 brw_set_dest(p
, insn
, brw_ip_reg());
1084 brw_set_src0(insn
, brw_ip_reg());
1085 brw_set_src1(insn
, brw_imm_d(0x0));
1086 insn
->bits3
.if_else
.pad0
= 0;
1087 insn
->bits3
.if_else
.pop_count
= pop_count
;
1089 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1090 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1095 struct brw_instruction
*gen6_CONT(struct brw_compile
*p
,
1096 struct brw_instruction
*do_insn
)
1098 struct brw_instruction
*insn
;
1101 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1102 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1103 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1104 brw_set_dest(p
, insn
, brw_ip_reg());
1105 brw_set_src0(insn
, brw_ip_reg());
1106 brw_set_src1(insn
, brw_imm_d(0x0));
1108 insn
->bits3
.break_cont
.uip
= br
* (do_insn
- insn
);
1110 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1111 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1115 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1117 struct brw_instruction
*insn
;
1118 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1119 brw_set_dest(p
, insn
, brw_ip_reg());
1120 brw_set_src0(insn
, brw_ip_reg());
1121 brw_set_src1(insn
, brw_imm_d(0x0));
1122 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1123 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1124 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1125 insn
->bits3
.if_else
.pad0
= 0;
1126 insn
->bits3
.if_else
.pop_count
= pop_count
;
1132 * The DO/WHILE is just an unterminated loop -- break or continue are
1133 * used for control within the loop. We have a few ways they can be
1136 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1137 * jip and no DO instruction.
1139 * For non-uniform control flow pre-gen6, there's a DO instruction to
1140 * push the mask, and a WHILE to jump back, and BREAK to get out and
1143 * For gen6, there's no more mask stack, so no need for DO. WHILE
1144 * just points back to the first instruction of the loop.
1146 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1148 struct intel_context
*intel
= &p
->brw
->intel
;
1150 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1151 return &p
->store
[p
->nr_insn
];
1153 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1155 /* Override the defaults for this instruction:
1157 brw_set_dest(p
, insn
, brw_null_reg());
1158 brw_set_src0(insn
, brw_null_reg());
1159 brw_set_src1(insn
, brw_null_reg());
1161 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1162 insn
->header
.execution_size
= execute_size
;
1163 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1164 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1165 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1173 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1174 struct brw_instruction
*do_insn
)
1176 struct intel_context
*intel
= &p
->brw
->intel
;
1177 struct brw_instruction
*insn
;
1180 if (intel
->gen
>= 5)
1183 if (intel
->gen
>= 6) {
1184 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1186 brw_set_dest(p
, insn
, brw_imm_w(0));
1187 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1188 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1189 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1191 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1192 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1194 if (p
->single_program_flow
) {
1195 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1197 brw_set_dest(p
, insn
, brw_ip_reg());
1198 brw_set_src0(insn
, brw_ip_reg());
1199 brw_set_src1(insn
, brw_imm_d((do_insn
- insn
) * 16));
1200 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1202 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1204 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1206 brw_set_dest(p
, insn
, brw_ip_reg());
1207 brw_set_src0(insn
, brw_ip_reg());
1208 brw_set_src1(insn
, brw_imm_d(0));
1210 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1211 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1212 insn
->bits3
.if_else
.pop_count
= 0;
1213 insn
->bits3
.if_else
.pad0
= 0;
1216 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1217 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1225 void brw_land_fwd_jump(struct brw_compile
*p
,
1226 struct brw_instruction
*jmp_insn
)
1228 struct intel_context
*intel
= &p
->brw
->intel
;
1229 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1232 if (intel
->gen
>= 5)
1235 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1236 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1238 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1243 /* To integrate with the above, it makes sense that the comparison
1244 * instruction should populate the flag register. It might be simpler
1245 * just to use the flag reg for most WM tasks?
/* Emit a CMP instruction.
 *
 * The new instruction's conditional-modifier field is set from
 * `conditional`, and dest/src0/src1 become its operands.
 *
 * When dest is in the architecture register file (the test continues with a
 * second `&&` operand), the default instruction state p->current is switched
 * to BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff, so that
 * instructions emitted afterwards use the flag this comparison computes
 * until brw_set_predicate_control_flag_value() is called.
 */
1247 void brw_CMP(struct brw_compile
*p
,
1248 struct brw_reg dest
,
1250 struct brw_reg src0
,
1251 struct brw_reg src1
)
1253 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1255 insn
->header
.destreg__conditionalmod
= conditional
;
1256 brw_set_dest(p
, insn
, dest
);
1257 brw_set_src0(insn
, src0
);
1258 brw_set_src1(insn
, src1
);
1260 /* guess_execution_size(insn, src0); */
1263 /* Make it so that future instructions will use the computed flag
1264 * value until brw_set_predicate_control_flag_value() is called
1267 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1269 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1270 p
->flag_value
= 0xff;
1274 /* Issue 'wait' instruction for n1, host could program MMIO
1275 to wake up thread. */
1276 void brw_WAIT (struct brw_compile
*p
)
1278 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1279 struct brw_reg src
= brw_notification_1_reg();
1281 brw_set_dest(p
, insn
, src
);
1282 brw_set_src0(insn
, src
);
1283 brw_set_src1(insn
, brw_null_reg());
1284 insn
->header
.execution_size
= 0; /* must */
1285 insn
->header
.predicate_control
= 0;
1286 insn
->header
.compression_control
= 0;
1290 /***********************************************************************
1291 * Helpers for the various SEND message types:
1294 /** Extended math function, float[8].
/* Emit an extended math operation.
 *
 * gen >= 6: a native MATH instruction is emitted.  dest and src must be
 * general registers with a horizontal stride of 1, src must not be negated,
 * and src must be float-typed unless the function is one of the two integer
 * divide functions.  The function code is stored in the
 * conditional-modifier field and the saturate bit is taken from `saturate`.
 *
 * SEND-based path: a SEND is emitted with predicate control cleared and
 * msg_reg_nr stored in the destreg/conditional-modifier field; the message
 * descriptor is built by brw_set_math_message().  POW uses a message length
 * of 2 and SINCOS a response length of 2; every other function uses 1.
 */
1296 void brw_math( struct brw_compile
*p
,
1297 struct brw_reg dest
,
1305 struct intel_context
*intel
= &p
->brw
->intel
;
1307 if (intel
->gen
>= 6) {
1308 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1310 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1311 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1313 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1314 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1316 /* Source modifiers are ignored for extended math instructions. */
1317 assert(!src
.negate
);
1320 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1321 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1322 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1325 /* Math is the same ISA format as other opcodes, except that CondModifier
1326 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1328 insn
->header
.destreg__conditionalmod
= function
;
1329 insn
->header
.saturate
= saturate
;
1331 brw_set_dest(p
, insn
, dest
);
1332 brw_set_src0(insn
, src
);
1333 brw_set_src1(insn
, brw_null_reg());
1335 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1336 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1337 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1338 /* Example code doesn't set predicate_control for send
1341 insn
->header
.predicate_control
= 0;
1342 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1344 brw_set_dest(p
, insn
, dest
);
1345 brw_set_src0(insn
, src
);
1346 brw_set_math_message(p
->brw
,
1348 msg_length
, response_length
,
1350 BRW_MATH_INTEGER_UNSIGNED
,
1357 /** Extended math function, float[8].
/* Emit a two-source extended math operation as a native MATH instruction.
 *
 * Requires gen >= 6 (asserted).  dest, src0 and src1 must all be general
 * registers with a horizontal stride of 1, neither source may be negated,
 * and both sources must be float-typed unless the function is one of the
 * two integer divide functions.  The function code is stored in the
 * conditional-modifier field of the instruction.
 */
1359 void brw_math2(struct brw_compile
*p
,
1360 struct brw_reg dest
,
1362 struct brw_reg src0
,
1363 struct brw_reg src1
)
1365 struct intel_context
*intel
= &p
->brw
->intel
;
1366 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1368 assert(intel
->gen
>= 6);
1372 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1373 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1374 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1376 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1377 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1378 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1380 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1381 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1382 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1383 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1386 /* Source modifiers are ignored for extended math instructions. */
1387 assert(!src0
.negate
);
1389 assert(!src1
.negate
);
1392 /* Math is the same ISA format as other opcodes, except that CondModifier
1393 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1395 insn
->header
.destreg__conditionalmod
= function
;
1397 brw_set_dest(p
, insn
, dest
);
1398 brw_set_src0(insn
, src0
);
1399 brw_set_src1(insn
, src1
);
1403 * Extended math function, float[16].
1404 * Use 2 send instructions.
/* Emit an extended math operation covering 16 channels.
 *
 * Two code paths are visible:
 *  - gen >= 6: one MATH instruction, with the function code in the
 *    conditional-modifier field and the saturate bit taken from `saturate`;
 *    src must not be negated.
 *  - SEND-based: with the instruction state pushed, the predicate flag value
 *    set to 0xff and compression disabled, two SENDs are emitted.  The first
 *    uses message register msg_reg_nr and writes dest; the second is marked
 *    BRW_COMPRESSION_2NDHALF, uses msg_reg_nr + 1 and writes
 *    offset(dest, 1).  The saved state is popped afterwards.
 *
 * POW uses a message length of 2 and SINCOS a response length of 2; every
 * other function uses 1 for both.
 */
1406 void brw_math_16( struct brw_compile
*p
,
1407 struct brw_reg dest
,
1414 struct intel_context
*intel
= &p
->brw
->intel
;
1415 struct brw_instruction
*insn
;
1416 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1417 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1419 if (intel
->gen
>= 6) {
1420 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1422 /* Math is the same ISA format as other opcodes, except that CondModifier
1423 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1425 insn
->header
.destreg__conditionalmod
= function
;
1426 insn
->header
.saturate
= saturate
;
1428 /* Source modifiers are ignored for extended math instructions. */
1429 assert(!src
.negate
);
1432 brw_set_dest(p
, insn
, dest
);
1433 brw_set_src0(insn
, src
);
1434 brw_set_src1(insn
, brw_null_reg());
1438 /* First instruction:
1440 brw_push_insn_state(p
);
1441 brw_set_predicate_control_flag_value(p
, 0xff);
1442 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1444 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1445 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1447 brw_set_dest(p
, insn
, dest
);
1448 brw_set_src0(insn
, src
);
1449 brw_set_math_message(p
->brw
,
1451 msg_length
, response_length
,
1453 BRW_MATH_INTEGER_UNSIGNED
,
1456 BRW_MATH_DATA_VECTOR
);
1458 /* Second instruction:
1460 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1461 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1462 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1464 brw_set_dest(p
, insn
, offset(dest
,1));
1465 brw_set_src0(insn
, src
);
1466 brw_set_math_message(p
->brw
,
1468 msg_length
, response_length
,
1470 BRW_MATH_INTEGER_UNSIGNED
,
1473 BRW_MATH_DATA_VECTOR
);
1475 brw_pop_insn_state(p
);
1480 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1481 * using a constant offset per channel.
1483 * The offset must be aligned to oword size (16 bytes). Used for
1484 * register spilling.
/* Emit the instructions for an OWORD block write to scratch space (register
 * spilling).
 *
 * Steps visible here:
 *  - the block size is 2 OWORDs when num_regs == 1, and 4 OWORDs in the
 *    other visible assignment;
 *  - with mask control disabled and compression off, g0 is copied into the
 *    message register `mrf` to form the header, and `offset` is supplied as
 *    an immediate for header element 2 (the global offset field);
 *  - a SEND is emitted with mrf.nr as its message register.  It must be
 *    unpredicated (asserted), and any compression on it is cleared;
 *  - on gen >= 6 the destination is the null register and send_commit_msg
 *    is 0; the other visible assignment sets send_commit_msg to 1.  That
 *    value is also passed as the response length;
 *  - the message is stateless (binding table index 255), carries a header,
 *    and uses the GEN6 or the original OWORD-block-write message type.
 */
1486 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1491 struct intel_context
*intel
= &p
->brw
->intel
;
1492 uint32_t msg_control
, msg_type
;
1495 if (intel
->gen
>= 6)
1498 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1500 if (num_regs
== 1) {
1501 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1504 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1508 /* Set up the message header. This is g0, with g0.2 filled with
1509 * the offset. We don't want to leave our offset around in g0 or
1510 * it'll screw up texture samples, so set it up inside the message
1514 brw_push_insn_state(p
);
1515 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1516 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1518 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1520 /* set message header global offset field (reg 0, element 2) */
1522 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1524 2), BRW_REGISTER_TYPE_UD
),
1525 brw_imm_ud(offset
));
1527 brw_pop_insn_state(p
);
1531 struct brw_reg dest
;
1532 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1533 int send_commit_msg
;
1534 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1535 BRW_REGISTER_TYPE_UW
);
1537 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1538 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1539 src_header
= vec16(src_header
);
1541 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1542 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1544 /* Until gen6, writes followed by reads from the same location
1545 * are not guaranteed to be ordered unless write_commit is set.
1546 * If set, then a no-op write is issued to the destination
1547 * register to set a dependency, and a read from the destination
1548 * can be used to ensure the ordering.
1550 * For gen6, only writes between different threads need ordering
1551 * protection. Our use of DP writes is all about register
1552 * spilling within a thread.
1554 if (intel
->gen
>= 6) {
1555 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1556 send_commit_msg
= 0;
1559 send_commit_msg
= 1;
1562 brw_set_dest(p
, insn
, dest
);
1563 if (intel
->gen
>= 6) {
1564 brw_set_src0(insn
, mrf
);
1566 brw_set_src0(insn
, brw_null_reg());
1569 if (intel
->gen
>= 6)
1570 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1572 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
;
1574 brw_set_dp_write_message(p
->brw
,
1576 255, /* binding table index (255=stateless) */
1580 GL_TRUE
, /* header_present */
1581 0, /* pixel scoreboard */
1582 send_commit_msg
, /* response_length */
1590 * Read a block of owords (half a GRF each) from the scratch buffer
1591 * using a constant index per channel.
1593 * Offset must be aligned to oword size (16 bytes). Used for register
/* Emit the instructions for an OWORD block read from scratch space
 * (register unspilling).
 *
 * Steps visible here:
 *  - mrf is retyped to UD and dest to UW;
 *  - the block size is 2 OWORDs when num_regs == 1, and 4 OWORDs in the
 *    other visible assignment;
 *  - with compression off and mask control disabled, g0 is copied into
 *    `mrf` to form the header and `offset` is supplied as an immediate for
 *    header element 2 (the global offset field);
 *  - a SEND is emitted with mrf.nr as its message register.  Its predicate
 *    control must be 0 (asserted) and its compression is cleared;
 *  - src0 is `mrf` on gen >= 6; the other visible call passes the null
 *    register;
 *  - the read is stateless (binding table index 255) and uses the
 *    OWORD-block-read message with the render cache as target.
 */
1597 brw_oword_block_read_scratch(struct brw_compile
*p
,
1598 struct brw_reg dest
,
1603 struct intel_context
*intel
= &p
->brw
->intel
;
1604 uint32_t msg_control
;
1607 if (intel
->gen
>= 6)
1610 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1611 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1613 if (num_regs
== 1) {
1614 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1617 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1622 brw_push_insn_state(p
);
1623 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1624 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1626 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1628 /* set message header global offset field (reg 0, element 2) */
1630 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1632 2), BRW_REGISTER_TYPE_UD
),
1633 brw_imm_ud(offset
));
1635 brw_pop_insn_state(p
);
1639 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1641 assert(insn
->header
.predicate_control
== 0);
1642 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1643 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1645 brw_set_dest(p
, insn
, dest
); /* UW? */
1646 if (intel
->gen
>= 6) {
1647 brw_set_src0(insn
, mrf
);
1649 brw_set_src0(insn
, brw_null_reg());
1652 brw_set_dp_read_message(p
->brw
,
1654 255, /* binding table index (255=stateless) */
1656 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1657 BRW_DATAPORT_READ_TARGET_RENDER_CACHE
,
1664 * Read a float[4] vector from the data port Data Cache (const buffer).
1665 * Location (in buffer) should be a multiple of 16.
1666 * Used for fetching shader constants.
/* Emit the instructions for an OWORD block read through the data cache
 * (used for fetching shader constants from a constant buffer).
 *
 * Steps visible here:
 *  - mrf is retyped to UD;
 *  - with the instruction state pushed, predication and compression turned
 *    off and mask control disabled, g0 is copied into `mrf` to form the
 *    header and `offset` is supplied as an immediate for header element 2
 *    (the global offset field);
 *  - a SEND is emitted with mrf.nr as its message register; dest is cast to
 *    a vec8 of UW, and src0 is `mrf` on gen >= 6 (the other visible call
 *    passes the null register);
 *  - the message is an OWORD block read of the low OWORD, targeting the
 *    data cache, with a response length of 1;
 *  - the saved instruction state is popped at the end.
 */
1668 void brw_oword_block_read(struct brw_compile
*p
,
1669 struct brw_reg dest
,
1672 uint32_t bind_table_index
)
1674 struct intel_context
*intel
= &p
->brw
->intel
;
1676 /* On newer hardware, offset is in units of owords. */
1677 if (intel
->gen
>= 6)
1680 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1682 brw_push_insn_state(p
);
1683 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1684 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1685 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1687 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1689 /* set message header global offset field (reg 0, element 2) */
1691 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1693 2), BRW_REGISTER_TYPE_UD
),
1694 brw_imm_ud(offset
));
1696 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1697 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1699 /* cast dest to a uword[8] vector */
1700 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1702 brw_set_dest(p
, insn
, dest
);
1703 if (intel
->gen
>= 6) {
1704 brw_set_src0(insn
, mrf
);
1706 brw_set_src0(insn
, brw_null_reg());
1709 brw_set_dp_read_message(p
->brw
,
1712 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1713 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1714 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1716 1); /* response_length (1 reg, 2 owords!) */
1718 brw_pop_insn_state(p
);
1722 * Read a set of dwords from the data port Data Cache (const buffer).
1724 * Location (in buffer) appears as UD offsets in the register after
1725 * the provided mrf header reg.
/* Emit the instructions for a DWORD scattered read through the data cache.
 *
 * Steps visible here:
 *  - mrf is retyped to UD;
 *  - with the instruction state pushed, predication and compression turned
 *    off and mask control disabled, g0 is copied into `mrf` as the message
 *    header; the state is then popped;
 *  - a SEND is emitted with mrf.nr as its message register, dest cast to a
 *    vec8 of UW and the null register as src0;
 *  - the message is an 8-DWORD scattered read targeting the data cache,
 *    with a response length of 1.
 */
1727 void brw_dword_scattered_read(struct brw_compile
*p
,
1728 struct brw_reg dest
,
1730 uint32_t bind_table_index
)
1732 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1734 brw_push_insn_state(p
);
1735 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1736 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1737 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1738 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1739 brw_pop_insn_state(p
);
1741 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1742 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1744 /* cast dest to a uword[8] vector */
1745 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1747 brw_set_dest(p
, insn
, dest
);
1748 brw_set_src0(insn
, brw_null_reg());
1750 brw_set_dp_read_message(p
->brw
,
1753 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1754 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1755 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1757 1); /* response_length */
1763 * Read float[4] constant(s) from VS constant buffer.
1764 * For relative addressing, two float[4] constants will be read into 'dest'.
1765 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
/* Emit the instructions that read a constant from the VS constant buffer
 * with an OWORD block read.
 *
 * Steps visible here:
 *  - message register 1 is used (msg_reg_nr = 1);
 *  - in Align1 mode, with compression and predication off and mask control
 *    disabled, `location` is written as an immediate into element 2 of that
 *    message register; the instruction state is pushed before and popped
 *    after;
 *  - a SEND is emitted that is unpredicated, uncompressed and has mask
 *    control disabled, with msg_reg_nr as its message register;
 *  - src0 is the message register on gen >= 6; the other visible call
 *    passes the null register;
 *  - the message is an OWORD block read targeting the data cache, with a
 *    response length of 1.
 */
1767 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1768 struct brw_reg dest
,
1770 GLuint bind_table_index
)
1772 struct intel_context
*intel
= &p
->brw
->intel
;
1773 struct brw_instruction
*insn
;
1774 GLuint msg_reg_nr
= 1;
1776 if (intel
->gen
>= 6)
1779 /* Setup MRF[1] with location/offset into const buffer */
1780 brw_push_insn_state(p
);
1781 brw_set_access_mode(p
, BRW_ALIGN_1
);
1782 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1783 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1784 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1785 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1786 BRW_REGISTER_TYPE_UD
),
1787 brw_imm_ud(location
));
1788 brw_pop_insn_state(p
);
1790 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1792 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1793 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1794 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1795 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1797 brw_set_dest(p
, insn
, dest
);
1798 if (intel
->gen
>= 6) {
1799 brw_set_src0(insn
, brw_message_reg(msg_reg_nr
));
1801 brw_set_src0(insn
, brw_null_reg());
1804 brw_set_dp_read_message(p
->brw
,
1808 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1809 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1811 1); /* response_length (1 Oword) */
1815 * Read a float[4] constant per vertex from VS constant buffer, with
1816 * relative addressing.
/* Emit the instructions that read a constant per vertex from the VS constant
 * buffer using relative addressing (OWORD dual block read).
 *
 * Steps visible here:
 *  - in Align1 mode, with compression and predication off and mask control
 *    disabled, addr_reg + `offset` (an immediate) is written to message
 *    register 1, typed as D; the instruction state is pushed before and
 *    popped after;
 *  - gen6_resolve_implied_move() is applied to the g0 source with message
 *    register 0;
 *  - a SEND is emitted that is unpredicated, uncompressed and has mask
 *    control disabled, with 0 in its destreg/conditional-modifier field;
 *  - the message type is the GEN6 code on gen 6, the G45 code on gen 5 or
 *    G4X, and the original code in the remaining visible assignment;
 *  - the message is a 1-OWORD dual block read targeting the data cache,
 *    with a response length of 1.
 */
1818 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1819 struct brw_reg dest
,
1820 struct brw_reg addr_reg
,
1822 GLuint bind_table_index
)
1824 struct intel_context
*intel
= &p
->brw
->intel
;
1825 struct brw_reg src
= brw_vec8_grf(0, 0);
1828 /* Setup MRF[1] with offset into const buffer */
1829 brw_push_insn_state(p
);
1830 brw_set_access_mode(p
, BRW_ALIGN_1
);
1831 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1832 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1833 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1835 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1838 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1839 addr_reg
, brw_imm_d(offset
));
1840 brw_pop_insn_state(p
);
1842 gen6_resolve_implied_move(p
, &src
, 0);
1843 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1845 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1846 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1847 insn
->header
.destreg__conditionalmod
= 0;
1848 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1850 brw_set_dest(p
, insn
, dest
);
1851 brw_set_src0(insn
, src
);
1853 if (intel
->gen
== 6)
1854 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1855 else if (intel
->gen
== 5 || intel
->is_g4x
)
1856 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1858 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1860 brw_set_dp_read_message(p
->brw
,
1863 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1865 BRW_DATAPORT_READ_TARGET_DATA_CACHE
,
1867 1); /* response_length */
/* Emit a render target (framebuffer) write.
 *
 * What the visible code does:
 *  - the destination is the null register typed UW, 16 wide when
 *    dispatch_width == 16 and 8 wide in the other visible assignment;
 *  - on gen >= 6 with binding_table_index == 0 the instruction is a SENDC;
 *    the other visible call emits a plain SEND;
 *  - predicate control is cleared (the execution mask is ignored for render
 *    target writes) and compression is turned off;
 *  - on gen >= 6 src0 is replaced by message register msg_reg_nr
 *    (headerless, colour payload only) and the GEN6 message type is used;
 *    the other visible assignments store msg_reg_nr in the
 *    destreg/conditional-modifier field and use the original message type;
 *  - message control is SIMD16 single source for a dispatch width of 16,
 *    and SIMD8 single source (subspans 0/1) in the other visible assignment;
 *  - the data port write message is set up with the pixel scoreboard bit
 *    set and no commit message.
 */
1872 void brw_fb_WRITE(struct brw_compile
*p
,
1875 struct brw_reg src0
,
1876 GLuint binding_table_index
,
1878 GLuint response_length
,
1880 GLboolean header_present
)
1882 struct intel_context
*intel
= &p
->brw
->intel
;
1883 struct brw_instruction
*insn
;
1884 GLuint msg_control
, msg_type
;
1885 struct brw_reg dest
;
1887 if (dispatch_width
== 16)
1888 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1890 dest
= retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1892 if (intel
->gen
>= 6 && binding_table_index
== 0) {
1893 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
1895 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1897 /* The execution mask is ignored for render target writes. */
1898 insn
->header
.predicate_control
= 0;
1899 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1901 if (intel
->gen
>= 6) {
1902 /* headerless version, just submit color payload */
1903 src0
= brw_message_reg(msg_reg_nr
);
1905 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1907 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1909 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1912 if (dispatch_width
== 16)
1913 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1915 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1917 brw_set_dest(p
, insn
, dest
);
1918 brw_set_src0(insn
, src0
);
1919 brw_set_dp_write_message(p
->brw
,
1921 binding_table_index
,
1926 1, /* pixel scoreboard */
1929 0 /* send_commit_msg */);
1934 * Texture sample instruction.
1935 * Note: the msg_type plus msg_length values determine exactly what kind
1936 * of sampling operation is performed. See volume 4, page 161 of docs.
/* Emit a texture sample (a SEND to the sampler), including a workaround for
 * destination dependency tracking on sends.
 *
 * What the visible code does:
 *  - a writemask of 0 is special-cased at the top;
 *  - when writemask != WRITEMASK_XYZW, two loops over the four channel bits
 *    derive `newmask`, `len` and `dst_offset` from the writemask.  If
 *    newmask differs from writemask, one path is taken (its statements
 *    should be checked in the full file); the visible alternative builds a
 *    message header in message register msg_reg_nr: g0 is copied into it
 *    with compression off and mask control disabled, and the inverted
 *    newmask (restricted to XYZW) shifted left by 12 is written to
 *    element 2.  src0 then becomes the null register, dest is advanced by
 *    dst_offset, and response_length is recomputed from len (len * 2 in the
 *    visible assignment);
 *  - gen6_resolve_implied_move() is applied to src0, then a SEND is emitted
 *    with predicate control cleared, compression off and msg_reg_nr as its
 *    message register; the descriptor comes from brw_set_sampler_message();
 *  - the final visible section moves the last response register
 *    (offset(dest, response_length - 1), 8 wide) onto itself with
 *    compression off.
 *    NOTE(review): presumably this is the stall guarded by need_stall --
 *    confirm against the full file.
 */
1938 void brw_SAMPLE(struct brw_compile
*p
,
1939 struct brw_reg dest
,
1941 struct brw_reg src0
,
1942 GLuint binding_table_index
,
1946 GLuint response_length
,
1949 GLuint header_present
,
1952 struct intel_context
*intel
= &p
->brw
->intel
;
1953 GLboolean need_stall
= 0;
1955 if (writemask
== 0) {
1956 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1960 /* Hardware doesn't do destination dependency checking on send
1961 * instructions properly. Add a workaround which generates the
1962 * dependency by other means. In practice it seems like this bug
1963 * only crops up for texture samples, and only where registers are
1964 * written by the send and then written again later without being
1965 * read in between. Luckily for us, we already track that
1966 * information and use it to modify the writemask for the
1967 * instruction, so that is a guide for whether a workaround is
1970 if (writemask
!= WRITEMASK_XYZW
) {
1971 GLuint dst_offset
= 0;
1972 GLuint i
, newmask
= 0, len
= 0;
1974 for (i
= 0; i
< 4; i
++) {
1975 if (writemask
& (1<<i
))
1979 for (; i
< 4; i
++) {
1980 if (!(writemask
& (1<<i
)))
1986 if (newmask
!= writemask
) {
1988 /* printf("need stall %x %x\n", newmask , writemask); */
1991 GLboolean dispatch_16
= GL_FALSE
;
1993 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1995 guess_execution_size(p
, p
->current
, dest
);
1996 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1997 dispatch_16
= GL_TRUE
;
1999 newmask
= ~newmask
& WRITEMASK_XYZW
;
2001 brw_push_insn_state(p
);
2003 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2004 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
2006 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
2007 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
2008 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
2010 brw_pop_insn_state(p
);
2012 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
2013 dest
= offset(dest
, dst_offset
);
2015 /* For 16-wide dispatch, masked channels are skipped in the
2016 * response. For 8-wide, masked channels still take up slots,
2017 * and are just not written to.
2020 response_length
= len
* 2;
2025 struct brw_instruction
*insn
;
2027 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2029 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2030 insn
->header
.predicate_control
= 0; /* XXX */
2031 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
2033 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2035 brw_set_dest(p
, insn
, dest
);
2036 brw_set_src0(insn
, src0
);
2037 brw_set_sampler_message(p
->brw
, insn
,
2038 binding_table_index
,
2049 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2051 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2053 brw_push_insn_state(p
);
2054 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2055 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2056 retype(reg
, BRW_REGISTER_TYPE_UD
));
2057 brw_pop_insn_state(p
);
2062 /* All these variables are pretty confusing - we might be better off
2063 * using bitmasks and macros for this, in the old style. Or perhaps
2064 * just having the caller instantiate the fields in dword3 itself.
/* Emit a URB write (a SEND whose descriptor is built by
 * brw_set_urb_message()).
 *
 * What the visible code does:
 *  - gen6_resolve_implied_move() is applied to src0 with msg_reg_nr;
 *  - a SEND is emitted; msg_length must be below BRW_MAX_MRF (asserted);
 *  - dest and src0 are installed, src1 is the immediate 0, and msg_reg_nr
 *    is stored in the destreg/conditional-modifier field.
 */
2066 void brw_urb_WRITE(struct brw_compile
*p
,
2067 struct brw_reg dest
,
2069 struct brw_reg src0
,
2073 GLuint response_length
,
2075 GLboolean writes_complete
,
2079 struct intel_context
*intel
= &p
->brw
->intel
;
2080 struct brw_instruction
*insn
;
2082 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2084 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2086 assert(msg_length
< BRW_MAX_MRF
);
2088 brw_set_dest(p
, insn
, dest
);
2089 brw_set_src0(insn
, src0
);
2090 brw_set_src1(insn
, brw_imm_d(0));
2093 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2095 brw_set_urb_message(p
->brw
,
2108 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2112 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2113 struct brw_instruction
*insn
= &p
->store
[ip
];
2115 switch (insn
->header
.opcode
) {
2116 case BRW_OPCODE_ENDIF
:
2117 case BRW_OPCODE_ELSE
:
2118 case BRW_OPCODE_WHILE
:
2122 assert(!"not reached");
2126 /* There is no DO instruction on gen6, so to find the end of the loop
2127 * we have to see if the loop is jumping back before our start
2131 brw_find_loop_end(struct brw_compile
*p
, int start
)
2136 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2137 struct brw_instruction
*insn
= &p
->store
[ip
];
2139 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2140 if (ip
+ insn
->bits1
.branch_gen6
.jump_count
/ br
< start
)
2144 assert(!"not reached");
2148 /* After program generation, go back and update the UIP and JIP of
2149 * BREAK and CONT instructions to their correct locations.
2152 brw_set_uip_jip(struct brw_compile
*p
)
2154 struct intel_context
*intel
= &p
->brw
->intel
;
2161 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2162 struct brw_instruction
*insn
= &p
->store
[ip
];
2164 switch (insn
->header
.opcode
) {
2165 case BRW_OPCODE_BREAK
:
2166 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2167 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
+ 1);
2169 case BRW_OPCODE_CONTINUE
:
2170 /* JIP is set at CONTINUE emit time, since that's when we
2171 * know where the start of the loop is.
2173 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2174 assert(insn
->bits3
.break_cont
.uip
!= 0);
2175 assert(insn
->bits3
.break_cont
.jip
!= 0);
2181 void brw_ff_sync(struct brw_compile
*p
,
2182 struct brw_reg dest
,
2184 struct brw_reg src0
,
2186 GLuint response_length
,
2189 struct intel_context
*intel
= &p
->brw
->intel
;
2190 struct brw_instruction
*insn
;
2192 gen6_resolve_implied_move(p
, &src0
, msg_reg_nr
);
2194 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2195 brw_set_dest(p
, insn
, dest
);
2196 brw_set_src0(insn
, src0
);
2197 brw_set_src1(insn
, brw_imm_d(0));
2200 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2202 brw_set_ff_sync_message(p
->brw
,