2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size(struct brw_compile
*p
,
45 struct brw_instruction
*insn
,
48 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest(struct brw_compile
*p
,
56 struct brw_instruction
*insn
,
59 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
60 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
61 assert(dest
.nr
< 128);
63 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
64 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
65 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
67 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
68 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
70 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
71 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
72 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
73 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
74 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
77 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
78 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
79 /* even ignored in da16, still need to set as '01' */
80 insn
->bits1
.da16
.dest_horiz_stride
= 1;
84 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
86 /* These are different sizes in align1 vs align16:
88 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
89 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
90 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
91 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
92 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
95 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
96 /* even ignored in da16, still need to set as '01' */
97 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
101 /* NEW: Set the execution size based on dest.width and
102 * insn->compression_control:
104 guess_execution_size(p
, insn
, dest
);
107 extern int reg_type_size
[];
110 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
112 int hstride_for_reg
[] = {0, 1, 2, 4};
113 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
114 int width_for_reg
[] = {1, 2, 4, 8, 16};
115 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
116 int width
, hstride
, vstride
, execsize
;
118 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
119 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
120 * mean the destination has to be 128-bit aligned and the
121 * destination horiz stride has to be a word.
123 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
124 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
125 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
131 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
132 reg
.file
== BRW_ARF_NULL
)
135 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
136 hstride
= hstride_for_reg
[reg
.hstride
];
138 if (reg
.vstride
== 0xf) {
141 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
142 vstride
= vstride_for_reg
[reg
.vstride
];
145 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
146 width
= width_for_reg
[reg
.width
];
148 assert(insn
->header
.execution_size
>= 0 &&
149 insn
->header
.execution_size
< Elements(execsize_for_reg
));
150 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
152 /* Restrictions from 3.3.10: Register Region Restrictions. */
154 assert(execsize
>= width
);
157 if (execsize
== width
&& hstride
!= 0) {
158 assert(vstride
== -1 || vstride
== width
* hstride
);
162 if (execsize
== width
&& hstride
== 0) {
163 /* no restriction on vstride. */
168 assert(hstride
== 0);
172 if (execsize
== 1 && width
== 1) {
173 assert(hstride
== 0);
174 assert(vstride
== 0);
178 if (vstride
== 0 && hstride
== 0) {
182 /* 10. Check destination issues. */
185 static void brw_set_src0( struct brw_instruction
*insn
,
188 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
189 assert(reg
.nr
< 128);
191 validate_reg(insn
, reg
);
193 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
194 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
195 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
196 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
197 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
199 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
200 insn
->bits3
.ud
= reg
.dw1
.ud
;
202 /* Required to set some fields in src1 as well:
204 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
205 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
209 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
210 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
211 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
212 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
215 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
216 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
220 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
222 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
223 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
226 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
230 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
231 if (reg
.width
== BRW_WIDTH_1
&&
232 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
233 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
234 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
235 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
238 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
239 insn
->bits2
.da1
.src0_width
= reg
.width
;
240 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
244 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
245 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
246 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
247 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
249 /* This is an oddity of the fact we're using the same
250 * descriptions for registers in align_16 as align_1:
252 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
253 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
255 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
261 void brw_set_src1( struct brw_instruction
*insn
,
264 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
266 assert(reg
.nr
< 128);
268 validate_reg(insn
, reg
);
270 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
271 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
272 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
273 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
275 /* Only src1 can be immediate in two-argument instructions.
277 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
279 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
280 insn
->bits3
.ud
= reg
.dw1
.ud
;
283 /* This is a hardware restriction, which may or may not be lifted
286 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
287 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
289 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
290 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
291 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
294 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
295 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
298 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
299 if (reg
.width
== BRW_WIDTH_1
&&
300 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
301 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
302 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
303 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
306 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
307 insn
->bits3
.da1
.src1_width
= reg
.width
;
308 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
312 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
313 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
314 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
315 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
317 /* This is an oddity of the fact we're using the same
318 * descriptions for registers in align_16 as align_1:
320 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
321 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
323 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
330 static void brw_set_math_message( struct brw_context
*brw
,
331 struct brw_instruction
*insn
,
333 GLuint response_length
,
336 GLboolean low_precision
,
340 struct intel_context
*intel
= &brw
->intel
;
341 brw_set_src1(insn
, brw_imm_d(0));
343 if (intel
->gen
== 5) {
344 insn
->bits3
.math_gen5
.function
= function
;
345 insn
->bits3
.math_gen5
.int_type
= integer_type
;
346 insn
->bits3
.math_gen5
.precision
= low_precision
;
347 insn
->bits3
.math_gen5
.saturate
= saturate
;
348 insn
->bits3
.math_gen5
.data_type
= dataType
;
349 insn
->bits3
.math_gen5
.snapshot
= 0;
350 insn
->bits3
.math_gen5
.header_present
= 0;
351 insn
->bits3
.math_gen5
.response_length
= response_length
;
352 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
353 insn
->bits3
.math_gen5
.end_of_thread
= 0;
354 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
355 insn
->bits2
.send_gen5
.end_of_thread
= 0;
357 insn
->bits3
.math
.function
= function
;
358 insn
->bits3
.math
.int_type
= integer_type
;
359 insn
->bits3
.math
.precision
= low_precision
;
360 insn
->bits3
.math
.saturate
= saturate
;
361 insn
->bits3
.math
.data_type
= dataType
;
362 insn
->bits3
.math
.response_length
= response_length
;
363 insn
->bits3
.math
.msg_length
= msg_length
;
364 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
365 insn
->bits3
.math
.end_of_thread
= 0;
370 static void brw_set_ff_sync_message(struct brw_context
*brw
,
371 struct brw_instruction
*insn
,
373 GLuint response_length
,
374 GLboolean end_of_thread
)
376 struct intel_context
*intel
= &brw
->intel
;
377 brw_set_src1(insn
, brw_imm_d(0));
379 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
380 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
381 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
382 insn
->bits3
.urb_gen5
.allocate
= allocate
;
383 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
384 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
385 insn
->bits3
.urb_gen5
.header_present
= 1;
386 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
387 insn
->bits3
.urb_gen5
.msg_length
= 1;
388 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
389 if (intel
->gen
>= 6) {
390 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
392 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
393 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
397 static void brw_set_urb_message( struct brw_context
*brw
,
398 struct brw_instruction
*insn
,
402 GLuint response_length
,
403 GLboolean end_of_thread
,
406 GLuint swizzle_control
)
408 struct intel_context
*intel
= &brw
->intel
;
409 brw_set_src1(insn
, brw_imm_d(0));
411 if (intel
->gen
>= 5) {
412 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
413 insn
->bits3
.urb_gen5
.offset
= offset
;
414 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
415 insn
->bits3
.urb_gen5
.allocate
= allocate
;
416 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
417 insn
->bits3
.urb_gen5
.complete
= complete
;
418 insn
->bits3
.urb_gen5
.header_present
= 1;
419 insn
->bits3
.urb_gen5
.response_length
= response_length
;
420 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
421 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
422 if (intel
->gen
>= 6) {
423 /* For SNB, the SFID bits moved to the condmod bits, and
424 * EOT stayed in bits3 above. Does the EOT bit setting
425 * below on Ironlake even do anything?
427 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
429 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
430 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
433 insn
->bits3
.urb
.opcode
= 0; /* ? */
434 insn
->bits3
.urb
.offset
= offset
;
435 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
436 insn
->bits3
.urb
.allocate
= allocate
;
437 insn
->bits3
.urb
.used
= used
; /* ? */
438 insn
->bits3
.urb
.complete
= complete
;
439 insn
->bits3
.urb
.response_length
= response_length
;
440 insn
->bits3
.urb
.msg_length
= msg_length
;
441 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
442 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
446 static void brw_set_dp_write_message( struct brw_context
*brw
,
447 struct brw_instruction
*insn
,
448 GLuint binding_table_index
,
452 GLboolean header_present
,
453 GLuint pixel_scoreboard_clear
,
454 GLuint response_length
,
455 GLuint end_of_thread
,
456 GLuint send_commit_msg
)
458 struct intel_context
*intel
= &brw
->intel
;
459 brw_set_src1(insn
, brw_imm_ud(0));
461 if (intel
->gen
>= 6) {
462 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
463 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
464 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
465 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
466 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
467 insn
->bits3
.dp_render_cache
.header_present
= header_present
;
468 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
469 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
470 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
471 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
472 /* XXX really need below? */
473 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
474 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
475 } else if (intel
->gen
== 5) {
476 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
477 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
478 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
479 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
480 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
481 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
482 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
483 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
484 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
485 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
486 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
488 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
489 insn
->bits3
.dp_write
.msg_control
= msg_control
;
490 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
491 insn
->bits3
.dp_write
.msg_type
= msg_type
;
492 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
493 insn
->bits3
.dp_write
.response_length
= response_length
;
494 insn
->bits3
.dp_write
.msg_length
= msg_length
;
495 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
496 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
501 brw_set_dp_read_message(struct brw_context
*brw
,
502 struct brw_instruction
*insn
,
503 GLuint binding_table_index
,
508 GLuint response_length
)
510 struct intel_context
*intel
= &brw
->intel
;
511 brw_set_src1(insn
, brw_imm_d(0));
513 if (intel
->gen
>= 6) {
514 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
515 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
516 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= 0;
517 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
518 insn
->bits3
.dp_render_cache
.send_commit_msg
= 0;
519 insn
->bits3
.dp_render_cache
.header_present
= 1;
520 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
521 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
522 insn
->bits3
.dp_render_cache
.end_of_thread
= 0;
523 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
524 /* XXX really need below? */
525 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
526 insn
->bits2
.send_gen5
.end_of_thread
= 0;
527 } else if (intel
->gen
== 5) {
528 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
529 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
530 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
531 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
532 insn
->bits3
.dp_read_gen5
.header_present
= 1;
533 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
534 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
535 insn
->bits3
.dp_read_gen5
.pad1
= 0;
536 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
537 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
538 insn
->bits2
.send_gen5
.end_of_thread
= 0;
540 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
541 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
542 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
543 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
544 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
545 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
546 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
547 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
548 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
552 static void brw_set_sampler_message(struct brw_context
*brw
,
553 struct brw_instruction
*insn
,
554 GLuint binding_table_index
,
557 GLuint response_length
,
560 GLuint header_present
,
563 struct intel_context
*intel
= &brw
->intel
;
565 brw_set_src1(insn
, brw_imm_d(0));
567 if (intel
->gen
>= 5) {
568 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
569 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
570 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
571 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
572 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
573 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
574 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
575 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
577 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
579 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
580 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
582 } else if (intel
->is_g4x
) {
583 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
584 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
585 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
586 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
587 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
588 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
589 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
591 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
592 insn
->bits3
.sampler
.sampler
= sampler
;
593 insn
->bits3
.sampler
.msg_type
= msg_type
;
594 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
595 insn
->bits3
.sampler
.response_length
= response_length
;
596 insn
->bits3
.sampler
.msg_length
= msg_length
;
597 insn
->bits3
.sampler
.end_of_thread
= eot
;
598 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
604 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
607 struct brw_instruction
*insn
;
609 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
611 insn
= &p
->store
[p
->nr_insn
++];
612 memcpy(insn
, p
->current
, sizeof(*insn
));
614 /* Reset this one-shot flag:
617 if (p
->current
->header
.destreg__conditionalmod
) {
618 p
->current
->header
.destreg__conditionalmod
= 0;
619 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
622 insn
->header
.opcode
= opcode
;
627 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
632 struct brw_instruction
*insn
= next_insn(p
, opcode
);
633 brw_set_dest(p
, insn
, dest
);
634 brw_set_src0(insn
, src
);
638 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
642 struct brw_reg src1
)
644 struct brw_instruction
*insn
= next_insn(p
, opcode
);
645 brw_set_dest(p
, insn
, dest
);
646 brw_set_src0(insn
, src0
);
647 brw_set_src1(insn
, src1
);
652 /***********************************************************************
653 * Convenience routines.
/* Stamp out a public brw_<OP>() wrapper for a one-source opcode. */
#define ALU1(OP)                                            \
struct brw_instruction *brw_##OP(struct brw_compile *p,     \
              struct brw_reg dest,                          \
              struct brw_reg src0)                          \
{                                                           \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);         \
}

/* Stamp out a public brw_<OP>() wrapper for a two-source opcode. */
#define ALU2(OP)                                            \
struct brw_instruction *brw_##OP(struct brw_compile *p,     \
              struct brw_reg dest,                          \
              struct brw_reg src0,                          \
              struct brw_reg src1)                          \
{                                                           \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);   \
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 */
#define ROUND(OP)                                                             \
void brw_##OP(struct brw_compile *p,                                          \
              struct brw_reg dest,                                            \
              struct brw_reg src)                                             \
{                                                                             \
   struct brw_instruction *rnd, *add;                                         \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                       \
   brw_set_dest(p, rnd, dest);                                                \
   brw_set_src0(rnd, src);                                                    \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */  \
                                                                              \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                             \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;                      \
}
721 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
727 if (src0
.type
== BRW_REGISTER_TYPE_F
||
728 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
729 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
730 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
731 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
734 if (src1
.type
== BRW_REGISTER_TYPE_F
||
735 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
736 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
737 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
738 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
741 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
744 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
750 if (src0
.type
== BRW_REGISTER_TYPE_D
||
751 src0
.type
== BRW_REGISTER_TYPE_UD
||
752 src1
.type
== BRW_REGISTER_TYPE_D
||
753 src1
.type
== BRW_REGISTER_TYPE_UD
) {
754 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
757 if (src0
.type
== BRW_REGISTER_TYPE_F
||
758 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
759 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
760 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
761 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
764 if (src1
.type
== BRW_REGISTER_TYPE_F
||
765 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
766 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
767 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
768 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
771 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
772 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
773 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
774 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
776 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
780 void brw_NOP(struct brw_compile
*p
)
782 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
783 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
784 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
785 brw_set_src1(insn
, brw_imm_ud(0x0));
792 /***********************************************************************
793 * Comparisons, if/else/endif
796 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
801 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
803 insn
->header
.execution_size
= 1;
804 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
805 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
807 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
812 /* EU takes the value from the flag register and pushes it onto some
813 * sort of a stack (presumably merging with any flag value already on
814 * the stack). Within an if block, the flags at the top of the stack
815 * control execution on each channel of the unit, eg. on each of the
816 * 16 pixel values in our wm programs.
818 * When the matching 'else' instruction is reached (presumably by
819 * countdown of the instruction count patched in by our ELSE/ENDIF
820 * functions), the relevent flags are inverted.
822 * When the matching 'endif' instruction is reached, the flags are
823 * popped off. If the stack is now empty, normal execution resumes.
825 * No attempt is made to deal with stack overflow (14 elements?).
827 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
829 struct intel_context
*intel
= &p
->brw
->intel
;
830 struct brw_instruction
*insn
;
832 if (p
->single_program_flow
) {
833 assert(execute_size
== BRW_EXECUTE_1
);
835 insn
= next_insn(p
, BRW_OPCODE_ADD
);
836 insn
->header
.predicate_inverse
= 1;
838 insn
= next_insn(p
, BRW_OPCODE_IF
);
841 /* Override the defaults for this instruction:
843 if (intel
->gen
< 6) {
844 brw_set_dest(p
, insn
, brw_ip_reg());
845 brw_set_src0(insn
, brw_ip_reg());
846 brw_set_src1(insn
, brw_imm_d(0x0));
848 brw_set_dest(p
, insn
, brw_imm_w(0));
849 insn
->bits1
.branch_gen6
.jump_count
= 0;
850 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
851 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
854 insn
->header
.execution_size
= execute_size
;
855 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
856 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
857 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
858 if (!p
->single_program_flow
)
859 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
861 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
866 struct brw_instruction
*
867 brw_IF_gen6(struct brw_compile
*p
, uint32_t conditional
,
868 struct brw_reg src0
, struct brw_reg src1
)
870 struct brw_instruction
*insn
;
872 insn
= next_insn(p
, BRW_OPCODE_IF
);
874 brw_set_dest(p
, insn
, brw_imm_w(0));
875 insn
->header
.execution_size
= BRW_EXECUTE_8
;
876 insn
->bits1
.branch_gen6
.jump_count
= 0;
877 brw_set_src0(insn
, src0
);
878 brw_set_src1(insn
, src1
);
880 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
881 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
882 insn
->header
.destreg__conditionalmod
= conditional
;
884 if (!p
->single_program_flow
)
885 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
890 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
891 struct brw_instruction
*if_insn
)
893 struct intel_context
*intel
= &p
->brw
->intel
;
894 struct brw_instruction
*insn
;
897 /* jump count is for 64bit data chunk each, so one 128bit
898 instruction requires 2 chunks. */
902 if (p
->single_program_flow
) {
903 insn
= next_insn(p
, BRW_OPCODE_ADD
);
905 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
908 if (intel
->gen
< 6) {
909 brw_set_dest(p
, insn
, brw_ip_reg());
910 brw_set_src0(insn
, brw_ip_reg());
911 brw_set_src1(insn
, brw_imm_d(0x0));
913 brw_set_dest(p
, insn
, brw_imm_w(0));
914 insn
->bits1
.branch_gen6
.jump_count
= 0;
915 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
916 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
919 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
920 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
921 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
922 if (!p
->single_program_flow
)
923 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
925 /* Patch the if instruction to point at this instruction.
927 if (p
->single_program_flow
) {
928 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
930 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
932 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
934 if (intel
->gen
< 6) {
935 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
936 if_insn
->bits3
.if_else
.pop_count
= 0;
937 if_insn
->bits3
.if_else
.pad0
= 0;
939 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
946 void brw_ENDIF(struct brw_compile
*p
,
947 struct brw_instruction
*patch_insn
)
949 struct intel_context
*intel
= &p
->brw
->intel
;
955 if (p
->single_program_flow
) {
956 /* In single program flow mode, there's no need to execute an ENDIF,
957 * since we don't need to do any stack operations, and if we're executing
958 * currently, we want to just continue executing.
960 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
962 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
964 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
966 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
968 if (intel
->gen
< 6) {
969 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
970 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
971 brw_set_src1(insn
, brw_imm_d(0x0));
973 brw_set_dest(p
, insn
, brw_imm_w(0));
974 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
975 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
978 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
979 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
980 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
981 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
984 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
986 assert(patch_insn
->bits1
.branch_gen6
.jump_count
== 0);
988 /* Patch the if or else instructions to point at this or the next
989 * instruction respectively.
991 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
992 if (intel
->gen
< 6) {
993 /* Turn it into an IFF, which means no mask stack operations for
994 * all-false and jumping past the ENDIF.
996 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
997 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
998 patch_insn
->bits3
.if_else
.pop_count
= 0;
999 patch_insn
->bits3
.if_else
.pad0
= 0;
1001 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1002 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1005 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
1006 if (intel
->gen
< 6) {
1007 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1010 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1011 patch_insn
->bits3
.if_else
.pop_count
= 1;
1012 patch_insn
->bits3
.if_else
.pad0
= 0;
1014 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1015 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1019 /* Also pop item off the stack in the endif instruction:
1021 if (intel
->gen
< 6) {
1022 insn
->bits3
.if_else
.jump_count
= 0;
1023 insn
->bits3
.if_else
.pop_count
= 1;
1024 insn
->bits3
.if_else
.pad0
= 0;
1026 insn
->bits1
.branch_gen6
.jump_count
= 2;
1031 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1033 struct intel_context
*intel
= &p
->brw
->intel
;
1034 struct brw_instruction
*insn
;
1036 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1037 if (intel
->gen
>= 6) {
1038 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1039 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1040 brw_set_src1(insn
, brw_imm_d(0x0));
1042 brw_set_dest(p
, insn
, brw_ip_reg());
1043 brw_set_src0(insn
, brw_ip_reg());
1044 brw_set_src1(insn
, brw_imm_d(0x0));
1045 insn
->bits3
.if_else
.pad0
= 0;
1046 insn
->bits3
.if_else
.pop_count
= pop_count
;
1048 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1049 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1054 struct brw_instruction
*brw_CONT_gen6(struct brw_compile
*p
,
1055 struct brw_instruction
*do_insn
)
1057 struct brw_instruction
*insn
;
1060 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1061 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1062 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1063 brw_set_dest(p
, insn
, brw_ip_reg());
1064 brw_set_src0(insn
, brw_ip_reg());
1065 brw_set_src1(insn
, brw_imm_d(0x0));
1067 insn
->bits3
.break_cont
.uip
= br
* (do_insn
- insn
);
1069 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1070 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1074 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1076 struct brw_instruction
*insn
;
1077 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1078 brw_set_dest(p
, insn
, brw_ip_reg());
1079 brw_set_src0(insn
, brw_ip_reg());
1080 brw_set_src1(insn
, brw_imm_d(0x0));
1081 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1082 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1083 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1084 insn
->bits3
.if_else
.pad0
= 0;
1085 insn
->bits3
.if_else
.pop_count
= pop_count
;
1091 * The DO/WHILE is just an unterminated loop -- break or continue are
1092 * used for control within the loop. We have a few ways they can be
1095 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1096 * jip and no DO instruction.
1098 * For non-uniform control flow pre-gen6, there's a DO instruction to
1099 * push the mask, and a WHILE to jump back, and BREAK to get out and
1102 * For gen6, there's no more mask stack, so no need for DO. WHILE
1103 * just points back to the first instruction of the loop.
1105 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1107 struct intel_context
*intel
= &p
->brw
->intel
;
1109 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1110 return &p
->store
[p
->nr_insn
];
1112 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1114 /* Override the defaults for this instruction:
1116 brw_set_dest(p
, insn
, brw_null_reg());
1117 brw_set_src0(insn
, brw_null_reg());
1118 brw_set_src1(insn
, brw_null_reg());
1120 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1121 insn
->header
.execution_size
= execute_size
;
1122 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1123 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1124 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1132 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1133 struct brw_instruction
*do_insn
)
1135 struct intel_context
*intel
= &p
->brw
->intel
;
1136 struct brw_instruction
*insn
;
1139 if (intel
->gen
>= 5)
1142 if (intel
->gen
>= 6) {
1143 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1145 brw_set_dest(p
, insn
, brw_imm_w(0));
1146 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1147 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1148 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1150 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1151 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1153 if (p
->single_program_flow
) {
1154 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1156 brw_set_dest(p
, insn
, brw_ip_reg());
1157 brw_set_src0(insn
, brw_ip_reg());
1158 brw_set_src1(insn
, brw_imm_d((do_insn
- insn
) * 16));
1159 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1161 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1163 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1165 brw_set_dest(p
, insn
, brw_ip_reg());
1166 brw_set_src0(insn
, brw_ip_reg());
1167 brw_set_src1(insn
, brw_imm_d(0));
1169 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1170 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1171 insn
->bits3
.if_else
.pop_count
= 0;
1172 insn
->bits3
.if_else
.pad0
= 0;
1175 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1176 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1184 void brw_land_fwd_jump(struct brw_compile
*p
,
1185 struct brw_instruction
*jmp_insn
)
1187 struct intel_context
*intel
= &p
->brw
->intel
;
1188 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1191 if (intel
->gen
>= 5)
1194 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1195 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1197 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1202 /* To integrate with the above, it makes sense that the comparison
1203 * instruction should populate the flag register. It might be simpler
1204 * just to use the flag reg for most WM tasks?
1206 void brw_CMP(struct brw_compile
*p
,
1207 struct brw_reg dest
,
1209 struct brw_reg src0
,
1210 struct brw_reg src1
)
1212 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1214 insn
->header
.destreg__conditionalmod
= conditional
;
1215 brw_set_dest(p
, insn
, dest
);
1216 brw_set_src0(insn
, src0
);
1217 brw_set_src1(insn
, src1
);
1219 /* guess_execution_size(insn, src0); */
1222 /* Make it so that future instructions will use the computed flag
1223 * value until brw_set_predicate_control_flag_value() is called
1226 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1228 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1229 p
->flag_value
= 0xff;
1233 /* Issue 'wait' instruction for n1, host could program MMIO
1234 to wake up thread. */
1235 void brw_WAIT (struct brw_compile
*p
)
1237 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1238 struct brw_reg src
= brw_notification_1_reg();
1240 brw_set_dest(p
, insn
, src
);
1241 brw_set_src0(insn
, src
);
1242 brw_set_src1(insn
, brw_null_reg());
1243 insn
->header
.execution_size
= 0; /* must */
1244 insn
->header
.predicate_control
= 0;
1245 insn
->header
.compression_control
= 0;
1249 /***********************************************************************
1250 * Helpers for the various SEND message types:
1253 /** Extended math function, float[8].
1255 void brw_math( struct brw_compile
*p
,
1256 struct brw_reg dest
,
1264 struct intel_context
*intel
= &p
->brw
->intel
;
1266 if (intel
->gen
>= 6) {
1267 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1269 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1270 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1272 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1273 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1275 /* Source modifiers are ignored for extended math instructions. */
1276 assert(!src
.negate
);
1279 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1280 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1281 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1284 /* Math is the same ISA format as other opcodes, except that CondModifier
1285 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1287 insn
->header
.destreg__conditionalmod
= function
;
1288 insn
->header
.saturate
= saturate
;
1290 brw_set_dest(p
, insn
, dest
);
1291 brw_set_src0(insn
, src
);
1292 brw_set_src1(insn
, brw_null_reg());
1294 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1295 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1296 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1297 /* Example code doesn't set predicate_control for send
1300 insn
->header
.predicate_control
= 0;
1301 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1303 brw_set_dest(p
, insn
, dest
);
1304 brw_set_src0(insn
, src
);
1305 brw_set_math_message(p
->brw
,
1307 msg_length
, response_length
,
1309 BRW_MATH_INTEGER_UNSIGNED
,
1316 /** Extended math function, float[8].
1318 void brw_math2(struct brw_compile
*p
,
1319 struct brw_reg dest
,
1321 struct brw_reg src0
,
1322 struct brw_reg src1
)
1324 struct intel_context
*intel
= &p
->brw
->intel
;
1325 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1327 assert(intel
->gen
>= 6);
1331 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1332 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1333 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1335 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1336 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1337 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1339 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1340 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1341 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1342 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1345 /* Source modifiers are ignored for extended math instructions. */
1346 assert(!src0
.negate
);
1348 assert(!src1
.negate
);
1351 /* Math is the same ISA format as other opcodes, except that CondModifier
1352 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1354 insn
->header
.destreg__conditionalmod
= function
;
1356 brw_set_dest(p
, insn
, dest
);
1357 brw_set_src0(insn
, src0
);
1358 brw_set_src1(insn
, src1
);
1362 * Extended math function, float[16].
1363 * Use 2 send instructions.
1365 void brw_math_16( struct brw_compile
*p
,
1366 struct brw_reg dest
,
1373 struct intel_context
*intel
= &p
->brw
->intel
;
1374 struct brw_instruction
*insn
;
1375 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1376 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1378 if (intel
->gen
>= 6) {
1379 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1381 /* Math is the same ISA format as other opcodes, except that CondModifier
1382 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1384 insn
->header
.destreg__conditionalmod
= function
;
1385 insn
->header
.saturate
= saturate
;
1387 /* Source modifiers are ignored for extended math instructions. */
1388 assert(!src
.negate
);
1391 brw_set_dest(p
, insn
, dest
);
1392 brw_set_src0(insn
, src
);
1393 brw_set_src1(insn
, brw_null_reg());
1397 /* First instruction:
1399 brw_push_insn_state(p
);
1400 brw_set_predicate_control_flag_value(p
, 0xff);
1401 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1403 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1404 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1406 brw_set_dest(p
, insn
, dest
);
1407 brw_set_src0(insn
, src
);
1408 brw_set_math_message(p
->brw
,
1410 msg_length
, response_length
,
1412 BRW_MATH_INTEGER_UNSIGNED
,
1415 BRW_MATH_DATA_VECTOR
);
1417 /* Second instruction:
1419 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1420 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1421 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1423 brw_set_dest(p
, insn
, offset(dest
,1));
1424 brw_set_src0(insn
, src
);
1425 brw_set_math_message(p
->brw
,
1427 msg_length
, response_length
,
1429 BRW_MATH_INTEGER_UNSIGNED
,
1432 BRW_MATH_DATA_VECTOR
);
1434 brw_pop_insn_state(p
);
1439 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1440 * using a constant offset per channel.
1442 * The offset must be aligned to oword size (16 bytes). Used for
1443 * register spilling.
1445 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1450 struct intel_context
*intel
= &p
->brw
->intel
;
1451 uint32_t msg_control
;
1454 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1456 if (num_regs
== 1) {
1457 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1460 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1464 /* Set up the message header. This is g0, with g0.2 filled with
1465 * the offset. We don't want to leave our offset around in g0 or
1466 * it'll screw up texture samples, so set it up inside the message
1470 brw_push_insn_state(p
);
1471 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1472 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1474 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1476 /* set message header global offset field (reg 0, element 2) */
1478 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1480 2), BRW_REGISTER_TYPE_UD
),
1481 brw_imm_ud(offset
));
1483 brw_pop_insn_state(p
);
1487 struct brw_reg dest
;
1488 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1489 int send_commit_msg
;
1490 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1491 BRW_REGISTER_TYPE_UW
);
1493 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1494 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1495 src_header
= vec16(src_header
);
1497 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1498 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1500 /* Until gen6, writes followed by reads from the same location
1501 * are not guaranteed to be ordered unless write_commit is set.
1502 * If set, then a no-op write is issued to the destination
1503 * register to set a dependency, and a read from the destination
1504 * can be used to ensure the ordering.
1506 * For gen6, only writes between different threads need ordering
1507 * protection. Our use of DP writes is all about register
1508 * spilling within a thread.
1510 if (intel
->gen
>= 6) {
1511 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1512 send_commit_msg
= 0;
1515 send_commit_msg
= 1;
1518 brw_set_dest(p
, insn
, dest
);
1519 brw_set_src0(insn
, brw_null_reg());
1521 brw_set_dp_write_message(p
->brw
,
1523 255, /* binding table index (255=stateless) */
1525 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1527 GL_TRUE
, /* header_present */
1528 0, /* pixel scoreboard */
1529 send_commit_msg
, /* response_length */
1537 * Read a block of owords (half a GRF each) from the scratch buffer
1538 * using a constant index per channel.
1540 * Offset must be aligned to oword size (16 bytes). Used for register
1544 brw_oword_block_read_scratch(struct brw_compile
*p
,
1545 struct brw_reg dest
,
1550 uint32_t msg_control
;
1553 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1554 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1556 if (num_regs
== 1) {
1557 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1560 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1565 brw_push_insn_state(p
);
1566 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1567 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1569 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1571 /* set message header global offset field (reg 0, element 2) */
1573 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1575 2), BRW_REGISTER_TYPE_UD
),
1576 brw_imm_ud(offset
));
1578 brw_pop_insn_state(p
);
1582 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1584 assert(insn
->header
.predicate_control
== 0);
1585 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1586 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1588 brw_set_dest(p
, insn
, dest
); /* UW? */
1589 brw_set_src0(insn
, brw_null_reg());
1591 brw_set_dp_read_message(p
->brw
,
1593 255, /* binding table index (255=stateless) */
1595 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1596 1, /* target cache (render/scratch) */
1603 * Read a float[4] vector from the data port Data Cache (const buffer).
1604 * Location (in buffer) should be a multiple of 16.
1605 * Used for fetching shader constants.
1607 void brw_oword_block_read(struct brw_compile
*p
,
1608 struct brw_reg dest
,
1611 uint32_t bind_table_index
)
1613 struct intel_context
*intel
= &p
->brw
->intel
;
1615 /* On newer hardware, offset is in units of owords. */
1616 if (intel
->gen
>= 6)
1619 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1621 brw_push_insn_state(p
);
1622 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1623 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1624 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1626 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1628 /* set message header global offset field (reg 0, element 2) */
1630 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1632 2), BRW_REGISTER_TYPE_UD
),
1633 brw_imm_ud(offset
));
1635 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1636 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1638 /* cast dest to a uword[8] vector */
1639 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1641 brw_set_dest(p
, insn
, dest
);
1642 if (intel
->gen
>= 6) {
1643 brw_set_src0(insn
, mrf
);
1645 brw_set_src0(insn
, brw_null_reg());
1648 brw_set_dp_read_message(p
->brw
,
1651 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1652 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1653 0, /* source cache = data cache */
1655 1); /* response_length (1 reg, 2 owords!) */
1657 brw_pop_insn_state(p
);
1661 * Read a set of dwords from the data port Data Cache (const buffer).
1663 * Location (in buffer) appears as UD offsets in the register after
1664 * the provided mrf header reg.
1666 void brw_dword_scattered_read(struct brw_compile
*p
,
1667 struct brw_reg dest
,
1669 uint32_t bind_table_index
)
1671 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1673 brw_push_insn_state(p
);
1674 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1675 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1676 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1677 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1678 brw_pop_insn_state(p
);
1680 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1681 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1683 /* cast dest to a uword[8] vector */
1684 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1686 brw_set_dest(p
, insn
, dest
);
1687 brw_set_src0(insn
, brw_null_reg());
1689 brw_set_dp_read_message(p
->brw
,
1692 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1693 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1694 0, /* source cache = data cache */
1696 1); /* response_length */
1702 * Read float[4] constant(s) from VS constant buffer.
1703 * For relative addressing, two float[4] constants will be read into 'dest'.
1704 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1706 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1707 struct brw_reg dest
,
1709 GLuint bind_table_index
)
1711 struct intel_context
*intel
= &p
->brw
->intel
;
1712 struct brw_instruction
*insn
;
1713 GLuint msg_reg_nr
= 1;
1715 if (intel
->gen
>= 6)
1718 /* Setup MRF[1] with location/offset into const buffer */
1719 brw_push_insn_state(p
);
1720 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1721 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1722 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1723 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1724 BRW_REGISTER_TYPE_UD
),
1725 brw_imm_ud(location
));
1726 brw_pop_insn_state(p
);
1728 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1730 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1731 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1732 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1733 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1735 brw_set_dest(p
, insn
, dest
);
1736 if (intel
->gen
>= 6) {
1737 brw_set_src0(insn
, brw_message_reg(msg_reg_nr
));
1739 brw_set_src0(insn
, brw_null_reg());
1742 brw_set_dp_read_message(p
->brw
,
1746 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1747 0, /* source cache = data cache */
1749 1); /* response_length (1 Oword) */
1753 * Read a float[4] constant per vertex from VS constant buffer, with
1754 * relative addressing.
1756 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1757 struct brw_reg dest
,
1758 struct brw_reg addr_reg
,
1760 GLuint bind_table_index
)
1762 struct intel_context
*intel
= &p
->brw
->intel
;
1765 /* Setup MRF[1] with offset into const buffer */
1766 brw_push_insn_state(p
);
1767 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1768 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1769 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1771 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1774 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1775 addr_reg
, brw_imm_d(offset
));
1776 brw_pop_insn_state(p
);
1778 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1780 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1781 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1782 insn
->header
.destreg__conditionalmod
= 0;
1783 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1785 brw_set_dest(p
, insn
, dest
);
1786 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1788 if (intel
->gen
== 6)
1789 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1790 else if (intel
->gen
== 5 || intel
->is_g4x
)
1791 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1793 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1795 brw_set_dp_read_message(p
->brw
,
1798 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1800 0, /* source cache = data cache */
1802 1); /* response_length */
1807 void brw_fb_WRITE(struct brw_compile
*p
,
1809 struct brw_reg dest
,
1811 struct brw_reg src0
,
1812 GLuint binding_table_index
,
1814 GLuint response_length
,
1817 struct intel_context
*intel
= &p
->brw
->intel
;
1818 struct brw_instruction
*insn
;
1819 GLuint msg_control
, msg_type
;
1820 GLboolean header_present
= GL_TRUE
;
1822 if (intel
->gen
>= 6 && binding_table_index
== 0) {
1823 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
1825 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1827 /* The execution mask is ignored for render target writes. */
1828 insn
->header
.predicate_control
= 0;
1829 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1831 if (intel
->gen
>= 6) {
1832 if (msg_length
== 4)
1833 header_present
= GL_FALSE
;
1835 /* headerless version, just submit color payload */
1836 src0
= brw_message_reg(msg_reg_nr
);
1838 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6
;
1840 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1842 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1845 if (dispatch_width
== 16)
1846 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1848 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1850 brw_set_dest(p
, insn
, dest
);
1851 brw_set_src0(insn
, src0
);
1852 brw_set_dp_write_message(p
->brw
,
1854 binding_table_index
,
1859 1, /* pixel scoreboard */
1862 0 /* send_commit_msg */);
1867 * Texture sample instruction.
1868 * Note: the msg_type plus msg_length values determine exactly what kind
1869 * of sampling operation is performed. See volume 4, page 161 of docs.
1871 void brw_SAMPLE(struct brw_compile
*p
,
1872 struct brw_reg dest
,
1874 struct brw_reg src0
,
1875 GLuint binding_table_index
,
1879 GLuint response_length
,
1882 GLuint header_present
,
1885 struct intel_context
*intel
= &p
->brw
->intel
;
1886 GLboolean need_stall
= 0;
1888 if (writemask
== 0) {
1889 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1893 /* Hardware doesn't do destination dependency checking on send
1894 * instructions properly. Add a workaround which generates the
1895 * dependency by other means. In practice it seems like this bug
1896 * only crops up for texture samples, and only where registers are
1897 * written by the send and then written again later without being
1898 * read in between. Luckily for us, we already track that
1899 * information and use it to modify the writemask for the
1900 * instruction, so that is a guide for whether a workaround is
1903 if (writemask
!= WRITEMASK_XYZW
) {
1904 GLuint dst_offset
= 0;
1905 GLuint i
, newmask
= 0, len
= 0;
1907 for (i
= 0; i
< 4; i
++) {
1908 if (writemask
& (1<<i
))
1912 for (; i
< 4; i
++) {
1913 if (!(writemask
& (1<<i
)))
1919 if (newmask
!= writemask
) {
1921 /* printf("need stall %x %x\n", newmask , writemask); */
1924 GLboolean dispatch_16
= GL_FALSE
;
1926 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1928 guess_execution_size(p
, p
->current
, dest
);
1929 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1930 dispatch_16
= GL_TRUE
;
1932 newmask
= ~newmask
& WRITEMASK_XYZW
;
1934 brw_push_insn_state(p
);
1936 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1937 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1939 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1940 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1942 brw_pop_insn_state(p
);
1944 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1945 dest
= offset(dest
, dst_offset
);
1947 /* For 16-wide dispatch, masked channels are skipped in the
1948 * response. For 8-wide, masked channels still take up slots,
1949 * and are just not written to.
1952 response_length
= len
* 2;
1957 struct brw_instruction
*insn
;
1959 /* Sandybridge doesn't have the implied move for SENDs,
1960 * and the first message register index comes from src0.
1962 if (intel
->gen
>= 6) {
1963 if (src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1964 src0
.nr
!= BRW_ARF_NULL
) {
1965 brw_push_insn_state(p
);
1966 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1967 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1968 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), src0
.type
), src0
);
1969 brw_pop_insn_state(p
);
1971 src0
= brw_message_reg(msg_reg_nr
);
1974 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1975 insn
->header
.predicate_control
= 0; /* XXX */
1976 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1978 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1980 brw_set_dest(p
, insn
, dest
);
1981 brw_set_src0(insn
, src0
);
1982 brw_set_sampler_message(p
->brw
, insn
,
1983 binding_table_index
,
1994 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1996 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1998 brw_push_insn_state(p
);
1999 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2000 brw_MOV(p
, reg
, reg
);
2001 brw_pop_insn_state(p
);
2006 /* All these variables are pretty confusing - we might be better off
2007 * using bitmasks and macros for this, in the old style. Or perhaps
2008 * just having the caller instantiate the fields in dword3 itself.
2010 void brw_urb_WRITE(struct brw_compile
*p
,
2011 struct brw_reg dest
,
2013 struct brw_reg src0
,
2017 GLuint response_length
,
2019 GLboolean writes_complete
,
2023 struct intel_context
*intel
= &p
->brw
->intel
;
2024 struct brw_instruction
*insn
;
2026 /* Sandybridge doesn't have the implied move for SENDs,
2027 * and the first message register index comes from src0.
2029 if (intel
->gen
>= 6) {
2030 brw_push_insn_state(p
);
2031 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2032 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
2033 brw_pop_insn_state(p
);
2034 src0
= brw_message_reg(msg_reg_nr
);
2037 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2039 assert(msg_length
< BRW_MAX_MRF
);
2041 brw_set_dest(p
, insn
, dest
);
2042 brw_set_src0(insn
, src0
);
2043 brw_set_src1(insn
, brw_imm_d(0));
2046 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2048 brw_set_urb_message(p
->brw
,
2061 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2065 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2066 struct brw_instruction
*insn
= &p
->store
[ip
];
2068 switch (insn
->header
.opcode
) {
2069 case BRW_OPCODE_ENDIF
:
2070 case BRW_OPCODE_ELSE
:
2071 case BRW_OPCODE_WHILE
:
2075 assert(!"not reached");
2079 /* There is no DO instruction on gen6, so to find the end of the loop
2080 * we have to see if the loop is jumping back before our start
2084 brw_find_loop_end(struct brw_compile
*p
, int start
)
2089 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2090 struct brw_instruction
*insn
= &p
->store
[ip
];
2092 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2093 if (ip
+ insn
->bits1
.branch_gen6
.jump_count
/ br
< start
)
2097 assert(!"not reached");
2101 /* After program generation, go back and update the UIP and JIP of
2102 * BREAK and CONT instructions to their correct locations.
2105 brw_set_uip_jip(struct brw_compile
*p
)
2107 struct intel_context
*intel
= &p
->brw
->intel
;
2114 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2115 struct brw_instruction
*insn
= &p
->store
[ip
];
2117 switch (insn
->header
.opcode
) {
2118 case BRW_OPCODE_BREAK
:
2119 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2120 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
+ 1);
2122 case BRW_OPCODE_CONTINUE
:
2123 /* JIP is set at CONTINUE emit time, since that's when we
2124 * know where the start of the loop is.
2126 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2127 assert(insn
->bits3
.break_cont
.uip
!= 0);
2128 assert(insn
->bits3
.break_cont
.jip
!= 0);
2134 void brw_ff_sync(struct brw_compile
*p
,
2135 struct brw_reg dest
,
2137 struct brw_reg src0
,
2139 GLuint response_length
,
2142 struct intel_context
*intel
= &p
->brw
->intel
;
2143 struct brw_instruction
*insn
;
2145 /* Sandybridge doesn't have the implied move for SENDs,
2146 * and the first message register index comes from src0.
2148 if (intel
->gen
>= 6) {
2149 brw_push_insn_state(p
);
2150 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2151 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
2152 retype(src0
, BRW_REGISTER_TYPE_UD
));
2153 brw_pop_insn_state(p
);
2154 src0
= brw_message_reg(msg_reg_nr
);
2157 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2158 brw_set_dest(p
, insn
, dest
);
2159 brw_set_src0(insn
, src0
);
2160 brw_set_src1(insn
, brw_imm_d(0));
2163 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2165 brw_set_ff_sync_message(p
->brw
,