2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size(struct brw_compile
*p
,
45 struct brw_instruction
*insn
,
48 if (reg
.width
== BRW_WIDTH_8
&& p
->compressed
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest(struct brw_compile
*p
,
56 struct brw_instruction
*insn
,
59 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
60 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
61 assert(dest
.nr
< 128);
63 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
64 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
65 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
67 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
68 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
70 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
71 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
72 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
73 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
74 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
77 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
78 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
79 /* even ignored in da16, still need to set as '01' */
80 insn
->bits1
.da16
.dest_horiz_stride
= 1;
84 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
86 /* These are different sizes in align1 vs align16:
88 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
89 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
90 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
91 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
92 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
95 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
96 /* even ignored in da16, still need to set as '01' */
97 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
101 /* NEW: Set the execution size based on dest.width and
102 * insn->compression_control:
104 guess_execution_size(p
, insn
, dest
);
107 extern int reg_type_size
[];
110 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
112 int hstride_for_reg
[] = {0, 1, 2, 4};
113 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
114 int width_for_reg
[] = {1, 2, 4, 8, 16};
115 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
116 int width
, hstride
, vstride
, execsize
;
118 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
119 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
120 * mean the destination has to be 128-bit aligned and the
121 * destination horiz stride has to be a word.
123 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
124 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
125 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
131 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
132 reg
.file
== BRW_ARF_NULL
)
135 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
136 hstride
= hstride_for_reg
[reg
.hstride
];
138 if (reg
.vstride
== 0xf) {
141 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
142 vstride
= vstride_for_reg
[reg
.vstride
];
145 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
146 width
= width_for_reg
[reg
.width
];
148 assert(insn
->header
.execution_size
>= 0 &&
149 insn
->header
.execution_size
< Elements(execsize_for_reg
));
150 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
152 /* Restrictions from 3.3.10: Register Region Restrictions. */
154 assert(execsize
>= width
);
157 if (execsize
== width
&& hstride
!= 0) {
158 assert(vstride
== -1 || vstride
== width
* hstride
);
162 if (execsize
== width
&& hstride
== 0) {
163 /* no restriction on vstride. */
168 assert(hstride
== 0);
172 if (execsize
== 1 && width
== 1) {
173 assert(hstride
== 0);
174 assert(vstride
== 0);
178 if (vstride
== 0 && hstride
== 0) {
182 /* 10. Check destination issues. */
185 static void brw_set_src0( struct brw_instruction
*insn
,
188 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
189 assert(reg
.nr
< 128);
191 validate_reg(insn
, reg
);
193 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
194 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
195 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
196 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
197 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
199 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
200 insn
->bits3
.ud
= reg
.dw1
.ud
;
202 /* Required to set some fields in src1 as well:
204 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
205 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
209 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
210 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
211 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
212 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
215 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
216 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
220 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
222 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
223 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
226 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
230 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
231 if (reg
.width
== BRW_WIDTH_1
&&
232 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
233 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
234 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
235 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
238 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
239 insn
->bits2
.da1
.src0_width
= reg
.width
;
240 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
244 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
245 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
246 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
247 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
249 /* This is an oddity of the fact we're using the same
250 * descriptions for registers in align_16 as align_1:
252 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
253 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
255 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
261 void brw_set_src1( struct brw_instruction
*insn
,
264 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
266 assert(reg
.nr
< 128);
268 validate_reg(insn
, reg
);
270 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
271 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
272 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
273 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
275 /* Only src1 can be immediate in two-argument instructions.
277 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
279 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
280 insn
->bits3
.ud
= reg
.dw1
.ud
;
283 /* This is a hardware restriction, which may or may not be lifted
286 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
287 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
289 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
290 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
291 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
294 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
295 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
298 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
299 if (reg
.width
== BRW_WIDTH_1
&&
300 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
301 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
302 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
303 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
306 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
307 insn
->bits3
.da1
.src1_width
= reg
.width
;
308 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
312 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
313 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
314 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
315 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
317 /* This is an oddity of the fact we're using the same
318 * descriptions for registers in align_16 as align_1:
320 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
321 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
323 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
330 static void brw_set_math_message( struct brw_context
*brw
,
331 struct brw_instruction
*insn
,
333 GLuint response_length
,
336 GLboolean low_precision
,
340 struct intel_context
*intel
= &brw
->intel
;
341 brw_set_src1(insn
, brw_imm_d(0));
343 if (intel
->gen
== 5) {
344 insn
->bits3
.math_gen5
.function
= function
;
345 insn
->bits3
.math_gen5
.int_type
= integer_type
;
346 insn
->bits3
.math_gen5
.precision
= low_precision
;
347 insn
->bits3
.math_gen5
.saturate
= saturate
;
348 insn
->bits3
.math_gen5
.data_type
= dataType
;
349 insn
->bits3
.math_gen5
.snapshot
= 0;
350 insn
->bits3
.math_gen5
.header_present
= 0;
351 insn
->bits3
.math_gen5
.response_length
= response_length
;
352 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
353 insn
->bits3
.math_gen5
.end_of_thread
= 0;
354 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
355 insn
->bits2
.send_gen5
.end_of_thread
= 0;
357 insn
->bits3
.math
.function
= function
;
358 insn
->bits3
.math
.int_type
= integer_type
;
359 insn
->bits3
.math
.precision
= low_precision
;
360 insn
->bits3
.math
.saturate
= saturate
;
361 insn
->bits3
.math
.data_type
= dataType
;
362 insn
->bits3
.math
.response_length
= response_length
;
363 insn
->bits3
.math
.msg_length
= msg_length
;
364 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
365 insn
->bits3
.math
.end_of_thread
= 0;
370 static void brw_set_ff_sync_message(struct brw_context
*brw
,
371 struct brw_instruction
*insn
,
373 GLuint response_length
,
374 GLboolean end_of_thread
)
376 struct intel_context
*intel
= &brw
->intel
;
377 brw_set_src1(insn
, brw_imm_d(0));
379 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
380 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
381 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
382 insn
->bits3
.urb_gen5
.allocate
= allocate
;
383 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
384 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
385 insn
->bits3
.urb_gen5
.header_present
= 1;
386 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
387 insn
->bits3
.urb_gen5
.msg_length
= 1;
388 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
389 if (intel
->gen
>= 6) {
390 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
392 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
393 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
397 static void brw_set_urb_message( struct brw_context
*brw
,
398 struct brw_instruction
*insn
,
402 GLuint response_length
,
403 GLboolean end_of_thread
,
406 GLuint swizzle_control
)
408 struct intel_context
*intel
= &brw
->intel
;
409 brw_set_src1(insn
, brw_imm_d(0));
411 if (intel
->gen
>= 5) {
412 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
413 insn
->bits3
.urb_gen5
.offset
= offset
;
414 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
415 insn
->bits3
.urb_gen5
.allocate
= allocate
;
416 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
417 insn
->bits3
.urb_gen5
.complete
= complete
;
418 insn
->bits3
.urb_gen5
.header_present
= 1;
419 insn
->bits3
.urb_gen5
.response_length
= response_length
;
420 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
421 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
422 if (intel
->gen
>= 6) {
423 /* For SNB, the SFID bits moved to the condmod bits, and
424 * EOT stayed in bits3 above. Does the EOT bit setting
425 * below on Ironlake even do anything?
427 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
429 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
430 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
433 insn
->bits3
.urb
.opcode
= 0; /* ? */
434 insn
->bits3
.urb
.offset
= offset
;
435 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
436 insn
->bits3
.urb
.allocate
= allocate
;
437 insn
->bits3
.urb
.used
= used
; /* ? */
438 insn
->bits3
.urb
.complete
= complete
;
439 insn
->bits3
.urb
.response_length
= response_length
;
440 insn
->bits3
.urb
.msg_length
= msg_length
;
441 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
442 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
446 static void brw_set_dp_write_message( struct brw_context
*brw
,
447 struct brw_instruction
*insn
,
448 GLuint binding_table_index
,
452 GLboolean header_present
,
453 GLuint pixel_scoreboard_clear
,
454 GLuint response_length
,
455 GLuint end_of_thread
,
456 GLuint send_commit_msg
)
458 struct intel_context
*intel
= &brw
->intel
;
459 brw_set_src1(insn
, brw_imm_ud(0));
461 if (intel
->gen
>= 6) {
462 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
463 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
464 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
465 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
466 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
467 insn
->bits3
.dp_render_cache
.header_present
= header_present
;
468 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
469 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
470 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
471 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
472 /* XXX really need below? */
473 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
474 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
475 } else if (intel
->gen
== 5) {
476 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
477 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
478 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
479 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
480 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
481 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
482 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
483 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
484 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
485 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
486 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
488 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
489 insn
->bits3
.dp_write
.msg_control
= msg_control
;
490 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
491 insn
->bits3
.dp_write
.msg_type
= msg_type
;
492 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
493 insn
->bits3
.dp_write
.response_length
= response_length
;
494 insn
->bits3
.dp_write
.msg_length
= msg_length
;
495 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
496 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
501 brw_set_dp_read_message(struct brw_context
*brw
,
502 struct brw_instruction
*insn
,
503 GLuint binding_table_index
,
508 GLuint response_length
)
510 struct intel_context
*intel
= &brw
->intel
;
511 brw_set_src1(insn
, brw_imm_d(0));
513 if (intel
->gen
>= 6) {
514 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
515 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
516 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= 0;
517 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
518 insn
->bits3
.dp_render_cache
.send_commit_msg
= 0;
519 insn
->bits3
.dp_render_cache
.header_present
= 1;
520 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
521 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
522 insn
->bits3
.dp_render_cache
.end_of_thread
= 0;
523 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
524 /* XXX really need below? */
525 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
526 insn
->bits2
.send_gen5
.end_of_thread
= 0;
527 } else if (intel
->gen
== 5) {
528 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
529 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
530 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
531 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
532 insn
->bits3
.dp_read_gen5
.header_present
= 1;
533 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
534 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
535 insn
->bits3
.dp_read_gen5
.pad1
= 0;
536 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
537 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
538 insn
->bits2
.send_gen5
.end_of_thread
= 0;
539 } else if (intel
->is_g4x
) {
540 insn
->bits3
.dp_read_g4x
.binding_table_index
= binding_table_index
; /*0:7*/
541 insn
->bits3
.dp_read_g4x
.msg_control
= msg_control
; /*8:10*/
542 insn
->bits3
.dp_read_g4x
.msg_type
= msg_type
; /*11:13*/
543 insn
->bits3
.dp_read_g4x
.target_cache
= target_cache
; /*14:15*/
544 insn
->bits3
.dp_read_g4x
.response_length
= response_length
; /*16:19*/
545 insn
->bits3
.dp_read_g4x
.msg_length
= msg_length
; /*20:23*/
546 insn
->bits3
.dp_read_g4x
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
547 insn
->bits3
.dp_read_g4x
.pad1
= 0;
548 insn
->bits3
.dp_read_g4x
.end_of_thread
= 0;
550 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
551 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
552 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
553 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
554 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
555 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
556 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
557 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
558 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
562 static void brw_set_sampler_message(struct brw_context
*brw
,
563 struct brw_instruction
*insn
,
564 GLuint binding_table_index
,
567 GLuint response_length
,
570 GLuint header_present
,
573 struct intel_context
*intel
= &brw
->intel
;
575 brw_set_src1(insn
, brw_imm_d(0));
577 if (intel
->gen
>= 5) {
578 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
579 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
580 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
581 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
582 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
583 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
584 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
585 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
587 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
589 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
590 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
592 } else if (intel
->is_g4x
) {
593 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
594 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
595 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
596 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
597 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
598 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
599 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
601 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
602 insn
->bits3
.sampler
.sampler
= sampler
;
603 insn
->bits3
.sampler
.msg_type
= msg_type
;
604 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
605 insn
->bits3
.sampler
.response_length
= response_length
;
606 insn
->bits3
.sampler
.msg_length
= msg_length
;
607 insn
->bits3
.sampler
.end_of_thread
= eot
;
608 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
614 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
617 struct brw_instruction
*insn
;
619 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
621 insn
= &p
->store
[p
->nr_insn
++];
622 memcpy(insn
, p
->current
, sizeof(*insn
));
624 /* Reset this one-shot flag:
627 if (p
->current
->header
.destreg__conditionalmod
) {
628 p
->current
->header
.destreg__conditionalmod
= 0;
629 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
632 insn
->header
.opcode
= opcode
;
637 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
642 struct brw_instruction
*insn
= next_insn(p
, opcode
);
643 brw_set_dest(p
, insn
, dest
);
644 brw_set_src0(insn
, src
);
648 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
652 struct brw_reg src1
)
654 struct brw_instruction
*insn
= next_insn(p
, opcode
);
655 brw_set_dest(p
, insn
, dest
);
656 brw_set_src0(insn
, src0
);
657 brw_set_src1(insn
, src1
);
662 /***********************************************************************
663 * Convenience routines.
/* Define brw_<OP>(p, dest, src0): a public wrapper that emits the
 * one-source instruction BRW_OPCODE_<OP> through brw_alu1().
 */
#define ALU1(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0)		\
{								\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
}
/* Define brw_<OP>(p, dest, src0, src1): a public wrapper that emits the
 * two-source instruction BRW_OPCODE_<OP> through brw_alu2().
 */
#define ALU2(OP)						\
struct brw_instruction *brw_##OP(struct brw_compile *p,		\
				 struct brw_reg dest,		\
				 struct brw_reg src0,		\
				 struct brw_reg src1)		\
{								\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * ROUND(OP) defines brw_<OP>(p, dest, src), which emits that pair.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
									      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(rnd, src);						      \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */  \
									      \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
}
731 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
737 if (src0
.type
== BRW_REGISTER_TYPE_F
||
738 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
739 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
740 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
741 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
744 if (src1
.type
== BRW_REGISTER_TYPE_F
||
745 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
746 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
747 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
748 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
751 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
754 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
760 if (src0
.type
== BRW_REGISTER_TYPE_D
||
761 src0
.type
== BRW_REGISTER_TYPE_UD
||
762 src1
.type
== BRW_REGISTER_TYPE_D
||
763 src1
.type
== BRW_REGISTER_TYPE_UD
) {
764 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
767 if (src0
.type
== BRW_REGISTER_TYPE_F
||
768 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
769 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
770 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
771 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
774 if (src1
.type
== BRW_REGISTER_TYPE_F
||
775 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
776 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
777 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
778 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
781 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
782 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
783 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
784 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
786 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
790 void brw_NOP(struct brw_compile
*p
)
792 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
793 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
794 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
795 brw_set_src1(insn
, brw_imm_ud(0x0));
802 /***********************************************************************
803 * Comparisons, if/else/endif
806 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
811 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
813 insn
->header
.execution_size
= 1;
814 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
815 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
817 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
822 /* EU takes the value from the flag register and pushes it onto some
823 * sort of a stack (presumably merging with any flag value already on
824 * the stack). Within an if block, the flags at the top of the stack
825 * control execution on each channel of the unit, eg. on each of the
826 * 16 pixel values in our wm programs.
828 * When the matching 'else' instruction is reached (presumably by
829 * countdown of the instruction count patched in by our ELSE/ENDIF
830 * functions), the relevent flags are inverted.
832 * When the matching 'endif' instruction is reached, the flags are
833 * popped off. If the stack is now empty, normal execution resumes.
835 * No attempt is made to deal with stack overflow (14 elements?).
837 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
839 struct intel_context
*intel
= &p
->brw
->intel
;
840 struct brw_instruction
*insn
;
842 if (p
->single_program_flow
) {
843 assert(execute_size
== BRW_EXECUTE_1
);
845 insn
= next_insn(p
, BRW_OPCODE_ADD
);
846 insn
->header
.predicate_inverse
= 1;
848 insn
= next_insn(p
, BRW_OPCODE_IF
);
851 /* Override the defaults for this instruction:
853 if (intel
->gen
< 6) {
854 brw_set_dest(p
, insn
, brw_ip_reg());
855 brw_set_src0(insn
, brw_ip_reg());
856 brw_set_src1(insn
, brw_imm_d(0x0));
858 brw_set_dest(p
, insn
, brw_imm_w(0));
859 insn
->bits1
.branch_gen6
.jump_count
= 0;
860 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
861 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
864 insn
->header
.execution_size
= execute_size
;
865 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
866 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
867 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
868 if (!p
->single_program_flow
)
869 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
871 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
876 struct brw_instruction
*
877 brw_IF_gen6(struct brw_compile
*p
, uint32_t conditional
,
878 struct brw_reg src0
, struct brw_reg src1
)
880 struct brw_instruction
*insn
;
882 insn
= next_insn(p
, BRW_OPCODE_IF
);
884 brw_set_dest(p
, insn
, brw_imm_w(0));
885 insn
->header
.execution_size
= BRW_EXECUTE_8
;
886 insn
->bits1
.branch_gen6
.jump_count
= 0;
887 brw_set_src0(insn
, src0
);
888 brw_set_src1(insn
, src1
);
890 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
891 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
892 insn
->header
.destreg__conditionalmod
= conditional
;
894 if (!p
->single_program_flow
)
895 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
900 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
901 struct brw_instruction
*if_insn
)
903 struct intel_context
*intel
= &p
->brw
->intel
;
904 struct brw_instruction
*insn
;
907 /* jump count is for 64bit data chunk each, so one 128bit
908 instruction requires 2 chunks. */
912 if (p
->single_program_flow
) {
913 insn
= next_insn(p
, BRW_OPCODE_ADD
);
915 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
918 if (intel
->gen
< 6) {
919 brw_set_dest(p
, insn
, brw_ip_reg());
920 brw_set_src0(insn
, brw_ip_reg());
921 brw_set_src1(insn
, brw_imm_d(0x0));
923 brw_set_dest(p
, insn
, brw_imm_w(0));
924 insn
->bits1
.branch_gen6
.jump_count
= 0;
925 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
926 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
929 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
930 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
931 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
932 if (!p
->single_program_flow
)
933 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
935 /* Patch the if instruction to point at this instruction.
937 if (p
->single_program_flow
) {
938 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
940 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
942 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
944 if (intel
->gen
< 6) {
945 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
946 if_insn
->bits3
.if_else
.pop_count
= 0;
947 if_insn
->bits3
.if_else
.pad0
= 0;
949 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
956 void brw_ENDIF(struct brw_compile
*p
,
957 struct brw_instruction
*patch_insn
)
959 struct intel_context
*intel
= &p
->brw
->intel
;
965 if (p
->single_program_flow
) {
966 /* In single program flow mode, there's no need to execute an ENDIF,
967 * since we don't need to do any stack operations, and if we're executing
968 * currently, we want to just continue executing.
970 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
972 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
974 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
976 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
978 if (intel
->gen
< 6) {
979 brw_set_dest(p
, insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
980 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
981 brw_set_src1(insn
, brw_imm_d(0x0));
983 brw_set_dest(p
, insn
, brw_imm_w(0));
984 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
985 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
988 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
989 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
990 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
991 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
994 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
996 assert(patch_insn
->bits1
.branch_gen6
.jump_count
== 0);
998 /* Patch the if or else instructions to point at this or the next
999 * instruction respectively.
1001 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
1002 if (intel
->gen
< 6) {
1003 /* Turn it into an IFF, which means no mask stack operations for
1004 * all-false and jumping past the ENDIF.
1006 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
1007 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1008 patch_insn
->bits3
.if_else
.pop_count
= 0;
1009 patch_insn
->bits3
.if_else
.pad0
= 0;
1011 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1012 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1015 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
1016 if (intel
->gen
< 6) {
1017 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1020 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1021 patch_insn
->bits3
.if_else
.pop_count
= 1;
1022 patch_insn
->bits3
.if_else
.pad0
= 0;
1024 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1025 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1029 /* Also pop item off the stack in the endif instruction:
1031 if (intel
->gen
< 6) {
1032 insn
->bits3
.if_else
.jump_count
= 0;
1033 insn
->bits3
.if_else
.pop_count
= 1;
1034 insn
->bits3
.if_else
.pad0
= 0;
1036 insn
->bits1
.branch_gen6
.jump_count
= 2;
1041 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1043 struct intel_context
*intel
= &p
->brw
->intel
;
1044 struct brw_instruction
*insn
;
1046 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1047 if (intel
->gen
>= 6) {
1048 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1049 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1050 brw_set_src1(insn
, brw_imm_d(0x0));
1052 brw_set_dest(p
, insn
, brw_ip_reg());
1053 brw_set_src0(insn
, brw_ip_reg());
1054 brw_set_src1(insn
, brw_imm_d(0x0));
1055 insn
->bits3
.if_else
.pad0
= 0;
1056 insn
->bits3
.if_else
.pop_count
= pop_count
;
1058 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1059 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1064 struct brw_instruction
*brw_CONT_gen6(struct brw_compile
*p
,
1065 struct brw_instruction
*do_insn
)
1067 struct brw_instruction
*insn
;
1070 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1071 brw_set_dest(p
, insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1072 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1073 brw_set_dest(p
, insn
, brw_ip_reg());
1074 brw_set_src0(insn
, brw_ip_reg());
1075 brw_set_src1(insn
, brw_imm_d(0x0));
1077 insn
->bits3
.break_cont
.uip
= br
* (do_insn
- insn
);
1079 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1080 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1084 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1086 struct brw_instruction
*insn
;
1087 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1088 brw_set_dest(p
, insn
, brw_ip_reg());
1089 brw_set_src0(insn
, brw_ip_reg());
1090 brw_set_src1(insn
, brw_imm_d(0x0));
1091 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1092 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1093 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1094 insn
->bits3
.if_else
.pad0
= 0;
1095 insn
->bits3
.if_else
.pop_count
= pop_count
;
1101 * The DO/WHILE is just an unterminated loop -- break or continue are
1102 * used for control within the loop. We have a few ways they can be
1105 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1106 * jip and no DO instruction.
1108 * For non-uniform control flow pre-gen6, there's a DO instruction to
1109 * push the mask, and a WHILE to jump back, and BREAK to get out and
1112 * For gen6, there's no more mask stack, so no need for DO. WHILE
1113 * just points back to the first instruction of the loop.
1115 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1117 struct intel_context
*intel
= &p
->brw
->intel
;
1119 if (intel
->gen
>= 6 || p
->single_program_flow
) {
1120 return &p
->store
[p
->nr_insn
];
1122 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1124 /* Override the defaults for this instruction:
1126 brw_set_dest(p
, insn
, brw_null_reg());
1127 brw_set_src0(insn
, brw_null_reg());
1128 brw_set_src1(insn
, brw_null_reg());
1130 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1131 insn
->header
.execution_size
= execute_size
;
1132 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1133 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1134 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1142 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1143 struct brw_instruction
*do_insn
)
1145 struct intel_context
*intel
= &p
->brw
->intel
;
1146 struct brw_instruction
*insn
;
1149 if (intel
->gen
>= 5)
1152 if (intel
->gen
>= 6) {
1153 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1155 brw_set_dest(p
, insn
, brw_imm_w(0));
1156 insn
->bits1
.branch_gen6
.jump_count
= br
* (do_insn
- insn
);
1157 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1158 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
1160 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1161 assert(insn
->header
.execution_size
== BRW_EXECUTE_8
);
1163 if (p
->single_program_flow
) {
1164 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1166 brw_set_dest(p
, insn
, brw_ip_reg());
1167 brw_set_src0(insn
, brw_ip_reg());
1168 brw_set_src1(insn
, brw_imm_d((do_insn
- insn
) * 16));
1169 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1171 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1173 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1175 brw_set_dest(p
, insn
, brw_ip_reg());
1176 brw_set_src0(insn
, brw_ip_reg());
1177 brw_set_src1(insn
, brw_imm_d(0));
1179 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1180 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1181 insn
->bits3
.if_else
.pop_count
= 0;
1182 insn
->bits3
.if_else
.pad0
= 0;
1185 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1186 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1194 void brw_land_fwd_jump(struct brw_compile
*p
,
1195 struct brw_instruction
*jmp_insn
)
1197 struct intel_context
*intel
= &p
->brw
->intel
;
1198 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1201 if (intel
->gen
>= 5)
1204 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1205 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1207 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1212 /* To integrate with the above, it makes sense that the comparison
1213 * instruction should populate the flag register. It might be simpler
1214 * just to use the flag reg for most WM tasks?
1216 void brw_CMP(struct brw_compile
*p
,
1217 struct brw_reg dest
,
1219 struct brw_reg src0
,
1220 struct brw_reg src1
)
1222 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1224 insn
->header
.destreg__conditionalmod
= conditional
;
1225 brw_set_dest(p
, insn
, dest
);
1226 brw_set_src0(insn
, src0
);
1227 brw_set_src1(insn
, src1
);
1229 /* guess_execution_size(insn, src0); */
1232 /* Make it so that future instructions will use the computed flag
1233 * value until brw_set_predicate_control_flag_value() is called
1236 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1238 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1239 p
->flag_value
= 0xff;
1243 /* Issue 'wait' instruction for n1, host could program MMIO
1244 to wake up thread. */
1245 void brw_WAIT (struct brw_compile
*p
)
1247 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1248 struct brw_reg src
= brw_notification_1_reg();
1250 brw_set_dest(p
, insn
, src
);
1251 brw_set_src0(insn
, src
);
1252 brw_set_src1(insn
, brw_null_reg());
1253 insn
->header
.execution_size
= 0; /* must */
1254 insn
->header
.predicate_control
= 0;
1255 insn
->header
.compression_control
= 0;
1259 /***********************************************************************
1260 * Helpers for the various SEND message types:
1263 /** Extended math function, float[8].
1265 void brw_math( struct brw_compile
*p
,
1266 struct brw_reg dest
,
1274 struct intel_context
*intel
= &p
->brw
->intel
;
1276 if (intel
->gen
>= 6) {
1277 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1279 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1280 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1282 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1283 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1285 /* Source modifiers are ignored for extended math instructions. */
1286 assert(!src
.negate
);
1289 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1290 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1291 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1294 /* Math is the same ISA format as other opcodes, except that CondModifier
1295 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1297 insn
->header
.destreg__conditionalmod
= function
;
1298 insn
->header
.saturate
= saturate
;
1300 brw_set_dest(p
, insn
, dest
);
1301 brw_set_src0(insn
, src
);
1302 brw_set_src1(insn
, brw_null_reg());
1304 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1305 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1306 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1307 /* Example code doesn't set predicate_control for send
1310 insn
->header
.predicate_control
= 0;
1311 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1313 brw_set_dest(p
, insn
, dest
);
1314 brw_set_src0(insn
, src
);
1315 brw_set_math_message(p
->brw
,
1317 msg_length
, response_length
,
1319 BRW_MATH_INTEGER_UNSIGNED
,
1326 /** Extended math function, float[8].
1328 void brw_math2(struct brw_compile
*p
,
1329 struct brw_reg dest
,
1331 struct brw_reg src0
,
1332 struct brw_reg src1
)
1334 struct intel_context
*intel
= &p
->brw
->intel
;
1335 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1337 assert(intel
->gen
>= 6);
1341 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1342 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1343 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1345 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1346 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1347 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1349 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1350 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1351 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1352 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1355 /* Source modifiers are ignored for extended math instructions. */
1356 assert(!src0
.negate
);
1358 assert(!src1
.negate
);
1361 /* Math is the same ISA format as other opcodes, except that CondModifier
1362 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1364 insn
->header
.destreg__conditionalmod
= function
;
1366 brw_set_dest(p
, insn
, dest
);
1367 brw_set_src0(insn
, src0
);
1368 brw_set_src1(insn
, src1
);
1372 * Extended math function, float[16].
1373 * Use 2 send instructions.
1375 void brw_math_16( struct brw_compile
*p
,
1376 struct brw_reg dest
,
1383 struct intel_context
*intel
= &p
->brw
->intel
;
1384 struct brw_instruction
*insn
;
1385 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1386 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1388 if (intel
->gen
>= 6) {
1389 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1391 /* Math is the same ISA format as other opcodes, except that CondModifier
1392 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1394 insn
->header
.destreg__conditionalmod
= function
;
1395 insn
->header
.saturate
= saturate
;
1397 /* Source modifiers are ignored for extended math instructions. */
1398 assert(!src
.negate
);
1401 brw_set_dest(p
, insn
, dest
);
1402 brw_set_src0(insn
, src
);
1403 brw_set_src1(insn
, brw_null_reg());
1407 /* First instruction:
1409 brw_push_insn_state(p
);
1410 brw_set_predicate_control_flag_value(p
, 0xff);
1411 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1413 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1414 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1416 brw_set_dest(p
, insn
, dest
);
1417 brw_set_src0(insn
, src
);
1418 brw_set_math_message(p
->brw
,
1420 msg_length
, response_length
,
1422 BRW_MATH_INTEGER_UNSIGNED
,
1425 BRW_MATH_DATA_VECTOR
);
1427 /* Second instruction:
1429 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1430 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1431 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1433 brw_set_dest(p
, insn
, offset(dest
,1));
1434 brw_set_src0(insn
, src
);
1435 brw_set_math_message(p
->brw
,
1437 msg_length
, response_length
,
1439 BRW_MATH_INTEGER_UNSIGNED
,
1442 BRW_MATH_DATA_VECTOR
);
1444 brw_pop_insn_state(p
);
1449 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1450 * using a constant offset per channel.
1452 * The offset must be aligned to oword size (16 bytes). Used for
1453 * register spilling.
1455 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1460 struct intel_context
*intel
= &p
->brw
->intel
;
1461 uint32_t msg_control
;
1464 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1466 if (num_regs
== 1) {
1467 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1470 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1474 /* Set up the message header. This is g0, with g0.2 filled with
1475 * the offset. We don't want to leave our offset around in g0 or
1476 * it'll screw up texture samples, so set it up inside the message
1480 brw_push_insn_state(p
);
1481 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1482 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1484 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1486 /* set message header global offset field (reg 0, element 2) */
1488 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1490 2), BRW_REGISTER_TYPE_UD
),
1491 brw_imm_ud(offset
));
1493 brw_pop_insn_state(p
);
1497 struct brw_reg dest
;
1498 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1499 int send_commit_msg
;
1500 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1501 BRW_REGISTER_TYPE_UW
);
1503 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1504 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1505 src_header
= vec16(src_header
);
1507 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1508 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1510 /* Until gen6, writes followed by reads from the same location
1511 * are not guaranteed to be ordered unless write_commit is set.
1512 * If set, then a no-op write is issued to the destination
1513 * register to set a dependency, and a read from the destination
1514 * can be used to ensure the ordering.
1516 * For gen6, only writes between different threads need ordering
1517 * protection. Our use of DP writes is all about register
1518 * spilling within a thread.
1520 if (intel
->gen
>= 6) {
1521 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1522 send_commit_msg
= 0;
1525 send_commit_msg
= 1;
1528 brw_set_dest(p
, insn
, dest
);
1529 brw_set_src0(insn
, brw_null_reg());
1531 brw_set_dp_write_message(p
->brw
,
1533 255, /* binding table index (255=stateless) */
1535 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1537 GL_TRUE
, /* header_present */
1538 0, /* pixel scoreboard */
1539 send_commit_msg
, /* response_length */
1547 * Read a block of owords (half a GRF each) from the scratch buffer
1548 * using a constant index per channel.
1550 * Offset must be aligned to oword size (16 bytes). Used for register
1554 brw_oword_block_read_scratch(struct brw_compile
*p
,
1555 struct brw_reg dest
,
1560 uint32_t msg_control
;
1563 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1564 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1566 if (num_regs
== 1) {
1567 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1570 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1575 brw_push_insn_state(p
);
1576 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1577 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1579 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1581 /* set message header global offset field (reg 0, element 2) */
1583 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1585 2), BRW_REGISTER_TYPE_UD
),
1586 brw_imm_ud(offset
));
1588 brw_pop_insn_state(p
);
1592 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1594 assert(insn
->header
.predicate_control
== 0);
1595 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1596 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1598 brw_set_dest(p
, insn
, dest
); /* UW? */
1599 brw_set_src0(insn
, brw_null_reg());
1601 brw_set_dp_read_message(p
->brw
,
1603 255, /* binding table index (255=stateless) */
1605 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1606 1, /* target cache (render/scratch) */
1613 * Read a float[4] vector from the data port Data Cache (const buffer).
1614 * Location (in buffer) should be a multiple of 16.
1615 * Used for fetching shader constants.
1617 void brw_oword_block_read(struct brw_compile
*p
,
1618 struct brw_reg dest
,
1621 uint32_t bind_table_index
)
1623 struct intel_context
*intel
= &p
->brw
->intel
;
1625 /* On newer hardware, offset is in units of owords. */
1626 if (intel
->gen
>= 6)
1629 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1631 brw_push_insn_state(p
);
1632 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1633 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1634 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1636 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1638 /* set message header global offset field (reg 0, element 2) */
1640 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1642 2), BRW_REGISTER_TYPE_UD
),
1643 brw_imm_ud(offset
));
1645 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1646 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1648 /* cast dest to a uword[8] vector */
1649 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1651 brw_set_dest(p
, insn
, dest
);
1652 if (intel
->gen
>= 6) {
1653 brw_set_src0(insn
, mrf
);
1655 brw_set_src0(insn
, brw_null_reg());
1658 brw_set_dp_read_message(p
->brw
,
1661 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1662 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1663 0, /* source cache = data cache */
1665 1); /* response_length (1 reg, 2 owords!) */
1667 brw_pop_insn_state(p
);
1671 * Read a set of dwords from the data port Data Cache (const buffer).
1673 * Location (in buffer) appears as UD offsets in the register after
1674 * the provided mrf header reg.
1676 void brw_dword_scattered_read(struct brw_compile
*p
,
1677 struct brw_reg dest
,
1679 uint32_t bind_table_index
)
1681 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1683 brw_push_insn_state(p
);
1684 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1685 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1686 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1687 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1688 brw_pop_insn_state(p
);
1690 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1691 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1693 /* cast dest to a uword[8] vector */
1694 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1696 brw_set_dest(p
, insn
, dest
);
1697 brw_set_src0(insn
, brw_null_reg());
1699 brw_set_dp_read_message(p
->brw
,
1702 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1703 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1704 0, /* source cache = data cache */
1706 1); /* response_length */
1712 * Read float[4] constant(s) from VS constant buffer.
1713 * For relative addressing, two float[4] constants will be read into 'dest'.
1714 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1716 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1717 struct brw_reg dest
,
1719 GLuint bind_table_index
)
1721 struct intel_context
*intel
= &p
->brw
->intel
;
1722 struct brw_instruction
*insn
;
1723 GLuint msg_reg_nr
= 1;
1725 if (intel
->gen
>= 6)
1728 /* Setup MRF[1] with location/offset into const buffer */
1729 brw_push_insn_state(p
);
1730 brw_set_access_mode(p
, BRW_ALIGN_1
);
1731 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1732 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1733 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1734 brw_MOV(p
, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
, msg_reg_nr
, 2),
1735 BRW_REGISTER_TYPE_UD
),
1736 brw_imm_ud(location
));
1737 brw_pop_insn_state(p
);
1739 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1741 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1742 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1743 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1744 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1746 brw_set_dest(p
, insn
, dest
);
1747 if (intel
->gen
>= 6) {
1748 brw_set_src0(insn
, brw_message_reg(msg_reg_nr
));
1750 brw_set_src0(insn
, brw_null_reg());
1753 brw_set_dp_read_message(p
->brw
,
1757 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1758 0, /* source cache = data cache */
1760 1); /* response_length (1 Oword) */
1764 * Read a float[4] constant per vertex from VS constant buffer, with
1765 * relative addressing.
1767 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1768 struct brw_reg dest
,
1769 struct brw_reg addr_reg
,
1771 GLuint bind_table_index
)
1773 struct intel_context
*intel
= &p
->brw
->intel
;
1776 /* Setup MRF[1] with offset into const buffer */
1777 brw_push_insn_state(p
);
1778 brw_set_access_mode(p
, BRW_ALIGN_1
);
1779 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1780 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1781 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1783 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1786 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D
),
1787 addr_reg
, brw_imm_d(offset
));
1788 brw_pop_insn_state(p
);
1790 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1792 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1793 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1794 insn
->header
.destreg__conditionalmod
= 0;
1795 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1797 brw_set_dest(p
, insn
, dest
);
1798 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1800 if (intel
->gen
== 6)
1801 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1802 else if (intel
->gen
== 5 || intel
->is_g4x
)
1803 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1805 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1807 brw_set_dp_read_message(p
->brw
,
1810 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1812 0, /* source cache = data cache */
1814 1); /* response_length */
1819 void brw_fb_WRITE(struct brw_compile
*p
,
1821 struct brw_reg dest
,
1823 struct brw_reg src0
,
1824 GLuint binding_table_index
,
1826 GLuint response_length
,
1828 GLboolean header_present
)
1830 struct intel_context
*intel
= &p
->brw
->intel
;
1831 struct brw_instruction
*insn
;
1832 GLuint msg_control
, msg_type
;
1834 if (intel
->gen
>= 6 && binding_table_index
== 0) {
1835 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
1837 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1839 /* The execution mask is ignored for render target writes. */
1840 insn
->header
.predicate_control
= 0;
1841 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1843 if (intel
->gen
>= 6) {
1844 /* headerless version, just submit color payload */
1845 src0
= brw_message_reg(msg_reg_nr
);
1847 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1849 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1851 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1854 if (dispatch_width
== 16)
1855 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1857 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1859 brw_set_dest(p
, insn
, dest
);
1860 brw_set_src0(insn
, src0
);
1861 brw_set_dp_write_message(p
->brw
,
1863 binding_table_index
,
1868 1, /* pixel scoreboard */
1871 0 /* send_commit_msg */);
1876 * Texture sample instruction.
1877 * Note: the msg_type plus msg_length values determine exactly what kind
1878 * of sampling operation is performed. See volume 4, page 161 of docs.
1880 void brw_SAMPLE(struct brw_compile
*p
,
1881 struct brw_reg dest
,
1883 struct brw_reg src0
,
1884 GLuint binding_table_index
,
1888 GLuint response_length
,
1891 GLuint header_present
,
1894 struct intel_context
*intel
= &p
->brw
->intel
;
1895 GLboolean need_stall
= 0;
1897 if (writemask
== 0) {
1898 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1902 /* Hardware doesn't do destination dependency checking on send
1903 * instructions properly. Add a workaround which generates the
1904 * dependency by other means. In practice it seems like this bug
1905 * only crops up for texture samples, and only where registers are
1906 * written by the send and then written again later without being
1907 * read in between. Luckily for us, we already track that
1908 * information and use it to modify the writemask for the
1909 * instruction, so that is a guide for whether a workaround is
1912 if (writemask
!= WRITEMASK_XYZW
) {
1913 GLuint dst_offset
= 0;
1914 GLuint i
, newmask
= 0, len
= 0;
1916 for (i
= 0; i
< 4; i
++) {
1917 if (writemask
& (1<<i
))
1921 for (; i
< 4; i
++) {
1922 if (!(writemask
& (1<<i
)))
1928 if (newmask
!= writemask
) {
1930 /* printf("need stall %x %x\n", newmask , writemask); */
1933 GLboolean dispatch_16
= GL_FALSE
;
1935 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1937 guess_execution_size(p
, p
->current
, dest
);
1938 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1939 dispatch_16
= GL_TRUE
;
1941 newmask
= ~newmask
& WRITEMASK_XYZW
;
1943 brw_push_insn_state(p
);
1945 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1946 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1948 brw_MOV(p
, retype(m1
, BRW_REGISTER_TYPE_UD
),
1949 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD
));
1950 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1952 brw_pop_insn_state(p
);
1954 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1955 dest
= offset(dest
, dst_offset
);
1957 /* For 16-wide dispatch, masked channels are skipped in the
1958 * response. For 8-wide, masked channels still take up slots,
1959 * and are just not written to.
1962 response_length
= len
* 2;
1967 struct brw_instruction
*insn
;
1969 /* Sandybridge doesn't have the implied move for SENDs,
1970 * and the first message register index comes from src0.
1972 if (intel
->gen
>= 6) {
1973 if (src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
1974 src0
.nr
!= BRW_ARF_NULL
) {
1975 brw_push_insn_state(p
);
1976 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1977 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1978 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), src0
.type
), src0
);
1979 brw_pop_insn_state(p
);
1981 src0
= brw_message_reg(msg_reg_nr
);
1984 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1985 insn
->header
.predicate_control
= 0; /* XXX */
1986 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1988 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1990 brw_set_dest(p
, insn
, dest
);
1991 brw_set_src0(insn
, src0
);
1992 brw_set_sampler_message(p
->brw
, insn
,
1993 binding_table_index
,
2004 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
2006 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
2008 brw_push_insn_state(p
);
2009 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
2010 brw_MOV(p
, retype(reg
, BRW_REGISTER_TYPE_UD
),
2011 retype(reg
, BRW_REGISTER_TYPE_UD
));
2012 brw_pop_insn_state(p
);
2017 /* All these variables are pretty confusing - we might be better off
2018 * using bitmasks and macros for this, in the old style. Or perhaps
2019 * just having the caller instantiate the fields in dword3 itself.
2021 void brw_urb_WRITE(struct brw_compile
*p
,
2022 struct brw_reg dest
,
2024 struct brw_reg src0
,
2028 GLuint response_length
,
2030 GLboolean writes_complete
,
2034 struct intel_context
*intel
= &p
->brw
->intel
;
2035 struct brw_instruction
*insn
;
2037 /* Sandybridge doesn't have the implied move for SENDs,
2038 * and the first message register index comes from src0.
2040 if (intel
->gen
>= 6) {
2041 brw_push_insn_state(p
);
2042 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2043 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
2044 retype(src0
, BRW_REGISTER_TYPE_UD
));
2045 brw_pop_insn_state(p
);
2046 src0
= brw_message_reg(msg_reg_nr
);
2049 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2051 assert(msg_length
< BRW_MAX_MRF
);
2053 brw_set_dest(p
, insn
, dest
);
2054 brw_set_src0(insn
, src0
);
2055 brw_set_src1(insn
, brw_imm_d(0));
2058 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2060 brw_set_urb_message(p
->brw
,
2073 brw_find_next_block_end(struct brw_compile
*p
, int start
)
2077 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2078 struct brw_instruction
*insn
= &p
->store
[ip
];
2080 switch (insn
->header
.opcode
) {
2081 case BRW_OPCODE_ENDIF
:
2082 case BRW_OPCODE_ELSE
:
2083 case BRW_OPCODE_WHILE
:
2087 assert(!"not reached");
2091 /* There is no DO instruction on gen6, so to find the end of the loop
2092 * we have to see if the loop is jumping back before our start
2096 brw_find_loop_end(struct brw_compile
*p
, int start
)
2101 for (ip
= start
+ 1; ip
< p
->nr_insn
; ip
++) {
2102 struct brw_instruction
*insn
= &p
->store
[ip
];
2104 if (insn
->header
.opcode
== BRW_OPCODE_WHILE
) {
2105 if (ip
+ insn
->bits1
.branch_gen6
.jump_count
/ br
< start
)
2109 assert(!"not reached");
2113 /* After program generation, go back and update the UIP and JIP of
2114 * BREAK and CONT instructions to their correct locations.
2117 brw_set_uip_jip(struct brw_compile
*p
)
2119 struct intel_context
*intel
= &p
->brw
->intel
;
2126 for (ip
= 0; ip
< p
->nr_insn
; ip
++) {
2127 struct brw_instruction
*insn
= &p
->store
[ip
];
2129 switch (insn
->header
.opcode
) {
2130 case BRW_OPCODE_BREAK
:
2131 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2132 insn
->bits3
.break_cont
.uip
= br
* (brw_find_loop_end(p
, ip
) - ip
+ 1);
2134 case BRW_OPCODE_CONTINUE
:
2135 /* JIP is set at CONTINUE emit time, since that's when we
2136 * know where the start of the loop is.
2138 insn
->bits3
.break_cont
.jip
= br
* (brw_find_next_block_end(p
, ip
) - ip
);
2139 assert(insn
->bits3
.break_cont
.uip
!= 0);
2140 assert(insn
->bits3
.break_cont
.jip
!= 0);
2146 void brw_ff_sync(struct brw_compile
*p
,
2147 struct brw_reg dest
,
2149 struct brw_reg src0
,
2151 GLuint response_length
,
2154 struct intel_context
*intel
= &p
->brw
->intel
;
2155 struct brw_instruction
*insn
;
2157 /* Sandybridge doesn't have the implied move for SENDs,
2158 * and the first message register index comes from src0.
2160 if (intel
->gen
>= 6) {
2161 brw_push_insn_state(p
);
2162 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2163 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
2164 retype(src0
, BRW_REGISTER_TYPE_UD
));
2165 brw_pop_insn_state(p
);
2166 src0
= brw_message_reg(msg_reg_nr
);
2169 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2170 brw_set_dest(p
, insn
, dest
);
2171 brw_set_src0(insn
, src0
);
2172 brw_set_src1(insn
, brw_imm_d(0));
2175 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2177 brw_set_ff_sync_message(p
->brw
,