2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
59 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
60 assert(dest
.nr
< 128);
62 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
63 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
64 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
66 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
67 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
69 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
70 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
71 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
72 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
73 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
76 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
77 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
78 /* even ignored in da16, still need to set as '01' */
79 insn
->bits1
.da16
.dest_horiz_stride
= 1;
83 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
85 /* These are different sizes in align1 vs align16:
87 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
88 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
89 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
90 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
91 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
94 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
95 /* even ignored in da16, still need to set as '01' */
96 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
103 guess_execution_size(insn
, dest
);
106 extern int reg_type_size
[];
109 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
111 int hstride_for_reg
[] = {0, 1, 2, 4};
112 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg
[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
115 int width
, hstride
, vstride
, execsize
;
117 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
122 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
123 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
124 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
130 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
131 reg
.file
== BRW_ARF_NULL
)
134 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
135 hstride
= hstride_for_reg
[reg
.hstride
];
137 if (reg
.vstride
== 0xf) {
140 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
141 vstride
= vstride_for_reg
[reg
.vstride
];
144 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
145 width
= width_for_reg
[reg
.width
];
147 assert(insn
->header
.execution_size
>= 0 &&
148 insn
->header
.execution_size
< Elements(execsize_for_reg
));
149 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
153 assert(execsize
>= width
);
156 if (execsize
== width
&& hstride
!= 0) {
157 assert(vstride
== -1 || vstride
== width
* hstride
);
161 if (execsize
== width
&& hstride
== 0) {
162 /* no restriction on vstride. */
167 assert(hstride
== 0);
171 if (execsize
== 1 && width
== 1) {
172 assert(hstride
== 0);
173 assert(vstride
== 0);
177 if (vstride
== 0 && hstride
== 0) {
181 /* 10. Check destination issues. */
184 static void brw_set_src0( struct brw_instruction
*insn
,
187 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
188 assert(reg
.nr
< 128);
190 validate_reg(insn
, reg
);
192 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
193 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
194 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
195 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
196 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
198 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
199 insn
->bits3
.ud
= reg
.dw1
.ud
;
201 /* Required to set some fields in src1 as well:
203 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
204 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
208 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
209 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
210 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
211 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
214 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
215 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
219 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
221 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
222 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
225 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
229 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
230 if (reg
.width
== BRW_WIDTH_1
&&
231 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
232 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
233 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
234 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
237 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
238 insn
->bits2
.da1
.src0_width
= reg
.width
;
239 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
243 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
244 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
245 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
246 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
251 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
252 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
254 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
260 void brw_set_src1( struct brw_instruction
*insn
,
263 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
265 assert(reg
.nr
< 128);
267 validate_reg(insn
, reg
);
269 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
270 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
271 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
272 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
274 /* Only src1 can be immediate in two-argument instructions.
276 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
278 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
279 insn
->bits3
.ud
= reg
.dw1
.ud
;
282 /* This is a hardware restriction, which may or may not be lifted
285 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
288 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
289 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
290 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
293 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
294 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
297 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
298 if (reg
.width
== BRW_WIDTH_1
&&
299 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
300 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
301 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
302 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
305 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
306 insn
->bits3
.da1
.src1_width
= reg
.width
;
307 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
311 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
312 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
313 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
314 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
316 /* This is an oddity of the fact we're using the same
317 * descriptions for registers in align_16 as align_1:
319 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
320 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
322 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
329 static void brw_set_math_message( struct brw_context
*brw
,
330 struct brw_instruction
*insn
,
332 GLuint response_length
,
335 GLboolean low_precision
,
339 struct intel_context
*intel
= &brw
->intel
;
340 brw_set_src1(insn
, brw_imm_d(0));
342 if (intel
->gen
== 5) {
343 insn
->bits3
.math_gen5
.function
= function
;
344 insn
->bits3
.math_gen5
.int_type
= integer_type
;
345 insn
->bits3
.math_gen5
.precision
= low_precision
;
346 insn
->bits3
.math_gen5
.saturate
= saturate
;
347 insn
->bits3
.math_gen5
.data_type
= dataType
;
348 insn
->bits3
.math_gen5
.snapshot
= 0;
349 insn
->bits3
.math_gen5
.header_present
= 0;
350 insn
->bits3
.math_gen5
.response_length
= response_length
;
351 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
352 insn
->bits3
.math_gen5
.end_of_thread
= 0;
353 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
354 insn
->bits2
.send_gen5
.end_of_thread
= 0;
356 insn
->bits3
.math
.function
= function
;
357 insn
->bits3
.math
.int_type
= integer_type
;
358 insn
->bits3
.math
.precision
= low_precision
;
359 insn
->bits3
.math
.saturate
= saturate
;
360 insn
->bits3
.math
.data_type
= dataType
;
361 insn
->bits3
.math
.response_length
= response_length
;
362 insn
->bits3
.math
.msg_length
= msg_length
;
363 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
364 insn
->bits3
.math
.end_of_thread
= 0;
369 static void brw_set_ff_sync_message(struct brw_context
*brw
,
370 struct brw_instruction
*insn
,
372 GLuint response_length
,
373 GLboolean end_of_thread
)
375 struct intel_context
*intel
= &brw
->intel
;
376 brw_set_src1(insn
, brw_imm_d(0));
378 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
379 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
380 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
381 insn
->bits3
.urb_gen5
.allocate
= allocate
;
382 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
383 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
384 insn
->bits3
.urb_gen5
.header_present
= 1;
385 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
386 insn
->bits3
.urb_gen5
.msg_length
= 1;
387 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
388 if (intel
->gen
>= 6) {
389 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
391 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
392 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
396 static void brw_set_urb_message( struct brw_context
*brw
,
397 struct brw_instruction
*insn
,
401 GLuint response_length
,
402 GLboolean end_of_thread
,
405 GLuint swizzle_control
)
407 struct intel_context
*intel
= &brw
->intel
;
408 brw_set_src1(insn
, brw_imm_d(0));
410 if (intel
->gen
>= 5) {
411 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
412 insn
->bits3
.urb_gen5
.offset
= offset
;
413 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
414 insn
->bits3
.urb_gen5
.allocate
= allocate
;
415 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
416 insn
->bits3
.urb_gen5
.complete
= complete
;
417 insn
->bits3
.urb_gen5
.header_present
= 1;
418 insn
->bits3
.urb_gen5
.response_length
= response_length
;
419 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
420 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
421 if (intel
->gen
>= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
426 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
428 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
429 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
432 insn
->bits3
.urb
.opcode
= 0; /* ? */
433 insn
->bits3
.urb
.offset
= offset
;
434 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
435 insn
->bits3
.urb
.allocate
= allocate
;
436 insn
->bits3
.urb
.used
= used
; /* ? */
437 insn
->bits3
.urb
.complete
= complete
;
438 insn
->bits3
.urb
.response_length
= response_length
;
439 insn
->bits3
.urb
.msg_length
= msg_length
;
440 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
441 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
445 static void brw_set_dp_write_message( struct brw_context
*brw
,
446 struct brw_instruction
*insn
,
447 GLuint binding_table_index
,
451 GLboolean header_present
,
452 GLuint pixel_scoreboard_clear
,
453 GLuint response_length
,
454 GLuint end_of_thread
,
455 GLuint send_commit_msg
)
457 struct intel_context
*intel
= &brw
->intel
;
458 brw_set_src1(insn
, brw_imm_ud(0));
460 if (intel
->gen
>= 6) {
461 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
462 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
463 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
464 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
465 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
466 insn
->bits3
.dp_render_cache
.header_present
= header_present
;
467 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
468 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
469 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
470 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
471 /* XXX really need below? */
472 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
473 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
474 } else if (intel
->gen
== 5) {
475 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
476 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
477 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
478 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
479 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
480 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
481 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
482 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
483 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
484 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
485 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
487 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
488 insn
->bits3
.dp_write
.msg_control
= msg_control
;
489 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
490 insn
->bits3
.dp_write
.msg_type
= msg_type
;
491 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
492 insn
->bits3
.dp_write
.response_length
= response_length
;
493 insn
->bits3
.dp_write
.msg_length
= msg_length
;
494 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
495 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
500 brw_set_dp_read_message(struct brw_context
*brw
,
501 struct brw_instruction
*insn
,
502 GLuint binding_table_index
,
507 GLuint response_length
)
509 struct intel_context
*intel
= &brw
->intel
;
510 brw_set_src1(insn
, brw_imm_d(0));
512 if (intel
->gen
>= 6) {
513 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
514 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
515 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= 0;
516 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
517 insn
->bits3
.dp_render_cache
.send_commit_msg
= 0;
518 insn
->bits3
.dp_render_cache
.header_present
= 1;
519 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
520 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
521 insn
->bits3
.dp_render_cache
.end_of_thread
= 0;
522 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
523 /* XXX really need below? */
524 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
525 insn
->bits2
.send_gen5
.end_of_thread
= 0;
526 } else if (intel
->gen
== 5) {
527 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
528 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
529 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
530 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
531 insn
->bits3
.dp_read_gen5
.header_present
= 1;
532 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
533 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
534 insn
->bits3
.dp_read_gen5
.pad1
= 0;
535 insn
->bits3
.dp_read_gen5
.end_of_thread
= 0;
536 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
537 insn
->bits2
.send_gen5
.end_of_thread
= 0;
539 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
540 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
541 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
542 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
543 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
544 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
545 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
546 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
547 insn
->bits3
.dp_read
.end_of_thread
= 0; /*31*/
551 static void brw_set_sampler_message(struct brw_context
*brw
,
552 struct brw_instruction
*insn
,
553 GLuint binding_table_index
,
556 GLuint response_length
,
559 GLuint header_present
,
562 struct intel_context
*intel
= &brw
->intel
;
564 brw_set_src1(insn
, brw_imm_d(0));
566 if (intel
->gen
>= 5) {
567 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
568 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
569 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
570 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
571 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
572 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
573 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
574 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
576 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
578 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
579 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
581 } else if (intel
->is_g4x
) {
582 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
583 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
584 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
585 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
586 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
587 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
588 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
590 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
591 insn
->bits3
.sampler
.sampler
= sampler
;
592 insn
->bits3
.sampler
.msg_type
= msg_type
;
593 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
594 insn
->bits3
.sampler
.response_length
= response_length
;
595 insn
->bits3
.sampler
.msg_length
= msg_length
;
596 insn
->bits3
.sampler
.end_of_thread
= eot
;
597 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
603 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
606 struct brw_instruction
*insn
;
608 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
610 insn
= &p
->store
[p
->nr_insn
++];
611 memcpy(insn
, p
->current
, sizeof(*insn
));
613 /* Reset this one-shot flag:
616 if (p
->current
->header
.destreg__conditionalmod
) {
617 p
->current
->header
.destreg__conditionalmod
= 0;
618 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
621 insn
->header
.opcode
= opcode
;
626 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
631 struct brw_instruction
*insn
= next_insn(p
, opcode
);
632 brw_set_dest(insn
, dest
);
633 brw_set_src0(insn
, src
);
637 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
641 struct brw_reg src1
)
643 struct brw_instruction
*insn
= next_insn(p
, opcode
);
644 brw_set_dest(insn
, dest
);
645 brw_set_src0(insn
, src0
);
646 brw_set_src1(insn
, src1
);
651 /***********************************************************************
652 * Convenience routines.
655 struct brw_instruction *brw_##OP(struct brw_compile *p, \
656 struct brw_reg dest, \
657 struct brw_reg src0) \
659 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
663 struct brw_instruction *brw_##OP(struct brw_compile *p, \
664 struct brw_reg dest, \
665 struct brw_reg src0, \
666 struct brw_reg src1) \
668 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
671 /* Rounding operations (other than RNDD) require two instructions - the first
672 * stores a rounded value (possibly the wrong way) in the dest register, but
673 * also sets a per-channel "increment bit" in the flag register. A predicated
674 * add of 1.0 fixes dest to contain the desired result.
677 void brw_##OP(struct brw_compile *p, \
678 struct brw_reg dest, \
679 struct brw_reg src) \
681 struct brw_instruction *rnd, *add; \
682 rnd = next_insn(p, BRW_OPCODE_##OP); \
683 brw_set_dest(rnd, dest); \
684 brw_set_src0(rnd, src); \
685 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
687 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
688 add->header.predicate_control = BRW_PREDICATE_NORMAL; \
720 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
726 if (src0
.type
== BRW_REGISTER_TYPE_F
||
727 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
728 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
729 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
730 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
733 if (src1
.type
== BRW_REGISTER_TYPE_F
||
734 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
735 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
736 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
737 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
740 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
743 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
749 if (src0
.type
== BRW_REGISTER_TYPE_D
||
750 src0
.type
== BRW_REGISTER_TYPE_UD
||
751 src1
.type
== BRW_REGISTER_TYPE_D
||
752 src1
.type
== BRW_REGISTER_TYPE_UD
) {
753 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
756 if (src0
.type
== BRW_REGISTER_TYPE_F
||
757 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
758 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
759 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
760 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
763 if (src1
.type
== BRW_REGISTER_TYPE_F
||
764 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
765 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
766 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
767 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
770 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
771 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
772 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
773 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
775 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
779 void brw_NOP(struct brw_compile
*p
)
781 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
782 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
783 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
784 brw_set_src1(insn
, brw_imm_ud(0x0));
791 /***********************************************************************
792 * Comparisons, if/else/endif
795 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
800 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
802 insn
->header
.execution_size
= 1;
803 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
804 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
806 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
826 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
828 struct intel_context
*intel
= &p
->brw
->intel
;
829 struct brw_instruction
*insn
;
831 if (p
->single_program_flow
) {
832 assert(execute_size
== BRW_EXECUTE_1
);
834 insn
= next_insn(p
, BRW_OPCODE_ADD
);
835 insn
->header
.predicate_inverse
= 1;
837 insn
= next_insn(p
, BRW_OPCODE_IF
);
840 /* Override the defaults for this instruction:
842 if (intel
->gen
< 6) {
843 brw_set_dest(insn
, brw_ip_reg());
844 brw_set_src0(insn
, brw_ip_reg());
845 brw_set_src1(insn
, brw_imm_d(0x0));
847 brw_set_dest(insn
, brw_imm_w(0));
848 insn
->bits1
.branch_gen6
.jump_count
= 0;
849 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
850 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
853 insn
->header
.execution_size
= execute_size
;
854 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
855 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
856 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
857 if (!p
->single_program_flow
)
858 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
860 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
865 struct brw_instruction
*
866 brw_IF_gen6(struct brw_compile
*p
, uint32_t conditional
,
867 struct brw_reg src0
, struct brw_reg src1
)
869 struct brw_instruction
*insn
;
871 insn
= next_insn(p
, BRW_OPCODE_IF
);
873 brw_set_dest(insn
, brw_imm_w(0));
874 insn
->header
.execution_size
= BRW_EXECUTE_8
;
875 insn
->bits1
.branch_gen6
.jump_count
= 0;
876 brw_set_src0(insn
, src0
);
877 brw_set_src1(insn
, src1
);
879 assert(insn
->header
.compression_control
== BRW_COMPRESSION_NONE
);
880 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
881 insn
->header
.destreg__conditionalmod
= conditional
;
883 if (!p
->single_program_flow
)
884 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
889 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
890 struct brw_instruction
*if_insn
)
892 struct intel_context
*intel
= &p
->brw
->intel
;
893 struct brw_instruction
*insn
;
896 /* jump count is for 64bit data chunk each, so one 128bit
897 instruction requires 2 chunks. */
901 if (p
->single_program_flow
) {
902 insn
= next_insn(p
, BRW_OPCODE_ADD
);
904 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
907 if (intel
->gen
< 6) {
908 brw_set_dest(insn
, brw_ip_reg());
909 brw_set_src0(insn
, brw_ip_reg());
910 brw_set_src1(insn
, brw_imm_d(0x0));
912 brw_set_dest(insn
, brw_imm_w(0));
913 insn
->bits1
.branch_gen6
.jump_count
= 0;
914 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
915 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
918 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
919 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
920 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
921 if (!p
->single_program_flow
)
922 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
924 /* Patch the if instruction to point at this instruction.
926 if (p
->single_program_flow
) {
927 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
929 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
931 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
933 if (intel
->gen
< 6) {
934 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
935 if_insn
->bits3
.if_else
.pop_count
= 0;
936 if_insn
->bits3
.if_else
.pad0
= 0;
938 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
945 void brw_ENDIF(struct brw_compile
*p
,
946 struct brw_instruction
*patch_insn
)
948 struct intel_context
*intel
= &p
->brw
->intel
;
954 if (p
->single_program_flow
) {
955 /* In single program flow mode, there's no need to execute an ENDIF,
956 * since we don't need to do any stack operations, and if we're executing
957 * currently, we want to just continue executing.
959 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
961 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
963 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
965 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
967 if (intel
->gen
< 6) {
968 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
969 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
970 brw_set_src1(insn
, brw_imm_d(0x0));
972 brw_set_dest(insn
, brw_imm_w(0));
973 brw_set_src0(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
974 brw_set_src1(insn
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
977 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
978 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
979 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
980 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
983 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
985 assert(patch_insn
->bits1
.branch_gen6
.jump_count
== 0);
987 /* Patch the if or else instructions to point at this or the next
988 * instruction respectively.
990 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
991 if (intel
->gen
< 6) {
992 /* Turn it into an IFF, which means no mask stack operations for
993 * all-false and jumping past the ENDIF.
995 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
996 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
997 patch_insn
->bits3
.if_else
.pop_count
= 0;
998 patch_insn
->bits3
.if_else
.pad0
= 0;
1000 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1001 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1004 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
1005 if (intel
->gen
< 6) {
1006 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1009 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
1010 patch_insn
->bits3
.if_else
.pop_count
= 1;
1011 patch_insn
->bits3
.if_else
.pad0
= 0;
1013 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1014 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
1018 /* Also pop item off the stack in the endif instruction:
1020 if (intel
->gen
< 6) {
1021 insn
->bits3
.if_else
.jump_count
= 0;
1022 insn
->bits3
.if_else
.pop_count
= 1;
1023 insn
->bits3
.if_else
.pad0
= 0;
1025 insn
->bits1
.branch_gen6
.jump_count
= 2;
/* Emit a BREAK instruction.
 *
 * The new instruction uses the IP register as destination and src0 and an
 * immediate 0 as src1, with compression disabled and the execution size
 * fixed at BRW_EXECUTE_8.  The caller's pop_count is stored in
 * bits3.if_else.pop_count and pad0 is cleared.
 *
 * NOTE(review): the braces and the return statement are not present in
 * this listing; presumably the new instruction is returned - confirm.
 */
1030 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
1032 struct brw_instruction
*insn
;
1033 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
1034 brw_set_dest(insn
, brw_ip_reg());
1035 brw_set_src0(insn
, brw_ip_reg());
1036 brw_set_src1(insn
, brw_imm_d(0x0));
1037 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1038 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1039 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1040 insn
->bits3
.if_else
.pad0
= 0;
1041 insn
->bits3
.if_else
.pop_count
= pop_count
;
/* Emit a CONTINUE instruction.
 *
 * Set up exactly like the BREAK emitter: IP register as destination and
 * src0, immediate 0 as src1, compression disabled, execution size fixed at
 * BRW_EXECUTE_8.  The caller's pop_count is stored in
 * bits3.if_else.pop_count and pad0 is cleared.
 *
 * NOTE(review): the braces and the return statement are not present in
 * this listing; presumably the new instruction is returned - confirm.
 */
1045 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
1047 struct brw_instruction
*insn
;
1048 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
1049 brw_set_dest(insn
, brw_ip_reg());
1050 brw_set_src0(insn
, brw_ip_reg());
1051 brw_set_src1(insn
, brw_imm_d(0x0));
1052 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1053 insn
->header
.execution_size
= BRW_EXECUTE_8
;
1054 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1055 insn
->bits3
.if_else
.pad0
= 0;
1056 insn
->bits3
.if_else
.pop_count
= pop_count
;
/* Mark the start of a loop.
 *
 * When p->single_program_flow is set no instruction is emitted; the
 * address of the next free slot in p->store is returned instead.
 * Otherwise a DO instruction is emitted with null dest/src0/src1,
 * compression disabled, the caller's execute_size and no predication.
 *
 * NOTE(review): the else keyword, closing braces and the final return are
 * not present in this listing; presumably the DO instruction is returned
 * on the second path - confirm.
 */
1062 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
1064 if (p
->single_program_flow
) {
1065 return &p
->store
[p
->nr_insn
];
1067 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1069 /* Override the defaults for this instruction:
1071 brw_set_dest(insn
, brw_null_reg());
1072 brw_set_src0(insn
, brw_null_reg());
1073 brw_set_src1(insn
, brw_null_reg());
1075 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1076 insn
->header
.execution_size
= execute_size
;
1077 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1078 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1079 /* insn->header.mask_control = BRW_MASK_DISABLE; */
/* Emit the instruction that closes a loop and jumps to do_insn.
 *
 * With p->single_program_flow set, an ADD is emitted with execution size 1
 * and bits3.d = (do_insn - insn) * 16.  Otherwise a WHILE is emitted that
 * copies do_insn's execution size (do_insn is asserted to be a DO) and
 * stores br * (do_insn - insn + 1) in bits3.if_else.jump_count, with
 * pop_count and pad0 cleared.  On both paths dest and src0 are the IP
 * register, src1 is an immediate 0 and compression is disabled.  The last
 * visible statement resets the default predicate control held in
 * p->current to BRW_PREDICATE_NONE.
 *
 * NOTE(review): the declaration of br, the body of the gen >= 5 check,
 * the else keywords and the return are missing from this listing;
 * presumably br is a per-generation jump scale - confirm.
 */
1087 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1088 struct brw_instruction
*do_insn
)
1090 struct intel_context
*intel
= &p
->brw
->intel
;
1091 struct brw_instruction
*insn
;
1094 if (intel
->gen
>= 5)
1097 if (p
->single_program_flow
)
1098 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1100 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1102 brw_set_dest(insn
, brw_ip_reg());
1103 brw_set_src0(insn
, brw_ip_reg());
1104 brw_set_src1(insn
, brw_imm_d(0x0));
1106 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1108 if (p
->single_program_flow
) {
1109 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1111 insn
->bits3
.d
= (do_insn
- insn
) * 16;
1113 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1115 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1116 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1117 insn
->bits3
.if_else
.pop_count
= 0;
1118 insn
->bits3
.if_else
.pad0
= 0;
1121 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1123 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1124 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
/* Patch a previously emitted JMPI so that it targets the next instruction
 * to be emitted.
 *
 * landing is the next free slot in p->store.  jmp_insn must be a JMPI
 * whose src1 is an immediate (both asserted); its bits3.ud is overwritten
 * with jmpi * ((landing - jmp_insn) - 1).
 *
 * NOTE(review): the declaration of jmpi and the body of the gen >= 5
 * check are missing from this listing; presumably jmpi is a per-generation
 * jump scale - confirm.
 */
1131 void brw_land_fwd_jump(struct brw_compile
*p
,
1132 struct brw_instruction
*jmp_insn
)
1134 struct intel_context
*intel
= &p
->brw
->intel
;
1135 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1138 if (intel
->gen
>= 5)
1141 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1142 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1144 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1149 /* To integrate with the above, it makes sense that the comparison
1150 * instruction should populate the flag register. It might be simpler
1151 * just to use the flag reg for most WM tasks?
/* Emit a CMP instruction.
 *
 * conditional is stored in header.destreg__conditionalmod, then dest, src0
 * and src1 are set.  When dest lives in the architecture register file
 * (and a second condition, whose line is missing from this listing, also
 * holds) the default state in p->current is switched to
 * BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff.
 *
 * NOTE(review): the declaration of the conditional parameter is not
 * present in this listing - confirm its type against the header.
 */
1153 void brw_CMP(struct brw_compile
*p
,
1154 struct brw_reg dest
,
1156 struct brw_reg src0
,
1157 struct brw_reg src1
)
1159 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1161 insn
->header
.destreg__conditionalmod
= conditional
;
1162 brw_set_dest(insn
, dest
);
1163 brw_set_src0(insn
, src0
);
1164 brw_set_src1(insn
, src1
);
1166 /* guess_execution_size(insn, src0); */
1169 /* Make it so that future instructions will use the computed flag
1170 * value until brw_set_predicate_control_flag_value() is called
1173 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1175 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1176 p
->flag_value
= 0xff;
1180 /* Issue 'wait' instruction for n1, host could program MMIO
1181 to wake up thread. */
/* Emit a WAIT instruction.
 *
 * The register returned by brw_notification_1_reg() is used as both
 * destination and src0; src1 is the null register.  Execution size,
 * predicate control and compression control are all forced to 0.
 */
1182 void brw_WAIT (struct brw_compile
*p
)
1184 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1185 struct brw_reg src
= brw_notification_1_reg();
1187 brw_set_dest(insn
, src
);
1188 brw_set_src0(insn
, src
);
1189 brw_set_src1(insn
, brw_null_reg());
1190 insn
->header
.execution_size
= 0; /* must */
1191 insn
->header
.predicate_control
= 0;
1192 insn
->header
.compression_control
= 0;
1196 /***********************************************************************
1197 * Helpers for the various SEND message types:
1200 /** Extended math function, float[8].
/* Emit an extended math operation with a single source operand.
 *
 * gen >= 6: a MATH instruction is emitted.  dest and src must be GRF
 * registers with horizontal stride 1, and src must be of float type unless
 * the function is one of the two INT_DIV variants (all asserted).  The
 * function code is stored in header.destreg__conditionalmod and src1 is
 * the null register.
 *
 * Remaining code (presumably the else branch for older generations): a
 * SEND is emitted with predicate_control cleared and msg_reg_nr stored in
 * header.destreg__conditionalmod.  The message length is 2 for POW and 1
 * otherwise; the response length is 2 for SINCOS and 1 otherwise.
 * brw_set_math_message() fills in the message descriptor.
 *
 * NOTE(review): most of the parameter list, the else keyword and several
 * brw_set_math_message() arguments are missing from this listing.
 */
1202 void brw_math( struct brw_compile
*p
,
1203 struct brw_reg dest
,
1211 struct intel_context
*intel
= &p
->brw
->intel
;
1213 if (intel
->gen
>= 6) {
1214 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1216 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1217 assert(src
.file
== BRW_GENERAL_REGISTER_FILE
);
1219 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1220 assert(src
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1222 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1223 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1224 assert(src
.type
== BRW_REGISTER_TYPE_F
);
1227 /* Math is the same ISA format as other opcodes, except that CondModifier
1228 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1230 insn
->header
.destreg__conditionalmod
= function
;
1232 brw_set_dest(insn
, dest
);
1233 brw_set_src0(insn
, src
);
1234 brw_set_src1(insn
, brw_null_reg());
1236 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1237 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1238 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1239 /* Example code doesn't set predicate_control for send
1242 insn
->header
.predicate_control
= 0;
1243 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1245 brw_set_dest(insn
, dest
);
1246 brw_set_src0(insn
, src
);
1247 brw_set_math_message(p
->brw
,
1249 msg_length
, response_length
,
1251 BRW_MATH_INTEGER_UNSIGNED
,
1258 /** Extended math function, float[8].
/* Emit a two-source MATH instruction.
 *
 * Asserts that the hardware generation is at least 6, that dest, src0 and
 * src1 are all GRF registers with horizontal stride 1, and - unless the
 * function is one of the two INT_DIV variants - that both sources are of
 * float type.  The function code is stored in
 * header.destreg__conditionalmod before dest, src0 and src1 are set.
 *
 * NOTE(review): the declaration of the function parameter is missing from
 * this listing - confirm its type against the header.
 */
1260 void brw_math2(struct brw_compile
*p
,
1261 struct brw_reg dest
,
1263 struct brw_reg src0
,
1264 struct brw_reg src1
)
1266 struct intel_context
*intel
= &p
->brw
->intel
;
1267 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1269 assert(intel
->gen
>= 6);
1273 assert(dest
.file
== BRW_GENERAL_REGISTER_FILE
);
1274 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
1275 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
1277 assert(dest
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1278 assert(src0
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1279 assert(src1
.hstride
== BRW_HORIZONTAL_STRIDE_1
);
1281 if (function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
&&
1282 function
!= BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
) {
1283 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
1284 assert(src1
.type
== BRW_REGISTER_TYPE_F
);
1287 /* Math is the same ISA format as other opcodes, except that CondModifier
1288 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1290 insn
->header
.destreg__conditionalmod
= function
;
1292 brw_set_dest(insn
, dest
);
1293 brw_set_src0(insn
, src0
);
1294 brw_set_src1(insn
, src1
);
1298 * Extended math function, float[16].
1299 * Use 2 send instructions.
/* Emit an extended math operation for a 16-wide destination.
 *
 * gen >= 6: one MATH instruction is emitted with the function code in
 * header.destreg__conditionalmod, the given dest and src, and a null src1.
 *
 * Remaining code: two SEND instructions are emitted between a push and a
 * pop of the instruction state, with the predicate flag value set to 0xff
 * and compression disabled.  The first uses msg_reg_nr and dest; the
 * second is marked BRW_COMPRESSION_2NDHALF and uses msg_reg_nr + 1 and
 * offset(dest, 1).  Both use the same src, a message length of 2 for POW
 * (else 1) and a response length of 2 for SINCOS (else 1).
 *
 * NOTE(review): most of the parameter list and whatever ends the gen >= 6
 * branch are missing from this listing; presumably that branch returns
 * before the SEND code - confirm.
 */
1301 void brw_math_16( struct brw_compile
*p
,
1302 struct brw_reg dest
,
1309 struct intel_context
*intel
= &p
->brw
->intel
;
1310 struct brw_instruction
*insn
;
1311 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1312 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1314 if (intel
->gen
>= 6) {
1315 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1317 /* Math is the same ISA format as other opcodes, except that CondModifier
1318 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1320 insn
->header
.destreg__conditionalmod
= function
;
1322 brw_set_dest(insn
, dest
);
1323 brw_set_src0(insn
, src
);
1324 brw_set_src1(insn
, brw_null_reg());
1328 /* First instruction:
1330 brw_push_insn_state(p
);
1331 brw_set_predicate_control_flag_value(p
, 0xff);
1332 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1334 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1335 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1337 brw_set_dest(insn
, dest
);
1338 brw_set_src0(insn
, src
);
1339 brw_set_math_message(p
->brw
,
1341 msg_length
, response_length
,
1343 BRW_MATH_INTEGER_UNSIGNED
,
1346 BRW_MATH_DATA_VECTOR
);
1348 /* Second instruction:
1350 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1351 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1352 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1354 brw_set_dest(insn
, offset(dest
,1));
1355 brw_set_src0(insn
, src
);
1356 brw_set_math_message(p
->brw
,
1358 msg_length
, response_length
,
1360 BRW_MATH_INTEGER_UNSIGNED
,
1363 BRW_MATH_DATA_VECTOR
);
1365 brw_pop_insn_state(p
);
1370 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1371 * using a constant offset per channel.
1373 * The offset must be aligned to oword size (16 bytes). Used for
1374 * register spilling.
/* Emit an OWORD block write to the scratch buffer (binding table index
 * 255, stateless).
 *
 * Steps visible here:
 *  - mrf is retyped to UD and msg_control is picked from num_regs: the
 *    2-OWORD value when num_regs == 1, the 4-OWORD value on the other
 *    visible assignment.
 *  - With masking and compression disabled, g0 is copied into mrf as the
 *    message header.  A following statement, whose opening line is missing
 *    from this listing, takes element 2 of a message register and
 *    brw_imm_ud(offset) as operands.
 *  - A SEND is emitted.  If compression was enabled it is switched off and
 *    src_header is widened with vec16().  Predication is asserted to be
 *    off and mrf.nr is stored in header.destreg__conditionalmod.
 *  - On gen >= 6 the destination is a null register and no commit message
 *    is requested; on the other path send_commit_msg is 1.
 *  - brw_set_dp_write_message() is called with the OWORD block write
 *    message type, header present, pixel scoreboard 0 and send_commit_msg
 *    as the response length.
 *
 * NOTE(review): the parameter declarations for mrf, num_regs and offset,
 * the else keywords and the non-gen6 dest assignment are missing from this
 * listing.
 */
1376 void brw_oword_block_write_scratch(struct brw_compile
*p
,
1381 struct intel_context
*intel
= &p
->brw
->intel
;
1382 uint32_t msg_control
;
1385 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1387 if (num_regs
== 1) {
1388 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1391 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1395 /* Set up the message header. This is g0, with g0.2 filled with
1396 * the offset. We don't want to leave our offset around in g0 or
1397 * it'll screw up texture samples, so set it up inside the message
1401 brw_push_insn_state(p
);
1402 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1403 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1405 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1407 /* set message header global offset field (reg 0, element 2) */
1409 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1411 2), BRW_REGISTER_TYPE_UD
),
1412 brw_imm_ud(offset
));
1414 brw_pop_insn_state(p
);
1418 struct brw_reg dest
;
1419 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1420 int send_commit_msg
;
1421 struct brw_reg src_header
= retype(brw_vec8_grf(0, 0),
1422 BRW_REGISTER_TYPE_UW
);
1424 if (insn
->header
.compression_control
!= BRW_COMPRESSION_NONE
) {
1425 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1426 src_header
= vec16(src_header
);
1428 assert(insn
->header
.predicate_control
== BRW_PREDICATE_NONE
);
1429 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1431 /* Until gen6, writes followed by reads from the same location
1432 * are not guaranteed to be ordered unless write_commit is set.
1433 * If set, then a no-op write is issued to the destination
1434 * register to set a dependency, and a read from the destination
1435 * can be used to ensure the ordering.
1437 * For gen6, only writes between different threads need ordering
1438 * protection. Our use of DP writes is all about register
1439 * spilling within a thread.
1441 if (intel
->gen
>= 6) {
1442 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1443 send_commit_msg
= 0;
1446 send_commit_msg
= 1;
1449 brw_set_dest(insn
, dest
);
1450 brw_set_src0(insn
, brw_null_reg());
1452 brw_set_dp_write_message(p
->brw
,
1454 255, /* binding table index (255=stateless) */
1456 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1458 GL_TRUE
, /* header_present */
1459 0, /* pixel scoreboard */
1460 send_commit_msg
, /* response_length */
1468 * Read a block of owords (half a GRF each) from the scratch buffer
1469 * using a constant index per channel.
1471 * Offset must be aligned to oword size (16 bytes). Used for register
/* Emit an OWORD block read from the scratch buffer (binding table index
 * 255, stateless) into dest.
 *
 * mrf is retyped to UD and dest to UW.  msg_control is the 2-OWORD value
 * when num_regs == 1 and the 4-OWORD value on the other visible
 * assignment.  With compression and masking disabled, g0 is copied into
 * mrf as the message header; a following statement, whose opening line is
 * missing from this listing, takes element 2 of a message register and
 * brw_imm_ud(offset) as operands.  A SEND is then emitted with predication
 * asserted off, compression disabled, mrf.nr in
 * header.destreg__conditionalmod and a null src0, and
 * brw_set_dp_read_message() is called with the OWORD block read message
 * type and target cache 1.
 *
 * NOTE(review): the return type line, the parameter declarations for mrf,
 * num_regs and offset, and the trailing message arguments are missing
 * from this listing.
 */
1475 brw_oword_block_read_scratch(struct brw_compile
*p
,
1476 struct brw_reg dest
,
1481 uint32_t msg_control
;
1484 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1485 dest
= retype(dest
, BRW_REGISTER_TYPE_UW
);
1487 if (num_regs
== 1) {
1488 msg_control
= BRW_DATAPORT_OWORD_BLOCK_2_OWORDS
;
1491 msg_control
= BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
;
1496 brw_push_insn_state(p
);
1497 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1498 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1500 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1502 /* set message header global offset field (reg 0, element 2) */
1504 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1506 2), BRW_REGISTER_TYPE_UD
),
1507 brw_imm_ud(offset
));
1509 brw_pop_insn_state(p
);
1513 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1515 assert(insn
->header
.predicate_control
== 0);
1516 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1517 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1519 brw_set_dest(insn
, dest
); /* UW? */
1520 brw_set_src0(insn
, brw_null_reg());
1522 brw_set_dp_read_message(p
->brw
,
1524 255, /* binding table index (255=stateless) */
1526 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1527 1, /* target cache (render/scratch) */
1534 * Read a float[4] vector from the data port Data Cache (const buffer).
1535 * Location (in buffer) should be a multiple of 16.
1536 * Used for fetching shader constants.
/* Emit an OWORD block read through the data port into dest.
 *
 * Inside a push/pop of the instruction state, with predication,
 * compression and masking all disabled:
 *  - g0 is copied into mrf (retyped to UD) as the message header, and a
 *    following statement, whose opening line is missing from this
 *    listing, takes element 2 of a message register and
 *    brw_imm_ud(offset) as operands;
 *  - a SEND is emitted with mrf.nr in header.destreg__conditionalmod and
 *    dest recast as an 8-wide UW vector.  src0 is mrf on gen >= 6 and the
 *    null register on the other path;
 *  - brw_set_dp_read_message() is called with the
 *    BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW control, the OWORD block read
 *    message type, source cache 0 and a response length of 1.
 *
 * NOTE(review): the parameter declarations for mrf and offset and the body
 * of the first gen >= 6 check are missing from this listing; the existing
 * comment suggests that body rescales offset into OWORD units - confirm.
 */
1538 void brw_oword_block_read(struct brw_compile
*p
,
1539 struct brw_reg dest
,
1542 uint32_t bind_table_index
)
1544 struct intel_context
*intel
= &p
->brw
->intel
;
1546 /* On newer hardware, offset is in units of owords. */
1547 if (intel
->gen
>= 6)
1550 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1552 brw_push_insn_state(p
);
1553 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1554 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1555 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1557 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1559 /* set message header global offset field (reg 0, element 2) */
1561 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE
,
1563 2), BRW_REGISTER_TYPE_UD
),
1564 brw_imm_ud(offset
));
1566 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1567 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1569 /* cast dest to a uword[8] vector */
1570 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1572 brw_set_dest(insn
, dest
);
1573 if (intel
->gen
>= 6) {
1574 brw_set_src0(insn
, mrf
);
1576 brw_set_src0(insn
, brw_null_reg());
1579 brw_set_dp_read_message(p
->brw
,
1582 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
,
1583 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
,
1584 0, /* source cache = data cache */
1586 1); /* response_length (1 reg, 2 owords!) */
1588 brw_pop_insn_state(p
);
1592 * Read a set of dwords from the data port Data Cache (const buffer).
1594 * Location (in buffer) appears as UD offsets in the register after
1595 * the provided mrf header reg.
/* Emit a DWORD scattered read through the data port into dest.
 *
 * With predication, compression and masking disabled, g0 is copied into
 * mrf (retyped to UD) as the message header.  A SEND is then emitted with
 * mrf.nr in header.destreg__conditionalmod, dest recast as an 8-wide UW
 * vector and a null src0.  brw_set_dp_read_message() is called with the
 * BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS control, the DWORD scattered
 * read message type, source cache 0 and a response length of 1.
 *
 * NOTE(review): the declaration of the mrf parameter and some message
 * arguments are missing from this listing.
 */
1597 void brw_dword_scattered_read(struct brw_compile
*p
,
1598 struct brw_reg dest
,
1600 uint32_t bind_table_index
)
1602 mrf
= retype(mrf
, BRW_REGISTER_TYPE_UD
);
1604 brw_push_insn_state(p
);
1605 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1606 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1607 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1608 brw_MOV(p
, mrf
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
1609 brw_pop_insn_state(p
);
1611 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1612 insn
->header
.destreg__conditionalmod
= mrf
.nr
;
1614 /* cast dest to a uword[8] vector */
1615 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1617 brw_set_dest(insn
, dest
);
1618 brw_set_src0(insn
, brw_null_reg());
1620 brw_set_dp_read_message(p
->brw
,
1623 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS
,
1624 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
,
1625 0, /* source cache = data cache */
1627 1); /* response_length */
1633 * Read float[4] constant(s) from VS constant buffer.
1634 * For relative addressing, two float[4] constants will be read into 'dest'.
1635 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
/* Emit an OWORD block read of a VS constant into dest.
 *
 * The message register number is fixed at 1.  With compression, masking
 * and predication disabled, brw_imm_ud(location) is moved into message
 * register 1 retyped to UD.  A SEND is then emitted with predication and
 * compression off, masking disabled, msg_reg_nr in
 * header.destreg__conditionalmod and a null src0.
 * brw_set_dp_read_message() is called with the OWORD block read message
 * type, source cache 0 and a response length of 1.
 *
 * NOTE(review): the declarations of location and b and several message
 * arguments are missing from this listing.  The printf lines look like
 * disabled debug output whose comment delimiters were lost - confirm.
 */
1637 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1638 struct brw_reg dest
,
1640 GLuint bind_table_index
)
1642 struct brw_instruction
*insn
;
1643 GLuint msg_reg_nr
= 1;
1647 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1648 location, msg_reg_nr);
1651 /* Setup MRF[1] with location/offset into const buffer */
1652 brw_push_insn_state(p
);
1653 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1654 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1655 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1657 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1658 * when the docs say only dword[2] should be set. Hmmm. But it works.
1660 b
= brw_message_reg(msg_reg_nr
);
1661 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1662 /*b = get_element_ud(b, 2);*/
1663 brw_MOV(p
, b
, brw_imm_ud(location
));
1665 brw_pop_insn_state(p
);
1667 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1669 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1670 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1671 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1672 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1674 brw_set_dest(insn
, dest
);
1675 brw_set_src0(insn
, brw_null_reg());
1677 brw_set_dp_read_message(p
->brw
,
1681 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1682 0, /* source cache = data cache */
1684 1); /* response_length (1 Oword) */
1688 * Read a float[4] constant per vertex from VS constant buffer, with
1689 * relative addressing.
/* Emit an OWORD dual block read of a VS constant using a relative address.
 *
 * With compression, masking and predication disabled, an ADD writes
 * addr_reg + brw_imm_d(offset) into message register 1 (retyped to UD).
 * A SEND is then emitted with predication and compression off, masking
 * disabled, header.destreg__conditionalmod set to 0 and g0 as src0.
 * msg_type is the GEN6 dual block read code when gen == 6, the G45 code
 * when gen == 5 or is_g4x is set, and the BRW code on the remaining path.
 * brw_set_dp_read_message() is called with the
 * BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD control, source cache 0 and a
 * response length of 1.
 *
 * NOTE(review): the declarations of offset and msg_type, the final else
 * keyword and several message arguments are missing from this listing.
 */
1691 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1692 struct brw_reg dest
,
1693 struct brw_reg addr_reg
,
1695 GLuint bind_table_index
)
1697 struct intel_context
*intel
= &p
->brw
->intel
;
1700 /* Setup MRF[1] with offset into const buffer */
1701 brw_push_insn_state(p
);
1702 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1703 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1704 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1706 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1709 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
),
1710 addr_reg
, brw_imm_d(offset
));
1711 brw_pop_insn_state(p
);
1713 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1715 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1716 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1717 insn
->header
.destreg__conditionalmod
= 0;
1718 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1720 brw_set_dest(insn
, dest
);
1721 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1723 if (intel
->gen
== 6)
1724 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1725 else if (intel
->gen
== 5 || intel
->is_g4x
)
1726 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1728 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1730 brw_set_dp_read_message(p
->brw
,
1733 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1735 0, /* source cache = data cache */
1737 1); /* response_length */
/* Emit a render target write message.
 *
 * A SENDC is emitted when gen >= 6 and binding_table_index is 0; a SEND
 * is emitted on the other path.  Predication is cleared and compression
 * disabled on the new instruction.  For gen >= 6 the header is dropped
 * when msg_length == 4, src0 is replaced by message register msg_reg_nr
 * and the GEN6 render target write message type is selected.  The other
 * visible assignments store msg_reg_nr in header.destreg__conditionalmod
 * and select the pre-gen6 message type (presumably in the else branch -
 * confirm).  msg_control is the SIMD16 single-source value when
 * dispatch_width == 16 and the SIMD8 subspan01 value otherwise.  Finally
 * dest and src0 are set and brw_set_dp_write_message() is called with
 * pixel scoreboard 1 and send_commit_msg 0.
 *
 * NOTE(review): the declarations of dispatch_width, msg_reg_nr and
 * msg_length, the else keywords and several message arguments are missing
 * from this listing.
 */
1742 void brw_fb_WRITE(struct brw_compile
*p
,
1744 struct brw_reg dest
,
1746 struct brw_reg src0
,
1747 GLuint binding_table_index
,
1749 GLuint response_length
,
1752 struct intel_context
*intel
= &p
->brw
->intel
;
1753 struct brw_instruction
*insn
;
1754 GLuint msg_control
, msg_type
;
1755 GLboolean header_present
= GL_TRUE
;
1757 if (intel
->gen
>= 6 && binding_table_index
== 0) {
1758 insn
= next_insn(p
, BRW_OPCODE_SENDC
);
1760 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1762 /* The execution mask is ignored for render target writes. */
1763 insn
->header
.predicate_control
= 0;
1764 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1766 if (intel
->gen
>= 6) {
1767 if (msg_length
== 4)
1768 header_present
= GL_FALSE
;
1770 /* headerless version, just submit color payload */
1771 src0
= brw_message_reg(msg_reg_nr
);
1773 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6
;
1775 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1777 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1780 if (dispatch_width
== 16)
1781 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1783 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1785 brw_set_dest(insn
, dest
);
1786 brw_set_src0(insn
, src0
);
1787 brw_set_dp_write_message(p
->brw
,
1789 binding_table_index
,
1794 1, /* pixel scoreboard */
1797 0 /* send_commit_msg */);
1802 * Texture sample instruction.
1803 * Note: the msg_type plus msg_length values determine exactly what kind
1804 * of sampling operation is performed. See volume 4, page 161 of docs.
/* Emit a sampler SEND, with a workaround for partial writemasks.
 *
 * Visible structure:
 *  - A writemask of 0 is special-cased first (the action taken is not
 *    visible in this listing).
 *  - When writemask != WRITEMASK_XYZW, two loops over the four channel
 *    bits derive newmask, len and dst_offset (loop bodies not visible).
 *    If newmask differs from writemask the need_stall path is taken.
 *    Otherwise a message header is built in message register msg_reg_nr:
 *    g0 is copied in and element 2 is overwritten with the inverted
 *    newmask shifted left by 12, with compression and masking disabled.
 *    src0 then becomes a null UW register, dest is advanced by
 *    dst_offset, and response_length is recomputed as len * 2 on one
 *    visible path.  The execution size is guessed from dest to detect
 *    16-wide dispatch.
 *  - On gen >= 6 src0 is first copied into message register msg_reg_nr
 *    with masking disabled, and that register replaces src0.
 *  - The SEND is emitted with predication cleared, compression disabled
 *    and msg_reg_nr in header.destreg__conditionalmod, then dest, src0
 *    and the sampler message descriptor are set.
 *  - The trailing code moves the last response register (offset by
 *    response_length - 1, 8 wide) onto itself with compression disabled;
 *    presumably this is the need_stall dependency workaround - confirm.
 *
 * NOTE(review): many parameter declarations, loop bodies, conditions and
 * else keywords are missing from this listing, so the exact branch
 * structure above should be checked against the full source.
 */
1806 void brw_SAMPLE(struct brw_compile
*p
,
1807 struct brw_reg dest
,
1809 struct brw_reg src0
,
1810 GLuint binding_table_index
,
1814 GLuint response_length
,
1817 GLuint header_present
,
1820 struct intel_context
*intel
= &p
->brw
->intel
;
1821 GLboolean need_stall
= 0;
1823 if (writemask
== 0) {
1824 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1828 /* Hardware doesn't do destination dependency checking on send
1829 * instructions properly. Add a workaround which generates the
1830 * dependency by other means. In practice it seems like this bug
1831 * only crops up for texture samples, and only where registers are
1832 * written by the send and then written again later without being
1833 * read in between. Luckily for us, we already track that
1834 * information and use it to modify the writemask for the
1835 * instruction, so that is a guide for whether a workaround is
1838 if (writemask
!= WRITEMASK_XYZW
) {
1839 GLuint dst_offset
= 0;
1840 GLuint i
, newmask
= 0, len
= 0;
1842 for (i
= 0; i
< 4; i
++) {
1843 if (writemask
& (1<<i
))
1847 for (; i
< 4; i
++) {
1848 if (!(writemask
& (1<<i
)))
1854 if (newmask
!= writemask
) {
1856 /* printf("need stall %x %x\n", newmask , writemask); */
1859 GLboolean dispatch_16
= GL_FALSE
;
1861 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1863 guess_execution_size(p
->current
, dest
);
1864 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1865 dispatch_16
= GL_TRUE
;
1867 newmask
= ~newmask
& WRITEMASK_XYZW
;
1869 brw_push_insn_state(p
);
1871 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1872 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1874 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1875 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1877 brw_pop_insn_state(p
);
1879 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1880 dest
= offset(dest
, dst_offset
);
1882 /* For 16-wide dispatch, masked channels are skipped in the
1883 * response. For 8-wide, masked channels still take up slots,
1884 * and are just not written to.
1887 response_length
= len
* 2;
1892 struct brw_instruction
*insn
;
1894 /* Sandybridge doesn't have the implied move for SENDs,
1895 * and the first message register index comes from src0.
1897 if (intel
->gen
>= 6) {
1898 brw_push_insn_state(p
);
1899 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1900 /* m1 contains header? */
1901 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1902 brw_pop_insn_state(p
);
1903 src0
= brw_message_reg(msg_reg_nr
);
1906 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1907 insn
->header
.predicate_control
= 0; /* XXX */
1908 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1910 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1912 brw_set_dest(insn
, dest
);
1913 brw_set_src0(insn
, src0
);
1914 brw_set_sampler_message(p
->brw
, insn
,
1915 binding_table_index
,
1926 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1928 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1930 brw_push_insn_state(p
);
1931 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1932 brw_MOV(p
, reg
, reg
);
1933 brw_pop_insn_state(p
);
1938 /* All these variables are pretty confusing - we might be better off
1939 * using bitmasks and macros for this, in the old style. Or perhaps
1940 * just having the caller instantiate the fields in dword3 itself.
/* Emit a URB write SEND.
 *
 * On gen >= 6, src0 is first copied into message register msg_reg_nr with
 * masking disabled and that register then replaces src0.  A SEND is
 * emitted; msg_length is asserted to be below BRW_MAX_MRF, dest and src0
 * are set, src1 is an immediate 0 and msg_reg_nr is stored in
 * header.destreg__conditionalmod.  brw_set_urb_message() then fills in
 * the message descriptor.
 *
 * NOTE(review): several parameter declarations (including msg_reg_nr and
 * msg_length) and the brw_set_urb_message() arguments after p->brw are
 * missing from this listing.
 */
1942 void brw_urb_WRITE(struct brw_compile
*p
,
1943 struct brw_reg dest
,
1945 struct brw_reg src0
,
1949 GLuint response_length
,
1951 GLboolean writes_complete
,
1955 struct intel_context
*intel
= &p
->brw
->intel
;
1956 struct brw_instruction
*insn
;
1958 /* Sandybridge doesn't have the implied move for SENDs,
1959 * and the first message register index comes from src0.
1961 if (intel
->gen
>= 6) {
1962 brw_push_insn_state(p
);
1963 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1964 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1965 brw_pop_insn_state(p
);
1966 src0
= brw_message_reg(msg_reg_nr
);
1969 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1971 assert(msg_length
< BRW_MAX_MRF
);
1973 brw_set_dest(insn
, dest
);
1974 brw_set_src0(insn
, src0
);
1975 brw_set_src1(insn
, brw_imm_d(0));
1978 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1980 brw_set_urb_message(p
->brw
,
1992 void brw_ff_sync(struct brw_compile
*p
,
1993 struct brw_reg dest
,
1995 struct brw_reg src0
,
1997 GLuint response_length
,
2000 struct intel_context
*intel
= &p
->brw
->intel
;
2001 struct brw_instruction
*insn
;
2003 /* Sandybridge doesn't have the implied move for SENDs,
2004 * and the first message register index comes from src0.
2006 if (intel
->gen
>= 6) {
2007 brw_push_insn_state(p
);
2008 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
2009 brw_MOV(p
, retype(brw_message_reg(msg_reg_nr
), BRW_REGISTER_TYPE_UD
),
2010 retype(src0
, BRW_REGISTER_TYPE_UD
));
2011 brw_pop_insn_state(p
);
2012 src0
= brw_message_reg(msg_reg_nr
);
2015 insn
= next_insn(p
, BRW_OPCODE_SEND
);
2016 brw_set_dest(insn
, dest
);
2017 brw_set_src0(insn
, src0
);
2018 brw_set_src1(insn
, brw_imm_d(0));
2021 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
2023 brw_set_ff_sync_message(p
->brw
,