2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
59 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
60 assert(dest
.nr
< 128);
62 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
63 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
64 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
66 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
67 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
69 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
70 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
71 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
72 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
73 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
76 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
77 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
78 /* even ignored in da16, still need to set as '01' */
79 insn
->bits1
.da16
.dest_horiz_stride
= 1;
83 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
85 /* These are different sizes in align1 vs align16:
87 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
88 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
89 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
90 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
91 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
94 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
95 /* even ignored in da16, still need to set as '01' */
96 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
103 guess_execution_size(insn
, dest
);
106 extern int reg_type_size
[];
109 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
111 int hstride_for_reg
[] = {0, 1, 2, 4};
112 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg
[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
115 int width
, hstride
, vstride
, execsize
;
117 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
122 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
123 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
124 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
130 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
131 reg
.file
== BRW_ARF_NULL
)
134 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
135 hstride
= hstride_for_reg
[reg
.hstride
];
137 if (reg
.vstride
== 0xf) {
140 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
141 vstride
= vstride_for_reg
[reg
.vstride
];
144 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
145 width
= width_for_reg
[reg
.width
];
147 assert(insn
->header
.execution_size
>= 0 &&
148 insn
->header
.execution_size
< Elements(execsize_for_reg
));
149 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
153 assert(execsize
>= width
);
156 if (execsize
== width
&& hstride
!= 0) {
157 assert(vstride
== -1 || vstride
== width
* hstride
);
161 if (execsize
== width
&& hstride
== 0) {
162 /* no restriction on vstride. */
167 assert(hstride
== 0);
171 if (execsize
== 1 && width
== 1) {
172 assert(hstride
== 0);
173 assert(vstride
== 0);
177 if (vstride
== 0 && hstride
== 0) {
181 /* 10. Check destination issues. */
184 static void brw_set_src0( struct brw_instruction
*insn
,
187 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
188 assert(reg
.nr
< 128);
190 validate_reg(insn
, reg
);
192 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
193 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
194 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
195 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
196 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
198 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
199 insn
->bits3
.ud
= reg
.dw1
.ud
;
201 /* Required to set some fields in src1 as well:
203 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
204 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
208 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
209 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
210 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
211 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
214 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
215 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
219 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
221 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
222 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
225 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
229 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
230 if (reg
.width
== BRW_WIDTH_1
&&
231 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
232 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
233 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
234 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
237 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
238 insn
->bits2
.da1
.src0_width
= reg
.width
;
239 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
243 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
244 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
245 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
246 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
251 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
252 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
254 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
260 void brw_set_src1( struct brw_instruction
*insn
,
263 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
265 assert(reg
.nr
< 128);
267 validate_reg(insn
, reg
);
269 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
270 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
271 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
272 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
274 /* Only src1 can be immediate in two-argument instructions.
276 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
278 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
279 insn
->bits3
.ud
= reg
.dw1
.ud
;
282 /* This is a hardware restriction, which may or may not be lifted
285 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
288 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
289 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
290 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
293 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
294 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
297 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
298 if (reg
.width
== BRW_WIDTH_1
&&
299 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
300 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
301 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
302 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
305 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
306 insn
->bits3
.da1
.src1_width
= reg
.width
;
307 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
311 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
312 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
313 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
314 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
316 /* This is an oddity of the fact we're using the same
317 * descriptions for registers in align_16 as align_1:
319 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
320 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
322 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
329 static void brw_set_math_message( struct brw_context
*brw
,
330 struct brw_instruction
*insn
,
332 GLuint response_length
,
335 GLboolean low_precision
,
339 struct intel_context
*intel
= &brw
->intel
;
340 brw_set_src1(insn
, brw_imm_d(0));
342 if (intel
->gen
== 5) {
343 insn
->bits3
.math_gen5
.function
= function
;
344 insn
->bits3
.math_gen5
.int_type
= integer_type
;
345 insn
->bits3
.math_gen5
.precision
= low_precision
;
346 insn
->bits3
.math_gen5
.saturate
= saturate
;
347 insn
->bits3
.math_gen5
.data_type
= dataType
;
348 insn
->bits3
.math_gen5
.snapshot
= 0;
349 insn
->bits3
.math_gen5
.header_present
= 0;
350 insn
->bits3
.math_gen5
.response_length
= response_length
;
351 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
352 insn
->bits3
.math_gen5
.end_of_thread
= 0;
353 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
354 insn
->bits2
.send_gen5
.end_of_thread
= 0;
356 insn
->bits3
.math
.function
= function
;
357 insn
->bits3
.math
.int_type
= integer_type
;
358 insn
->bits3
.math
.precision
= low_precision
;
359 insn
->bits3
.math
.saturate
= saturate
;
360 insn
->bits3
.math
.data_type
= dataType
;
361 insn
->bits3
.math
.response_length
= response_length
;
362 insn
->bits3
.math
.msg_length
= msg_length
;
363 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
364 insn
->bits3
.math
.end_of_thread
= 0;
369 static void brw_set_ff_sync_message(struct brw_context
*brw
,
370 struct brw_instruction
*insn
,
372 GLuint response_length
,
373 GLboolean end_of_thread
)
375 struct intel_context
*intel
= &brw
->intel
;
376 brw_set_src1(insn
, brw_imm_d(0));
378 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
379 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
380 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
381 insn
->bits3
.urb_gen5
.allocate
= allocate
;
382 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
383 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
384 insn
->bits3
.urb_gen5
.header_present
= 1;
385 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
386 insn
->bits3
.urb_gen5
.msg_length
= 1;
387 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
388 if (intel
->gen
>= 6) {
389 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
391 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
392 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
396 static void brw_set_urb_message( struct brw_context
*brw
,
397 struct brw_instruction
*insn
,
401 GLuint response_length
,
402 GLboolean end_of_thread
,
405 GLuint swizzle_control
)
407 struct intel_context
*intel
= &brw
->intel
;
408 brw_set_src1(insn
, brw_imm_d(0));
410 if (intel
->gen
>= 5) {
411 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
412 insn
->bits3
.urb_gen5
.offset
= offset
;
413 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
414 insn
->bits3
.urb_gen5
.allocate
= allocate
;
415 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
416 insn
->bits3
.urb_gen5
.complete
= complete
;
417 insn
->bits3
.urb_gen5
.header_present
= 1;
418 insn
->bits3
.urb_gen5
.response_length
= response_length
;
419 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
420 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
421 if (intel
->gen
>= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
426 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
428 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
429 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
432 insn
->bits3
.urb
.opcode
= 0; /* ? */
433 insn
->bits3
.urb
.offset
= offset
;
434 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
435 insn
->bits3
.urb
.allocate
= allocate
;
436 insn
->bits3
.urb
.used
= used
; /* ? */
437 insn
->bits3
.urb
.complete
= complete
;
438 insn
->bits3
.urb
.response_length
= response_length
;
439 insn
->bits3
.urb
.msg_length
= msg_length
;
440 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
441 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
445 static void brw_set_dp_write_message( struct brw_context
*brw
,
446 struct brw_instruction
*insn
,
447 GLuint binding_table_index
,
451 GLuint pixel_scoreboard_clear
,
452 GLuint response_length
,
453 GLuint end_of_thread
,
454 GLuint send_commit_msg
)
456 struct intel_context
*intel
= &brw
->intel
;
457 brw_set_src1(insn
, brw_imm_ud(0));
459 if (intel
->gen
>= 6) {
460 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
461 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
462 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
463 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
464 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
465 insn
->bits3
.dp_render_cache
.header_present
= 0; /* XXX */
466 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
467 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
468 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
469 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
470 /* XXX really need below? */
471 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
472 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
473 } else if (intel
->gen
== 5) {
474 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
475 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
476 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
477 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
478 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
479 insn
->bits3
.dp_write_gen5
.header_present
= 1;
480 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
481 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
482 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
483 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
484 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
486 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
487 insn
->bits3
.dp_write
.msg_control
= msg_control
;
488 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
489 insn
->bits3
.dp_write
.msg_type
= msg_type
;
490 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
491 insn
->bits3
.dp_write
.response_length
= response_length
;
492 insn
->bits3
.dp_write
.msg_length
= msg_length
;
493 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
494 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
498 static void brw_set_dp_read_message( struct brw_context
*brw
,
499 struct brw_instruction
*insn
,
500 GLuint binding_table_index
,
505 GLuint response_length
,
506 GLuint end_of_thread
)
508 struct intel_context
*intel
= &brw
->intel
;
509 brw_set_src1(insn
, brw_imm_d(0));
511 if (intel
->gen
== 5) {
512 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
513 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
514 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
515 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
516 insn
->bits3
.dp_read_gen5
.header_present
= 1;
517 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
518 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
519 insn
->bits3
.dp_read_gen5
.pad1
= 0;
520 insn
->bits3
.dp_read_gen5
.end_of_thread
= end_of_thread
;
521 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
522 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
524 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
525 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
526 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
527 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
528 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
529 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
530 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
531 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
532 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
; /*31*/
536 static void brw_set_sampler_message(struct brw_context
*brw
,
537 struct brw_instruction
*insn
,
538 GLuint binding_table_index
,
541 GLuint response_length
,
544 GLuint header_present
,
547 struct intel_context
*intel
= &brw
->intel
;
549 brw_set_src1(insn
, brw_imm_d(0));
551 if (intel
->gen
== 5) {
552 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
553 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
554 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
555 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
556 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
557 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
558 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
559 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
560 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
561 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
562 } else if (intel
->is_g4x
) {
563 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
564 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
565 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
566 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
567 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
568 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
569 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
571 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
572 insn
->bits3
.sampler
.sampler
= sampler
;
573 insn
->bits3
.sampler
.msg_type
= msg_type
;
574 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
575 insn
->bits3
.sampler
.response_length
= response_length
;
576 insn
->bits3
.sampler
.msg_length
= msg_length
;
577 insn
->bits3
.sampler
.end_of_thread
= eot
;
578 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
584 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
587 struct brw_instruction
*insn
;
589 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
591 insn
= &p
->store
[p
->nr_insn
++];
592 memcpy(insn
, p
->current
, sizeof(*insn
));
594 /* Reset this one-shot flag:
597 if (p
->current
->header
.destreg__conditionalmod
) {
598 p
->current
->header
.destreg__conditionalmod
= 0;
599 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
602 insn
->header
.opcode
= opcode
;
607 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
612 struct brw_instruction
*insn
= next_insn(p
, opcode
);
613 brw_set_dest(insn
, dest
);
614 brw_set_src0(insn
, src
);
618 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
622 struct brw_reg src1
)
624 struct brw_instruction
*insn
= next_insn(p
, opcode
);
625 brw_set_dest(insn
, dest
);
626 brw_set_src0(insn
, src0
);
627 brw_set_src1(insn
, src1
);
632 /***********************************************************************
633 * Convenience routines.
636 struct brw_instruction *brw_##OP(struct brw_compile *p, \
637 struct brw_reg dest, \
638 struct brw_reg src0) \
640 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
644 struct brw_instruction *brw_##OP(struct brw_compile *p, \
645 struct brw_reg dest, \
646 struct brw_reg src0, \
647 struct brw_reg src1) \
649 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
677 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
683 if (src0
.type
== BRW_REGISTER_TYPE_F
||
684 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
685 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
686 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
687 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
690 if (src1
.type
== BRW_REGISTER_TYPE_F
||
691 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
692 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
693 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
694 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
697 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
700 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
706 if (src0
.type
== BRW_REGISTER_TYPE_D
||
707 src0
.type
== BRW_REGISTER_TYPE_UD
||
708 src1
.type
== BRW_REGISTER_TYPE_D
||
709 src1
.type
== BRW_REGISTER_TYPE_UD
) {
710 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
713 if (src0
.type
== BRW_REGISTER_TYPE_F
||
714 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
715 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
716 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
717 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
720 if (src1
.type
== BRW_REGISTER_TYPE_F
||
721 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
722 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
723 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
724 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
727 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
728 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
729 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
730 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
732 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
736 void brw_NOP(struct brw_compile
*p
)
738 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
739 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
740 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
741 brw_set_src1(insn
, brw_imm_ud(0x0));
748 /***********************************************************************
749 * Comparisons, if/else/endif
752 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
757 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
759 insn
->header
.execution_size
= 1;
760 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
761 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
763 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
768 /* EU takes the value from the flag register and pushes it onto some
769 * sort of a stack (presumably merging with any flag value already on
770 * the stack). Within an if block, the flags at the top of the stack
771 * control execution on each channel of the unit, eg. on each of the
772 * 16 pixel values in our wm programs.
774 * When the matching 'else' instruction is reached (presumably by
775 * countdown of the instruction count patched in by our ELSE/ENDIF
776 * functions), the relevent flags are inverted.
778 * When the matching 'endif' instruction is reached, the flags are
779 * popped off. If the stack is now empty, normal execution resumes.
781 * No attempt is made to deal with stack overflow (14 elements?).
783 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
785 struct brw_instruction
*insn
;
787 if (p
->single_program_flow
) {
788 assert(execute_size
== BRW_EXECUTE_1
);
790 insn
= next_insn(p
, BRW_OPCODE_ADD
);
791 insn
->header
.predicate_inverse
= 1;
793 insn
= next_insn(p
, BRW_OPCODE_IF
);
796 /* Override the defaults for this instruction:
798 brw_set_dest(insn
, brw_ip_reg());
799 brw_set_src0(insn
, brw_ip_reg());
800 brw_set_src1(insn
, brw_imm_d(0x0));
802 insn
->header
.execution_size
= execute_size
;
803 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
804 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
805 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
806 if (!p
->single_program_flow
)
807 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
809 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
815 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
816 struct brw_instruction
*if_insn
)
818 struct intel_context
*intel
= &p
->brw
->intel
;
819 struct brw_instruction
*insn
;
825 if (p
->single_program_flow
) {
826 insn
= next_insn(p
, BRW_OPCODE_ADD
);
828 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
831 brw_set_dest(insn
, brw_ip_reg());
832 brw_set_src0(insn
, brw_ip_reg());
833 brw_set_src1(insn
, brw_imm_d(0x0));
835 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
836 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
837 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
838 if (!p
->single_program_flow
)
839 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
841 /* Patch the if instruction to point at this instruction.
843 if (p
->single_program_flow
) {
844 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
846 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
848 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
850 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
851 if_insn
->bits3
.if_else
.pop_count
= 0;
852 if_insn
->bits3
.if_else
.pad0
= 0;
858 void brw_ENDIF(struct brw_compile
*p
,
859 struct brw_instruction
*patch_insn
)
861 struct intel_context
*intel
= &p
->brw
->intel
;
867 if (p
->single_program_flow
) {
868 /* In single program flow mode, there's no need to execute an ENDIF,
869 * since we don't need to do any stack operations, and if we're executing
870 * currently, we want to just continue executing.
872 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
874 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
876 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
878 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
880 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
881 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
882 brw_set_src1(insn
, brw_imm_d(0x0));
884 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
885 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
886 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
887 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
889 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
891 /* Patch the if or else instructions to point at this or the next
892 * instruction respectively.
894 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
895 /* Automagically turn it into an IFF:
897 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
898 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
899 patch_insn
->bits3
.if_else
.pop_count
= 0;
900 patch_insn
->bits3
.if_else
.pad0
= 0;
901 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
902 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
903 patch_insn
->bits3
.if_else
.pop_count
= 1;
904 patch_insn
->bits3
.if_else
.pad0
= 0;
909 /* Also pop item off the stack in the endif instruction:
911 insn
->bits3
.if_else
.jump_count
= 0;
912 insn
->bits3
.if_else
.pop_count
= 1;
913 insn
->bits3
.if_else
.pad0
= 0;
917 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
919 struct brw_instruction
*insn
;
920 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
921 brw_set_dest(insn
, brw_ip_reg());
922 brw_set_src0(insn
, brw_ip_reg());
923 brw_set_src1(insn
, brw_imm_d(0x0));
924 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
925 insn
->header
.execution_size
= BRW_EXECUTE_8
;
926 /* insn->header.mask_control = BRW_MASK_DISABLE; */
927 insn
->bits3
.if_else
.pad0
= 0;
928 insn
->bits3
.if_else
.pop_count
= pop_count
;
932 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
934 struct brw_instruction
*insn
;
935 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
936 brw_set_dest(insn
, brw_ip_reg());
937 brw_set_src0(insn
, brw_ip_reg());
938 brw_set_src1(insn
, brw_imm_d(0x0));
939 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
940 insn
->header
.execution_size
= BRW_EXECUTE_8
;
941 /* insn->header.mask_control = BRW_MASK_DISABLE; */
942 insn
->bits3
.if_else
.pad0
= 0;
943 insn
->bits3
.if_else
.pop_count
= pop_count
;
949 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
951 if (p
->single_program_flow
) {
952 return &p
->store
[p
->nr_insn
];
954 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
956 /* Override the defaults for this instruction:
958 brw_set_dest(insn
, brw_null_reg());
959 brw_set_src0(insn
, brw_null_reg());
960 brw_set_src1(insn
, brw_null_reg());
962 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
963 insn
->header
.execution_size
= execute_size
;
964 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
965 /* insn->header.mask_control = BRW_MASK_ENABLE; */
966 /* insn->header.mask_control = BRW_MASK_DISABLE; */
974 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
975 struct brw_instruction
*do_insn
)
977 struct intel_context
*intel
= &p
->brw
->intel
;
978 struct brw_instruction
*insn
;
984 if (p
->single_program_flow
)
985 insn
= next_insn(p
, BRW_OPCODE_ADD
);
987 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
989 brw_set_dest(insn
, brw_ip_reg());
990 brw_set_src0(insn
, brw_ip_reg());
991 brw_set_src1(insn
, brw_imm_d(0x0));
993 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
995 if (p
->single_program_flow
) {
996 insn
->header
.execution_size
= BRW_EXECUTE_1
;
998 insn
->bits3
.d
= (do_insn
- insn
) * 16;
1000 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1002 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1003 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1004 insn
->bits3
.if_else
.pop_count
= 0;
1005 insn
->bits3
.if_else
.pad0
= 0;
1008 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1010 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1011 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1018 void brw_land_fwd_jump(struct brw_compile
*p
,
1019 struct brw_instruction
*jmp_insn
)
1021 struct intel_context
*intel
= &p
->brw
->intel
;
1022 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1025 if (intel
->gen
== 5)
1028 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1029 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1031 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1036 /* To integrate with the above, it makes sense that the comparison
1037 * instruction should populate the flag register. It might be simpler
1038 * just to use the flag reg for most WM tasks? */
/* brw_CMP: emit a CMP instruction.
 *
 *   p           compile state; the instruction comes from next_insn().
 *   dest        destination register.
 *   src0, src1  the two operands being compared.
 *
 * The value `conditional` is written to header.destreg__conditionalmod
 * before dest, src0 and src1 are set.  When dest.file is
 * BRW_ARCHITECTURE_REGISTER_FILE (together with a second condition), the
 * default instruction state p->current is switched to
 * BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff.
 *
 * NOTE(review): the parameter that declares `conditional` and the right
 * hand side of the `&&` test are not visible in this listing.
 */
1040 void brw_CMP(struct brw_compile
*p
,
1041 struct brw_reg dest
,
1043 struct brw_reg src0
,
1044 struct brw_reg src1
)
1046 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1048 insn
->header
.destreg__conditionalmod
= conditional
;
1049 brw_set_dest(insn
, dest
);
1050 brw_set_src0(insn
, src0
);
1051 brw_set_src1(insn
, src1
);
1053 /* guess_execution_size(insn, src0); */
1056 /* Make it so that future instructions will use the computed flag
1057 * value until brw_set_predicate_control_flag_value() is called
1060 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1062 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1063 p
->flag_value
= 0xff;
1067 /* Issue 'wait' instruction for n1, host could program MMIO
1068 to wake up thread. */
/* brw_WAIT: emit a WAIT instruction on notification register 1.
 *
 *   p  compile state; the instruction comes from next_insn().
 *
 * The register returned by brw_notification_1_reg() is used as both
 * destination and src0, and src1 is the null register.  Execution size,
 * predicate control and compression control are all forced to 0 on the
 * emitted instruction; the original author marks the execution size
 * value as mandatory.
 */
1069 void brw_WAIT (struct brw_compile
*p
)
1071 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1072 struct brw_reg src
= brw_notification_1_reg();
1074 brw_set_dest(insn
, src
);
1075 brw_set_src0(insn
, src
);
1076 brw_set_src1(insn
, brw_null_reg());
1077 insn
->header
.execution_size
= 0; /* must */
1078 insn
->header
.predicate_control
= 0;
1079 insn
->header
.compression_control
= 0;
1083 /***********************************************************************
1084 * Helpers for the various SEND message types: */
1087 /** Extended math function, float[8]. */
/* brw_math: emit one extended-math operation.
 *
 *   p     compile state; instructions come from next_insn().
 *   dest  destination register.
 *
 * Two encodings are visible, chosen by intel->gen:
 *   - gen >= 6: a MATH instruction.  `function` is stored in
 *     header.destreg__conditionalmod, src0 is `src` and src1 is the null
 *     register.
 *   - the remaining statements build a SEND: msg_length is 2 for
 *     BRW_MATH_FUNCTION_POW and 1 otherwise, response_length is 2 for
 *     BRW_MATH_FUNCTION_SINCOS and 1 otherwise, predicate control is
 *     cleared, `msg_reg_nr` is stored in header.destreg__conditionalmod
 *     and the descriptor is filled in by brw_set_math_message().
 *
 * NOTE(review): the parameters that declare `function`, `msg_reg_nr` and
 * `src`, the `else` separating the two encodings and most arguments of
 * brw_set_math_message() are not visible in this listing.
 */
1089 void brw_math( struct brw_compile
*p
,
1090 struct brw_reg dest
,
1098 struct intel_context
*intel
= &p
->brw
->intel
;
1100 if (intel
->gen
>= 6) {
1101 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1103 /* Math is the same ISA format as other opcodes, except that CondModifier
1104 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1106 insn
->header
.destreg__conditionalmod
= function
;
1108 brw_set_dest(insn
, dest
);
1109 brw_set_src0(insn
, src
);
1110 brw_set_src1(insn
, brw_null_reg());
1112 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1113 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1114 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1115 /* Example code doesn't set predicate_control for send
1118 insn
->header
.predicate_control
= 0;
1119 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1121 brw_set_dest(insn
, dest
);
1122 brw_set_src0(insn
, src
);
1123 brw_set_math_message(p
->brw
,
1125 msg_length
, response_length
,
1127 BRW_MATH_INTEGER_UNSIGNED
,
1135 /* Extended math function, float[16].
1136 * Use 2 send instructions. */
/* brw_math_16: emit an extended-math operation for a 16-wide destination.
 *
 *   p     compile state; instructions come from next_insn().
 *   dest  destination register; the second SEND writes offset(dest, 1).
 *
 * msg_length is 2 for BRW_MATH_FUNCTION_POW (1 otherwise) and
 * response_length is 2 for BRW_MATH_FUNCTION_SINCOS (1 otherwise).
 *
 * Visible encodings:
 *   - gen >= 6: a single MATH instruction with `function` stored in
 *     header.destreg__conditionalmod, src0 = src, src1 = null register.
 *   - the remaining statements run between brw_push_insn_state() and
 *     brw_pop_insn_state() with the predicate flag value set to 0xff and
 *     compression disabled, and emit two SENDs:
 *       1. message register msg_reg_nr, destination dest;
 *       2. BRW_COMPRESSION_2NDHALF, message register msg_reg_nr + 1,
 *          destination offset(dest, 1).
 *     Both use BRW_MATH_DATA_VECTOR in brw_set_math_message().
 *
 * NOTE(review): the parameters that declare `function`, `msg_reg_nr` and
 * `src`, and whatever ends the gen >= 6 branch (presumably an early
 * return), are not visible in this listing.
 */
1138 void brw_math_16( struct brw_compile
*p
,
1139 struct brw_reg dest
,
1146 struct intel_context
*intel
= &p
->brw
->intel
;
1147 struct brw_instruction
*insn
;
1148 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1149 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1151 if (intel
->gen
>= 6) {
1152 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1154 /* Math is the same ISA format as other opcodes, except that CondModifier
1155 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1157 insn
->header
.destreg__conditionalmod
= function
;
1159 brw_set_dest(insn
, dest
);
1160 brw_set_src0(insn
, src
);
1161 brw_set_src1(insn
, brw_null_reg());
1165 /* First instruction:
1167 brw_push_insn_state(p
);
1168 brw_set_predicate_control_flag_value(p
, 0xff);
1169 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1171 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1172 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1174 brw_set_dest(insn
, dest
);
1175 brw_set_src0(insn
, src
);
1176 brw_set_math_message(p
->brw
,
1178 msg_length
, response_length
,
1180 BRW_MATH_INTEGER_UNSIGNED
,
1183 BRW_MATH_DATA_VECTOR
);
1185 /* Second instruction:
1187 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1188 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1189 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1191 brw_set_dest(insn
, offset(dest
,1));
1192 brw_set_src0(insn
, src
);
1193 brw_set_math_message(p
->brw
,
1195 msg_length
, response_length
,
1197 BRW_MATH_INTEGER_UNSIGNED
,
1200 BRW_MATH_DATA_VECTOR
);
1202 brw_pop_insn_state(p
);
1207 /* Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1208 * Scratch offset should be a multiple of 64.
1209 * Used for register spilling. */
/* brw_dp_WRITE_16: emit a data port OWORD block write (4 OWORDs) to the
 * stateless surface (binding table index 255), used for spilling.
 *
 *   p               compile state; instructions come from next_insn().
 *   scratch_offset  value placed in the message header global offset
 *                   field (reg 0, element 2).
 *
 * Steps visible in the code:
 *   1. With mask control disabled and compression off (inside a
 *      push/pop of the instruction state), scratch_offset is written as
 *      an immediate to element 2 of GRF 0, retyped to D.
 *   2. A SEND is emitted with message register 1, predicate control 0
 *      and compression off.
 *   3. For gen >= 6 the destination is a null UW vec16 and
 *      send_commit_msg is 0; the other visible assignment uses
 *      brw_uw16_grf(0, 0) with send_commit_msg = 1.  The in-function
 *      comment explains the write-commit ordering rationale.
 *   4. brw_set_dp_write_message() fills in the descriptor; the value of
 *      send_commit_msg is passed in the argument labelled response_length.
 *
 * NOTE(review): the parameter that declares `src`, the call name of the
 * header-offset move (presumably brw_MOV), the `else` keyword and several
 * descriptor arguments are not visible in this listing.
 */
1211 void brw_dp_WRITE_16( struct brw_compile
*p
,
1213 GLuint scratch_offset
)
1215 struct intel_context
*intel
= &p
->brw
->intel
;
1216 GLuint msg_reg_nr
= 1;
1218 brw_push_insn_state(p
);
1219 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1220 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1222 /* set message header global offset field (reg 0, element 2) */
1224 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1225 brw_imm_d(scratch_offset
));
1227 brw_pop_insn_state(p
);
1231 GLuint msg_length
= 3;
1232 struct brw_reg dest
;
1233 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1234 int send_commit_msg
;
1236 insn
->header
.predicate_control
= 0; /* XXX */
1237 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1238 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1240 /* Until gen6, writes followed by reads from the same location
1241 * are not guaranteed to be ordered unless write_commit is set.
1242 * If set, then a no-op write is issued to the destination
1243 * register to set a dependency, and a read from the destination
1244 * can be used to ensure the ordering.
1246 * For gen6, only writes between different threads need ordering
1247 * protection. Our use of DP writes is all about register
1248 * spilling within a thread.
1250 if (intel
->gen
>= 6) {
1251 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1252 send_commit_msg
= 0;
1254 dest
= brw_uw16_grf(0, 0);
1255 send_commit_msg
= 1;
1258 brw_set_dest(insn
, dest
);
1259 brw_set_src0(insn
, src
);
1261 brw_set_dp_write_message(p
->brw
,
1263 255, /* binding table index (255=stateless) */
1264 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
1265 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1267 0, /* pixel scoreboard */
1268 send_commit_msg
, /* response_length */
1276 /* Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1277 * Scratch offset should be a multiple of 64.
1278 * Used for register spilling. */
/* brw_dp_READ_16: emit a data port OWORD block read (4 OWORDs) from the
 * stateless surface (binding table index 255) into dest.
 *
 *   p               compile state; instructions come from next_insn().
 *   dest            register receiving the data.
 *   scratch_offset  value placed in the message header global offset
 *                   field (reg 0, element 2).
 *
 * Steps visible in the code:
 *   1. With compression off and mask control disabled (inside a push/pop
 *      of the instruction state), scratch_offset is written as an
 *      immediate to element 2 of GRF 0, retyped to D.
 *   2. A SEND is emitted with message register 1, predicate control 0,
 *      compression off, src0 = GRF 0 retyped to UW.
 *   3. brw_set_dp_read_message() is given target cache 1
 *      (render/scratch per the inline note) and response length 2.
 *
 * NOTE(review): the call name of the header-offset move (presumably
 * brw_MOV) and some descriptor arguments are not visible in this listing.
 */
1280 void brw_dp_READ_16( struct brw_compile
*p
,
1281 struct brw_reg dest
,
1282 GLuint scratch_offset
)
1284 GLuint msg_reg_nr
= 1;
1286 brw_push_insn_state(p
);
1287 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1288 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1290 /* set message header global offset field (reg 0, element 2) */
1292 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1293 brw_imm_d(scratch_offset
));
1295 brw_pop_insn_state(p
);
1299 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1301 insn
->header
.predicate_control
= 0; /* XXX */
1302 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1303 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1305 brw_set_dest(insn
, dest
); /* UW? */
1306 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
1308 brw_set_dp_read_message(p
->brw
,
1310 255, /* binding table index (255=stateless) */
1311 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
,
1312 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1313 1, /* target cache (render/scratch) */
1315 2, /* response_length */
1322 /* Read a float[4] vector from the data port Data Cache (const buffer).
1323 * Location (in buffer) should be a multiple of 16.
1324 * Used for fetching shader constants.
1325 * If relAddr is true, we'll do an indirect fetch using the address register. */
/* brw_dp_READ_4: emit a one-OWORD data port block read from the data
 * cache into dest.
 *
 *   p                 compile state; instructions come from next_insn().
 *   dest              register receiving the data; it is re-cast to a
 *                     UW vec8 before being set on the instruction.
 *   bind_table_index  binding table index of the buffer (its use in the
 *                     descriptor call is not visible here).
 *
 * Steps visible in the code:
 *   1. With predicate none, compression off and mask control disabled
 *      (inside a push/pop of the instruction state), `location` is moved
 *      as a UD immediate into message register 1.
 *   2. A SEND is emitted with message register 1, predicate none,
 *      compression off, mask control disabled, src0 = null register.
 *   3. brw_set_dp_read_message() is given msg_control 0, the OWORD block
 *      read message type, source cache 0 and response length 1.
 *
 * Relative addressing is flagged as not implemented by the in-function
 * XXX note.
 *
 * NOTE(review): the parameters that declare `location` (and relAddr) and
 * the declaration of `b` are not visible in this listing.
 */
1327 void brw_dp_READ_4( struct brw_compile
*p
,
1328 struct brw_reg dest
,
1331 GLuint bind_table_index
)
1333 /* XXX: relAddr not implemented */
1334 GLuint msg_reg_nr
= 1;
1337 brw_push_insn_state(p
);
1338 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1339 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1340 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1342 /* Setup MRF[1] with location/offset into const buffer */
1343 b
= brw_message_reg(msg_reg_nr
);
1344 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1345 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1346 * when the docs say only dword[2] should be set. Hmmm. But it works.
1348 brw_MOV(p
, b
, brw_imm_ud(location
));
1349 brw_pop_insn_state(p
);
1353 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1355 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1356 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1357 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1358 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1360 /* cast dest to a uword[8] vector */
1361 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1363 brw_set_dest(insn
, dest
);
1364 brw_set_src0(insn
, brw_null_reg());
1366 brw_set_dp_read_message(p
->brw
,
1369 0, /* msg_control (0 means 1 Oword) */
1370 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1371 0, /* source cache = data cache */
1373 1, /* response_length (1 Oword) */
1380 /* Read float[4] constant(s) from VS constant buffer.
1381 * For relative addressing, two float[4] constants will be read into 'dest'.
1382 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. */
/* brw_dp_READ_4_vs: emit an OWORD block read of a vertex shader constant
 * from the data cache into dest.
 *
 *   p                 compile state; instructions come from next_insn().
 *   dest              register receiving the data (used as given).
 *   bind_table_index  binding table index of the constant buffer (its
 *                     use in the descriptor call is not visible here).
 *
 * Steps visible in the code:
 *   1. A debug printf of `location` and msg_reg_nr.
 *   2. With compression off, mask control disabled and predicate none
 *      (inside a push/pop of the instruction state), `location` is moved
 *      as a UD immediate into message register 1.
 *   3. A SEND is emitted with message register 1, predicate none,
 *      compression off, mask control disabled, src0 = null register.
 *   4. brw_set_dp_read_message() is given the OWORD block read message
 *      type, source cache 0 and response length 1.
 *
 * NOTE(review): the parameter that declares `location`, the declaration
 * of `b` and whatever guards the printf (presumably a disabled debug
 * condition) are not visible in this listing -- confirm.
 */
1384 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1385 struct brw_reg dest
,
1387 GLuint bind_table_index
)
1389 struct brw_instruction
*insn
;
1390 GLuint msg_reg_nr
= 1;
1394 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1395 location, msg_reg_nr);
1398 /* Setup MRF[1] with location/offset into const buffer */
1399 brw_push_insn_state(p
);
1400 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1401 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1402 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1404 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1405 * when the docs say only dword[2] should be set. Hmmm. But it works.
1407 b
= brw_message_reg(msg_reg_nr
);
1408 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1409 /*b = get_element_ud(b, 2);*/
1410 brw_MOV(p
, b
, brw_imm_ud(location
));
1412 brw_pop_insn_state(p
);
1414 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1416 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1417 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1418 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1419 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1421 brw_set_dest(insn
, dest
);
1422 brw_set_src0(insn
, brw_null_reg());
1424 brw_set_dp_read_message(p
->brw
,
1428 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1429 0, /* source cache = data cache */
1431 1, /* response_length (1 Oword) */
1436 /* Read a float[4] constant per vertex from VS constant buffer, with
1437 * relative addressing. */
/* brw_dp_READ_4_vs_relative: emit an OWORD dual block read of vertex
 * shader constants addressed relative to addr_reg.
 *
 *   p                 compile state; instructions come from next_insn().
 *   dest              register receiving the data.
 *   addr_reg          register holding the per-vertex relative address.
 *   bind_table_index  binding table index of the constant buffer (its
 *                     use in the descriptor call is not visible here).
 *
 * Steps visible in the code:
 *   1. With compression off, mask control disabled and predicate none
 *      (inside a push/pop of the instruction state), message register 1
 *      (retyped to UD) receives addr_reg + `offset` via brw_ADD.
 *   2. A SEND is emitted with predicate none, compression off, mask
 *      control disabled, header.destreg__conditionalmod = 0 and
 *      src0 = GRF 0.
 *   3. msg_type is chosen by hardware: the GEN6 constant when gen == 6,
 *      the G45 constant when gen == 5 or is_g4x, and the BRW constant in
 *      the remaining assignment.
 *   4. brw_set_dp_read_message() is given the dual-block 1-OWORD message
 *      control, source cache 0 and response length 1.
 *
 * NOTE(review): the parameter that declares `offset`, the declaration of
 * `msg_type` and the final `else` are not visible in this listing.
 */
1439 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1440 struct brw_reg dest
,
1441 struct brw_reg addr_reg
,
1443 GLuint bind_table_index
)
1445 struct intel_context
*intel
= &p
->brw
->intel
;
1448 /* Setup MRF[1] with offset into const buffer */
1449 brw_push_insn_state(p
);
1450 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1451 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1452 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1454 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1457 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
),
1458 addr_reg
, brw_imm_d(offset
));
1459 brw_pop_insn_state(p
);
1461 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1463 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1464 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1465 insn
->header
.destreg__conditionalmod
= 0;
1466 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1468 brw_set_dest(insn
, dest
);
1469 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1471 if (intel
->gen
== 6)
1472 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1473 else if (intel
->gen
== 5 || intel
->is_g4x
)
1474 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1476 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1478 brw_set_dp_read_message(p
->brw
,
1481 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1483 0, /* source cache = data cache */
1485 1, /* response_length */
/* brw_fb_WRITE: emit a render target write through the data port.
 *
 *   p                    compile state; instructions come from next_insn().
 *   dest                 destination register of the SEND.
 *   src0                 source register; replaced by the message
 *                        register on gen >= 6.
 *   binding_table_index  binding table index passed to the descriptor.
 *   response_length      response length of the message (its use in the
 *                        descriptor call is not visible here).
 *
 * Steps visible in the code:
 *   1. A SEND is emitted with predicate control 0 and compression off.
 *   2. gen >= 6 uses the headerless form: src0 becomes
 *      brw_message_reg(msg_reg_nr) and the GEN6 render target write
 *      message type is selected.  The other visible assignments store
 *      msg_reg_nr in header.destreg__conditionalmod and select the
 *      pre-gen6 message type.
 *   3. msg_control is the SIMD16 single-source value when
 *      dispatch_width == 16, and the SIMD8 subspan01 value in the other
 *      assignment.
 *   4. brw_set_dp_write_message() is called with pixel scoreboard 1 and
 *      send_commit_msg 0.
 *
 * NOTE(review): the parameters that declare `dispatch_width` and
 * `msg_reg_nr` (and any others between the visible ones) and the `else`
 * keywords are not visible in this listing.
 */
1491 void brw_fb_WRITE(struct brw_compile
*p
,
1493 struct brw_reg dest
,
1495 struct brw_reg src0
,
1496 GLuint binding_table_index
,
1498 GLuint response_length
,
1501 struct intel_context
*intel
= &p
->brw
->intel
;
1502 struct brw_instruction
*insn
;
1503 GLuint msg_control
, msg_type
;
1505 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1506 insn
->header
.predicate_control
= 0; /* XXX */
1507 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1509 if (intel
->gen
>= 6) {
1510 /* headerless version, just submit color payload */
1511 src0
= brw_message_reg(msg_reg_nr
);
1513 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6
;
1515 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1517 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1520 if (dispatch_width
== 16)
1521 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1523 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1525 brw_set_dest(insn
, dest
);
1526 brw_set_src0(insn
, src0
);
1527 brw_set_dp_write_message(p
->brw
,
1529 binding_table_index
,
1533 1, /* pixel scoreboard */
1536 0 /* send_commit_msg */);
1541 /* Texture sample instruction.
1542 * Note: the msg_type plus msg_length values determine exactly what kind
1543 * of sampling operation is performed. See volume 4, page 161 of docs. */
/* brw_SAMPLE: emit a sampler SEND, with a workaround for partial
 * destination writemasks.
 *
 *   p                    compile state; instructions come from next_insn().
 *   dest                 first destination register of the response.
 *   src0                 source register of the SEND.
 *   binding_table_index  binding table index passed to the descriptor.
 *   response_length      number of response registers; recomputed as
 *                        len * 2 in the writemask path.
 *   header_present       flag for the message descriptor (its use is not
 *                        visible here).
 *
 * Visible flow:
 *   1. A zero writemask is special-cased first.
 *   2. When writemask != WRITEMASK_XYZW, two loops over the four channel
 *      bits scan the mask, and the result `newmask` is compared with
 *      writemask.
 *        - The remaining statements of this path build a header in
 *          message register msg_reg_nr: GRF 0 is copied in, then element
 *          2 receives (~newmask & WRITEMASK_XYZW) << 12, with compression
 *          off and mask control disabled.  src0 becomes a null UW
 *          register, dest is advanced by dst_offset and response_length
 *          becomes len * 2.  dispatch_16 is derived from the execution
 *          size guessed for p->current.
 *   3. The SEND is emitted with predicate control 0, compression off and
 *      message register msg_reg_nr; brw_set_sampler_message() fills in
 *      the descriptor.
 *   4. A final sequence moves the last response register
 *      (offset(dest, response_length - 1), as vec8) onto itself with
 *      compression off.
 *
 * NOTE(review): the parameters that declare `writemask` and `msg_reg_nr`,
 * the loop bodies, the branch structure (else/braces), the action taken
 * for a zero writemask (presumably an early return) and the condition
 * guarding step 4 (presumably need_stall) are not visible in this
 * listing -- confirm against the full file.
 */
1545 void brw_SAMPLE(struct brw_compile
*p
,
1546 struct brw_reg dest
,
1548 struct brw_reg src0
,
1549 GLuint binding_table_index
,
1553 GLuint response_length
,
1556 GLuint header_present
,
1559 GLboolean need_stall
= 0;
1561 if (writemask
== 0) {
1562 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1566 /* Hardware doesn't do destination dependency checking on send
1567 * instructions properly. Add a workaround which generates the
1568 * dependency by other means. In practice it seems like this bug
1569 * only crops up for texture samples, and only where registers are
1570 * written by the send and then written again later without being
1571 * read in between. Luckily for us, we already track that
1572 * information and use it to modify the writemask for the
1573 * instruction, so that is a guide for whether a workaround is
1576 if (writemask
!= WRITEMASK_XYZW
) {
1577 GLuint dst_offset
= 0;
1578 GLuint i
, newmask
= 0, len
= 0;
1580 for (i
= 0; i
< 4; i
++) {
1581 if (writemask
& (1<<i
))
1585 for (; i
< 4; i
++) {
1586 if (!(writemask
& (1<<i
)))
1592 if (newmask
!= writemask
) {
1594 /* printf("need stall %x %x\n", newmask , writemask); */
1597 GLboolean dispatch_16
= GL_FALSE
;
1599 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1601 guess_execution_size(p
->current
, dest
);
1602 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1603 dispatch_16
= GL_TRUE
;
1605 newmask
= ~newmask
& WRITEMASK_XYZW
;
1607 brw_push_insn_state(p
);
1609 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1610 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1612 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1613 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1615 brw_pop_insn_state(p
);
1617 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1618 dest
= offset(dest
, dst_offset
);
1620 /* For 16-wide dispatch, masked channels are skipped in the
1621 * response. For 8-wide, masked channels still take up slots,
1622 * and are just not written to.
1625 response_length
= len
* 2;
1630 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1632 insn
->header
.predicate_control
= 0; /* XXX */
1633 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1634 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1636 brw_set_dest(insn
, dest
);
1637 brw_set_src0(insn
, src0
);
1638 brw_set_sampler_message(p
->brw
, insn
,
1639 binding_table_index
,
1650 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1652 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1654 brw_push_insn_state(p
);
1655 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1656 brw_MOV(p
, reg
, reg
);
1657 brw_pop_insn_state(p
);
1662 /* All these variables are pretty confusing - we might be better off
1663 * using bitmasks and macros for this, in the old style. Or perhaps
1664 * just having the caller instantiate the fields in dword3 itself. */
/* brw_urb_WRITE: emit a URB write SEND.
 *
 *   p                compile state; instructions come from next_insn().
 *   dest             destination register of the SEND.
 *   src0             source register; on gen >= 6 it is first copied
 *                    into the message register and then replaced by it.
 *   response_length  response length of the message (its use in the
 *                    descriptor call is not visible here).
 *   writes_complete  flag for the message descriptor (its use is not
 *                    visible here).
 *
 * Steps visible in the code:
 *   1. gen >= 6 only: with mask control disabled (inside a push/pop of
 *      the instruction state) src0 is moved into
 *      brw_message_reg(msg_reg_nr), and src0 is redirected to that
 *      message register.  The in-function comment gives the reason:
 *      Sandybridge has no implied move for SENDs.
 *   2. A SEND is emitted after asserting msg_length < BRW_MAX_MRF, with
 *      dest, src0, an immediate 0 as src1 and msg_reg_nr stored in
 *      header.destreg__conditionalmod.
 *   3. brw_set_urb_message() fills in the descriptor.
 *
 * NOTE(review): the parameters that declare `msg_reg_nr` and `msg_length`
 * (and any others between the visible ones) and all but the first
 * argument of brw_set_urb_message() are not visible in this listing.
 */
1666 void brw_urb_WRITE(struct brw_compile
*p
,
1667 struct brw_reg dest
,
1669 struct brw_reg src0
,
1673 GLuint response_length
,
1675 GLboolean writes_complete
,
1679 struct intel_context
*intel
= &p
->brw
->intel
;
1680 struct brw_instruction
*insn
;
1682 /* Sandybridge doesn't have the implied move for SENDs,
1683 * and the first message register index comes from src0.
1685 if (intel
->gen
>= 6) {
1686 brw_push_insn_state(p
);
1687 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1688 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1689 brw_pop_insn_state(p
);
1690 src0
= brw_message_reg(msg_reg_nr
);
1693 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1695 assert(msg_length
< BRW_MAX_MRF
);
1697 brw_set_dest(insn
, dest
);
1698 brw_set_src0(insn
, src0
);
1699 brw_set_src1(insn
, brw_imm_d(0));
1702 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1704 brw_set_urb_message(p
->brw
,
1716 void brw_ff_sync(struct brw_compile
*p
,
1717 struct brw_reg dest
,
1719 struct brw_reg src0
,
1721 GLuint response_length
,
1724 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1726 brw_set_dest(insn
, dest
);
1727 brw_set_src0(insn
, src0
);
1728 brw_set_src1(insn
, brw_imm_d(0));
1730 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1732 brw_set_ff_sync_message(p
->brw
,