/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Keith Whitwell <keith@tungstengraphics.com>
 */
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 if (dest
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
&&
59 dest
.file
!= BRW_MESSAGE_REGISTER_FILE
)
60 assert(dest
.nr
< 128);
62 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
63 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
64 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
66 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
67 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
69 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
70 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
71 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
72 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
73 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
76 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
77 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
78 /* even ignored in da16, still need to set as '01' */
79 insn
->bits1
.da16
.dest_horiz_stride
= 1;
83 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
85 /* These are different sizes in align1 vs align16:
87 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
88 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
89 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
90 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
91 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
94 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
95 /* even ignored in da16, still need to set as '01' */
96 insn
->bits1
.ia16
.dest_horiz_stride
= 1;
100 /* NEW: Set the execution size based on dest.width and
101 * insn->compression_control:
103 guess_execution_size(insn
, dest
);
106 extern int reg_type_size
[];
109 validate_reg(struct brw_instruction
*insn
, struct brw_reg reg
)
111 int hstride_for_reg
[] = {0, 1, 2, 4};
112 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113 int width_for_reg
[] = {1, 2, 4, 8, 16};
114 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
115 int width
, hstride
, vstride
, execsize
;
117 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors
119 * mean the destination has to be 128-bit aligned and the
120 * destination horiz stride has to be a word.
122 if (reg
.type
== BRW_REGISTER_TYPE_V
) {
123 assert(hstride_for_reg
[insn
->bits1
.da1
.dest_horiz_stride
] *
124 reg_type_size
[insn
->bits1
.da1
.dest_reg_type
] == 2);
130 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
131 reg
.file
== BRW_ARF_NULL
)
134 assert(reg
.hstride
>= 0 && reg
.hstride
< Elements(hstride_for_reg
));
135 hstride
= hstride_for_reg
[reg
.hstride
];
137 if (reg
.vstride
== 0xf) {
140 assert(reg
.vstride
>= 0 && reg
.vstride
< Elements(vstride_for_reg
));
141 vstride
= vstride_for_reg
[reg
.vstride
];
144 assert(reg
.width
>= 0 && reg
.width
< Elements(width_for_reg
));
145 width
= width_for_reg
[reg
.width
];
147 assert(insn
->header
.execution_size
>= 0 &&
148 insn
->header
.execution_size
< Elements(execsize_for_reg
));
149 execsize
= execsize_for_reg
[insn
->header
.execution_size
];
151 /* Restrictions from 3.3.10: Register Region Restrictions. */
153 assert(execsize
>= width
);
156 if (execsize
== width
&& hstride
!= 0) {
157 assert(vstride
== -1 || vstride
== width
* hstride
);
161 if (execsize
== width
&& hstride
== 0) {
162 /* no restriction on vstride. */
167 assert(hstride
== 0);
171 if (execsize
== 1 && width
== 1) {
172 assert(hstride
== 0);
173 assert(vstride
== 0);
177 if (vstride
== 0 && hstride
== 0) {
181 /* 10. Check destination issues. */
184 static void brw_set_src0( struct brw_instruction
*insn
,
187 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
188 assert(reg
.nr
< 128);
190 validate_reg(insn
, reg
);
192 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
193 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
194 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
195 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
196 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
198 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
199 insn
->bits3
.ud
= reg
.dw1
.ud
;
201 /* Required to set some fields in src1 as well:
203 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
204 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
208 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
209 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
210 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
211 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
214 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
215 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
219 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
221 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
222 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
225 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
229 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
230 if (reg
.width
== BRW_WIDTH_1
&&
231 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
232 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
233 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
234 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
237 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
238 insn
->bits2
.da1
.src0_width
= reg
.width
;
239 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
243 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
244 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
245 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
246 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
248 /* This is an oddity of the fact we're using the same
249 * descriptions for registers in align_16 as align_1:
251 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
252 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
254 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
260 void brw_set_src1( struct brw_instruction
*insn
,
263 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
265 assert(reg
.nr
< 128);
267 validate_reg(insn
, reg
);
269 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
270 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
271 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
272 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
274 /* Only src1 can be immediate in two-argument instructions.
276 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
278 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
279 insn
->bits3
.ud
= reg
.dw1
.ud
;
282 /* This is a hardware restriction, which may or may not be lifted
285 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
288 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
289 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
290 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
293 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
294 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
297 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
298 if (reg
.width
== BRW_WIDTH_1
&&
299 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
300 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
301 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
302 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
305 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
306 insn
->bits3
.da1
.src1_width
= reg
.width
;
307 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
311 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
312 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
313 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
314 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
316 /* This is an oddity of the fact we're using the same
317 * descriptions for registers in align_16 as align_1:
319 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
320 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
322 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
329 static void brw_set_math_message( struct brw_context
*brw
,
330 struct brw_instruction
*insn
,
332 GLuint response_length
,
335 GLboolean low_precision
,
339 struct intel_context
*intel
= &brw
->intel
;
340 brw_set_src1(insn
, brw_imm_d(0));
342 if (intel
->gen
== 5) {
343 insn
->bits3
.math_gen5
.function
= function
;
344 insn
->bits3
.math_gen5
.int_type
= integer_type
;
345 insn
->bits3
.math_gen5
.precision
= low_precision
;
346 insn
->bits3
.math_gen5
.saturate
= saturate
;
347 insn
->bits3
.math_gen5
.data_type
= dataType
;
348 insn
->bits3
.math_gen5
.snapshot
= 0;
349 insn
->bits3
.math_gen5
.header_present
= 0;
350 insn
->bits3
.math_gen5
.response_length
= response_length
;
351 insn
->bits3
.math_gen5
.msg_length
= msg_length
;
352 insn
->bits3
.math_gen5
.end_of_thread
= 0;
353 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_MATH
;
354 insn
->bits2
.send_gen5
.end_of_thread
= 0;
356 insn
->bits3
.math
.function
= function
;
357 insn
->bits3
.math
.int_type
= integer_type
;
358 insn
->bits3
.math
.precision
= low_precision
;
359 insn
->bits3
.math
.saturate
= saturate
;
360 insn
->bits3
.math
.data_type
= dataType
;
361 insn
->bits3
.math
.response_length
= response_length
;
362 insn
->bits3
.math
.msg_length
= msg_length
;
363 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
364 insn
->bits3
.math
.end_of_thread
= 0;
369 static void brw_set_ff_sync_message(struct brw_context
*brw
,
370 struct brw_instruction
*insn
,
372 GLuint response_length
,
373 GLboolean end_of_thread
)
375 struct intel_context
*intel
= &brw
->intel
;
376 brw_set_src1(insn
, brw_imm_d(0));
378 insn
->bits3
.urb_gen5
.opcode
= 1; /* FF_SYNC */
379 insn
->bits3
.urb_gen5
.offset
= 0; /* Not used by FF_SYNC */
380 insn
->bits3
.urb_gen5
.swizzle_control
= 0; /* Not used by FF_SYNC */
381 insn
->bits3
.urb_gen5
.allocate
= allocate
;
382 insn
->bits3
.urb_gen5
.used
= 0; /* Not used by FF_SYNC */
383 insn
->bits3
.urb_gen5
.complete
= 0; /* Not used by FF_SYNC */
384 insn
->bits3
.urb_gen5
.header_present
= 1;
385 insn
->bits3
.urb_gen5
.response_length
= response_length
; /* may be 1 or 0 */
386 insn
->bits3
.urb_gen5
.msg_length
= 1;
387 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
388 if (intel
->gen
>= 6) {
389 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
391 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
392 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
396 static void brw_set_urb_message( struct brw_context
*brw
,
397 struct brw_instruction
*insn
,
401 GLuint response_length
,
402 GLboolean end_of_thread
,
405 GLuint swizzle_control
)
407 struct intel_context
*intel
= &brw
->intel
;
408 brw_set_src1(insn
, brw_imm_d(0));
410 if (intel
->gen
>= 5) {
411 insn
->bits3
.urb_gen5
.opcode
= 0; /* ? */
412 insn
->bits3
.urb_gen5
.offset
= offset
;
413 insn
->bits3
.urb_gen5
.swizzle_control
= swizzle_control
;
414 insn
->bits3
.urb_gen5
.allocate
= allocate
;
415 insn
->bits3
.urb_gen5
.used
= used
; /* ? */
416 insn
->bits3
.urb_gen5
.complete
= complete
;
417 insn
->bits3
.urb_gen5
.header_present
= 1;
418 insn
->bits3
.urb_gen5
.response_length
= response_length
;
419 insn
->bits3
.urb_gen5
.msg_length
= msg_length
;
420 insn
->bits3
.urb_gen5
.end_of_thread
= end_of_thread
;
421 if (intel
->gen
>= 6) {
422 /* For SNB, the SFID bits moved to the condmod bits, and
423 * EOT stayed in bits3 above. Does the EOT bit setting
424 * below on Ironlake even do anything?
426 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_URB
;
428 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_URB
;
429 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
432 insn
->bits3
.urb
.opcode
= 0; /* ? */
433 insn
->bits3
.urb
.offset
= offset
;
434 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
435 insn
->bits3
.urb
.allocate
= allocate
;
436 insn
->bits3
.urb
.used
= used
; /* ? */
437 insn
->bits3
.urb
.complete
= complete
;
438 insn
->bits3
.urb
.response_length
= response_length
;
439 insn
->bits3
.urb
.msg_length
= msg_length
;
440 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
441 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
445 static void brw_set_dp_write_message( struct brw_context
*brw
,
446 struct brw_instruction
*insn
,
447 GLuint binding_table_index
,
451 GLboolean header_present
,
452 GLuint pixel_scoreboard_clear
,
453 GLuint response_length
,
454 GLuint end_of_thread
,
455 GLuint send_commit_msg
)
457 struct intel_context
*intel
= &brw
->intel
;
458 brw_set_src1(insn
, brw_imm_ud(0));
460 if (intel
->gen
>= 6) {
461 insn
->bits3
.dp_render_cache
.binding_table_index
= binding_table_index
;
462 insn
->bits3
.dp_render_cache
.msg_control
= msg_control
;
463 insn
->bits3
.dp_render_cache
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
464 insn
->bits3
.dp_render_cache
.msg_type
= msg_type
;
465 insn
->bits3
.dp_render_cache
.send_commit_msg
= send_commit_msg
;
466 insn
->bits3
.dp_render_cache
.header_present
= header_present
;
467 insn
->bits3
.dp_render_cache
.response_length
= response_length
;
468 insn
->bits3
.dp_render_cache
.msg_length
= msg_length
;
469 insn
->bits3
.dp_render_cache
.end_of_thread
= end_of_thread
;
470 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
471 /* XXX really need below? */
472 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
473 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
474 } else if (intel
->gen
== 5) {
475 insn
->bits3
.dp_write_gen5
.binding_table_index
= binding_table_index
;
476 insn
->bits3
.dp_write_gen5
.msg_control
= msg_control
;
477 insn
->bits3
.dp_write_gen5
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
478 insn
->bits3
.dp_write_gen5
.msg_type
= msg_type
;
479 insn
->bits3
.dp_write_gen5
.send_commit_msg
= send_commit_msg
;
480 insn
->bits3
.dp_write_gen5
.header_present
= header_present
;
481 insn
->bits3
.dp_write_gen5
.response_length
= response_length
;
482 insn
->bits3
.dp_write_gen5
.msg_length
= msg_length
;
483 insn
->bits3
.dp_write_gen5
.end_of_thread
= end_of_thread
;
484 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
485 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
487 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
488 insn
->bits3
.dp_write
.msg_control
= msg_control
;
489 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
490 insn
->bits3
.dp_write
.msg_type
= msg_type
;
491 insn
->bits3
.dp_write
.send_commit_msg
= send_commit_msg
;
492 insn
->bits3
.dp_write
.response_length
= response_length
;
493 insn
->bits3
.dp_write
.msg_length
= msg_length
;
494 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
495 insn
->bits3
.dp_write
.end_of_thread
= end_of_thread
;
499 static void brw_set_dp_read_message( struct brw_context
*brw
,
500 struct brw_instruction
*insn
,
501 GLuint binding_table_index
,
506 GLuint response_length
,
507 GLuint end_of_thread
)
509 struct intel_context
*intel
= &brw
->intel
;
510 brw_set_src1(insn
, brw_imm_d(0));
512 if (intel
->gen
== 5) {
513 insn
->bits3
.dp_read_gen5
.binding_table_index
= binding_table_index
;
514 insn
->bits3
.dp_read_gen5
.msg_control
= msg_control
;
515 insn
->bits3
.dp_read_gen5
.msg_type
= msg_type
;
516 insn
->bits3
.dp_read_gen5
.target_cache
= target_cache
;
517 insn
->bits3
.dp_read_gen5
.header_present
= 1;
518 insn
->bits3
.dp_read_gen5
.response_length
= response_length
;
519 insn
->bits3
.dp_read_gen5
.msg_length
= msg_length
;
520 insn
->bits3
.dp_read_gen5
.pad1
= 0;
521 insn
->bits3
.dp_read_gen5
.end_of_thread
= end_of_thread
;
522 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
523 insn
->bits2
.send_gen5
.end_of_thread
= end_of_thread
;
525 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
526 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
527 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
528 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
529 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
530 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
531 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
532 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
533 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
; /*31*/
537 static void brw_set_sampler_message(struct brw_context
*brw
,
538 struct brw_instruction
*insn
,
539 GLuint binding_table_index
,
542 GLuint response_length
,
545 GLuint header_present
,
548 struct intel_context
*intel
= &brw
->intel
;
550 brw_set_src1(insn
, brw_imm_d(0));
552 if (intel
->gen
>= 5) {
553 insn
->bits3
.sampler_gen5
.binding_table_index
= binding_table_index
;
554 insn
->bits3
.sampler_gen5
.sampler
= sampler
;
555 insn
->bits3
.sampler_gen5
.msg_type
= msg_type
;
556 insn
->bits3
.sampler_gen5
.simd_mode
= simd_mode
;
557 insn
->bits3
.sampler_gen5
.header_present
= header_present
;
558 insn
->bits3
.sampler_gen5
.response_length
= response_length
;
559 insn
->bits3
.sampler_gen5
.msg_length
= msg_length
;
560 insn
->bits3
.sampler_gen5
.end_of_thread
= eot
;
562 insn
->header
.destreg__conditionalmod
= BRW_MESSAGE_TARGET_SAMPLER
;
564 insn
->bits2
.send_gen5
.sfid
= BRW_MESSAGE_TARGET_SAMPLER
;
565 insn
->bits2
.send_gen5
.end_of_thread
= eot
;
567 } else if (intel
->is_g4x
) {
568 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
569 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
570 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
571 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
572 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
573 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
574 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
576 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
577 insn
->bits3
.sampler
.sampler
= sampler
;
578 insn
->bits3
.sampler
.msg_type
= msg_type
;
579 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
580 insn
->bits3
.sampler
.response_length
= response_length
;
581 insn
->bits3
.sampler
.msg_length
= msg_length
;
582 insn
->bits3
.sampler
.end_of_thread
= eot
;
583 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
589 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
592 struct brw_instruction
*insn
;
594 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
596 insn
= &p
->store
[p
->nr_insn
++];
597 memcpy(insn
, p
->current
, sizeof(*insn
));
599 /* Reset this one-shot flag:
602 if (p
->current
->header
.destreg__conditionalmod
) {
603 p
->current
->header
.destreg__conditionalmod
= 0;
604 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
607 insn
->header
.opcode
= opcode
;
612 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
617 struct brw_instruction
*insn
= next_insn(p
, opcode
);
618 brw_set_dest(insn
, dest
);
619 brw_set_src0(insn
, src
);
623 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
627 struct brw_reg src1
)
629 struct brw_instruction
*insn
= next_insn(p
, opcode
);
630 brw_set_dest(insn
, dest
);
631 brw_set_src0(insn
, src0
);
632 brw_set_src1(insn
, src1
);
637 /***********************************************************************
638 * Convenience routines.
641 struct brw_instruction *brw_##OP(struct brw_compile *p, \
642 struct brw_reg dest, \
643 struct brw_reg src0) \
645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
649 struct brw_instruction *brw_##OP(struct brw_compile *p, \
650 struct brw_reg dest, \
651 struct brw_reg src0, \
652 struct brw_reg src1) \
654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
682 struct brw_instruction
*brw_ADD(struct brw_compile
*p
,
688 if (src0
.type
== BRW_REGISTER_TYPE_F
||
689 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
690 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
691 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
692 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
695 if (src1
.type
== BRW_REGISTER_TYPE_F
||
696 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
697 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
698 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
699 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
702 return brw_alu2(p
, BRW_OPCODE_ADD
, dest
, src0
, src1
);
705 struct brw_instruction
*brw_MUL(struct brw_compile
*p
,
711 if (src0
.type
== BRW_REGISTER_TYPE_D
||
712 src0
.type
== BRW_REGISTER_TYPE_UD
||
713 src1
.type
== BRW_REGISTER_TYPE_D
||
714 src1
.type
== BRW_REGISTER_TYPE_UD
) {
715 assert(dest
.type
!= BRW_REGISTER_TYPE_F
);
718 if (src0
.type
== BRW_REGISTER_TYPE_F
||
719 (src0
.file
== BRW_IMMEDIATE_VALUE
&&
720 src0
.type
== BRW_REGISTER_TYPE_VF
)) {
721 assert(src1
.type
!= BRW_REGISTER_TYPE_UD
);
722 assert(src1
.type
!= BRW_REGISTER_TYPE_D
);
725 if (src1
.type
== BRW_REGISTER_TYPE_F
||
726 (src1
.file
== BRW_IMMEDIATE_VALUE
&&
727 src1
.type
== BRW_REGISTER_TYPE_VF
)) {
728 assert(src0
.type
!= BRW_REGISTER_TYPE_UD
);
729 assert(src0
.type
!= BRW_REGISTER_TYPE_D
);
732 assert(src0
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
733 src0
.nr
!= BRW_ARF_ACCUMULATOR
);
734 assert(src1
.file
!= BRW_ARCHITECTURE_REGISTER_FILE
||
735 src1
.nr
!= BRW_ARF_ACCUMULATOR
);
737 return brw_alu2(p
, BRW_OPCODE_MUL
, dest
, src0
, src1
);
741 void brw_NOP(struct brw_compile
*p
)
743 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
744 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
745 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
746 brw_set_src1(insn
, brw_imm_ud(0x0));
753 /***********************************************************************
754 * Comparisons, if/else/endif
757 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
762 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
764 insn
->header
.execution_size
= 1;
765 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
766 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
768 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
773 /* EU takes the value from the flag register and pushes it onto some
774 * sort of a stack (presumably merging with any flag value already on
775 * the stack). Within an if block, the flags at the top of the stack
776 * control execution on each channel of the unit, eg. on each of the
777 * 16 pixel values in our wm programs.
779 * When the matching 'else' instruction is reached (presumably by
780 * countdown of the instruction count patched in by our ELSE/ENDIF
781 * functions), the relevent flags are inverted.
783 * When the matching 'endif' instruction is reached, the flags are
784 * popped off. If the stack is now empty, normal execution resumes.
786 * No attempt is made to deal with stack overflow (14 elements?).
788 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
790 struct intel_context
*intel
= &p
->brw
->intel
;
791 struct brw_instruction
*insn
;
793 if (p
->single_program_flow
) {
794 assert(execute_size
== BRW_EXECUTE_1
);
796 insn
= next_insn(p
, BRW_OPCODE_ADD
);
797 insn
->header
.predicate_inverse
= 1;
799 insn
= next_insn(p
, BRW_OPCODE_IF
);
802 /* Override the defaults for this instruction:
804 if (intel
->gen
< 6) {
805 brw_set_dest(insn
, brw_ip_reg());
806 brw_set_src0(insn
, brw_ip_reg());
807 brw_set_src1(insn
, brw_imm_d(0x0));
809 brw_set_dest(insn
, brw_imm_w(0));
810 brw_set_src0(insn
, brw_null_reg());
811 brw_set_src1(insn
, brw_null_reg());
814 insn
->header
.execution_size
= execute_size
;
815 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
816 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
817 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
818 if (!p
->single_program_flow
)
819 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
821 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
827 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
828 struct brw_instruction
*if_insn
)
830 struct intel_context
*intel
= &p
->brw
->intel
;
831 struct brw_instruction
*insn
;
834 /* jump count is for 64bit data chunk each, so one 128bit
835 instruction requires 2 chunks. */
839 if (p
->single_program_flow
) {
840 insn
= next_insn(p
, BRW_OPCODE_ADD
);
842 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
845 if (intel
->gen
< 6) {
846 brw_set_dest(insn
, brw_ip_reg());
847 brw_set_src0(insn
, brw_ip_reg());
848 brw_set_src1(insn
, brw_imm_d(0x0));
850 brw_set_dest(insn
, brw_imm_w(0));
851 brw_set_src0(insn
, brw_null_reg());
852 brw_set_src1(insn
, brw_null_reg());
855 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
856 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
857 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
858 if (!p
->single_program_flow
)
859 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
861 /* Patch the if instruction to point at this instruction.
863 if (p
->single_program_flow
) {
864 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
866 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
868 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
870 if (intel
->gen
< 6) {
871 if_insn
->bits3
.if_else
.jump_count
= br
* (insn
- if_insn
);
872 if_insn
->bits3
.if_else
.pop_count
= 0;
873 if_insn
->bits3
.if_else
.pad0
= 0;
875 if_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- if_insn
+ 1);
882 void brw_ENDIF(struct brw_compile
*p
,
883 struct brw_instruction
*patch_insn
)
885 struct intel_context
*intel
= &p
->brw
->intel
;
891 if (p
->single_program_flow
) {
892 /* In single program flow mode, there's no need to execute an ENDIF,
893 * since we don't need to do any stack operations, and if we're executing
894 * currently, we want to just continue executing.
896 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
898 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
900 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
902 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
904 if (intel
->gen
< 6) {
905 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
906 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
907 brw_set_src1(insn
, brw_imm_d(0x0));
909 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W
));
910 brw_set_src0(insn
, brw_null_reg());
911 brw_set_src1(insn
, brw_null_reg());
914 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
915 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
916 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
917 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
919 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
921 /* Patch the if or else instructions to point at this or the next
922 * instruction respectively.
924 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
925 if (intel
->gen
< 6) {
926 /* Turn it into an IFF, which means no mask stack operations for
927 * all-false and jumping past the ENDIF.
929 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
930 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
931 patch_insn
->bits3
.if_else
.pop_count
= 0;
932 patch_insn
->bits3
.if_else
.pad0
= 0;
934 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
935 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
938 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
);
939 if (intel
->gen
< 6) {
940 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
943 patch_insn
->bits3
.if_else
.jump_count
= br
* (insn
- patch_insn
+ 1);
944 patch_insn
->bits3
.if_else
.pop_count
= 1;
945 patch_insn
->bits3
.if_else
.pad0
= 0;
947 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
948 patch_insn
->bits1
.branch_gen6
.jump_count
= br
* (insn
- patch_insn
);
952 /* Also pop item off the stack in the endif instruction:
954 if (intel
->gen
< 6) {
955 insn
->bits3
.if_else
.jump_count
= 0;
956 insn
->bits3
.if_else
.pop_count
= 1;
957 insn
->bits3
.if_else
.pad0
= 0;
959 insn
->bits1
.branch_gen6
.jump_count
= 2;
964 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
, int pop_count
)
966 struct brw_instruction
*insn
;
967 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
968 brw_set_dest(insn
, brw_ip_reg());
969 brw_set_src0(insn
, brw_ip_reg());
970 brw_set_src1(insn
, brw_imm_d(0x0));
971 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
972 insn
->header
.execution_size
= BRW_EXECUTE_8
;
973 /* insn->header.mask_control = BRW_MASK_DISABLE; */
974 insn
->bits3
.if_else
.pad0
= 0;
975 insn
->bits3
.if_else
.pop_count
= pop_count
;
979 struct brw_instruction
*brw_CONT(struct brw_compile
*p
, int pop_count
)
981 struct brw_instruction
*insn
;
982 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
983 brw_set_dest(insn
, brw_ip_reg());
984 brw_set_src0(insn
, brw_ip_reg());
985 brw_set_src1(insn
, brw_imm_d(0x0));
986 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
987 insn
->header
.execution_size
= BRW_EXECUTE_8
;
988 /* insn->header.mask_control = BRW_MASK_DISABLE; */
989 insn
->bits3
.if_else
.pad0
= 0;
990 insn
->bits3
.if_else
.pop_count
= pop_count
;
996 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
998 if (p
->single_program_flow
) {
999 return &p
->store
[p
->nr_insn
];
1001 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
1003 /* Override the defaults for this instruction:
1005 brw_set_dest(insn
, brw_null_reg());
1006 brw_set_src0(insn
, brw_null_reg());
1007 brw_set_src1(insn
, brw_null_reg());
1009 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1010 insn
->header
.execution_size
= execute_size
;
1011 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1012 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1013 /* insn->header.mask_control = BRW_MASK_DISABLE; */
/**
 * brw_WHILE: emit the instruction that closes a loop opened at do_insn.
 *
 * Visible behaviour: an ADD or a WHILE opcode is requested from
 * next_insn() depending on p->single_program_flow; the IP register is
 * destination and src0, src1 is a zero immediate, compression is off.
 * With single_program_flow the execution size is BRW_EXECUTE_1 and
 * bits3.d is (do_insn - insn) * 16.  The other visible assignments copy
 * do_insn's execution size, assert that do_insn is a DO, and set
 * jump_count to br * (do_insn - insn + 1) with pop_count and pad0 zero.
 * Finally p->current's predicate_control is reset to BRW_PREDICATE_NONE.
 *
 * NOTE(review): the declaration/assignment of `br`, the `else` keywords,
 * the braces and the return statement are not visible in this excerpt --
 * confirm against the complete file.
 */
1021 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
1022 struct brw_instruction
*do_insn
)
1024 struct intel_context
*intel
= &p
->brw
->intel
;
1025 struct brw_instruction
*insn
;
1028 if (intel
->gen
>= 5)
1031 if (p
->single_program_flow
)
1032 insn
= next_insn(p
, BRW_OPCODE_ADD
);
1034 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
1036 brw_set_dest(insn
, brw_ip_reg());
1037 brw_set_src0(insn
, brw_ip_reg());
1038 brw_set_src1(insn
, brw_imm_d(0x0));
1040 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1042 if (p
->single_program_flow
) {
1043 insn
->header
.execution_size
= BRW_EXECUTE_1
;
1045 insn
->bits3
.d
= (do_insn
- insn
) * 16;
1047 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
1049 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
1050 insn
->bits3
.if_else
.jump_count
= br
* (do_insn
- insn
+ 1);
1051 insn
->bits3
.if_else
.pop_count
= 0;
1052 insn
->bits3
.if_else
.pad0
= 0;
1055 /* insn->header.mask_control = BRW_MASK_ENABLE; */
1057 /* insn->header.mask_control = BRW_MASK_DISABLE; */
1058 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
/**
 * brw_land_fwd_jump: patch a previously emitted forward JMPI so that it
 * lands on the next instruction slot.
 *
 * `landing` is the address of the next free slot in p->store.  The
 * function asserts that jmp_insn is a JMPI whose src1 is an immediate,
 * then stores jmpi * ((landing - jmp_insn) - 1) into jmp_insn->bits3.ud.
 *
 * NOTE(review): the declaration of `jmpi` and the statement guarded by
 * the `intel->gen >= 5` test are not visible in this excerpt -- confirm
 * against the complete file.
 */
1065 void brw_land_fwd_jump(struct brw_compile
*p
,
1066 struct brw_instruction
*jmp_insn
)
1068 struct intel_context
*intel
= &p
->brw
->intel
;
1069 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
1072 if (intel
->gen
>= 5)
1075 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
1076 assert(jmp_insn
->bits1
.da1
.src1_reg_file
== BRW_IMMEDIATE_VALUE
);
1078 jmp_insn
->bits3
.ud
= jmpi
* ((landing
- jmp_insn
) - 1);
1083 /* To integrate with the above, it makes sense that the comparison
1084 * instruction should populate the flag register. It might be simpler
1085 * just to use the flag reg for most WM tasks?
/**
 * brw_CMP: emit a CMP instruction.
 *
 * `conditional` is written to the header's destreg__conditionalmod
 * field, then dest/src0/src1 are set.  When dest lives in the
 * architecture register file (plus a further condition that is not
 * visible here), the default state p->current gets
 * BRW_PREDICATE_NORMAL and p->flag_value becomes 0xff, so that later
 * instructions are predicated on the computed flag.
 *
 * NOTE(review): the declaration of `conditional` and the second operand
 * of the `&&` test are missing from this excerpt -- confirm against the
 * complete file.
 */
1087 void brw_CMP(struct brw_compile
*p
,
1088 struct brw_reg dest
,
1090 struct brw_reg src0
,
1091 struct brw_reg src1
)
1093 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
1095 insn
->header
.destreg__conditionalmod
= conditional
;
1096 brw_set_dest(insn
, dest
);
1097 brw_set_src0(insn
, src0
);
1098 brw_set_src1(insn
, src1
);
1100 /* guess_execution_size(insn, src0); */
1103 /* Make it so that future instructions will use the computed flag
1104 * value until brw_set_predicate_control_flag_value() is called
1107 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
1109 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
1110 p
->flag_value
= 0xff;
1114 /* Issue 'wait' instruction for n1, host could program MMIO
1115 to wake up thread. */
1116 void brw_WAIT (struct brw_compile
*p
)
1118 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_WAIT
);
1119 struct brw_reg src
= brw_notification_1_reg();
1121 brw_set_dest(insn
, src
);
1122 brw_set_src0(insn
, src
);
1123 brw_set_src1(insn
, brw_null_reg());
1124 insn
->header
.execution_size
= 0; /* must */
1125 insn
->header
.predicate_control
= 0;
1126 insn
->header
.compression_control
= 0;
1130 /***********************************************************************
1131 * Helpers for the various SEND message types:
1134 /** Extended math function, float[8].
/**
 * brw_math: emit an extended math operation.
 *
 * Visible behaviour: for intel->gen >= 6 a MATH instruction is emitted
 * with `function` stored in destreg__conditionalmod, `src` as src0 and
 * the null register as src1.  The other visible sequence emits a SEND
 * with predicate_control 0 and msg_reg_nr in destreg__conditionalmod,
 * using msg_length 2 for BRW_MATH_FUNCTION_POW (else 1) and
 * response_length 2 for BRW_MATH_FUNCTION_SINCOS (else 1), and fills
 * the descriptor through brw_set_math_message().
 *
 * NOTE(review): the declarations of `function`, `msg_reg_nr`, `src` and
 * any further parameters, the `else`, and several
 * brw_set_math_message() arguments are missing from this excerpt --
 * confirm against the complete file.
 */
1136 void brw_math( struct brw_compile
*p
,
1137 struct brw_reg dest
,
1145 struct intel_context
*intel
= &p
->brw
->intel
;
1147 if (intel
->gen
>= 6) {
1148 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1150 /* Math is the same ISA format as other opcodes, except that CondModifier
1151 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1153 insn
->header
.destreg__conditionalmod
= function
;
1155 brw_set_dest(insn
, dest
);
1156 brw_set_src0(insn
, src
);
1157 brw_set_src1(insn
, brw_null_reg());
1159 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1160 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1161 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1162 /* Example code doesn't set predicate_control for send
1165 insn
->header
.predicate_control
= 0;
1166 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1168 brw_set_dest(insn
, dest
);
1169 brw_set_src0(insn
, src
);
1170 brw_set_math_message(p
->brw
,
1172 msg_length
, response_length
,
1174 BRW_MATH_INTEGER_UNSIGNED
,
1181 /** Extended math function, float[8].
/**
 * brw_math2: emit a two-source MATH instruction.
 *
 * Asserts intel->gen >= 6, stores `function` in the header's
 * destreg__conditionalmod field and sets dest, src0 and src1.
 *
 * NOTE(review): the declaration of the `function` parameter is missing
 * from this excerpt -- confirm against the complete file.
 */
1183 void brw_math2(struct brw_compile
*p
,
1184 struct brw_reg dest
,
1186 struct brw_reg src0
,
1187 struct brw_reg src1
)
1189 struct intel_context
*intel
= &p
->brw
->intel
;
1190 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_MATH
);
1192 assert(intel
->gen
>= 6);
1195 /* Math is the same ISA format as other opcodes, except that CondModifier
1196 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1198 insn
->header
.destreg__conditionalmod
= function
;
1200 brw_set_dest(insn
, dest
);
1201 brw_set_src0(insn
, src0
);
1202 brw_set_src1(insn
, src1
);
1206 * Extended math function, float[16].
1207 * Use 2 send instructions.
/**
 * brw_math_16: emit an extended math operation for 16 channels.
 *
 * Visible behaviour: for intel->gen >= 6 a single MATH instruction is
 * emitted (function in destreg__conditionalmod, null src1).  The
 * remaining code pushes the instruction state, sets the predicate flag
 * value to 0xff and disables compression, then emits two SENDs: the
 * first targets `dest` with message register msg_reg_nr, the second is
 * marked BRW_COMPRESSION_2NDHALF, targets offset(dest, 1) and uses
 * msg_reg_nr + 1.  Both are configured with brw_set_math_message()
 * using BRW_MATH_DATA_VECTOR; the state is popped afterwards.
 *
 * NOTE(review): parameter declarations after `dest`, the statement that
 * ends the gen >= 6 branch, and some brw_set_math_message() arguments
 * are missing from this excerpt -- confirm against the complete file.
 */
1209 void brw_math_16( struct brw_compile
*p
,
1210 struct brw_reg dest
,
1217 struct intel_context
*intel
= &p
->brw
->intel
;
1218 struct brw_instruction
*insn
;
1219 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
1220 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
1222 if (intel
->gen
>= 6) {
1223 insn
= next_insn(p
, BRW_OPCODE_MATH
);
1225 /* Math is the same ISA format as other opcodes, except that CondModifier
1226 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1228 insn
->header
.destreg__conditionalmod
= function
;
1230 brw_set_dest(insn
, dest
);
1231 brw_set_src0(insn
, src
);
1232 brw_set_src1(insn
, brw_null_reg());
1236 /* First instruction:
1238 brw_push_insn_state(p
);
1239 brw_set_predicate_control_flag_value(p
, 0xff);
1240 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1242 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1243 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1245 brw_set_dest(insn
, dest
);
1246 brw_set_src0(insn
, src
);
1247 brw_set_math_message(p
->brw
,
1249 msg_length
, response_length
,
1251 BRW_MATH_INTEGER_UNSIGNED
,
1254 BRW_MATH_DATA_VECTOR
);
1256 /* Second instruction:
1258 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1259 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
1260 insn
->header
.destreg__conditionalmod
= msg_reg_nr
+1;
1262 brw_set_dest(insn
, offset(dest
,1));
1263 brw_set_src0(insn
, src
);
1264 brw_set_math_message(p
->brw
,
1266 msg_length
, response_length
,
1268 BRW_MATH_INTEGER_UNSIGNED
,
1271 BRW_MATH_DATA_VECTOR
);
1273 brw_pop_insn_state(p
);
1278 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1279 * Scratch offset should be a multiple of 64.
1280 * Used for register spilling.
/**
 * brw_dp_WRITE_16: spill a block of registers to the scratch buffer
 * through a data port write message.
 *
 * Visible behaviour: message register 1 is used.  With mask control
 * disabled and compression off, element 2 of g0 (retyped D) is the
 * target of an instruction whose immediate operand is scratch_offset.
 * A SEND is then emitted with predicate_control 0, no compression and
 * msg_length 3.  For intel->gen >= 6 the destination is a null UW
 * vec16 register and send_commit_msg is 0; the other visible
 * assignments use brw_uw16_grf(0, 0) and send_commit_msg = 1.  The
 * message is a stateless (binding table 255) 4-OWord block write with
 * a header and response_length equal to send_commit_msg.
 *
 * NOTE(review): the `src` parameter declaration, the opcode call that
 * writes the header offset, the `else`, and the trailing
 * brw_set_dp_write_message() arguments are missing from this excerpt --
 * confirm against the complete file.
 */
1282 void brw_dp_WRITE_16( struct brw_compile
*p
,
1284 GLuint scratch_offset
)
1286 struct intel_context
*intel
= &p
->brw
->intel
;
1287 GLuint msg_reg_nr
= 1;
1289 brw_push_insn_state(p
);
1290 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1291 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1293 /* set message header global offset field (reg 0, element 2) */
1295 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1296 brw_imm_d(scratch_offset
));
1298 brw_pop_insn_state(p
);
1302 GLuint msg_length
= 3;
1303 struct brw_reg dest
;
1304 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1305 int send_commit_msg
;
1307 insn
->header
.predicate_control
= 0; /* XXX */
1308 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1309 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1311 /* Until gen6, writes followed by reads from the same location
1312 * are not guaranteed to be ordered unless write_commit is set.
1313 * If set, then a no-op write is issued to the destination
1314 * register to set a dependency, and a read from the destination
1315 * can be used to ensure the ordering.
1317 * For gen6, only writes between different threads need ordering
1318 * protection. Our use of DP writes is all about register
1319 * spilling within a thread.
1321 if (intel
->gen
>= 6) {
1322 dest
= retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW
);
1323 send_commit_msg
= 0;
1325 dest
= brw_uw16_grf(0, 0);
1326 send_commit_msg
= 1;
1329 brw_set_dest(insn
, dest
);
1330 brw_set_src0(insn
, src
);
1332 brw_set_dp_write_message(p
->brw
,
1334 255, /* binding table index (255=stateless) */
1335 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
1336 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
1338 GL_TRUE
, /* header_present */
1339 0, /* pixel scoreboard */
1340 send_commit_msg
, /* response_length */
1348 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1349 * Scratch offset should be a multiple of 64.
1350 * Used for register spilling.
/**
 * brw_dp_READ_16: read a spilled block back from the scratch buffer
 * through a data port read message.
 *
 * Visible behaviour: message register 1 is used.  With compression off
 * and mask control disabled, element 2 of g0 (retyped D) is the target
 * of an instruction whose immediate operand is scratch_offset.  A SEND
 * is then emitted with predicate_control 0 and no compression, writing
 * to `dest` and sourcing g0 as a UW vec8.  The message is a stateless
 * (binding table 255) 4-OWord block read with target cache 1 and
 * response_length 2.
 *
 * NOTE(review): the opcode call that writes the header offset and
 * several brw_set_dp_read_message() arguments are missing from this
 * excerpt -- confirm against the complete file.
 */
1352 void brw_dp_READ_16( struct brw_compile
*p
,
1353 struct brw_reg dest
,
1354 GLuint scratch_offset
)
1356 GLuint msg_reg_nr
= 1;
1358 brw_push_insn_state(p
);
1359 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1360 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1362 /* set message header global offset field (reg 0, element 2) */
1364 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
1365 brw_imm_d(scratch_offset
));
1367 brw_pop_insn_state(p
);
1371 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1373 insn
->header
.predicate_control
= 0; /* XXX */
1374 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1375 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1377 brw_set_dest(insn
, dest
); /* UW? */
1378 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
1380 brw_set_dp_read_message(p
->brw
,
1382 255, /* binding table index (255=stateless) */
1383 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
,
1384 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1385 1, /* target cache (render/scratch) */
1387 2, /* response_length */
1394 * Read a float[4] vector from the data port Data Cache (const buffer).
1395 * Location (in buffer) should be a multiple of 16.
1396 * Used for fetching shader constants.
1397 * If relAddr is true, we'll do an indirect fetch using the address register.
/**
 * brw_dp_READ_4: fetch one OWord from a constant buffer through a data
 * port read message.
 *
 * Visible behaviour: message register 1 is used.  With predication
 * off, compression off and mask control disabled, MRF[1] (retyped UD)
 * is loaded with the immediate `location`.  A SEND is then emitted
 * with predication and compression off and mask control disabled;
 * `dest` is recast to a UW vec8 and src0 is the null register.  The
 * message is an OWord block read with msg_control 0, source cache 0
 * and response_length 1.
 *
 * NOTE(review): the declarations of `location`, any relAddr parameter
 * and the local `b`, plus several brw_set_dp_read_message() arguments,
 * are missing from this excerpt -- confirm against the complete file.
 */
1399 void brw_dp_READ_4( struct brw_compile
*p
,
1400 struct brw_reg dest
,
1403 GLuint bind_table_index
)
1405 /* XXX: relAddr not implemented */
1406 GLuint msg_reg_nr
= 1;
1409 brw_push_insn_state(p
);
1410 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1411 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1412 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1414 /* Setup MRF[1] with location/offset into const buffer */
1415 b
= brw_message_reg(msg_reg_nr
);
1416 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1417 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1418 * when the docs say only dword[2] should be set. Hmmm. But it works.
1420 brw_MOV(p
, b
, brw_imm_ud(location
));
1421 brw_pop_insn_state(p
);
1425 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1427 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1428 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1429 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1430 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1432 /* cast dest to a uword[8] vector */
1433 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
1435 brw_set_dest(insn
, dest
);
1436 brw_set_src0(insn
, brw_null_reg());
1438 brw_set_dp_read_message(p
->brw
,
1441 0, /* msg_control (0 means 1 Oword) */
1442 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1443 0, /* source cache = data cache */
1445 1, /* response_length (1 Oword) */
1452 * Read float[4] constant(s) from VS constant buffer.
1453 * For relative addressing, two float[4] constants will be read into 'dest'.
1454 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
/**
 * brw_dp_READ_4_vs: fetch a constant from the VS constant buffer
 * through a data port read message.
 *
 * Visible behaviour: message register 1 is used.  A diagnostic printf
 * of `location` and msg_reg_nr is present (its guard is not visible
 * here).  With compression off, mask control disabled and predication
 * off, MRF[1] (retyped UD) is loaded with the immediate `location`.  A
 * SEND is then emitted with predication and compression off and mask
 * control disabled, writing to `dest` with a null src0.  The message
 * is an OWord block read with source cache 0 and response_length 1.
 *
 * NOTE(review): the declarations of `location` and the local `b`, the
 * condition around the printf, and several brw_set_dp_read_message()
 * arguments are missing from this excerpt -- confirm against the
 * complete file.
 */
1456 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1457 struct brw_reg dest
,
1459 GLuint bind_table_index
)
1461 struct brw_instruction
*insn
;
1462 GLuint msg_reg_nr
= 1;
1466 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1467 location, msg_reg_nr);
1470 /* Setup MRF[1] with location/offset into const buffer */
1471 brw_push_insn_state(p
);
1472 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1473 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1474 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1476 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1477 * when the docs say only dword[2] should be set. Hmmm. But it works.
1479 b
= brw_message_reg(msg_reg_nr
);
1480 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1481 /*b = get_element_ud(b, 2);*/
1482 brw_MOV(p
, b
, brw_imm_ud(location
));
1484 brw_pop_insn_state(p
);
1486 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1488 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1489 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1490 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1491 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1493 brw_set_dest(insn
, dest
);
1494 brw_set_src0(insn
, brw_null_reg());
1496 brw_set_dp_read_message(p
->brw
,
1500 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1501 0, /* source cache = data cache */
1503 1, /* response_length (1 Oword) */
1508 * Read a float[4] constant per vertex from VS constant buffer, with
1509 * relative addressing.
/**
 * brw_dp_READ_4_vs_relative: fetch constants from the VS constant
 * buffer using relative addressing.
 *
 * Visible behaviour: with compression off, mask control disabled and
 * predication off, MRF[1] (retyped UD) receives addr_reg plus the
 * immediate `offset` via brw_ADD.  A SEND is then emitted with
 * predication and compression off, destreg__conditionalmod 0 and mask
 * control disabled, writing to `dest` and sourcing g0.  msg_type is
 * the GEN6 dual-block read for intel->gen == 6, the G45 variant for
 * gen == 5 or is_g4x, and the BRW variant in the remaining visible
 * assignment.  The message uses BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
 * source cache 0 and response_length 1.
 *
 * NOTE(review): the declarations of `offset` and `msg_type`, the final
 * `else`, and several brw_set_dp_read_message() arguments are missing
 * from this excerpt -- confirm against the complete file.
 */
1511 void brw_dp_READ_4_vs_relative(struct brw_compile
*p
,
1512 struct brw_reg dest
,
1513 struct brw_reg addr_reg
,
1515 GLuint bind_table_index
)
1517 struct intel_context
*intel
= &p
->brw
->intel
;
1520 /* Setup MRF[1] with offset into const buffer */
1521 brw_push_insn_state(p
);
1522 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1523 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1524 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1526 /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1529 brw_ADD(p
, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD
),
1530 addr_reg
, brw_imm_d(offset
));
1531 brw_pop_insn_state(p
);
1533 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1535 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1536 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1537 insn
->header
.destreg__conditionalmod
= 0;
1538 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1540 brw_set_dest(insn
, dest
);
1541 brw_set_src0(insn
, brw_vec8_grf(0, 0));
1543 if (intel
->gen
== 6)
1544 msg_type
= GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1545 else if (intel
->gen
== 5 || intel
->is_g4x
)
1546 msg_type
= G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1548 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
;
1550 brw_set_dp_read_message(p
->brw
,
1553 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD
,
1555 0, /* source cache = data cache */
1557 1, /* response_length */
/**
 * brw_fb_WRITE: emit a render-target write SEND.
 *
 * Visible behaviour: a SEND is emitted with predicate_control 0 and no
 * compression; header_present starts as GL_TRUE.  In the
 * intel->gen >= 6 section, header_present becomes GL_FALSE when
 * msg_length == 4, src0 is replaced by message register msg_reg_nr and
 * the GEN6 render-target-write msg_type is selected.  The other
 * visible assignments store msg_reg_nr in destreg__conditionalmod and
 * select the pre-gen6 msg_type.  msg_control is the SIMD16
 * single-source variant when dispatch_width == 16, and the other
 * visible value is SIMD8 single-source SUBSPAN01.  The message is then
 * built with brw_set_dp_write_message(), with pixel scoreboard 1 and
 * send_commit_msg 0.
 *
 * NOTE(review): the declarations of dispatch_width, msg_reg_nr,
 * msg_length and possibly eot, the `else` keywords and several call
 * arguments are missing from this excerpt -- confirm against the
 * complete file.
 */
1563 void brw_fb_WRITE(struct brw_compile
*p
,
1565 struct brw_reg dest
,
1567 struct brw_reg src0
,
1568 GLuint binding_table_index
,
1570 GLuint response_length
,
1573 struct intel_context
*intel
= &p
->brw
->intel
;
1574 struct brw_instruction
*insn
;
1575 GLuint msg_control
, msg_type
;
1576 GLboolean header_present
= GL_TRUE
;
1578 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1579 insn
->header
.predicate_control
= 0; /* XXX */
1580 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1582 if (intel
->gen
>= 6) {
1583 if (msg_length
== 4)
1584 header_present
= GL_FALSE
;
1586 /* headerless version, just submit color payload */
1587 src0
= brw_message_reg(msg_reg_nr
);
1589 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6
;
1591 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1593 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
;
1596 if (dispatch_width
== 16)
1597 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
;
1599 msg_control
= BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1601 brw_set_dest(insn
, dest
);
1602 brw_set_src0(insn
, src0
);
1603 brw_set_dp_write_message(p
->brw
,
1605 binding_table_index
,
1610 1, /* pixel scoreboard */
1613 0 /* send_commit_msg */);
1618 * Texture sample instruction.
1619 * Note: the msg_type plus msg_length values determine exactly what kind
1620 * of sampling operation is performed. See volume 4, page 161 of docs.
/**
 * brw_SAMPLE: emit a texture sample SEND, including a workaround for
 * partial writemasks.
 *
 * Visible behaviour:
 *  - A zero writemask is tested first (the action taken is not visible).
 *  - When writemask != WRITEMASK_XYZW, two loops over the four
 *    channels derive newmask/len/dst_offset.  One visible section then
 *    builds a message header in m1: g0 is copied to m1 and element 2
 *    is set to (~newmask & WRITEMASK_XYZW) << 12, with compression off
 *    and mask control disabled; src0 becomes a null UW register, dest
 *    is advanced by dst_offset and response_length is set to len * 2
 *    (the condition around that assignment is not visible).
 *  - For intel->gen >= 6, src0 is copied to message register
 *    msg_reg_nr with mask control disabled and the SEND reads from
 *    that register.
 *  - The SEND has predicate_control 0 and no compression, and is
 *    configured through brw_set_sampler_message().
 *  - Finally a MOV of the last response register onto itself is
 *    visible; presumably it is the need_stall workaround -- TODO
 *    confirm, the guarding condition is not in this excerpt.
 *
 * NOTE(review): many parameter declarations, loop bodies, branch
 * keywords and call arguments are missing from this excerpt -- confirm
 * every detail against the complete file before relying on it.
 */
1622 void brw_SAMPLE(struct brw_compile
*p
,
1623 struct brw_reg dest
,
1625 struct brw_reg src0
,
1626 GLuint binding_table_index
,
1630 GLuint response_length
,
1633 GLuint header_present
,
1636 struct intel_context
*intel
= &p
->brw
->intel
;
1637 GLboolean need_stall
= 0;
1639 if (writemask
== 0) {
1640 /*printf("%s: zero writemask??\n", __FUNCTION__); */
1644 /* Hardware doesn't do destination dependency checking on send
1645 * instructions properly. Add a workaround which generates the
1646 * dependency by other means. In practice it seems like this bug
1647 * only crops up for texture samples, and only where registers are
1648 * written by the send and then written again later without being
1649 * read in between. Luckily for us, we already track that
1650 * information and use it to modify the writemask for the
1651 * instruction, so that is a guide for whether a workaround is
1654 if (writemask
!= WRITEMASK_XYZW
) {
1655 GLuint dst_offset
= 0;
1656 GLuint i
, newmask
= 0, len
= 0;
1658 for (i
= 0; i
< 4; i
++) {
1659 if (writemask
& (1<<i
))
1663 for (; i
< 4; i
++) {
1664 if (!(writemask
& (1<<i
)))
1670 if (newmask
!= writemask
) {
1672 /* printf("need stall %x %x\n", newmask , writemask); */
1675 GLboolean dispatch_16
= GL_FALSE
;
1677 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1679 guess_execution_size(p
->current
, dest
);
1680 if (p
->current
->header
.execution_size
== BRW_EXECUTE_16
)
1681 dispatch_16
= GL_TRUE
;
1683 newmask
= ~newmask
& WRITEMASK_XYZW
;
1685 brw_push_insn_state(p
);
1687 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1688 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1690 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1691 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1693 brw_pop_insn_state(p
);
1695 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1696 dest
= offset(dest
, dst_offset
);
1698 /* For 16-wide dispatch, masked channels are skipped in the
1699 * response. For 8-wide, masked channels still take up slots,
1700 * and are just not written to.
1703 response_length
= len
* 2;
1708 struct brw_instruction
*insn
;
1710 /* Sandybridge doesn't have the implied move for SENDs,
1711 * and the first message register index comes from src0.
1713 if (intel
->gen
>= 6) {
1714 brw_push_insn_state(p
);
1715 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1716 /* m1 contains header? */
1717 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1718 brw_pop_insn_state(p
);
1719 src0
= brw_message_reg(msg_reg_nr
);
1722 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1723 insn
->header
.predicate_control
= 0; /* XXX */
1724 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1726 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1728 brw_set_dest(insn
, dest
);
1729 brw_set_src0(insn
, src0
);
1730 brw_set_sampler_message(p
->brw
, insn
,
1731 binding_table_index
,
1742 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1744 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1746 brw_push_insn_state(p
);
1747 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1748 brw_MOV(p
, reg
, reg
);
1749 brw_pop_insn_state(p
);
1754 /* All these variables are pretty confusing - we might be better off
1755 * using bitmasks and macros for this, in the old style. Or perhaps
1756 * just having the caller instantiate the fields in dword3 itself.
/**
 * brw_urb_WRITE: emit a URB write SEND.
 *
 * Visible behaviour: for intel->gen >= 6, src0 is first copied into
 * message register msg_reg_nr with mask control disabled, and the SEND
 * then sources that message register.  A SEND is emitted, msg_length
 * is asserted to be below BRW_MAX_MRF, dest/src0 are set, src1 is a
 * zero immediate and msg_reg_nr is stored in destreg__conditionalmod.
 * The descriptor is filled in by brw_set_urb_message().
 *
 * NOTE(review): the declarations of msg_reg_nr, msg_length and several
 * other parameters, and all brw_set_urb_message() arguments after
 * p->brw, are missing from this excerpt -- confirm against the
 * complete file.
 */
1758 void brw_urb_WRITE(struct brw_compile
*p
,
1759 struct brw_reg dest
,
1761 struct brw_reg src0
,
1765 GLuint response_length
,
1767 GLboolean writes_complete
,
1771 struct intel_context
*intel
= &p
->brw
->intel
;
1772 struct brw_instruction
*insn
;
1774 /* Sandybridge doesn't have the implied move for SENDs,
1775 * and the first message register index comes from src0.
1777 if (intel
->gen
>= 6) {
1778 brw_push_insn_state(p
);
1779 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1780 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1781 brw_pop_insn_state(p
);
1782 src0
= brw_message_reg(msg_reg_nr
);
1785 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1787 assert(msg_length
< BRW_MAX_MRF
);
1789 brw_set_dest(insn
, dest
);
1790 brw_set_src0(insn
, src0
);
1791 brw_set_src1(insn
, brw_imm_d(0));
1794 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1796 brw_set_urb_message(p
->brw
,
1808 void brw_ff_sync(struct brw_compile
*p
,
1809 struct brw_reg dest
,
1811 struct brw_reg src0
,
1813 GLuint response_length
,
1816 struct intel_context
*intel
= &p
->brw
->intel
;
1817 struct brw_instruction
*insn
;
1819 /* Sandybridge doesn't have the implied move for SENDs,
1820 * and the first message register index comes from src0.
1822 if (intel
->gen
>= 6) {
1823 brw_push_insn_state(p
);
1824 brw_set_mask_control( p
, BRW_MASK_DISABLE
);
1825 brw_MOV(p
, brw_message_reg(msg_reg_nr
), src0
);
1826 brw_pop_insn_state(p
);
1827 src0
= brw_message_reg(msg_reg_nr
);
1830 insn
= next_insn(p
, BRW_OPCODE_SEND
);
1831 brw_set_dest(insn
, dest
);
1832 brw_set_src0(insn
, src0
);
1833 brw_set_src1(insn
, brw_imm_d(0));
1836 insn
->header
.destreg__conditionalmod
= msg_reg_nr
;
1838 brw_set_ff_sync_message(p
->brw
,