2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
59 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
60 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
62 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
63 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
65 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
66 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
67 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
68 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
69 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
72 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
73 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
77 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
79 /* These are different sizes in align1 vs align16:
81 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
82 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
83 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
84 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
85 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
88 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
92 /* NEW: Set the execution size based on dest.width and
93 * insn->compression_control:
95 guess_execution_size(insn
, dest
);
98 static void brw_set_src0( struct brw_instruction
*insn
,
101 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
103 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
104 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
105 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
106 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
107 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
109 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
110 insn
->bits3
.ud
= reg
.dw1
.ud
;
112 /* Required to set some fields in src1 as well:
114 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
115 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
119 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
120 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
121 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
122 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
125 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
126 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
130 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
132 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
133 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
136 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
140 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
141 if (reg
.width
== BRW_WIDTH_1
&&
142 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
143 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
144 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
145 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
148 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
149 insn
->bits2
.da1
.src0_width
= reg
.width
;
150 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
154 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
155 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
156 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
157 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
159 /* This is an oddity of the fact we're using the same
160 * descriptions for registers in align_16 as align_1:
162 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
163 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
165 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
171 void brw_set_src1( struct brw_instruction
*insn
,
174 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
176 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
177 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
178 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
179 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
181 /* Only src1 can be immediate in two-argument instructions.
183 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
185 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
186 insn
->bits3
.ud
= reg
.dw1
.ud
;
189 /* This is a hardware restriction, which may or may not be lifted
192 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
193 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
195 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
196 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
197 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
200 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
201 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
204 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
205 if (reg
.width
== BRW_WIDTH_1
&&
206 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
207 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
208 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
209 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
212 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
213 insn
->bits3
.da1
.src1_width
= reg
.width
;
214 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
218 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
219 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
220 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
221 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
223 /* This is an oddity of the fact we're using the same
224 * descriptions for registers in align_16 as align_1:
226 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
227 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
229 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
236 static void brw_set_math_message( struct brw_instruction
*insn
,
238 GLuint response_length
,
241 GLboolean low_precision
,
245 brw_set_src1(insn
, brw_imm_d(0));
247 insn
->bits3
.math
.function
= function
;
248 insn
->bits3
.math
.int_type
= integer_type
;
249 insn
->bits3
.math
.precision
= low_precision
;
250 insn
->bits3
.math
.saturate
= saturate
;
251 insn
->bits3
.math
.data_type
= dataType
;
252 insn
->bits3
.math
.response_length
= response_length
;
253 insn
->bits3
.math
.msg_length
= msg_length
;
254 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
255 insn
->bits3
.math
.end_of_thread
= 0;
258 static void brw_set_urb_message( struct brw_instruction
*insn
,
262 GLuint response_length
,
263 GLboolean end_of_thread
,
266 GLuint swizzle_control
)
268 brw_set_src1(insn
, brw_imm_d(0));
270 insn
->bits3
.urb
.opcode
= 0; /* ? */
271 insn
->bits3
.urb
.offset
= offset
;
272 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
273 insn
->bits3
.urb
.allocate
= allocate
;
274 insn
->bits3
.urb
.used
= used
; /* ? */
275 insn
->bits3
.urb
.complete
= complete
;
276 insn
->bits3
.urb
.response_length
= response_length
;
277 insn
->bits3
.urb
.msg_length
= msg_length
;
278 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
279 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
282 static void brw_set_dp_write_message( struct brw_instruction
*insn
,
283 GLuint binding_table_index
,
287 GLuint pixel_scoreboard_clear
,
288 GLuint response_length
,
289 GLuint end_of_thread
)
291 brw_set_src1(insn
, brw_imm_d(0));
293 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
294 insn
->bits3
.dp_write
.msg_control
= msg_control
;
295 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
296 insn
->bits3
.dp_write
.msg_type
= msg_type
;
297 insn
->bits3
.dp_write
.send_commit_msg
= 0;
298 insn
->bits3
.dp_write
.response_length
= response_length
;
299 insn
->bits3
.dp_write
.msg_length
= msg_length
;
300 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
301 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
304 static void brw_set_dp_read_message( struct brw_instruction
*insn
,
305 GLuint binding_table_index
,
310 GLuint response_length
,
311 GLuint end_of_thread
)
313 brw_set_src1(insn
, brw_imm_d(0));
315 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
;
316 insn
->bits3
.dp_read
.msg_control
= msg_control
;
317 insn
->bits3
.dp_read
.msg_type
= msg_type
;
318 insn
->bits3
.dp_read
.target_cache
= target_cache
;
319 insn
->bits3
.dp_read
.response_length
= response_length
;
320 insn
->bits3
.dp_read
.msg_length
= msg_length
;
321 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
322 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
;
325 static void brw_set_sampler_message(struct brw_context
*brw
,
326 struct brw_instruction
*insn
,
327 GLuint binding_table_index
,
330 GLuint response_length
,
334 brw_set_src1(insn
, brw_imm_d(0));
336 if (BRW_IS_G4X(brw
)) {
337 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
338 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
339 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
340 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
341 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
342 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
343 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
345 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
346 insn
->bits3
.sampler
.sampler
= sampler
;
347 insn
->bits3
.sampler
.msg_type
= msg_type
;
348 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
349 insn
->bits3
.sampler
.response_length
= response_length
;
350 insn
->bits3
.sampler
.msg_length
= msg_length
;
351 insn
->bits3
.sampler
.end_of_thread
= eot
;
352 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
358 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
361 struct brw_instruction
*insn
;
363 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
365 insn
= &p
->store
[p
->nr_insn
++];
366 memcpy(insn
, p
->current
, sizeof(*insn
));
368 /* Reset this one-shot flag:
371 if (p
->current
->header
.destreg__conditonalmod
) {
372 p
->current
->header
.destreg__conditonalmod
= 0;
373 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
376 insn
->header
.opcode
= opcode
;
381 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
386 struct brw_instruction
*insn
= next_insn(p
, opcode
);
387 brw_set_dest(insn
, dest
);
388 brw_set_src0(insn
, src
);
392 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
396 struct brw_reg src1
)
398 struct brw_instruction
*insn
= next_insn(p
, opcode
);
399 brw_set_dest(insn
, dest
);
400 brw_set_src0(insn
, src0
);
401 brw_set_src1(insn
, src1
);
406 /***********************************************************************
407 * Convenience routines.
410 struct brw_instruction *brw_##OP(struct brw_compile *p, \
411 struct brw_reg dest, \
412 struct brw_reg src0) \
414 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
418 struct brw_instruction *brw_##OP(struct brw_compile *p, \
419 struct brw_reg dest, \
420 struct brw_reg src0, \
421 struct brw_reg src1) \
423 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
454 void brw_NOP(struct brw_compile
*p
)
456 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
457 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
458 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
459 brw_set_src1(insn
, brw_imm_ud(0x0));
466 /***********************************************************************
467 * Comparisons, if/else/endif
470 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
475 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
477 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
497 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
499 struct brw_instruction
*insn
;
501 if (p
->single_program_flow
) {
502 assert(execute_size
== BRW_EXECUTE_1
);
504 insn
= next_insn(p
, BRW_OPCODE_ADD
);
505 insn
->header
.predicate_inverse
= 1;
507 insn
= next_insn(p
, BRW_OPCODE_IF
);
510 /* Override the defaults for this instruction:
512 brw_set_dest(insn
, brw_ip_reg());
513 brw_set_src0(insn
, brw_ip_reg());
514 brw_set_src1(insn
, brw_imm_d(0x0));
516 insn
->header
.execution_size
= execute_size
;
517 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
518 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
519 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
520 if (!p
->single_program_flow
)
521 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
523 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
529 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
530 struct brw_instruction
*if_insn
)
532 struct brw_instruction
*insn
;
534 if (p
->single_program_flow
) {
535 insn
= next_insn(p
, BRW_OPCODE_ADD
);
537 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
540 brw_set_dest(insn
, brw_ip_reg());
541 brw_set_src0(insn
, brw_ip_reg());
542 brw_set_src1(insn
, brw_imm_d(0x0));
544 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
545 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
546 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
547 if (!p
->single_program_flow
)
548 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
550 /* Patch the if instruction to point at this instruction.
552 if (p
->single_program_flow
) {
553 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
555 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
557 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
559 if_insn
->bits3
.if_else
.jump_count
= insn
- if_insn
;
560 if_insn
->bits3
.if_else
.pop_count
= 1;
561 if_insn
->bits3
.if_else
.pad0
= 0;
567 void brw_ENDIF(struct brw_compile
*p
,
568 struct brw_instruction
*patch_insn
)
570 if (p
->single_program_flow
) {
571 /* In single program flow mode, there's no need to execute an ENDIF,
572 * since we don't need to do any stack operations, and if we're executing
573 * currently, we want to just continue executing.
575 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
577 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
579 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
581 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
583 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
584 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
585 brw_set_src1(insn
, brw_imm_d(0x0));
587 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
588 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
589 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
590 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
592 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
594 /* Patch the if or else instructions to point at this or the next
595 * instruction respectively.
597 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
598 /* Automagically turn it into an IFF:
600 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
601 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
602 patch_insn
->bits3
.if_else
.pop_count
= 0;
603 patch_insn
->bits3
.if_else
.pad0
= 0;
604 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
605 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
606 patch_insn
->bits3
.if_else
.pop_count
= 1;
607 patch_insn
->bits3
.if_else
.pad0
= 0;
612 /* Also pop item off the stack in the endif instruction:
614 insn
->bits3
.if_else
.jump_count
= 0;
615 insn
->bits3
.if_else
.pop_count
= 1;
616 insn
->bits3
.if_else
.pad0
= 0;
620 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
622 struct brw_instruction
*insn
;
623 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
624 brw_set_dest(insn
, brw_ip_reg());
625 brw_set_src0(insn
, brw_ip_reg());
626 brw_set_src1(insn
, brw_imm_d(0x0));
627 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
628 insn
->header
.execution_size
= BRW_EXECUTE_8
;
629 /* insn->header.mask_control = BRW_MASK_DISABLE; */
630 insn
->bits3
.if_else
.pad0
= 0;
634 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
636 struct brw_instruction
*insn
;
637 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
638 brw_set_dest(insn
, brw_ip_reg());
639 brw_set_src0(insn
, brw_ip_reg());
640 brw_set_src1(insn
, brw_imm_d(0x0));
641 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
642 insn
->header
.execution_size
= BRW_EXECUTE_8
;
643 /* insn->header.mask_control = BRW_MASK_DISABLE; */
644 insn
->bits3
.if_else
.pad0
= 0;
650 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
652 if (p
->single_program_flow
) {
653 return &p
->store
[p
->nr_insn
];
655 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
657 /* Override the defaults for this instruction:
659 brw_set_dest(insn
, brw_null_reg());
660 brw_set_src0(insn
, brw_null_reg());
661 brw_set_src1(insn
, brw_null_reg());
663 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
664 insn
->header
.execution_size
= execute_size
;
665 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
666 /* insn->header.mask_control = BRW_MASK_ENABLE; */
667 /* insn->header.mask_control = BRW_MASK_DISABLE; */
675 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
676 struct brw_instruction
*do_insn
)
678 struct brw_instruction
*insn
;
680 if (p
->single_program_flow
)
681 insn
= next_insn(p
, BRW_OPCODE_ADD
);
683 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
685 brw_set_dest(insn
, brw_ip_reg());
686 brw_set_src0(insn
, brw_ip_reg());
687 brw_set_src1(insn
, brw_imm_d(0x0));
689 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
691 if (p
->single_program_flow
) {
692 insn
->header
.execution_size
= BRW_EXECUTE_1
;
694 insn
->bits3
.d
= (do_insn
- insn
) * 16;
696 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
698 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
699 insn
->bits3
.if_else
.jump_count
= do_insn
- insn
+ 1;
700 insn
->bits3
.if_else
.pop_count
= 0;
701 insn
->bits3
.if_else
.pad0
= 0;
704 /* insn->header.mask_control = BRW_MASK_ENABLE; */
706 /* insn->header.mask_control = BRW_MASK_DISABLE; */
707 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
714 void brw_land_fwd_jump(struct brw_compile
*p
,
715 struct brw_instruction
*jmp_insn
)
717 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
719 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
720 assert(jmp_insn
->bits1
.da1
.src1_reg_file
= BRW_IMMEDIATE_VALUE
);
722 jmp_insn
->bits3
.ud
= (landing
- jmp_insn
) - 1;
727 /* To integrate with the above, it makes sense that the comparison
728 * instruction should populate the flag register. It might be simpler
729 * just to use the flag reg for most WM tasks?
731 void brw_CMP(struct brw_compile
*p
,
737 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
739 insn
->header
.destreg__conditonalmod
= conditional
;
740 brw_set_dest(insn
, dest
);
741 brw_set_src0(insn
, src0
);
742 brw_set_src1(insn
, src1
);
744 /* guess_execution_size(insn, src0); */
747 /* Make it so that future instructions will use the computed flag
748 * value until brw_set_predicate_control_flag_value() is called
751 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
753 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
754 p
->flag_value
= 0xff;
760 /***********************************************************************
761 * Helpers for the various SEND message types:
766 void brw_math( struct brw_compile
*p
,
775 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
776 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
777 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
779 /* Example code doesn't set predicate_control for send
782 insn
->header
.predicate_control
= 0;
783 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
785 brw_set_dest(insn
, dest
);
786 brw_set_src0(insn
, src
);
787 brw_set_math_message(insn
,
788 msg_length
, response_length
,
790 BRW_MATH_INTEGER_UNSIGNED
,
796 /* Use 2 send instructions to invert 16 elements
798 void brw_math_16( struct brw_compile
*p
,
806 struct brw_instruction
*insn
;
807 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
808 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
810 /* First instruction:
812 brw_push_insn_state(p
);
813 brw_set_predicate_control_flag_value(p
, 0xff);
814 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
816 insn
= next_insn(p
, BRW_OPCODE_SEND
);
817 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
819 brw_set_dest(insn
, dest
);
820 brw_set_src0(insn
, src
);
821 brw_set_math_message(insn
,
822 msg_length
, response_length
,
824 BRW_MATH_INTEGER_UNSIGNED
,
827 BRW_MATH_DATA_VECTOR
);
829 /* Second instruction:
831 insn
= next_insn(p
, BRW_OPCODE_SEND
);
832 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
833 insn
->header
.destreg__conditonalmod
= msg_reg_nr
+1;
835 brw_set_dest(insn
, offset(dest
,1));
836 brw_set_src0(insn
, src
);
837 brw_set_math_message(insn
,
838 msg_length
, response_length
,
840 BRW_MATH_INTEGER_UNSIGNED
,
843 BRW_MATH_DATA_VECTOR
);
845 brw_pop_insn_state(p
);
851 void brw_dp_WRITE_16( struct brw_compile
*p
,
854 GLuint scratch_offset
)
857 brw_push_insn_state(p
);
858 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
859 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
862 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
863 brw_imm_d(scratch_offset
));
865 brw_pop_insn_state(p
);
869 GLuint msg_length
= 3;
870 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
871 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
873 insn
->header
.predicate_control
= 0; /* XXX */
874 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
875 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
877 brw_set_dest(insn
, dest
);
878 brw_set_src0(insn
, src
);
880 brw_set_dp_write_message(insn
,
882 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
883 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
885 0, /* pixel scoreboard */
886 0, /* response_length */
893 void brw_dp_READ_16( struct brw_compile
*p
,
896 GLuint scratch_offset
)
899 brw_push_insn_state(p
);
900 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
901 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
904 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
905 brw_imm_d(scratch_offset
));
907 brw_pop_insn_state(p
);
911 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
913 insn
->header
.predicate_control
= 0; /* XXX */
914 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
915 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
917 brw_set_dest(insn
, dest
); /* UW? */
918 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
920 brw_set_dp_read_message(insn
,
923 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
924 1, /* target cache */
926 2, /* response_length */
932 void brw_fb_WRITE(struct brw_compile
*p
,
936 GLuint binding_table_index
,
938 GLuint response_length
,
941 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
943 insn
->header
.predicate_control
= 0; /* XXX */
944 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
945 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
947 brw_set_dest(insn
, dest
);
948 brw_set_src0(insn
, src0
);
949 brw_set_dp_write_message(insn
,
951 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
952 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
954 1, /* pixel scoreboard */
961 void brw_SAMPLE(struct brw_compile
*p
,
965 GLuint binding_table_index
,
969 GLuint response_length
,
973 GLboolean need_stall
= 0;
976 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
980 /* Hardware doesn't do destination dependency checking on send
981 * instructions properly. Add a workaround which generates the
982 * dependency by other means. In practice it seems like this bug
983 * only crops up for texture samples, and only where registers are
984 * written by the send and then written again later without being
985 * read in between. Luckily for us, we already track that
986 * information and use it to modify the writemask for the
987 * instruction, so that is a guide for whether a workaround is
990 if (writemask
!= WRITEMASK_XYZW
) {
991 GLuint dst_offset
= 0;
992 GLuint i
, newmask
= 0, len
= 0;
994 for (i
= 0; i
< 4; i
++) {
995 if (writemask
& (1<<i
))
1000 if (!(writemask
& (1<<i
)))
1006 if (newmask
!= writemask
) {
1008 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1011 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1013 newmask
= ~newmask
& WRITEMASK_XYZW
;
1015 brw_push_insn_state(p
);
1017 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1018 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1020 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1021 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1023 brw_pop_insn_state(p
);
1025 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1026 dest
= offset(dest
, dst_offset
);
1027 response_length
= len
* 2;
1032 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1034 insn
->header
.predicate_control
= 0; /* XXX */
1035 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1036 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1038 brw_set_dest(insn
, dest
);
1039 brw_set_src0(insn
, src0
);
1040 brw_set_sampler_message(p
->brw
, insn
,
1041 binding_table_index
,
1051 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1053 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1055 brw_push_insn_state(p
);
1056 brw_set_compression_control(p
, GL_FALSE
);
1057 brw_MOV(p
, reg
, reg
);
1058 brw_pop_insn_state(p
);
1063 /* All these variables are pretty confusing - we might be better off
1064 * using bitmasks and macros for this, in the old style. Or perhaps
1065 * just having the caller instantiate the fields in dword3 itself.
1067 void brw_urb_WRITE(struct brw_compile
*p
,
1068 struct brw_reg dest
,
1070 struct brw_reg src0
,
1074 GLuint response_length
,
1076 GLboolean writes_complete
,
1080 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1082 assert(msg_length
< 16);
1084 brw_set_dest(insn
, dest
);
1085 brw_set_src0(insn
, src0
);
1086 brw_set_src1(insn
, brw_imm_d(0));
1088 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1090 brw_set_urb_message(insn
,