2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 if (dest
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
59 assert(dest
.nr
< 128);
61 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
62 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
63 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
65 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
66 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
68 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
69 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
70 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
71 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
72 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
75 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
76 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
80 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
82 /* These are different sizes in align1 vs align16:
84 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
85 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
86 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
87 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
88 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
91 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
95 /* NEW: Set the execution size based on dest.width and
96 * insn->compression_control:
98 guess_execution_size(insn
, dest
);
101 static void brw_set_src0( struct brw_instruction
*insn
,
104 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
106 if (reg
.type
!= BRW_ARCHITECTURE_REGISTER_FILE
)
107 assert(reg
.nr
< 128);
109 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
110 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
111 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
112 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
113 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
115 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
116 insn
->bits3
.ud
= reg
.dw1
.ud
;
118 /* Required to set some fields in src1 as well:
120 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
121 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
125 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
126 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
127 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
128 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
131 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
132 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
136 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
138 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
139 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
142 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
146 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
147 if (reg
.width
== BRW_WIDTH_1
&&
148 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
149 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
150 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
151 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
154 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
155 insn
->bits2
.da1
.src0_width
= reg
.width
;
156 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
160 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
161 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
162 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
163 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
165 /* This is an oddity of the fact we're using the same
166 * descriptions for registers in align_16 as align_1:
168 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
169 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
171 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
177 void brw_set_src1( struct brw_instruction
*insn
,
180 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
182 assert(reg
.nr
< 128);
184 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
185 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
186 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
187 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
189 /* Only src1 can be immediate in two-argument instructions.
191 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
193 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
194 insn
->bits3
.ud
= reg
.dw1
.ud
;
197 /* This is a hardware restriction, which may or may not be lifted
200 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
201 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
203 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
204 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
205 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
208 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
209 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
212 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
213 if (reg
.width
== BRW_WIDTH_1
&&
214 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
215 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
216 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
217 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
220 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
221 insn
->bits3
.da1
.src1_width
= reg
.width
;
222 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
226 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
227 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
228 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
229 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
231 /* This is an oddity of the fact we're using the same
232 * descriptions for registers in align_16 as align_1:
234 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
235 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
237 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
244 static void brw_set_math_message( struct brw_instruction
*insn
,
246 GLuint response_length
,
249 GLboolean low_precision
,
253 brw_set_src1(insn
, brw_imm_d(0));
255 insn
->bits3
.math
.function
= function
;
256 insn
->bits3
.math
.int_type
= integer_type
;
257 insn
->bits3
.math
.precision
= low_precision
;
258 insn
->bits3
.math
.saturate
= saturate
;
259 insn
->bits3
.math
.data_type
= dataType
;
260 insn
->bits3
.math
.response_length
= response_length
;
261 insn
->bits3
.math
.msg_length
= msg_length
;
262 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
263 insn
->bits3
.math
.end_of_thread
= 0;
266 static void brw_set_urb_message( struct brw_instruction
*insn
,
270 GLuint response_length
,
271 GLboolean end_of_thread
,
274 GLuint swizzle_control
)
276 brw_set_src1(insn
, brw_imm_d(0));
278 insn
->bits3
.urb
.opcode
= 0; /* ? */
279 insn
->bits3
.urb
.offset
= offset
;
280 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
281 insn
->bits3
.urb
.allocate
= allocate
;
282 insn
->bits3
.urb
.used
= used
; /* ? */
283 insn
->bits3
.urb
.complete
= complete
;
284 insn
->bits3
.urb
.response_length
= response_length
;
285 insn
->bits3
.urb
.msg_length
= msg_length
;
286 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
287 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
290 static void brw_set_dp_write_message( struct brw_instruction
*insn
,
291 GLuint binding_table_index
,
295 GLuint pixel_scoreboard_clear
,
296 GLuint response_length
,
297 GLuint end_of_thread
)
299 brw_set_src1(insn
, brw_imm_d(0));
301 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
302 insn
->bits3
.dp_write
.msg_control
= msg_control
;
303 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
304 insn
->bits3
.dp_write
.msg_type
= msg_type
;
305 insn
->bits3
.dp_write
.send_commit_msg
= 0;
306 insn
->bits3
.dp_write
.response_length
= response_length
;
307 insn
->bits3
.dp_write
.msg_length
= msg_length
;
308 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
309 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
312 static void brw_set_dp_read_message( struct brw_instruction
*insn
,
313 GLuint binding_table_index
,
318 GLuint response_length
,
319 GLuint end_of_thread
)
321 brw_set_src1(insn
, brw_imm_d(0));
323 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
; /*0:7*/
324 insn
->bits3
.dp_read
.msg_control
= msg_control
; /*8:11*/
325 insn
->bits3
.dp_read
.msg_type
= msg_type
; /*12:13*/
326 insn
->bits3
.dp_read
.target_cache
= target_cache
; /*14:15*/
327 insn
->bits3
.dp_read
.response_length
= response_length
; /*16:19*/
328 insn
->bits3
.dp_read
.msg_length
= msg_length
; /*20:23*/
329 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
; /*24:27*/
330 insn
->bits3
.dp_read
.pad1
= 0; /*28:30*/
331 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
; /*31*/
334 static void brw_set_sampler_message(struct brw_context
*brw
,
335 struct brw_instruction
*insn
,
336 GLuint binding_table_index
,
339 GLuint response_length
,
343 brw_set_src1(insn
, brw_imm_d(0));
345 if (BRW_IS_G4X(brw
)) {
346 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
347 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
348 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
349 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
350 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
351 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
352 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
354 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
355 insn
->bits3
.sampler
.sampler
= sampler
;
356 insn
->bits3
.sampler
.msg_type
= msg_type
;
357 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
358 insn
->bits3
.sampler
.response_length
= response_length
;
359 insn
->bits3
.sampler
.msg_length
= msg_length
;
360 insn
->bits3
.sampler
.end_of_thread
= eot
;
361 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
367 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
370 struct brw_instruction
*insn
;
372 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
374 insn
= &p
->store
[p
->nr_insn
++];
375 memcpy(insn
, p
->current
, sizeof(*insn
));
377 /* Reset this one-shot flag:
380 if (p
->current
->header
.destreg__conditonalmod
) {
381 p
->current
->header
.destreg__conditonalmod
= 0;
382 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
385 insn
->header
.opcode
= opcode
;
390 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
395 struct brw_instruction
*insn
= next_insn(p
, opcode
);
396 brw_set_dest(insn
, dest
);
397 brw_set_src0(insn
, src
);
401 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
405 struct brw_reg src1
)
407 struct brw_instruction
*insn
= next_insn(p
, opcode
);
408 brw_set_dest(insn
, dest
);
409 brw_set_src0(insn
, src0
);
410 brw_set_src1(insn
, src1
);
415 /***********************************************************************
416 * Convenience routines.
419 struct brw_instruction *brw_##OP(struct brw_compile *p, \
420 struct brw_reg dest, \
421 struct brw_reg src0) \
423 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
427 struct brw_instruction *brw_##OP(struct brw_compile *p, \
428 struct brw_reg dest, \
429 struct brw_reg src0, \
430 struct brw_reg src1) \
432 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
464 void brw_NOP(struct brw_compile
*p
)
466 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
467 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
468 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
469 brw_set_src1(insn
, brw_imm_ud(0x0));
476 /***********************************************************************
477 * Comparisons, if/else/endif
480 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
485 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
487 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
492 /* EU takes the value from the flag register and pushes it onto some
493 * sort of a stack (presumably merging with any flag value already on
494 * the stack). Within an if block, the flags at the top of the stack
495 * control execution on each channel of the unit, eg. on each of the
496 * 16 pixel values in our wm programs.
498 * When the matching 'else' instruction is reached (presumably by
499 * countdown of the instruction count patched in by our ELSE/ENDIF
500 * functions), the relevant flags are inverted.
502 * When the matching 'endif' instruction is reached, the flags are
503 * popped off. If the stack is now empty, normal execution resumes.
505 * No attempt is made to deal with stack overflow (14 elements?).
507 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
509 struct brw_instruction
*insn
;
511 if (p
->single_program_flow
) {
512 assert(execute_size
== BRW_EXECUTE_1
);
514 insn
= next_insn(p
, BRW_OPCODE_ADD
);
515 insn
->header
.predicate_inverse
= 1;
517 insn
= next_insn(p
, BRW_OPCODE_IF
);
520 /* Override the defaults for this instruction:
522 brw_set_dest(insn
, brw_ip_reg());
523 brw_set_src0(insn
, brw_ip_reg());
524 brw_set_src1(insn
, brw_imm_d(0x0));
526 insn
->header
.execution_size
= execute_size
;
527 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
528 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
529 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
530 if (!p
->single_program_flow
)
531 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
533 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
539 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
540 struct brw_instruction
*if_insn
)
542 struct brw_instruction
*insn
;
544 if (p
->single_program_flow
) {
545 insn
= next_insn(p
, BRW_OPCODE_ADD
);
547 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
550 brw_set_dest(insn
, brw_ip_reg());
551 brw_set_src0(insn
, brw_ip_reg());
552 brw_set_src1(insn
, brw_imm_d(0x0));
554 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
555 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
556 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
557 if (!p
->single_program_flow
)
558 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
560 /* Patch the if instruction to point at this instruction.
562 if (p
->single_program_flow
) {
563 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
565 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
567 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
569 if_insn
->bits3
.if_else
.jump_count
= insn
- if_insn
;
570 if_insn
->bits3
.if_else
.pop_count
= 1;
571 if_insn
->bits3
.if_else
.pad0
= 0;
577 void brw_ENDIF(struct brw_compile
*p
,
578 struct brw_instruction
*patch_insn
)
580 if (p
->single_program_flow
) {
581 /* In single program flow mode, there's no need to execute an ENDIF,
582 * since we don't need to do any stack operations, and if we're executing
583 * currently, we want to just continue executing.
585 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
587 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
589 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
591 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
593 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
594 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
595 brw_set_src1(insn
, brw_imm_d(0x0));
597 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
598 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
599 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
600 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
602 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
604 /* Patch the if or else instructions to point at this or the next
605 * instruction respectively.
607 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
608 /* Automagically turn it into an IFF:
610 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
611 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
612 patch_insn
->bits3
.if_else
.pop_count
= 0;
613 patch_insn
->bits3
.if_else
.pad0
= 0;
614 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
615 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
616 patch_insn
->bits3
.if_else
.pop_count
= 1;
617 patch_insn
->bits3
.if_else
.pad0
= 0;
622 /* Also pop item off the stack in the endif instruction:
624 insn
->bits3
.if_else
.jump_count
= 0;
625 insn
->bits3
.if_else
.pop_count
= 1;
626 insn
->bits3
.if_else
.pad0
= 0;
630 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
632 struct brw_instruction
*insn
;
633 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
634 brw_set_dest(insn
, brw_ip_reg());
635 brw_set_src0(insn
, brw_ip_reg());
636 brw_set_src1(insn
, brw_imm_d(0x0));
637 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
638 insn
->header
.execution_size
= BRW_EXECUTE_8
;
639 /* insn->header.mask_control = BRW_MASK_DISABLE; */
640 insn
->bits3
.if_else
.pad0
= 0;
644 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
646 struct brw_instruction
*insn
;
647 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
648 brw_set_dest(insn
, brw_ip_reg());
649 brw_set_src0(insn
, brw_ip_reg());
650 brw_set_src1(insn
, brw_imm_d(0x0));
651 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
652 insn
->header
.execution_size
= BRW_EXECUTE_8
;
653 /* insn->header.mask_control = BRW_MASK_DISABLE; */
654 insn
->bits3
.if_else
.pad0
= 0;
660 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
662 if (p
->single_program_flow
) {
663 return &p
->store
[p
->nr_insn
];
665 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
667 /* Override the defaults for this instruction:
669 brw_set_dest(insn
, brw_null_reg());
670 brw_set_src0(insn
, brw_null_reg());
671 brw_set_src1(insn
, brw_null_reg());
673 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
674 insn
->header
.execution_size
= execute_size
;
675 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
676 /* insn->header.mask_control = BRW_MASK_ENABLE; */
677 /* insn->header.mask_control = BRW_MASK_DISABLE; */
685 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
686 struct brw_instruction
*do_insn
)
688 struct brw_instruction
*insn
;
690 if (p
->single_program_flow
)
691 insn
= next_insn(p
, BRW_OPCODE_ADD
);
693 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
695 brw_set_dest(insn
, brw_ip_reg());
696 brw_set_src0(insn
, brw_ip_reg());
697 brw_set_src1(insn
, brw_imm_d(0x0));
699 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
701 if (p
->single_program_flow
) {
702 insn
->header
.execution_size
= BRW_EXECUTE_1
;
704 insn
->bits3
.d
= (do_insn
- insn
) * 16;
706 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
708 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
709 insn
->bits3
.if_else
.jump_count
= do_insn
- insn
+ 1;
710 insn
->bits3
.if_else
.pop_count
= 0;
711 insn
->bits3
.if_else
.pad0
= 0;
714 /* insn->header.mask_control = BRW_MASK_ENABLE; */
716 /* insn->header.mask_control = BRW_MASK_DISABLE; */
717 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
724 void brw_land_fwd_jump(struct brw_compile
*p
,
725 struct brw_instruction
*jmp_insn
)
727 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
729 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
730 assert(jmp_insn
->bits1
.da1
.src1_reg_file
= BRW_IMMEDIATE_VALUE
);
732 jmp_insn
->bits3
.ud
= (landing
- jmp_insn
) - 1;
737 /* To integrate with the above, it makes sense that the comparison
738 * instruction should populate the flag register. It might be simpler
739 * just to use the flag reg for most WM tasks?
741 void brw_CMP(struct brw_compile
*p
,
747 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
749 insn
->header
.destreg__conditonalmod
= conditional
;
750 brw_set_dest(insn
, dest
);
751 brw_set_src0(insn
, src0
);
752 brw_set_src1(insn
, src1
);
754 /* guess_execution_size(insn, src0); */
757 /* Make it so that future instructions will use the computed flag
758 * value until brw_set_predicate_control_flag_value() is called
761 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
763 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
764 p
->flag_value
= 0xff;
770 /***********************************************************************
771 * Helpers for the various SEND message types:
774 /** Extended math function, float[8].
776 void brw_math( struct brw_compile
*p
,
785 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
786 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
787 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
789 /* Example code doesn't set predicate_control for send
792 insn
->header
.predicate_control
= 0;
793 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
795 brw_set_dest(insn
, dest
);
796 brw_set_src0(insn
, src
);
797 brw_set_math_message(insn
,
798 msg_length
, response_length
,
800 BRW_MATH_INTEGER_UNSIGNED
,
807 * Extended math function, float[16].
808 * Use 2 send instructions.
810 void brw_math_16( struct brw_compile
*p
,
818 struct brw_instruction
*insn
;
819 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
820 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
822 /* First instruction:
824 brw_push_insn_state(p
);
825 brw_set_predicate_control_flag_value(p
, 0xff);
826 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
828 insn
= next_insn(p
, BRW_OPCODE_SEND
);
829 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
831 brw_set_dest(insn
, dest
);
832 brw_set_src0(insn
, src
);
833 brw_set_math_message(insn
,
834 msg_length
, response_length
,
836 BRW_MATH_INTEGER_UNSIGNED
,
839 BRW_MATH_DATA_VECTOR
);
841 /* Second instruction:
843 insn
= next_insn(p
, BRW_OPCODE_SEND
);
844 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
845 insn
->header
.destreg__conditonalmod
= msg_reg_nr
+1;
847 brw_set_dest(insn
, offset(dest
,1));
848 brw_set_src0(insn
, src
);
849 brw_set_math_message(insn
,
850 msg_length
, response_length
,
852 BRW_MATH_INTEGER_UNSIGNED
,
855 BRW_MATH_DATA_VECTOR
);
857 brw_pop_insn_state(p
);
862 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
863 * Scratch offset should be a multiple of 64.
864 * Used for register spilling.
866 void brw_dp_WRITE_16( struct brw_compile
*p
,
869 GLuint scratch_offset
)
872 brw_push_insn_state(p
);
873 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
874 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
876 /* set message header global offset field (reg 0, element 2) */
878 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
879 brw_imm_d(scratch_offset
));
881 brw_pop_insn_state(p
);
885 GLuint msg_length
= 3;
886 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
887 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
889 insn
->header
.predicate_control
= 0; /* XXX */
890 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
891 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
893 brw_set_dest(insn
, dest
);
894 brw_set_src0(insn
, src
);
896 brw_set_dp_write_message(insn
,
897 255, /* binding table index (255=stateless) */
898 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
899 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
901 0, /* pixel scoreboard */
902 0, /* response_length */
909 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
910 * Scratch offset should be a multiple of 64.
911 * Used for register spilling.
913 void brw_dp_READ_16( struct brw_compile
*p
,
916 GLuint scratch_offset
)
919 brw_push_insn_state(p
);
920 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
921 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
923 /* set message header global offset field (reg 0, element 2) */
925 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
926 brw_imm_d(scratch_offset
));
928 brw_pop_insn_state(p
);
932 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
934 insn
->header
.predicate_control
= 0; /* XXX */
935 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
936 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
938 brw_set_dest(insn
, dest
); /* UW? */
939 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
941 brw_set_dp_read_message(insn
,
942 255, /* binding table index (255=stateless) */
943 3, /* msg_control (3 means 4 Owords) */
944 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
945 1, /* target cache (render/scratch) */
947 2, /* response_length */
954 * Read a float[4] vector from the data port Data Cache (const buffer).
955 * Location (in buffer) should be a multiple of 16.
956 * Used for fetching shader constants.
957 * If relAddr is true, we'll do an indirect fetch using the address register.
959 void brw_dp_READ_4( struct brw_compile
*p
,
964 GLuint bind_table_index
)
967 brw_push_insn_state(p
);
968 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
969 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
971 /* set message header global offset field (reg 0, element 2) */
972 /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */
974 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD
),
975 brw_imm_d(location
));
976 brw_pop_insn_state(p
);
980 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
982 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
983 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
984 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
985 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
987 /* cast dest to a uword[8] vector */
988 dest
= retype(vec8(dest
), BRW_REGISTER_TYPE_UW
);
990 brw_set_dest(insn
, dest
);
991 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
993 brw_set_dp_read_message(insn
,
995 0, /* msg_control (0 means 1 Oword) */
996 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
997 0, /* source cache = data cache */
999 1, /* response_length (1 Oword) */
1006 * Read float[4] constant(s) from VS constant buffer.
1007 * For relative addressing, two float[4] constants will be read into 'dest'.
1008 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1010 void brw_dp_READ_4_vs(struct brw_compile
*p
,
1011 struct brw_reg dest
,
1014 struct brw_reg addrReg
,
1016 GLuint bind_table_index
)
1018 GLuint msg_reg_nr
= 1;
1022 printf("vs const read msg, location %u, msg_reg_nr %d\n",
1023 location, msg_reg_nr);
1026 /* Setup MRF[1] with location/offset into const buffer */
1030 brw_push_insn_state(p
);
1031 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1032 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1033 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1034 /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1036 /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1037 * when the docs say only dword[2] should be set. Hmmm. But it works.
1039 b
= brw_message_reg(msg_reg_nr
);
1040 b
= retype(b
, BRW_REGISTER_TYPE_UD
);
1041 /*b = get_element_ud(b, 2);*/
1043 brw_ADD(p
, b
, addrReg
, brw_imm_ud(location
));
1046 brw_MOV(p
, b
, brw_imm_ud(location
));
1049 brw_pop_insn_state(p
);
1053 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1055 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
1056 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1057 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1058 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
1059 /*insn->header.access_mode = BRW_ALIGN_16;*/
1061 brw_set_dest(insn
, dest
);
1062 brw_set_src0(insn
, brw_null_reg());
1064 brw_set_dp_read_message(insn
,
1066 oword
, /* 0 = lower Oword, 1 = upper Oword */
1067 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
1068 0, /* source cache = data cache */
1070 1, /* response_length (1 Oword) */
1077 void brw_fb_WRITE(struct brw_compile
*p
,
1078 struct brw_reg dest
,
1080 struct brw_reg src0
,
1081 GLuint binding_table_index
,
1083 GLuint response_length
,
1086 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1088 insn
->header
.predicate_control
= 0; /* XXX */
1089 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1090 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1092 brw_set_dest(insn
, dest
);
1093 brw_set_src0(insn
, src0
);
1094 brw_set_dp_write_message(insn
,
1095 binding_table_index
,
1096 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
1097 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
1099 1, /* pixel scoreboard */
1106 * Texture sample instruction.
1107 * Note: the msg_type plus msg_length values determine exactly what kind
1108 * of sampling operation is performed. See volume 4, page 161 of docs.
1110 void brw_SAMPLE(struct brw_compile
*p
,
1111 struct brw_reg dest
,
1113 struct brw_reg src0
,
1114 GLuint binding_table_index
,
1118 GLuint response_length
,
1122 GLboolean need_stall
= 0;
1124 if (writemask
== 0) {
1125 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1129 /* Hardware doesn't do destination dependency checking on send
1130 * instructions properly. Add a workaround which generates the
1131 * dependency by other means. In practice it seems like this bug
1132 * only crops up for texture samples, and only where registers are
1133 * written by the send and then written again later without being
1134 * read in between. Luckily for us, we already track that
1135 * information and use it to modify the writemask for the
1136 * instruction, so that is a guide for whether a workaround is
1139 if (writemask
!= WRITEMASK_XYZW
) {
1140 GLuint dst_offset
= 0;
1141 GLuint i
, newmask
= 0, len
= 0;
1143 for (i
= 0; i
< 4; i
++) {
1144 if (writemask
& (1<<i
))
1148 for (; i
< 4; i
++) {
1149 if (!(writemask
& (1<<i
)))
1155 if (newmask
!= writemask
) {
1157 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1160 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1162 newmask
= ~newmask
& WRITEMASK_XYZW
;
1164 brw_push_insn_state(p
);
1166 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1167 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1169 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1170 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1172 brw_pop_insn_state(p
);
1174 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1175 dest
= offset(dest
, dst_offset
);
1176 response_length
= len
* 2;
1181 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1183 insn
->header
.predicate_control
= 0; /* XXX */
1184 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1185 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1187 brw_set_dest(insn
, dest
);
1188 brw_set_src0(insn
, src0
);
1189 brw_set_sampler_message(p
->brw
, insn
,
1190 binding_table_index
,
1199 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1201 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1203 brw_push_insn_state(p
);
1204 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1205 brw_MOV(p
, reg
, reg
);
1206 brw_pop_insn_state(p
);
1211 /* All these variables are pretty confusing - we might be better off
1212 * using bitmasks and macros for this, in the old style. Or perhaps
1213 * just having the caller instantiate the fields in dword3 itself.
1215 void brw_urb_WRITE(struct brw_compile
*p
,
1216 struct brw_reg dest
,
1218 struct brw_reg src0
,
1222 GLuint response_length
,
1224 GLboolean writes_complete
,
1228 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1230 assert(msg_length
< 16);
1232 brw_set_dest(insn
, dest
);
1233 brw_set_src0(insn
, src0
);
1234 brw_set_src1(insn
, brw_imm_d(0));
1236 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1238 brw_set_urb_message(insn
,