2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
59 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
60 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
62 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
63 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
65 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
66 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
67 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
68 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
69 insn
->bits1
.da1
.dest_horiz_stride
= dest
.hstride
;
72 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
73 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
77 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
79 /* These are different sizes in align1 vs align16:
81 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
82 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
83 if (dest
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
84 dest
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
85 insn
->bits1
.ia1
.dest_horiz_stride
= dest
.hstride
;
88 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
92 /* NEW: Set the execution size based on dest.width and
93 * insn->compression_control:
95 guess_execution_size(insn
, dest
);
98 static void brw_set_src0( struct brw_instruction
*insn
,
101 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
103 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
104 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
105 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
106 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
107 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
109 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
110 insn
->bits3
.ud
= reg
.dw1
.ud
;
112 /* Required to set some fields in src1 as well:
114 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
115 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
119 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
120 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
121 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
122 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
125 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
126 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
130 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
132 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
133 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
136 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
140 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
141 if (reg
.width
== BRW_WIDTH_1
&&
142 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
143 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
144 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
145 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
148 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
149 insn
->bits2
.da1
.src0_width
= reg
.width
;
150 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
154 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
155 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
156 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
157 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
159 /* This is an oddity of the fact we're using the same
160 * descriptions for registers in align_16 as align_1:
162 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
163 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
165 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
171 void brw_set_src1( struct brw_instruction
*insn
,
174 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
176 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
177 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
178 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
179 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
181 /* Only src1 can be immediate in two-argument instructions.
183 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
185 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
186 insn
->bits3
.ud
= reg
.dw1
.ud
;
189 /* This is a hardware restriction, which may or may not be lifted
192 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
193 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
195 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
196 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
197 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
200 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
201 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
204 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
205 if (reg
.width
== BRW_WIDTH_1
&&
206 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
207 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
208 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
209 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
212 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
213 insn
->bits3
.da1
.src1_width
= reg
.width
;
214 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
218 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
219 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
220 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
221 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
223 /* This is an oddity of the fact we're using the same
224 * descriptions for registers in align_16 as align_1:
226 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
227 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
229 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
236 static void brw_set_math_message( struct brw_instruction
*insn
,
238 GLuint response_length
,
241 GLboolean low_precision
,
245 brw_set_src1(insn
, brw_imm_d(0));
247 insn
->bits3
.math
.function
= function
;
248 insn
->bits3
.math
.int_type
= integer_type
;
249 insn
->bits3
.math
.precision
= low_precision
;
250 insn
->bits3
.math
.saturate
= saturate
;
251 insn
->bits3
.math
.data_type
= dataType
;
252 insn
->bits3
.math
.response_length
= response_length
;
253 insn
->bits3
.math
.msg_length
= msg_length
;
254 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
255 insn
->bits3
.math
.end_of_thread
= 0;
258 static void brw_set_urb_message( struct brw_instruction
*insn
,
262 GLuint response_length
,
263 GLboolean end_of_thread
,
266 GLuint swizzle_control
)
268 brw_set_src1(insn
, brw_imm_d(0));
270 insn
->bits3
.urb
.opcode
= 0; /* ? */
271 insn
->bits3
.urb
.offset
= offset
;
272 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
273 insn
->bits3
.urb
.allocate
= allocate
;
274 insn
->bits3
.urb
.used
= used
; /* ? */
275 insn
->bits3
.urb
.complete
= complete
;
276 insn
->bits3
.urb
.response_length
= response_length
;
277 insn
->bits3
.urb
.msg_length
= msg_length
;
278 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
279 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
282 static void brw_set_dp_write_message( struct brw_instruction
*insn
,
283 GLuint binding_table_index
,
287 GLuint pixel_scoreboard_clear
,
288 GLuint response_length
,
289 GLuint end_of_thread
)
291 brw_set_src1(insn
, brw_imm_d(0));
293 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
294 insn
->bits3
.dp_write
.msg_control
= msg_control
;
295 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
296 insn
->bits3
.dp_write
.msg_type
= msg_type
;
297 insn
->bits3
.dp_write
.send_commit_msg
= 0;
298 insn
->bits3
.dp_write
.response_length
= response_length
;
299 insn
->bits3
.dp_write
.msg_length
= msg_length
;
300 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
301 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
304 static void brw_set_dp_read_message( struct brw_instruction
*insn
,
305 GLuint binding_table_index
,
310 GLuint response_length
,
311 GLuint end_of_thread
)
313 brw_set_src1(insn
, brw_imm_d(0));
315 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
;
316 insn
->bits3
.dp_read
.msg_control
= msg_control
;
317 insn
->bits3
.dp_read
.msg_type
= msg_type
;
318 insn
->bits3
.dp_read
.target_cache
= target_cache
;
319 insn
->bits3
.dp_read
.response_length
= response_length
;
320 insn
->bits3
.dp_read
.msg_length
= msg_length
;
321 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
322 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
;
325 static void brw_set_sampler_message(struct brw_context
*brw
,
326 struct brw_instruction
*insn
,
327 GLuint binding_table_index
,
330 GLuint response_length
,
334 brw_set_src1(insn
, brw_imm_d(0));
336 if (BRW_IS_G4X(brw
)) {
337 insn
->bits3
.sampler_g4x
.binding_table_index
= binding_table_index
;
338 insn
->bits3
.sampler_g4x
.sampler
= sampler
;
339 insn
->bits3
.sampler_g4x
.msg_type
= msg_type
;
340 insn
->bits3
.sampler_g4x
.response_length
= response_length
;
341 insn
->bits3
.sampler_g4x
.msg_length
= msg_length
;
342 insn
->bits3
.sampler_g4x
.end_of_thread
= eot
;
343 insn
->bits3
.sampler_g4x
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
345 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
346 insn
->bits3
.sampler
.sampler
= sampler
;
347 insn
->bits3
.sampler
.msg_type
= msg_type
;
348 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
349 insn
->bits3
.sampler
.response_length
= response_length
;
350 insn
->bits3
.sampler
.msg_length
= msg_length
;
351 insn
->bits3
.sampler
.end_of_thread
= eot
;
352 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
358 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
361 struct brw_instruction
*insn
;
363 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
365 insn
= &p
->store
[p
->nr_insn
++];
366 memcpy(insn
, p
->current
, sizeof(*insn
));
368 /* Reset this one-shot flag:
371 if (p
->current
->header
.destreg__conditonalmod
) {
372 p
->current
->header
.destreg__conditonalmod
= 0;
373 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
376 insn
->header
.opcode
= opcode
;
381 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
386 struct brw_instruction
*insn
= next_insn(p
, opcode
);
387 brw_set_dest(insn
, dest
);
388 brw_set_src0(insn
, src
);
392 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
396 struct brw_reg src1
)
398 struct brw_instruction
*insn
= next_insn(p
, opcode
);
399 brw_set_dest(insn
, dest
);
400 brw_set_src0(insn
, src0
);
401 brw_set_src1(insn
, src1
);
406 /***********************************************************************
407 * Convenience routines.
410 struct brw_instruction *brw_##OP(struct brw_compile *p, \
411 struct brw_reg dest, \
412 struct brw_reg src0) \
414 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
418 struct brw_instruction *brw_##OP(struct brw_compile *p, \
419 struct brw_reg dest, \
420 struct brw_reg src0, \
421 struct brw_reg src1) \
423 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
455 void brw_NOP(struct brw_compile
*p
)
457 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
458 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
459 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
460 brw_set_src1(insn
, brw_imm_ud(0x0));
467 /***********************************************************************
468 * Comparisons, if/else/endif
471 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
476 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
478 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
483 /* EU takes the value from the flag register and pushes it onto some
484 * sort of a stack (presumably merging with any flag value already on
485 * the stack). Within an if block, the flags at the top of the stack
486 * control execution on each channel of the unit, eg. on each of the
487 * 16 pixel values in our wm programs.
489 * When the matching 'else' instruction is reached (presumably by
490 * countdown of the instruction count patched in by our ELSE/ENDIF
491 * functions), the relevant flags are inverted.
493 * When the matching 'endif' instruction is reached, the flags are
494 * popped off. If the stack is now empty, normal execution resumes.
496 * No attempt is made to deal with stack overflow (14 elements?).
498 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
500 struct brw_instruction
*insn
;
502 if (p
->single_program_flow
) {
503 assert(execute_size
== BRW_EXECUTE_1
);
505 insn
= next_insn(p
, BRW_OPCODE_ADD
);
506 insn
->header
.predicate_inverse
= 1;
508 insn
= next_insn(p
, BRW_OPCODE_IF
);
511 /* Override the defaults for this instruction:
513 brw_set_dest(insn
, brw_ip_reg());
514 brw_set_src0(insn
, brw_ip_reg());
515 brw_set_src1(insn
, brw_imm_d(0x0));
517 insn
->header
.execution_size
= execute_size
;
518 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
519 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
520 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
521 if (!p
->single_program_flow
)
522 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
524 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
530 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
531 struct brw_instruction
*if_insn
)
533 struct brw_instruction
*insn
;
535 if (p
->single_program_flow
) {
536 insn
= next_insn(p
, BRW_OPCODE_ADD
);
538 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
541 brw_set_dest(insn
, brw_ip_reg());
542 brw_set_src0(insn
, brw_ip_reg());
543 brw_set_src1(insn
, brw_imm_d(0x0));
545 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
546 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
547 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
548 if (!p
->single_program_flow
)
549 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
551 /* Patch the if instruction to point at this instruction.
553 if (p
->single_program_flow
) {
554 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
556 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
558 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
560 if_insn
->bits3
.if_else
.jump_count
= insn
- if_insn
;
561 if_insn
->bits3
.if_else
.pop_count
= 1;
562 if_insn
->bits3
.if_else
.pad0
= 0;
568 void brw_ENDIF(struct brw_compile
*p
,
569 struct brw_instruction
*patch_insn
)
571 if (p
->single_program_flow
) {
572 /* In single program flow mode, there's no need to execute an ENDIF,
573 * since we don't need to do any stack operations, and if we're executing
574 * currently, we want to just continue executing.
576 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
578 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
580 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
582 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
584 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
585 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
586 brw_set_src1(insn
, brw_imm_d(0x0));
588 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
589 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
590 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
591 insn
->header
.thread_control
= BRW_THREAD_SWITCH
;
593 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
595 /* Patch the if or else instructions to point at this or the next
596 * instruction respectively.
598 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
599 /* Automagically turn it into an IFF:
601 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
602 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
603 patch_insn
->bits3
.if_else
.pop_count
= 0;
604 patch_insn
->bits3
.if_else
.pad0
= 0;
605 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
606 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
607 patch_insn
->bits3
.if_else
.pop_count
= 1;
608 patch_insn
->bits3
.if_else
.pad0
= 0;
613 /* Also pop item off the stack in the endif instruction:
615 insn
->bits3
.if_else
.jump_count
= 0;
616 insn
->bits3
.if_else
.pop_count
= 1;
617 insn
->bits3
.if_else
.pad0
= 0;
621 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
623 struct brw_instruction
*insn
;
624 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
625 brw_set_dest(insn
, brw_ip_reg());
626 brw_set_src0(insn
, brw_ip_reg());
627 brw_set_src1(insn
, brw_imm_d(0x0));
628 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
629 insn
->header
.execution_size
= BRW_EXECUTE_8
;
630 /* insn->header.mask_control = BRW_MASK_DISABLE; */
631 insn
->bits3
.if_else
.pad0
= 0;
635 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
637 struct brw_instruction
*insn
;
638 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
639 brw_set_dest(insn
, brw_ip_reg());
640 brw_set_src0(insn
, brw_ip_reg());
641 brw_set_src1(insn
, brw_imm_d(0x0));
642 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
643 insn
->header
.execution_size
= BRW_EXECUTE_8
;
644 /* insn->header.mask_control = BRW_MASK_DISABLE; */
645 insn
->bits3
.if_else
.pad0
= 0;
651 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
653 if (p
->single_program_flow
) {
654 return &p
->store
[p
->nr_insn
];
656 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
658 /* Override the defaults for this instruction:
660 brw_set_dest(insn
, brw_null_reg());
661 brw_set_src0(insn
, brw_null_reg());
662 brw_set_src1(insn
, brw_null_reg());
664 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
665 insn
->header
.execution_size
= execute_size
;
666 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
667 /* insn->header.mask_control = BRW_MASK_ENABLE; */
668 /* insn->header.mask_control = BRW_MASK_DISABLE; */
676 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
677 struct brw_instruction
*do_insn
)
679 struct brw_instruction
*insn
;
681 if (p
->single_program_flow
)
682 insn
= next_insn(p
, BRW_OPCODE_ADD
);
684 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
686 brw_set_dest(insn
, brw_ip_reg());
687 brw_set_src0(insn
, brw_ip_reg());
688 brw_set_src1(insn
, brw_imm_d(0x0));
690 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
692 if (p
->single_program_flow
) {
693 insn
->header
.execution_size
= BRW_EXECUTE_1
;
695 insn
->bits3
.d
= (do_insn
- insn
) * 16;
697 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
699 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
700 insn
->bits3
.if_else
.jump_count
= do_insn
- insn
+ 1;
701 insn
->bits3
.if_else
.pop_count
= 0;
702 insn
->bits3
.if_else
.pad0
= 0;
705 /* insn->header.mask_control = BRW_MASK_ENABLE; */
707 /* insn->header.mask_control = BRW_MASK_DISABLE; */
708 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
715 void brw_land_fwd_jump(struct brw_compile
*p
,
716 struct brw_instruction
*jmp_insn
)
718 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
720 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
721 assert(jmp_insn
->bits1
.da1
.src1_reg_file
= BRW_IMMEDIATE_VALUE
);
723 jmp_insn
->bits3
.ud
= (landing
- jmp_insn
) - 1;
728 /* To integrate with the above, it makes sense that the comparison
729 * instruction should populate the flag register. It might be simpler
730 * just to use the flag reg for most WM tasks?
732 void brw_CMP(struct brw_compile
*p
,
738 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
740 insn
->header
.destreg__conditonalmod
= conditional
;
741 brw_set_dest(insn
, dest
);
742 brw_set_src0(insn
, src0
);
743 brw_set_src1(insn
, src1
);
745 /* guess_execution_size(insn, src0); */
748 /* Make it so that future instructions will use the computed flag
749 * value until brw_set_predicate_control_flag_value() is called
752 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
754 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
755 p
->flag_value
= 0xff;
761 /***********************************************************************
762 * Helpers for the various SEND message types:
767 void brw_math( struct brw_compile
*p
,
776 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
777 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
778 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
780 /* Example code doesn't set predicate_control for send
783 insn
->header
.predicate_control
= 0;
784 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
786 brw_set_dest(insn
, dest
);
787 brw_set_src0(insn
, src
);
788 brw_set_math_message(insn
,
789 msg_length
, response_length
,
791 BRW_MATH_INTEGER_UNSIGNED
,
797 /* Use 2 send instructions to invert 16 elements
799 void brw_math_16( struct brw_compile
*p
,
807 struct brw_instruction
*insn
;
808 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
809 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
811 /* First instruction:
813 brw_push_insn_state(p
);
814 brw_set_predicate_control_flag_value(p
, 0xff);
815 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
817 insn
= next_insn(p
, BRW_OPCODE_SEND
);
818 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
820 brw_set_dest(insn
, dest
);
821 brw_set_src0(insn
, src
);
822 brw_set_math_message(insn
,
823 msg_length
, response_length
,
825 BRW_MATH_INTEGER_UNSIGNED
,
828 BRW_MATH_DATA_VECTOR
);
830 /* Second instruction:
832 insn
= next_insn(p
, BRW_OPCODE_SEND
);
833 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
834 insn
->header
.destreg__conditonalmod
= msg_reg_nr
+1;
836 brw_set_dest(insn
, offset(dest
,1));
837 brw_set_src0(insn
, src
);
838 brw_set_math_message(insn
,
839 msg_length
, response_length
,
841 BRW_MATH_INTEGER_UNSIGNED
,
844 BRW_MATH_DATA_VECTOR
);
846 brw_pop_insn_state(p
);
852 void brw_dp_WRITE_16( struct brw_compile
*p
,
855 GLuint scratch_offset
)
858 brw_push_insn_state(p
);
859 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
860 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
863 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
864 brw_imm_d(scratch_offset
));
866 brw_pop_insn_state(p
);
870 GLuint msg_length
= 3;
871 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
872 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
874 insn
->header
.predicate_control
= 0; /* XXX */
875 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
876 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
878 brw_set_dest(insn
, dest
);
879 brw_set_src0(insn
, src
);
881 brw_set_dp_write_message(insn
,
883 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
884 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
886 0, /* pixel scoreboard */
887 0, /* response_length */
894 void brw_dp_READ_16( struct brw_compile
*p
,
897 GLuint scratch_offset
)
900 brw_push_insn_state(p
);
901 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
902 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
905 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
906 brw_imm_d(scratch_offset
));
908 brw_pop_insn_state(p
);
912 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
914 insn
->header
.predicate_control
= 0; /* XXX */
915 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
916 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
918 brw_set_dest(insn
, dest
); /* UW? */
919 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
921 brw_set_dp_read_message(insn
,
924 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
925 1, /* target cache */
927 2, /* response_length */
933 void brw_fb_WRITE(struct brw_compile
*p
,
937 GLuint binding_table_index
,
939 GLuint response_length
,
942 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
944 insn
->header
.predicate_control
= 0; /* XXX */
945 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
946 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
948 brw_set_dest(insn
, dest
);
949 brw_set_src0(insn
, src0
);
950 brw_set_dp_write_message(insn
,
952 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
953 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
955 1, /* pixel scoreboard */
962 void brw_SAMPLE(struct brw_compile
*p
,
966 GLuint binding_table_index
,
970 GLuint response_length
,
974 GLboolean need_stall
= 0;
976 if (writemask
== 0) {
977 /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
981 /* Hardware doesn't do destination dependency checking on send
982 * instructions properly. Add a workaround which generates the
983 * dependency by other means. In practice it seems like this bug
984 * only crops up for texture samples, and only where registers are
985 * written by the send and then written again later without being
986 * read in between. Luckily for us, we already track that
987 * information and use it to modify the writemask for the
988 * instruction, so that is a guide for whether a workaround is
991 if (writemask
!= WRITEMASK_XYZW
) {
992 GLuint dst_offset
= 0;
993 GLuint i
, newmask
= 0, len
= 0;
995 for (i
= 0; i
< 4; i
++) {
996 if (writemask
& (1<<i
))
1000 for (; i
< 4; i
++) {
1001 if (!(writemask
& (1<<i
)))
1007 if (newmask
!= writemask
) {
1009 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1012 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1014 newmask
= ~newmask
& WRITEMASK_XYZW
;
1016 brw_push_insn_state(p
);
1018 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1019 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1021 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1022 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1024 brw_pop_insn_state(p
);
1026 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1027 dest
= offset(dest
, dst_offset
);
1028 response_length
= len
* 2;
1033 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1035 insn
->header
.predicate_control
= 0; /* XXX */
1036 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1037 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1039 brw_set_dest(insn
, dest
);
1040 brw_set_src0(insn
, src0
);
1041 brw_set_sampler_message(p
->brw
, insn
,
1042 binding_table_index
,
1051 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1053 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1055 brw_push_insn_state(p
);
1056 brw_set_compression_control(p
, GL_FALSE
);
1057 brw_MOV(p
, reg
, reg
);
1058 brw_pop_insn_state(p
);
1063 /* All these variables are pretty confusing - we might be better off
1064 * using bitmasks and macros for this, in the old style. Or perhaps
1065 * just having the caller instantiate the fields in dword3 itself.
1067 void brw_urb_WRITE(struct brw_compile
*p
,
1068 struct brw_reg dest
,
1070 struct brw_reg src0
,
1074 GLuint response_length
,
1076 GLboolean writes_complete
,
1080 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1082 assert(msg_length
< 16);
1084 brw_set_dest(insn
, dest
);
1085 brw_set_src0(insn
, src0
);
1086 brw_set_src1(insn
, brw_imm_d(0));
1088 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1090 brw_set_urb_message(insn
,