2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
59 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
60 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
62 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
63 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
65 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
66 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
67 insn
->bits1
.da1
.dest_horiz_stride
= BRW_HORIZONTAL_STRIDE_1
;
70 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
71 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
75 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
77 /* These are different sizes in align1 vs align16:
79 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
80 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
81 insn
->bits1
.ia1
.dest_horiz_stride
= BRW_HORIZONTAL_STRIDE_1
;
84 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
91 guess_execution_size(insn
, dest
);
94 static void brw_set_src0( struct brw_instruction
*insn
,
97 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
99 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
100 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
101 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
102 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
103 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
105 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
106 insn
->bits3
.ud
= reg
.dw1
.ud
;
108 /* Required to set some fields in src1 as well:
110 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
111 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
115 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
116 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
117 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
118 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
121 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
122 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
126 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
128 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
129 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
132 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
136 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
137 if (reg
.width
== BRW_WIDTH_1
&&
138 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
139 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
140 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
141 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
144 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
145 insn
->bits2
.da1
.src0_width
= reg
.width
;
146 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
150 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
151 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
152 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
153 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
155 /* This is an oddity of the fact we're using the same
156 * descriptions for registers in align_16 as align_1:
158 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
159 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
161 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
167 void brw_set_src1( struct brw_instruction
*insn
,
170 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
172 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
173 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
174 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
175 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
177 /* Only src1 can be immediate in two-argument instructions.
179 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
181 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
182 insn
->bits3
.ud
= reg
.dw1
.ud
;
185 /* This is a hardware restriction, which may or may not be lifted
188 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
189 //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
191 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
192 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
193 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
196 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
197 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
200 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
201 if (reg
.width
== BRW_WIDTH_1
&&
202 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
203 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
204 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
205 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
208 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
209 insn
->bits3
.da1
.src1_width
= reg
.width
;
210 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
214 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
215 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
216 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
217 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
219 /* This is an oddity of the fact we're using the same
220 * descriptions for registers in align_16 as align_1:
222 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
223 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
225 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
232 static void brw_set_math_message( struct brw_instruction
*insn
,
234 GLuint response_length
,
237 GLboolean low_precision
,
241 brw_set_src1(insn
, brw_imm_d(0));
243 insn
->bits3
.math
.function
= function
;
244 insn
->bits3
.math
.int_type
= integer_type
;
245 insn
->bits3
.math
.precision
= low_precision
;
246 insn
->bits3
.math
.saturate
= saturate
;
247 insn
->bits3
.math
.data_type
= dataType
;
248 insn
->bits3
.math
.response_length
= response_length
;
249 insn
->bits3
.math
.msg_length
= msg_length
;
250 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
251 insn
->bits3
.math
.end_of_thread
= 0;
254 static void brw_set_urb_message( struct brw_instruction
*insn
,
258 GLuint response_length
,
259 GLboolean end_of_thread
,
262 GLuint swizzle_control
)
264 brw_set_src1(insn
, brw_imm_d(0));
266 insn
->bits3
.urb
.opcode
= 0; /* ? */
267 insn
->bits3
.urb
.offset
= offset
;
268 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
269 insn
->bits3
.urb
.allocate
= allocate
;
270 insn
->bits3
.urb
.used
= used
; /* ? */
271 insn
->bits3
.urb
.complete
= complete
;
272 insn
->bits3
.urb
.response_length
= response_length
;
273 insn
->bits3
.urb
.msg_length
= msg_length
;
274 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
275 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
278 static void brw_set_dp_write_message( struct brw_instruction
*insn
,
279 GLuint binding_table_index
,
283 GLuint pixel_scoreboard_clear
,
284 GLuint response_length
,
285 GLuint end_of_thread
)
287 brw_set_src1(insn
, brw_imm_d(0));
289 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
290 insn
->bits3
.dp_write
.msg_control
= msg_control
;
291 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
292 insn
->bits3
.dp_write
.msg_type
= msg_type
;
293 insn
->bits3
.dp_write
.send_commit_msg
= 0;
294 insn
->bits3
.dp_write
.response_length
= response_length
;
295 insn
->bits3
.dp_write
.msg_length
= msg_length
;
296 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
297 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
300 static void brw_set_dp_read_message( struct brw_instruction
*insn
,
301 GLuint binding_table_index
,
306 GLuint response_length
,
307 GLuint end_of_thread
)
309 brw_set_src1(insn
, brw_imm_d(0));
311 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
;
312 insn
->bits3
.dp_read
.msg_control
= msg_control
;
313 insn
->bits3
.dp_read
.msg_type
= msg_type
;
314 insn
->bits3
.dp_read
.target_cache
= target_cache
;
315 insn
->bits3
.dp_read
.response_length
= response_length
;
316 insn
->bits3
.dp_read
.msg_length
= msg_length
;
317 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
318 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
;
321 static void brw_set_sampler_message(struct brw_context
*brw
,
322 struct brw_instruction
*insn
,
323 GLuint binding_table_index
,
326 GLuint response_length
,
330 brw_set_src1(insn
, brw_imm_d(0));
332 if (BRW_IS_IGD(brw
)) {
333 insn
->bits3
.sampler_igd
.binding_table_index
= binding_table_index
;
334 insn
->bits3
.sampler_igd
.sampler
= sampler
;
335 insn
->bits3
.sampler_igd
.msg_type
= msg_type
;
336 insn
->bits3
.sampler_igd
.response_length
= response_length
;
337 insn
->bits3
.sampler_igd
.msg_length
= msg_length
;
338 insn
->bits3
.sampler_igd
.end_of_thread
= eot
;
339 insn
->bits3
.sampler_igd
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
341 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
342 insn
->bits3
.sampler
.sampler
= sampler
;
343 insn
->bits3
.sampler
.msg_type
= msg_type
;
344 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
345 insn
->bits3
.sampler
.response_length
= response_length
;
346 insn
->bits3
.sampler
.msg_length
= msg_length
;
347 insn
->bits3
.sampler
.end_of_thread
= eot
;
348 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
354 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
357 struct brw_instruction
*insn
;
359 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
361 insn
= &p
->store
[p
->nr_insn
++];
362 memcpy(insn
, p
->current
, sizeof(*insn
));
364 /* Reset this one-shot flag:
367 if (p
->current
->header
.destreg__conditonalmod
) {
368 p
->current
->header
.destreg__conditonalmod
= 0;
369 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
372 insn
->header
.opcode
= opcode
;
377 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
382 struct brw_instruction
*insn
= next_insn(p
, opcode
);
383 brw_set_dest(insn
, dest
);
384 brw_set_src0(insn
, src
);
388 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
392 struct brw_reg src1
)
394 struct brw_instruction
*insn
= next_insn(p
, opcode
);
395 brw_set_dest(insn
, dest
);
396 brw_set_src0(insn
, src0
);
397 brw_set_src1(insn
, src1
);
402 /***********************************************************************
403 * Convenience routines.
406 struct brw_instruction *brw_##OP(struct brw_compile *p, \
407 struct brw_reg dest, \
408 struct brw_reg src0) \
410 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
414 struct brw_instruction *brw_##OP(struct brw_compile *p, \
415 struct brw_reg dest, \
416 struct brw_reg src0, \
417 struct brw_reg src1) \
419 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
450 void brw_NOP(struct brw_compile
*p
)
452 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
453 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
454 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
455 brw_set_src1(insn
, brw_imm_ud(0x0));
462 /***********************************************************************
463 * Comparisons, if/else/endif
466 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
471 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
473 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
478 /* EU takes the value from the flag register and pushes it onto some
479 * sort of a stack (presumably merging with any flag value already on
480 * the stack). Within an if block, the flags at the top of the stack
481 * control execution on each channel of the unit, eg. on each of the
482 * 16 pixel values in our wm programs.
484 * When the matching 'else' instruction is reached (presumably by
485 * countdown of the instruction count patched in by our ELSE/ENDIF
486 * functions), the relevant flags are inverted.
488 * When the matching 'endif' instruction is reached, the flags are
489 * popped off. If the stack is now empty, normal execution resumes.
491 * No attempt is made to deal with stack overflow (14 elements?).
493 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
495 struct brw_instruction
*insn
;
497 if (p
->single_program_flow
) {
498 assert(execute_size
== BRW_EXECUTE_1
);
500 insn
= next_insn(p
, BRW_OPCODE_ADD
);
501 insn
->header
.predicate_inverse
= 1;
503 insn
= next_insn(p
, BRW_OPCODE_IF
);
506 /* Override the defaults for this instruction:
508 brw_set_dest(insn
, brw_ip_reg());
509 brw_set_src0(insn
, brw_ip_reg());
510 brw_set_src1(insn
, brw_imm_d(0x0));
512 insn
->header
.execution_size
= execute_size
;
513 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
514 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
515 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
517 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
523 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
524 struct brw_instruction
*if_insn
)
526 struct brw_instruction
*insn
;
528 if (p
->single_program_flow
) {
529 insn
= next_insn(p
, BRW_OPCODE_ADD
);
531 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
534 brw_set_dest(insn
, brw_ip_reg());
535 brw_set_src0(insn
, brw_ip_reg());
536 brw_set_src1(insn
, brw_imm_d(0x0));
538 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
539 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
540 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
542 /* Patch the if instruction to point at this instruction.
544 if (p
->single_program_flow
) {
545 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
547 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
549 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
551 if_insn
->bits3
.if_else
.jump_count
= insn
- if_insn
;
552 if_insn
->bits3
.if_else
.pop_count
= 1;
553 if_insn
->bits3
.if_else
.pad0
= 0;
559 void brw_ENDIF(struct brw_compile
*p
,
560 struct brw_instruction
*patch_insn
)
562 if (p
->single_program_flow
) {
563 /* In single program flow mode, there's no need to execute an ENDIF,
564 * since we don't need to do any stack operations, and if we're executing
565 * currently, we want to just continue executing.
567 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
569 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
571 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
573 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
575 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
576 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
577 brw_set_src1(insn
, brw_imm_d(0x0));
579 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
580 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
581 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
583 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
585 /* Patch the if or else instructions to point at this or the next
586 * instruction respectively.
588 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
589 /* Automagically turn it into an IFF:
591 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
592 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
593 patch_insn
->bits3
.if_else
.pop_count
= 0;
594 patch_insn
->bits3
.if_else
.pad0
= 0;
595 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
596 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
597 patch_insn
->bits3
.if_else
.pop_count
= 1;
598 patch_insn
->bits3
.if_else
.pad0
= 0;
603 /* Also pop item off the stack in the endif instruction:
605 insn
->bits3
.if_else
.jump_count
= 0;
606 insn
->bits3
.if_else
.pop_count
= 1;
607 insn
->bits3
.if_else
.pad0
= 0;
611 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
)
613 struct brw_instruction
*insn
;
614 insn
= next_insn(p
, BRW_OPCODE_BREAK
);
615 brw_set_dest(insn
, brw_ip_reg());
616 brw_set_src0(insn
, brw_ip_reg());
617 brw_set_src1(insn
, brw_imm_d(0x0));
618 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
619 insn
->header
.execution_size
= BRW_EXECUTE_8
;
620 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
621 insn
->bits3
.if_else
.pad0
= 0;
625 struct brw_instruction
*brw_CONT(struct brw_compile
*p
)
627 struct brw_instruction
*insn
;
628 insn
= next_insn(p
, BRW_OPCODE_CONTINUE
);
629 brw_set_dest(insn
, brw_ip_reg());
630 brw_set_src0(insn
, brw_ip_reg());
631 brw_set_src1(insn
, brw_imm_d(0x0));
632 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
633 insn
->header
.execution_size
= BRW_EXECUTE_8
;
634 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
635 insn
->bits3
.if_else
.pad0
= 0;
641 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
643 if (p
->single_program_flow
) {
644 return &p
->store
[p
->nr_insn
];
646 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
648 /* Override the defaults for this instruction:
650 brw_set_dest(insn
, brw_null_reg());
651 brw_set_src0(insn
, brw_null_reg());
652 brw_set_src1(insn
, brw_null_reg());
654 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
655 insn
->header
.execution_size
= execute_size
;
656 insn
->header
.predicate_control
= BRW_PREDICATE_NONE
;
657 /* insn->header.mask_control = BRW_MASK_ENABLE; */
658 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
666 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
667 struct brw_instruction
*do_insn
)
669 struct brw_instruction
*insn
;
671 if (p
->single_program_flow
)
672 insn
= next_insn(p
, BRW_OPCODE_ADD
);
674 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
676 brw_set_dest(insn
, brw_ip_reg());
677 brw_set_src0(insn
, brw_ip_reg());
678 brw_set_src1(insn
, brw_imm_d(0x0));
680 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
682 if (p
->single_program_flow
) {
683 insn
->header
.execution_size
= BRW_EXECUTE_1
;
685 insn
->bits3
.d
= (do_insn
- insn
) * 16;
687 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
689 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
690 insn
->bits3
.if_else
.jump_count
= do_insn
- insn
+ 1;
691 insn
->bits3
.if_else
.pop_count
= 0;
692 insn
->bits3
.if_else
.pad0
= 0;
695 /* insn->header.mask_control = BRW_MASK_ENABLE; */
697 insn
->header
.mask_control
= BRW_MASK_DISABLE
;
698 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
705 void brw_land_fwd_jump(struct brw_compile
*p
,
706 struct brw_instruction
*jmp_insn
)
708 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
710 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
711 assert(jmp_insn
->bits1
.da1
.src1_reg_file
= BRW_IMMEDIATE_VALUE
);
713 jmp_insn
->bits3
.ud
= (landing
- jmp_insn
) - 1;
718 /* To integrate with the above, it makes sense that the comparison
719 * instruction should populate the flag register. It might be simpler
720 * just to use the flag reg for most WM tasks?
722 void brw_CMP(struct brw_compile
*p
,
728 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
730 insn
->header
.destreg__conditonalmod
= conditional
;
731 brw_set_dest(insn
, dest
);
732 brw_set_src0(insn
, src0
);
733 brw_set_src1(insn
, src1
);
735 /* guess_execution_size(insn, src0); */
738 /* Make it so that future instructions will use the computed flag
739 * value until brw_set_predicate_control_flag_value() is called
742 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
744 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
745 p
->flag_value
= 0xff;
751 /***********************************************************************
752 * Helpers for the various SEND message types:
757 void brw_math( struct brw_compile
*p
,
766 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
767 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
768 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
770 /* Example code doesn't set predicate_control for send
773 insn
->header
.predicate_control
= 0;
774 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
776 brw_set_dest(insn
, dest
);
777 brw_set_src0(insn
, src
);
778 brw_set_math_message(insn
,
779 msg_length
, response_length
,
781 BRW_MATH_INTEGER_UNSIGNED
,
787 /* Use 2 send instructions to invert 16 elements
789 void brw_math_16( struct brw_compile
*p
,
797 struct brw_instruction
*insn
;
798 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
799 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
801 /* First instruction:
803 brw_push_insn_state(p
);
804 brw_set_predicate_control_flag_value(p
, 0xff);
805 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
807 insn
= next_insn(p
, BRW_OPCODE_SEND
);
808 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
810 brw_set_dest(insn
, dest
);
811 brw_set_src0(insn
, src
);
812 brw_set_math_message(insn
,
813 msg_length
, response_length
,
815 BRW_MATH_INTEGER_UNSIGNED
,
818 BRW_MATH_DATA_VECTOR
);
820 /* Second instruction:
822 insn
= next_insn(p
, BRW_OPCODE_SEND
);
823 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
824 insn
->header
.destreg__conditonalmod
= msg_reg_nr
+1;
826 brw_set_dest(insn
, offset(dest
,1));
827 brw_set_src0(insn
, src
);
828 brw_set_math_message(insn
,
829 msg_length
, response_length
,
831 BRW_MATH_INTEGER_UNSIGNED
,
834 BRW_MATH_DATA_VECTOR
);
836 brw_pop_insn_state(p
);
842 void brw_dp_WRITE_16( struct brw_compile
*p
,
845 GLuint scratch_offset
)
848 brw_push_insn_state(p
);
849 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
850 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
853 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
854 brw_imm_d(scratch_offset
));
856 brw_pop_insn_state(p
);
860 GLuint msg_length
= 3;
861 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
862 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
864 insn
->header
.predicate_control
= 0; /* XXX */
865 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
866 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
868 brw_set_dest(insn
, dest
);
869 brw_set_src0(insn
, src
);
871 brw_set_dp_write_message(insn
,
873 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
874 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
876 0, /* pixel scoreboard */
877 0, /* response_length */
884 void brw_dp_READ_16( struct brw_compile
*p
,
887 GLuint scratch_offset
)
890 brw_push_insn_state(p
);
891 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
892 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
895 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
896 brw_imm_d(scratch_offset
));
898 brw_pop_insn_state(p
);
902 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
904 insn
->header
.predicate_control
= 0; /* XXX */
905 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
906 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
908 brw_set_dest(insn
, dest
); /* UW? */
909 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
911 brw_set_dp_read_message(insn
,
914 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
915 1, /* target cache */
917 2, /* response_length */
923 void brw_fb_WRITE(struct brw_compile
*p
,
927 GLuint binding_table_index
,
929 GLuint response_length
,
932 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
934 insn
->header
.predicate_control
= 0; /* XXX */
935 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
936 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
938 brw_set_dest(insn
, dest
);
939 brw_set_src0(insn
, src0
);
940 brw_set_dp_write_message(insn
,
942 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
943 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
945 1, /* pixel scoreboard */
952 void brw_SAMPLE(struct brw_compile
*p
,
956 GLuint binding_table_index
,
960 GLuint response_length
,
964 GLboolean need_stall
= 0;
967 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
971 /* Hardware doesn't do destination dependency checking on send
972 * instructions properly. Add a workaround which generates the
973 * dependency by other means. In practice it seems like this bug
974 * only crops up for texture samples, and only where registers are
975 * written by the send and then written again later without being
976 * read in between. Luckily for us, we already track that
977 * information and use it to modify the writemask for the
978 * instruction, so that is a guide for whether a workaround is
981 if (writemask
!= WRITEMASK_XYZW
) {
982 GLuint dst_offset
= 0;
983 GLuint i
, newmask
= 0, len
= 0;
985 for (i
= 0; i
< 4; i
++) {
986 if (writemask
& (1<<i
))
991 if (!(writemask
& (1<<i
)))
997 if (newmask
!= writemask
) {
999 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1002 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
1004 newmask
= ~newmask
& WRITEMASK_XYZW
;
1006 brw_push_insn_state(p
);
1008 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1009 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1011 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
1012 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
1014 brw_pop_insn_state(p
);
1016 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
1017 dest
= offset(dest
, dst_offset
);
1018 response_length
= len
* 2;
1023 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1025 insn
->header
.predicate_control
= 0; /* XXX */
1026 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
1027 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1029 brw_set_dest(insn
, dest
);
1030 brw_set_src0(insn
, src0
);
1031 brw_set_sampler_message(p
->brw
, insn
,
1032 binding_table_index
,
1042 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1044 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1046 brw_push_insn_state(p
);
1047 brw_set_compression_control(p
, GL_FALSE
);
1048 brw_MOV(p
, reg
, reg
);
1049 brw_pop_insn_state(p
);
1054 /* All these variables are pretty confusing - we might be better off
1055 * using bitmasks and macros for this, in the old style. Or perhaps
1056 * just having the caller instantiate the fields in dword3 itself.
1058 void brw_urb_WRITE(struct brw_compile
*p
,
1059 struct brw_reg dest
,
1061 struct brw_reg src0
,
1065 GLuint response_length
,
1067 GLboolean writes_complete
,
1071 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1073 assert(msg_length
< 16);
1075 brw_set_dest(insn
, dest
);
1076 brw_set_src0(insn
, src0
);
1077 brw_set_src1(insn
, brw_imm_d(0));
1079 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1081 brw_set_urb_message(insn
,