2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "brw_context.h"
34 #include "brw_defines.h"
40 /***********************************************************************
41 * Internal helper for constructing instructions
44 static void guess_execution_size( struct brw_instruction
*insn
,
47 if (reg
.width
== BRW_WIDTH_8
&&
48 insn
->header
.compression_control
== BRW_COMPRESSION_COMPRESSED
)
49 insn
->header
.execution_size
= BRW_EXECUTE_16
;
51 insn
->header
.execution_size
= reg
.width
; /* note - definitions are compatible */
55 static void brw_set_dest( struct brw_instruction
*insn
,
58 insn
->bits1
.da1
.dest_reg_file
= dest
.file
;
59 insn
->bits1
.da1
.dest_reg_type
= dest
.type
;
60 insn
->bits1
.da1
.dest_address_mode
= dest
.address_mode
;
62 if (dest
.address_mode
== BRW_ADDRESS_DIRECT
) {
63 insn
->bits1
.da1
.dest_reg_nr
= dest
.nr
;
65 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
66 insn
->bits1
.da1
.dest_subreg_nr
= dest
.subnr
;
67 insn
->bits1
.da1
.dest_horiz_stride
= BRW_HORIZONTAL_STRIDE_1
;
70 insn
->bits1
.da16
.dest_subreg_nr
= dest
.subnr
/ 16;
71 insn
->bits1
.da16
.dest_writemask
= dest
.dw1
.bits
.writemask
;
75 insn
->bits1
.ia1
.dest_subreg_nr
= dest
.subnr
;
77 /* These are different sizes in align1 vs align16:
79 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
80 insn
->bits1
.ia1
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
81 insn
->bits1
.ia1
.dest_horiz_stride
= BRW_HORIZONTAL_STRIDE_1
;
84 insn
->bits1
.ia16
.dest_indirect_offset
= dest
.dw1
.bits
.indirect_offset
;
88 /* NEW: Set the execution size based on dest.width and
89 * insn->compression_control:
91 guess_execution_size(insn
, dest
);
94 static void brw_set_src0( struct brw_instruction
*insn
,
97 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
99 insn
->bits1
.da1
.src0_reg_file
= reg
.file
;
100 insn
->bits1
.da1
.src0_reg_type
= reg
.type
;
101 insn
->bits2
.da1
.src0_abs
= reg
.abs
;
102 insn
->bits2
.da1
.src0_negate
= reg
.negate
;
103 insn
->bits2
.da1
.src0_address_mode
= reg
.address_mode
;
105 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
106 insn
->bits3
.ud
= reg
.dw1
.ud
;
108 /* Required to set some fields in src1 as well:
110 insn
->bits1
.da1
.src1_reg_file
= 0; /* arf */
111 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
115 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
116 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
117 insn
->bits2
.da1
.src0_subreg_nr
= reg
.subnr
;
118 insn
->bits2
.da1
.src0_reg_nr
= reg
.nr
;
121 insn
->bits2
.da16
.src0_subreg_nr
= reg
.subnr
/ 16;
122 insn
->bits2
.da16
.src0_reg_nr
= reg
.nr
;
126 insn
->bits2
.ia1
.src0_subreg_nr
= reg
.subnr
;
128 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
129 insn
->bits2
.ia1
.src0_indirect_offset
= reg
.dw1
.bits
.indirect_offset
;
132 insn
->bits2
.ia16
.src0_subreg_nr
= reg
.dw1
.bits
.indirect_offset
;
136 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
137 if (reg
.width
== BRW_WIDTH_1
&&
138 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
139 insn
->bits2
.da1
.src0_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
140 insn
->bits2
.da1
.src0_width
= BRW_WIDTH_1
;
141 insn
->bits2
.da1
.src0_vert_stride
= BRW_VERTICAL_STRIDE_0
;
144 insn
->bits2
.da1
.src0_horiz_stride
= reg
.hstride
;
145 insn
->bits2
.da1
.src0_width
= reg
.width
;
146 insn
->bits2
.da1
.src0_vert_stride
= reg
.vstride
;
150 insn
->bits2
.da16
.src0_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
151 insn
->bits2
.da16
.src0_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
152 insn
->bits2
.da16
.src0_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
153 insn
->bits2
.da16
.src0_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
155 /* This is an oddity of the fact we're using the same
156 * descriptions for registers in align_16 as align_1:
158 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
159 insn
->bits2
.da16
.src0_vert_stride
= BRW_VERTICAL_STRIDE_4
;
161 insn
->bits2
.da16
.src0_vert_stride
= reg
.vstride
;
167 static void brw_set_src1( struct brw_instruction
*insn
,
170 assert(reg
.file
!= BRW_MESSAGE_REGISTER_FILE
);
172 insn
->bits1
.da1
.src1_reg_file
= reg
.file
;
173 insn
->bits1
.da1
.src1_reg_type
= reg
.type
;
174 insn
->bits3
.da1
.src1_abs
= reg
.abs
;
175 insn
->bits3
.da1
.src1_negate
= reg
.negate
;
177 /* Only src1 can be immediate in two-argument instructions.
179 assert(insn
->bits1
.da1
.src0_reg_file
!= BRW_IMMEDIATE_VALUE
);
181 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
182 insn
->bits3
.ud
= reg
.dw1
.ud
;
185 /* This is a hardware restriction, which may or may not be lifted
188 assert (reg
.address_mode
== BRW_ADDRESS_DIRECT
);
189 assert (reg
.file
== BRW_GENERAL_REGISTER_FILE
);
191 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
192 insn
->bits3
.da1
.src1_subreg_nr
= reg
.subnr
;
193 insn
->bits3
.da1
.src1_reg_nr
= reg
.nr
;
196 insn
->bits3
.da16
.src1_subreg_nr
= reg
.subnr
/ 16;
197 insn
->bits3
.da16
.src1_reg_nr
= reg
.nr
;
200 if (insn
->header
.access_mode
== BRW_ALIGN_1
) {
201 if (reg
.width
== BRW_WIDTH_1
&&
202 insn
->header
.execution_size
== BRW_EXECUTE_1
) {
203 insn
->bits3
.da1
.src1_horiz_stride
= BRW_HORIZONTAL_STRIDE_0
;
204 insn
->bits3
.da1
.src1_width
= BRW_WIDTH_1
;
205 insn
->bits3
.da1
.src1_vert_stride
= BRW_VERTICAL_STRIDE_0
;
208 insn
->bits3
.da1
.src1_horiz_stride
= reg
.hstride
;
209 insn
->bits3
.da1
.src1_width
= reg
.width
;
210 insn
->bits3
.da1
.src1_vert_stride
= reg
.vstride
;
214 insn
->bits3
.da16
.src1_swz_x
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_X
);
215 insn
->bits3
.da16
.src1_swz_y
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Y
);
216 insn
->bits3
.da16
.src1_swz_z
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_Z
);
217 insn
->bits3
.da16
.src1_swz_w
= BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, BRW_CHANNEL_W
);
219 /* This is an oddity of the fact we're using the same
220 * descriptions for registers in align_16 as align_1:
222 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
223 insn
->bits3
.da16
.src1_vert_stride
= BRW_VERTICAL_STRIDE_4
;
225 insn
->bits3
.da16
.src1_vert_stride
= reg
.vstride
;
232 static void brw_set_math_message( struct brw_instruction
*insn
,
234 GLuint response_length
,
237 GLboolean low_precision
,
241 brw_set_src1(insn
, brw_imm_d(0));
243 insn
->bits3
.math
.function
= function
;
244 insn
->bits3
.math
.int_type
= integer_type
;
245 insn
->bits3
.math
.precision
= low_precision
;
246 insn
->bits3
.math
.saturate
= saturate
;
247 insn
->bits3
.math
.data_type
= dataType
;
248 insn
->bits3
.math
.response_length
= response_length
;
249 insn
->bits3
.math
.msg_length
= msg_length
;
250 insn
->bits3
.math
.msg_target
= BRW_MESSAGE_TARGET_MATH
;
251 insn
->bits3
.math
.end_of_thread
= 0;
254 static void brw_set_urb_message( struct brw_instruction
*insn
,
258 GLuint response_length
,
259 GLboolean end_of_thread
,
262 GLuint swizzle_control
)
264 brw_set_src1(insn
, brw_imm_d(0));
266 insn
->bits3
.urb
.opcode
= 0; /* ? */
267 insn
->bits3
.urb
.offset
= offset
;
268 insn
->bits3
.urb
.swizzle_control
= swizzle_control
;
269 insn
->bits3
.urb
.allocate
= allocate
;
270 insn
->bits3
.urb
.used
= used
; /* ? */
271 insn
->bits3
.urb
.complete
= complete
;
272 insn
->bits3
.urb
.response_length
= response_length
;
273 insn
->bits3
.urb
.msg_length
= msg_length
;
274 insn
->bits3
.urb
.msg_target
= BRW_MESSAGE_TARGET_URB
;
275 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
278 static void brw_set_dp_write_message( struct brw_instruction
*insn
,
279 GLuint binding_table_index
,
283 GLuint pixel_scoreboard_clear
,
284 GLuint response_length
,
285 GLuint end_of_thread
)
287 brw_set_src1(insn
, brw_imm_d(0));
289 insn
->bits3
.dp_write
.binding_table_index
= binding_table_index
;
290 insn
->bits3
.dp_write
.msg_control
= msg_control
;
291 insn
->bits3
.dp_write
.pixel_scoreboard_clear
= pixel_scoreboard_clear
;
292 insn
->bits3
.dp_write
.msg_type
= msg_type
;
293 insn
->bits3
.dp_write
.send_commit_msg
= 0;
294 insn
->bits3
.dp_write
.response_length
= response_length
;
295 insn
->bits3
.dp_write
.msg_length
= msg_length
;
296 insn
->bits3
.dp_write
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_WRITE
;
297 insn
->bits3
.urb
.end_of_thread
= end_of_thread
;
300 static void brw_set_dp_read_message( struct brw_instruction
*insn
,
301 GLuint binding_table_index
,
306 GLuint response_length
,
307 GLuint end_of_thread
)
309 brw_set_src1(insn
, brw_imm_d(0));
311 insn
->bits3
.dp_read
.binding_table_index
= binding_table_index
;
312 insn
->bits3
.dp_read
.msg_control
= msg_control
;
313 insn
->bits3
.dp_read
.msg_type
= msg_type
;
314 insn
->bits3
.dp_read
.target_cache
= target_cache
;
315 insn
->bits3
.dp_read
.response_length
= response_length
;
316 insn
->bits3
.dp_read
.msg_length
= msg_length
;
317 insn
->bits3
.dp_read
.msg_target
= BRW_MESSAGE_TARGET_DATAPORT_READ
;
318 insn
->bits3
.dp_read
.end_of_thread
= end_of_thread
;
321 static void brw_set_sampler_message( struct brw_instruction
*insn
,
322 GLuint binding_table_index
,
325 GLuint response_length
,
329 brw_set_src1(insn
, brw_imm_d(0));
331 insn
->bits3
.sampler
.binding_table_index
= binding_table_index
;
332 insn
->bits3
.sampler
.sampler
= sampler
;
333 insn
->bits3
.sampler
.msg_type
= msg_type
;
334 insn
->bits3
.sampler
.return_format
= BRW_SAMPLER_RETURN_FORMAT_FLOAT32
;
335 insn
->bits3
.sampler
.response_length
= response_length
;
336 insn
->bits3
.sampler
.msg_length
= msg_length
;
337 insn
->bits3
.sampler
.end_of_thread
= eot
;
338 insn
->bits3
.sampler
.msg_target
= BRW_MESSAGE_TARGET_SAMPLER
;
343 static struct brw_instruction
*next_insn( struct brw_compile
*p
,
346 struct brw_instruction
*insn
;
348 assert(p
->nr_insn
+ 1 < BRW_EU_MAX_INSN
);
350 insn
= &p
->store
[p
->nr_insn
++];
351 memcpy(insn
, p
->current
, sizeof(*insn
));
353 /* Reset this one-shot flag:
356 if (p
->current
->header
.destreg__conditonalmod
) {
357 p
->current
->header
.destreg__conditonalmod
= 0;
358 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
361 insn
->header
.opcode
= opcode
;
366 static struct brw_instruction
*brw_alu1( struct brw_compile
*p
,
371 struct brw_instruction
*insn
= next_insn(p
, opcode
);
372 brw_set_dest(insn
, dest
);
373 brw_set_src0(insn
, src
);
377 static struct brw_instruction
*brw_alu2(struct brw_compile
*p
,
381 struct brw_reg src1
)
383 struct brw_instruction
*insn
= next_insn(p
, opcode
);
384 brw_set_dest(insn
, dest
);
385 brw_set_src0(insn
, src0
);
386 brw_set_src1(insn
, src1
);
391 /***********************************************************************
392 * Convenience routines.
395 struct brw_instruction *brw_##OP(struct brw_compile *p, \
396 struct brw_reg dest, \
397 struct brw_reg src0) \
399 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
403 struct brw_instruction *brw_##OP(struct brw_compile *p, \
404 struct brw_reg dest, \
405 struct brw_reg src0, \
406 struct brw_reg src1) \
408 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
439 void brw_NOP(struct brw_compile
*p
)
441 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_NOP
);
442 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
443 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
444 brw_set_src1(insn
, brw_imm_ud(0x0));
451 /***********************************************************************
452 * Comparisons, if/else/endif
455 struct brw_instruction
*brw_JMPI(struct brw_compile
*p
,
460 struct brw_instruction
*insn
= brw_alu2(p
, BRW_OPCODE_JMPI
, dest
, src0
, src1
);
462 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
467 /* EU takes the value from the flag register and pushes it onto some
468 * sort of a stack (presumably merging with any flag value already on
469 * the stack). Within an if block, the flags at the top of the stack
470 * control execution on each channel of the unit, eg. on each of the
471 * 16 pixel values in our wm programs.
473 * When the matching 'else' instruction is reached (presumably by
474 * countdown of the instruction count patched in by our ELSE/ENDIF
475 * functions), the relevant flags are inverted.
477 * When the matching 'endif' instruction is reached, the flags are
478 * popped off. If the stack is now empty, normal execution resumes.
480 * No attempt is made to deal with stack overflow (14 elements?).
482 struct brw_instruction
*brw_IF(struct brw_compile
*p
, GLuint execute_size
)
484 struct brw_instruction
*insn
;
486 if (p
->single_program_flow
) {
487 assert(execute_size
== BRW_EXECUTE_1
);
489 insn
= next_insn(p
, BRW_OPCODE_ADD
);
490 insn
->header
.predicate_inverse
= 1;
492 insn
= next_insn(p
, BRW_OPCODE_IF
);
495 /* Override the defaults for this instruction:
497 brw_set_dest(insn
, brw_ip_reg());
498 brw_set_src0(insn
, brw_ip_reg());
499 brw_set_src1(insn
, brw_imm_d(0x0));
501 insn
->header
.execution_size
= execute_size
;
502 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
503 insn
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
504 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
506 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
512 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
513 struct brw_instruction
*if_insn
)
515 struct brw_instruction
*insn
;
517 if (p
->single_program_flow
) {
518 insn
= next_insn(p
, BRW_OPCODE_ADD
);
520 insn
= next_insn(p
, BRW_OPCODE_ELSE
);
523 brw_set_dest(insn
, brw_ip_reg());
524 brw_set_src0(insn
, brw_ip_reg());
525 brw_set_src1(insn
, brw_imm_d(0x0));
527 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
528 insn
->header
.execution_size
= if_insn
->header
.execution_size
;
529 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
531 /* Patch the if instruction to point at this instruction.
533 if (p
->single_program_flow
) {
534 assert(if_insn
->header
.opcode
== BRW_OPCODE_ADD
);
536 if_insn
->bits3
.ud
= (insn
- if_insn
+ 1) * 16;
538 assert(if_insn
->header
.opcode
== BRW_OPCODE_IF
);
540 if_insn
->bits3
.if_else
.jump_count
= insn
- if_insn
;
541 if_insn
->bits3
.if_else
.pop_count
= 1;
542 if_insn
->bits3
.if_else
.pad0
= 0;
548 void brw_ENDIF(struct brw_compile
*p
,
549 struct brw_instruction
*patch_insn
)
551 if (p
->single_program_flow
) {
552 /* In single program flow mode, there's no need to execute an ENDIF,
553 * since we don't need to do any stack operations, and if we're executing
554 * currently, we want to just continue executing.
556 struct brw_instruction
*next
= &p
->store
[p
->nr_insn
];
558 assert(patch_insn
->header
.opcode
== BRW_OPCODE_ADD
);
560 patch_insn
->bits3
.ud
= (next
- patch_insn
) * 16;
562 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_ENDIF
);
564 brw_set_dest(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
565 brw_set_src0(insn
, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD
));
566 brw_set_src1(insn
, brw_imm_d(0x0));
568 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
569 insn
->header
.execution_size
= patch_insn
->header
.execution_size
;
570 insn
->header
.mask_control
= BRW_MASK_ENABLE
;
572 assert(patch_insn
->bits3
.if_else
.jump_count
== 0);
574 /* Patch the if or else instructions to point at this or the next
575 * instruction respectively.
577 if (patch_insn
->header
.opcode
== BRW_OPCODE_IF
) {
578 /* Automagically turn it into an IFF:
580 patch_insn
->header
.opcode
= BRW_OPCODE_IFF
;
581 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
582 patch_insn
->bits3
.if_else
.pop_count
= 0;
583 patch_insn
->bits3
.if_else
.pad0
= 0;
584 } else if (patch_insn
->header
.opcode
== BRW_OPCODE_ELSE
) {
585 patch_insn
->bits3
.if_else
.jump_count
= insn
- patch_insn
+ 1;
586 patch_insn
->bits3
.if_else
.pop_count
= 1;
587 patch_insn
->bits3
.if_else
.pad0
= 0;
592 /* Also pop item off the stack in the endif instruction:
594 insn
->bits3
.if_else
.jump_count
= 0;
595 insn
->bits3
.if_else
.pop_count
= 1;
596 insn
->bits3
.if_else
.pad0
= 0;
602 struct brw_instruction
*brw_DO(struct brw_compile
*p
, GLuint execute_size
)
604 if (p
->single_program_flow
) {
605 return &p
->store
[p
->nr_insn
];
607 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_DO
);
609 /* Override the defaults for this instruction:
611 brw_set_dest(insn
, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD
));
612 brw_set_src0(insn
, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD
));
613 brw_set_src1(insn
, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD
));
615 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
616 insn
->header
.execution_size
= execute_size
;
617 /* insn->header.mask_control = BRW_MASK_ENABLE; */
625 void brw_WHILE(struct brw_compile
*p
,
626 struct brw_instruction
*do_insn
)
628 struct brw_instruction
*insn
;
630 if (p
->single_program_flow
)
631 insn
= next_insn(p
, BRW_OPCODE_ADD
);
633 insn
= next_insn(p
, BRW_OPCODE_WHILE
);
635 brw_set_dest(insn
, brw_ip_reg());
636 brw_set_src0(insn
, brw_ip_reg());
637 brw_set_src1(insn
, brw_imm_d(0x0));
639 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
641 if (p
->single_program_flow
) {
642 insn
->header
.execution_size
= BRW_EXECUTE_1
;
644 insn
->bits3
.d
= (do_insn
- insn
) * 16;
646 insn
->header
.execution_size
= do_insn
->header
.execution_size
;
648 assert(do_insn
->header
.opcode
== BRW_OPCODE_DO
);
649 insn
->bits3
.if_else
.jump_count
= do_insn
- insn
;
650 insn
->bits3
.if_else
.pop_count
= 0;
651 insn
->bits3
.if_else
.pad0
= 0;
654 /* insn->header.mask_control = BRW_MASK_ENABLE; */
656 p
->current
->header
.predicate_control
= BRW_PREDICATE_NONE
;
662 void brw_land_fwd_jump(struct brw_compile
*p
,
663 struct brw_instruction
*jmp_insn
)
665 struct brw_instruction
*landing
= &p
->store
[p
->nr_insn
];
667 assert(jmp_insn
->header
.opcode
== BRW_OPCODE_JMPI
);
668 assert(jmp_insn
->bits1
.da1
.src1_reg_file
= BRW_IMMEDIATE_VALUE
);
670 jmp_insn
->bits3
.ud
= (landing
- jmp_insn
) - 1;
675 /* To integrate with the above, it makes sense that the comparison
676 * instruction should populate the flag register. It might be simpler
677 * just to use the flag reg for most WM tasks?
679 void brw_CMP(struct brw_compile
*p
,
685 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_CMP
);
687 insn
->header
.destreg__conditonalmod
= conditional
;
688 brw_set_dest(insn
, dest
);
689 brw_set_src0(insn
, src0
);
690 brw_set_src1(insn
, src1
);
692 /* guess_execution_size(insn, src0); */
695 /* Make it so that future instructions will use the computed flag
696 * value until brw_set_predicate_control_flag_value() is called
699 if (dest
.file
== BRW_ARCHITECTURE_REGISTER_FILE
&&
701 p
->current
->header
.predicate_control
= BRW_PREDICATE_NORMAL
;
702 p
->flag_value
= 0xff;
708 /***********************************************************************
709 * Helpers for the various SEND message types:
714 void brw_math( struct brw_compile
*p
,
723 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
724 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
725 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
727 /* Example code doesn't set predicate_control for send
730 insn
->header
.predicate_control
= 0;
731 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
733 brw_set_dest(insn
, dest
);
734 brw_set_src0(insn
, src
);
735 brw_set_math_message(insn
,
736 msg_length
, response_length
,
738 BRW_MATH_INTEGER_UNSIGNED
,
744 /* Use 2 send instructions to invert 16 elements
746 void brw_math_16( struct brw_compile
*p
,
754 struct brw_instruction
*insn
;
755 GLuint msg_length
= (function
== BRW_MATH_FUNCTION_POW
) ? 2 : 1;
756 GLuint response_length
= (function
== BRW_MATH_FUNCTION_SINCOS
) ? 2 : 1;
758 /* First instruction:
760 brw_push_insn_state(p
);
761 brw_set_predicate_control_flag_value(p
, 0xff);
762 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
764 insn
= next_insn(p
, BRW_OPCODE_SEND
);
765 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
767 brw_set_dest(insn
, dest
);
768 brw_set_src0(insn
, src
);
769 brw_set_math_message(insn
,
770 msg_length
, response_length
,
772 BRW_MATH_INTEGER_UNSIGNED
,
775 BRW_MATH_DATA_VECTOR
);
777 /* Second instruction:
779 insn
= next_insn(p
, BRW_OPCODE_SEND
);
780 insn
->header
.compression_control
= BRW_COMPRESSION_2NDHALF
;
781 insn
->header
.destreg__conditonalmod
= msg_reg_nr
+1;
783 brw_set_dest(insn
, offset(dest
,1));
784 brw_set_src0(insn
, src
);
785 brw_set_math_message(insn
,
786 msg_length
, response_length
,
788 BRW_MATH_INTEGER_UNSIGNED
,
791 BRW_MATH_DATA_VECTOR
);
793 brw_pop_insn_state(p
);
799 void brw_dp_WRITE_16( struct brw_compile
*p
,
802 GLuint scratch_offset
)
805 brw_push_insn_state(p
);
806 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
807 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
810 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
811 brw_imm_d(scratch_offset
));
813 brw_pop_insn_state(p
);
817 GLuint msg_length
= 3;
818 struct brw_reg dest
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
819 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
821 insn
->header
.predicate_control
= 0; /* XXX */
822 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
823 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
825 brw_set_dest(insn
, dest
);
826 brw_set_src0(insn
, src
);
828 brw_set_dp_write_message(insn
,
830 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS
, /* msg_control */
831 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE
, /* msg_type */
833 0, /* pixel scoreboard */
834 0, /* response_length */
841 void brw_dp_READ_16( struct brw_compile
*p
,
844 GLuint scratch_offset
)
847 brw_push_insn_state(p
);
848 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
849 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
852 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D
),
853 brw_imm_d(scratch_offset
));
855 brw_pop_insn_state(p
);
859 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
861 insn
->header
.predicate_control
= 0; /* XXX */
862 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
863 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
865 brw_set_dest(insn
, dest
); /* UW? */
866 brw_set_src0(insn
, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
));
868 brw_set_dp_read_message(insn
,
871 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
, /* msg_type */
872 1, /* target cache */
874 2, /* response_length */
880 void brw_fb_WRITE(struct brw_compile
*p
,
884 GLuint binding_table_index
,
886 GLuint response_length
,
889 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
891 insn
->header
.predicate_control
= 0; /* XXX */
892 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
893 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
895 brw_set_dest(insn
, dest
);
896 brw_set_src0(insn
, src0
);
897 brw_set_dp_write_message(insn
,
899 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
, /* msg_control */
900 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
, /* msg_type */
902 1, /* pixel scoreboard */
909 void brw_SAMPLE(struct brw_compile
*p
,
913 GLuint binding_table_index
,
917 GLuint response_length
,
921 GLboolean need_stall
= 0;
924 /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
928 /* Hardware doesn't do destination dependency checking on send
929 * instructions properly. Add a workaround which generates the
930 * dependency by other means. In practice it seems like this bug
931 * only crops up for texture samples, and only where registers are
932 * written by the send and then written again later without being
933 * read in between. Luckily for us, we already track that
934 * information and use it to modify the writemask for the
935 * instruction, so that is a guide for whether a workaround is
938 if (writemask
!= WRITEMASK_XYZW
) {
939 GLuint dst_offset
= 0;
940 GLuint i
, newmask
= 0, len
= 0;
942 for (i
= 0; i
< 4; i
++) {
943 if (writemask
& (1<<i
))
948 if (!(writemask
& (1<<i
)))
954 if (newmask
!= writemask
) {
956 /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
959 struct brw_reg m1
= brw_message_reg(msg_reg_nr
);
961 newmask
= ~newmask
& WRITEMASK_XYZW
;
963 brw_push_insn_state(p
);
965 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
966 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
968 brw_MOV(p
, m1
, brw_vec8_grf(0,0));
969 brw_MOV(p
, get_element_ud(m1
, 2), brw_imm_ud(newmask
<< 12));
971 brw_pop_insn_state(p
);
973 src0
= retype(brw_null_reg(), BRW_REGISTER_TYPE_UW
);
974 dest
= offset(dest
, dst_offset
);
975 response_length
= len
* 2;
980 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
982 insn
->header
.predicate_control
= 0; /* XXX */
983 insn
->header
.compression_control
= BRW_COMPRESSION_NONE
;
984 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
986 brw_set_dest(insn
, dest
);
987 brw_set_src0(insn
, src0
);
988 brw_set_sampler_message(insn
,
999 struct brw_reg reg
= vec8(offset(dest
, response_length
-1));
1001 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
1003 brw_push_insn_state(p
);
1004 brw_set_compression_control(p
, GL_FALSE
);
1005 brw_MOV(p
, reg
, reg
);
1006 brw_pop_insn_state(p
);
1011 /* All these variables are pretty confusing - we might be better off
1012 * using bitmasks and macros for this, in the old style. Or perhaps
1013 * just having the caller instantiate the fields in dword3 itself.
1015 void brw_urb_WRITE(struct brw_compile
*p
,
1016 struct brw_reg dest
,
1018 struct brw_reg src0
,
1022 GLuint response_length
,
1024 GLboolean writes_complete
,
1028 struct brw_instruction
*insn
= next_insn(p
, BRW_OPCODE_SEND
);
1030 assert(msg_length
< 16);
1032 brw_set_dest(insn
, dest
);
1033 brw_set_src0(insn
, src0
);
1034 brw_set_src1(insn
, brw_imm_d(0));
1036 insn
->header
.destreg__conditonalmod
= msg_reg_nr
;
1038 brw_set_urb_message(insn
,