2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
39 #include "brw_eu_defines.h"
41 #include "brw_disasm_info.h"
47 #define BRW_EU_MAX_INSN_STACK 5
49 struct brw_insn_state
{
50 /* One of BRW_EXECUTE_* */
53 /* Group in units of channels */
56 /* Compression control on gen4-5 */
59 /* One of BRW_MASK_* */
60 unsigned mask_control
:1;
62 /* Scheduling info for Gen12+ */
67 /* One of BRW_ALIGN_* */
68 unsigned access_mode
:1;
70 /* One of BRW_PREDICATE_* */
71 enum brw_predicate predicate
:4;
75 /* Flag subreg. Bottom bit is subreg, top bit is reg */
76 unsigned flag_subreg
:2;
78 bool acc_wr_control
:1;
82 /* A helper for accessing the last instruction emitted. This makes it easy
83 * to set various bits on an instruction without having to create temporary
84 * variable and assign the emitted instruction to those.
86 #define brw_last_inst (&p->store[p->nr_insn - 1])
92 unsigned int next_insn_offset
;
96 /* Allow clients to push/pop instruction state:
98 struct brw_insn_state stack
[BRW_EU_MAX_INSN_STACK
];
99 struct brw_insn_state
*current
;
101 /** Whether or not the user wants automatic exec sizes
103 * If true, codegen will try to automatically infer the exec size of an
104 * instruction from the width of the destination register. If false, it
105 * will take whatever is set by brw_set_default_exec_size verbatim.
107 * This is set to true by default in brw_init_codegen.
109 bool automatic_exec_sizes
;
111 bool single_program_flow
;
112 const struct gen_device_info
*devinfo
;
114 /* Control flow stacks:
115 * - if_stack contains IF and ELSE instructions which must be patched
116 * (and popped) once the matching ENDIF instruction is encountered.
118 * Just store the instruction pointer(an index).
122 int if_stack_array_size
;
125 * loop_stack contains the instruction pointers of the starts of loops which
126 * must be patched (and popped) once the matching WHILE instruction is
131 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
132 * blocks they were popping out of, to fix up the mask stack. This tracks
133 * the IF/ENDIF nesting in each current nested loop level.
135 int *if_depth_in_loop
;
136 int loop_stack_depth
;
137 int loop_stack_array_size
;
143 struct brw_label
*next
;
146 void brw_pop_insn_state( struct brw_codegen
*p
);
147 void brw_push_insn_state( struct brw_codegen
*p
);
148 unsigned brw_get_default_exec_size(struct brw_codegen
*p
);
149 unsigned brw_get_default_group(struct brw_codegen
*p
);
150 unsigned brw_get_default_access_mode(struct brw_codegen
*p
);
151 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen
*p
);
152 void brw_set_default_exec_size(struct brw_codegen
*p
, unsigned value
);
153 void brw_set_default_mask_control( struct brw_codegen
*p
, unsigned value
);
154 void brw_set_default_saturate( struct brw_codegen
*p
, bool enable
);
155 void brw_set_default_access_mode( struct brw_codegen
*p
, unsigned access_mode
);
156 void brw_inst_set_compression(const struct gen_device_info
*devinfo
,
157 brw_inst
*inst
, bool on
);
158 void brw_set_default_compression(struct brw_codegen
*p
, bool on
);
159 void brw_inst_set_group(const struct gen_device_info
*devinfo
,
160 brw_inst
*inst
, unsigned group
);
161 void brw_set_default_group(struct brw_codegen
*p
, unsigned group
);
162 void brw_set_default_compression_control(struct brw_codegen
*p
, enum brw_compression c
);
163 void brw_set_default_predicate_control(struct brw_codegen
*p
, enum brw_predicate pc
);
164 void brw_set_default_predicate_inverse(struct brw_codegen
*p
, bool predicate_inverse
);
165 void brw_set_default_flag_reg(struct brw_codegen
*p
, int reg
, int subreg
);
166 void brw_set_default_acc_write_control(struct brw_codegen
*p
, unsigned value
);
167 void brw_set_default_swsb(struct brw_codegen
*p
, struct tgl_swsb value
);
169 void brw_init_codegen(const struct gen_device_info
*, struct brw_codegen
*p
,
171 bool brw_has_jip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
172 bool brw_has_uip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
173 const struct brw_label
*brw_find_label(const struct brw_label
*root
, int offset
);
174 void brw_create_label(struct brw_label
**labels
, int offset
, void *mem_ctx
);
175 int brw_disassemble_inst(FILE *file
, const struct gen_device_info
*devinfo
,
176 const struct brw_inst
*inst
, bool is_compacted
);
177 void brw_disassemble(const struct gen_device_info
*devinfo
,
178 const void *assembly
, int start
, int end
, FILE *out
);
179 const unsigned *brw_get_program( struct brw_codegen
*p
, unsigned *sz
);
181 bool brw_try_override_assembly(struct brw_codegen
*p
, int start_offset
,
182 const char *identifier
);
184 brw_inst
*brw_next_insn(struct brw_codegen
*p
, unsigned opcode
);
185 void brw_set_dest(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg dest
);
186 void brw_set_src0(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
188 void gen6_resolve_implied_move(struct brw_codegen
*p
,
190 unsigned msg_reg_nr
);
192 /* Helpers for regular instructions:
195 brw_inst *brw_##OP(struct brw_codegen *p, \
196 struct brw_reg dest, \
197 struct brw_reg src0);
200 brw_inst *brw_##OP(struct brw_codegen *p, \
201 struct brw_reg dest, \
202 struct brw_reg src0, \
203 struct brw_reg src1);
206 brw_inst *brw_##OP(struct brw_codegen *p, \
207 struct brw_reg dest, \
208 struct brw_reg src0, \
209 struct brw_reg src1, \
210 struct brw_reg src2);
262 /* Helpers for SEND instruction:
266 * Construct a message descriptor immediate with the specified common
267 * descriptor controls.
269 static inline uint32_t
270 brw_message_desc(const struct gen_device_info
*devinfo
,
272 unsigned response_length
,
275 if (devinfo
->gen
>= 5) {
276 return (SET_BITS(msg_length
, 28, 25) |
277 SET_BITS(response_length
, 24, 20) |
278 SET_BITS(header_present
, 19, 19));
280 return (SET_BITS(msg_length
, 23, 20) |
281 SET_BITS(response_length
, 19, 16));
285 static inline unsigned
286 brw_message_desc_mlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
288 if (devinfo
->gen
>= 5)
289 return GET_BITS(desc
, 28, 25);
291 return GET_BITS(desc
, 23, 20);
294 static inline unsigned
295 brw_message_desc_rlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
297 if (devinfo
->gen
>= 5)
298 return GET_BITS(desc
, 24, 20);
300 return GET_BITS(desc
, 19, 16);
304 brw_message_desc_header_present(ASSERTED
const struct gen_device_info
*devinfo
,
307 assert(devinfo
->gen
>= 5);
308 return GET_BITS(desc
, 19, 19);
311 static inline unsigned
312 brw_message_ex_desc(UNUSED
const struct gen_device_info
*devinfo
,
313 unsigned ex_msg_length
)
315 return SET_BITS(ex_msg_length
, 9, 6);
318 static inline unsigned
319 brw_message_ex_desc_ex_mlen(UNUSED
const struct gen_device_info
*devinfo
,
322 return GET_BITS(ex_desc
, 9, 6);
/**
 * Pack the URB-specific fields of a message descriptor.
 *
 * devinfo->gen >= 8: per_slot_offset_present -> bit 17,
 * channel_mask_present -> bit 15, global_offset -> bits 14:4,
 * msg_type -> bits 3:0.
 *
 * devinfo->gen == 7: per_slot_offset_present -> bit 16,
 * global_offset -> bits 13:3, msg_type -> bits 3:0; channel_mask_present
 * must be false (asserted).
 *
 * Generations matched by neither test end in unreachable().
 */
325 static inline uint32_t
326 brw_urb_desc(const struct gen_device_info
*devinfo
,
328 bool per_slot_offset_present
,
329 bool channel_mask_present
,
330 unsigned global_offset
)
332 if (devinfo
->gen
>= 8) {
333 return (SET_BITS(per_slot_offset_present
, 17, 17) |
334 SET_BITS(channel_mask_present
, 15, 15) |
335 SET_BITS(global_offset
, 14, 4) |
336 SET_BITS(msg_type
, 3, 0));
337 } else if (devinfo
->gen
>= 7) {
338 assert(!channel_mask_present
);
/* NOTE(review): global_offset (13:3) and msg_type (3:0) both cover bit 3
 * on this path, so a msg_type >= 8 would collide with the low bit of
 * global_offset -- confirm the intended gen7 msg_type field width.
 */
339 return (SET_BITS(per_slot_offset_present
, 16, 16) |
340 SET_BITS(global_offset
, 13, 3) |
341 SET_BITS(msg_type
, 3, 0));
343 unreachable("unhandled URB write generation");
347 static inline uint32_t
348 brw_urb_desc_msg_type(ASSERTED
const struct gen_device_info
*devinfo
,
351 assert(devinfo
->gen
>= 7);
352 return GET_BITS(desc
, 3, 0);
356 * Construct a message descriptor immediate with the specified sampler
359 static inline uint32_t
360 brw_sampler_desc(const struct gen_device_info
*devinfo
,
361 unsigned binding_table_index
,
365 unsigned return_format
)
367 const unsigned desc
= (SET_BITS(binding_table_index
, 7, 0) |
368 SET_BITS(sampler
, 11, 8));
369 if (devinfo
->gen
>= 7)
370 return (desc
| SET_BITS(msg_type
, 16, 12) |
371 SET_BITS(simd_mode
, 18, 17));
372 else if (devinfo
->gen
>= 5)
373 return (desc
| SET_BITS(msg_type
, 15, 12) |
374 SET_BITS(simd_mode
, 17, 16));
375 else if (devinfo
->is_g4x
)
376 return desc
| SET_BITS(msg_type
, 15, 12);
378 return (desc
| SET_BITS(return_format
, 13, 12) |
379 SET_BITS(msg_type
, 15, 14));
382 static inline unsigned
383 brw_sampler_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
386 return GET_BITS(desc
, 7, 0);
389 static inline unsigned
390 brw_sampler_desc_sampler(UNUSED
const struct gen_device_info
*devinfo
, uint32_t desc
)
392 return GET_BITS(desc
, 11, 8);
395 static inline unsigned
396 brw_sampler_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
398 if (devinfo
->gen
>= 7)
399 return GET_BITS(desc
, 16, 12);
400 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
401 return GET_BITS(desc
, 15, 12);
403 return GET_BITS(desc
, 15, 14);
406 static inline unsigned
407 brw_sampler_desc_simd_mode(const struct gen_device_info
*devinfo
, uint32_t desc
)
409 assert(devinfo
->gen
>= 5);
410 if (devinfo
->gen
>= 7)
411 return GET_BITS(desc
, 18, 17);
413 return GET_BITS(desc
, 17, 16);
416 static inline unsigned
417 brw_sampler_desc_return_format(ASSERTED
const struct gen_device_info
*devinfo
,
420 assert(devinfo
->gen
== 4 && !devinfo
->is_g4x
);
421 return GET_BITS(desc
, 13, 12);
425 * Construct a message descriptor for the dataport
427 static inline uint32_t
428 brw_dp_desc(const struct gen_device_info
*devinfo
,
429 unsigned binding_table_index
,
431 unsigned msg_control
)
433 /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
436 assert(devinfo
->gen
>= 6);
437 const unsigned desc
= SET_BITS(binding_table_index
, 7, 0);
438 if (devinfo
->gen
>= 8) {
439 return (desc
| SET_BITS(msg_control
, 13, 8) |
440 SET_BITS(msg_type
, 18, 14));
441 } else if (devinfo
->gen
>= 7) {
442 return (desc
| SET_BITS(msg_control
, 13, 8) |
443 SET_BITS(msg_type
, 17, 14));
445 return (desc
| SET_BITS(msg_control
, 12, 8) |
446 SET_BITS(msg_type
, 16, 13));
450 static inline unsigned
451 brw_dp_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
454 return GET_BITS(desc
, 7, 0);
457 static inline unsigned
458 brw_dp_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
460 assert(devinfo
->gen
>= 6);
461 if (devinfo
->gen
>= 8)
462 return GET_BITS(desc
, 18, 14);
463 else if (devinfo
->gen
>= 7)
464 return GET_BITS(desc
, 17, 14);
466 return GET_BITS(desc
, 16, 13);
469 static inline unsigned
470 brw_dp_desc_msg_control(const struct gen_device_info
*devinfo
, uint32_t desc
)
472 assert(devinfo
->gen
>= 6);
473 if (devinfo
->gen
>= 7)
474 return GET_BITS(desc
, 13, 8);
476 return GET_BITS(desc
, 12, 8);
480 * Construct a message descriptor immediate with the specified dataport read
483 static inline uint32_t
484 brw_dp_read_desc(const struct gen_device_info
*devinfo
,
485 unsigned binding_table_index
,
486 unsigned msg_control
,
488 unsigned target_cache
)
490 if (devinfo
->gen
>= 6)
491 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
);
492 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
493 return (SET_BITS(binding_table_index
, 7, 0) |
494 SET_BITS(msg_control
, 10, 8) |
495 SET_BITS(msg_type
, 13, 11) |
496 SET_BITS(target_cache
, 15, 14));
498 return (SET_BITS(binding_table_index
, 7, 0) |
499 SET_BITS(msg_control
, 11, 8) |
500 SET_BITS(msg_type
, 13, 12) |
501 SET_BITS(target_cache
, 15, 14));
504 static inline unsigned
505 brw_dp_read_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
507 if (devinfo
->gen
>= 6)
508 return brw_dp_desc_msg_type(devinfo
, desc
);
509 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
510 return GET_BITS(desc
, 13, 11);
512 return GET_BITS(desc
, 13, 12);
515 static inline unsigned
516 brw_dp_read_desc_msg_control(const struct gen_device_info
*devinfo
,
519 if (devinfo
->gen
>= 6)
520 return brw_dp_desc_msg_control(devinfo
, desc
);
521 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
522 return GET_BITS(desc
, 10, 8);
524 return GET_BITS(desc
, 11, 8);
528 * Construct a message descriptor immediate with the specified dataport write
531 static inline uint32_t
532 brw_dp_write_desc(const struct gen_device_info
*devinfo
,
533 unsigned binding_table_index
,
534 unsigned msg_control
,
536 unsigned last_render_target
,
537 unsigned send_commit_msg
)
539 assert(devinfo
->gen
<= 6 || !send_commit_msg
);
540 if (devinfo
->gen
>= 6)
541 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
) |
542 SET_BITS(last_render_target
, 12, 12) |
543 SET_BITS(send_commit_msg
, 17, 17);
545 return (SET_BITS(binding_table_index
, 7, 0) |
546 SET_BITS(msg_control
, 11, 8) |
547 SET_BITS(last_render_target
, 11, 11) |
548 SET_BITS(msg_type
, 14, 12) |
549 SET_BITS(send_commit_msg
, 15, 15));
552 static inline unsigned
553 brw_dp_write_desc_msg_type(const struct gen_device_info
*devinfo
,
556 if (devinfo
->gen
>= 6)
557 return brw_dp_desc_msg_type(devinfo
, desc
);
559 return GET_BITS(desc
, 14, 12);
562 static inline unsigned
563 brw_dp_write_desc_msg_control(const struct gen_device_info
*devinfo
,
566 if (devinfo
->gen
>= 6)
567 return brw_dp_desc_msg_control(devinfo
, desc
);
569 return GET_BITS(desc
, 11, 8);
573 brw_dp_write_desc_last_render_target(const struct gen_device_info
*devinfo
,
576 if (devinfo
->gen
>= 6)
577 return GET_BITS(desc
, 12, 12);
579 return GET_BITS(desc
, 11, 11);
583 brw_dp_write_desc_write_commit(const struct gen_device_info
*devinfo
,
586 assert(devinfo
->gen
<= 6);
587 if (devinfo
->gen
>= 6)
588 return GET_BITS(desc
, 17, 17);
590 return GET_BITS(desc
, 15, 15);
594 * Construct a message descriptor immediate with the specified dataport
595 * surface function controls.
597 static inline uint32_t
598 brw_dp_surface_desc(const struct gen_device_info
*devinfo
,
600 unsigned msg_control
)
602 assert(devinfo
->gen
>= 7);
603 /* We'll OR in the binding table index later */
604 return brw_dp_desc(devinfo
, 0, msg_type
, msg_control
);
607 static inline uint32_t
608 brw_dp_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
609 unsigned exec_size
, /**< 0 for SIMD4x2 */
611 bool response_expected
)
613 assert(exec_size
<= 8 || exec_size
== 16);
616 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
618 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
620 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
623 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
626 const unsigned msg_control
=
627 SET_BITS(atomic_op
, 3, 0) |
628 SET_BITS(0 < exec_size
&& exec_size
<= 8, 4, 4) |
629 SET_BITS(response_expected
, 5, 5);
631 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
634 static inline uint32_t
635 brw_dp_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
638 bool response_expected
)
640 assert(exec_size
<= 8 || exec_size
== 16);
641 assert(devinfo
->gen
>= 9);
643 assert(exec_size
> 0);
644 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP
;
646 const unsigned msg_control
=
647 SET_BITS(atomic_op
, 1, 0) |
648 SET_BITS(exec_size
<= 8, 4, 4) |
649 SET_BITS(response_expected
, 5, 5);
651 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
/**
 * Compute the 4-bit channel mask used in surface message controls.
 *
 * Bit i of the result is set for every channel index i in
 * [num_channels, 4): 0 channels -> 0xf, 1 -> 0xe, 2 -> 0xc, 3 -> 0x8 and
 * 4 or more -> 0.  Presumably a set bit disables the channel; confirm
 * against MDC_CMASK in the SKL PRM Vol 2d.
 *
 * The shift is done on an unsigned operand and skipped entirely for
 * num_channels >= 4, so large counts cannot trigger an out-of-range or
 * signed-overflowing shift.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   if (num_channels >= 4)
      return 0;

   return 0xfu & (0xfu << num_channels);
}
661 static inline uint32_t
662 brw_dp_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
663 unsigned exec_size
, /**< 0 for SIMD4x2 */
664 unsigned num_channels
,
667 assert(exec_size
<= 8 || exec_size
== 16);
671 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
672 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
;
674 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
;
678 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
679 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
681 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
685 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
686 if (write
&& devinfo
->gen
== 7 && !devinfo
->is_haswell
&& exec_size
== 0)
689 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
690 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
691 exec_size
<= 8 ? 2 : 1;
693 const unsigned msg_control
=
694 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
695 SET_BITS(simd_mode
, 5, 4);
697 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
700 static inline unsigned
701 brw_mdc_ds(unsigned bit_size
)
705 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
707 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
709 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
711 unreachable("Unsupported bit_size for byte scattered messages");
715 static inline uint32_t
716 brw_dp_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
721 assert(exec_size
<= 8 || exec_size
== 16);
723 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
724 const unsigned msg_type
=
725 write
? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
:
726 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
;
728 assert(exec_size
> 0);
729 const unsigned msg_control
=
730 SET_BITS(exec_size
== 16, 0, 0) |
731 SET_BITS(brw_mdc_ds(bit_size
), 3, 2);
733 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
/**
 * Build the descriptor for a dword scattered read or write message.
 *
 * exec_size must be 8 or 16 (asserted).  msg_type is picked from the
 * generation-specific DWORD_SCATTERED_{WRITE,READ} opcodes, msg_control
 * sets bit 1 ("Legacy SIMD Mode") and puts (exec_size == 16) in bit 0,
 * and the pair is encoded through brw_dp_surface_desc().
 */
736 static inline uint32_t
737 brw_dp_dword_scattered_rw_desc(const struct gen_device_info
*devinfo
,
741 assert(exec_size
== 8 || exec_size
== 16);
745 if (devinfo
->gen
>= 6) {
746 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
748 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
751 if (devinfo
->gen
>= 7) {
752 msg_type
= GEN7_DATAPORT_DC_DWORD_SCATTERED_READ
;
753 } else if (devinfo
->gen
> 4 || devinfo
->is_g4x
) {
754 msg_type
= G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
756 msg_type
= BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
760 const unsigned msg_control
=
761 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
762 SET_BITS(exec_size
== 16, 0, 0);
/* NOTE(review): brw_dp_surface_desc() asserts devinfo->gen >= 7, yet
 * message types for older generations are selected above -- confirm
 * whether the pre-gen7 paths are meant to be reachable.
 */
764 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
767 static inline uint32_t
768 brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
769 unsigned exec_size
, /**< 0 for SIMD4x2 */
770 unsigned num_channels
,
773 assert(exec_size
<= 8 || exec_size
== 16);
774 assert(devinfo
->gen
>= 8);
777 write
? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE
:
778 GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ
;
780 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
781 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
782 exec_size
<= 8 ? 2 : 1;
784 const unsigned msg_control
=
785 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
786 SET_BITS(simd_mode
, 5, 4);
788 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
789 msg_type
, msg_control
);
793 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
796 static inline uint32_t
797 brw_mdc_a64_ds(unsigned elems
)
805 unreachable("Unsupported elmeent count for A64 scattered message");
809 static inline uint32_t
810 brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
811 unsigned exec_size
, /**< 0 for SIMD4x2 */
815 assert(exec_size
<= 8 || exec_size
== 16);
816 assert(devinfo
->gen
>= 8);
819 write
? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE
:
820 GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ
;
822 const unsigned msg_control
=
823 SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE
, 1, 0) |
824 SET_BITS(brw_mdc_a64_ds(bit_size
/ 8), 3, 2) |
825 SET_BITS(exec_size
== 16, 4, 4);
827 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
828 msg_type
, msg_control
);
831 static inline uint32_t
832 brw_dp_a64_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
833 ASSERTED
unsigned exec_size
, /**< 0 for SIMD4x2 */
836 bool response_expected
)
838 assert(exec_size
== 8);
839 assert(devinfo
->gen
>= 8);
840 assert(bit_size
== 32 || bit_size
== 64);
842 const unsigned msg_type
= GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP
;
844 const unsigned msg_control
=
845 SET_BITS(atomic_op
, 3, 0) |
846 SET_BITS(bit_size
== 64, 4, 4) |
847 SET_BITS(response_expected
, 5, 5);
849 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
850 msg_type
, msg_control
);
853 static inline uint32_t
854 brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
855 ASSERTED
unsigned exec_size
,
857 bool response_expected
)
859 assert(exec_size
== 8);
860 assert(devinfo
->gen
>= 9);
862 assert(exec_size
> 0);
863 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP
;
865 const unsigned msg_control
=
866 SET_BITS(atomic_op
, 1, 0) |
867 SET_BITS(response_expected
, 5, 5);
869 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
870 msg_type
, msg_control
);
873 static inline uint32_t
874 brw_dp_typed_atomic_desc(const struct gen_device_info
*devinfo
,
878 bool response_expected
)
880 assert(exec_size
> 0 || exec_group
== 0);
881 assert(exec_group
% 8 == 0);
884 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
885 if (exec_size
== 0) {
886 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
;
888 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
;
891 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
892 assert(exec_size
> 0);
893 msg_type
= GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
;
896 const bool high_sample_mask
= (exec_group
/ 8) % 2 == 1;
898 const unsigned msg_control
=
899 SET_BITS(atomic_op
, 3, 0) |
900 SET_BITS(high_sample_mask
, 4, 4) |
901 SET_BITS(response_expected
, 5, 5);
903 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
906 static inline uint32_t
907 brw_dp_typed_surface_rw_desc(const struct gen_device_info
*devinfo
,
910 unsigned num_channels
,
913 assert(exec_size
> 0 || exec_group
== 0);
914 assert(exec_group
% 8 == 0);
916 /* Typed surface reads and writes don't support SIMD16 */
917 assert(exec_size
<= 8);
921 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
922 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
;
924 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
;
927 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
928 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
;
930 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_READ
;
934 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
935 unsigned msg_control
;
936 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
937 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
938 const unsigned slot_group
= exec_size
== 0 ? 0 : /* SIMD4x2 */
939 1 + ((exec_group
/ 8) % 2);
942 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
943 SET_BITS(slot_group
, 5, 4);
945 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
946 assert(exec_size
> 0);
947 const unsigned slot_group
= ((exec_group
/ 8) % 2);
950 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
951 SET_BITS(slot_group
, 5, 5);
954 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
958 * Construct a message descriptor immediate with the specified pixel
959 * interpolator function controls.
961 static inline uint32_t
962 brw_pixel_interp_desc(UNUSED
const struct gen_device_info
*devinfo
,
968 return (SET_BITS(slot_group
, 11, 11) |
969 SET_BITS(msg_type
, 13, 12) |
970 SET_BITS(!!noperspective
, 14, 14) |
971 SET_BITS(simd_mode
, 16, 16));
974 void brw_urb_WRITE(struct brw_codegen
*p
,
978 enum brw_urb_write_flags flags
,
980 unsigned response_length
,
985 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
986 * desc. If \p desc is not an immediate it will be transparently loaded to an
987 * address register using an OR instruction.
990 brw_send_indirect_message(struct brw_codegen
*p
,
993 struct brw_reg payload
,
999 brw_send_indirect_split_message(struct brw_codegen
*p
,
1002 struct brw_reg payload0
,
1003 struct brw_reg payload1
,
1004 struct brw_reg desc
,
1006 struct brw_reg ex_desc
,
1007 unsigned ex_desc_imm
,
1010 void brw_ff_sync(struct brw_codegen
*p
,
1011 struct brw_reg dest
,
1012 unsigned msg_reg_nr
,
1013 struct brw_reg src0
,
1015 unsigned response_length
,
1018 void brw_svb_write(struct brw_codegen
*p
,
1019 struct brw_reg dest
,
1020 unsigned msg_reg_nr
,
1021 struct brw_reg src0
,
1022 unsigned binding_table_index
,
1023 bool send_commit_msg
);
1025 brw_inst
*brw_fb_WRITE(struct brw_codegen
*p
,
1026 struct brw_reg payload
,
1027 struct brw_reg implied_header
,
1028 unsigned msg_control
,
1029 unsigned binding_table_index
,
1030 unsigned msg_length
,
1031 unsigned response_length
,
1033 bool last_render_target
,
1034 bool header_present
);
1036 brw_inst
*gen9_fb_READ(struct brw_codegen
*p
,
1038 struct brw_reg payload
,
1039 unsigned binding_table_index
,
1040 unsigned msg_length
,
1041 unsigned response_length
,
1044 void brw_SAMPLE(struct brw_codegen
*p
,
1045 struct brw_reg dest
,
1046 unsigned msg_reg_nr
,
1047 struct brw_reg src0
,
1048 unsigned binding_table_index
,
1051 unsigned response_length
,
1052 unsigned msg_length
,
1053 unsigned header_present
,
1055 unsigned return_format
);
1057 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
1058 struct brw_reg header
,
1059 struct brw_reg sampler_index
);
1061 void gen4_math(struct brw_codegen
*p
,
1062 struct brw_reg dest
,
1064 unsigned msg_reg_nr
,
1066 unsigned precision
);
1068 void gen6_math(struct brw_codegen
*p
,
1069 struct brw_reg dest
,
1071 struct brw_reg src0
,
1072 struct brw_reg src1
);
1074 void brw_oword_block_read(struct brw_codegen
*p
,
1075 struct brw_reg dest
,
1078 uint32_t bind_table_index
);
1080 unsigned brw_scratch_surface_idx(const struct brw_codegen
*p
);
1082 void brw_oword_block_read_scratch(struct brw_codegen
*p
,
1083 struct brw_reg dest
,
1088 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1093 void gen7_block_read_scratch(struct brw_codegen
*p
,
1094 struct brw_reg dest
,
1098 void brw_shader_time_add(struct brw_codegen
*p
,
1099 struct brw_reg payload
,
1100 uint32_t surf_index
);
1103 * Return the generation-specific jump distance scaling factor.
1105 * Given the number of instructions to jump, we need to scale by
1106 * some number to obtain the actual jump distance to program in an
1109 static inline unsigned
1110 brw_jump_scale(const struct gen_device_info
*devinfo
)
1112 /* Broadwell measures jump targets in bytes. */
1113 if (devinfo
->gen
>= 8)
1116 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1117 * to support compaction), so each 128-bit instruction requires 2 chunks.
1119 if (devinfo
->gen
>= 5)
1122 /* Gen4 simply uses the number of 128-bit instructions. */
1126 void brw_barrier(struct brw_codegen
*p
, struct brw_reg src
);
1128 /* If/else/endif. Works by manipulating the execution flags on each
1131 brw_inst
*brw_IF(struct brw_codegen
*p
, unsigned execute_size
);
1132 brw_inst
*gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1133 struct brw_reg src0
, struct brw_reg src1
);
1135 void brw_ELSE(struct brw_codegen
*p
);
1136 void brw_ENDIF(struct brw_codegen
*p
);
1140 brw_inst
*brw_DO(struct brw_codegen
*p
, unsigned execute_size
);
1142 brw_inst
*brw_WHILE(struct brw_codegen
*p
);
1144 brw_inst
*brw_BREAK(struct brw_codegen
*p
);
1145 brw_inst
*brw_CONT(struct brw_codegen
*p
);
1146 brw_inst
*brw_HALT(struct brw_codegen
*p
);
1150 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
);
1152 brw_inst
*brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1153 unsigned predicate_control
);
1155 void brw_NOP(struct brw_codegen
*p
);
1157 void brw_WAIT(struct brw_codegen
*p
);
1159 void brw_SYNC(struct brw_codegen
*p
, enum tgl_sync_function func
);
1161 /* Special case: there is never a destination, execution size will be
1164 void brw_CMP(struct brw_codegen
*p
,
1165 struct brw_reg dest
,
1166 unsigned conditional
,
1167 struct brw_reg src0
,
1168 struct brw_reg src1
);
1171 brw_untyped_atomic(struct brw_codegen
*p
,
1173 struct brw_reg payload
,
1174 struct brw_reg surface
,
1176 unsigned msg_length
,
1177 bool response_expected
,
1178 bool header_present
);
1181 brw_untyped_surface_read(struct brw_codegen
*p
,
1183 struct brw_reg payload
,
1184 struct brw_reg surface
,
1185 unsigned msg_length
,
1186 unsigned num_channels
);
1189 brw_untyped_surface_write(struct brw_codegen
*p
,
1190 struct brw_reg payload
,
1191 struct brw_reg surface
,
1192 unsigned msg_length
,
1193 unsigned num_channels
,
1194 bool header_present
);
1197 brw_memory_fence(struct brw_codegen
*p
,
1200 enum opcode send_op
,
1201 enum brw_message_target sfid
,
1206 brw_pixel_interpolator_query(struct brw_codegen
*p
,
1207 struct brw_reg dest
,
1211 struct brw_reg data
,
1212 unsigned msg_length
,
1213 unsigned response_length
);
1216 brw_find_live_channel(struct brw_codegen
*p
,
1218 struct brw_reg mask
);
1221 brw_broadcast(struct brw_codegen
*p
,
1224 struct brw_reg idx
);
1227 brw_float_controls_mode(struct brw_codegen
*p
,
1228 unsigned mode
, unsigned mask
);
1230 /***********************************************************************
1234 void brw_copy_indirect_to_indirect(struct brw_codegen
*p
,
1235 struct brw_indirect dst_ptr
,
1236 struct brw_indirect src_ptr
,
1239 void brw_copy_from_indirect(struct brw_codegen
*p
,
1241 struct brw_indirect ptr
,
1244 void brw_copy4(struct brw_codegen
*p
,
1249 void brw_copy8(struct brw_codegen
*p
,
1254 void brw_math_invert( struct brw_codegen
*p
,
1256 struct brw_reg src
);
1258 void brw_set_src1(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
1260 void brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*insn
,
1261 unsigned desc
, unsigned ex_desc
);
1264 brw_set_desc(struct brw_codegen
*p
, brw_inst
*insn
, unsigned desc
)
1266 brw_set_desc_ex(p
, insn
, desc
, 0);
1269 void brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
);
1271 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod
);
1272 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod
);
1274 /* brw_eu_compact.c */
1275 void brw_init_compaction_tables(const struct gen_device_info
*devinfo
);
1276 void brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1277 struct disasm_info
*disasm
);
1278 void brw_uncompact_instruction(const struct gen_device_info
*devinfo
,
1279 brw_inst
*dst
, brw_compact_inst
*src
);
1280 bool brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1281 brw_compact_inst
*dst
, const brw_inst
*src
);
1283 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1284 brw_inst
*orig
, brw_inst
*uncompacted
);
1286 /* brw_eu_validate.c */
1287 bool brw_validate_instruction(const struct gen_device_info
*devinfo
,
1288 const brw_inst
*inst
, int offset
,
1289 struct disasm_info
*disasm
);
1290 bool brw_validate_instructions(const struct gen_device_info
*devinfo
,
1291 const void *assembly
, int start_offset
, int end_offset
,
1292 struct disasm_info
*disasm
);
1295 next_offset(const struct gen_device_info
*devinfo
, void *store
, int offset
)
1297 brw_inst
*insn
= (brw_inst
*)((char *)store
+ offset
);
1299 if (brw_inst_cmpt_control(devinfo
, insn
))
1305 struct opcode_desc
{
1314 const struct opcode_desc
*
1315 brw_opcode_desc(const struct gen_device_info
*devinfo
, enum opcode opcode
);
1317 const struct opcode_desc
*
1318 brw_opcode_desc_from_hw(const struct gen_device_info
*devinfo
, unsigned hw
);
1320 static inline unsigned
1321 brw_opcode_encode(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1323 return brw_opcode_desc(devinfo
, opcode
)->hw
;
1326 static inline enum opcode
1327 brw_opcode_decode(const struct gen_device_info
*devinfo
, unsigned hw
)
1329 const struct opcode_desc
*desc
= brw_opcode_desc_from_hw(devinfo
, hw
);
1330 return desc
? (enum opcode
)desc
->ir
: BRW_OPCODE_ILLEGAL
;
1334 brw_inst_set_opcode(const struct gen_device_info
*devinfo
,
1335 brw_inst
*inst
, enum opcode opcode
)
1337 brw_inst_set_hw_opcode(devinfo
, inst
, brw_opcode_encode(devinfo
, opcode
));
1340 static inline enum opcode
1341 brw_inst_opcode(const struct gen_device_info
*devinfo
, const brw_inst
*inst
)
1343 return brw_opcode_decode(devinfo
, brw_inst_hw_opcode(devinfo
, inst
));
1347 is_3src(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1349 const struct opcode_desc
*desc
= brw_opcode_desc(devinfo
, opcode
);
1350 return desc
&& desc
->nsrc
== 3;
1353 /** Maximum SEND message length */
1354 #define BRW_MAX_MSG_LENGTH 15
1356 /** First MRF register used by spills */
1357 #define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
1359 /** First MRF register used by pull loads */
1360 #define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)