2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
39 #include "brw_eu_defines.h"
41 #include "brw_disasm_info.h"
/** Number of entries in the instruction-state stack of struct brw_codegen. */
#define BRW_EU_MAX_INSN_STACK 5
49 struct brw_insn_state
{
50 /* One of BRW_EXECUTE_* */
53 /* Group in units of channels */
56 /* Compression control on gen4-5 */
59 /* One of BRW_MASK_* */
60 unsigned mask_control
:1;
62 /* Scheduling info for Gen12+ */
67 /* One of BRW_ALIGN_* */
68 unsigned access_mode
:1;
70 /* One of BRW_PREDICATE_* */
71 enum brw_predicate predicate
:4;
75 /* Flag subreg. Bottom bit is subreg, top bit is reg */
76 unsigned flag_subreg
:2;
78 bool acc_wr_control
:1;
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 *
 * The macro expects a pointer named `p` in the calling scope and indexes
 * p->store with p->nr_insn - 1, so it must only be used once at least one
 * instruction has been emitted (p->nr_insn > 0).
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
92 unsigned int next_insn_offset
;
96 /* Allow clients to push/pop instruction state:
98 struct brw_insn_state stack
[BRW_EU_MAX_INSN_STACK
];
99 struct brw_insn_state
*current
;
101 /** Whether or not the user wants automatic exec sizes
103 * If true, codegen will try to automatically infer the exec size of an
104 * instruction from the width of the destination register. If false, it
105 * will take whatever is set by brw_set_default_exec_size verbatim.
107 * This is set to true by default in brw_init_codegen.
109 bool automatic_exec_sizes
;
111 bool single_program_flow
;
112 const struct gen_device_info
*devinfo
;
114 /* Control flow stacks:
115 * - if_stack contains IF and ELSE instructions which must be patched
116 * (and popped) once the matching ENDIF instruction is encountered.
118 * Just store the instruction pointer(an index).
122 int if_stack_array_size
;
125 * loop_stack contains the instruction pointers of the starts of loops which
126 * must be patched (and popped) once the matching WHILE instruction is
131 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
132 * blocks they were popping out of, to fix up the mask stack. This tracks
133 * the IF/ENDIF nesting in each current nested loop level.
135 int *if_depth_in_loop
;
136 int loop_stack_depth
;
137 int loop_stack_array_size
;
140 void brw_pop_insn_state( struct brw_codegen
*p
);
141 void brw_push_insn_state( struct brw_codegen
*p
);
142 unsigned brw_get_default_exec_size(struct brw_codegen
*p
);
143 unsigned brw_get_default_group(struct brw_codegen
*p
);
144 unsigned brw_get_default_access_mode(struct brw_codegen
*p
);
145 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen
*p
);
146 void brw_set_default_exec_size(struct brw_codegen
*p
, unsigned value
);
147 void brw_set_default_mask_control( struct brw_codegen
*p
, unsigned value
);
148 void brw_set_default_saturate( struct brw_codegen
*p
, bool enable
);
149 void brw_set_default_access_mode( struct brw_codegen
*p
, unsigned access_mode
);
150 void brw_inst_set_compression(const struct gen_device_info
*devinfo
,
151 brw_inst
*inst
, bool on
);
152 void brw_set_default_compression(struct brw_codegen
*p
, bool on
);
153 void brw_inst_set_group(const struct gen_device_info
*devinfo
,
154 brw_inst
*inst
, unsigned group
);
155 void brw_set_default_group(struct brw_codegen
*p
, unsigned group
);
156 void brw_set_default_compression_control(struct brw_codegen
*p
, enum brw_compression c
);
157 void brw_set_default_predicate_control(struct brw_codegen
*p
, enum brw_predicate pc
);
158 void brw_set_default_predicate_inverse(struct brw_codegen
*p
, bool predicate_inverse
);
159 void brw_set_default_flag_reg(struct brw_codegen
*p
, int reg
, int subreg
);
160 void brw_set_default_acc_write_control(struct brw_codegen
*p
, unsigned value
);
161 void brw_set_default_swsb(struct brw_codegen
*p
, struct tgl_swsb value
);
163 void brw_init_codegen(const struct gen_device_info
*, struct brw_codegen
*p
,
165 bool brw_has_jip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
166 bool brw_has_uip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
167 int brw_disassemble_inst(FILE *file
, const struct gen_device_info
*devinfo
,
168 const struct brw_inst
*inst
, bool is_compacted
);
169 void brw_disassemble(const struct gen_device_info
*devinfo
,
170 const void *assembly
, int start
, int end
, FILE *out
);
171 const unsigned *brw_get_program( struct brw_codegen
*p
, unsigned *sz
);
173 bool brw_try_override_assembly(struct brw_codegen
*p
, int start_offset
,
174 const char *identifier
);
176 brw_inst
*brw_next_insn(struct brw_codegen
*p
, unsigned opcode
);
177 void brw_set_dest(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg dest
);
178 void brw_set_src0(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
180 void gen6_resolve_implied_move(struct brw_codegen
*p
,
182 unsigned msg_reg_nr
);
184 /* Helpers for regular instructions:
187 brw_inst *brw_##OP(struct brw_codegen *p, \
188 struct brw_reg dest, \
189 struct brw_reg src0);
192 brw_inst *brw_##OP(struct brw_codegen *p, \
193 struct brw_reg dest, \
194 struct brw_reg src0, \
195 struct brw_reg src1);
198 brw_inst *brw_##OP(struct brw_codegen *p, \
199 struct brw_reg dest, \
200 struct brw_reg src0, \
201 struct brw_reg src1, \
202 struct brw_reg src2);
254 /* Helpers for SEND instruction:
258 * Construct a message descriptor immediate with the specified common
259 * descriptor controls.
261 static inline uint32_t
262 brw_message_desc(const struct gen_device_info
*devinfo
,
264 unsigned response_length
,
267 if (devinfo
->gen
>= 5) {
268 return (SET_BITS(msg_length
, 28, 25) |
269 SET_BITS(response_length
, 24, 20) |
270 SET_BITS(header_present
, 19, 19));
272 return (SET_BITS(msg_length
, 23, 20) |
273 SET_BITS(response_length
, 19, 16));
277 static inline unsigned
278 brw_message_desc_mlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
280 if (devinfo
->gen
>= 5)
281 return GET_BITS(desc
, 28, 25);
283 return GET_BITS(desc
, 23, 20);
286 static inline unsigned
287 brw_message_desc_rlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
289 if (devinfo
->gen
>= 5)
290 return GET_BITS(desc
, 24, 20);
292 return GET_BITS(desc
, 19, 16);
296 brw_message_desc_header_present(ASSERTED
const struct gen_device_info
*devinfo
,
299 assert(devinfo
->gen
>= 5);
300 return GET_BITS(desc
, 19, 19);
303 static inline unsigned
304 brw_message_ex_desc(UNUSED
const struct gen_device_info
*devinfo
,
305 unsigned ex_msg_length
)
307 return SET_BITS(ex_msg_length
, 9, 6);
310 static inline unsigned
311 brw_message_ex_desc_ex_mlen(UNUSED
const struct gen_device_info
*devinfo
,
314 return GET_BITS(ex_desc
, 9, 6);
317 static inline uint32_t
318 brw_urb_desc(const struct gen_device_info
*devinfo
,
320 bool per_slot_offset_present
,
321 bool channel_mask_present
,
322 unsigned global_offset
)
324 if (devinfo
->gen
>= 8) {
325 return (SET_BITS(per_slot_offset_present
, 17, 17) |
326 SET_BITS(channel_mask_present
, 15, 15) |
327 SET_BITS(global_offset
, 14, 4) |
328 SET_BITS(msg_type
, 3, 0));
329 } else if (devinfo
->gen
>= 7) {
330 assert(!channel_mask_present
);
331 return (SET_BITS(per_slot_offset_present
, 16, 16) |
332 SET_BITS(global_offset
, 13, 3) |
333 SET_BITS(msg_type
, 3, 0));
335 unreachable("unhandled URB write generation");
339 static inline uint32_t
340 brw_urb_desc_msg_type(ASSERTED
const struct gen_device_info
*devinfo
,
343 assert(devinfo
->gen
>= 7);
344 return GET_BITS(desc
, 3, 0);
348 * Construct a message descriptor immediate with the specified sampler
351 static inline uint32_t
352 brw_sampler_desc(const struct gen_device_info
*devinfo
,
353 unsigned binding_table_index
,
357 unsigned return_format
)
359 const unsigned desc
= (SET_BITS(binding_table_index
, 7, 0) |
360 SET_BITS(sampler
, 11, 8));
361 if (devinfo
->gen
>= 7)
362 return (desc
| SET_BITS(msg_type
, 16, 12) |
363 SET_BITS(simd_mode
, 18, 17));
364 else if (devinfo
->gen
>= 5)
365 return (desc
| SET_BITS(msg_type
, 15, 12) |
366 SET_BITS(simd_mode
, 17, 16));
367 else if (devinfo
->is_g4x
)
368 return desc
| SET_BITS(msg_type
, 15, 12);
370 return (desc
| SET_BITS(return_format
, 13, 12) |
371 SET_BITS(msg_type
, 15, 14));
374 static inline unsigned
375 brw_sampler_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
378 return GET_BITS(desc
, 7, 0);
381 static inline unsigned
382 brw_sampler_desc_sampler(UNUSED
const struct gen_device_info
*devinfo
, uint32_t desc
)
384 return GET_BITS(desc
, 11, 8);
387 static inline unsigned
388 brw_sampler_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
390 if (devinfo
->gen
>= 7)
391 return GET_BITS(desc
, 16, 12);
392 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
393 return GET_BITS(desc
, 15, 12);
395 return GET_BITS(desc
, 15, 14);
398 static inline unsigned
399 brw_sampler_desc_simd_mode(const struct gen_device_info
*devinfo
, uint32_t desc
)
401 assert(devinfo
->gen
>= 5);
402 if (devinfo
->gen
>= 7)
403 return GET_BITS(desc
, 18, 17);
405 return GET_BITS(desc
, 17, 16);
408 static inline unsigned
409 brw_sampler_desc_return_format(ASSERTED
const struct gen_device_info
*devinfo
,
412 assert(devinfo
->gen
== 4 && !devinfo
->is_g4x
);
413 return GET_BITS(desc
, 13, 12);
417 * Construct a message descriptor for the dataport
419 static inline uint32_t
420 brw_dp_desc(const struct gen_device_info
*devinfo
,
421 unsigned binding_table_index
,
423 unsigned msg_control
)
425 /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
428 assert(devinfo
->gen
>= 6);
429 const unsigned desc
= SET_BITS(binding_table_index
, 7, 0);
430 if (devinfo
->gen
>= 8) {
431 return (desc
| SET_BITS(msg_control
, 13, 8) |
432 SET_BITS(msg_type
, 18, 14));
433 } else if (devinfo
->gen
>= 7) {
434 return (desc
| SET_BITS(msg_control
, 13, 8) |
435 SET_BITS(msg_type
, 17, 14));
437 return (desc
| SET_BITS(msg_control
, 12, 8) |
438 SET_BITS(msg_type
, 16, 13));
442 static inline unsigned
443 brw_dp_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
446 return GET_BITS(desc
, 7, 0);
449 static inline unsigned
450 brw_dp_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
452 assert(devinfo
->gen
>= 6);
453 if (devinfo
->gen
>= 8)
454 return GET_BITS(desc
, 18, 14);
455 else if (devinfo
->gen
>= 7)
456 return GET_BITS(desc
, 17, 14);
458 return GET_BITS(desc
, 16, 13);
461 static inline unsigned
462 brw_dp_desc_msg_control(const struct gen_device_info
*devinfo
, uint32_t desc
)
464 assert(devinfo
->gen
>= 6);
465 if (devinfo
->gen
>= 7)
466 return GET_BITS(desc
, 13, 8);
468 return GET_BITS(desc
, 12, 8);
472 * Construct a message descriptor immediate with the specified dataport read
475 static inline uint32_t
476 brw_dp_read_desc(const struct gen_device_info
*devinfo
,
477 unsigned binding_table_index
,
478 unsigned msg_control
,
480 unsigned target_cache
)
482 if (devinfo
->gen
>= 6)
483 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
);
484 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
485 return (SET_BITS(binding_table_index
, 7, 0) |
486 SET_BITS(msg_control
, 10, 8) |
487 SET_BITS(msg_type
, 13, 11) |
488 SET_BITS(target_cache
, 15, 14));
490 return (SET_BITS(binding_table_index
, 7, 0) |
491 SET_BITS(msg_control
, 11, 8) |
492 SET_BITS(msg_type
, 13, 12) |
493 SET_BITS(target_cache
, 15, 14));
496 static inline unsigned
497 brw_dp_read_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
499 if (devinfo
->gen
>= 6)
500 return brw_dp_desc_msg_type(devinfo
, desc
);
501 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
502 return GET_BITS(desc
, 13, 11);
504 return GET_BITS(desc
, 13, 12);
507 static inline unsigned
508 brw_dp_read_desc_msg_control(const struct gen_device_info
*devinfo
,
511 if (devinfo
->gen
>= 6)
512 return brw_dp_desc_msg_control(devinfo
, desc
);
513 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
514 return GET_BITS(desc
, 10, 8);
516 return GET_BITS(desc
, 11, 8);
520 * Construct a message descriptor immediate with the specified dataport write
523 static inline uint32_t
524 brw_dp_write_desc(const struct gen_device_info
*devinfo
,
525 unsigned binding_table_index
,
526 unsigned msg_control
,
528 unsigned last_render_target
,
529 unsigned send_commit_msg
)
531 assert(devinfo
->gen
<= 6 || !send_commit_msg
);
532 if (devinfo
->gen
>= 6)
533 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
) |
534 SET_BITS(last_render_target
, 12, 12) |
535 SET_BITS(send_commit_msg
, 17, 17);
537 return (SET_BITS(binding_table_index
, 7, 0) |
538 SET_BITS(msg_control
, 11, 8) |
539 SET_BITS(last_render_target
, 11, 11) |
540 SET_BITS(msg_type
, 14, 12) |
541 SET_BITS(send_commit_msg
, 15, 15));
544 static inline unsigned
545 brw_dp_write_desc_msg_type(const struct gen_device_info
*devinfo
,
548 if (devinfo
->gen
>= 6)
549 return brw_dp_desc_msg_type(devinfo
, desc
);
551 return GET_BITS(desc
, 14, 12);
554 static inline unsigned
555 brw_dp_write_desc_msg_control(const struct gen_device_info
*devinfo
,
558 if (devinfo
->gen
>= 6)
559 return brw_dp_desc_msg_control(devinfo
, desc
);
561 return GET_BITS(desc
, 11, 8);
565 brw_dp_write_desc_last_render_target(const struct gen_device_info
*devinfo
,
568 if (devinfo
->gen
>= 6)
569 return GET_BITS(desc
, 12, 12);
571 return GET_BITS(desc
, 11, 11);
575 brw_dp_write_desc_write_commit(const struct gen_device_info
*devinfo
,
578 assert(devinfo
->gen
<= 6);
579 if (devinfo
->gen
>= 6)
580 return GET_BITS(desc
, 17, 17);
582 return GET_BITS(desc
, 15, 15);
586 * Construct a message descriptor immediate with the specified dataport
587 * surface function controls.
589 static inline uint32_t
590 brw_dp_surface_desc(const struct gen_device_info
*devinfo
,
592 unsigned msg_control
)
594 assert(devinfo
->gen
>= 7);
595 /* We'll OR in the binding table index later */
596 return brw_dp_desc(devinfo
, 0, msg_type
, msg_control
);
599 static inline uint32_t
600 brw_dp_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
601 unsigned exec_size
, /**< 0 for SIMD4x2 */
603 bool response_expected
)
605 assert(exec_size
<= 8 || exec_size
== 16);
608 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
610 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
612 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
615 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
618 const unsigned msg_control
=
619 SET_BITS(atomic_op
, 3, 0) |
620 SET_BITS(0 < exec_size
&& exec_size
<= 8, 4, 4) |
621 SET_BITS(response_expected
, 5, 5);
623 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
626 static inline uint32_t
627 brw_dp_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
630 bool response_expected
)
632 assert(exec_size
<= 8 || exec_size
== 16);
633 assert(devinfo
->gen
>= 9);
635 assert(exec_size
> 0);
636 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP
;
638 const unsigned msg_control
=
639 SET_BITS(atomic_op
, 1, 0) |
640 SET_BITS(exec_size
<= 8, 4, 4) |
641 SET_BITS(response_expected
, 5, 5);
643 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
/**
 * Compute the 4-bit channel mask for a surface message that uses the first
 * \p num_channels channels.
 *
 * Bit i of the result is set exactly when i >= num_channels, so
 * 0 -> 0xf, 1 -> 0xe, 2 -> 0xc, 3 -> 0x8 and 4 (or more) -> 0x0.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */

   /* Every mask bit is already shifted out at four channels.  Returning
    * early also keeps the shift count below the operand width, which would
    * otherwise be undefined behaviour for large arguments.
    */
   if (num_channels >= 4)
      return 0;

   return 0xfu & (0xfu << num_channels);
}
653 static inline uint32_t
654 brw_dp_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
655 unsigned exec_size
, /**< 0 for SIMD4x2 */
656 unsigned num_channels
,
659 assert(exec_size
<= 8 || exec_size
== 16);
663 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
664 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
;
666 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
;
670 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
671 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
673 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
677 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
678 if (write
&& devinfo
->gen
== 7 && !devinfo
->is_haswell
&& exec_size
== 0)
681 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
682 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
683 exec_size
<= 8 ? 2 : 1;
685 const unsigned msg_control
=
686 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
687 SET_BITS(simd_mode
, 5, 4);
689 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
692 static inline unsigned
693 brw_mdc_ds(unsigned bit_size
)
697 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
699 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
701 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
703 unreachable("Unsupported bit_size for byte scattered messages");
707 static inline uint32_t
708 brw_dp_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
713 assert(exec_size
<= 8 || exec_size
== 16);
715 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
716 const unsigned msg_type
=
717 write
? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
:
718 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
;
720 assert(exec_size
> 0);
721 const unsigned msg_control
=
722 SET_BITS(exec_size
== 16, 0, 0) |
723 SET_BITS(brw_mdc_ds(bit_size
), 3, 2);
725 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
728 static inline uint32_t
729 brw_dp_dword_scattered_rw_desc(const struct gen_device_info
*devinfo
,
733 assert(exec_size
== 8 || exec_size
== 16);
737 if (devinfo
->gen
>= 6) {
738 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
740 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
743 if (devinfo
->gen
>= 7) {
744 msg_type
= GEN7_DATAPORT_DC_DWORD_SCATTERED_READ
;
745 } else if (devinfo
->gen
> 4 || devinfo
->is_g4x
) {
746 msg_type
= G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
748 msg_type
= BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
752 const unsigned msg_control
=
753 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
754 SET_BITS(exec_size
== 16, 0, 0);
756 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
759 static inline uint32_t
760 brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
761 unsigned exec_size
, /**< 0 for SIMD4x2 */
762 unsigned num_channels
,
765 assert(exec_size
<= 8 || exec_size
== 16);
766 assert(devinfo
->gen
>= 8);
769 write
? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE
:
770 GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ
;
772 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
773 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
774 exec_size
<= 8 ? 2 : 1;
776 const unsigned msg_control
=
777 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
778 SET_BITS(simd_mode
, 5, 4);
780 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
781 msg_type
, msg_control
);
785 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
788 static inline uint32_t
789 brw_mdc_a64_ds(unsigned elems
)
797 unreachable("Unsupported elmeent count for A64 scattered message");
801 static inline uint32_t
802 brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
803 unsigned exec_size
, /**< 0 for SIMD4x2 */
807 assert(exec_size
<= 8 || exec_size
== 16);
808 assert(devinfo
->gen
>= 8);
811 write
? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE
:
812 GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ
;
814 const unsigned msg_control
=
815 SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE
, 1, 0) |
816 SET_BITS(brw_mdc_a64_ds(bit_size
/ 8), 3, 2) |
817 SET_BITS(exec_size
== 16, 4, 4);
819 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
820 msg_type
, msg_control
);
823 static inline uint32_t
824 brw_dp_a64_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
825 ASSERTED
unsigned exec_size
, /**< 0 for SIMD4x2 */
828 bool response_expected
)
830 assert(exec_size
== 8);
831 assert(devinfo
->gen
>= 8);
832 assert(bit_size
== 32 || bit_size
== 64);
834 const unsigned msg_type
= GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP
;
836 const unsigned msg_control
=
837 SET_BITS(atomic_op
, 3, 0) |
838 SET_BITS(bit_size
== 64, 4, 4) |
839 SET_BITS(response_expected
, 5, 5);
841 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
842 msg_type
, msg_control
);
845 static inline uint32_t
846 brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
847 ASSERTED
unsigned exec_size
,
849 bool response_expected
)
851 assert(exec_size
== 8);
852 assert(devinfo
->gen
>= 9);
854 assert(exec_size
> 0);
855 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP
;
857 const unsigned msg_control
=
858 SET_BITS(atomic_op
, 1, 0) |
859 SET_BITS(response_expected
, 5, 5);
861 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
862 msg_type
, msg_control
);
865 static inline uint32_t
866 brw_dp_typed_atomic_desc(const struct gen_device_info
*devinfo
,
870 bool response_expected
)
872 assert(exec_size
> 0 || exec_group
== 0);
873 assert(exec_group
% 8 == 0);
876 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
877 if (exec_size
== 0) {
878 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
;
880 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
;
883 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
884 assert(exec_size
> 0);
885 msg_type
= GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
;
888 const bool high_sample_mask
= (exec_group
/ 8) % 2 == 1;
890 const unsigned msg_control
=
891 SET_BITS(atomic_op
, 3, 0) |
892 SET_BITS(high_sample_mask
, 4, 4) |
893 SET_BITS(response_expected
, 5, 5);
895 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
898 static inline uint32_t
899 brw_dp_typed_surface_rw_desc(const struct gen_device_info
*devinfo
,
902 unsigned num_channels
,
905 assert(exec_size
> 0 || exec_group
== 0);
906 assert(exec_group
% 8 == 0);
908 /* Typed surface reads and writes don't support SIMD16 */
909 assert(exec_size
<= 8);
913 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
914 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
;
916 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
;
919 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
920 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
;
922 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_READ
;
926 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
927 unsigned msg_control
;
928 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
929 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
930 const unsigned slot_group
= exec_size
== 0 ? 0 : /* SIMD4x2 */
931 1 + ((exec_group
/ 8) % 2);
934 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
935 SET_BITS(slot_group
, 5, 4);
937 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
938 assert(exec_size
> 0);
939 const unsigned slot_group
= ((exec_group
/ 8) % 2);
942 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
943 SET_BITS(slot_group
, 5, 5);
946 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
950 * Construct a message descriptor immediate with the specified pixel
951 * interpolator function controls.
953 static inline uint32_t
954 brw_pixel_interp_desc(UNUSED
const struct gen_device_info
*devinfo
,
960 return (SET_BITS(slot_group
, 11, 11) |
961 SET_BITS(msg_type
, 13, 12) |
962 SET_BITS(!!noperspective
, 14, 14) |
963 SET_BITS(simd_mode
, 16, 16));
966 void brw_urb_WRITE(struct brw_codegen
*p
,
970 enum brw_urb_write_flags flags
,
972 unsigned response_length
,
977 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
978 * desc. If \p desc is not an immediate it will be transparently loaded to an
979 * address register using an OR instruction.
982 brw_send_indirect_message(struct brw_codegen
*p
,
985 struct brw_reg payload
,
991 brw_send_indirect_split_message(struct brw_codegen
*p
,
994 struct brw_reg payload0
,
995 struct brw_reg payload1
,
998 struct brw_reg ex_desc
,
999 unsigned ex_desc_imm
,
1002 void brw_ff_sync(struct brw_codegen
*p
,
1003 struct brw_reg dest
,
1004 unsigned msg_reg_nr
,
1005 struct brw_reg src0
,
1007 unsigned response_length
,
1010 void brw_svb_write(struct brw_codegen
*p
,
1011 struct brw_reg dest
,
1012 unsigned msg_reg_nr
,
1013 struct brw_reg src0
,
1014 unsigned binding_table_index
,
1015 bool send_commit_msg
);
1017 brw_inst
*brw_fb_WRITE(struct brw_codegen
*p
,
1018 struct brw_reg payload
,
1019 struct brw_reg implied_header
,
1020 unsigned msg_control
,
1021 unsigned binding_table_index
,
1022 unsigned msg_length
,
1023 unsigned response_length
,
1025 bool last_render_target
,
1026 bool header_present
);
1028 brw_inst
*gen9_fb_READ(struct brw_codegen
*p
,
1030 struct brw_reg payload
,
1031 unsigned binding_table_index
,
1032 unsigned msg_length
,
1033 unsigned response_length
,
1036 void brw_SAMPLE(struct brw_codegen
*p
,
1037 struct brw_reg dest
,
1038 unsigned msg_reg_nr
,
1039 struct brw_reg src0
,
1040 unsigned binding_table_index
,
1043 unsigned response_length
,
1044 unsigned msg_length
,
1045 unsigned header_present
,
1047 unsigned return_format
);
1049 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
1050 struct brw_reg header
,
1051 struct brw_reg sampler_index
);
1053 void gen4_math(struct brw_codegen
*p
,
1054 struct brw_reg dest
,
1056 unsigned msg_reg_nr
,
1058 unsigned precision
);
1060 void gen6_math(struct brw_codegen
*p
,
1061 struct brw_reg dest
,
1063 struct brw_reg src0
,
1064 struct brw_reg src1
);
1066 void brw_oword_block_read(struct brw_codegen
*p
,
1067 struct brw_reg dest
,
1070 uint32_t bind_table_index
);
1072 unsigned brw_scratch_surface_idx(const struct brw_codegen
*p
);
1074 void brw_oword_block_read_scratch(struct brw_codegen
*p
,
1075 struct brw_reg dest
,
1080 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1085 void gen7_block_read_scratch(struct brw_codegen
*p
,
1086 struct brw_reg dest
,
1090 void brw_shader_time_add(struct brw_codegen
*p
,
1091 struct brw_reg payload
,
1092 uint32_t surf_index
);
1095 * Return the generation-specific jump distance scaling factor.
1097 * Given the number of instructions to jump, we need to scale by
1098 * some number to obtain the actual jump distance to program in an
1101 static inline unsigned
1102 brw_jump_scale(const struct gen_device_info
*devinfo
)
1104 /* Broadwell measures jump targets in bytes. */
1105 if (devinfo
->gen
>= 8)
1108 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1109 * to support compaction), so each 128-bit instruction requires 2 chunks.
1111 if (devinfo
->gen
>= 5)
1114 /* Gen4 simply uses the number of 128-bit instructions. */
1118 void brw_barrier(struct brw_codegen
*p
, struct brw_reg src
);
1120 /* If/else/endif. Works by manipulating the execution flags on each
1123 brw_inst
*brw_IF(struct brw_codegen
*p
, unsigned execute_size
);
1124 brw_inst
*gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1125 struct brw_reg src0
, struct brw_reg src1
);
1127 void brw_ELSE(struct brw_codegen
*p
);
1128 void brw_ENDIF(struct brw_codegen
*p
);
1132 brw_inst
*brw_DO(struct brw_codegen
*p
, unsigned execute_size
);
1134 brw_inst
*brw_WHILE(struct brw_codegen
*p
);
1136 brw_inst
*brw_BREAK(struct brw_codegen
*p
);
1137 brw_inst
*brw_CONT(struct brw_codegen
*p
);
1138 brw_inst
*brw_HALT(struct brw_codegen
*p
);
1142 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
);
1144 brw_inst
*brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1145 unsigned predicate_control
);
1147 void brw_NOP(struct brw_codegen
*p
);
1149 void brw_WAIT(struct brw_codegen
*p
);
1151 void brw_SYNC(struct brw_codegen
*p
, enum tgl_sync_function func
);
1153 /* Special case: there is never a destination, execution size will be
1156 void brw_CMP(struct brw_codegen
*p
,
1157 struct brw_reg dest
,
1158 unsigned conditional
,
1159 struct brw_reg src0
,
1160 struct brw_reg src1
);
1163 brw_untyped_atomic(struct brw_codegen
*p
,
1165 struct brw_reg payload
,
1166 struct brw_reg surface
,
1168 unsigned msg_length
,
1169 bool response_expected
,
1170 bool header_present
);
1173 brw_untyped_surface_read(struct brw_codegen
*p
,
1175 struct brw_reg payload
,
1176 struct brw_reg surface
,
1177 unsigned msg_length
,
1178 unsigned num_channels
);
1181 brw_untyped_surface_write(struct brw_codegen
*p
,
1182 struct brw_reg payload
,
1183 struct brw_reg surface
,
1184 unsigned msg_length
,
1185 unsigned num_channels
,
1186 bool header_present
);
1189 brw_memory_fence(struct brw_codegen
*p
,
1192 enum opcode send_op
,
1193 enum brw_message_target sfid
,
1198 brw_pixel_interpolator_query(struct brw_codegen
*p
,
1199 struct brw_reg dest
,
1203 struct brw_reg data
,
1204 unsigned msg_length
,
1205 unsigned response_length
);
1208 brw_find_live_channel(struct brw_codegen
*p
,
1210 struct brw_reg mask
);
1213 brw_broadcast(struct brw_codegen
*p
,
1216 struct brw_reg idx
);
1219 brw_float_controls_mode(struct brw_codegen
*p
,
1220 unsigned mode
, unsigned mask
);
1222 /***********************************************************************
1226 void brw_copy_indirect_to_indirect(struct brw_codegen
*p
,
1227 struct brw_indirect dst_ptr
,
1228 struct brw_indirect src_ptr
,
1231 void brw_copy_from_indirect(struct brw_codegen
*p
,
1233 struct brw_indirect ptr
,
1236 void brw_copy4(struct brw_codegen
*p
,
1241 void brw_copy8(struct brw_codegen
*p
,
1246 void brw_math_invert( struct brw_codegen
*p
,
1248 struct brw_reg src
);
1250 void brw_set_src1(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
1252 void brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*insn
,
1253 unsigned desc
, unsigned ex_desc
);
1256 brw_set_desc(struct brw_codegen
*p
, brw_inst
*insn
, unsigned desc
)
1258 brw_set_desc_ex(p
, insn
, desc
, 0);
1261 void brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
);
1263 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod
);
1264 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod
);
1266 /* brw_eu_compact.c */
1267 void brw_init_compaction_tables(const struct gen_device_info
*devinfo
);
1268 void brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1269 struct disasm_info
*disasm
);
1270 void brw_uncompact_instruction(const struct gen_device_info
*devinfo
,
1271 brw_inst
*dst
, brw_compact_inst
*src
);
1272 bool brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1273 brw_compact_inst
*dst
, const brw_inst
*src
);
1275 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1276 brw_inst
*orig
, brw_inst
*uncompacted
);
1278 /* brw_eu_validate.c */
1279 bool brw_validate_instruction(const struct gen_device_info
*devinfo
,
1280 const brw_inst
*inst
, int offset
,
1281 struct disasm_info
*disasm
);
1282 bool brw_validate_instructions(const struct gen_device_info
*devinfo
,
1283 const void *assembly
, int start_offset
, int end_offset
,
1284 struct disasm_info
*disasm
);
1287 next_offset(const struct gen_device_info
*devinfo
, void *store
, int offset
)
1289 brw_inst
*insn
= (brw_inst
*)((char *)store
+ offset
);
1291 if (brw_inst_cmpt_control(devinfo
, insn
))
1297 struct opcode_desc
{
1306 const struct opcode_desc
*
1307 brw_opcode_desc(const struct gen_device_info
*devinfo
, enum opcode opcode
);
1309 const struct opcode_desc
*
1310 brw_opcode_desc_from_hw(const struct gen_device_info
*devinfo
, unsigned hw
);
1312 static inline unsigned
1313 brw_opcode_encode(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1315 return brw_opcode_desc(devinfo
, opcode
)->hw
;
1318 static inline enum opcode
1319 brw_opcode_decode(const struct gen_device_info
*devinfo
, unsigned hw
)
1321 const struct opcode_desc
*desc
= brw_opcode_desc_from_hw(devinfo
, hw
);
1322 return desc
? (enum opcode
)desc
->ir
: BRW_OPCODE_ILLEGAL
;
1326 brw_inst_set_opcode(const struct gen_device_info
*devinfo
,
1327 brw_inst
*inst
, enum opcode opcode
)
1329 brw_inst_set_hw_opcode(devinfo
, inst
, brw_opcode_encode(devinfo
, opcode
));
1332 static inline enum opcode
1333 brw_inst_opcode(const struct gen_device_info
*devinfo
, const brw_inst
*inst
)
1335 return brw_opcode_decode(devinfo
, brw_inst_hw_opcode(devinfo
, inst
));
1339 is_3src(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1341 const struct opcode_desc
*desc
= brw_opcode_desc(devinfo
, opcode
);
1342 return desc
&& desc
->nsrc
== 3;
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)