2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
39 #include "brw_eu_defines.h"
41 #include "brw_disasm_info.h"
/* Capacity of the `stack` array of struct brw_insn_state that clients use
 * to push/pop instruction state.
 */
47 #define BRW_EU_MAX_INSN_STACK 5
49 struct brw_insn_state
{
50 /* One of BRW_EXECUTE_* */
53 /* Group in units of channels */
56 /* Compression control on gen4-5 */
59 /* One of BRW_MASK_* */
60 unsigned mask_control
:1;
62 /* Scheduling info for Gen12+ */
67 /* One of BRW_ALIGN_* */
68 unsigned access_mode
:1;
70 /* One of BRW_PREDICATE_* */
71 enum brw_predicate predicate
:4;
75 /* Flag subreg. Bottom bit is subreg, top bit is reg */
76 unsigned flag_subreg
:2;
78 bool acc_wr_control
:1;
82 /* A helper for accessing the last instruction emitted. This makes it easy
83 * to set various bits on an instruction without having to create temporary
84 * variables and assign the emitted instruction to those.
86 #define brw_last_inst (&p->store[p->nr_insn - 1])
92 unsigned int next_insn_offset
;
96 /* Allow clients to push/pop instruction state:
98 struct brw_insn_state stack
[BRW_EU_MAX_INSN_STACK
];
99 struct brw_insn_state
*current
;
101 /** Whether or not the user wants automatic exec sizes
103 * If true, codegen will try to automatically infer the exec size of an
104 * instruction from the width of the destination register. If false, it
105 * will take whatever is set by brw_set_default_exec_size verbatim.
107 * This is set to true by default in brw_init_codegen.
109 bool automatic_exec_sizes
;
111 bool single_program_flow
;
112 const struct gen_device_info
*devinfo
;
114 /* Control flow stacks:
115 * - if_stack contains IF and ELSE instructions which must be patched
116 * (and popped) once the matching ENDIF instruction is encountered.
118 * Just store the instruction pointer (an index).
122 int if_stack_array_size
;
125 * loop_stack contains the instruction pointers of the starts of loops which
126 * must be patched (and popped) once the matching WHILE instruction is
131 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
132 * blocks they were popping out of, to fix up the mask stack. This tracks
133 * the IF/ENDIF nesting in each current nested loop level.
135 int *if_depth_in_loop
;
136 int loop_stack_depth
;
137 int loop_stack_array_size
;
140 void brw_pop_insn_state( struct brw_codegen
*p
);
141 void brw_push_insn_state( struct brw_codegen
*p
);
142 unsigned brw_get_default_exec_size(struct brw_codegen
*p
);
143 unsigned brw_get_default_group(struct brw_codegen
*p
);
144 unsigned brw_get_default_access_mode(struct brw_codegen
*p
);
145 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen
*p
);
146 void brw_set_default_exec_size(struct brw_codegen
*p
, unsigned value
);
147 void brw_set_default_mask_control( struct brw_codegen
*p
, unsigned value
);
148 void brw_set_default_saturate( struct brw_codegen
*p
, bool enable
);
149 void brw_set_default_access_mode( struct brw_codegen
*p
, unsigned access_mode
);
150 void brw_inst_set_compression(const struct gen_device_info
*devinfo
,
151 brw_inst
*inst
, bool on
);
152 void brw_set_default_compression(struct brw_codegen
*p
, bool on
);
153 void brw_inst_set_group(const struct gen_device_info
*devinfo
,
154 brw_inst
*inst
, unsigned group
);
155 void brw_set_default_group(struct brw_codegen
*p
, unsigned group
);
156 void brw_set_default_compression_control(struct brw_codegen
*p
, enum brw_compression c
);
157 void brw_set_default_predicate_control(struct brw_codegen
*p
, enum brw_predicate pc
);
158 void brw_set_default_predicate_inverse(struct brw_codegen
*p
, bool predicate_inverse
);
159 void brw_set_default_flag_reg(struct brw_codegen
*p
, int reg
, int subreg
);
160 void brw_set_default_acc_write_control(struct brw_codegen
*p
, unsigned value
);
161 void brw_set_default_swsb(struct brw_codegen
*p
, struct tgl_swsb value
);
163 void brw_init_codegen(const struct gen_device_info
*, struct brw_codegen
*p
,
165 int brw_disassemble_inst(FILE *file
, const struct gen_device_info
*devinfo
,
166 const struct brw_inst
*inst
, bool is_compacted
);
167 void brw_disassemble(const struct gen_device_info
*devinfo
,
168 const void *assembly
, int start
, int end
, FILE *out
);
169 const unsigned *brw_get_program( struct brw_codegen
*p
, unsigned *sz
);
171 bool brw_try_override_assembly(struct brw_codegen
*p
, int start_offset
,
172 const char *identifier
);
174 brw_inst
*brw_next_insn(struct brw_codegen
*p
, unsigned opcode
);
175 void brw_set_dest(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg dest
);
176 void brw_set_src0(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
178 void gen6_resolve_implied_move(struct brw_codegen
*p
,
180 unsigned msg_reg_nr
);
182 /* Helpers for regular instructions:
185 brw_inst *brw_##OP(struct brw_codegen *p, \
186 struct brw_reg dest, \
187 struct brw_reg src0);
190 brw_inst *brw_##OP(struct brw_codegen *p, \
191 struct brw_reg dest, \
192 struct brw_reg src0, \
193 struct brw_reg src1);
196 brw_inst *brw_##OP(struct brw_codegen *p, \
197 struct brw_reg dest, \
198 struct brw_reg src0, \
199 struct brw_reg src1, \
200 struct brw_reg src2);
252 /* Helpers for SEND instruction:
256 * Construct a message descriptor immediate with the specified common
257 * descriptor controls.
259 static inline uint32_t
260 brw_message_desc(const struct gen_device_info
*devinfo
,
262 unsigned response_length
,
265 if (devinfo
->gen
>= 5) {
266 return (SET_BITS(msg_length
, 28, 25) |
267 SET_BITS(response_length
, 24, 20) |
268 SET_BITS(header_present
, 19, 19));
270 return (SET_BITS(msg_length
, 23, 20) |
271 SET_BITS(response_length
, 19, 16));
/**
 * Read the message-length field back out of a message descriptor.
 *
 * The field is selected with GET_BITS(desc, 28, 25) when devinfo->gen >= 5
 * and with GET_BITS(desc, 23, 20) otherwise; these are the same positions
 * brw_message_desc() uses when packing msg_length.
 */
275 static inline unsigned
276 brw_message_desc_mlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
278 if (devinfo
->gen
>= 5)
279 return GET_BITS(desc
, 28, 25);
/* Older generations keep the field at 23..20. */
281 return GET_BITS(desc
, 23, 20);
/**
 * Read the response-length field back out of a message descriptor.
 *
 * The field is selected with GET_BITS(desc, 24, 20) when devinfo->gen >= 5
 * and with GET_BITS(desc, 19, 16) otherwise; these are the same positions
 * brw_message_desc() uses when packing response_length.
 */
284 static inline unsigned
285 brw_message_desc_rlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
287 if (devinfo
->gen
>= 5)
288 return GET_BITS(desc
, 24, 20);
/* Older generations keep the field at 19..16. */
290 return GET_BITS(desc
, 19, 16);
294 brw_message_desc_header_present(ASSERTED
const struct gen_device_info
*devinfo
,
297 assert(devinfo
->gen
>= 5);
298 return GET_BITS(desc
, 19, 19);
/**
 * Construct an extended message descriptor that carries only the extended
 * message length, packed with SET_BITS(ex_msg_length, 9, 6).
 *
 * \p devinfo is marked UNUSED and is not consulted.
 */
301 static inline unsigned
302 brw_message_ex_desc(UNUSED
const struct gen_device_info
*devinfo
,
303 unsigned ex_msg_length
)
305 return SET_BITS(ex_msg_length
, 9, 6);
308 static inline unsigned
309 brw_message_ex_desc_ex_mlen(UNUSED
const struct gen_device_info
*devinfo
,
312 return GET_BITS(ex_desc
, 9, 6);
315 static inline uint32_t
316 brw_urb_desc(const struct gen_device_info
*devinfo
,
318 bool per_slot_offset_present
,
319 bool channel_mask_present
,
320 unsigned global_offset
)
322 if (devinfo
->gen
>= 8) {
323 return (SET_BITS(per_slot_offset_present
, 17, 17) |
324 SET_BITS(channel_mask_present
, 15, 15) |
325 SET_BITS(global_offset
, 14, 4) |
326 SET_BITS(msg_type
, 3, 0));
327 } else if (devinfo
->gen
>= 7) {
328 assert(!channel_mask_present
);
329 return (SET_BITS(per_slot_offset_present
, 16, 16) |
330 SET_BITS(global_offset
, 13, 3) |
331 SET_BITS(msg_type
, 3, 0));
333 unreachable("unhandled URB write generation");
337 static inline uint32_t
338 brw_urb_desc_msg_type(ASSERTED
const struct gen_device_info
*devinfo
,
341 assert(devinfo
->gen
>= 7);
342 return GET_BITS(desc
, 3, 0);
346 * Construct a message descriptor immediate with the specified sampler
349 static inline uint32_t
350 brw_sampler_desc(const struct gen_device_info
*devinfo
,
351 unsigned binding_table_index
,
355 unsigned return_format
)
357 const unsigned desc
= (SET_BITS(binding_table_index
, 7, 0) |
358 SET_BITS(sampler
, 11, 8));
359 if (devinfo
->gen
>= 7)
360 return (desc
| SET_BITS(msg_type
, 16, 12) |
361 SET_BITS(simd_mode
, 18, 17));
362 else if (devinfo
->gen
>= 5)
363 return (desc
| SET_BITS(msg_type
, 15, 12) |
364 SET_BITS(simd_mode
, 17, 16));
365 else if (devinfo
->is_g4x
)
366 return desc
| SET_BITS(msg_type
, 15, 12);
368 return (desc
| SET_BITS(return_format
, 13, 12) |
369 SET_BITS(msg_type
, 15, 14));
372 static inline unsigned
373 brw_sampler_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
376 return GET_BITS(desc
, 7, 0);
/**
 * Read the sampler field back out of a sampler message descriptor.
 *
 * Uses GET_BITS(desc, 11, 8), the same position brw_sampler_desc() packs
 * `sampler` into. \p devinfo is marked UNUSED and is not consulted.
 */
379 static inline unsigned
380 brw_sampler_desc_sampler(UNUSED
const struct gen_device_info
*devinfo
, uint32_t desc
)
382 return GET_BITS(desc
, 11, 8);
/**
 * Read the message-type field back out of a sampler message descriptor.
 *
 * The field position depends on the device, matching the packing done by
 * brw_sampler_desc():
 *   - devinfo->gen >= 7:                      GET_BITS(desc, 16, 12)
 *   - devinfo->gen >= 5 or devinfo->is_g4x:   GET_BITS(desc, 15, 12)
 *   - anything else:                          GET_BITS(desc, 15, 14)
 */
385 static inline unsigned
386 brw_sampler_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
388 if (devinfo
->gen
>= 7)
389 return GET_BITS(desc
, 16, 12);
390 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
391 return GET_BITS(desc
, 15, 12);
/* Remaining devices use the narrow 15..14 field. */
393 return GET_BITS(desc
, 15, 14);
/**
 * Read the SIMD-mode field back out of a sampler message descriptor.
 *
 * Asserts devinfo->gen >= 5. The field is GET_BITS(desc, 18, 17) when
 * devinfo->gen >= 7 and GET_BITS(desc, 17, 16) otherwise, matching the
 * positions brw_sampler_desc() packs simd_mode into for those generations.
 */
396 static inline unsigned
397 brw_sampler_desc_simd_mode(const struct gen_device_info
*devinfo
, uint32_t desc
)
399 assert(devinfo
->gen
>= 5);
400 if (devinfo
->gen
>= 7)
401 return GET_BITS(desc
, 18, 17);
/* gen 5 and 6 layout. */
403 return GET_BITS(desc
, 17, 16);
406 static inline unsigned
407 brw_sampler_desc_return_format(ASSERTED
const struct gen_device_info
*devinfo
,
410 assert(devinfo
->gen
== 4 && !devinfo
->is_g4x
);
411 return GET_BITS(desc
, 13, 12);
415 * Construct a message descriptor for the dataport
417 static inline uint32_t
418 brw_dp_desc(const struct gen_device_info
*devinfo
,
419 unsigned binding_table_index
,
421 unsigned msg_control
)
423 /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
426 assert(devinfo
->gen
>= 6);
427 const unsigned desc
= SET_BITS(binding_table_index
, 7, 0);
428 if (devinfo
->gen
>= 8) {
429 return (desc
| SET_BITS(msg_control
, 13, 8) |
430 SET_BITS(msg_type
, 18, 14));
431 } else if (devinfo
->gen
>= 7) {
432 return (desc
| SET_BITS(msg_control
, 13, 8) |
433 SET_BITS(msg_type
, 17, 14));
435 return (desc
| SET_BITS(msg_control
, 12, 8) |
436 SET_BITS(msg_type
, 16, 13));
440 static inline unsigned
441 brw_dp_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
444 return GET_BITS(desc
, 7, 0);
/**
 * Read the message-type field back out of a dataport message descriptor.
 *
 * Asserts devinfo->gen >= 6. The field position follows the packing in
 * brw_dp_desc():
 *   - devinfo->gen >= 8:   GET_BITS(desc, 18, 14)
 *   - devinfo->gen >= 7:   GET_BITS(desc, 17, 14)
 *   - otherwise (gen 6):   GET_BITS(desc, 16, 13)
 */
447 static inline unsigned
448 brw_dp_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
450 assert(devinfo
->gen
>= 6);
451 if (devinfo
->gen
>= 8)
452 return GET_BITS(desc
, 18, 14);
453 else if (devinfo
->gen
>= 7)
454 return GET_BITS(desc
, 17, 14);
/* gen 6 layout. */
456 return GET_BITS(desc
, 16, 13);
/**
 * Read the message-control field back out of a dataport message descriptor.
 *
 * Asserts devinfo->gen >= 6. The field is GET_BITS(desc, 13, 8) when
 * devinfo->gen >= 7 and GET_BITS(desc, 12, 8) otherwise, matching the
 * positions brw_dp_desc() packs msg_control into.
 */
459 static inline unsigned
460 brw_dp_desc_msg_control(const struct gen_device_info
*devinfo
, uint32_t desc
)
462 assert(devinfo
->gen
>= 6);
463 if (devinfo
->gen
>= 7)
464 return GET_BITS(desc
, 13, 8);
/* gen 6 layout: one bit narrower. */
466 return GET_BITS(desc
, 12, 8);
470 * Construct a message descriptor immediate with the specified dataport read
473 static inline uint32_t
474 brw_dp_read_desc(const struct gen_device_info
*devinfo
,
475 unsigned binding_table_index
,
476 unsigned msg_control
,
478 unsigned target_cache
)
480 if (devinfo
->gen
>= 6)
481 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
);
482 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
483 return (SET_BITS(binding_table_index
, 7, 0) |
484 SET_BITS(msg_control
, 10, 8) |
485 SET_BITS(msg_type
, 13, 11) |
486 SET_BITS(target_cache
, 15, 14));
488 return (SET_BITS(binding_table_index
, 7, 0) |
489 SET_BITS(msg_control
, 11, 8) |
490 SET_BITS(msg_type
, 13, 12) |
491 SET_BITS(target_cache
, 15, 14));
/**
 * Read the message-type field back out of a dataport read descriptor.
 *
 * For devinfo->gen >= 6 this defers to brw_dp_desc_msg_type(). Before that
 * the field is GET_BITS(desc, 13, 11) when devinfo->gen >= 5 or
 * devinfo->is_g4x, and GET_BITS(desc, 13, 12) otherwise, matching the
 * positions brw_dp_read_desc() packs msg_type into.
 */
494 static inline unsigned
495 brw_dp_read_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
497 if (devinfo
->gen
>= 6)
498 return brw_dp_desc_msg_type(devinfo
, desc
);
499 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
500 return GET_BITS(desc
, 13, 11);
/* Remaining devices use the 13..12 field. */
502 return GET_BITS(desc
, 13, 12);
505 static inline unsigned
506 brw_dp_read_desc_msg_control(const struct gen_device_info
*devinfo
,
509 if (devinfo
->gen
>= 6)
510 return brw_dp_desc_msg_control(devinfo
, desc
);
511 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
512 return GET_BITS(desc
, 10, 8);
514 return GET_BITS(desc
, 11, 8);
518 * Construct a message descriptor immediate with the specified dataport write
521 static inline uint32_t
522 brw_dp_write_desc(const struct gen_device_info
*devinfo
,
523 unsigned binding_table_index
,
524 unsigned msg_control
,
526 unsigned last_render_target
,
527 unsigned send_commit_msg
)
529 assert(devinfo
->gen
<= 6 || !send_commit_msg
);
530 if (devinfo
->gen
>= 6)
531 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
) |
532 SET_BITS(last_render_target
, 12, 12) |
533 SET_BITS(send_commit_msg
, 17, 17);
535 return (SET_BITS(binding_table_index
, 7, 0) |
536 SET_BITS(msg_control
, 11, 8) |
537 SET_BITS(last_render_target
, 11, 11) |
538 SET_BITS(msg_type
, 14, 12) |
539 SET_BITS(send_commit_msg
, 15, 15));
542 static inline unsigned
543 brw_dp_write_desc_msg_type(const struct gen_device_info
*devinfo
,
546 if (devinfo
->gen
>= 6)
547 return brw_dp_desc_msg_type(devinfo
, desc
);
549 return GET_BITS(desc
, 14, 12);
552 static inline unsigned
553 brw_dp_write_desc_msg_control(const struct gen_device_info
*devinfo
,
556 if (devinfo
->gen
>= 6)
557 return brw_dp_desc_msg_control(devinfo
, desc
);
559 return GET_BITS(desc
, 11, 8);
563 brw_dp_write_desc_last_render_target(const struct gen_device_info
*devinfo
,
566 if (devinfo
->gen
>= 6)
567 return GET_BITS(desc
, 12, 12);
569 return GET_BITS(desc
, 11, 11);
573 brw_dp_write_desc_write_commit(const struct gen_device_info
*devinfo
,
576 assert(devinfo
->gen
<= 6);
577 if (devinfo
->gen
>= 6)
578 return GET_BITS(desc
, 17, 17);
580 return GET_BITS(desc
, 15, 15);
584 * Construct a message descriptor immediate with the specified dataport
585 * surface function controls.
587 static inline uint32_t
588 brw_dp_surface_desc(const struct gen_device_info
*devinfo
,
590 unsigned msg_control
)
592 assert(devinfo
->gen
>= 7);
593 /* We'll OR in the binding table index later */
594 return brw_dp_desc(devinfo
, 0, msg_type
, msg_control
);
597 static inline uint32_t
598 brw_dp_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
599 unsigned exec_size
, /**< 0 for SIMD4x2 */
601 bool response_expected
)
603 assert(exec_size
<= 8 || exec_size
== 16);
606 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
608 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
610 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
613 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
616 const unsigned msg_control
=
617 SET_BITS(atomic_op
, 3, 0) |
618 SET_BITS(0 < exec_size
&& exec_size
<= 8, 4, 4) |
619 SET_BITS(response_expected
, 5, 5);
621 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
624 static inline uint32_t
625 brw_dp_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
628 bool response_expected
)
630 assert(exec_size
<= 8 || exec_size
== 16);
631 assert(devinfo
->gen
>= 9);
633 assert(exec_size
> 0);
634 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP
;
636 const unsigned msg_control
=
637 SET_BITS(atomic_op
, 1, 0) |
638 SET_BITS(exec_size
<= 8, 4, 4) |
639 SET_BITS(response_expected
, 5, 5);
641 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
/**
 * Compute the 4-bit channel mask placed in bits 3:0 of the surface message
 * control word by the untyped/typed surface descriptor builders.
 *
 * The result has its low \p num_channels bits cleared and the remaining
 * bits (up to bit 3) set, i.e. 0xf, 0xe, 0xc, 0x8 and 0x0 for 0..4 channels.
 *
 * Any \p num_channels of 4 or more yields 0.  That matches the value of the
 * masked shift wherever the shift is defined, but avoids undefined
 * behaviour: shifting by the width of the type or more is undefined in C,
 * and so is shifting a signed int constant into the sign bit.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   if (num_channels >= 4)
      return 0;

   /* Unsigned literals keep the shift out of signed-overflow territory. */
   return 0xfu & (0xfu << num_channels);
}
651 static inline uint32_t
652 brw_dp_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
653 unsigned exec_size
, /**< 0 for SIMD4x2 */
654 unsigned num_channels
,
657 assert(exec_size
<= 8 || exec_size
== 16);
661 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
662 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
;
664 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
;
668 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
669 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
671 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
675 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
676 if (write
&& devinfo
->gen
== 7 && !devinfo
->is_haswell
&& exec_size
== 0)
679 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
680 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
681 exec_size
<= 8 ? 2 : 1;
683 const unsigned msg_control
=
684 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
685 SET_BITS(simd_mode
, 5, 4);
687 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
690 static inline unsigned
691 brw_mdc_ds(unsigned bit_size
)
695 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
697 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
699 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
701 unreachable("Unsupported bit_size for byte scattered messages");
705 static inline uint32_t
706 brw_dp_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
711 assert(exec_size
<= 8 || exec_size
== 16);
713 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
714 const unsigned msg_type
=
715 write
? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
:
716 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
;
718 assert(exec_size
> 0);
719 const unsigned msg_control
=
720 SET_BITS(exec_size
== 16, 0, 0) |
721 SET_BITS(brw_mdc_ds(bit_size
), 3, 2);
723 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
726 static inline uint32_t
727 brw_dp_dword_scattered_rw_desc(const struct gen_device_info
*devinfo
,
731 assert(exec_size
== 8 || exec_size
== 16);
735 if (devinfo
->gen
>= 6) {
736 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
738 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
741 if (devinfo
->gen
>= 7) {
742 msg_type
= GEN7_DATAPORT_DC_DWORD_SCATTERED_READ
;
743 } else if (devinfo
->gen
> 4 || devinfo
->is_g4x
) {
744 msg_type
= G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
746 msg_type
= BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
750 const unsigned msg_control
=
751 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
752 SET_BITS(exec_size
== 16, 0, 0);
754 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
757 static inline uint32_t
758 brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
759 unsigned exec_size
, /**< 0 for SIMD4x2 */
760 unsigned num_channels
,
763 assert(exec_size
<= 8 || exec_size
== 16);
764 assert(devinfo
->gen
>= 8);
767 write
? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE
:
768 GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ
;
770 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
771 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
772 exec_size
<= 8 ? 2 : 1;
774 const unsigned msg_control
=
775 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
776 SET_BITS(simd_mode
, 5, 4);
778 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
779 msg_type
, msg_control
);
783 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
786 static inline uint32_t
787 brw_mdc_a64_ds(unsigned elems
)
795 unreachable("Unsupported elmeent count for A64 scattered message");
799 static inline uint32_t
800 brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
801 unsigned exec_size
, /**< 0 for SIMD4x2 */
805 assert(exec_size
<= 8 || exec_size
== 16);
806 assert(devinfo
->gen
>= 8);
809 write
? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE
:
810 GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ
;
812 const unsigned msg_control
=
813 SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE
, 1, 0) |
814 SET_BITS(brw_mdc_a64_ds(bit_size
/ 8), 3, 2) |
815 SET_BITS(exec_size
== 16, 4, 4);
817 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
818 msg_type
, msg_control
);
821 static inline uint32_t
822 brw_dp_a64_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
823 ASSERTED
unsigned exec_size
, /**< 0 for SIMD4x2 */
826 bool response_expected
)
828 assert(exec_size
== 8);
829 assert(devinfo
->gen
>= 8);
830 assert(bit_size
== 32 || bit_size
== 64);
832 const unsigned msg_type
= GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP
;
834 const unsigned msg_control
=
835 SET_BITS(atomic_op
, 3, 0) |
836 SET_BITS(bit_size
== 64, 4, 4) |
837 SET_BITS(response_expected
, 5, 5);
839 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
840 msg_type
, msg_control
);
843 static inline uint32_t
844 brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
845 ASSERTED
unsigned exec_size
,
847 bool response_expected
)
849 assert(exec_size
== 8);
850 assert(devinfo
->gen
>= 9);
852 assert(exec_size
> 0);
853 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP
;
855 const unsigned msg_control
=
856 SET_BITS(atomic_op
, 1, 0) |
857 SET_BITS(response_expected
, 5, 5);
859 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
860 msg_type
, msg_control
);
863 static inline uint32_t
864 brw_dp_typed_atomic_desc(const struct gen_device_info
*devinfo
,
868 bool response_expected
)
870 assert(exec_size
> 0 || exec_group
== 0);
871 assert(exec_group
% 8 == 0);
874 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
875 if (exec_size
== 0) {
876 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
;
878 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
;
881 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
882 assert(exec_size
> 0);
883 msg_type
= GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
;
886 const bool high_sample_mask
= (exec_group
/ 8) % 2 == 1;
888 const unsigned msg_control
=
889 SET_BITS(atomic_op
, 3, 0) |
890 SET_BITS(high_sample_mask
, 4, 4) |
891 SET_BITS(response_expected
, 5, 5);
893 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
896 static inline uint32_t
897 brw_dp_typed_surface_rw_desc(const struct gen_device_info
*devinfo
,
900 unsigned num_channels
,
903 assert(exec_size
> 0 || exec_group
== 0);
904 assert(exec_group
% 8 == 0);
906 /* Typed surface reads and writes don't support SIMD16 */
907 assert(exec_size
<= 8);
911 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
912 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
;
914 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
;
917 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
918 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
;
920 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_READ
;
924 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
925 unsigned msg_control
;
926 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
927 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
928 const unsigned slot_group
= exec_size
== 0 ? 0 : /* SIMD4x2 */
929 1 + ((exec_group
/ 8) % 2);
932 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
933 SET_BITS(slot_group
, 5, 4);
935 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
936 assert(exec_size
> 0);
937 const unsigned slot_group
= ((exec_group
/ 8) % 2);
940 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
941 SET_BITS(slot_group
, 5, 5);
944 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
948 * Construct a message descriptor immediate with the specified pixel
949 * interpolator function controls.
951 static inline uint32_t
952 brw_pixel_interp_desc(UNUSED
const struct gen_device_info
*devinfo
,
958 return (SET_BITS(slot_group
, 11, 11) |
959 SET_BITS(msg_type
, 13, 12) |
960 SET_BITS(!!noperspective
, 14, 14) |
961 SET_BITS(simd_mode
, 16, 16));
964 void brw_urb_WRITE(struct brw_codegen
*p
,
968 enum brw_urb_write_flags flags
,
970 unsigned response_length
,
975 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
976 * desc. If \p desc is not an immediate it will be transparently loaded to an
977 * address register using an OR instruction.
980 brw_send_indirect_message(struct brw_codegen
*p
,
983 struct brw_reg payload
,
989 brw_send_indirect_split_message(struct brw_codegen
*p
,
992 struct brw_reg payload0
,
993 struct brw_reg payload1
,
996 struct brw_reg ex_desc
,
997 unsigned ex_desc_imm
,
1000 void brw_ff_sync(struct brw_codegen
*p
,
1001 struct brw_reg dest
,
1002 unsigned msg_reg_nr
,
1003 struct brw_reg src0
,
1005 unsigned response_length
,
1008 void brw_svb_write(struct brw_codegen
*p
,
1009 struct brw_reg dest
,
1010 unsigned msg_reg_nr
,
1011 struct brw_reg src0
,
1012 unsigned binding_table_index
,
1013 bool send_commit_msg
);
1015 brw_inst
*brw_fb_WRITE(struct brw_codegen
*p
,
1016 struct brw_reg payload
,
1017 struct brw_reg implied_header
,
1018 unsigned msg_control
,
1019 unsigned binding_table_index
,
1020 unsigned msg_length
,
1021 unsigned response_length
,
1023 bool last_render_target
,
1024 bool header_present
);
1026 brw_inst
*gen9_fb_READ(struct brw_codegen
*p
,
1028 struct brw_reg payload
,
1029 unsigned binding_table_index
,
1030 unsigned msg_length
,
1031 unsigned response_length
,
1034 void brw_SAMPLE(struct brw_codegen
*p
,
1035 struct brw_reg dest
,
1036 unsigned msg_reg_nr
,
1037 struct brw_reg src0
,
1038 unsigned binding_table_index
,
1041 unsigned response_length
,
1042 unsigned msg_length
,
1043 unsigned header_present
,
1045 unsigned return_format
);
1047 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
1048 struct brw_reg header
,
1049 struct brw_reg sampler_index
);
1051 void gen4_math(struct brw_codegen
*p
,
1052 struct brw_reg dest
,
1054 unsigned msg_reg_nr
,
1056 unsigned precision
);
1058 void gen6_math(struct brw_codegen
*p
,
1059 struct brw_reg dest
,
1061 struct brw_reg src0
,
1062 struct brw_reg src1
);
1064 void brw_oword_block_read(struct brw_codegen
*p
,
1065 struct brw_reg dest
,
1068 uint32_t bind_table_index
);
1070 unsigned brw_scratch_surface_idx(const struct brw_codegen
*p
);
1072 void brw_oword_block_read_scratch(struct brw_codegen
*p
,
1073 struct brw_reg dest
,
1078 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1083 void gen7_block_read_scratch(struct brw_codegen
*p
,
1084 struct brw_reg dest
,
1088 void brw_shader_time_add(struct brw_codegen
*p
,
1089 struct brw_reg payload
,
1090 uint32_t surf_index
);
1093 * Return the generation-specific jump distance scaling factor.
1095 * Given the number of instructions to jump, we need to scale by
1096 * some number to obtain the actual jump distance to program in an
1099 static inline unsigned
1100 brw_jump_scale(const struct gen_device_info
*devinfo
)
1102 /* Broadwell measures jump targets in bytes. */
1103 if (devinfo
->gen
>= 8)
1106 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1107 * to support compaction), so each 128-bit instruction requires 2 chunks.
1109 if (devinfo
->gen
>= 5)
1112 /* Gen4 simply uses the number of 128-bit instructions. */
1116 void brw_barrier(struct brw_codegen
*p
, struct brw_reg src
);
1118 /* If/else/endif. Works by manipulating the execution flags on each
1121 brw_inst
*brw_IF(struct brw_codegen
*p
, unsigned execute_size
);
1122 brw_inst
*gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1123 struct brw_reg src0
, struct brw_reg src1
);
1125 void brw_ELSE(struct brw_codegen
*p
);
1126 void brw_ENDIF(struct brw_codegen
*p
);
1130 brw_inst
*brw_DO(struct brw_codegen
*p
, unsigned execute_size
);
1132 brw_inst
*brw_WHILE(struct brw_codegen
*p
);
1134 brw_inst
*brw_BREAK(struct brw_codegen
*p
);
1135 brw_inst
*brw_CONT(struct brw_codegen
*p
);
1136 brw_inst
*brw_HALT(struct brw_codegen
*p
);
1140 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
);
1142 brw_inst
*brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1143 unsigned predicate_control
);
1145 void brw_NOP(struct brw_codegen
*p
);
1147 void brw_WAIT(struct brw_codegen
*p
);
1149 void brw_SYNC(struct brw_codegen
*p
, enum tgl_sync_function func
);
1151 /* Special case: there is never a destination, execution size will be
1154 void brw_CMP(struct brw_codegen
*p
,
1155 struct brw_reg dest
,
1156 unsigned conditional
,
1157 struct brw_reg src0
,
1158 struct brw_reg src1
);
1161 brw_untyped_atomic(struct brw_codegen
*p
,
1163 struct brw_reg payload
,
1164 struct brw_reg surface
,
1166 unsigned msg_length
,
1167 bool response_expected
,
1168 bool header_present
);
1171 brw_untyped_surface_read(struct brw_codegen
*p
,
1173 struct brw_reg payload
,
1174 struct brw_reg surface
,
1175 unsigned msg_length
,
1176 unsigned num_channels
);
1179 brw_untyped_surface_write(struct brw_codegen
*p
,
1180 struct brw_reg payload
,
1181 struct brw_reg surface
,
1182 unsigned msg_length
,
1183 unsigned num_channels
,
1184 bool header_present
);
1187 brw_memory_fence(struct brw_codegen
*p
,
1190 enum opcode send_op
,
1191 enum brw_message_target sfid
,
1196 brw_pixel_interpolator_query(struct brw_codegen
*p
,
1197 struct brw_reg dest
,
1201 struct brw_reg data
,
1202 unsigned msg_length
,
1203 unsigned response_length
);
1206 brw_find_live_channel(struct brw_codegen
*p
,
1208 struct brw_reg mask
);
1211 brw_broadcast(struct brw_codegen
*p
,
1214 struct brw_reg idx
);
1217 brw_float_controls_mode(struct brw_codegen
*p
,
1218 unsigned mode
, unsigned mask
);
1220 /***********************************************************************
1224 void brw_copy_indirect_to_indirect(struct brw_codegen
*p
,
1225 struct brw_indirect dst_ptr
,
1226 struct brw_indirect src_ptr
,
1229 void brw_copy_from_indirect(struct brw_codegen
*p
,
1231 struct brw_indirect ptr
,
1234 void brw_copy4(struct brw_codegen
*p
,
1239 void brw_copy8(struct brw_codegen
*p
,
1244 void brw_math_invert( struct brw_codegen
*p
,
1246 struct brw_reg src
);
1248 void brw_set_src1(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
1250 void brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*insn
,
1251 unsigned desc
, unsigned ex_desc
);
1254 brw_set_desc(struct brw_codegen
*p
, brw_inst
*insn
, unsigned desc
)
1256 brw_set_desc_ex(p
, insn
, desc
, 0);
1259 void brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
);
1261 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod
);
1262 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod
);
1264 /* brw_eu_compact.c */
1265 void brw_init_compaction_tables(const struct gen_device_info
*devinfo
);
1266 void brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1267 struct disasm_info
*disasm
);
1268 void brw_uncompact_instruction(const struct gen_device_info
*devinfo
,
1269 brw_inst
*dst
, brw_compact_inst
*src
);
1270 bool brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1271 brw_compact_inst
*dst
, const brw_inst
*src
);
1273 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1274 brw_inst
*orig
, brw_inst
*uncompacted
);
1276 /* brw_eu_validate.c */
1277 bool brw_validate_instruction(const struct gen_device_info
*devinfo
,
1278 const brw_inst
*inst
, int offset
,
1279 struct disasm_info
*disasm
);
1280 bool brw_validate_instructions(const struct gen_device_info
*devinfo
,
1281 const void *assembly
, int start_offset
, int end_offset
,
1282 struct disasm_info
*disasm
);
1285 next_offset(const struct gen_device_info
*devinfo
, void *store
, int offset
)
1287 brw_inst
*insn
= (brw_inst
*)((char *)store
+ offset
);
1289 if (brw_inst_cmpt_control(devinfo
, insn
))
1295 struct opcode_desc
{
1304 const struct opcode_desc
*
1305 brw_opcode_desc(const struct gen_device_info
*devinfo
, enum opcode opcode
);
1307 const struct opcode_desc
*
1308 brw_opcode_desc_from_hw(const struct gen_device_info
*devinfo
, unsigned hw
);
/**
 * Translate an IR opcode into the hardware opcode value for \p devinfo.
 *
 * Looks the opcode up with brw_opcode_desc() and returns the `hw` member of
 * the descriptor.
 *
 * NOTE(review): the lookup result is dereferenced without a NULL check,
 * whereas brw_opcode_decode() does check its lookup. Presumably callers
 * only pass opcodes that exist on \p devinfo -- confirm.
 */
1310 static inline unsigned
1311 brw_opcode_encode(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1313 return brw_opcode_desc(devinfo
, opcode
)->hw
;
/**
 * Translate a hardware opcode value into the IR opcode for \p devinfo.
 *
 * Looks \p hw up with brw_opcode_desc_from_hw(). When a descriptor is found
 * its `ir` member is returned (cast to enum opcode); when the lookup yields
 * a null pointer the result is BRW_OPCODE_ILLEGAL.
 */
1316 static inline enum opcode
1317 brw_opcode_decode(const struct gen_device_info
*devinfo
, unsigned hw
)
1319 const struct opcode_desc
*desc
= brw_opcode_desc_from_hw(devinfo
, hw
);
/* A null descriptor means no match for this hardware opcode. */
1320 return desc
? (enum opcode
)desc
->ir
: BRW_OPCODE_ILLEGAL
;
1324 brw_inst_set_opcode(const struct gen_device_info
*devinfo
,
1325 brw_inst
*inst
, enum opcode opcode
)
1327 brw_inst_set_hw_opcode(devinfo
, inst
, brw_opcode_encode(devinfo
, opcode
));
/**
 * Return the IR opcode of an encoded instruction.
 *
 * Reads the hardware opcode with brw_inst_hw_opcode() and converts it with
 * brw_opcode_decode(), so the result is BRW_OPCODE_ILLEGAL whenever that
 * conversion finds no descriptor.
 */
1330 static inline enum opcode
1331 brw_inst_opcode(const struct gen_device_info
*devinfo
, const brw_inst
*inst
)
1333 return brw_opcode_decode(devinfo
, brw_inst_hw_opcode(devinfo
, inst
));
1337 is_3src(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1339 const struct opcode_desc
*desc
= brw_opcode_desc(devinfo
, opcode
);
1340 return desc
&& desc
->nsrc
== 3;
1343 /** Maximum SEND message length */
1344 #define BRW_MAX_MSG_LENGTH 15
1346 /** First MRF register used by spills (21 when gen == 6, otherwise 13) */
1347 #define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
1349 /** First MRF register used by pull loads (16 when gen == 6, otherwise 13) */
1350 #define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)