2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
39 #include "brw_eu_defines.h"
41 #include "brw_disasm_info.h"
/* Number of entries in the `stack` array of saved brw_insn_state values
 * declared further down in this header.
 */
47 #define BRW_EU_MAX_INSN_STACK 5
49 struct brw_insn_state
{
50 /* One of BRW_EXECUTE_* */
53 /* Group in units of channels */
56 /* Compression control on gen4-5 */
59 /* One of BRW_MASK_* */
60 unsigned mask_control
:1;
62 /* Scheduling info for Gen12+ */
67 /* One of BRW_ALIGN_* */
68 unsigned access_mode
:1;
70 /* One of BRW_PREDICATE_* */
71 enum brw_predicate predicate
:4;
75 /* Flag subreg. Bottom bit is subreg, top bit is reg */
76 unsigned flag_subreg
:2;
78 bool acc_wr_control
:1;
82 /* A helper for accessing the last instruction emitted. This makes it easy
83 * to set various bits on an instruction without having to create temporary
84 * variables and assign the emitted instruction to them.
86 #define brw_last_inst (&p->store[p->nr_insn - 1])
92 unsigned int next_insn_offset
;
96 /* Allow clients to push/pop instruction state:
98 struct brw_insn_state stack
[BRW_EU_MAX_INSN_STACK
];
99 struct brw_insn_state
*current
;
101 /** Whether or not the user wants automatic exec sizes
103 * If true, codegen will try to automatically infer the exec size of an
104 * instruction from the width of the destination register. If false, it
105 * will take whatever is set by brw_set_default_exec_size verbatim.
107 * This is set to true by default in brw_init_codegen.
109 bool automatic_exec_sizes
;
111 bool single_program_flow
;
112 const struct gen_device_info
*devinfo
;
114 /* Control flow stacks:
115 * - if_stack contains IF and ELSE instructions which must be patched
116 * (and popped) once the matching ENDIF instruction is encountered.
118 * Just store the instruction pointer (an index).
122 int if_stack_array_size
;
125 * loop_stack contains the instruction pointers of the starts of loops which
126 * must be patched (and popped) once the matching WHILE instruction is
131 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
132 * blocks they were popping out of, to fix up the mask stack. This tracks
133 * the IF/ENDIF nesting in each current nested loop level.
135 int *if_depth_in_loop
;
136 int loop_stack_depth
;
137 int loop_stack_array_size
;
/* Pop / push the default instruction state of codegen context `p`.
 * NOTE(review): presumably these move the `current` pointer within the
 * `stack` array (BRW_EU_MAX_INSN_STACK entries) declared above; the
 * implementation is not visible here, so confirm the overflow/underflow
 * behaviour against the definition before relying on it.
 */
140 void brw_pop_insn_state( struct brw_codegen
*p
);
141 void brw_push_insn_state( struct brw_codegen
*p
);
142 unsigned brw_get_default_exec_size(struct brw_codegen
*p
);
143 unsigned brw_get_default_group(struct brw_codegen
*p
);
144 unsigned brw_get_default_access_mode(struct brw_codegen
*p
);
145 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen
*p
);
146 void brw_set_default_exec_size(struct brw_codegen
*p
, unsigned value
);
147 void brw_set_default_mask_control( struct brw_codegen
*p
, unsigned value
);
148 void brw_set_default_saturate( struct brw_codegen
*p
, bool enable
);
149 void brw_set_default_access_mode( struct brw_codegen
*p
, unsigned access_mode
);
150 void brw_inst_set_compression(const struct gen_device_info
*devinfo
,
151 brw_inst
*inst
, bool on
);
152 void brw_set_default_compression(struct brw_codegen
*p
, bool on
);
153 void brw_inst_set_group(const struct gen_device_info
*devinfo
,
154 brw_inst
*inst
, unsigned group
);
155 void brw_set_default_group(struct brw_codegen
*p
, unsigned group
);
156 void brw_set_default_compression_control(struct brw_codegen
*p
, enum brw_compression c
);
157 void brw_set_default_predicate_control(struct brw_codegen
*p
, enum brw_predicate pc
);
158 void brw_set_default_predicate_inverse(struct brw_codegen
*p
, bool predicate_inverse
);
159 void brw_set_default_flag_reg(struct brw_codegen
*p
, int reg
, int subreg
);
160 void brw_set_default_acc_write_control(struct brw_codegen
*p
, unsigned value
);
161 void brw_set_default_swsb(struct brw_codegen
*p
, struct tgl_swsb value
);
163 void brw_init_codegen(const struct gen_device_info
*, struct brw_codegen
*p
,
165 int brw_disassemble_inst(FILE *file
, const struct gen_device_info
*devinfo
,
166 const struct brw_inst
*inst
, bool is_compacted
);
167 void brw_disassemble(const struct gen_device_info
*devinfo
,
168 const void *assembly
, int start
, int end
, FILE *out
);
169 const unsigned *brw_get_program( struct brw_codegen
*p
, unsigned *sz
);
171 bool brw_try_override_assembly(struct brw_codegen
*p
, int start_offset
,
172 const char *identifier
);
174 brw_inst
*brw_next_insn(struct brw_codegen
*p
, unsigned opcode
);
175 void brw_set_dest(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg dest
);
176 void brw_set_src0(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
178 void gen6_resolve_implied_move(struct brw_codegen
*p
,
180 unsigned msg_reg_nr
);
182 /* Helpers for regular instructions:
185 brw_inst *brw_##OP(struct brw_codegen *p, \
186 struct brw_reg dest, \
187 struct brw_reg src0);
190 brw_inst *brw_##OP(struct brw_codegen *p, \
191 struct brw_reg dest, \
192 struct brw_reg src0, \
193 struct brw_reg src1);
196 brw_inst *brw_##OP(struct brw_codegen *p, \
197 struct brw_reg dest, \
198 struct brw_reg src0, \
199 struct brw_reg src1, \
200 struct brw_reg src2);
251 /* Helpers for SEND instruction:
255 * Construct a message descriptor immediate with the specified common
256 * descriptor controls.
258 static inline uint32_t
259 brw_message_desc(const struct gen_device_info
*devinfo
,
261 unsigned response_length
,
264 if (devinfo
->gen
>= 5) {
265 return (SET_BITS(msg_length
, 28, 25) |
266 SET_BITS(response_length
, 24, 20) |
267 SET_BITS(header_present
, 19, 19));
269 return (SET_BITS(msg_length
, 23, 20) |
270 SET_BITS(response_length
, 19, 16));
274 static inline unsigned
275 brw_message_desc_mlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
277 if (devinfo
->gen
>= 5)
278 return GET_BITS(desc
, 28, 25);
280 return GET_BITS(desc
, 23, 20);
283 static inline unsigned
284 brw_message_desc_rlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
286 if (devinfo
->gen
>= 5)
287 return GET_BITS(desc
, 24, 20);
289 return GET_BITS(desc
, 19, 16);
293 brw_message_desc_header_present(ASSERTED
const struct gen_device_info
*devinfo
,
296 assert(devinfo
->gen
>= 5);
297 return GET_BITS(desc
, 19, 19);
300 static inline unsigned
301 brw_message_ex_desc(UNUSED
const struct gen_device_info
*devinfo
,
302 unsigned ex_msg_length
)
304 return SET_BITS(ex_msg_length
, 9, 6);
307 static inline unsigned
308 brw_message_ex_desc_ex_mlen(UNUSED
const struct gen_device_info
*devinfo
,
311 return GET_BITS(ex_desc
, 9, 6);
314 static inline uint32_t
315 brw_urb_desc(const struct gen_device_info
*devinfo
,
317 bool per_slot_offset_present
,
318 bool channel_mask_present
,
319 unsigned global_offset
)
321 if (devinfo
->gen
>= 8) {
322 return (SET_BITS(per_slot_offset_present
, 17, 17) |
323 SET_BITS(channel_mask_present
, 15, 15) |
324 SET_BITS(global_offset
, 14, 4) |
325 SET_BITS(msg_type
, 3, 0));
326 } else if (devinfo
->gen
>= 7) {
327 assert(!channel_mask_present
);
328 return (SET_BITS(per_slot_offset_present
, 16, 16) |
329 SET_BITS(global_offset
, 13, 3) |
330 SET_BITS(msg_type
, 3, 0));
332 unreachable("unhandled URB write generation");
337 * Construct a message descriptor immediate with the specified sampler
340 static inline uint32_t
341 brw_sampler_desc(const struct gen_device_info
*devinfo
,
342 unsigned binding_table_index
,
346 unsigned return_format
)
348 const unsigned desc
= (SET_BITS(binding_table_index
, 7, 0) |
349 SET_BITS(sampler
, 11, 8));
350 if (devinfo
->gen
>= 7)
351 return (desc
| SET_BITS(msg_type
, 16, 12) |
352 SET_BITS(simd_mode
, 18, 17));
353 else if (devinfo
->gen
>= 5)
354 return (desc
| SET_BITS(msg_type
, 15, 12) |
355 SET_BITS(simd_mode
, 17, 16));
356 else if (devinfo
->is_g4x
)
357 return desc
| SET_BITS(msg_type
, 15, 12);
359 return (desc
| SET_BITS(return_format
, 13, 12) |
360 SET_BITS(msg_type
, 15, 14));
363 static inline unsigned
364 brw_sampler_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
367 return GET_BITS(desc
, 7, 0);
370 static inline unsigned
371 brw_sampler_desc_sampler(UNUSED
const struct gen_device_info
*devinfo
, uint32_t desc
)
373 return GET_BITS(desc
, 11, 8);
376 static inline unsigned
377 brw_sampler_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
379 if (devinfo
->gen
>= 7)
380 return GET_BITS(desc
, 16, 12);
381 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
382 return GET_BITS(desc
, 15, 12);
384 return GET_BITS(desc
, 15, 14);
387 static inline unsigned
388 brw_sampler_desc_simd_mode(const struct gen_device_info
*devinfo
, uint32_t desc
)
390 assert(devinfo
->gen
>= 5);
391 if (devinfo
->gen
>= 7)
392 return GET_BITS(desc
, 18, 17);
394 return GET_BITS(desc
, 17, 16);
397 static inline unsigned
398 brw_sampler_desc_return_format(ASSERTED
const struct gen_device_info
*devinfo
,
401 assert(devinfo
->gen
== 4 && !devinfo
->is_g4x
);
402 return GET_BITS(desc
, 13, 12);
406 * Construct a message descriptor for the dataport
408 static inline uint32_t
409 brw_dp_desc(const struct gen_device_info
*devinfo
,
410 unsigned binding_table_index
,
412 unsigned msg_control
)
414 /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
417 assert(devinfo
->gen
>= 6);
418 const unsigned desc
= SET_BITS(binding_table_index
, 7, 0);
419 if (devinfo
->gen
>= 8) {
420 return (desc
| SET_BITS(msg_control
, 13, 8) |
421 SET_BITS(msg_type
, 18, 14));
422 } else if (devinfo
->gen
>= 7) {
423 return (desc
| SET_BITS(msg_control
, 13, 8) |
424 SET_BITS(msg_type
, 17, 14));
426 return (desc
| SET_BITS(msg_control
, 12, 8) |
427 SET_BITS(msg_type
, 16, 13));
431 static inline unsigned
432 brw_dp_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
435 return GET_BITS(desc
, 7, 0);
438 static inline unsigned
439 brw_dp_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
441 assert(devinfo
->gen
>= 6);
442 if (devinfo
->gen
>= 8)
443 return GET_BITS(desc
, 18, 14);
444 else if (devinfo
->gen
>= 7)
445 return GET_BITS(desc
, 17, 14);
447 return GET_BITS(desc
, 16, 13);
450 static inline unsigned
451 brw_dp_desc_msg_control(const struct gen_device_info
*devinfo
, uint32_t desc
)
453 assert(devinfo
->gen
>= 6);
454 if (devinfo
->gen
>= 7)
455 return GET_BITS(desc
, 13, 8);
457 return GET_BITS(desc
, 12, 8);
461 * Construct a message descriptor immediate with the specified dataport read
464 static inline uint32_t
465 brw_dp_read_desc(const struct gen_device_info
*devinfo
,
466 unsigned binding_table_index
,
467 unsigned msg_control
,
469 unsigned target_cache
)
471 if (devinfo
->gen
>= 6)
472 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
);
473 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
474 return (SET_BITS(binding_table_index
, 7, 0) |
475 SET_BITS(msg_control
, 10, 8) |
476 SET_BITS(msg_type
, 13, 11) |
477 SET_BITS(target_cache
, 15, 14));
479 return (SET_BITS(binding_table_index
, 7, 0) |
480 SET_BITS(msg_control
, 11, 8) |
481 SET_BITS(msg_type
, 13, 12) |
482 SET_BITS(target_cache
, 15, 14));
485 static inline unsigned
486 brw_dp_read_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
488 if (devinfo
->gen
>= 6)
489 return brw_dp_desc_msg_type(devinfo
, desc
);
490 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
491 return GET_BITS(desc
, 13, 11);
493 return GET_BITS(desc
, 13, 12);
496 static inline unsigned
497 brw_dp_read_desc_msg_control(const struct gen_device_info
*devinfo
,
500 if (devinfo
->gen
>= 6)
501 return brw_dp_desc_msg_control(devinfo
, desc
);
502 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
503 return GET_BITS(desc
, 10, 8);
505 return GET_BITS(desc
, 11, 8);
509 * Construct a message descriptor immediate with the specified dataport write
512 static inline uint32_t
513 brw_dp_write_desc(const struct gen_device_info
*devinfo
,
514 unsigned binding_table_index
,
515 unsigned msg_control
,
517 unsigned last_render_target
,
518 unsigned send_commit_msg
)
520 assert(devinfo
->gen
<= 6 || !send_commit_msg
);
521 if (devinfo
->gen
>= 6)
522 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
) |
523 SET_BITS(last_render_target
, 12, 12) |
524 SET_BITS(send_commit_msg
, 17, 17);
526 return (SET_BITS(binding_table_index
, 7, 0) |
527 SET_BITS(msg_control
, 11, 8) |
528 SET_BITS(last_render_target
, 11, 11) |
529 SET_BITS(msg_type
, 14, 12) |
530 SET_BITS(send_commit_msg
, 15, 15));
533 static inline unsigned
534 brw_dp_write_desc_msg_type(const struct gen_device_info
*devinfo
,
537 if (devinfo
->gen
>= 6)
538 return brw_dp_desc_msg_type(devinfo
, desc
);
540 return GET_BITS(desc
, 14, 12);
543 static inline unsigned
544 brw_dp_write_desc_msg_control(const struct gen_device_info
*devinfo
,
547 if (devinfo
->gen
>= 6)
548 return brw_dp_desc_msg_control(devinfo
, desc
);
550 return GET_BITS(desc
, 11, 8);
554 brw_dp_write_desc_last_render_target(const struct gen_device_info
*devinfo
,
557 if (devinfo
->gen
>= 6)
558 return GET_BITS(desc
, 12, 12);
560 return GET_BITS(desc
, 11, 11);
564 brw_dp_write_desc_write_commit(const struct gen_device_info
*devinfo
,
567 assert(devinfo
->gen
<= 6);
568 if (devinfo
->gen
>= 6)
569 return GET_BITS(desc
, 17, 17);
571 return GET_BITS(desc
, 15, 15);
575 * Construct a message descriptor immediate with the specified dataport
576 * surface function controls.
578 static inline uint32_t
579 brw_dp_surface_desc(const struct gen_device_info
*devinfo
,
581 unsigned msg_control
)
583 assert(devinfo
->gen
>= 7);
584 /* We'll OR in the binding table index later */
585 return brw_dp_desc(devinfo
, 0, msg_type
, msg_control
);
588 static inline uint32_t
589 brw_dp_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
590 unsigned exec_size
, /**< 0 for SIMD4x2 */
592 bool response_expected
)
594 assert(exec_size
<= 8 || exec_size
== 16);
597 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
599 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
601 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
604 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
607 const unsigned msg_control
=
608 SET_BITS(atomic_op
, 3, 0) |
609 SET_BITS(0 < exec_size
&& exec_size
<= 8, 4, 4) |
610 SET_BITS(response_expected
, 5, 5);
612 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
615 static inline uint32_t
616 brw_dp_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
619 bool response_expected
)
621 assert(exec_size
<= 8 || exec_size
== 16);
622 assert(devinfo
->gen
>= 9);
624 assert(exec_size
> 0);
625 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP
;
627 const unsigned msg_control
=
628 SET_BITS(atomic_op
, 1, 0) |
629 SET_BITS(exec_size
<= 8, 4, 4) |
630 SET_BITS(response_expected
, 5, 5);
632 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
635 static inline unsigned
636 brw_mdc_cmask(unsigned num_channels
)
638 /* See also MDC_CMASK in the SKL PRM Vol 2d. */
639 return 0xf & (0xf << num_channels
);
642 static inline uint32_t
643 brw_dp_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
644 unsigned exec_size
, /**< 0 for SIMD4x2 */
645 unsigned num_channels
,
648 assert(exec_size
<= 8 || exec_size
== 16);
652 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
653 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
;
655 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
;
659 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
660 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
662 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
666 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
667 if (write
&& devinfo
->gen
== 7 && !devinfo
->is_haswell
&& exec_size
== 0)
670 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
671 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
672 exec_size
<= 8 ? 2 : 1;
674 const unsigned msg_control
=
675 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
676 SET_BITS(simd_mode
, 5, 4);
678 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
681 static inline unsigned
682 brw_mdc_ds(unsigned bit_size
)
686 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
688 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
690 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
692 unreachable("Unsupported bit_size for byte scattered messages");
696 static inline uint32_t
697 brw_dp_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
702 assert(exec_size
<= 8 || exec_size
== 16);
704 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
705 const unsigned msg_type
=
706 write
? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
:
707 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
;
709 assert(exec_size
> 0);
710 const unsigned msg_control
=
711 SET_BITS(exec_size
== 16, 0, 0) |
712 SET_BITS(brw_mdc_ds(bit_size
), 3, 2);
714 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
717 static inline uint32_t
718 brw_dp_dword_scattered_rw_desc(const struct gen_device_info
*devinfo
,
722 assert(exec_size
== 8 || exec_size
== 16);
726 if (devinfo
->gen
>= 6) {
727 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
729 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
732 if (devinfo
->gen
>= 7) {
733 msg_type
= GEN7_DATAPORT_DC_DWORD_SCATTERED_READ
;
734 } else if (devinfo
->gen
> 4 || devinfo
->is_g4x
) {
735 msg_type
= G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
737 msg_type
= BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
741 const unsigned msg_control
=
742 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
743 SET_BITS(exec_size
== 16, 0, 0);
745 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
748 static inline uint32_t
749 brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
750 unsigned exec_size
, /**< 0 for SIMD4x2 */
751 unsigned num_channels
,
754 assert(exec_size
<= 8 || exec_size
== 16);
755 assert(devinfo
->gen
>= 8);
758 write
? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE
:
759 GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ
;
761 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
762 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
763 exec_size
<= 8 ? 2 : 1;
765 const unsigned msg_control
=
766 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
767 SET_BITS(simd_mode
, 5, 4);
769 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
770 msg_type
, msg_control
);
774 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
777 static inline uint32_t
778 brw_mdc_a64_ds(unsigned elems
)
786 unreachable("Unsupported element count for A64 scattered message");
790 static inline uint32_t
791 brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
792 unsigned exec_size
, /**< 0 for SIMD4x2 */
796 assert(exec_size
<= 8 || exec_size
== 16);
797 assert(devinfo
->gen
>= 8);
800 write
? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE
:
801 GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ
;
803 const unsigned msg_control
=
804 SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE
, 1, 0) |
805 SET_BITS(brw_mdc_a64_ds(bit_size
/ 8), 3, 2) |
806 SET_BITS(exec_size
== 16, 4, 4);
808 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
809 msg_type
, msg_control
);
812 static inline uint32_t
813 brw_dp_a64_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
814 ASSERTED
unsigned exec_size
, /**< 0 for SIMD4x2 */
817 bool response_expected
)
819 assert(exec_size
== 8);
820 assert(devinfo
->gen
>= 8);
821 assert(bit_size
== 32 || bit_size
== 64);
823 const unsigned msg_type
= GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP
;
825 const unsigned msg_control
=
826 SET_BITS(atomic_op
, 3, 0) |
827 SET_BITS(bit_size
== 64, 4, 4) |
828 SET_BITS(response_expected
, 5, 5);
830 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
831 msg_type
, msg_control
);
834 static inline uint32_t
835 brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
836 ASSERTED
unsigned exec_size
,
838 bool response_expected
)
840 assert(exec_size
== 8);
841 assert(devinfo
->gen
>= 9);
843 assert(exec_size
> 0);
844 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP
;
846 const unsigned msg_control
=
847 SET_BITS(atomic_op
, 1, 0) |
848 SET_BITS(response_expected
, 5, 5);
850 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
851 msg_type
, msg_control
);
854 static inline uint32_t
855 brw_dp_typed_atomic_desc(const struct gen_device_info
*devinfo
,
859 bool response_expected
)
861 assert(exec_size
> 0 || exec_group
== 0);
862 assert(exec_group
% 8 == 0);
865 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
866 if (exec_size
== 0) {
867 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
;
869 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
;
872 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
873 assert(exec_size
> 0);
874 msg_type
= GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
;
877 const bool high_sample_mask
= (exec_group
/ 8) % 2 == 1;
879 const unsigned msg_control
=
880 SET_BITS(atomic_op
, 3, 0) |
881 SET_BITS(high_sample_mask
, 4, 4) |
882 SET_BITS(response_expected
, 5, 5);
884 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
887 static inline uint32_t
888 brw_dp_typed_surface_rw_desc(const struct gen_device_info
*devinfo
,
891 unsigned num_channels
,
894 assert(exec_size
> 0 || exec_group
== 0);
895 assert(exec_group
% 8 == 0);
897 /* Typed surface reads and writes don't support SIMD16 */
898 assert(exec_size
<= 8);
902 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
903 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
;
905 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
;
908 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
909 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
;
911 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_READ
;
915 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
916 unsigned msg_control
;
917 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
918 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
919 const unsigned slot_group
= exec_size
== 0 ? 0 : /* SIMD4x2 */
920 1 + ((exec_group
/ 8) % 2);
923 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
924 SET_BITS(slot_group
, 5, 4);
926 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
927 assert(exec_size
> 0);
928 const unsigned slot_group
= ((exec_group
/ 8) % 2);
931 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
932 SET_BITS(slot_group
, 5, 5);
935 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
939 * Construct a message descriptor immediate with the specified pixel
940 * interpolator function controls.
942 static inline uint32_t
943 brw_pixel_interp_desc(UNUSED
const struct gen_device_info
*devinfo
,
949 return (SET_BITS(slot_group
, 11, 11) |
950 SET_BITS(msg_type
, 13, 12) |
951 SET_BITS(!!noperspective
, 14, 14) |
952 SET_BITS(simd_mode
, 16, 16));
955 void brw_urb_WRITE(struct brw_codegen
*p
,
959 enum brw_urb_write_flags flags
,
961 unsigned response_length
,
966 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
967 * desc. If \p desc is not an immediate it will be transparently loaded to an
968 * address register using an OR instruction.
971 brw_send_indirect_message(struct brw_codegen
*p
,
974 struct brw_reg payload
,
980 brw_send_indirect_split_message(struct brw_codegen
*p
,
983 struct brw_reg payload0
,
984 struct brw_reg payload1
,
987 struct brw_reg ex_desc
,
988 unsigned ex_desc_imm
,
991 void brw_ff_sync(struct brw_codegen
*p
,
996 unsigned response_length
,
999 void brw_svb_write(struct brw_codegen
*p
,
1000 struct brw_reg dest
,
1001 unsigned msg_reg_nr
,
1002 struct brw_reg src0
,
1003 unsigned binding_table_index
,
1004 bool send_commit_msg
);
1006 brw_inst
*brw_fb_WRITE(struct brw_codegen
*p
,
1007 struct brw_reg payload
,
1008 struct brw_reg implied_header
,
1009 unsigned msg_control
,
1010 unsigned binding_table_index
,
1011 unsigned msg_length
,
1012 unsigned response_length
,
1014 bool last_render_target
,
1015 bool header_present
);
1017 brw_inst
*gen9_fb_READ(struct brw_codegen
*p
,
1019 struct brw_reg payload
,
1020 unsigned binding_table_index
,
1021 unsigned msg_length
,
1022 unsigned response_length
,
1025 void brw_SAMPLE(struct brw_codegen
*p
,
1026 struct brw_reg dest
,
1027 unsigned msg_reg_nr
,
1028 struct brw_reg src0
,
1029 unsigned binding_table_index
,
1032 unsigned response_length
,
1033 unsigned msg_length
,
1034 unsigned header_present
,
1036 unsigned return_format
);
1038 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
1039 struct brw_reg header
,
1040 struct brw_reg sampler_index
);
1042 void gen4_math(struct brw_codegen
*p
,
1043 struct brw_reg dest
,
1045 unsigned msg_reg_nr
,
1047 unsigned precision
);
1049 void gen6_math(struct brw_codegen
*p
,
1050 struct brw_reg dest
,
1052 struct brw_reg src0
,
1053 struct brw_reg src1
);
1055 void brw_oword_block_read(struct brw_codegen
*p
,
1056 struct brw_reg dest
,
1059 uint32_t bind_table_index
);
1061 unsigned brw_scratch_surface_idx(const struct brw_codegen
*p
);
1063 void brw_oword_block_read_scratch(struct brw_codegen
*p
,
1064 struct brw_reg dest
,
1069 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1074 void gen7_block_read_scratch(struct brw_codegen
*p
,
1075 struct brw_reg dest
,
1079 void brw_shader_time_add(struct brw_codegen
*p
,
1080 struct brw_reg payload
,
1081 uint32_t surf_index
);
1084 * Return the generation-specific jump distance scaling factor.
1086 * Given the number of instructions to jump, we need to scale by
1087 * some number to obtain the actual jump distance to program in an
1090 static inline unsigned
1091 brw_jump_scale(const struct gen_device_info
*devinfo
)
1093 /* Broadwell measures jump targets in bytes. */
1094 if (devinfo
->gen
>= 8)
1097 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1098 * to support compaction), so each 128-bit instruction requires 2 chunks.
1100 if (devinfo
->gen
>= 5)
1103 /* Gen4 simply uses the number of 128-bit instructions. */
1107 void brw_barrier(struct brw_codegen
*p
, struct brw_reg src
);
1109 /* If/else/endif. Works by manipulating the execution flags on each
1112 brw_inst
*brw_IF(struct brw_codegen
*p
, unsigned execute_size
);
1113 brw_inst
*gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1114 struct brw_reg src0
, struct brw_reg src1
);
1116 void brw_ELSE(struct brw_codegen
*p
);
1117 void brw_ENDIF(struct brw_codegen
*p
);
1121 brw_inst
*brw_DO(struct brw_codegen
*p
, unsigned execute_size
);
1123 brw_inst
*brw_WHILE(struct brw_codegen
*p
);
1125 brw_inst
*brw_BREAK(struct brw_codegen
*p
);
1126 brw_inst
*brw_CONT(struct brw_codegen
*p
);
1127 brw_inst
*brw_HALT(struct brw_codegen
*p
);
1131 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
);
1133 brw_inst
*brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1134 unsigned predicate_control
);
1136 void brw_NOP(struct brw_codegen
*p
);
1138 void brw_WAIT(struct brw_codegen
*p
);
1140 void brw_SYNC(struct brw_codegen
*p
, enum tgl_sync_function func
);
1142 /* Special case: there is never a destination, execution size will be
1145 void brw_CMP(struct brw_codegen
*p
,
1146 struct brw_reg dest
,
1147 unsigned conditional
,
1148 struct brw_reg src0
,
1149 struct brw_reg src1
);
1152 brw_untyped_atomic(struct brw_codegen
*p
,
1154 struct brw_reg payload
,
1155 struct brw_reg surface
,
1157 unsigned msg_length
,
1158 bool response_expected
,
1159 bool header_present
);
1162 brw_untyped_surface_read(struct brw_codegen
*p
,
1164 struct brw_reg payload
,
1165 struct brw_reg surface
,
1166 unsigned msg_length
,
1167 unsigned num_channels
);
1170 brw_untyped_surface_write(struct brw_codegen
*p
,
1171 struct brw_reg payload
,
1172 struct brw_reg surface
,
1173 unsigned msg_length
,
1174 unsigned num_channels
,
1175 bool header_present
);
1178 brw_memory_fence(struct brw_codegen
*p
,
1181 enum opcode send_op
,
1182 enum brw_message_target sfid
,
1187 brw_pixel_interpolator_query(struct brw_codegen
*p
,
1188 struct brw_reg dest
,
1192 struct brw_reg data
,
1193 unsigned msg_length
,
1194 unsigned response_length
);
1197 brw_find_live_channel(struct brw_codegen
*p
,
1199 struct brw_reg mask
);
1202 brw_broadcast(struct brw_codegen
*p
,
1205 struct brw_reg idx
);
1208 brw_float_controls_mode(struct brw_codegen
*p
,
1209 unsigned mode
, unsigned mask
);
1211 /***********************************************************************
1215 void brw_copy_indirect_to_indirect(struct brw_codegen
*p
,
1216 struct brw_indirect dst_ptr
,
1217 struct brw_indirect src_ptr
,
1220 void brw_copy_from_indirect(struct brw_codegen
*p
,
1222 struct brw_indirect ptr
,
1225 void brw_copy4(struct brw_codegen
*p
,
1230 void brw_copy8(struct brw_codegen
*p
,
1235 void brw_math_invert( struct brw_codegen
*p
,
1237 struct brw_reg src
);
1239 void brw_set_src1(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
1241 void brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*insn
,
1242 unsigned desc
, unsigned ex_desc
);
1245 brw_set_desc(struct brw_codegen
*p
, brw_inst
*insn
, unsigned desc
)
1247 brw_set_desc_ex(p
, insn
, desc
, 0);
1250 void brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
);
1252 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod
);
1253 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod
);
1255 /* brw_eu_compact.c */
1256 void brw_init_compaction_tables(const struct gen_device_info
*devinfo
);
1257 void brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1258 struct disasm_info
*disasm
);
1259 void brw_uncompact_instruction(const struct gen_device_info
*devinfo
,
1260 brw_inst
*dst
, brw_compact_inst
*src
);
1261 bool brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1262 brw_compact_inst
*dst
, const brw_inst
*src
);
1264 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1265 brw_inst
*orig
, brw_inst
*uncompacted
);
1267 /* brw_eu_validate.c */
1268 bool brw_validate_instruction(const struct gen_device_info
*devinfo
,
1269 const brw_inst
*inst
, int offset
,
1270 struct disasm_info
*disasm
);
1271 bool brw_validate_instructions(const struct gen_device_info
*devinfo
,
1272 const void *assembly
, int start_offset
, int end_offset
,
1273 struct disasm_info
*disasm
);
1276 next_offset(const struct gen_device_info
*devinfo
, void *store
, int offset
)
1278 brw_inst
*insn
= (brw_inst
*)((char *)store
+ offset
);
1280 if (brw_inst_cmpt_control(devinfo
, insn
))
1286 struct opcode_desc
{
1295 const struct opcode_desc
*
1296 brw_opcode_desc(const struct gen_device_info
*devinfo
, enum opcode opcode
);
1298 const struct opcode_desc
*
1299 brw_opcode_desc_from_hw(const struct gen_device_info
*devinfo
, unsigned hw
);
1301 static inline unsigned
1302 brw_opcode_encode(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1304 return brw_opcode_desc(devinfo
, opcode
)->hw
;
1307 static inline enum opcode
1308 brw_opcode_decode(const struct gen_device_info
*devinfo
, unsigned hw
)
1310 const struct opcode_desc
*desc
= brw_opcode_desc_from_hw(devinfo
, hw
);
1311 return desc
? (enum opcode
)desc
->ir
: BRW_OPCODE_ILLEGAL
;
1315 brw_inst_set_opcode(const struct gen_device_info
*devinfo
,
1316 brw_inst
*inst
, enum opcode opcode
)
1318 brw_inst_set_hw_opcode(devinfo
, inst
, brw_opcode_encode(devinfo
, opcode
));
1321 static inline enum opcode
1322 brw_inst_opcode(const struct gen_device_info
*devinfo
, const brw_inst
*inst
)
1324 return brw_opcode_decode(devinfo
, brw_inst_hw_opcode(devinfo
, inst
));
1328 is_3src(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1330 const struct opcode_desc
*desc
= brw_opcode_desc(devinfo
, opcode
);
1331 return desc
&& desc
->nsrc
== 3;
1334 /** Maximum SEND message length */
1335 #define BRW_MAX_MSG_LENGTH 15
1337 /** First MRF register used by spills (21 when gen == 6, otherwise 13) */
1338 #define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
1340 /** First MRF register used by pull loads (16 when gen == 6, otherwise 13) */
1341 #define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)