2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
39 #include "brw_eu_defines.h"
41 #include "brw_disasm_info.h"
47 #define BRW_EU_MAX_INSN_STACK 5
49 struct brw_insn_state
{
50 /* One of BRW_EXECUTE_* */
53 /* Group in units of channels */
56 /* Compression control on gen4-5 */
59 /* One of BRW_MASK_* */
60 unsigned mask_control
:1;
62 /* Scheduling info for Gen12+ */
67 /* One of BRW_ALIGN_* */
68 unsigned access_mode
:1;
70 /* One of BRW_PREDICATE_* */
71 enum brw_predicate predicate
:4;
75 /* Flag subreg. Bottom bit is subreg, top bit is reg */
76 unsigned flag_subreg
:2;
78 bool acc_wr_control
:1;
82 /* A helper for accessing the last instruction emitted. This makes it easy
83 * to set various bits on an instruction without having to create temporary
84 * variables and assign the emitted instruction to them.
86 #define brw_last_inst (&p->store[p->nr_insn - 1])
92 unsigned int next_insn_offset
;
96 /* Allow clients to push/pop instruction state:
98 struct brw_insn_state stack
[BRW_EU_MAX_INSN_STACK
];
99 struct brw_insn_state
*current
;
101 /** Whether or not the user wants automatic exec sizes
103 * If true, codegen will try to automatically infer the exec size of an
104 * instruction from the width of the destination register. If false, it
105 * will take whatever is set by brw_set_default_exec_size verbatim.
107 * This is set to true by default in brw_init_codegen.
109 bool automatic_exec_sizes
;
111 bool single_program_flow
;
112 const struct gen_device_info
*devinfo
;
114 /* Control flow stacks:
115 * - if_stack contains IF and ELSE instructions which must be patched
116 * (and popped) once the matching ENDIF instruction is encountered.
118 * Just store the instruction pointer (an index).
122 int if_stack_array_size
;
125 * loop_stack contains the instruction pointers of the starts of loops which
126 * must be patched (and popped) once the matching WHILE instruction is
131 * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
132 * blocks they were popping out of, to fix up the mask stack. This tracks
133 * the IF/ENDIF nesting in each current nested loop level.
135 int *if_depth_in_loop
;
136 int loop_stack_depth
;
137 int loop_stack_array_size
;
143 struct brw_label
*next
;
146 void brw_pop_insn_state( struct brw_codegen
*p
);
147 void brw_push_insn_state( struct brw_codegen
*p
);
148 unsigned brw_get_default_exec_size(struct brw_codegen
*p
);
149 unsigned brw_get_default_group(struct brw_codegen
*p
);
150 unsigned brw_get_default_access_mode(struct brw_codegen
*p
);
151 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen
*p
);
152 void brw_set_default_exec_size(struct brw_codegen
*p
, unsigned value
);
153 void brw_set_default_mask_control( struct brw_codegen
*p
, unsigned value
);
154 void brw_set_default_saturate( struct brw_codegen
*p
, bool enable
);
155 void brw_set_default_access_mode( struct brw_codegen
*p
, unsigned access_mode
);
156 void brw_inst_set_compression(const struct gen_device_info
*devinfo
,
157 brw_inst
*inst
, bool on
);
158 void brw_set_default_compression(struct brw_codegen
*p
, bool on
);
159 void brw_inst_set_group(const struct gen_device_info
*devinfo
,
160 brw_inst
*inst
, unsigned group
);
161 void brw_set_default_group(struct brw_codegen
*p
, unsigned group
);
162 void brw_set_default_compression_control(struct brw_codegen
*p
, enum brw_compression c
);
163 void brw_set_default_predicate_control(struct brw_codegen
*p
, enum brw_predicate pc
);
164 void brw_set_default_predicate_inverse(struct brw_codegen
*p
, bool predicate_inverse
);
165 void brw_set_default_flag_reg(struct brw_codegen
*p
, int reg
, int subreg
);
166 void brw_set_default_acc_write_control(struct brw_codegen
*p
, unsigned value
);
167 void brw_set_default_swsb(struct brw_codegen
*p
, struct tgl_swsb value
);
169 void brw_init_codegen(const struct gen_device_info
*, struct brw_codegen
*p
,
171 bool brw_has_jip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
172 bool brw_has_uip(const struct gen_device_info
*devinfo
, enum opcode opcode
);
173 const struct brw_label
*brw_find_label(const struct brw_label
*root
, int offset
);
174 void brw_create_label(struct brw_label
**labels
, int offset
, void *mem_ctx
);
175 int brw_disassemble_inst(FILE *file
, const struct gen_device_info
*devinfo
,
176 const struct brw_inst
*inst
, bool is_compacted
,
177 int offset
, const struct brw_label
*root_label
);
178 const struct brw_label
*brw_label_assembly(const struct gen_device_info
*devinfo
,
179 const void *assembly
, int start
, int end
,
181 void brw_disassemble_with_labels(const struct gen_device_info
*devinfo
,
182 const void *assembly
, int start
, int end
, FILE *out
);
183 void brw_disassemble(const struct gen_device_info
*devinfo
,
184 const void *assembly
, int start
, int end
,
185 const struct brw_label
*root_label
, FILE *out
);
186 const unsigned *brw_get_program( struct brw_codegen
*p
, unsigned *sz
);
188 bool brw_try_override_assembly(struct brw_codegen
*p
, int start_offset
,
189 const char *identifier
);
191 brw_inst
*brw_next_insn(struct brw_codegen
*p
, unsigned opcode
);
192 void brw_set_dest(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg dest
);
193 void brw_set_src0(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
195 void gen6_resolve_implied_move(struct brw_codegen
*p
,
197 unsigned msg_reg_nr
);
199 /* Helpers for regular instructions:
202 brw_inst *brw_##OP(struct brw_codegen *p, \
203 struct brw_reg dest, \
204 struct brw_reg src0);
207 brw_inst *brw_##OP(struct brw_codegen *p, \
208 struct brw_reg dest, \
209 struct brw_reg src0, \
210 struct brw_reg src1);
213 brw_inst *brw_##OP(struct brw_codegen *p, \
214 struct brw_reg dest, \
215 struct brw_reg src0, \
216 struct brw_reg src1, \
217 struct brw_reg src2);
269 /* Helpers for SEND instruction:
273 * Construct a message descriptor immediate with the specified common
274 * descriptor controls.
276 static inline uint32_t
277 brw_message_desc(const struct gen_device_info
*devinfo
,
279 unsigned response_length
,
282 if (devinfo
->gen
>= 5) {
283 return (SET_BITS(msg_length
, 28, 25) |
284 SET_BITS(response_length
, 24, 20) |
285 SET_BITS(header_present
, 19, 19));
287 return (SET_BITS(msg_length
, 23, 20) |
288 SET_BITS(response_length
, 19, 16));
292 static inline unsigned
293 brw_message_desc_mlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
295 if (devinfo
->gen
>= 5)
296 return GET_BITS(desc
, 28, 25);
298 return GET_BITS(desc
, 23, 20);
301 static inline unsigned
302 brw_message_desc_rlen(const struct gen_device_info
*devinfo
, uint32_t desc
)
304 if (devinfo
->gen
>= 5)
305 return GET_BITS(desc
, 24, 20);
307 return GET_BITS(desc
, 19, 16);
311 brw_message_desc_header_present(ASSERTED
const struct gen_device_info
*devinfo
,
314 assert(devinfo
->gen
>= 5);
315 return GET_BITS(desc
, 19, 19);
318 static inline unsigned
319 brw_message_ex_desc(UNUSED
const struct gen_device_info
*devinfo
,
320 unsigned ex_msg_length
)
322 return SET_BITS(ex_msg_length
, 9, 6);
325 static inline unsigned
326 brw_message_ex_desc_ex_mlen(UNUSED
const struct gen_device_info
*devinfo
,
329 return GET_BITS(ex_desc
, 9, 6);
332 static inline uint32_t
333 brw_urb_desc(const struct gen_device_info
*devinfo
,
335 bool per_slot_offset_present
,
336 bool channel_mask_present
,
337 unsigned global_offset
)
339 if (devinfo
->gen
>= 8) {
340 return (SET_BITS(per_slot_offset_present
, 17, 17) |
341 SET_BITS(channel_mask_present
, 15, 15) |
342 SET_BITS(global_offset
, 14, 4) |
343 SET_BITS(msg_type
, 3, 0));
344 } else if (devinfo
->gen
>= 7) {
345 assert(!channel_mask_present
);
346 return (SET_BITS(per_slot_offset_present
, 16, 16) |
347 SET_BITS(global_offset
, 13, 3) |
348 SET_BITS(msg_type
, 3, 0));
350 unreachable("unhandled URB write generation");
354 static inline uint32_t
355 brw_urb_desc_msg_type(ASSERTED
const struct gen_device_info
*devinfo
,
358 assert(devinfo
->gen
>= 7);
359 return GET_BITS(desc
, 3, 0);
363 * Construct a message descriptor immediate with the specified sampler
366 static inline uint32_t
367 brw_sampler_desc(const struct gen_device_info
*devinfo
,
368 unsigned binding_table_index
,
372 unsigned return_format
)
374 const unsigned desc
= (SET_BITS(binding_table_index
, 7, 0) |
375 SET_BITS(sampler
, 11, 8));
376 if (devinfo
->gen
>= 7)
377 return (desc
| SET_BITS(msg_type
, 16, 12) |
378 SET_BITS(simd_mode
, 18, 17));
379 else if (devinfo
->gen
>= 5)
380 return (desc
| SET_BITS(msg_type
, 15, 12) |
381 SET_BITS(simd_mode
, 17, 16));
382 else if (devinfo
->is_g4x
)
383 return desc
| SET_BITS(msg_type
, 15, 12);
385 return (desc
| SET_BITS(return_format
, 13, 12) |
386 SET_BITS(msg_type
, 15, 14));
389 static inline unsigned
390 brw_sampler_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
393 return GET_BITS(desc
, 7, 0);
396 static inline unsigned
397 brw_sampler_desc_sampler(UNUSED
const struct gen_device_info
*devinfo
, uint32_t desc
)
399 return GET_BITS(desc
, 11, 8);
402 static inline unsigned
403 brw_sampler_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
405 if (devinfo
->gen
>= 7)
406 return GET_BITS(desc
, 16, 12);
407 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
408 return GET_BITS(desc
, 15, 12);
410 return GET_BITS(desc
, 15, 14);
413 static inline unsigned
414 brw_sampler_desc_simd_mode(const struct gen_device_info
*devinfo
, uint32_t desc
)
416 assert(devinfo
->gen
>= 5);
417 if (devinfo
->gen
>= 7)
418 return GET_BITS(desc
, 18, 17);
420 return GET_BITS(desc
, 17, 16);
423 static inline unsigned
424 brw_sampler_desc_return_format(ASSERTED
const struct gen_device_info
*devinfo
,
427 assert(devinfo
->gen
== 4 && !devinfo
->is_g4x
);
428 return GET_BITS(desc
, 13, 12);
432 * Construct a message descriptor for the dataport
434 static inline uint32_t
435 brw_dp_desc(const struct gen_device_info
*devinfo
,
436 unsigned binding_table_index
,
438 unsigned msg_control
)
440 /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
443 assert(devinfo
->gen
>= 6);
444 const unsigned desc
= SET_BITS(binding_table_index
, 7, 0);
445 if (devinfo
->gen
>= 8) {
446 return (desc
| SET_BITS(msg_control
, 13, 8) |
447 SET_BITS(msg_type
, 18, 14));
448 } else if (devinfo
->gen
>= 7) {
449 return (desc
| SET_BITS(msg_control
, 13, 8) |
450 SET_BITS(msg_type
, 17, 14));
452 return (desc
| SET_BITS(msg_control
, 12, 8) |
453 SET_BITS(msg_type
, 16, 13));
457 static inline unsigned
458 brw_dp_desc_binding_table_index(UNUSED
const struct gen_device_info
*devinfo
,
461 return GET_BITS(desc
, 7, 0);
464 static inline unsigned
465 brw_dp_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
467 assert(devinfo
->gen
>= 6);
468 if (devinfo
->gen
>= 8)
469 return GET_BITS(desc
, 18, 14);
470 else if (devinfo
->gen
>= 7)
471 return GET_BITS(desc
, 17, 14);
473 return GET_BITS(desc
, 16, 13);
476 static inline unsigned
477 brw_dp_desc_msg_control(const struct gen_device_info
*devinfo
, uint32_t desc
)
479 assert(devinfo
->gen
>= 6);
480 if (devinfo
->gen
>= 7)
481 return GET_BITS(desc
, 13, 8);
483 return GET_BITS(desc
, 12, 8);
487 * Construct a message descriptor immediate with the specified dataport read
490 static inline uint32_t
491 brw_dp_read_desc(const struct gen_device_info
*devinfo
,
492 unsigned binding_table_index
,
493 unsigned msg_control
,
495 unsigned target_cache
)
497 if (devinfo
->gen
>= 6)
498 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
);
499 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
500 return (SET_BITS(binding_table_index
, 7, 0) |
501 SET_BITS(msg_control
, 10, 8) |
502 SET_BITS(msg_type
, 13, 11) |
503 SET_BITS(target_cache
, 15, 14));
505 return (SET_BITS(binding_table_index
, 7, 0) |
506 SET_BITS(msg_control
, 11, 8) |
507 SET_BITS(msg_type
, 13, 12) |
508 SET_BITS(target_cache
, 15, 14));
511 static inline unsigned
512 brw_dp_read_desc_msg_type(const struct gen_device_info
*devinfo
, uint32_t desc
)
514 if (devinfo
->gen
>= 6)
515 return brw_dp_desc_msg_type(devinfo
, desc
);
516 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
517 return GET_BITS(desc
, 13, 11);
519 return GET_BITS(desc
, 13, 12);
522 static inline unsigned
523 brw_dp_read_desc_msg_control(const struct gen_device_info
*devinfo
,
526 if (devinfo
->gen
>= 6)
527 return brw_dp_desc_msg_control(devinfo
, desc
);
528 else if (devinfo
->gen
>= 5 || devinfo
->is_g4x
)
529 return GET_BITS(desc
, 10, 8);
531 return GET_BITS(desc
, 11, 8);
535 * Construct a message descriptor immediate with the specified dataport write
538 static inline uint32_t
539 brw_dp_write_desc(const struct gen_device_info
*devinfo
,
540 unsigned binding_table_index
,
541 unsigned msg_control
,
543 unsigned last_render_target
,
544 unsigned send_commit_msg
)
546 assert(devinfo
->gen
<= 6 || !send_commit_msg
);
547 if (devinfo
->gen
>= 6)
548 return brw_dp_desc(devinfo
, binding_table_index
, msg_type
, msg_control
) |
549 SET_BITS(last_render_target
, 12, 12) |
550 SET_BITS(send_commit_msg
, 17, 17);
552 return (SET_BITS(binding_table_index
, 7, 0) |
553 SET_BITS(msg_control
, 11, 8) |
554 SET_BITS(last_render_target
, 11, 11) |
555 SET_BITS(msg_type
, 14, 12) |
556 SET_BITS(send_commit_msg
, 15, 15));
559 static inline unsigned
560 brw_dp_write_desc_msg_type(const struct gen_device_info
*devinfo
,
563 if (devinfo
->gen
>= 6)
564 return brw_dp_desc_msg_type(devinfo
, desc
);
566 return GET_BITS(desc
, 14, 12);
569 static inline unsigned
570 brw_dp_write_desc_msg_control(const struct gen_device_info
*devinfo
,
573 if (devinfo
->gen
>= 6)
574 return brw_dp_desc_msg_control(devinfo
, desc
);
576 return GET_BITS(desc
, 11, 8);
580 brw_dp_write_desc_last_render_target(const struct gen_device_info
*devinfo
,
583 if (devinfo
->gen
>= 6)
584 return GET_BITS(desc
, 12, 12);
586 return GET_BITS(desc
, 11, 11);
590 brw_dp_write_desc_write_commit(const struct gen_device_info
*devinfo
,
593 assert(devinfo
->gen
<= 6);
594 if (devinfo
->gen
>= 6)
595 return GET_BITS(desc
, 17, 17);
597 return GET_BITS(desc
, 15, 15);
601 * Construct a message descriptor immediate with the specified dataport
602 * surface function controls.
604 static inline uint32_t
605 brw_dp_surface_desc(const struct gen_device_info
*devinfo
,
607 unsigned msg_control
)
609 assert(devinfo
->gen
>= 7);
610 /* We'll OR in the binding table index later */
611 return brw_dp_desc(devinfo
, 0, msg_type
, msg_control
);
614 static inline uint32_t
615 brw_dp_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
616 unsigned exec_size
, /**< 0 for SIMD4x2 */
618 bool response_expected
)
620 assert(exec_size
<= 8 || exec_size
== 16);
623 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
625 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP
;
627 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2
;
630 msg_type
= GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP
;
633 const unsigned msg_control
=
634 SET_BITS(atomic_op
, 3, 0) |
635 SET_BITS(0 < exec_size
&& exec_size
<= 8, 4, 4) |
636 SET_BITS(response_expected
, 5, 5);
638 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
641 static inline uint32_t
642 brw_dp_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
645 bool response_expected
)
647 assert(exec_size
<= 8 || exec_size
== 16);
648 assert(devinfo
->gen
>= 9);
650 assert(exec_size
> 0);
651 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP
;
653 const unsigned msg_control
=
654 SET_BITS(atomic_op
, 1, 0) |
655 SET_BITS(exec_size
<= 8, 4, 4) |
656 SET_BITS(response_expected
, 5, 5);
658 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
/**
 * Build a 4-bit channel mask in which the low \p num_channels bits are
 * cleared and the remaining bits of the nibble are set.
 *
 * See also MDC_CMASK in the SKL PRM Vol 2d.
 *
 * \param num_channels  Number of low bits to clear.  Values of 4 or more
 *                      yield 0.
 * \return The mask, always within 0x0..0xf.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* Every bit of the nibble is shifted out once num_channels reaches 4.
    * Returning early also keeps the shift count small enough to stay
    * well-defined for any argument.
    */
   if (num_channels >= 4)
      return 0;

   return 0xfu & (0xfu << num_channels);
}
668 static inline uint32_t
669 brw_dp_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
670 unsigned exec_size
, /**< 0 for SIMD4x2 */
671 unsigned num_channels
,
674 assert(exec_size
<= 8 || exec_size
== 16);
678 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
679 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE
;
681 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE
;
685 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
686 msg_type
= HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
;
688 msg_type
= GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ
;
692 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
693 if (write
&& devinfo
->gen
== 7 && !devinfo
->is_haswell
&& exec_size
== 0)
696 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
697 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
698 exec_size
<= 8 ? 2 : 1;
700 const unsigned msg_control
=
701 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
702 SET_BITS(simd_mode
, 5, 4);
704 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
707 static inline unsigned
708 brw_mdc_ds(unsigned bit_size
)
712 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE
;
714 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD
;
716 return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD
;
718 unreachable("Unsupported bit_size for byte scattered messages");
722 static inline uint32_t
723 brw_dp_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
728 assert(exec_size
<= 8 || exec_size
== 16);
730 assert(devinfo
->gen
> 7 || devinfo
->is_haswell
);
731 const unsigned msg_type
=
732 write
? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE
:
733 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ
;
735 assert(exec_size
> 0);
736 const unsigned msg_control
=
737 SET_BITS(exec_size
== 16, 0, 0) |
738 SET_BITS(brw_mdc_ds(bit_size
), 3, 2);
740 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
743 static inline uint32_t
744 brw_dp_dword_scattered_rw_desc(const struct gen_device_info
*devinfo
,
748 assert(exec_size
== 8 || exec_size
== 16);
752 if (devinfo
->gen
>= 6) {
753 msg_type
= GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
755 msg_type
= BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE
;
758 if (devinfo
->gen
>= 7) {
759 msg_type
= GEN7_DATAPORT_DC_DWORD_SCATTERED_READ
;
760 } else if (devinfo
->gen
> 4 || devinfo
->is_g4x
) {
761 msg_type
= G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
763 msg_type
= BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ
;
767 const unsigned msg_control
=
768 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
769 SET_BITS(exec_size
== 16, 0, 0);
771 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
774 static inline uint32_t
775 brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info
*devinfo
,
776 unsigned exec_size
, /**< 0 for SIMD4x2 */
777 unsigned num_channels
,
780 assert(exec_size
<= 8 || exec_size
== 16);
781 assert(devinfo
->gen
>= 8);
784 write
? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE
:
785 GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ
;
787 /* See also MDC_SM3 in the SKL PRM Vol 2d. */
788 const unsigned simd_mode
= exec_size
== 0 ? 0 : /* SIMD4x2 */
789 exec_size
<= 8 ? 2 : 1;
791 const unsigned msg_control
=
792 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
793 SET_BITS(simd_mode
, 5, 4);
795 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
796 msg_type
, msg_control
);
800 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
803 static inline uint32_t
804 brw_mdc_a64_ds(unsigned elems
)
812 unreachable("Unsupported elmeent count for A64 scattered message");
816 static inline uint32_t
817 brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info
*devinfo
,
818 unsigned exec_size
, /**< 0 for SIMD4x2 */
822 assert(exec_size
<= 8 || exec_size
== 16);
823 assert(devinfo
->gen
>= 8);
826 write
? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE
:
827 GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ
;
829 const unsigned msg_control
=
830 SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE
, 1, 0) |
831 SET_BITS(brw_mdc_a64_ds(bit_size
/ 8), 3, 2) |
832 SET_BITS(exec_size
== 16, 4, 4);
834 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
835 msg_type
, msg_control
);
838 static inline uint32_t
839 brw_dp_a64_untyped_atomic_desc(const struct gen_device_info
*devinfo
,
840 ASSERTED
unsigned exec_size
, /**< 0 for SIMD4x2 */
843 bool response_expected
)
845 assert(exec_size
== 8);
846 assert(devinfo
->gen
>= 8);
847 assert(bit_size
== 32 || bit_size
== 64);
849 const unsigned msg_type
= GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP
;
851 const unsigned msg_control
=
852 SET_BITS(atomic_op
, 3, 0) |
853 SET_BITS(bit_size
== 64, 4, 4) |
854 SET_BITS(response_expected
, 5, 5);
856 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
857 msg_type
, msg_control
);
860 static inline uint32_t
861 brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info
*devinfo
,
862 ASSERTED
unsigned exec_size
,
864 bool response_expected
)
866 assert(exec_size
== 8);
867 assert(devinfo
->gen
>= 9);
869 assert(exec_size
> 0);
870 const unsigned msg_type
= GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP
;
872 const unsigned msg_control
=
873 SET_BITS(atomic_op
, 1, 0) |
874 SET_BITS(response_expected
, 5, 5);
876 return brw_dp_desc(devinfo
, GEN8_BTI_STATELESS_NON_COHERENT
,
877 msg_type
, msg_control
);
880 static inline uint32_t
881 brw_dp_typed_atomic_desc(const struct gen_device_info
*devinfo
,
885 bool response_expected
)
887 assert(exec_size
> 0 || exec_group
== 0);
888 assert(exec_group
% 8 == 0);
891 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
892 if (exec_size
== 0) {
893 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2
;
895 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP
;
898 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
899 assert(exec_size
> 0);
900 msg_type
= GEN7_DATAPORT_RC_TYPED_ATOMIC_OP
;
903 const bool high_sample_mask
= (exec_group
/ 8) % 2 == 1;
905 const unsigned msg_control
=
906 SET_BITS(atomic_op
, 3, 0) |
907 SET_BITS(high_sample_mask
, 4, 4) |
908 SET_BITS(response_expected
, 5, 5);
910 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
913 static inline uint32_t
914 brw_dp_typed_surface_rw_desc(const struct gen_device_info
*devinfo
,
917 unsigned num_channels
,
920 assert(exec_size
> 0 || exec_group
== 0);
921 assert(exec_group
% 8 == 0);
923 /* Typed surface reads and writes don't support SIMD16 */
924 assert(exec_size
<= 8);
928 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
929 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE
;
931 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE
;
934 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
935 msg_type
= HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ
;
937 msg_type
= GEN7_DATAPORT_RC_TYPED_SURFACE_READ
;
941 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
942 unsigned msg_control
;
943 if (devinfo
->gen
>= 8 || devinfo
->is_haswell
) {
944 /* See also MDC_SG3 in the SKL PRM Vol 2d. */
945 const unsigned slot_group
= exec_size
== 0 ? 0 : /* SIMD4x2 */
946 1 + ((exec_group
/ 8) % 2);
949 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
950 SET_BITS(slot_group
, 5, 4);
952 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
953 assert(exec_size
> 0);
954 const unsigned slot_group
= ((exec_group
/ 8) % 2);
957 SET_BITS(brw_mdc_cmask(num_channels
), 3, 0) |
958 SET_BITS(slot_group
, 5, 5);
961 return brw_dp_surface_desc(devinfo
, msg_type
, msg_control
);
965 * Construct a message descriptor immediate with the specified pixel
966 * interpolator function controls.
968 static inline uint32_t
969 brw_pixel_interp_desc(UNUSED
const struct gen_device_info
*devinfo
,
975 return (SET_BITS(slot_group
, 11, 11) |
976 SET_BITS(msg_type
, 13, 12) |
977 SET_BITS(!!noperspective
, 14, 14) |
978 SET_BITS(simd_mode
, 16, 16));
981 void brw_urb_WRITE(struct brw_codegen
*p
,
985 enum brw_urb_write_flags flags
,
987 unsigned response_length
,
992 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
993 * desc. If \p desc is not an immediate it will be transparently loaded to an
994 * address register using an OR instruction.
997 brw_send_indirect_message(struct brw_codegen
*p
,
1000 struct brw_reg payload
,
1001 struct brw_reg desc
,
1006 brw_send_indirect_split_message(struct brw_codegen
*p
,
1009 struct brw_reg payload0
,
1010 struct brw_reg payload1
,
1011 struct brw_reg desc
,
1013 struct brw_reg ex_desc
,
1014 unsigned ex_desc_imm
,
1017 void brw_ff_sync(struct brw_codegen
*p
,
1018 struct brw_reg dest
,
1019 unsigned msg_reg_nr
,
1020 struct brw_reg src0
,
1022 unsigned response_length
,
1025 void brw_svb_write(struct brw_codegen
*p
,
1026 struct brw_reg dest
,
1027 unsigned msg_reg_nr
,
1028 struct brw_reg src0
,
1029 unsigned binding_table_index
,
1030 bool send_commit_msg
);
1032 brw_inst
*brw_fb_WRITE(struct brw_codegen
*p
,
1033 struct brw_reg payload
,
1034 struct brw_reg implied_header
,
1035 unsigned msg_control
,
1036 unsigned binding_table_index
,
1037 unsigned msg_length
,
1038 unsigned response_length
,
1040 bool last_render_target
,
1041 bool header_present
);
1043 brw_inst
*gen9_fb_READ(struct brw_codegen
*p
,
1045 struct brw_reg payload
,
1046 unsigned binding_table_index
,
1047 unsigned msg_length
,
1048 unsigned response_length
,
1051 void brw_SAMPLE(struct brw_codegen
*p
,
1052 struct brw_reg dest
,
1053 unsigned msg_reg_nr
,
1054 struct brw_reg src0
,
1055 unsigned binding_table_index
,
1058 unsigned response_length
,
1059 unsigned msg_length
,
1060 unsigned header_present
,
1062 unsigned return_format
);
1064 void brw_adjust_sampler_state_pointer(struct brw_codegen
*p
,
1065 struct brw_reg header
,
1066 struct brw_reg sampler_index
);
1068 void gen4_math(struct brw_codegen
*p
,
1069 struct brw_reg dest
,
1071 unsigned msg_reg_nr
,
1073 unsigned precision
);
1075 void gen6_math(struct brw_codegen
*p
,
1076 struct brw_reg dest
,
1078 struct brw_reg src0
,
1079 struct brw_reg src1
);
1081 void brw_oword_block_read(struct brw_codegen
*p
,
1082 struct brw_reg dest
,
1085 uint32_t bind_table_index
);
1087 unsigned brw_scratch_surface_idx(const struct brw_codegen
*p
);
1089 void brw_oword_block_read_scratch(struct brw_codegen
*p
,
1090 struct brw_reg dest
,
1095 void brw_oword_block_write_scratch(struct brw_codegen
*p
,
1100 void gen7_block_read_scratch(struct brw_codegen
*p
,
1101 struct brw_reg dest
,
1105 void brw_shader_time_add(struct brw_codegen
*p
,
1106 struct brw_reg payload
,
1107 uint32_t surf_index
);
1110 * Return the generation-specific jump distance scaling factor.
1112 * Given the number of instructions to jump, we need to scale by
1113 * some number to obtain the actual jump distance to program in an
1116 static inline unsigned
1117 brw_jump_scale(const struct gen_device_info
*devinfo
)
1119 /* Broadwell measures jump targets in bytes. */
1120 if (devinfo
->gen
>= 8)
1123 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1124 * to support compaction), so each 128-bit instruction requires 2 chunks.
1126 if (devinfo
->gen
>= 5)
1129 /* Gen4 simply uses the number of 128-bit instructions. */
1133 void brw_barrier(struct brw_codegen
*p
, struct brw_reg src
);
1135 /* If/else/endif. Works by manipulating the execution flags on each
1138 brw_inst
*brw_IF(struct brw_codegen
*p
, unsigned execute_size
);
1139 brw_inst
*gen6_IF(struct brw_codegen
*p
, enum brw_conditional_mod conditional
,
1140 struct brw_reg src0
, struct brw_reg src1
);
1142 void brw_ELSE(struct brw_codegen
*p
);
1143 void brw_ENDIF(struct brw_codegen
*p
);
1147 brw_inst
*brw_DO(struct brw_codegen
*p
, unsigned execute_size
);
1149 brw_inst
*brw_WHILE(struct brw_codegen
*p
);
1151 brw_inst
*brw_BREAK(struct brw_codegen
*p
);
1152 brw_inst
*brw_CONT(struct brw_codegen
*p
);
1153 brw_inst
*brw_HALT(struct brw_codegen
*p
);
1157 void brw_land_fwd_jump(struct brw_codegen
*p
, int jmp_insn_idx
);
1159 brw_inst
*brw_JMPI(struct brw_codegen
*p
, struct brw_reg index
,
1160 unsigned predicate_control
);
1162 void brw_NOP(struct brw_codegen
*p
);
1164 void brw_WAIT(struct brw_codegen
*p
);
1166 void brw_SYNC(struct brw_codegen
*p
, enum tgl_sync_function func
);
1168 /* Special case: there is never a destination, execution size will be
1171 void brw_CMP(struct brw_codegen
*p
,
1172 struct brw_reg dest
,
1173 unsigned conditional
,
1174 struct brw_reg src0
,
1175 struct brw_reg src1
);
1178 brw_untyped_atomic(struct brw_codegen
*p
,
1180 struct brw_reg payload
,
1181 struct brw_reg surface
,
1183 unsigned msg_length
,
1184 bool response_expected
,
1185 bool header_present
);
1188 brw_untyped_surface_read(struct brw_codegen
*p
,
1190 struct brw_reg payload
,
1191 struct brw_reg surface
,
1192 unsigned msg_length
,
1193 unsigned num_channels
);
1196 brw_untyped_surface_write(struct brw_codegen
*p
,
1197 struct brw_reg payload
,
1198 struct brw_reg surface
,
1199 unsigned msg_length
,
1200 unsigned num_channels
,
1201 bool header_present
);
1204 brw_memory_fence(struct brw_codegen
*p
,
1207 enum opcode send_op
,
1208 enum brw_message_target sfid
,
1213 brw_pixel_interpolator_query(struct brw_codegen
*p
,
1214 struct brw_reg dest
,
1218 struct brw_reg data
,
1219 unsigned msg_length
,
1220 unsigned response_length
);
1223 brw_find_live_channel(struct brw_codegen
*p
,
1225 struct brw_reg mask
);
1228 brw_broadcast(struct brw_codegen
*p
,
1231 struct brw_reg idx
);
1234 brw_float_controls_mode(struct brw_codegen
*p
,
1235 unsigned mode
, unsigned mask
);
1237 /***********************************************************************
1241 void brw_copy_indirect_to_indirect(struct brw_codegen
*p
,
1242 struct brw_indirect dst_ptr
,
1243 struct brw_indirect src_ptr
,
1246 void brw_copy_from_indirect(struct brw_codegen
*p
,
1248 struct brw_indirect ptr
,
1251 void brw_copy4(struct brw_codegen
*p
,
1256 void brw_copy8(struct brw_codegen
*p
,
1261 void brw_math_invert( struct brw_codegen
*p
,
1263 struct brw_reg src
);
1265 void brw_set_src1(struct brw_codegen
*p
, brw_inst
*insn
, struct brw_reg reg
);
1267 void brw_set_desc_ex(struct brw_codegen
*p
, brw_inst
*insn
,
1268 unsigned desc
, unsigned ex_desc
);
1271 brw_set_desc(struct brw_codegen
*p
, brw_inst
*insn
, unsigned desc
)
1273 brw_set_desc_ex(p
, insn
, desc
, 0);
1276 void brw_set_uip_jip(struct brw_codegen
*p
, int start_offset
);
1278 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod
);
1279 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod
);
1281 /* brw_eu_compact.c */
1282 void brw_init_compaction_tables(const struct gen_device_info
*devinfo
);
1283 void brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1284 struct disasm_info
*disasm
);
1285 void brw_uncompact_instruction(const struct gen_device_info
*devinfo
,
1286 brw_inst
*dst
, brw_compact_inst
*src
);
1287 bool brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1288 brw_compact_inst
*dst
, const brw_inst
*src
);
1290 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1291 brw_inst
*orig
, brw_inst
*uncompacted
);
1293 /* brw_eu_validate.c */
1294 bool brw_validate_instruction(const struct gen_device_info
*devinfo
,
1295 const brw_inst
*inst
, int offset
,
1296 struct disasm_info
*disasm
);
1297 bool brw_validate_instructions(const struct gen_device_info
*devinfo
,
1298 const void *assembly
, int start_offset
, int end_offset
,
1299 struct disasm_info
*disasm
);
1302 next_offset(const struct gen_device_info
*devinfo
, void *store
, int offset
)
1304 brw_inst
*insn
= (brw_inst
*)((char *)store
+ offset
);
1306 if (brw_inst_cmpt_control(devinfo
, insn
))
1312 struct opcode_desc
{
1321 const struct opcode_desc
*
1322 brw_opcode_desc(const struct gen_device_info
*devinfo
, enum opcode opcode
);
1324 const struct opcode_desc
*
1325 brw_opcode_desc_from_hw(const struct gen_device_info
*devinfo
, unsigned hw
);
1327 static inline unsigned
1328 brw_opcode_encode(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1330 return brw_opcode_desc(devinfo
, opcode
)->hw
;
1333 static inline enum opcode
1334 brw_opcode_decode(const struct gen_device_info
*devinfo
, unsigned hw
)
1336 const struct opcode_desc
*desc
= brw_opcode_desc_from_hw(devinfo
, hw
);
1337 return desc
? (enum opcode
)desc
->ir
: BRW_OPCODE_ILLEGAL
;
1341 brw_inst_set_opcode(const struct gen_device_info
*devinfo
,
1342 brw_inst
*inst
, enum opcode opcode
)
1344 brw_inst_set_hw_opcode(devinfo
, inst
, brw_opcode_encode(devinfo
, opcode
));
1347 static inline enum opcode
1348 brw_inst_opcode(const struct gen_device_info
*devinfo
, const brw_inst
*inst
)
1350 return brw_opcode_decode(devinfo
, brw_inst_hw_opcode(devinfo
, inst
));
1354 is_3src(const struct gen_device_info
*devinfo
, enum opcode opcode
)
1356 const struct opcode_desc
*desc
= brw_opcode_desc(devinfo
, opcode
);
1357 return desc
&& desc
->nsrc
== 3;
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)