panfrost: add atomic_cmpxchg opcode
[mesa.git] / src / panfrost / midgard / midgard_ops.c
index e0d8d886a79260802d76fa7a06e5a430cce823b5..ef30523bc1aa39033ac1cc0be15ec7f461e2af78 100644 (file)
@@ -54,6 +54,9 @@ struct mir_op_props alu_opcode_props[256] = {
         [midgard_alu_op_ftrunc]          = {"ftrunc", UNITS_ADD},
         [midgard_alu_op_ffloor]                 = {"ffloor", UNITS_ADD},
         [midgard_alu_op_fceil]          = {"fceil", UNITS_ADD},
+
+        /* Multiplies the X/Y components of the first arg and adds the second
+         * arg. Like other LUTs, it must be scalarized. */
         [midgard_alu_op_ffma]           = {"ffma", UNIT_VLUT},
 
         /* Though they output a scalar, they need to run on a vector unit
@@ -64,6 +67,7 @@ struct mir_op_props alu_opcode_props[256] = {
 
         /* Incredibly, iadd can run on vmul, etc */
         [midgard_alu_op_iadd]           = {"iadd", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ishladd]         = {"ishladd", UNITS_MUL},
         [midgard_alu_op_iaddsat]        = {"iaddsat", UNITS_ADD | OP_COMMUTES},
         [midgard_alu_op_uaddsat]        = {"uaddsat", UNITS_ADD | OP_COMMUTES},
         [midgard_alu_op_iabsdiff]       = {"iabsdiff", UNITS_ADD},
@@ -102,19 +106,19 @@ struct mir_op_props alu_opcode_props[256] = {
         [midgard_alu_op_fexp2]          = {"fexp2", UNIT_VLUT},
         [midgard_alu_op_flog2]          = {"flog2", UNIT_VLUT},
 
-        [midgard_alu_op_f2i_rte]        = {"f2i_rte", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2i_rte]        = {"f2i_rte", UNITS_ADD | OP_TYPE_CONVERT | MIDGARD_ROUNDS},
         [midgard_alu_op_f2i_rtz]        = {"f2i_rtz", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_f2i_rtn]        = {"f2i_rtn", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_f2i_rtp]        = {"f2i_rtp", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u_rte]        = {"f2i_rte", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u_rtz]        = {"f2i_rtz", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u_rtn]        = {"f2i_rtn", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u_rtp]        = {"f2i_rtp", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_i2f_rte]        = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u_rte]        = {"f2u_rte", UNITS_ADD | OP_TYPE_CONVERT | MIDGARD_ROUNDS},
+        [midgard_alu_op_f2u_rtz]        = {"f2u_rtz", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u_rtn]        = {"f2u_rtn", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u_rtp]        = {"f2u_rtp", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_i2f_rte]        = {"i2f_rte", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_i2f_rtz]        = {"i2f_rtz", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_i2f_rtn]        = {"i2f_rtn", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_i2f_rtp]        = {"i2f_rtp", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_u2f_rte]        = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_u2f_rte]        = {"u2f_rte", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_u2f_rtz]        = {"u2f_rtz", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_u2f_rtn]        = {"u2f_rtn", UNITS_ADD | OP_TYPE_CONVERT},
         [midgard_alu_op_u2f_rtp]        = {"u2f_rtp", UNITS_ADD | OP_TYPE_CONVERT},
@@ -161,66 +165,118 @@ struct mir_op_props alu_opcode_props[256] = {
         [midgard_alu_op_ubany_lt]       = {"ubany_lt",  UNITS_VECTOR | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
         [midgard_alu_op_ubany_lte]      = {"ubany_lte", UNITS_VECTOR | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
 
-        /* These instructions are not yet emitted by the compiler, so
-         * don't speculate about units yet */
-        [midgard_alu_op_ishladd]        = {"ishladd", 0},
+        [midgard_alu_op_fatan2_pt1]     = {"fatan2_pt1", UNIT_VLUT},
+        [midgard_alu_op_fatan_pt2]      = {"fatan_pt2", UNIT_VLUT},
+
+        /* Haven't seen in a while */
         [midgard_alu_op_freduce]        = {"freduce", 0},
-        [midgard_alu_op_fatan2_pt1]     = {"fatan2_pt1", 0},
-        [midgard_alu_op_fatan_pt2]      = {"fatan_pt2", 0},
 };
 
-const char *load_store_opcode_names[256] = {
-        [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
-        [midgard_op_ld_global_id] = "ld_global_id",
-        [midgard_op_ldst_perspective_division_z] = "ldst_perspective_division_z",
-        [midgard_op_ldst_perspective_division_w] = "ldst_perspective_division_w",
-
-        [midgard_op_atomic_add] = "atomic_add",
-        [midgard_op_atomic_and] = "atomic_and",
-        [midgard_op_atomic_or] = "atomic_or",
-        [midgard_op_atomic_xor] = "atomic_xor",
-        [midgard_op_atomic_imin] = "atomic_imin",
-        [midgard_op_atomic_umin] = "atomic_umin",
-        [midgard_op_atomic_imax] = "atomic_imax",
-        [midgard_op_atomic_umax] = "atomic_umax",
-        [midgard_op_atomic_xchg] = "atomic_xchg",
-
-        [midgard_op_ld_char] = "ld_char",
-        [midgard_op_ld_char2] = "ld_char2",
-        [midgard_op_ld_short] = "ld_short",
-        [midgard_op_ld_char4] = "ld_char4",
-        [midgard_op_ld_short4] = "ld_short4",
-        [midgard_op_ld_int4] = "ld_int4",
-
-        [midgard_op_ld_attr_32] = "ld_attr_32",
-        [midgard_op_ld_attr_16] = "ld_attr_16",
-        [midgard_op_ld_attr_32i] = "ld_attr_32i",
-        [midgard_op_ld_attr_32u] = "ld_attr_32u",
-
-        [midgard_op_ld_vary_32] = "ld_vary_32",
-        [midgard_op_ld_vary_16] = "ld_vary_16",
-        [midgard_op_ld_vary_32i] = "ld_vary_32i",
-        [midgard_op_ld_vary_32u] = "ld_vary_32u",
-
-        [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
-
-        [midgard_op_ld_uniform_16] = "ld_uniform_16",
-        [midgard_op_ld_uniform_32] = "ld_uniform_32",
-        [midgard_op_ld_uniform_32i] = "ld_uniform_32i",
-        [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
-
-        [midgard_op_st_char] = "st_char",
-        [midgard_op_st_char2] = "st_char2",
-        [midgard_op_st_char4] = "st_char4",
-        [midgard_op_st_short4] = "st_short4",
-        [midgard_op_st_int4] = "st_int4",
-
-        [midgard_op_st_vary_32] = "st_vary_32",
-        [midgard_op_st_vary_16] = "st_vary_16",
-        [midgard_op_st_vary_32i] = "st_vary_32i",
-        [midgard_op_st_vary_32u] = "st_vary_32u",
-
-        [midgard_op_st_image_f] = "st_image_f",
-        [midgard_op_st_image_ui] = "st_image_ui",
-        [midgard_op_st_image_i] = "st_image_i",
+/* Load/store opcode table, indexed by midgard_op_*: {mnemonic, mode | LDST_* flags}.
+ * M8..M64 are local shorthands for midgard_reg_mode_*; M8 is unused below. */
+#define M8  midgard_reg_mode_8
+#define M16 midgard_reg_mode_16
+#define M32 midgard_reg_mode_32
+#define M64 midgard_reg_mode_64
+
+struct mir_ldst_op_props load_store_opcode_props[256] = {
+        [midgard_op_unpack_colour] = {"unpack_colour", M32},
+        [midgard_op_pack_colour] = {"pack_colour", M32},
+        [midgard_op_pack_colour_32] = {"pack_colour_32", M32},
+        [midgard_op_ld_cubemap_coords] = {"ld_cubemap_coords", M32},
+        [midgard_op_ld_compute_id] = {"ld_compute_id", M32},
+        [midgard_op_ldst_perspective_division_z] = {"ldst_perspective_division_z", M32},
+        [midgard_op_ldst_perspective_division_w] = {"ldst_perspective_division_w", M32},
+        /* Atomics, M32 group then M64 group; every one is flagged LDST_SIDE_FX */
+        [midgard_op_atomic_add]  = {"atomic_add",  M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_and]  = {"atomic_and",  M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_or]   = {"atomic_or",   M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_xor]  = {"atomic_xor",  M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_imin] = {"atomic_imin", M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_umin] = {"atomic_umin", M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_imax] = {"atomic_imax", M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_umax] = {"atomic_umax", M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_xchg] = {"atomic_xchg", M32 | LDST_SIDE_FX},
+        [midgard_op_atomic_cmpxchg] = {"atomic_cmpxchg", M32 | LDST_SIDE_FX},
+
+        [midgard_op_atomic_add64]  = {"atomic_add64",  M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_and64]  = {"atomic_and64",  M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_or64]   = {"atomic_or64",   M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_xor64]  = {"atomic_xor64",  M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_imin64] = {"atomic_imin64", M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_umin64] = {"atomic_umin64", M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_imax64] = {"atomic_imax64", M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_umax64] = {"atomic_umax64", M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_xchg64] = {"atomic_xchg64", M64 | LDST_SIDE_FX},
+        [midgard_op_atomic_cmpxchg64] = {"atomic_cmpxchg64", M64 | LDST_SIDE_FX},
+
+        [midgard_op_ld_uchar]  = {"ld_uchar", M32 | LDST_ADDRESS},
+        [midgard_op_ld_char]   = {"ld_char",   M32 | LDST_ADDRESS},
+        [midgard_op_ld_ushort] = {"ld_ushort", M32 | LDST_ADDRESS},
+        [midgard_op_ld_short]  = {"ld_short",  M32 | LDST_ADDRESS},
+        [midgard_op_ld_char4]  = {"ld_char4",  M32 | LDST_ADDRESS},
+        [midgard_op_ld_short4] = {"ld_short4", M32 | LDST_ADDRESS},
+        [midgard_op_ld_int4]   = {"ld_int4",   M32 | LDST_ADDRESS},
+
+        [midgard_op_ld_attr_32]  = {"ld_attr_32",  M32},
+        [midgard_op_ld_attr_32i] = {"ld_attr_32i", M32},
+        [midgard_op_ld_attr_32u] = {"ld_attr_32u", M32},
+        [midgard_op_ld_attr_16]  = {"ld_attr_16",  M32},
+
+        [midgard_op_ld_vary_32]  = {"ld_vary_32",  M32},
+        [midgard_op_ld_vary_16]  = {"ld_vary_16",  M32},
+        [midgard_op_ld_vary_32i] = {"ld_vary_32i", M32},
+        [midgard_op_ld_vary_32u] = {"ld_vary_32u", M32},
+
+        [midgard_op_ld_color_buffer_32u]  = {"ld_color_buffer_32u",  M32},
+        [midgard_op_ld_color_buffer_32u_old]  = {"ld_color_buffer_32u_old",  M32},
+        [midgard_op_ld_color_buffer_as_fp16] = {"ld_color_buffer_as_fp16", M16},
+        [midgard_op_ld_color_buffer_as_fp32] = {"ld_color_buffer_as_fp32", M32},
+        [midgard_op_ld_color_buffer_as_fp16_old] = {"ld_color_buffer_as_fp16_old", M16 | LDST_SPECIAL_MASK},
+        [midgard_op_ld_color_buffer_as_fp32_old] = {"ld_color_buffer_as_fp32_old", M32 | LDST_SPECIAL_MASK},
+
+        [midgard_op_ld_ubo_char]   = {"ld_ubo_char",   M32},
+        [midgard_op_ld_ubo_char2]  = {"ld_ubo_char2",  M16},
+        [midgard_op_ld_ubo_char4]  = {"ld_ubo_char4",  M32},
+        [midgard_op_ld_ubo_short4] = {"ld_ubo_short4", M32},
+        [midgard_op_ld_ubo_int4]   = {"ld_ubo_int4",   M32},
+        /* Every st_* entry below is flagged LDST_STORE */
+        [midgard_op_st_char]   = {"st_char",   M32 | LDST_STORE | LDST_ADDRESS},
+        [midgard_op_st_char2]  = {"st_char2",  M16 | LDST_STORE | LDST_ADDRESS},
+        [midgard_op_st_char4]  = {"st_char4",  M32 | LDST_STORE | LDST_ADDRESS},
+        [midgard_op_st_short4] = {"st_short4", M32 | LDST_STORE | LDST_ADDRESS},
+        [midgard_op_st_int4]   = {"st_int4",   M32 | LDST_STORE | LDST_ADDRESS},
+
+        [midgard_op_st_vary_32]  = {"st_vary_32",  M32 | LDST_STORE},
+        [midgard_op_st_vary_32i] = {"st_vary_32i", M32 | LDST_STORE},
+        [midgard_op_st_vary_32u] = {"st_vary_32u", M32 | LDST_STORE},
+        [midgard_op_st_vary_16]  = {"st_vary_16",  M16 | LDST_STORE},
+
+        [midgard_op_st_image_f]  = {"st_image_f",  M32 | LDST_STORE},
+        [midgard_op_st_image_ui] = {"st_image_ui", M32 | LDST_STORE},
+        [midgard_op_st_image_i]  = {"st_image_i",  M32 | LDST_STORE},
+};
+
+#undef M8
+#undef M16
+#undef M32
+#undef M64
+
+struct mir_tag_props midgard_tag_props[16] = { /* indexed by TAG_*: {short name, count} */
+        [TAG_INVALID]           = {"invalid", 0},
+        [TAG_BREAK]             = {"break", 0},
+        [TAG_TEXTURE_4_VTX]     = {"tex/vt", 1},
+        [TAG_TEXTURE_4]         = {"tex", 1},
+        [TAG_TEXTURE_4_BARRIER] = {"tex/bar", 1},
+        [TAG_LOAD_STORE_4]      = {"ldst", 1},
+        [TAG_UNKNOWN_1]         = {"unk1", 1},
+        [TAG_UNKNOWN_2]         = {"unk2", 1},
+        [TAG_ALU_4]             = {"alu/4", 1}, /* ALU counts equal the tag suffix / 4; presumably a size in quadwords - TODO confirm */
+        [TAG_ALU_8]             = {"alu/8", 2},
+        [TAG_ALU_12]            = {"alu/12", 3},
+        [TAG_ALU_16]            = {"alu/16", 4},
+        [TAG_ALU_4_WRITEOUT]    = {"aluw/4", 1}, /* writeout variants repeat the counts of the plain ALU tags */
+        [TAG_ALU_8_WRITEOUT]    = {"aluw/8", 2},
+        [TAG_ALU_12_WRITEOUT]   = {"aluw/12", 3},
+        [TAG_ALU_16_WRITEOUT]   = {"aluw/16", 4}
 };