From 8027cc9975c358e2c4888c0413759606a7d930f2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 5 Jun 2019 15:41:03 -0700 Subject: [PATCH] panfrost/midgard: Expose vec8/vec16 modes Midgard ALUs can operate in one of four modes: vec2 64-bit, vec4 32-bit, vec8 16-bit, or vec16 8-bit. Our compiler (and indeed, any OpenGL ES shader) only uses 32-bit (and eventually vec4 16-bit) modes in normal circumstances. Nevertheless, the other modes do exist and are easily accessible through OpenCL; they also come up in cases like blend shaders. While we have had minimal support for decoding 8-bit/64-bit modes, we did so pretending they were vec4 in each case; 16-bit registers had a synthetically duplicated register file to separate lo/hi halves, etc. This works for GL, but it doesn't map to what the hardware is -actually- doing, which can cause some head-scratchingly bizarre disassemblies from OpenCL. So, we dive into the deep end and support these other modes natively in the disassembler, using absurdly long masks/swizzles, since the hardware is considerably more flexible than what was exposed before. Outside of some fixed routines for blending, none of the above is supported in the compiler yet. But it's better to have it in the ISA definitions and disassembler than not, for future use if nothing else. 
Signed-off-by: Alyssa Rosenzweig --- .../drivers/panfrost/midgard/disassemble.c | 509 ++++++++++-------- 1 file changed, 273 insertions(+), 236 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/disassemble.c b/src/gallium/drivers/panfrost/midgard/disassemble.c index 7b6ab9ecd68..c02387d3ffd 100644 --- a/src/gallium/drivers/panfrost/midgard/disassemble.c +++ b/src/gallium/drivers/panfrost/midgard/disassemble.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "midgard.h" #include "midgard-parse.h" @@ -113,15 +114,9 @@ print_reg(unsigned reg, unsigned bits) { /* Perform basic static analysis for expanding constants correctly */ - if ((bits == 16) && (reg >> 1) == 26) { - is_embedded_constant_half = true; + if (reg == 26) { is_embedded_constant_int = is_instruction_int; - } else if ((bits == 32) && reg == 26) { - is_embedded_constant_int = is_instruction_int; - } else if (bits == 8) { - /* TODO */ - } else if (bits == 64) { - /* TODO */ + is_embedded_constant_half = (bits < 32); } char prefix = prefix_for_bits(bits); @@ -171,110 +166,167 @@ print_quad_word(uint32_t *words, unsigned tabs) printf("\n"); } +static const char components[16] = "xyzwefghijklmnop"; + +/* Helper to print 4 chars of a swizzle */ static void -print_vector_src(unsigned src_binary, bool out_high, - midgard_reg_mode mode, unsigned reg, - bool is_int) +print_swizzle_helper(unsigned swizzle, bool upper) { - midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; + for (unsigned i = 0; i < 4; ++i) { + unsigned c = (swizzle >> (i * 2)) & 3; + c += upper*4; + printf("%c", components[c]); + } +} - /* Modifiers change meaning depending on the op's context */ +/* Helper to print 8 chars of a swizzle, duplicating over */ +static void +print_swizzle_helper_8(unsigned swizzle, bool upper) +{ + for (unsigned i = 0; i < 4; ++i) { + unsigned c = (swizzle >> (i * 2)) & 3; + c *= 2; + c += upper*8; + printf("%c%c", components[c], components[c+1]); + } +} - 
midgard_int_mod int_mod = src->mod; +static void +print_swizzle_vec16(unsigned swizzle, bool rep_high, bool rep_low, + midgard_dest_override override) +{ + printf("."); - if (is_int) { - printf("%s", srcmod_names_int[int_mod]); - } else { - if (src->mod & MIDGARD_FLOAT_MOD_NEG) - printf("-"); + if (override == midgard_dest_override_upper) { + if (rep_high) + printf(" /* rep_high */ "); + if (rep_low) + printf(" /* rep_low */ "); - if (src->mod & MIDGARD_FLOAT_MOD_ABS) - printf("abs("); + if (!rep_high && rep_low) + print_swizzle_helper_8(swizzle, true); + else + print_swizzle_helper_8(swizzle, false); + } else { + print_swizzle_helper_8(swizzle, rep_high & 1); + print_swizzle_helper_8(swizzle, !rep_low & 1); } +} - //register - - if (mode == midgard_reg_mode_8) { - if (src->half) - printf(" /* half */ "); +static void +print_swizzle_vec8(unsigned swizzle, bool rep_high, bool rep_low) +{ + printf("."); - unsigned quarter_reg = reg * 2; + print_swizzle_helper(swizzle, rep_high & 1); + print_swizzle_helper(swizzle, !rep_low & 1); +} - if (out_high) { - if (!src->rep_low) - quarter_reg++; +static void +print_swizzle_vec4(unsigned swizzle, bool rep_high, bool rep_low) +{ + if (rep_high) + printf(" /* rep_high */ "); + if (rep_low) + printf(" /* rep_low */ "); - if (src->rep_high) - printf(" /* rep_high */ "); - } else { - if (src->rep_high) - quarter_reg++; + if (swizzle == 0xE4) return; /* xyzw */ - if (src->rep_low) - printf(" /* rep_low */ "); - } + printf("."); + print_swizzle_helper(swizzle, 0); +} +static void +print_swizzle_vec2(unsigned swizzle, bool rep_high, bool rep_low) +{ + if (rep_high) + printf(" /* rep_high */ "); + if (rep_low) + printf(" /* rep_low */ "); - print_reg(quarter_reg, 8); - } else if (mode == midgard_reg_mode_16) { - if (src->half) - printf(" /* half */ "); + if (swizzle == 0xE4) return; /* XY */ - unsigned half_reg = reg * 2; + printf("."); - if (out_high) { - if (!src->rep_low) - half_reg++; + for (unsigned i = 0; i < 4; i += 2) { + 
unsigned a = (swizzle >> (i * 2)) & 3; + unsigned b = (swizzle >> ((i+1) * 2)) & 3; + + /* Normally we're adjacent, but if there's an issue, don't make + * it ambiguous */ + + if (a & 0x1) + printf("[%c%c]", components[a], components[b]); + else if (a == b) + printf("%c", components[a >> 1]); + else if (b == (a + 1)) + printf("%c", "XY"[a >> 1]); + else + printf("[%c%c]", components[a], components[b]); + } +} - if (src->rep_high) - printf(" /* rep_high */ "); - } else { - if (src->rep_high) - half_reg++; +static int +bits_for_mode(midgard_reg_mode mode) +{ + switch (mode) { + case midgard_reg_mode_8: + return 8; + case midgard_reg_mode_16: + return 16; + case midgard_reg_mode_32: + return 32; + case midgard_reg_mode_64: + return 64; + default: + return 0; + } +} - if (src->rep_low) - printf(" /* rep_low */ "); - } +static int +bits_for_mode_halved(midgard_reg_mode mode, bool half) +{ + unsigned bits = bits_for_mode(mode); - print_reg(half_reg, 16); - } else if (mode == midgard_reg_mode_32) { - if (src->rep_high) - printf(" /* rep_high */ "); + if (half) + bits >>= 1; - if (src->half) - print_reg(reg * 2 + src->rep_low, 16); - else { - if (src->rep_low) - printf(" /* rep_low */ "); + return bits; +} - print_reg(reg, 32); - } - } else if (mode == midgard_reg_mode_64) { - if (src->rep_high) - printf(" /* rep_high */ "); +static void +print_vector_src(unsigned src_binary, + midgard_reg_mode mode, unsigned reg, + midgard_dest_override override, bool is_int) +{ + midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary; - if (src->rep_low) - printf(" /* rep_low */ "); + /* Modifiers change meaning depending on the op's context */ - if (src->half) - printf(" /* half */ "); + midgard_int_mod int_mod = src->mod; - if (out_high) - printf(" /* out_high */ "); + if (is_int) { + printf("%s", srcmod_names_int[int_mod]); + } else { + if (src->mod & MIDGARD_FLOAT_MOD_NEG) + printf("-"); - print_reg(reg, 64); + if (src->mod & MIDGARD_FLOAT_MOD_ABS) + printf("abs("); } - 
//swizzle - - if (src->swizzle != 0xE4) { //default swizzle - unsigned i; - static const char c[4] = "xyzw"; - - printf("."); + //register + unsigned bits = bits_for_mode_halved(mode, src->half); + print_reg(reg, bits); - for (i = 0; i < 4; i++) - printf("%c", c[(src->swizzle >> (i * 2)) & 3]); - } + //swizzle + if (bits == 16) + print_swizzle_vec8(src->swizzle, src->rep_high, src->rep_low); + else if (bits == 8) + print_swizzle_vec16(src->swizzle, src->rep_high, src->rep_low, override); + else if (bits == 32) + print_swizzle_vec4(src->swizzle, src->rep_high, src->rep_low); + else if (bits == 64) + print_swizzle_vec2(src->swizzle, src->rep_high, src->rep_low); /* Since we wrapped with a function-looking thing */ @@ -304,68 +356,111 @@ print_immediate(uint16_t imm) printf("#%g", _mesa_half_to_float(imm)); } -static int -bits_for_mode(midgard_reg_mode mode) +static unsigned +print_dest(unsigned reg, midgard_reg_mode mode, midgard_dest_override override) { - switch (mode) { - case midgard_reg_mode_8: - return 8; - case midgard_reg_mode_16: - return 16; - case midgard_reg_mode_32: - return 32; - case midgard_reg_mode_64: - return 64; - default: - return 0; - } + /* Depending on the mode and override, we determine the type of + * destination addressed. Absent an override, we address just the + * type of the operation itself */ + + unsigned bits = bits_for_mode(mode); + + if (override != midgard_dest_override_none) + bits /= 2; + + print_reg(reg, bits); + + return bits; } static void -print_dest(unsigned reg, midgard_reg_mode mode, midgard_dest_override override, bool out_high) +print_mask_vec16(uint8_t mask, midgard_dest_override override) { - bool overriden = override != midgard_dest_override_none; - bool overriden_up = override == midgard_dest_override_upper; + printf("."); - /* Depending on the mode and override, we determine the type of - * destination addressed. 
Absent an override, we address just the - * type of the operation itself, directly at the out_reg register - * (scaled if necessary to disambiguate, raised if necessary) */ + if (override == midgard_dest_override_none) { + for (unsigned i = 0; i < 8; i++) { + if (mask & (1 << i)) + printf("%c%c", + components[i*2 + 0], + components[i*2 + 1]); + } + } else { + bool upper = (override == midgard_dest_override_upper); - unsigned bits = bits_for_mode(mode); + for (unsigned i = 0; i < 8; i++) { + if (mask & (1 << i)) + printf("%c", components[i + (upper ? 8 : 0)]); + } + } +} - if (overriden) - bits /= 2; +/* For 16-bit+ masks, we read off from the 8-bit mask field. For 16-bit (vec8), + * it's just one bit per channel, easy peasy. For 32-bit (vec4), it's one bit + * per channel with one duplicate bit in the middle. For 64-bit (vec2), it's + * one-bit per channel with _3_ duplicate bits in the middle. Basically, just + * subdividing the 128-bit word in 16-bit increments. For 64-bit, we uppercase + * the mask to make it obvious what happened */ + +static void +print_mask(uint8_t mask, unsigned bits, midgard_dest_override override) +{ + if (bits == 8) { + print_mask_vec16(mask, override); + return; + } - /* Sanity check the override */ + /* Skip 'complete' masks */ - if (overriden) { - bool modeable = (mode == midgard_reg_mode_32) || (mode == midgard_reg_mode_16); - bool known = override != 0x3; /* Unused value */ - bool uppable = !overriden_up || (mode == midgard_reg_mode_32); + if (bits >= 32 && mask == 0xFF) return; - if (!(modeable && known && uppable)) - printf("/* do%d */ ", override); + if (bits == 16) { + if (mask == 0x0F) + return; + else if (mask == 0xF0) { + printf("'"); + return; + } } - switch (mode) { - case midgard_reg_mode_8: - case midgard_reg_mode_16: - reg = reg * 2 + out_high; - break; + printf("."); - case midgard_reg_mode_32: - if (overriden) { - reg = (reg * 2) + overriden_up; - } + unsigned skip = (bits / 16); + bool uppercase = bits > 32; + bool 
tripped = false; - break; + for (unsigned i = 0; i < 8; i += skip) { + bool a = (mask & (1 << i)) != 0; - default: - break; + for (unsigned j = 1; j < skip; ++j) { + bool dupe = (mask & (1 << (i + j))) != 0; + tripped |= (dupe != a); + } + + if (a) { + char c = components[i / skip]; + + if (uppercase) + c = toupper(c); + + printf("%c", c); + } } - print_reg(reg, bits); + if (tripped) + printf(" /* %X */", mask); +} + +static void +print_mask_4(unsigned mask) +{ + printf("."); + + for (unsigned i = 0; i < 4; ++i) { + bool a = (mask & (1 << i)) != 0; + if (a) + printf("%c", components[i]); + } } static void @@ -385,66 +480,40 @@ print_vector_field(const char *name, uint16_t *words, uint16_t reg_word, midgard_is_integer_out_op(alu_field->op)); printf(" "); - bool out_high = false; - unsigned mask; - - if (mode == midgard_reg_mode_16 - || mode == midgard_reg_mode_8) { - - /* For partial views, the mask denotes which adjacent register - * is used as the window into the larger register */ - - if (alu_field->mask & 0xF) { - out_high = false; - - if ((alu_field->mask & 0xF0)) - printf("/* %X */ ", alu_field->mask); - - mask = alu_field->mask; - } else { - out_high = true; - mask = alu_field->mask >> 4; - } - } else { - /* For full 32-bit, every other bit is duplicated, so we only - * pick every other to find the effective mask */ - - mask = alu_field->mask & 1; - mask |= (alu_field->mask & 4) >> 1; - mask |= (alu_field->mask & 16) >> 2; - mask |= (alu_field->mask & 64) >> 3; - - /* ... but verify! 
*/ - - unsigned checked = alu_field->mask & 0x55; - unsigned opposite = alu_field->mask & 0xAA; - - if ((checked << 1) != opposite) - printf("/* %X */ ", alu_field->mask); - } + /* Mask denoting status of 8-lanes */ + uint8_t mask = alu_field->mask; /* First, print the destination */ - print_dest(reg_info->out_reg, mode, alu_field->dest_override, out_high); - - /* The semantics here are not totally grokked yet */ - if (alu_field->dest_override == midgard_dest_override_upper) - out_high = true; - - if (mask != 0xF) { - unsigned i; - static const char c[4] = "xyzw"; + unsigned dest_size = + print_dest(reg_info->out_reg, mode, alu_field->dest_override); + + /* Apply the destination override to the mask */ + unsigned override = alu_field->dest_override; + + if (mode == midgard_reg_mode_32 || mode == midgard_reg_mode_64) { + if (override == midgard_dest_override_lower) + mask &= 0x0F; + else if (override == midgard_dest_override_upper) + mask &= 0xF0; + } else if (mode == midgard_reg_mode_16 + && override == midgard_dest_override_lower) { + /* stub */ + } - printf("."); + if (override != midgard_dest_override_none) { + bool modeable = (mode != midgard_reg_mode_8); + bool known = override != 0x3; /* Unused value */ - for (i = 0; i < 4; i++) - if (mask & (1 << i)) - printf("%c", c[i]); + if (!(modeable && known)) + printf("/* do%d */ ", override); } + print_mask(mask, dest_size, override); + printf(", "); bool is_int = midgard_is_integer_op(alu_field->op); - print_vector_src(alu_field->src1, out_high, mode, reg_info->src1_reg, is_int); + print_vector_src(alu_field->src1, mode, reg_info->src1_reg, override, is_int); printf(", "); @@ -452,8 +521,8 @@ print_vector_field(const char *name, uint16_t *words, uint16_t reg_word, uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2); print_immediate(imm); } else { - print_vector_src(alu_field->src2, out_high, mode, - reg_info->src2_reg, is_int); + print_vector_src(alu_field->src2, mode, + reg_info->src2_reg, 
override, is_int); } printf("\n"); @@ -470,14 +539,16 @@ print_scalar_src(unsigned src_binary, unsigned reg) if (src->abs) printf("abs("); - if (src->full) - print_reg(reg, 32); - else - print_reg(reg * 2 + (src->component >> 2), 16); + print_reg(reg, src->full ? 32 : 16); - static const char c[4] = "xyzw"; - \ - printf(".%c", c[src->full ? src->component >> 1 : src->component & 3]); + unsigned c = src->component; + + if (src->full) { + assert((c & 1) == 0); + c >>= 1; + } + + printf(".%c", components[c]); if (src->abs) printf(")"); @@ -512,16 +583,16 @@ print_scalar_field(const char *name, uint16_t *words, uint16_t reg_word, midgard_is_integer_out_op(alu_field->op)); printf(" "); - if (alu_field->output_full) - print_reg(reg_info->out_reg, 32); - else - print_reg(reg_info->out_reg * 2 + (alu_field->output_component >> 2), - 16); + bool full = alu_field->output_full; + print_reg(reg_info->out_reg, full ? 32 : 16); + unsigned c = alu_field->output_component; - static const char c[4] = "xyzw"; - printf(".%c, ", - c[alu_field->output_full ? alu_field->output_component >> 1 : - alu_field->output_component & 3]); + if (full) { + assert((c & 1) == 0); + c >>= 1; + } + + printf(".%c, ", components[c]); print_scalar_src(alu_field->src1, reg_info->src1_reg); @@ -825,40 +896,6 @@ print_alu_word(uint32_t *words, unsigned num_quad_words, } } -/* Swizzle/mask formats are common between load/store ops and texture ops, it - * looks like... 
*/ - -static void -print_swizzle(uint32_t swizzle) -{ - unsigned i; - - if (swizzle != 0xE4) { - printf("."); - - for (i = 0; i < 4; i++) - printf("%c", "xyzw"[(swizzle >> (2 * i)) & 3]); - } -} - -static void -print_mask(uint32_t mask) -{ - unsigned i; - - if (mask != 0xF) { - printf("."); - - for (i = 0; i < 4; i++) - if (mask & (1 << i)) - printf("%c", "xyzw"[i]); - - /* Handle degenerate case */ - if (mask == 0) - printf("0"); - } -} - static void print_varying_parameters(midgard_load_store_word *word) { @@ -911,7 +948,7 @@ print_load_store_instr(uint64_t data, print_varying_parameters(word); printf(" r%d", word->reg); - print_mask(word->mask); + print_mask_4(word->mask); int address = word->address; @@ -927,7 +964,7 @@ print_load_store_instr(uint64_t data, printf(", %d", address); - print_swizzle(word->swizzle); + print_swizzle_vec4(word->swizzle, false, false); printf(", 0x%X /* %X */\n", word->unknown, word->varying_parameters); } @@ -1034,13 +1071,13 @@ print_texture_word(uint32_t *word, unsigned tabs) printf(" "); print_texture_reg(texture->out_full, texture->out_reg_select, texture->out_upper); - print_mask(texture->mask); + print_mask_4(texture->mask); printf(", "); printf("texture%d, ", texture->texture_handle); printf("sampler%d", texture->sampler_handle); - print_swizzle(texture->swizzle); + print_swizzle_vec4(texture->swizzle, false, false); printf(", "); print_texture_reg(/*texture->in_reg_full*/true, texture->in_reg_select, texture->in_reg_upper); -- 2.30.2