panfrost/midgard: Emit extended branches
authorAlyssa Rosenzweig <alyssa@rosenzweig.io>
Sun, 17 Feb 2019 22:09:09 +0000 (22:09 +0000)
committerAlyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 21 Feb 2019 07:08:47 +0000 (07:08 +0000)
Previously, we only emitted compact branches; however, the offset range
of these branches is too small for many real world shaders. This patch
implements support for emitting extended branches and switches to always
using them for control flow. This incurs a code size and possibly
performance penalty, but expands the range of working shaders and
provides opportunity for further optimization.

Support for emitting compact branches is retained but this code path is
presently unused. In the future, we'll want to heuristically determine
which type of branch should be emitted for optimal codegen.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/midgard/helpers.h
src/gallium/drivers/panfrost/midgard/midgard_compile.c

index b8ccea8bcd8fa19b19d6157ac918a7ad31364d0f..6940f27b4aba38f2ceb9bdab3693334357eb35d5 100644 (file)
@@ -158,6 +158,13 @@ midgard_is_integer_op(int op)
         }
 }
 
+/* Is this unit a branch? */
+static bool
+midgard_is_branch_unit(unsigned unit)
+{
+        return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT);
+}
+
 /* There are five ALU units: VMUL, VADD, SMUL, SADD, LUT. A given opcode is
  * implemented on some subset of these units (or occassionally all of them).
  * This table encodes a bit mask of valid units for each opcode, so the
index bcd6bf151c1a116123da8a7bf0a74342f91f9a31..7b990fe709ad872db4d506e9ff1af9472f544fb0 100644 (file)
@@ -126,6 +126,7 @@ typedef struct midgard_instruction {
                 midgard_load_store_word load_store;
                 midgard_vector_alu alu;
                 midgard_texture_word texture;
+                midgard_branch_extended branch_extended;
                 uint16_t br_compact;
 
                 /* General branch, rather than packed br_compact. Higher level
@@ -293,7 +294,7 @@ v_branch(bool conditional, bool invert)
 {
         midgard_instruction ins = {
                 .type = TAG_ALU_4,
-                .unit = ALU_ENAB_BR_COMPACT,
+                .unit = ALU_ENAB_BRANCH,
                 .compact_branch = true,
                 .branch = {
                         .conditional = conditional,
@@ -304,6 +305,33 @@ v_branch(bool conditional, bool invert)
         return ins;
 }
 
+static midgard_branch_extended
+midgard_create_branch_extended( midgard_condition cond,
+                                midgard_jmp_writeout_op op,
+                                unsigned dest_tag,
+                                signed quadword_offset)
+{
+        /* For unclear reasons, the condition code is repeated 8 times */
+        uint16_t duplicated_cond =
+                (cond << 14) |
+                (cond << 12) |
+                (cond << 10) |
+                (cond << 8) |
+                (cond << 6) |
+                (cond << 4) |
+                (cond << 2) |
+                (cond << 0);
+
+        midgard_branch_extended branch = {
+                .op = midgard_jmp_writeout_op_branch_cond,
+                .dest_tag = dest_tag,
+                .offset = quadword_offset,
+                .cond = duplicated_cond
+        };
+
+        return branch;
+}
+
 typedef struct midgard_bundle {
         /* Tag for the overall bundle */
         int tag;
@@ -2267,9 +2295,15 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
                                         }
                                 }
 
-                                bundle.body_size[bundle.body_words_count] = sizeof(ains->br_compact);
-                                memcpy(&bundle.body_words[bundle.body_words_count++], &ains->br_compact, sizeof(ains->br_compact));
-                                bytes_emitted += sizeof(ains->br_compact);
+                                if (ains->unit == ALU_ENAB_BRANCH) {
+                                        bundle.body_size[bundle.body_words_count] = sizeof(midgard_branch_extended);
+                                        memcpy(&bundle.body_words[bundle.body_words_count++], &ains->branch_extended, sizeof(midgard_branch_extended));
+                                        bytes_emitted += sizeof(midgard_branch_extended);
+                                } else {
+                                        bundle.body_size[bundle.body_words_count] = sizeof(ains->br_compact);
+                                        memcpy(&bundle.body_words[bundle.body_words_count++], &ains->br_compact, sizeof(ains->br_compact));
+                                        bytes_emitted += sizeof(ains->br_compact);
+                                }
                         } else {
                                 memcpy(&bundle.register_words[bundle.register_words_count++], &ains->registers, sizeof(ains->registers));
                                 bytes_emitted += sizeof(midgard_reg_info);
@@ -2455,7 +2489,11 @@ emit_binary_bundle(compiler_context *ctx, midgard_bundle *bundle, struct util_dy
                                         memcpy(util_dynarray_grow(emission, sizeof(midgard_vector_alu)), &ins.alu, sizeof(midgard_vector_alu));
                                 }
 
-                                memcpy(util_dynarray_grow(emission, sizeof(ins->br_compact)), &ins->br_compact, sizeof(ins->br_compact));
+                                if (ins->unit == ALU_ENAB_BR_COMPACT) {
+                                        memcpy(util_dynarray_grow(emission, sizeof(ins->br_compact)), &ins->br_compact, sizeof(ins->br_compact));
+                                } else {
+                                        memcpy(util_dynarray_grow(emission, sizeof(ins->branch_extended)), &ins->branch_extended, sizeof(ins->branch_extended));
+                                }
                         } else {
                                 /* Scalar */
                                 midgard_scalar_alu scalarised = vector_to_scalar_alu(ins->alu, ins);
@@ -3480,12 +3518,10 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                         for (int c = 0; c < bundle->instruction_count; ++c) {
                                 midgard_instruction *ins = &bundle->instructions[c];
 
-                                if (ins->unit != ALU_ENAB_BR_COMPACT) continue;
+                                if (!midgard_is_branch_unit(ins->unit)) continue;
 
                                 if (ins->prepacked_branch) continue;
 
-                                uint16_t compact;
-
                                 /* Determine the block we're jumping to */
                                 int target_number = ins->branch.target_block;
 
@@ -3518,15 +3554,42 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                                         }
                                 }
 
-                                if (ins->branch.conditional) {
+                                bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT;
+                                bool is_conditional = ins->branch.conditional;
+                                bool is_inverted = ins->branch.invert_conditional;
+
+                                /* Unconditional extended branches (far jumps)
+                                 * have issues, so we always use a conditional
+                                 * branch, setting the condition to always for
+                                 * unconditional. For compact unconditional
+                                 * branches, cond isn't used so it doesn't
+                                 * matter what we pick. */
+
+                                midgard_condition cond =
+                                        !is_conditional ? midgard_condition_always :
+                                        is_inverted ? midgard_condition_false :
+                                        midgard_condition_true;
+
+                                if (!is_compact) {
+                                        midgard_branch_extended branch =
+                                                midgard_create_branch_extended(
+                                                        cond,
+                                                        midgard_jmp_writeout_op_branch_cond,
+                                                        dest_tag,
+                                                        quadword_offset);
+
+                                        memcpy(&ins->branch_extended, &branch, sizeof(branch));
+                                } else if (is_conditional) {
                                         midgard_branch_cond branch = {
                                                 .op = midgard_jmp_writeout_op_branch_cond,
                                                 .dest_tag = dest_tag,
                                                 .offset = quadword_offset,
-                                                .cond = ins->branch.invert_conditional ? midgard_condition_false : midgard_condition_true
+                                                .cond = cond
                                         };
 
-                                        memcpy(&compact, &branch, sizeof(branch));
+                                        assert(branch.offset == quadword_offset);
+
+                                        memcpy(&ins->br_compact, &branch, sizeof(branch));
                                 } else {
                                         midgard_branch_uncond branch = {
                                                 .op = midgard_jmp_writeout_op_branch_uncond,
@@ -3535,14 +3598,11 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
                                                 .unknown = 1
                                         };
 
-                                        memcpy(&compact, &branch, sizeof(branch));
-                                }
+                                        assert(branch.offset == quadword_offset);
 
-                                /* Swap in the generic branch for our actual branch */
-                                ins->unit = ALU_ENAB_BR_COMPACT;
-                                ins->br_compact = compact;
+                                        memcpy(&ins->br_compact, &branch, sizeof(branch));
+                                }
                         }
-
                 }
 
                 ++br_block_idx;