nir: Drop imov/fmov in favor of one mov instruction
[mesa.git] / src / freedreno / ir3 / ir3_shader.c
index 8b18e950ccae77175e2ea22db4493ba28f3847bb..dacccc1329e1f6295f12b079f0f66457fbed8b9f 100644 (file)
@@ -35,7 +35,7 @@
 #include "ir3_nir.h"
 
 int
-ir3_glsl_type_size(const struct glsl_type *type)
+ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
 {
        return glsl_count_attribute_slots(type, false);
 }
@@ -47,8 +47,6 @@ delete_variant(struct ir3_shader_variant *v)
                ir3_destroy(v->ir);
        if (v->bo)
                fd_bo_del(v->bo);
-       if (v->immediates)
-               free(v->immediates);
        free(v);
 }
 
@@ -63,7 +61,7 @@ delete_variant(struct ir3_shader_variant *v)
  * the reg off.
  */
 static void
-fixup_regfootprint(struct ir3_shader_variant *v)
+fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
 {
        unsigned i;
 
@@ -83,14 +81,30 @@ fixup_regfootprint(struct ir3_shader_variant *v)
 
                if (v->inputs[i].compmask) {
                        unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
-                       int32_t regid = (v->inputs[i].regid + n) >> 2;
-                       v->info.max_reg = MAX2(v->info.max_reg, regid);
+                       int32_t regid = v->inputs[i].regid + n;
+                       if (v->inputs[i].half) {
+                               if (gpu_id < 500) {
+                                       v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+                               } else {
+                                       v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+                               }
+                       } else {
+                               v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+                       }
                }
        }
 
        for (i = 0; i < v->outputs_count; i++) {
-               int32_t regid = (v->outputs[i].regid + 3) >> 2;
-               v->info.max_reg = MAX2(v->info.max_reg, regid);
+               int32_t regid = v->outputs[i].regid + 3;
+               if (v->outputs[i].half) {
+                       if (gpu_id < 500) {
+                               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+                       } else {
+                               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+                       }
+               } else {
+                       v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+               }
        }
 }
 
@@ -115,9 +129,9 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
         * the compiler (to worst-case value) since we don't know in
         * the assembler what the max addr reg value can be:
         */
-       v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
+       v->constlen = MAX2(v->constlen, v->info.max_const + 1);
 
-       fixup_regfootprint(v);
+       fixup_regfootprint(v, gpu_id);
 
        return bin;
 }
@@ -126,6 +140,7 @@ static void
 assemble_variant(struct ir3_shader_variant *v)
 {
        struct ir3_compiler *compiler = v->shader->compiler;
+       struct shader_info *info = &v->shader->nir->info;
        uint32_t gpu_id = compiler->gpu_id;
        uint32_t sz, *bin;
 
@@ -134,7 +149,8 @@ assemble_variant(struct ir3_shader_variant *v)
 
        v->bo = fd_bo_new(compiler->dev, sz,
                        DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
-                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+                       DRM_FREEDRENO_GEM_TYPE_KMEM,
+                       "%s:%s", ir3_shader_stage(v->shader), info->name);
 
        memcpy(fd_bo_map(v->bo), bin, sz);
 
@@ -226,7 +242,7 @@ ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
        struct ir3_shader_variant *v =
                        shader_variant(shader, key, created);
 
-       if (binning_pass) {
+       if (v && binning_pass) {
                if (!v->binning)
                        v->binning = create_variant(shader, key, true);
                return v->binning;
@@ -244,6 +260,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
                v = v->next;
                delete_variant(t);
        }
+       free(shader->const_state.immediates);
        ralloc_free(shader->nir);
        free(shader);
 }
@@ -260,6 +277,18 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir)
        NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
                           (nir_lower_io_options)0);
 
+       if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+               /* NOTE: lower load_barycentric_at_sample first, since it
+                * produces load_barycentric_at_offset:
+                */
+               NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
+               NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);
+
+               NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
+       }
+
+       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
+
        /* do first pass optimization, ignoring the key: */
        shader->nir = ir3_optimize_nir(shader, nir, NULL);
        if (ir3_shader_debug & IR3_DBG_DISASM) {
@@ -320,16 +349,17 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
                                (regid >> 2), "xyzw"[regid & 0x3], i);
        }
 
-       for (i = 0; i < so->immediates_count; i++) {
-               fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
+       struct ir3_const_state *const_state = &so->shader->const_state;
+       for (i = 0; i < const_state->immediates_count; i++) {
+               fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
                fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
-                               so->immediates[i].val[0],
-                               so->immediates[i].val[1],
-                               so->immediates[i].val[2],
-                               so->immediates[i].val[3]);
+                               const_state->immediates[i].val[0],
+                               const_state->immediates[i].val[1],
+                               const_state->immediates[i].val[2],
+                               const_state->immediates[i].val[3]);
        }
 
-       disasm_a3xx(bin, so->info.sizedwords, 0, out);
+       disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id);
 
        switch (so->type) {
        case MESA_SHADER_VERTEX:
@@ -391,6 +421,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
 
        fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy);
 
+       fprintf(out, "; max_sun=%u\n", ir->max_sun);
+
        /* print shader type specific info: */
        switch (so->type) {
        case MESA_SHADER_VERTEX:
@@ -398,8 +430,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
                dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
                break;
        case MESA_SHADER_FRAGMENT:
-               dump_reg(out, "pos (bary)",
-                       ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD));
+               dump_reg(out, "pos (ij_pixel)",
+                       ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL));
+               dump_reg(out, "pos (ij_centroid)",
+                       ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID));
+               dump_reg(out, "pos (ij_size)",
+                       ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE));
                dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
                if (so->color0_mrt) {
                        dump_output(out, so, FRAG_RESULT_COLOR, "color");