freedreno/ir3: add assert
[mesa.git] / src / freedreno / ir3 / ir3_compiler_nir.c
index 133388a3a80a9df221467e4f322be9268c779eb3..01f0985da62adbcd0a6b226ecb40bdcf62c81b90 100644 (file)
@@ -467,12 +467,20 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
                dst[0] = ir3_DSX(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
+       case nir_op_fddx_fine:
+               dst[0] = ir3_DSXPP_1(b, src[0], 0);
+               dst[0]->cat5.type = TYPE_F32;
+               break;
        case nir_op_fddy:
        case nir_op_fddy_coarse:
                dst[0] = ir3_DSY(b, src[0], 0);
                dst[0]->cat5.type = TYPE_F32;
                break;
                break;
+       case nir_op_fddy_fine:
+               dst[0] = ir3_DSYPP_1(b, src[0], 0);
+               dst[0]->cat5.type = TYPE_F32;
+               break;
        case nir_op_flt16:
        case nir_op_flt32:
                dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
@@ -1032,7 +1040,7 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 static struct ir3_instruction *
 get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
-       unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+       unsigned slot = nir_src_as_uint(intr->src[0]);
        unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
        struct ir3_instruction *texture, *sampler;
 
@@ -1051,13 +1059,12 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                struct ir3_instruction **dst)
 {
        struct ir3_block *b = ctx->block;
-       const nir_variable *var = nir_intrinsic_get_var(intr, 0);
        struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
        struct ir3_instruction *sam;
        struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
        struct ir3_instruction *coords[4];
-       unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
-       type_t type = ir3_get_image_type(var);
+       unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
+       type_t type = ir3_get_type_for_image_intrinsic(intr);
 
        /* hmm, this seems a bit odd, but it is what blob does and (at least
         * a5xx) just faults on bogus addresses otherwise:
@@ -1087,10 +1094,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                struct ir3_instruction **dst)
 {
        struct ir3_block *b = ctx->block;
-       const nir_variable *var = nir_intrinsic_get_var(intr, 0);
        struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
        struct ir3_instruction *sam, *lod;
-       unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
+       unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
        type_t dst_type = nir_dest_bit_size(intr->dest) < 32 ?
                        TYPE_U16 : TYPE_U32;
 
@@ -1118,9 +1124,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
         *
         * TODO: This is at least true on a5xx. Check other gens.
         */
-       enum glsl_sampler_dim dim =
-               glsl_get_sampler_dim(glsl_without_array(var->type));
-       if (dim == GLSL_SAMPLER_DIM_BUF) {
+       if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) {
                /* Since all the possible values the divisor can take are
                 * power-of-two (4, 8, or 16), the division is implemented
                 * as a shift-right.
@@ -1130,7 +1134,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                 */
                struct ir3_const_state *const_state = &ctx->so->shader->const_state;
                unsigned cb = regid(const_state->offsets.image_dims, 0) +
-                       const_state->image_dims.off[var->data.driver_location];
+                       const_state->image_dims.off[nir_src_as_uint(intr->src[0])];
                struct ir3_instruction *aux = create_uniform(b, cb + 1);
 
                tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
@@ -1155,7 +1159,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        struct ir3_instruction *barrier;
 
        switch (intr->intrinsic) {
-       case nir_intrinsic_barrier:
+       case nir_intrinsic_control_barrier:
                barrier = ir3_BAR(b);
                barrier->cat7.g = true;
                barrier->cat7.l = true;
@@ -1174,7 +1178,6 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                                IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W |
                                IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
                break;
-       case nir_intrinsic_memory_barrier_atomic_counter:
        case nir_intrinsic_memory_barrier_buffer:
                barrier = ir3_FENCE(b);
                barrier->cat7.g = true;
@@ -1261,7 +1264,7 @@ get_barycentric_centroid(struct ir3_context *ctx)
                struct ir3_instruction *xy[2];
                struct ir3_instruction *ij;
 
-               ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_CENTROID, 0x3);
+               ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID, 0x3);
                ir3_split_dest(ctx->block, xy, ij, 0, 2);
 
                ctx->ij_centroid = ir3_create_collect(ctx, xy, 2);
@@ -1277,7 +1280,7 @@ get_barycentric_sample(struct ir3_context *ctx)
                struct ir3_instruction *xy[2];
                struct ir3_instruction *ij;
 
-               ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SAMPLE, 0x3);
+               ij = create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE, 0x3);
                ir3_split_dest(ctx->block, xy, ij, 0, 2);
 
                ctx->ij_sample = ir3_create_collect(ctx, xy, 2);
@@ -1299,12 +1302,12 @@ static struct ir3_instruction *
 get_frag_coord(struct ir3_context *ctx)
 {
        if (!ctx->frag_coord) {
-               struct ir3_block *b = ctx->block;
+               struct ir3_block *b = ctx->in_block;
                struct ir3_instruction *xyzw[4];
                struct ir3_instruction *hw_frag_coord;
 
                hw_frag_coord = create_sysval_input(ctx, SYSTEM_VALUE_FRAG_COORD, 0xf);
-               ir3_split_dest(ctx->block, xyzw, hw_frag_coord, 0, 4);
+               ir3_split_dest(b, xyzw, hw_frag_coord, 0, 4);
 
                /* for frag_coord.xy, we get unsigned values.. we need
                 * to subtract (integer) 8 and divide by 16 (right-
@@ -1424,7 +1427,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 
        case nir_intrinsic_end_patch_ir3:
                assert(ctx->so->type == MESA_SHADER_TESS_CTRL);
-               struct ir3_instruction *end = ir3_ENDPATCH(b);
+               struct ir3_instruction *end = ir3_ENDIF(b);
                array_insert(b, b->keeps, end);
 
                end->barrier_class = IR3_BARRIER_EVERYTHING;
@@ -1502,7 +1505,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_load_size_ir3:
                if (!ctx->ij_size) {
                        ctx->ij_size =
-                               create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SIZE, 0x1);
+                               create_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_SIZE, 0x1);
                }
                dst[0] = ctx->ij_size;
                break;
@@ -1537,6 +1540,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                                         * nir_variable to figure out what it is.
                                         */
                                        dst[i] = ctx->inputs[inloc];
+                                       compile_assert(ctx, dst[i]);
                                }
                        }
                } else {
@@ -1614,37 +1618,36 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        case nir_intrinsic_shared_atomic_comp_swap:
                dst[0] = emit_intrinsic_atomic_shared(ctx, intr);
                break;
-       case nir_intrinsic_image_deref_load:
+       case nir_intrinsic_image_load:
                emit_intrinsic_load_image(ctx, intr, dst);
                break;
-       case nir_intrinsic_image_deref_store:
+       case nir_intrinsic_image_store:
                if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
                                !ctx->s->info.fs.early_fragment_tests)
                        ctx->so->no_earlyz = true;
                ctx->funcs->emit_intrinsic_store_image(ctx, intr);
                break;
-       case nir_intrinsic_image_deref_size:
+       case nir_intrinsic_image_size:
                emit_intrinsic_image_size(ctx, intr, dst);
                break;
-       case nir_intrinsic_image_deref_atomic_add:
-       case nir_intrinsic_image_deref_atomic_imin:
-       case nir_intrinsic_image_deref_atomic_umin:
-       case nir_intrinsic_image_deref_atomic_imax:
-       case nir_intrinsic_image_deref_atomic_umax:
-       case nir_intrinsic_image_deref_atomic_and:
-       case nir_intrinsic_image_deref_atomic_or:
-       case nir_intrinsic_image_deref_atomic_xor:
-       case nir_intrinsic_image_deref_atomic_exchange:
-       case nir_intrinsic_image_deref_atomic_comp_swap:
+       case nir_intrinsic_image_atomic_add:
+       case nir_intrinsic_image_atomic_imin:
+       case nir_intrinsic_image_atomic_umin:
+       case nir_intrinsic_image_atomic_imax:
+       case nir_intrinsic_image_atomic_umax:
+       case nir_intrinsic_image_atomic_and:
+       case nir_intrinsic_image_atomic_or:
+       case nir_intrinsic_image_atomic_xor:
+       case nir_intrinsic_image_atomic_exchange:
+       case nir_intrinsic_image_atomic_comp_swap:
                if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
                                !ctx->s->info.fs.early_fragment_tests)
                        ctx->so->no_earlyz = true;
                dst[0] = ctx->funcs->emit_intrinsic_atomic_image(ctx, intr);
                break;
-       case nir_intrinsic_barrier:
+       case nir_intrinsic_control_barrier:
        case nir_intrinsic_memory_barrier:
        case nir_intrinsic_group_memory_barrier:
-       case nir_intrinsic_memory_barrier_atomic_counter:
        case nir_intrinsic_memory_barrier_buffer:
        case nir_intrinsic_memory_barrier_image:
        case nir_intrinsic_memory_barrier_shared:
@@ -1774,6 +1777,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                cond->regs[0]->flags &= ~IR3_REG_SSA;
 
                kill = ir3_KILL(b, cond, 0);
+               kill->regs[1]->num = regid(REG_P0, 0);
                array_insert(ctx->ir, ctx->ir->predicates, kill);
 
                array_insert(b, b->keeps, kill);
@@ -1795,7 +1799,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                /* condition always goes in predicate register: */
                cond->regs[0]->num = regid(REG_P0, 0);
 
-               kill = ir3_CONDEND(b, cond, 0);
+               kill = ir3_IF(b, cond, 0);
 
                kill->barrier_class = IR3_BARRIER_EVERYTHING;
                kill->barrier_conflict = IR3_BARRIER_EVERYTHING;
@@ -1886,33 +1890,15 @@ get_tex_dest_type(nir_tex_instr *tex)
 static void
 tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
 {
-       unsigned coords, flags = 0;
+       unsigned coords = glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);
+       unsigned flags = 0;
 
        /* note: would use tex->coord_components.. except txs.. also,
         * since array index goes after shadow ref, we don't want to
         * count it:
         */
-       switch (tex->sampler_dim) {
-       case GLSL_SAMPLER_DIM_1D:
-       case GLSL_SAMPLER_DIM_BUF:
-               coords = 1;
-               break;
-       case GLSL_SAMPLER_DIM_2D:
-       case GLSL_SAMPLER_DIM_RECT:
-       case GLSL_SAMPLER_DIM_EXTERNAL:
-       case GLSL_SAMPLER_DIM_MS:
-       case GLSL_SAMPLER_DIM_SUBPASS:
-       case GLSL_SAMPLER_DIM_SUBPASS_MS:
-               coords = 2;
-               break;
-       case GLSL_SAMPLER_DIM_3D:
-       case GLSL_SAMPLER_DIM_CUBE:
-               coords = 3;
+       if (coords == 3)
                flags |= IR3_INSTR_3D;
-               break;
-       default:
-               unreachable("bad sampler_dim");
-       }
 
        if (tex->is_shadow && tex->op != nir_texop_lod)
                flags |= IR3_INSTR_S;
@@ -2706,7 +2692,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
                return;
 
        so->inputs[n].slot = slot;
-       so->inputs[n].compmask = (1 << (ncomp + frac)) - 1;
+       so->inputs[n].compmask |= (1 << (ncomp + frac)) - 1;
        so->inputs_count = MAX2(so->inputs_count, n + 1);
        so->inputs[n].interpolate = in->data.interpolation;
 
@@ -2769,17 +2755,25 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
                        ctx->inputs[idx] = instr;
                }
        } else if (ctx->so->type == MESA_SHADER_VERTEX) {
-               /* We shouldn't have fractional input for VS input.. that only shows
-                * up with varying packing
-                */
-               assert(frac == 0);
+               struct ir3_instruction *input = NULL, *in;
+               struct ir3_instruction *components[4];
+               unsigned mask = (1 << (ncomp + frac)) - 1;
 
-               struct ir3_instruction *input = create_input(ctx, (1 << ncomp) - 1);
-               struct ir3_instruction *components[ncomp];
+               foreach_input(in, ctx->ir) {
+                       if (in->input.inidx == n) {
+                               input = in;
+                               break;
+                       }
+               }
 
-               input->input.inidx = n;
+               if (!input) {
+                       input = create_input(ctx, mask);
+                       input->input.inidx = n;
+               } else {
+                       input->regs[0]->wrmask |= mask;
+               }
 
-               ir3_split_dest(ctx->block, components, input, 0, ncomp);
+               ir3_split_dest(ctx->block, components, input, frac, ncomp);
 
                for (int i = 0; i < ncomp; i++) {
                        unsigned idx = (n * 4) + i + frac;
@@ -3045,7 +3039,7 @@ emit_instructions(struct ir3_context *ctx)
         * because sysvals need to be appended after varyings:
         */
        if (vcoord) {
-               add_sysval_input_compmask(ctx, SYSTEM_VALUE_BARYCENTRIC_PIXEL,
+               add_sysval_input_compmask(ctx, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
                                0x3, vcoord);
        }
 
@@ -3330,6 +3324,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        if (so->binning_pass && (ctx->compiler->gpu_id < 600))
                fixup_binning_pass(ctx);
 
+       ir3_debug_print(ir, "BEFORE CF");
+
+       ir3_cf(ir);
+
        ir3_debug_print(ir, "BEFORE CP");
 
        ir3_cp(ir, so);
@@ -3392,10 +3390,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                goto out;
        }
 
-       if (compiler->gpu_id >= 600) {
-               ir3_a6xx_fixup_atomic_dests(ir, so);
-       }
-
        ir3_debug_print(ir, "AFTER SCHED");
 
        /* Pre-assign VS inputs on a6xx+ binning pass shader, to align
@@ -3448,7 +3442,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                int idx = 0;
 
                foreach_input(instr, ir) {
-                       if (instr->input.sysval != SYSTEM_VALUE_BARYCENTRIC_PIXEL)
+                       if (instr->input.sysval != SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)
                                continue;
 
                        assert(idx < ARRAY_SIZE(precolor));
@@ -3468,7 +3462,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                goto out;
        }
 
-       ir3_debug_print(ir, "AFTER RA");
+       ir3_postsched(ctx);
+       ir3_debug_print(ir, "AFTER POSTSCHED");
+
+       if (compiler->gpu_id >= 600) {
+               if (ir3_a6xx_fixup_atomic_dests(ir, so)) {
+                       ir3_debug_print(ir, "AFTER ATOMIC FIXUP");
+               }
+       }
 
        if (so->type == MESA_SHADER_FRAGMENT)
                pack_inlocs(ctx);
@@ -3522,7 +3523,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        /* We need to do legalize after (for frag shader's) the "bary.f"
         * offsets (inloc) have been assigned.
         */
-       ir3_legalize(ir, &so->has_ssbo, &so->need_pixlod, &max_bary);
+       ir3_legalize(ir, so, &max_bary);
 
        ir3_debug_print(ir, "AFTER LEGALIZE");