freedreno/ir3: add generic get_barycentric()
[mesa.git] / src / freedreno / ir3 / ir3_nir_lower_tess.c
index 7a30ef09d9760114228d7fe438c78325523eaf12..bdb98f3d935a85d0221f06f81d975b22873aa8c3 100644 (file)
@@ -191,6 +191,13 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
                case nir_intrinsic_store_output: {
                        // src[] = { value, offset }.
 
+                       /* nir_lower_io_to_temporaries replaces all access to output
+                        * variables with temp variables and then emits a nir_copy_var at
+                        * the end of the shader.  Thus, we should always get a full wrmask
+                        * here.
+                        */
+                       assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
                        b->cursor = nir_instr_remove(&intr->instr);
 
                        nir_ssa_def *vertex_id = build_vertex_id(b, state);
@@ -199,10 +206,8 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
                        nir_intrinsic_instr *store =
                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
 
-                       nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
                        store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
                        store->src[1] = nir_src_for_ssa(offset);
-
                        store->num_components = intr->num_components;
 
                        nir_builder_instr_insert(b, &store->instr);
@@ -222,12 +227,13 @@ local_thread_id(nir_builder *b)
 }
 
 void
-ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
+               unsigned topology)
 {
        struct state state = { };
 
        build_primitive_map(shader, &state.map, &shader->outputs);
-       memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+       memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
        assert(impl);
@@ -236,7 +242,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsig
        nir_builder_init(&b, impl);
        b.cursor = nir_before_cf_list(&impl->body);
 
-       if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+       if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
                state.header = nir_load_tcs_header_ir3(&b);
        else
                state.header = nir_load_gs_header_ir3(&b);
@@ -247,7 +253,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsig
        nir_metadata_preserve(impl, nir_metadata_block_index |
                        nir_metadata_dominance);
 
-       s->output_size = state.map.stride;
+       v->output_size = state.map.stride;
 }
 
 
@@ -431,17 +437,21 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                        b->cursor = nir_before_instr(&intr->instr);
 
+                       /* nir_lower_io_to_temporaries replaces all access to output
+                        * variables with temp variables and then emits a nir_copy_var at
+                        * the end of the shader.  Thus, we should always get a full wrmask
+                        * here.
+                        */
+                       assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
                        nir_ssa_def *value = intr->src[0].ssa;
                        nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
                        nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
                        nir_ssa_def *offset = build_per_vertex_offset(b, state,
                                        intr->src[1].ssa, intr->src[2].ssa, var);
 
-                       nir_intrinsic_instr *store =
-                               replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-                                                                 nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-
-                       nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+                                       nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
 
                        break;
                }
@@ -493,9 +503,12 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                        b->cursor = nir_before_instr(&intr->instr);
 
                        if (levels) {
-                               for (int i = 0; i < 4; i++)
-                                       if (nir_intrinsic_write_mask(intr) & (1 << i))
-                                               levels[i] = nir_channel(b, intr->src[0].ssa, i);
+                               for (int i = 0; i < 4; i++) {
+                                       if (nir_intrinsic_write_mask(intr) & (1 << i)) {
+                                               uint32_t component = nir_intrinsic_component(intr);
+                                               levels[i + component] = nir_channel(b, intr->src[0].ssa, i);
+                                       }
+                               }
                                nir_instr_remove(&intr->instr);
                        } else {
                                nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
@@ -503,11 +516,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                                debug_assert(nir_intrinsic_component(intr) == 0);
 
-                               nir_intrinsic_instr *store =
-                                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
-                                                       intr->src[0].ssa, address, offset);
+                               /* nir_lower_io_to_temporaries replaces all access to output
+                                * variables with temp variables and then emits a nir_copy_var at
+                                * the end of the shader.  Thus, we should always get a full wrmask
+                                * here.
+                                */
+                               assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
 
-                               nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+                               replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+                                               intr->src[0].ssa, address, offset);
                        }
                        break;
                }
@@ -524,6 +541,9 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
        nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
        nir_ssa_def *levels[2];
 
+       if (!state->outer_levels[0])
+               return;
+
        /* Then emit the epilogue that actually writes out the tessellation levels
         * to the BOs.
         */
@@ -556,7 +576,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
        store->src[2] = nir_src_for_ssa(offset);
        nir_builder_instr_insert(b, &store->instr);
        store->num_components = levels[0]->num_components;
-       nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
 
        if (levels[1]) {
                store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
@@ -567,7 +586,6 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
                store->src[2] = nir_src_for_ssa(offset);
                nir_builder_instr_insert(b, &store->instr);
                store->num_components = levels[1]->num_components;
-               nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
        }
 
        /* Finally, Insert endpatch instruction:
@@ -581,7 +599,8 @@ emit_tess_epilouge(nir_builder *b, struct state *state)
 }
 
 void
-ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
+               unsigned topology)
 {
        struct state state = { .topology = topology };
 
@@ -592,8 +611,8 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topol
        }
 
        build_primitive_map(shader, &state.map, &shader->outputs);
-       memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
-       s->output_size = state.map.stride;
+       memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
+       v->output_size = state.map.stride;
 
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
        assert(impl);
@@ -693,6 +712,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 
                case nir_intrinsic_load_tess_level_inner:
                case nir_intrinsic_load_tess_level_outer: {
+                               unsigned dest_comp = nir_intrinsic_dest_components(intr);
                                b->cursor = nir_before_instr(&intr->instr);
 
                                gl_varying_slot slot;
@@ -711,7 +731,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
                                 * component individually.
                                 */
                                nir_ssa_def *levels[4];
-                               for (unsigned i = 0; i < intr->num_components; i++) {
+                               for (unsigned i = 0; i < dest_comp; i++) {
                                        nir_intrinsic_instr *new_intr =
                                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
 
@@ -723,7 +743,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
                                        levels[i] = &new_intr->dest.ssa;
                                }
 
-                               nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
+                               nir_ssa_def *v = nir_vec(b, levels, dest_comp);
 
                                nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
 
@@ -969,7 +989,7 @@ ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
                nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
                        if (in_var->data.location == out_var->data.location) {
                                locs[in_var->data.driver_location] =
-                                       producer->shader->output_loc[out_var->data.driver_location] * factor;
+                                       producer->output_loc[out_var->data.driver_location] * factor;
 
                                debug_assert(num_loc <= in_var->data.driver_location + 1);
                                num_loc = in_var->data.driver_location + 1;