ilo: make ilo_render_emit_rectlist() direct
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
index 60d9ce92935f260cb8b15a8f9d45b5da2b98a2c0..8d243374dae2645ed9d6d7fdf7b5d5530b6671ee 100644 (file)
@@ -58,6 +58,10 @@ struct vc4_fs_key {
         bool stencil_full_writemasks;
         bool is_points;
         bool is_lines;
+        bool alpha_test;
+        bool point_coord_upper_left;
+        uint8_t alpha_test_func;
+        uint32_t point_sprite_mask;
 
         struct pipe_rt_blend_state blend;
 };
@@ -65,6 +69,7 @@ struct vc4_fs_key {
 struct vc4_vs_key {
         struct vc4_key base;
         enum pipe_format attr_formats[8];
+        bool per_vertex_point_size; /* PIPE_PRIM_POINTS drawn with rasterizer point_size_per_vertex */
 };
 
 static void
@@ -805,19 +810,41 @@ emit_vertex_input(struct vc4_compile *c, int attr)
 
         for (int i = 0; i < 4; i++) {
                 uint8_t swiz = desc->swizzle[i];
+                struct qreg result;
 
-                if (swiz <= UTIL_FORMAT_SWIZZLE_W &&
-                    !format_warned &&
-                    (desc->channel[swiz].type != UTIL_FORMAT_TYPE_FLOAT ||
-                     desc->channel[swiz].size != 32)) {
-                        fprintf(stderr,
-                                "vtx element %d unsupported type: %s\n",
-                                attr, util_format_name(format));
-                        format_warned = true;
+                if (swiz > UTIL_FORMAT_SWIZZLE_W)
+                        result = get_swizzled_channel(c, vpm_reads, swiz);
+                else if (desc->channel[swiz].size == 32 &&
+                         desc->channel[swiz].type == UTIL_FORMAT_TYPE_FLOAT) {
+                        result = get_swizzled_channel(c, vpm_reads, swiz);
+                } else if (desc->channel[swiz].size == 8 &&
+                           (desc->channel[swiz].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+                            desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) &&
+                           desc->channel[swiz].normalized) {
+                        struct qreg vpm = vpm_reads[0];
+                        if (desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED)
+                                vpm = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
+                        result = qir_UNPACK_8(c, vpm, swiz);
+                } else {
+                        if (!format_warned) {
+                                fprintf(stderr,
+                                        "vtx element %d unsupported type: %s\n",
+                                        attr, util_format_name(format));
+                                format_warned = true;
+                        }
+                        result = qir_uniform_f(c, 0.0);
                 }
 
-                c->inputs[attr * 4 + i] =
-                        get_swizzled_channel(c, vpm_reads, swiz);
+                if (desc->channel[swiz].normalized &&
+                    desc->channel[swiz].type == UTIL_FORMAT_TYPE_SIGNED) {
+                        result = qir_FSUB(c,
+                                          qir_FMUL(c,
+                                                   result,
+                                                   qir_uniform_f(c, 2.0)),
+                                          qir_uniform_f(c, 1.0));
+                }
+
+                c->inputs[attr * 4 + i] = result;
         }
 }
 
@@ -843,6 +870,26 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
         c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
 }
 
+/* Point-coord input: channels are (point_x, point_y or 1.0 - point_y, 0.0, 1.0). */
+static void
+emit_point_coord_input(struct vc4_compile *c, int attr)
+{
+        const int base = attr * 4;
+
+        /* point_x still has file QFILE_NULL: set both coordinates to 0.0. */
+        if (c->point_x.file == QFILE_NULL) {
+                c->point_x = qir_uniform_f(c, 0.0);
+                c->point_y = qir_uniform_f(c, 0.0);
+        }
+
+        c->inputs[base + 0] = c->point_x;
+        /* point_coord_upper_left selects 1.0 - point_y for the Y channel. */
+        c->inputs[base + 1] = c->fs_key->point_coord_upper_left ?
+                qir_FSUB(c, qir_uniform_f(c, 1.0), c->point_y) : c->point_y;
+        c->inputs[base + 2] = qir_uniform_f(c, 0.0);
+        c->inputs[base + 3] = qir_uniform_f(c, 1.0);
+}
+
 static struct qreg
 emit_fragment_varying(struct vc4_compile *c, int index)
 {
@@ -851,17 +898,21 @@ emit_fragment_varying(struct vc4_compile *c, int index)
                 index
         };
 
-        /* XXX: multiply by W */
         return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
 }
 
 static void
-emit_fragment_input(struct vc4_compile *c, int attr)
+emit_fragment_input(struct vc4_compile *c, int attr,
+                    struct tgsi_full_declaration *decl)
 {
         for (int i = 0; i < 4; i++) {
                 c->inputs[attr * 4 + i] =
                         emit_fragment_varying(c, attr * 4 + i);
                 c->num_inputs++;
+                /* Record COLOR/BCOLOR inputs in the c->color_inputs bitmask. */
+                if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR ||
+                    decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR)
+                        c->color_inputs |= 1 << i; /* NOTE(review): bit is channel i (0-3), not attr * 4 + i; confirm intended mask layout */
         }
 }
 
@@ -886,8 +937,12 @@ emit_tgsi_declaration(struct vc4_compile *c,
                                 if (decl->Semantic.Name ==
                                     TGSI_SEMANTIC_POSITION) {
                                         emit_fragcoord_input(c, i);
+                                } else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+                                           (c->fs_key->point_sprite_mask &
+                                            (1 << decl->Semantic.Index))) {
+                                        emit_point_coord_input(c, i);
                                 } else {
-                                        emit_fragment_input(c, i);
+                                        emit_fragment_input(c, i, decl);
                                 }
                         } else {
                                 emit_vertex_input(c, i);
@@ -906,6 +961,9 @@ emit_tgsi_declaration(struct vc4_compile *c,
                 case TGSI_SEMANTIC_COLOR:
                         c->output_color_index = decl->Range.First * 4;
                         break;
+                case TGSI_SEMANTIC_PSIZE:
+                        c->output_point_size_index = decl->Range.First * 4;
+                        break;
                 }
 
                 break;
@@ -1200,9 +1258,67 @@ vc4_blend(struct vc4_compile *c, struct qreg *result,
                                    blend->alpha_func);
 }
 
+/* Applies the alpha test to c->discard (NEVER forces 1.0, ALWAYS leaves it). */
+static void
+alpha_test_discard(struct vc4_compile *c)
+{
+        if (!c->fs_key->alpha_test)
+                return;
+        /* Fetched after the guard so the uniform is only requested when used. */
+        struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);
+        struct qreg src_alpha;
+        if (c->output_color_index != -1)
+                src_alpha = c->outputs[c->output_color_index + 3];
+        else
+                src_alpha = qir_uniform_f(c, 1.0);
+        if (c->discard.file == QFILE_NULL)
+                c->discard = qir_uniform_f(c, 0.0);
+
+        /* Presumably NS/NC/ZS/ZC keep c->discard when N/Z is set/clear, else 1.0. */
+        switch (c->fs_key->alpha_test_func) {
+        case PIPE_FUNC_NEVER:
+                c->discard = qir_uniform_f(c, 1.0);
+                break;
+        case PIPE_FUNC_ALWAYS:
+                break;
+        case PIPE_FUNC_EQUAL:
+                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
+                c->discard = qir_SEL_X_Y_ZS(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        case PIPE_FUNC_NOTEQUAL:
+                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
+                c->discard = qir_SEL_X_Y_ZC(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        case PIPE_FUNC_GREATER: /* ref - src negative: operand-swapped mirror of LESS */
+                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
+                c->discard = qir_SEL_X_Y_NS(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        case PIPE_FUNC_GEQUAL: /* src - ref non-negative: mirror of LEQUAL */
+                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
+                c->discard = qir_SEL_X_Y_NC(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        case PIPE_FUNC_LESS:
+                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
+                c->discard = qir_SEL_X_Y_NS(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        case PIPE_FUNC_LEQUAL:
+                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
+                c->discard = qir_SEL_X_Y_NC(c, c->discard,
+                                            qir_uniform_f(c, 1.0));
+                break;
+        }
+}
+
 static void
 emit_frag_end(struct vc4_compile *c)
 {
+        alpha_test_discard(c);
+
         enum pipe_format color_format = c->fs_key->color_format;
         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
         struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
@@ -1345,6 +1461,24 @@ emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
         qir_VPM_WRITE(c, rcp_w);
 }
 
+/* Writes the vertex point size to the VPM, clamped to at least .125. */
+static void
+emit_point_size_write(struct vc4_compile *c)
+{
+        /* A zero output_point_size_index falls back to a size of 1.0.
+         * NOTE(review): reads index + 3; confirm which channel holds PSIZE.
+         */
+        struct qreg point_size = c->output_point_size_index ?
+                c->outputs[c->output_point_size_index + 3] :
+                qir_uniform_f(c, 1.0);
+
+        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+         * BCM21553).
+         */
+        point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125));
+        qir_VPM_WRITE(c, point_size);
+}
+
 static void
 emit_vert_end(struct vc4_compile *c)
 {
@@ -1353,6 +1487,8 @@ emit_vert_end(struct vc4_compile *c)
         emit_scaled_viewport_write(c, rcp_w);
         emit_zs_write(c, rcp_w);
         emit_rcp_wc_write(c, rcp_w);
+        if (c->vs_key->per_vertex_point_size)
+                emit_point_size_write(c);
 
         for (int i = 4; i < c->num_outputs; i++) {
                 qir_VPM_WRITE(c, c->outputs[i]);
@@ -1370,6 +1506,8 @@ emit_coord_end(struct vc4_compile *c)
         emit_scaled_viewport_write(c, rcp_w);
         emit_zs_write(c, rcp_w);
         emit_rcp_wc_write(c, rcp_w);
+        if (c->vs_key->per_vertex_point_size)
+                emit_point_size_write(c);
 }
 
 static struct vc4_compile *
@@ -1506,6 +1644,7 @@ vc4_fs_compile(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                                                        QSTAGE_FRAG,
                                                        &key->base);
         shader->num_inputs = c->num_inputs;
+        shader->color_inputs = c->color_inputs;
         copy_uniform_state_to_shader(shader, 0, c);
         shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
                                       c->qpu_inst_count * sizeof(uint64_t),
@@ -1587,6 +1726,18 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
         key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
         key->depth_enabled = (vc4->zsa->base.depth.enabled ||
                               key->stencil_enabled);
+        if (vc4->zsa->base.alpha.enabled) {
+                key->alpha_test = true;
+                key->alpha_test_func = vc4->zsa->base.alpha.func;
+        }
+
+        if (key->is_points) {
+                key->point_sprite_mask =
+                        vc4->rasterizer->base.sprite_coord_enable;
+                key->point_coord_upper_left =
+                        (vc4->rasterizer->base.sprite_coord_mode ==
+                         PIPE_SPRITE_COORD_UPPER_LEFT);
+        }
 
         vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
         if (vc4->prog.fs)
@@ -1599,11 +1750,17 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
         vc4_fs_compile(vc4, shader, key);
         util_hash_table_set(vc4->fs_cache, key, shader);
 
+        if (vc4->rasterizer->base.flatshade &&
+            vc4->prog.fs &&
+            vc4->prog.fs->color_inputs != shader->color_inputs) {
+                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
+        }
+
         vc4->prog.fs = shader;
 }
 
 static void
-vc4_update_compiled_vs(struct vc4_context *vc4)
+vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
 {
         struct vc4_vs_key local_key;
         struct vc4_vs_key *key = &local_key;
@@ -1615,6 +1772,10 @@ vc4_update_compiled_vs(struct vc4_context *vc4)
         for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                 key->attr_formats[i] = vc4->vtx->pipe[i].src_format;
 
+        key->per_vertex_point_size =
+                (prim_mode == PIPE_PRIM_POINTS &&
+                 vc4->rasterizer->base.point_size_per_vertex);
+
         vc4->prog.vs = util_hash_table_get(vc4->vs_cache, key);
         if (vc4->prog.vs)
                 return;
@@ -1633,7 +1794,7 @@ void
 vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
 {
         vc4_update_compiled_fs(vc4, prim_mode);
-        vc4_update_compiled_vs(vc4);
+        vc4_update_compiled_vs(vc4, prim_mode);
 }
 
 static unsigned
@@ -1853,6 +2014,10 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                                 (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                 0));
                         break;
+
+                case QUNIFORM_ALPHA_REF:
+                        cl_f(&vc4->uniforms, vc4->zsa->base.alpha.ref_value);
+                        break;
                 }
 #if 0
                 uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);