panfrost: Fold work_count packing for blend shaders
[mesa.git] / src / gallium / drivers / r600 / r600_state_common.c
index 1e775e565b57e9f5f47aac87e9dfed13ed291893..89429dd504e43d5ac5cda52ef6c4dd251b2bdd27 100644 (file)
@@ -28,7 +28,7 @@
 #include "r600_shader.h"
 #include "r600d.h"
 
-#include "util/u_format_s3tc.h"
+#include "util/format/u_format_s3tc.h"
 #include "util/u_index_modify.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_ureg.h"
 
+#include "nir.h"
+#include "nir/nir_to_tgsi_info.h"
+#include "tgsi/tgsi_from_mesa.h"
+
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
 {
        assert(!cb->buf);
@@ -94,6 +98,10 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom
 static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
+
+       if (!(flags & ~PIPE_BARRIER_UPDATE))
+               return;
+
        if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
                rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
 
@@ -415,7 +423,7 @@ static void r600_sampler_view_destroy(struct pipe_context *ctx,
 
        if (view->tex_resource->gpu_address &&
            view->tex_resource->b.b.target == PIPE_BUFFER)
-               LIST_DELINIT(&view->list);
+               list_delinit(&view->list);
 
        pipe_resource_reference(&state->texture, NULL);
        FREE(view);
@@ -542,7 +550,8 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
 static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
 {
        struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state;
-       r600_resource_reference(&shader->buffer, NULL);
+       if (shader) /* guard: shader->buffer is only touched when state is non-NULL */
+               r600_resource_reference(&shader->buffer, NULL);
        FREE(shader);
 }
 
@@ -570,7 +579,10 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
        /* Set vertex buffers. */
        if (input) {
                for (i = 0; i < count; i++) {
-                       if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
+                       if ((input[i].buffer.resource != vb[i].buffer.resource) ||
+                           (vb[i].stride != input[i].stride) ||
+                           (vb[i].buffer_offset != input[i].buffer_offset) ||
+                           (vb[i].is_user_buffer != input[i].is_user_buffer)) {
                                if (input[i].buffer.resource) {
                                        vb[i].stride = input[i].stride;
                                        vb[i].buffer_offset = input[i].buffer_offset;
@@ -807,6 +819,7 @@ static inline void r600_shader_selector_key(const struct pipe_context *ctx,
                                      rctx->rasterizer && rctx->rasterizer->multisample_enable &&
                                      !rctx->framebuffer.cb0_is_integer;
                key->ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
+                key->ps.apply_sample_id_mask = (rctx->ps_iter_samples > 1) || !rctx->rasterizer->multisample_enable;
                /* Dual-source blending only makes sense with nr_cbufs == 1. */
                if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
                        key->ps.nr_cbufs = 2;
@@ -898,14 +911,19 @@ int r600_shader_select(struct pipe_context *ctx,
 }
 
 struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx,
-                                                                 const struct tgsi_token *tokens,
+                                                                 const void *prog, enum pipe_shader_ir ir, /* prog is cast to TGSI tokens or to a nir_shader, selected by ir */
                                                                  unsigned pipe_shader_type)
 {
        struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector);
 
        sel->type = pipe_shader_type;
-       sel->tokens = tgsi_dup_tokens(tokens);
-       tgsi_scan_shader(tokens, &sel->info);
+       if (ir == PIPE_SHADER_IR_TGSI) { /* NOTE(review): sel is dereferenced above with no NULL check after CALLOC_STRUCT - confirm OOM policy */
+               sel->tokens = tgsi_dup_tokens((const struct tgsi_token *)prog); /* the selector stores the result of the dup, not the caller's pointer */
+               tgsi_scan_shader(sel->tokens, &sel->info); /* now scans sel->tokens instead of the caller's tokens */
+       } else if (ir == PIPE_SHADER_IR_NIR){
+               sel->nir = nir_shader_clone(NULL, (const nir_shader *)prog); /* the selector stores the clone, not the caller's shader */
+               nir_tgsi_scan_shader(sel->nir, &sel->info, true); /* fills the same sel->info field as the TGSI path */
+       } /* any other ir value: neither tokens nor nir is assigned here before returning */
        return sel;
 }
 
@@ -914,8 +932,16 @@ static void *r600_create_shader_state(struct pipe_context *ctx,
                               unsigned pipe_shader_type)
 {
        int i;
-       struct r600_pipe_shader_selector *sel = r600_create_shader_state_tokens(ctx, state->tokens, pipe_shader_type);
-
+       struct r600_pipe_shader_selector *sel;
+       
+       if (state->type == PIPE_SHADER_IR_TGSI)
+               sel = r600_create_shader_state_tokens(ctx, state->tokens, state->type, pipe_shader_type);
+       else if (state->type == PIPE_SHADER_IR_NIR) {
+               sel = r600_create_shader_state_tokens(ctx, state->ir.nir, state->type, pipe_shader_type);
+       } else
+               assert(0 && "Unknown shader type\n");
+       
+       sel->ir_type = state->type;
        sel->so = state->stream_output;
 
        switch (pipe_shader_type) {
@@ -1017,7 +1043,9 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
 
        rctx->vs_shader = (struct r600_pipe_shader_selector *)state;
        r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx));
-       rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
+
+        if (rctx->vs_shader->so.num_outputs)
+           rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
 }
 
 static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
@@ -1032,7 +1060,9 @@ static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
 
        if (!state)
                return;
-       rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride;
+
+        if (rctx->gs_shader->so.num_outputs)
+           rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride;
 }
 
 static void r600_bind_tcs_state(struct pipe_context *ctx, void *state)
@@ -1054,7 +1084,9 @@ static void r600_bind_tes_state(struct pipe_context *ctx, void *state)
 
        if (!state)
                return;
-       rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride;
+
+        if (rctx->tes_shader->so.num_outputs)
+           rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride;
 }
 
 void r600_delete_shader_selector(struct pipe_context *ctx,
@@ -1068,7 +1100,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
                p = c;
        }
 
-       free(sel->tokens);
+       if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+               free(sel->tokens);
+               /* We might have converted the TGSI shader to a NIR shader */
+               if (sel->nir)
+                       ralloc_free(sel->nir);
+       }
+       else if (sel->ir_type == PIPE_SHADER_IR_NIR)
+               ralloc_free(sel->nir);
        free(sel);
 }
 
@@ -1152,7 +1191,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
        struct pipe_constant_buffer *cb;
        const uint8_t *ptr;
 
-       /* Note that the state tracker can unbind constant buffers by
+       /* Note that the gallium frontend can unbind constant buffers by
         * passing NULL here.
         */
        if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
@@ -1307,7 +1346,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
 }
 
 static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
-                                  int array_size, uint32_t *base_offset)
+                                  unsigned array_size, uint32_t *base_offset)
 {
        struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
        if (array_size + R600_UCP_SIZE > info->alloc_size) {
@@ -1430,14 +1469,13 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
-       int i;
        struct pipe_context *ctx = &rctx->b.b;
 
        assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
        assert(rctx->framebuffer.nr_samples <= ARRAY_SIZE(rctx->sample_positions)/4);
 
        memset(rctx->sample_positions, 0, 4 * 4 * 16);
-       for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
+       for (unsigned i = 0; i < rctx->framebuffer.nr_samples; i++) {
                ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
                /* Also fill in center-zeroed positions used for interpolateAtSample */
                rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
@@ -1833,7 +1871,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
         * to LS slots and won't reflect what is dirty as VS stage even if the
         * TES didn't overwrite it. The story for re-enabled TES is similar.
         * In any case, we're not allowed to submit any TES state when
-        * TES is disabled (the state tracker may not do this but this looks
+        * TES is disabled (the gallium frontend may not do this but this looks
         * like an optimization to me, not something which can be relied on).
         */
 
@@ -2083,8 +2121,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                : (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
                : info->mode;
 
-       if (rctx->b.chip_class >= EVERGREEN)
-               evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
+       if (rctx->b.chip_class >= EVERGREEN) {
+               evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
+       }
 
        if (index_size) {
                index_offset += info->start * index_size;
@@ -2170,7 +2209,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                evergreen_setup_tess_constants(rctx, info, &num_patches);
 
        /* Emit states. */
-       r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
+       r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
        r600_flush_emit(rctx);
 
        mask = rctx->dirty_atoms;
@@ -2178,6 +2217,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
        }
 
+       if (rctx->b.chip_class >= EVERGREEN) {
+               evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
+       }
+               
        if (rctx->b.chip_class == CAYMAN) {
                /* Copied from radeonsi. */
                unsigned primgroup_size = 128; /* recommended without a GS */
@@ -2739,6 +2782,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
                case PIPE_FORMAT_RGTC1_SNORM:
                case PIPE_FORMAT_LATC1_SNORM:
                        word4 |= sign_bit[0];
+                       /* fallthrough */
                case PIPE_FORMAT_RGTC1_UNORM:
                case PIPE_FORMAT_LATC1_UNORM:
                        result = FMT_BC4;
@@ -2746,6 +2790,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
                case PIPE_FORMAT_RGTC2_SNORM:
                case PIPE_FORMAT_LATC2_SNORM:
                        word4 |= sign_bit[0] | sign_bit[1];
+                       /* fallthrough */
                case PIPE_FORMAT_RGTC2_UNORM:
                case PIPE_FORMAT_LATC2_UNORM:
                        result = FMT_BC5;
@@ -2910,6 +2955,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
                        switch (desc->nr_channels) {
                        case 1:
                                result = FMT_8;
+                               is_srgb_valid = TRUE;
                                goto out_word4;
                        case 2:
                                result = FMT_8_8;
@@ -3259,7 +3305,7 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
 
 }
 
-static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable)
+static void r600_set_active_query_state(struct pipe_context *ctx, bool enable)
 {
        struct r600_context *rctx = (struct r600_context*)ctx;
 
@@ -3282,7 +3328,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
 static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                    bool include_draw_vbo)
 {
-       r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
+       r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0); /* 0 for the new fourth argument; r600_draw_vbo passes util_bitcount(atomic_used_mask) in that slot */
 }
 
 /* keep this at the end of this file, please */