radeonsi: move si_shader_binary_upload out of si_compile_llvm
[mesa.git] / src / gallium / drivers / radeonsi / si_state_draw.c
index cf0891a2ab759e569e7eccd992d1e21cba1df637..91ccd07326774d23ac7d0a9a89de0b3e3f4db39b 100644 (file)
@@ -108,7 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                                       const struct pipe_draw_info *info,
                                       unsigned *num_patches)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        struct si_shader_ctx_state *ls = &sctx->vs_shader;
        /* The TES pointer will only be used for sctx->last_tcs.
         * It would be wrong to think that TCS = TES. */
@@ -163,7 +163,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
 
        lds_size = output_patch0_offset + output_patch_size * *num_patches;
-       ls_rsrc2 = ls->current->ls_rsrc2;
+       ls_rsrc2 = ls->current->config.rsrc2;
 
        if (sctx->b.chip_class >= CIK) {
                assert(lds_size <= 65536);
@@ -178,7 +178,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
                radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
        radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-       radeon_emit(cs, ls->current->ls_rsrc1);
+       radeon_emit(cs, ls->current->config.rsrc1);
        radeon_emit(cs, ls_rsrc2);
 
        /* Compute userdata SGPRs. */
@@ -216,6 +216,18 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
 }
 
+static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
+{
+       switch (info->mode) {
+       case PIPE_PRIM_PATCHES:
+               return info->count / info->vertices_per_patch;
+       case R600_PRIM_RECTANGLE_LIST:
+               return info->count / 3;
+       default:
+               return u_prims_for_vertices(info->mode, info->count);
+       }
+}
+
 static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                                          const struct pipe_draw_info *info,
                                          unsigned num_patches)
@@ -320,7 +332,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
        if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
            (info->indirect ||
             (info->instance_count > 1 &&
-             u_prims_for_vertices(info->mode, info->count) <= 1)))
+             si_num_prims_for_vertices(info) <= 1)))
                sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
 
        return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
@@ -353,7 +365,7 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
 
 static void si_emit_scratch_reloc(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
        if (!sctx->emit_scratch_reloc)
                return;
@@ -362,7 +374,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
                               sctx->spi_tmpring_size);
 
        if (sctx->scratch_buffer) {
-               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
                                      RADEON_PRIO_SCRATCH_BUFFER);
 
@@ -373,7 +385,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
 /* rast_prim is the primitive type after GS. */
 static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned rast_prim = sctx->current_rast_prim;
        struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
 
@@ -401,7 +413,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 static void si_emit_draw_registers(struct si_context *sctx,
                                   const struct pipe_draw_info *info)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned prim = si_conv_pipe_prim(info->mode);
        unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
        unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
@@ -455,8 +467,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
                                 const struct pipe_draw_info *info,
                                 const struct pipe_index_buffer *ib)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
+       bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
 
        if (info->count_from_stream_output) {
                struct r600_so_target *t =
@@ -476,7 +489,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
                radeon_emit(cs, 0); /* unused */
 
-               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      t->buf_filled_size, RADEON_USAGE_READ,
                                      RADEON_PRIO_SO_FILLED_SIZE);
        }
@@ -530,7 +543,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
        } else {
                si_invalidate_draw_sh_constants(sctx);
 
-               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      (struct r600_resource *)info->indirect,
                                      RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
        }
@@ -540,7 +553,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                                          ib->index_size;
                uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
 
-               radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      (struct r600_resource *)ib->buffer,
                                      RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
 
@@ -563,7 +576,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                        radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
                        radeon_emit(cs, index_max_size);
 
-                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, sctx->b.predicate_drawing));
+                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, render_cond_bit));
                        radeon_emit(cs, info->indirect_offset);
                        radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
                        radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
@@ -571,7 +584,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
                } else {
                        index_va += info->start * ib->index_size;
 
-                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, sctx->b.predicate_drawing));
+                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
                        radeon_emit(cs, index_max_size);
                        radeon_emit(cs, index_va);
                        radeon_emit(cs, (index_va >> 32UL) & 0xFF);
@@ -590,13 +603,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
                        radeon_emit(cs, indirect_va);
                        radeon_emit(cs, indirect_va >> 32);
 
-                       radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, sctx->b.predicate_drawing));
+                       radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, render_cond_bit));
                        radeon_emit(cs, info->indirect_offset);
                        radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
                        radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
                        radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
                } else {
-                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, sctx->b.predicate_drawing));
+                       radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
                        radeon_emit(cs, info->count);
                        radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
                                    S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));
@@ -604,12 +617,10 @@ static void si_emit_draw_packets(struct si_context *sctx,
        }
 }
 
-#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
-
 void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 {
        struct r600_common_context *sctx = &si_ctx->b;
-       struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->gfx.cs;
        uint32_t cp_coher_cntl = 0;
        uint32_t compute =
                PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
@@ -624,12 +635,12 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 
        if (sctx->flags & SI_CONTEXT_INV_ICACHE)
                cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-       if (sctx->flags & SI_CONTEXT_INV_KCACHE)
+       if (sctx->flags & SI_CONTEXT_INV_SMEM_L1)
                cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
-       if (sctx->flags & SI_CONTEXT_INV_TC_L1)
+       if (sctx->flags & SI_CONTEXT_INV_VMEM_L1)
                cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-       if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
+       if (sctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
                cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 
                /* TODO: this might not be needed. */
@@ -807,7 +818,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        si_get_draw_start_count(sctx, info, &start, &count);
                        start_offset = start * ib.index_size;
 
-                       u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
+                       u_upload_alloc(sctx->b.uploader, start_offset, count * 2, 256,
                                       &out_offset, &out_buffer, &ptr);
                        if (!out_buffer) {
                                pipe_resource_reference(&ib.buffer, NULL);
@@ -831,7 +842,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        start_offset = start * ib.index_size;
 
                        u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
-                                     (char*)ib.user_buffer + start_offset,
+                                     256, (char*)ib.user_buffer + start_offset,
                                      &ib.offset, &ib.buffer);
                        if (!ib.buffer)
                                return;
@@ -843,7 +854,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        /* VI reads index buffers through TC L2. */
        if (info->indexed && sctx->b.chip_class <= CIK &&
            r600_resource(ib.buffer)->TC_L2_dirty) {
-               sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
                r600_resource(ib.buffer)->TC_L2_dirty = false;
        }
 
@@ -873,7 +884,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
        /* Workaround for a VGT hang when streamout is enabled.
         * It must be done after drawing. */
-       if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
+       if ((sctx->b.family == CHIP_HAWAII ||
+            sctx->b.family == CHIP_TONGA ||
+            sctx->b.family == CHIP_FIJI) &&
            (sctx->b.streamout.streamout_enabled ||
             sctx->b.streamout.prims_gen_query_enabled)) {
                sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
@@ -909,10 +922,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 void si_trace_emit(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
        sctx->trace_id++;
-       radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
        radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |