From: Dave Airlie Date: Thu, 25 Jun 2015 02:36:23 +0000 (+0100) Subject: radeonsi: add support for viewport array (v3) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7e5064360c03b8dbdd60298b46e1595418c6cea3;p=mesa.git radeonsi: add support for viewport array (v3) This isn't pretty, and I'd suggest the pm4 interface builder could be tweaked to do this more efficiently, but I'd need guidance on how that would look. This seems to pass the few piglit tests I threw at it. v2: handle passing layer/viewport index to fragment shader. fix crash in blit changes, add support to io_get_unique_index for layer/viewport index, update docs. v3: avoid looking up viewport index and layer in es (Marek). Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- diff --git a/docs/GL3.txt b/docs/GL3.txt index 220bcc8742f..df913bdd8c9 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10: GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_shader_precision started (Micah) GL_ARB_vertex_attrib_64bit DONE (nvc0, softpipe) - GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, llvmpipe) + GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) GL 4.2, GLSL 4.20: @@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30: GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) - GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe) + GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe) GL_ARB_framebuffer_no_attachments DONE (i965) GL_ARB_internalformat_query2 not started GL_ARB_invalidate_subdata DONE (all drivers) diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html index e089889667d..fcc50811b69 100644 --- a/docs/relnotes/10.7.0.html +++ b/docs/relnotes/10.7.0.html @@ -44,8 +44,11 @@ Note: some of the new features are only available with certain drivers.

    +
  • GL_AMD_vertex_shader_viewport_index on radeonsi
  • GL_ARB_framebuffer_no_attachments on i965
  • GL_ARB_shader_stencil_export on llvmpipe
  • +
  • GL_ARB_viewport_array on radeonsi
  • +
  • GL_ARB_fragment_layer_viewport on radeonsi

Bug fixes

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 1f2c4082dbc..6c7b383a4a3 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_sample_mask(sctx->blitter, sctx->queued.named.sample_mask->sample_mask); } - if (sctx->queued.named.viewport) { - util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport); + if (sctx->queued.named.viewport[0]) { + util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport); } - if (sctx->queued.named.scissor) { - util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor); + if (sctx->queued.named.scissor[0]) { + util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor); } util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer); util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 53ae71a8c92..480a3010d31 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 8; case PIPE_CAP_MAX_VIEWPORTS: - return 1; + return 16; /* Timer queries, present when the clock frequency is non zero. 
*/ case PIPE_CAP_QUERY_TIMESTAMP: diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a293ef36fbb..4ca31728dff 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1132,7 +1132,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; LLVMValueRef args[9]; LLVMValueRef pos_args[4][9] = { { 0 } }; - LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL; + LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL; unsigned semantic_name, semantic_index; unsigned target; unsigned param_count = 0; @@ -1158,7 +1158,12 @@ handle_semantic: continue; case TGSI_SEMANTIC_LAYER: layer_value = outputs[i].values[0]; - continue; + semantic_name = TGSI_SEMANTIC_GENERIC; + goto handle_semantic; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + viewport_index_value = outputs[i].values[0]; + semantic_name = TGSI_SEMANTIC_GENERIC; + goto handle_semantic; case TGSI_SEMANTIC_POSITION: target = V_008DFC_SQ_EXP_POS; break; @@ -1224,11 +1229,13 @@ handle_semantic: /* Write the misc vector (point size, edgeflag, layer, viewport). */ if (shader->selector->info.writes_psize || shader->selector->info.writes_edgeflag || + shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */ shader->selector->info.writes_psize | (shader->selector->info.writes_edgeflag << 1) | - (shader->selector->info.writes_layer << 2)); + (shader->selector->info.writes_layer << 2) | + (shader->selector->info.writes_viewport_index << 3)); pos_args[1][1] = uint->zero; /* EXEC mask */ pos_args[1][2] = uint->zero; /* last export? 
*/ pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1); @@ -1259,6 +1266,9 @@ handle_semantic: if (shader->selector->info.writes_layer) pos_args[1][7] = layer_value; + + if (shader->selector->info.writes_viewport_index) + pos_args[1][8] = viewport_index_value; } for (i = 0; i < 4; i++) @@ -1299,10 +1309,15 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base) for (i = 0; i < info->num_outputs; i++) { LLVMValueRef *out_ptr = si_shader_ctx->radeon_bld.soa.outputs[i]; - int param_index = get_param_index(info->output_semantic_name[i], - info->output_semantic_index[i], - es->key.vs.gs_used_inputs); + int param_index; + + if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX || + info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER) + continue; + param_index = get_param_index(info->output_semantic_name[i], + info->output_semantic_index[i], + es->key.vs.gs_used_inputs); if (param_index < 0) continue; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 6c18836d189..752467bcfd7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | + S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || info->writes_edgeflag || - info->writes_layer) | + info->writes_layer || + info->writes_viewport_index) | (sctx->queued.named.rasterizer->clip_plane_enable & clipdist_mask)); r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -509,20 +511,26 @@ static void si_set_scissor_states(struct 
pipe_context *ctx, const struct pipe_scissor_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); - struct si_pm4_state *pm4 = &scissor->pm4; - - if (scissor == NULL) - return; + struct si_state_scissor *scissor; + struct si_pm4_state *pm4; + int i; - scissor->scissor = *state; - si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, - S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, - S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); + for (i = start_slot; i < start_slot + num_scissors; i++) { + int idx = i - start_slot; + int offset = i * 4 * 2; - si_pm4_set_state(sctx, scissor, scissor); + scissor = CALLOC_STRUCT(si_state_scissor); + if (scissor == NULL) + return; + pm4 = &scissor->pm4; + scissor->scissor = state[idx]; + si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, + S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset, + S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy)); + si_pm4_set_state(sctx, scissor[i], scissor); + } } static void si_set_viewport_states(struct pipe_context *ctx, @@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context *ctx, const struct pipe_viewport_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); - struct si_pm4_state *pm4 = &viewport->pm4; + struct si_state_viewport *viewport; + struct si_pm4_state *pm4; + int i; - if (viewport == NULL) - return; + for (i = start_slot; i < start_slot + num_viewports; i++) { + int idx = i - start_slot; + int offset = i * 4 * 6; - viewport->viewport = *state; - si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); - si_pm4_set_reg(pm4, 
R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); - si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); - si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); - si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); - si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); + viewport = CALLOC_STRUCT(si_state_viewport); + if (!viewport) + return; + pm4 = &viewport->pm4; + + viewport->viewport = state[idx]; + si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, fui(state[idx].scale[0])); + si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, fui(state[idx].translate[0])); + si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, fui(state[idx].scale[1])); + si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, fui(state[idx].translate[1])); + si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, fui(state[idx].scale[2])); + si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, fui(state[idx].translate[2])); - si_pm4_set_state(sctx, viewport, viewport); + si_pm4_set_state(sctx, viewport[i], viewport); + } } /* diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 5e68b162137..d1f2dff2c3f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -92,8 +92,8 @@ union si_state { struct si_pm4_state *blend_color; struct si_pm4_state *clip; struct si_state_sample_mask *sample_mask; - struct si_state_scissor *scissor; - struct si_state_viewport *viewport; + struct si_state_scissor *scissor[16]; + struct si_state_viewport *viewport[16]; struct si_state_rasterizer *rasterizer; struct si_state_dsa *dsa; struct si_pm4_state *fb_rs; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 208c8523ef1..48128fa44e1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ 
b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader) case TGSI_SEMANTIC_POSITION: case TGSI_SEMANTIC_PSIZE: case TGSI_SEMANTIC_EDGEFLAG: - case TGSI_SEMANTIC_VIEWPORT_INDEX: - case TGSI_SEMANTIC_LAYER: break; default: nparams++;