X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=1a852c5cba25ffc7c6f7a8a3f87de789660ec8e1;hb=09408764c1eafe0b75bfda0ce30ff7b76203a96c;hp=655894146fdbfc780241fb13157e52871575037d;hpb=d995d4830e77d55552972dcc9d64e106f55fdc3f;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 655894146fd..1a852c5cba2 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -376,7 +376,7 @@ static LLVMValueRef build_indexed_load_const( static LLVMValueRef get_instance_index_for_fetch( struct radeon_llvm_context *radeon_bld, - unsigned divisor) + unsigned param_start_instance, unsigned divisor) { struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); @@ -390,8 +390,8 @@ static LLVMValueRef get_instance_index_for_fetch( result = LLVMBuildUDiv(gallivm->builder, result, lp_build_const_int32(gallivm, divisor), ""); - return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( - radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + return LLVMBuildAdd(gallivm->builder, result, + LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } static void declare_input_vs( @@ -403,7 +403,8 @@ static void declare_input_vs( struct gallivm_state *gallivm = base->gallivm; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); - unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index]; + unsigned divisor = + ctx->shader->key.vs.prolog.instance_divisors[input_index]; unsigned chan; @@ -428,7 +429,9 @@ static void declare_input_vs( if (divisor) { /* Build index from instance ID, start instance and divisor */ ctx->shader->uses_instanceid = true; - buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, divisor); + buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, + SI_PARAM_START_INSTANCE, + divisor); } else { /* Load the buffer index for vertices. */ LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn, @@ -854,7 +857,7 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) static unsigned select_interp_param(struct si_shader_context *ctx, unsigned param) { - if (!ctx->shader->key.ps.force_persample_interp) + if (!ctx->shader->key.ps.prolog.force_persample_interp) return param; /* If the shader doesn't use center/centroid, just return the parameter. @@ -924,7 +927,7 @@ static void interp_fs_input(struct si_shader_context *ctx, intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; if (semantic_name == TGSI_SEMANTIC_COLOR && - ctx->shader->key.ps.color_two_side) { + ctx->shader->key.ps.prolog.color_two_side) { LLVMValueRef args[4]; LLVMValueRef is_face_positive; LLVMValueRef back_attr_number; @@ -1331,12 +1334,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { const union si_shader_key *key = &ctx->shader->key; - unsigned col_formats = key->ps.spi_shader_col_format; + unsigned col_formats = key->ps.epilog.spi_shader_col_format; int cbuf = target - V_008DFC_SQ_EXP_MRT; assert(cbuf >= 0 && cbuf < 8); spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; - is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1; + is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1; } args[4] = uint->zero; /* COMPR flag */ @@ -1489,13 +1492,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { + if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) { LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_ALPHA_REF); LLVMValueRef alpha_pass = lp_build_cmp(&bld_base->base, - ctx->shader->key.ps.alpha_func, + ctx->shader->key.ps.epilog.alpha_func, alpha, alpha_ref); LLVMValueRef arg = lp_build_select(&bld_base->base, @@ -1512,7 +1515,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha) + LLVMValueRef alpha, + unsigned samplemask_param) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1520,7 +1524,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context * /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ coverage = LLVMGetParam(ctx->radeon_bld.main_fn, - SI_PARAM_SAMPLE_COVERAGE); + samplemask_param); coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage); coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32", @@ -1990,7 +1994,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, invocation_id, bld_base->uint_bld.zero, "")); /* Determine the layout of one tess factor element in the buffer. */ - switch (shader->key.tcs.prim_mode) { + switch (shader->key.tcs.epilog.prim_mode) { case PIPE_PRIM_LINES: stride = 2; /* 2 dwords, 1 vec2 store */ outer_comps = 2; @@ -2285,6 +2289,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, LLVMValueRef *color, unsigned index, + unsigned samplemask_param, bool is_last) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -2292,30 +2297,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, int i; /* Clamp color */ - if (ctx->shader->key.ps.clamp_color) + if (ctx->shader->key.ps.epilog.clamp_color) for (i = 0; i < 4; i++) color[i] = radeon_llvm_saturate(bld_base, color[i]); /* Alpha to one */ - if (ctx->shader->key.ps.alpha_to_one) + if (ctx->shader->key.ps.epilog.alpha_to_one) color[3] = base->one; /* Alpha test */ if (index == 0 && - ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(bld_base, color[3]); /* Line & polygon smoothing */ - if (ctx->shader->key.ps.poly_line_smoothing) - color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); + if (ctx->shader->key.ps.epilog.poly_line_smoothing) + color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3], + samplemask_param); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (ctx->shader->key.ps.last_cbuf > 0) { + if (ctx->shader->key.ps.epilog.last_cbuf > 0) { LLVMValueRef args[8][9]; int c, last = -1; /* Get the export arguments, also find out what the last one is. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + c, args[c]); if (args[c][0] != bld_base->uint_bld.zero) @@ -2323,7 +2329,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } /* Emit all exports. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { if (is_last && last == c) { args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ args[c][2] = bld_base->uint_bld.one; /* DONE bit */ @@ -2386,11 +2392,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) * Otherwise, find the last color export. */ if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) { - unsigned spi_format = shader->key.ps.spi_shader_col_format; + unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format; /* Don't export NULL and return if alpha-test is enabled. */ - if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS && - shader->key.ps.alpha_func != PIPE_FUNC_NEVER && + if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS && + shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER && (spi_format & 0xf) == 0) spi_format |= V_028714_SPI_SHADER_32_AR; @@ -2401,10 +2407,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) continue; /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (shader->key.ps.last_cbuf > 0) { + if (shader->key.ps.epilog.last_cbuf > 0) { /* Just set this if any of the colorbuffers are enabled. */ if (spi_format & - ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1)) + ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1)) last_color_export = i; continue; } @@ -2446,6 +2452,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) ctx->radeon_bld.soa.outputs[i][j], ""); si_export_mrt_color(bld_base, color, semantic_index, + SI_PARAM_SAMPLE_COVERAGE, last_color_export == i); break; default: @@ -3547,6 +3554,30 @@ static const struct lp_build_tgsi_action interp_action = { .emit = build_interp_intrinsic, }; +static void si_create_function(struct si_shader_context *ctx, + LLVMTypeRef *returns, unsigned num_returns, + LLVMTypeRef *params, unsigned num_params, + int last_array_pointer, int last_sgpr) +{ + int i; + + radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns, + params, num_params); + radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); + ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type); + + for (i = 0; i <= last_sgpr; ++i) { + LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); + + /* We tell llvm that array inputs are passed by value to allow Sinking pass + * to move load. Inputs are constant so this is fine. */ + if (i <= last_array_pointer) + LLVMAddAttribute(P, LLVMByValAttribute); + else + LLVMAddAttribute(P, LLVMInRegAttribute); + } +} + static void create_meta_data(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; @@ -3600,6 +3631,27 @@ static unsigned llvm_get_type_size(LLVMTypeRef type) } } +static void declare_tess_lds(struct si_shader_context *ctx) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type; + + /* This is the upper bound, maximum is 32 inputs times 32 vertices */ + unsigned vertex_data_dw_size = 32*32*4; + unsigned patch_data_dw_size = 32*4; + /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ + unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; + unsigned lds_dwords = patch_dw_size; + + /* The actual size is computed outside of the shader to reduce + * the number of shader variants. */ + ctx->lds = + LLVMAddGlobalInAddressSpace(gallivm->module, + LLVMArrayType(i32, lds_dwords), + "tess_lds", + LOCAL_ADDR_SPACE); +} + static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; @@ -3732,26 +3784,15 @@ static void create_function(struct si_shader_context *ctx) } assert(num_params <= Elements(params)); - radeon_llvm_create_func(&ctx->radeon_bld, NULL, 0, - params, num_params); - radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); - ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type); + + si_create_function(ctx, NULL, 0, params, + num_params, last_array_pointer, last_sgpr); shader->num_input_sgprs = 0; shader->num_input_vgprs = 0; - for (i = 0; i <= last_sgpr; ++i) { - LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); - - /* We tell llvm that array inputs are passed by value to allow Sinking pass - * to move load. Inputs are constant so this is fine. */ - if (i <= last_array_pointer) - LLVMAddAttribute(P, LLVMByValAttribute); - else - LLVMAddAttribute(P, LLVMInRegAttribute); - + for (i = 0; i <= last_sgpr; ++i) shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4; - } /* Unused fragment shader inputs are eliminated by the compiler, * so we don't know yet how many there will be. @@ -3775,22 +3816,8 @@ static void create_function(struct si_shader_context *ctx) if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) || ctx->type == TGSI_PROCESSOR_TESS_CTRL || - ctx->type == TGSI_PROCESSOR_TESS_EVAL) { - /* This is the upper bound, maximum is 32 inputs times 32 vertices */ - unsigned vertex_data_dw_size = 32*32*4; - unsigned patch_data_dw_size = 32*4; - /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ - unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; - unsigned lds_dwords = patch_dw_size; - - /* The actual size is computed outside of the shader to reduce - * the number of shader variants. */ - ctx->lds = - LLVMAddGlobalInAddressSpace(gallivm->module, - LLVMArrayType(ctx->i32, lds_dwords), - "tess_lds", - LOCAL_ADDR_SPACE); - } + ctx->type == TGSI_PROCESSOR_TESS_EVAL) + declare_tess_lds(ctx); } static void preload_constants(struct si_shader_context *ctx) @@ -4313,35 +4340,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) switch (shader) { case PIPE_SHADER_VERTEX: fprintf(f, " instance_divisors = {"); - for (i = 0; i < Elements(key->vs.instance_divisors); i++) + for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++) fprintf(f, !i ? "%u" : ", %u", - key->vs.instance_divisors[i]); + key->vs.prolog.instance_divisors[i]); fprintf(f, "}\n"); fprintf(f, " as_es = %u\n", key->vs.as_es); fprintf(f, " as_ls = %u\n", key->vs.as_ls); - fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id); break; case PIPE_SHADER_TESS_CTRL: - fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode); + fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode); break; case PIPE_SHADER_TESS_EVAL: fprintf(f, " as_es = %u\n", key->tes.as_es); - fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id); break; case PIPE_SHADER_GEOMETRY: break; case PIPE_SHADER_FRAGMENT: - fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format); - fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); - fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); - fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); - fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one); - fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple); - fprintf(f, " clamp_color = %u\n", key->ps.clamp_color); + fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side); + fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple); + fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp); + fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format); + fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8); + fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf); + fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func); + fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one); + fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing); + fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color); break; default: @@ -4427,7 +4457,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, LLVMModuleRef mod; int r = 0; bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT && - shader->key.ps.poly_stipple; + shader->key.ps.prolog.poly_stipple; if (poly_stipple) { tokens = util_pstipple_create_fragment_shader(tokens, NULL,