+ return false;
+ }
+
+ si_llvm_build_ret(ctx, ctx->return_value);
+ return true;
+}
+
+/**
+ * Compute the VS prolog key, which contains all the information needed to
+ * build the VS prolog function, and set shader->info bits where needed.
+ */
+static void si_get_vs_prolog_key(struct si_shader *shader,
+ union si_shader_part_key *key)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+
+ memset(key, 0, sizeof(*key));
+ key->vs_prolog.states = shader->key.vs.prolog;
+ key->vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+
+ /* Set the instanceID flag. */
+ for (unsigned i = 0; i < info->num_inputs; i++)
+ if (key->vs_prolog.states.instance_divisors[i])
+ shader->info.uses_instanceid = true;
+}
+
+/**
+ * Compute the VS epilog key, which contains all the information needed to
+ * build the VS epilog function, and set the PrimitiveID output offset.
+ */
+static void si_get_vs_epilog_key(struct si_shader *shader,
+ struct si_vs_epilog_bits *states,
+ union si_shader_part_key *key)
+{
+ memset(key, 0, sizeof(*key));
+ key->vs_epilog.states = *states;
+
+ /* Set up the PrimitiveID output. */
+ if (shader->key.vs.epilog.export_prim_id) {
+ unsigned index = shader->selector->info.num_outputs;
+ unsigned offset = shader->info.nr_param_exports++;
+
+ key->vs_epilog.prim_id_param_offset = offset;
+ assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[index] = offset;
+ }
+}
+
+/**
+ * Compute the PS prolog key, which contains all the information needed to
+ * build the PS prolog function, and set related bits in shader->config.
+ */
+static void si_get_ps_prolog_key(struct si_shader *shader,
+ union si_shader_part_key *key,
+ bool separate_prolog)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+
+ memset(key, 0, sizeof(*key));
+ key->ps_prolog.states = shader->key.ps.prolog;
+ key->ps_prolog.colors_read = info->colors_read;
+ key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
+ key->ps_prolog.wqm = info->uses_derivatives &&
+ (key->ps_prolog.colors_read ||
+ key->ps_prolog.states.force_persp_sample_interp ||
+ key->ps_prolog.states.force_linear_sample_interp ||
+ key->ps_prolog.states.force_persp_center_interp ||
+ key->ps_prolog.states.force_linear_center_interp ||
+ key->ps_prolog.states.bc_optimize_for_persp ||
+ key->ps_prolog.states.bc_optimize_for_linear);
+
+ if (info->colors_read) {
+ unsigned *color = shader->selector->color_attr_index;
+
+ if (shader->key.ps.prolog.color_two_side) {
+ /* BCOLORs are stored after the last input. */
+ key->ps_prolog.num_interp_inputs = info->num_inputs;
+ key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
+ shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+ }
+
+ for (unsigned i = 0; i < 2; i++) {
+ unsigned interp = info->input_interpolate[color[i]];
+ unsigned location = info->input_interpolate_loc[color[i]];
+
+ if (!(info->colors_read & (0xf << i*4)))
+ continue;
+
+ key->ps_prolog.color_attr_index[i] = color[i];
+
+ if (shader->key.ps.prolog.flatshade_colors &&
+ interp == TGSI_INTERPOLATE_COLOR)
+ interp = TGSI_INTERPOLATE_CONSTANT;
+
+ switch (interp) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ key->ps_prolog.color_interp_vgpr_index[i] = -1;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ case TGSI_INTERPOLATE_COLOR:
+ /* Force the interpolation location for colors here. */
+ if (shader->key.ps.prolog.force_persp_sample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_persp_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ key->ps_prolog.color_interp_vgpr_index[i] = 0;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ key->ps_prolog.color_interp_vgpr_index[i] = 2;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ key->ps_prolog.color_interp_vgpr_index[i] = 4;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_PERSP_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ /* Force the interpolation location for colors here. */
+ if (shader->key.ps.prolog.force_linear_sample_interp)
+ location = TGSI_INTERPOLATE_LOC_SAMPLE;
+ if (shader->key.ps.prolog.force_linear_center_interp)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+
+ /* The VGPR assignment for non-monolithic shaders
+ * works because InitialPSInputAddr is set on the
+ * main shader and PERSP_PULL_MODEL is never used.
+ */
+ switch (location) {
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ key->ps_prolog.color_interp_vgpr_index[i] =
+ separate_prolog ? 6 : 9;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_SAMPLE_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ key->ps_prolog.color_interp_vgpr_index[i] =
+ separate_prolog ? 8 : 11;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTER_ENA(1);
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ key->ps_prolog.color_interp_vgpr_index[i] =
+ separate_prolog ? 10 : 13;
+ shader->config.spi_ps_input_ena |=
+ S_0286CC_LINEAR_CENTROID_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+}
+
+/**
+ * Check whether a PS prolog is required based on the key.
+ */
+static bool si_need_ps_prolog(const union si_shader_part_key *key)
+{
+ return key->ps_prolog.colors_read ||
+ key->ps_prolog.states.force_persp_sample_interp ||
+ key->ps_prolog.states.force_linear_sample_interp ||
+ key->ps_prolog.states.force_persp_center_interp ||
+ key->ps_prolog.states.force_linear_center_interp ||
+ key->ps_prolog.states.bc_optimize_for_persp ||
+ key->ps_prolog.states.bc_optimize_for_linear ||
+ key->ps_prolog.states.poly_stipple;
+}
+
+/**
+ * Compute the PS epilog key, which contains all the information needed to
+ * build the PS epilog function.
+ */
+static void si_get_ps_epilog_key(struct si_shader *shader,
+ union si_shader_part_key *key)
+{
+ struct tgsi_shader_info *info = &shader->selector->info;
+ memset(key, 0, sizeof(*key));
+ key->ps_epilog.colors_written = info->colors_written;
+ key->ps_epilog.writes_z = info->writes_z;
+ key->ps_epilog.writes_stencil = info->writes_stencil;
+ key->ps_epilog.writes_samplemask = info->writes_samplemask;
+ key->ps_epilog.states = shader->key.ps.epilog;
+}
+
+/**
+ * Given a list of shader part functions, build a wrapper function that
+ * runs them in sequence to form a monolithic shader.
+ */
+static void si_build_wrapper_function(struct si_shader_context *ctx,
+ LLVMValueRef *parts,
+ unsigned num_parts,
+ unsigned main_part)
+{
+ struct gallivm_state *gallivm = &ctx->gallivm;
+ LLVMBuilderRef builder = ctx->gallivm.builder;
+ /* PS epilog has one arg per color component */
+ LLVMTypeRef param_types[48];
+ LLVMValueRef out[48];
+ LLVMTypeRef function_type;
+ unsigned num_params;
+ unsigned num_out_sgpr, num_out;
+ unsigned num_sgprs, num_vgprs;
+ unsigned last_sgpr_param;
+ unsigned gprs;
+
+ for (unsigned i = 0; i < num_parts; ++i) {
+ LLVMAddFunctionAttr(parts[i], LLVMAlwaysInlineAttribute);
+ LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
+ }
+
+ /* The parameters of the wrapper function correspond to those of the
+ * first part in terms of SGPRs and VGPRs, but we use the types of the
+ * main part to get the right types. This is relevant for the
+ * dereferenceable attribute on descriptor table pointers.
+ */
+ num_sgprs = 0;
+ num_vgprs = 0;
+
+ function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
+ num_params = LLVMCountParamTypes(function_type);
+
+ for (unsigned i = 0; i < num_params; ++i) {
+ LLVMValueRef param = LLVMGetParam(parts[0], i);
+
+ if (ac_is_sgpr_param(param)) {
+ assert(num_vgprs == 0);
+ num_sgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4;
+ } else {
+ num_vgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4;
+ }
+ }
+ assert(num_vgprs + num_sgprs <= ARRAY_SIZE(param_types));
+
+ num_params = 0;
+ last_sgpr_param = 0;
+ gprs = 0;
+ while (gprs < num_sgprs + num_vgprs) {
+ LLVMValueRef param = LLVMGetParam(parts[main_part], num_params);
+ unsigned size;
+
+ param_types[num_params] = LLVMTypeOf(param);
+ if (gprs < num_sgprs)
+ last_sgpr_param = num_params;
+ size = llvm_get_type_size(param_types[num_params]) / 4;
+ num_params++;
+
+ assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
+ assert(gprs + size <= num_sgprs + num_vgprs &&
+ (gprs >= num_sgprs || gprs + size <= num_sgprs));
+
+ gprs += size;
+ }
+
+ si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params, last_sgpr_param);
+
+ /* Record the arguments of the function as if they were an output of
+ * a previous part.
+ */
+ num_out = 0;
+ num_out_sgpr = 0;
+
+ for (unsigned i = 0; i < num_params; ++i) {
+ LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
+ LLVMTypeRef param_type = LLVMTypeOf(param);
+ LLVMTypeRef out_type = i <= last_sgpr_param ? ctx->i32 : ctx->f32;
+ unsigned size = llvm_get_type_size(param_type) / 4;
+
+ if (size == 1) {
+ if (param_type != out_type)
+ param = LLVMBuildBitCast(builder, param, out_type, "");
+ out[num_out++] = param;
+ } else {
+ LLVMTypeRef vector_type = LLVMVectorType(out_type, size);
+
+ if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
+ param = LLVMBuildPtrToInt(builder, param, ctx->i64, "");
+ param_type = ctx->i64;
+ }
+
+ if (param_type != vector_type)
+ param = LLVMBuildBitCast(builder, param, vector_type, "");
+
+ for (unsigned j = 0; j < size; ++j)
+ out[num_out++] = LLVMBuildExtractElement(
+ builder, param, LLVMConstInt(ctx->i32, j, 0), "");
+ }
+
+ if (i <= last_sgpr_param)
+ num_out_sgpr = num_out;
+ }
+
+ /* Now chain the parts. */
+ for (unsigned part = 0; part < num_parts; ++part) {
+ LLVMValueRef in[48];
+ LLVMValueRef ret;
+ LLVMTypeRef ret_type;
+ unsigned out_idx = 0;
+
+ num_params = LLVMCountParams(parts[part]);
+ assert(num_params <= ARRAY_SIZE(param_types));
+
+ /* Derive arguments for the next part from outputs of the
+ * previous one.
+ */
+ for (unsigned param_idx = 0; param_idx < num_params; ++param_idx) {
+ LLVMValueRef param;
+ LLVMTypeRef param_type;
+ bool is_sgpr;
+ unsigned param_size;
+ LLVMValueRef arg = NULL;
+
+ param = LLVMGetParam(parts[part], param_idx);
+ param_type = LLVMTypeOf(param);
+ param_size = llvm_get_type_size(param_type) / 4;
+ is_sgpr = ac_is_sgpr_param(param);
+
+ if (is_sgpr) {
+ LLVMRemoveAttribute(param, LLVMByValAttribute);
+ LLVMAddAttribute(param, LLVMInRegAttribute);
+ }
+
+ assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
+ assert(is_sgpr || out_idx >= num_out_sgpr);
+
+ if (param_size == 1)
+ arg = out[out_idx];
+ else
+ arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
+
+ if (LLVMTypeOf(arg) != param_type) {
+ if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
+ arg = LLVMBuildBitCast(builder, arg, ctx->i64, "");
+ arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
+ } else {
+ arg = LLVMBuildBitCast(builder, arg, param_type, "");
+ }
+ }
+
+ in[param_idx] = arg;
+ out_idx += param_size;
+ }
+
+ ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
+ ret_type = LLVMTypeOf(ret);
+
+ /* Extract the returned GPRs. */
+ num_out = 0;
+ num_out_sgpr = 0;
+
+ if (LLVMGetTypeKind(ret_type) != LLVMVoidTypeKind) {
+ assert(LLVMGetTypeKind(ret_type) == LLVMStructTypeKind);
+
+ unsigned ret_size = LLVMCountStructElementTypes(ret_type);
+
+ for (unsigned i = 0; i < ret_size; ++i) {
+ LLVMValueRef val =
+ LLVMBuildExtractValue(builder, ret, i, "");
+
+ out[num_out++] = val;
+
+ if (LLVMTypeOf(val) == ctx->i32) {
+ assert(num_out_sgpr + 1 == num_out);
+ num_out_sgpr = num_out;
+ }
+ }
+ }
+ }
+
+ LLVMBuildRetVoid(builder);
+}
+
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
+{
+ struct si_shader_selector *sel = shader->selector;
+ struct si_shader_context ctx;
+ struct lp_build_tgsi_context *bld_base;
+ LLVMModuleRef mod;
+ int r = -1;
+
+ /* Dump TGSI code before doing TGSI->LLVM conversion in case the
+ * conversion fails. */
+ if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
+ !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
+ tgsi_dump(sel->tokens, 0);
+ si_dump_streamout(&sel->so);
+ }
+
+ si_init_shader_ctx(&ctx, sscreen, shader, tm);
+ ctx.separate_prolog = !is_monolithic;
+
+ memset(shader->info.vs_output_param_offset, 0xff,
+ sizeof(shader->info.vs_output_param_offset));
+
+ shader->info.uses_instanceid = sel->info.uses_instanceid;
+
+ bld_base = &ctx.soa.bld_base;
+ ctx.load_system_value = declare_system_value;
+
+ if (!si_compile_tgsi_main(&ctx, shader)) {
+ si_llvm_dispose(&ctx);
+ return -1;
+ }
+
+ if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
+ LLVMValueRef parts[3];
+ bool need_prolog;
+ bool need_epilog;
+
+ need_prolog = sel->info.num_inputs;
+ need_epilog = !shader->key.vs.as_es && !shader->key.vs.as_ls;
+
+ parts[need_prolog ? 1 : 0] = ctx.main_fn;
+
+ if (need_prolog) {
+ union si_shader_part_key prolog_key;
+ si_get_vs_prolog_key(shader, &prolog_key);
+ si_build_vs_prolog_function(&ctx, &prolog_key);
+ parts[0] = ctx.main_fn;
+ }
+
+ if (need_epilog) {
+ union si_shader_part_key epilog_key;
+ si_get_vs_epilog_key(shader, &shader->key.vs.epilog, &epilog_key);
+ si_build_vs_epilog_function(&ctx, &epilog_key);
+ parts[need_prolog ? 2 : 1] = ctx.main_fn;
+ }
+
+ si_build_wrapper_function(&ctx, parts, 1 + need_prolog + need_epilog,
+ need_prolog ? 1 : 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
+ LLVMValueRef parts[2];
+ union si_shader_part_key epilog_key;
+
+ parts[0] = ctx.main_fn;
+
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.tcs_epilog.states = shader->key.tcs.epilog;
+ si_build_tcs_epilog_function(&ctx, &epilog_key);
+ parts[1] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, 2, 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_EVAL &&
+ !shader->key.tes.as_es) {
+ LLVMValueRef parts[2];
+ union si_shader_part_key epilog_key;
+
+ parts[0] = ctx.main_fn;
+
+ si_get_vs_epilog_key(shader, &shader->key.tes.epilog, &epilog_key);
+ si_build_vs_epilog_function(&ctx, &epilog_key);
+ parts[1] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, 2, 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
+ LLVMValueRef parts[3];
+ union si_shader_part_key prolog_key;
+ union si_shader_part_key epilog_key;
+ bool need_prolog;
+
+ si_get_ps_prolog_key(shader, &prolog_key, false);
+ need_prolog = si_need_ps_prolog(&prolog_key);
+
+ parts[need_prolog ? 1 : 0] = ctx.main_fn;
+
+ if (need_prolog) {
+ si_build_ps_prolog_function(&ctx, &prolog_key);
+ parts[0] = ctx.main_fn;
+ }
+
+ si_get_ps_epilog_key(shader, &epilog_key);
+ si_build_ps_epilog_function(&ctx, &epilog_key);
+ parts[need_prolog ? 2 : 1] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2, need_prolog ? 1 : 0);