- param = si_shader_io_get_unique_index(info->output_semantic_name[i],
- info->output_semantic_index[i], false);
-
- for (chan = 0; chan < 4; chan++) {
- if (!(info->output_usagemask[i] & (1 << chan)))
- continue;
-
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
- out_val = ac_to_integer(&ctx->ac, out_val);
-
- /* GFX9 has the ESGS ring in LDS. */
- if (ctx->screen->info.chip_class >= GFX9) {
- lds_store(ctx, param * 4 + chan, lds_base, out_val);
- continue;
- }
-
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->esgs_ring,
- out_val, 1, NULL, soffset,
- (4 * param + chan) * 4,
- 1, 1, true, true);
- }
- }
-
- if (ctx->screen->info.chip_class >= GFX9)
- si_set_es_return_value_for_gs(ctx);
-}
-
-/**
- * Return the GS wave ID value.
- *
- * On GFX9 and newer it is extracted from the merged wave info parameter
- * through si_unpack_param(..., 16, 8); on older chips it is read directly
- * from the dedicated GS wave ID function parameter.
- */
-static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
-{
-	const bool uses_merged_info = ctx->screen->info.chip_class >= GFX9;
-
-	return uses_merged_info
-		? si_unpack_param(ctx, ctx->param_merged_wave_info, 16, 8)
-		: LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id);
-}
-
-/**
- * Common GS epilogue shared by the ABI and TGSI entry points.
- *
- * Sends the GS_DONE message (combined with the NOP GS op) for the current
- * wave and, on GFX9 and newer, closes the if-block tracked in
- * ctx->merged_wrap_if_state.
- */
-static void emit_gs_epilogue(struct si_shader_context *ctx)
-{
-	LLVMValueRef wave_id = si_get_gs_wave_id(ctx);
-
-	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
-			 wave_id);
-
-	/* Nothing to close before GFX9. */
-	if (ctx->screen->info.chip_class < GFX9)
-		return;
-
-	lp_build_endif(&ctx->merged_wrap_if_state);
-}
-
-/**
- * GS epilogue with the (abi, max_outputs, addrs) signature.
- *
- * \param abi          shader ABI from which the si_shader_context is obtained
- * \param max_outputs  only used to assert that the output count fits
- * \param addrs        unused here
- */
-static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
-				     unsigned max_outputs,
-				     LLVMValueRef *addrs)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-	/* Sanity check only; the outputs themselves are not touched here. */
-	assert(ctx->shader->selector->info.num_outputs <= max_outputs);
-
-	emit_gs_epilogue(ctx);
-}
-
-/* TGSI-side GS epilogue: resolve the shader context and forward to
- * emit_gs_epilogue().
- */
-static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-	emit_gs_epilogue(si_shader_context(bld_base));
-}
-
-/**
- * Build the vertex shader epilogue.
- *
- * Clamps COLOR/BCOLOR outputs under a runtime condition, loads every output
- * channel, emits streamout when the selector declares streamout outputs,
- * optionally appends a PrimitiveID output and finally exports the outputs.
- *
- * \param abi          shader ABI from which the si_shader_context is obtained
- * \param max_outputs  only used to assert that num_outputs fits
- * \param addrs        output pointers, indexed as addrs[4 * output + channel]
- */
-static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
-				     unsigned max_outputs,
-				     LLVMValueRef *addrs)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	struct si_shader_output_values *outputs = NULL;
-	struct lp_build_if_state clamp_if;
-	LLVMValueRef clamp_cond = NULL;
-	int out, chan, count;
-
-	assert(!ctx->shader->is_gs_copy_shader);
-	assert(info->num_outputs <= max_outputs);
-
-	/* One extra element leaves room for the optional PrimitiveID output.
-	 * NOTE(review): the allocation result is used without a NULL check.
-	 */
-	outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
-
-	/* Vertex color clamping.
-	 *
-	 * A state constant loaded in a user data SGPR decides at run time
-	 * whether colors are clamped: a single IF statement wraps the
-	 * clamping code for all color outputs.
-	 */
-	for (out = 0; out < info->num_outputs; out++) {
-		bool is_color =
-			info->output_semantic_name[out] == TGSI_SEMANTIC_COLOR ||
-			info->output_semantic_name[out] == TGSI_SEMANTIC_BCOLOR;
-
-		if (!is_color)
-			continue;
-
-		/* Open the IF only once, at the first color output found. */
-		if (!clamp_cond) {
-			/* The state is in the first bit of the user SGPR. */
-			LLVMValueRef state_bits =
-				LLVMGetParam(ctx->main_fn,
-					     ctx->param_vs_state_bits);
-
-			clamp_cond = LLVMBuildTrunc(ctx->ac.builder, state_bits,
-						    ctx->i1, "");
-			lp_build_if(&clamp_if, &ctx->gallivm, clamp_cond);
-		}
-
-		/* Clamp each channel in place: load, clamp, store back. */
-		for (chan = 0; chan < 4; chan++) {
-			LLVMValueRef ptr = addrs[4 * out + chan];
-			LLVMValueRef value =
-				LLVMBuildLoad(ctx->ac.builder, ptr, "");
-
-			value = ac_build_clamp(&ctx->ac, value);
-			LLVMBuildStore(ctx->ac.builder, value, ptr);
-		}
-	}
-
-	if (clamp_cond)
-		lp_build_endif(&clamp_if);
-
-	/* Collect semantics, values and per-channel stream IDs. */
-	for (out = 0; out < info->num_outputs; out++) {
-		struct si_shader_output_values *dst = &outputs[out];
-
-		dst->semantic_name = info->output_semantic_name[out];
-		dst->semantic_index = info->output_semantic_index[out];
-
-		for (chan = 0; chan < 4; chan++) {
-			dst->values[chan] =
-				LLVMBuildLoad(ctx->ac.builder,
-					      addrs[4 * out + chan], "");
-			/* output_streams holds 2 bits per channel. */
-			dst->vertex_stream[chan] =
-				(info->output_streams[out] >> (2 * chan)) & 3;
-		}
-	}
-	count = out;
-
-	if (ctx->shader->selector->so.num_outputs)
-		si_llvm_emit_streamout(ctx, outputs, count, 0);
-
-	/* Export PrimitiveID. */
-	if (ctx->shader->key.mono.u.vs_export_prim_id) {
-		struct si_shader_output_values *prim = &outputs[count];
-
-		prim->semantic_name = TGSI_SEMANTIC_PRIMID;
-		prim->semantic_index = 0;
-		prim->values[0] = ac_to_float(&ctx->ac,
-					      get_primitive_id(ctx, 0));
-		for (chan = 1; chan < 4; chan++)
-			prim->values[chan] = LLVMConstReal(ctx->f32, 0);
-
-		memset(prim->vertex_stream, 0, sizeof(prim->vertex_stream));
-		count++;
-	}
-
-	si_llvm_export_vs(ctx, outputs, count);
-	FREE(outputs);
-}
-
-/**
- * Epilogue for the primitive discard compute shader: load the four channels
- * of the first POSITION output and insert them into elements 0..3 of the
- * function's return value.
- *
- * \param abi          shader ABI from which the si_shader_context is obtained
- * \param max_outputs  only used to assert that num_outputs fits
- * \param addrs        output pointers, indexed as addrs[4 * output + channel]
- */
-static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi,
-						  unsigned max_outputs,
-						  LLVMValueRef *addrs)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	LLVMValueRef pos[4] = {};
-	LLVMValueRef result;
-	unsigned out, chan;
-
-	assert(info->num_outputs <= max_outputs);
-
-	/* Only the first position output is considered. */
-	for (out = 0; out < info->num_outputs; out++) {
-		if (info->output_semantic_name[out] == TGSI_SEMANTIC_POSITION) {
-			for (chan = 0; chan < 4; chan++) {
-				pos[chan] = LLVMBuildLoad(ctx->ac.builder,
-							  addrs[4 * out + chan],
-							  "");
-			}
-			break;
-		}
-	}
-	assert(pos[0] != NULL);
-
-	/* Return the position output. */
-	result = ctx->return_value;
-	for (chan = 0; chan < 4; chan++) {
-		result = LLVMBuildInsertValue(ctx->ac.builder, result,
-					      pos[chan], chan, "");
-	}
-	ctx->return_value = result;
-}
-
-/* TGSI epilogue hook: hand the context's output array to the ABI's
- * emit_outputs callback.
- */
-static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct ac_shader_abi *abi = &ctx->abi;
-
-	abi->emit_outputs(abi, RADEON_LLVM_MAX_OUTPUTS, &ctx->outputs[0][0]);
-}
-
-/* List of pixel shader exports that are collected first and emitted later
- * by si_emit_ps_exports().
- */
-struct si_ps_exports {
- unsigned num; /* number of entries of args[] filled in so far */
- struct ac_export_args args[10]; /* queued export arguments */
-};
-
-/**
- * Build the depth/stencil/samplemask export arguments with ac_export_mrt_z()
- * and append them to the export list.
- *
- * \param bld_base    TGSI build context (gives access to the shader context)
- * \param depth       depth value, forwarded to ac_export_mrt_z()
- * \param stencil     stencil value, forwarded to ac_export_mrt_z()
- * \param samplemask  sample mask value, forwarded to ac_export_mrt_z()
- * \param exp         export list that receives the new entry
- */
-static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
-			    LLVMValueRef depth, LLVMValueRef stencil,
-			    LLVMValueRef samplemask, struct si_ps_exports *exp)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct ac_export_args z_args;
-
-	ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &z_args);
-
-	exp->args[exp->num] = z_args;
-	exp->num++;
-}
-
-/**
- * Apply the pixel shader epilog color operations to one color output and
- * queue the resulting export(s).
- *
- * The color array is modified in place by clamping, alpha-to-one and
- * smoothing. When last_cbuf > 0 the color is queued for every MRT from 0 to
- * last_cbuf, otherwise only for MRT "index".
- *
- * \param bld_base          TGSI build context
- * \param color             four channel values, updated in place
- * \param index             color output index
- * \param samplemask_param  passed to si_scale_alpha_by_sample_mask()
- * \param is_last           whether this is the final export; sets the
- *                          valid-mask and DONE bits
- * \param exp               export list that receives the new entries
- */
-static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
-				LLVMValueRef *color, unsigned index,
-				unsigned samplemask_param,
-				bool is_last, struct si_ps_exports *exp)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	int chan;
-
-	/* Clamp color */
-	if (ctx->shader->key.part.ps.epilog.clamp_color) {
-		for (chan = 0; chan < 4; chan++)
-			color[chan] = ac_build_clamp(&ctx->ac, color[chan]);
-	}
-
-	/* Alpha to one */
-	if (ctx->shader->key.part.ps.epilog.alpha_to_one)
-		color[3] = ctx->ac.f32_1;
-
-	/* Alpha test (only performed for color output 0) */
-	if (index == 0) {
-		if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
-			si_alpha_test(bld_base, color[3]);
-	}
-
-	/* Line & polygon smoothing */
-	if (ctx->shader->key.part.ps.epilog.poly_line_smoothing) {
-		color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
-							 samplemask_param);
-	}
-
-	/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
-	if (!(ctx->shader->key.part.ps.epilog.last_cbuf > 0)) {
-		/* Plain case: a single export to MRT "index". */
-		struct ac_export_args single;
-
-		si_llvm_init_export_args(ctx, color,
-					 V_008DFC_SQ_EXP_MRT + index, &single);
-		if (is_last) {
-			single.valid_mask = 1; /* whether the EXEC mask is valid */
-			single.done = 1; /* DONE bit */
-		} else if (!single.enabled_channels) {
-			return; /* unnecessary NULL export */
-		}
-
-		exp->args[exp->num++] = single;
-		return;
-	}
-
-	/* Broadcast case: the same color goes to MRT 0..last_cbuf. */
-	struct ac_export_args bcast[8];
-	int cbuf, last_enabled = -1;
-
-	/* Get the export arguments, also find out what the last one is. */
-	for (cbuf = 0; cbuf <= ctx->shader->key.part.ps.epilog.last_cbuf; cbuf++) {
-		si_llvm_init_export_args(ctx, color,
-					 V_008DFC_SQ_EXP_MRT + cbuf,
-					 &bcast[cbuf]);
-		if (bcast[cbuf].enabled_channels)
-			last_enabled = cbuf;
-	}
-
-	/* Emit all exports. */
-	for (cbuf = 0; cbuf <= ctx->shader->key.part.ps.epilog.last_cbuf; cbuf++) {
-		bool is_final = is_last && last_enabled == cbuf;
-
-		if (is_final) {
-			bcast[cbuf].valid_mask = 1; /* whether the EXEC mask is valid */
-			bcast[cbuf].done = 1; /* DONE bit */
-		} else if (!bcast[cbuf].enabled_channels) {
-			continue; /* unnecessary NULL export */
-		}
-
-		exp->args[exp->num++] = bcast[cbuf];
-	}
-}
-
-/* Emit every queued pixel shader export, in the order it was added. */
-static void si_emit_ps_exports(struct si_shader_context *ctx,
-			       struct si_ps_exports *exp)
-{
-	unsigned idx = 0;
-
-	while (idx < exp->num) {
-		ac_build_export(&ctx->ac, &exp->args[idx]);
-		idx++;
-	}
-}
-
-/**
- * Store the PS outputs in the function's return value, in this order:
- *
- * v[0:3] = color0.xyzw
- * v[4:7] = color1.xyzw
- * ...
- * vN+0 = Depth
- * vN+1 = Stencil
- * vN+2 = SampleMask
- * vN+3 = SampleMaskIn (used for OpenGL smoothing)
- *
- * Only the outputs the shader actually writes take up a slot. The alpha-ref
- * SGPR is returned via its original location.
- *
- * \param abi          shader ABI from which the si_shader_context is obtained
- * \param max_outputs  not used by this function
- * \param addrs        output pointers, indexed as addrs[4 * output + channel]
- */
-static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
-				      unsigned max_outputs,
-				      LLVMValueRef *addrs)
-{
-	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-	struct si_shader *shader = ctx->shader;
-	struct tgsi_shader_info *info = &shader->selector->info;
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef color[8][4] = {};
-	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
-	LLVMValueRef ret, alpha_ref, sample_coverage;
-	unsigned out, chan, first_vgpr, vgpr, min_samplemask_vgpr;
-
-	/* Apply a kill that was postponed until the end of the shader. */
-	if (ctx->postponed_kill) {
-		LLVMValueRef kill_cond =
-			LLVMBuildLoad(builder, ctx->postponed_kill, "");
-
-		ac_build_kill_if_false(&ctx->ac, kill_cond);
-	}
-
-	/* Read the output values. */
-	for (out = 0; out < info->num_outputs; out++) {
-		unsigned semantic_name = info->output_semantic_name[out];
-		unsigned semantic_index = info->output_semantic_index[out];
-		LLVMValueRef *chan_addrs = &addrs[4 * out];
-
-		if (semantic_name == TGSI_SEMANTIC_COLOR) {
-			assert(semantic_index < 8);
-			for (chan = 0; chan < 4; chan++) {
-				color[semantic_index][chan] =
-					LLVMBuildLoad(builder, chan_addrs[chan], "");
-			}
-		} else if (semantic_name == TGSI_SEMANTIC_POSITION) {
-			/* Depth is read from the Z channel. */
-			depth = LLVMBuildLoad(builder, chan_addrs[2], "");
-		} else if (semantic_name == TGSI_SEMANTIC_STENCIL) {
-			/* Stencil is read from the Y channel. */
-			stencil = LLVMBuildLoad(builder, chan_addrs[1], "");
-		} else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
-			/* The sample mask is read from the X channel. */
-			samplemask = LLVMBuildLoad(builder, chan_addrs[0], "");
-		} else {
-			fprintf(stderr, "Warning: GFX6 unhandled fs output type:%d\n",
-				semantic_name);
-		}
-	}
-
-	/* Fill the return structure. */
-	ret = ctx->return_value;
-
-	/* Set SGPRs. */
-	alpha_ref = ac_to_integer(&ctx->ac,
-				  LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF));
-	ret = LLVMBuildInsertValue(builder, ret, alpha_ref,
-				   SI_SGPR_ALPHA_REF, "");
-
-	/* Set VGPRs; a color counts as written when its X channel is set. */
-	first_vgpr = SI_SGPR_ALPHA_REF + 1;
-	vgpr = first_vgpr;
-	for (out = 0; out < ARRAY_SIZE(color); out++) {
-		if (color[out][0]) {
-			for (chan = 0; chan < 4; chan++) {
-				ret = LLVMBuildInsertValue(builder, ret,
-							   color[out][chan],
-							   vgpr++, "");
-			}
-		}
-	}
-	if (depth)
-		ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
-	if (stencil)
-		ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
-	if (samplemask)
-		ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
-
-	/* Add the input sample mask for smoothing at the end, but never
-	 * below its minimum location.
-	 */
-	min_samplemask_vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
-	if (vgpr < min_samplemask_vgpr)
-		vgpr = min_samplemask_vgpr;
-
-	sample_coverage = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
-	ret = LLVMBuildInsertValue(builder, ret, sample_coverage, vgpr, "");
-
-	ctx->return_value = ret;
-}
-
-/**
- * Emit a memory barrier as a wait-count instruction.
- *
- * The barrier flags are taken from the constant first source operand. Each
- * flag group narrows the wait-count mask; no instruction is emitted when the
- * mask is still NOOP_WAITCNT.
- */
-static void membar_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef flags_value =
-		lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
-	unsigned flags = LLVMConstIntGetZExtValue(flags_value);
-	const unsigned vmem_flags = TGSI_MEMBAR_ATOMIC_BUFFER |
-				    TGSI_MEMBAR_SHADER_BUFFER |
-				    TGSI_MEMBAR_SHADER_IMAGE;
-	unsigned waitcnt = NOOP_WAITCNT;
-
-	if (flags & TGSI_MEMBAR_THREAD_GROUP) {
-		waitcnt &= VM_CNT & LGKM_CNT;
-	}
-	if (flags & vmem_flags) {
-		waitcnt &= VM_CNT;
-	}
-	if (flags & TGSI_MEMBAR_SHARED) {
-		waitcnt &= LGKM_CNT;
-	}
-
-	/* Nothing to wait for. */
-	if (waitcnt == NOOP_WAITCNT)
-		return;
-
-	ac_build_waitcnt(&ctx->ac, waitcnt);
-}
-
-/**
- * Emit a shader clock read and store elements 0 and 1 of the result in
- * emit_data->output[0] and emit_data->output[1].
- */
-static void clock_emit(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef clock = ac_build_shader_clock(&ctx->ac);
-	LLVMValueRef element[2] = { ctx->i32_0, ctx->i32_1 };
-	unsigned n;
-
-	for (n = 0; n < 2; n++) {
-		emit_data->output[n] =
-			LLVMBuildExtractElement(ctx->ac.builder, clock,
-						element[n], "");
-	}
-}
-
-/**
- * Emit a screen-space derivative (DDX, DDY and their _FINE variants).
- *
- * The opcode selects the thread-ID mask and the neighbour index that are
- * passed to ac_build_ddxy(); the result is stored in the output slot of the
- * current channel.
- */
-static void si_llvm_emit_ddxy(
-		const struct lp_build_tgsi_action *action,
-		struct lp_build_tgsi_context *bld_base,
-		struct lp_build_emit_data *emit_data)
-{
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	unsigned opcode = emit_data->info->opcode;
-	bool is_ddx = opcode == TGSI_OPCODE_DDX ||
-		      opcode == TGSI_OPCODE_DDX_FINE;
-	LLVMValueRef src, result;
-	unsigned mask;
-	int idx;
-
-	switch (opcode) {
-	case TGSI_OPCODE_DDX_FINE:
-		mask = AC_TID_MASK_LEFT;
-		break;
-	case TGSI_OPCODE_DDY_FINE:
-		mask = AC_TID_MASK_TOP;
-		break;
-	default:
-		mask = AC_TID_MASK_TOP_LEFT;
-		break;
-	}
-
-	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
-	idx = is_ddx ? 1 : 2;
-
-	src = ac_to_integer(&ctx->ac, emit_data->args[0]);
-	result = ac_build_ddxy(&ctx->ac, mask, idx, src);
-	emit_data->output[emit_data->chan] = result;
-}
-
-static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct si_shader *shader = ctx->shader;
- const struct tgsi_shader_info *info = &shader->selector->info;
- LLVMValueRef interp_param;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- const struct tgsi_full_src_register *input = &inst->Src[0];
- int input_base, input_array_size;
- int chan;
- int i;
- LLVMValueRef prim_mask = ctx->abi.prim_mask;
- LLVMValueRef array_idx, offset_x = NULL, offset_y = NULL;
- int interp_param_idx;
- unsigned interp;
- unsigned location;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
- /* offset is in second src, first two channels */
- offset_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
- TGSI_CHAN_X);
- offset_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
- TGSI_CHAN_Y);
- } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
- LLVMValueRef sample_position;
- LLVMValueRef sample_id;
- LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
-
- /* fetch sample ID, then fetch its sample position,
- * and place into first two channels.
- */
- sample_id = lp_build_emit_fetch(bld_base,
- emit_data->inst, 1, TGSI_CHAN_X);
- sample_id = ac_to_integer(&ctx->ac, sample_id);
-
- /* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading
- * Language 4.50 spec says about interpolateAtSample:
- *
- * "Returns the value of the input interpolant variable at
- * the location of sample number sample. If multisample
- * buffers are not available, the input variable will be
- * evaluated at the center of the pixel. If sample sample
- * does not exist, the position used to interpolate the
- * input variable is undefined."
- *
- * This means that sample_id values outside of the valid range are
- * in fact valid input, and the usual mechanism for loading the
- * sample position doesn't work.
- */
- if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
- LLVMValueRef center[4] = {
- LLVMConstReal(ctx->f32, 0.5),
- LLVMConstReal(ctx->f32, 0.5),
- ctx->ac.f32_0,
- ctx->ac.f32_0,
- };
-
- sample_position = ac_build_gather_values(&ctx->ac, center, 4);
- } else {
- sample_position = load_sample_position(&ctx->abi, sample_id);
- }
-
- offset_x = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
- ctx->i32_0, "");
-
- offset_x = LLVMBuildFSub(ctx->ac.builder, offset_x, halfval, "");
- offset_y = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
- ctx->i32_1, "");
- offset_y = LLVMBuildFSub(ctx->ac.builder, offset_y, halfval, "");
- }
-
- assert(input->Register.File == TGSI_FILE_INPUT);
-
- if (input->Register.Indirect) {
- unsigned array_id = input->Indirect.ArrayID;
-
- if (array_id) {
- input_base = info->input_array_first[array_id];
- input_array_size = info->input_array_last[array_id] - input_base + 1;
- } else {
- input_base = inst->Src[0].Register.Index;
- input_array_size = info->num_inputs - input_base;
- }
-
- array_idx = si_get_indirect_index(ctx, &input->Indirect,
- 1, input->Register.Index - input_base);
- } else {
- input_base = inst->Src[0].Register.Index;
- input_array_size = 1;
- array_idx = ctx->i32_0;
- }
-
- interp = shader->selector->info.input_interpolate[input_base];
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
- inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
- location = TGSI_INTERPOLATE_LOC_CENTER;
- else
- location = TGSI_INTERPOLATE_LOC_CENTROID;
-
- interp_param_idx = lookup_interp_param_index(interp, location);
- if (interp_param_idx == -1)
- return;
- else if (interp_param_idx)
- interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
- else
- interp_param = NULL;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
- inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
- LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
- /*
- * take the I then J parameters, and the DDX/Y for it, and
- * calculate the IJ inputs for the interpolator.
- * temp1 = ddx * offset/sample.x + I;
- * interp_param.I = ddy * offset/sample.y + temp1;
- * temp1 = ddx * offset/sample.x + J;
- * interp_param.J = ddy * offset/sample.y + temp1;
- */
- for (i = 0; i < 2; i++) {
- LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
- LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
- LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, ix_ll, "");
- LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, iy_ll, "");
- LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
- interp_param, ix_ll, "");
- LLVMValueRef temp;
-
- interp_el = ac_to_float(&ctx->ac, interp_el);
-
- temp = ac_build_fmad(&ctx->ac, ddx_el, offset_x, interp_el);
- ij_out[i] = ac_build_fmad(&ctx->ac, ddy_el, offset_y, temp);
- }
- interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
- }
-
- if (interp_param)
- interp_param = ac_to_float(&ctx->ac, interp_param);