static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info);
+ LLVMTargetMachineRef tm);
/* Ideally pass the sample mask input to the PS epilog as v13, which
* is its usual location, so that the shader doesn't have to add v_mov.
LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn,
param);
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
+ value = bitcast(&ctx->radeon_bld.soa.bld_base,
+ TGSI_TYPE_UNSIGNED, value);
+
if (rshift)
value = LLVMBuildLShr(gallivm->builder, value,
lp_build_const_int32(gallivm, rshift), "");
input_index);
} else if (divisor) {
/* Build index from instance ID, start instance and divisor */
- ctx->shader->uses_instanceid = true;
+ ctx->shader->info.uses_instanceid = true;
buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
SI_PARAM_START_INSTANCE,
divisor);
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
case TGSI_SEMANTIC_CLIPDIST:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->vs_output_param_offset[i] = param_count;
+ assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[i] = param_count;
param_count++;
break;
default:
}
}
- shader->nr_param_exports = param_count;
+ shader->info.nr_param_exports = param_count;
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
for (i = 0; i < 4; i++)
if (pos_args[i][0])
- shader->nr_pos_exports++;
+ shader->info.nr_pos_exports++;
pos_idx = 0;
for (i = 0; i < 4; i++) {
/* Specify the target we are exporting */
pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
- if (pos_idx == shader->nr_pos_exports)
+ if (pos_idx == shader->info.nr_pos_exports)
/* Specify that this is the last export */
pos_args[i][2] = uint->one;
/**
* Load an image view, fmask view, or sampler state descriptor.
*/
-static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef index, enum desc_type type)
+static LLVMValueRef get_sampler_desc_custom(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum desc_type type)
{
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLERS);
switch (type) {
case DESC_IMAGE:
/* The sampler state is at [12:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
- ptr = LLVMBuildPointerCast(builder, ptr,
- const_array(ctx->v4i32, 0), "");
+ list = LLVMBuildPointerCast(builder, list,
+ const_array(ctx->v4i32, 0), "");
break;
}
- return build_indexed_load_const(ctx, ptr, index);
+ return build_indexed_load_const(ctx, list, index);
+}
+
+static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef index, enum desc_type type)
+{ /* Wrapper around get_sampler_desc_custom() that always reads from the
+ * descriptor list passed as the SI_PARAM_SAMPLERS parameter of the
+ * main function. */
+ LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+
+ return get_sampler_desc_custom(ctx, list, index, type);
}
static void tex_fetch_ptrs(
params[SI_PARAM_FRONT_FACE] = ctx->i32;
params[SI_PARAM_ANCILLARY] = ctx->i32;
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
- params[SI_PARAM_POS_FIXED_PT] = ctx->f32;
+ params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
num_params = SI_PARAM_POS_FIXED_PT+1;
if (!ctx->is_monolithic) {
S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1) |
S_0286D0_LINEAR_CENTROID_ENA(1) |
- S_0286D0_FRONT_FACE_ENA(1));
+ S_0286D0_FRONT_FACE_ENA(1) |
+ S_0286D0_POS_FIXED_PT_ENA(1));
}
- shader->num_input_sgprs = 0;
- shader->num_input_vgprs = 0;
+ shader->info.num_input_sgprs = 0;
+ shader->info.num_input_vgprs = 0;
for (i = 0; i <= last_sgpr; ++i)
- shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+ shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
/* Unused fragment shader inputs are eliminated by the compiler,
* so we don't know yet how many there will be.
*/
if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
for (; i < num_params; ++i)
- shader->num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+ shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
}
}
+static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
+ LLVMValueRef param_sampler_views,
+ unsigned param_pos_fixed_pt)
+{
+ struct lp_build_tgsi_context *bld_base =
+ &ctx->radeon_bld.soa.bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_emit_data result = {};
+ struct tgsi_full_instruction inst = {};
+ LLVMValueRef desc, sampler_index, address[2], pix;
+
+ /* Emit the LLVM IR for polygon stippling: fetch one texel of the
+ * stipple pattern at the current pixel and hand it to the kill
+ * intrinsic.
+ *
+ * param_sampler_views is the descriptor list the stipple texture is
+ * loaded from; param_pos_fixed_pt is the index of the function
+ * parameter that carries the fixed-point position.
+ *
+ * Use the fixed-point gl_FragCoord input.
+ * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
+ * per coordinate to get the repeating effect.
+ *
+ * address[0] takes 5 bits starting at bit 0 and address[1] takes
+ * 5 bits starting at bit 16 (presumably X and Y, and assuming
+ * unpack_param(ctx, param, shift, width) - confirm).
+ */
+ address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
+ address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
+
+ /* Load the sampler view descriptor stored at slot
+ * SI_POLY_STIPPLE_SAMPLER of the list supplied by the caller. */
+ sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER);
+ desc = get_sampler_desc_custom(ctx, param_sampler_views,
+ sampler_index, DESC_IMAGE);
+
+ /* Load the texel. A synthetic TXF instruction is built on the stack
+ * only so that the generic texture-fetch helpers can be reused; no
+ * sampler state is passed (NULL) and all four channels are requested
+ * (0xf). */
+ inst.Instruction.Opcode = TGSI_OPCODE_TXF;
+ inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */
+ result.inst = &inst;
+ set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF,
+ inst.Texture.Texture,
+ desc, NULL, address, ARRAY_SIZE(address), 0xf);
+ build_tex_intrinsic(&tex_action, bld_base, &result);
+
+ /* Kill the thread accordingly: component 3 of the fetched texel is
+ * reinterpreted as a float, negated and passed to llvm.AMDGPU.kill.
+ * NOTE(review): presumably the intrinsic discards the pixel when its
+ * operand is negative, i.e. when that component is positive -
+ * confirm against the intrinsic's definition. */
+ pix = LLVMBuildExtractElement(gallivm->builder, result.output[0],
+ lp_build_const_int32(gallivm, 3), "");
+ pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix);
+ pix = LLVMBuildFNeg(gallivm->builder, pix, "");
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context),
+ &pix, 1, 0);
+}
+
void si_shader_binary_read_config(struct radeon_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset)
static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
struct pipe_debug_callback *debug,
- const char *name)
+ const char *name, FILE *file)
{
char *line, *p;
unsigned i, count;
if (binary->disasm_string) {
- fprintf(stderr, "Shader %s disassembly:\n", name);
- fprintf(stderr, "%s", binary->disasm_string);
+ fprintf(file, "Shader %s disassembly:\n", name);
+ fprintf(file, "%s", binary->disasm_string);
if (debug && debug->debug_message) {
/* Very long debug messages are cut off, so send the
"Shader Disassembly End");
}
} else {
- fprintf(stderr, "Shader %s binary:\n", name);
+ fprintf(file, "Shader %s binary:\n", name);
for (i = 0; i < binary->code_size; i += 4) {
- fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
+ fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
binary->code[i + 1], binary->code[i]);
}
unsigned num_inputs,
unsigned code_size,
struct pipe_debug_callback *debug,
- unsigned processor)
+ unsigned processor,
+ FILE *file)
{
unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
- if (r600_can_dump_shader(&sscreen->b, processor)) {
+ if (file != stderr ||
+ r600_can_dump_shader(&sscreen->b, processor)) {
if (processor == TGSI_PROCESSOR_FRAGMENT) {
- fprintf(stderr, "*** SHADER CONFIG ***\n"
+ fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
- fprintf(stderr, "*** SHADER STATS ***\n"
+ fprintf(file, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
"Code Size: %d bytes\n"
max_simd_waves);
}
+static const char *si_get_shader_name(struct si_shader *shader,
+ unsigned processor)
+{ /* Return a constant, human-readable name for the shader variant, chosen
+ * from the processor type and the as_es/as_ls bits of the shader key.
+ * The returned string is a literal and must not be freed. */
+ switch (processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ if (shader->key.vs.as_es)
+ return "Vertex Shader as ES";
+ else if (shader->key.vs.as_ls)
+ return "Vertex Shader as LS";
+ else
+ return "Vertex Shader as VS";
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return "Tessellation Control Shader";
+ case TGSI_PROCESSOR_TESS_EVAL:
+ if (shader->key.tes.as_es)
+ return "Tessellation Evaluation Shader as ES";
+ else
+ return "Tessellation Evaluation Shader as VS";
+ case TGSI_PROCESSOR_GEOMETRY: /* NOTE(review): a shader with no
+ * gs_copy_shader attached is named as the
+ * copy shader itself - presumably only the
+ * real GS owns a copy shader; confirm. */
+ if (shader->gs_copy_shader == NULL)
+ return "GS Copy Shader as VS";
+ else
+ return "Geometry Shader";
+ case TGSI_PROCESSOR_FRAGMENT:
+ return "Pixel Shader";
+ case TGSI_PROCESSOR_COMPUTE:
+ return "Compute Shader";
+ default:
+ return "Unknown Shader";
+ }
+}
+
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
- struct pipe_debug_callback *debug, unsigned processor)
+ struct pipe_debug_callback *debug, unsigned processor,
+ FILE *file)
{
- if (r600_can_dump_shader(&sscreen->b, processor) &&
- !(sscreen->b.debug_flags & DBG_NO_ASM)) {
- fprintf(stderr, "\n");
+ if (file != stderr || /* Any stream other than stderr is always written;
+ * stderr output stays gated by the shader-dump
+ * debug flags and DBG_NO_ASM. The disassembly of
+ * the prolog, main part and epilog (when present)
+ * is printed under a heading naming the shader. */
+ (r600_can_dump_shader(&sscreen->b, processor) &&
+ !(sscreen->b.debug_flags & DBG_NO_ASM))) {
+ fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
if (shader->prolog)
si_shader_dump_disassembly(&shader->prolog->binary,
- debug, "prolog");
+ debug, "prolog", file);
- si_shader_dump_disassembly(&shader->binary, debug, "main");
+ si_shader_dump_disassembly(&shader->binary, debug, "main", file);
if (shader->epilog)
si_shader_dump_disassembly(&shader->epilog->binary,
- debug, "epilog");
- fprintf(stderr, "\n");
+ debug, "epilog", file);
+ fprintf(file, "\n");
}
si_shader_dump_stats(sscreen, &shader->config,
shader->selector ? shader->selector->info.num_inputs : 0,
- si_get_shader_binary_size(shader), debug, processor);
+ si_get_shader_binary_size(shader), debug, processor,
+ file);
}
int si_compile_llvm(struct si_screen *sscreen,
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
- si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm, gsinfo);
+ si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm);
ctx->type = TGSI_PROCESSOR_VERTEX;
ctx->is_gs_copy_shader = true;
if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx->shader, debug,
- TGSI_PROCESSOR_GEOMETRY);
+ TGSI_PROCESSOR_GEOMETRY, stderr);
r = si_shader_binary_upload(sscreen, ctx->shader);
}
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct si_shader *shader,
- LLVMTargetMachineRef tm,
- struct tgsi_shader_info *info)
+ LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
bld_base = &ctx->radeon_bld.soa.bld_base;
- bld_base->info = info;
+ if (shader && shader->selector)
+ bld_base->info = &shader->selector->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}
-static int si_compile_tgsi_shader(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
- struct si_shader *shader,
- bool is_monolithic,
- struct pipe_debug_callback *debug)
+int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
- struct tgsi_token *tokens = sel->tokens;
struct si_shader_context ctx;
struct lp_build_tgsi_context *bld_base;
- struct tgsi_shader_info stipple_shader_info;
LLVMModuleRef mod;
int r = 0;
- bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
- shader->key.ps.prolog.poly_stipple;
-
- if (poly_stipple) {
- tokens = util_pstipple_create_fragment_shader(tokens, NULL,
- SI_POLY_STIPPLE_SAMPLER,
- TGSI_FILE_SYSTEM_VALUE);
- tgsi_scan_shader(tokens, &stipple_shader_info);
- }
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
si_dump_shader_key(sel->type, &shader->key, stderr);
- tgsi_dump(tokens, 0);
+ tgsi_dump(sel->tokens, 0);
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, shader, tm,
- poly_stipple ? &stipple_shader_info : &sel->info);
+ si_init_shader_ctx(&ctx, sscreen, shader, tm);
ctx.is_monolithic = is_monolithic;
- shader->uses_instanceid = sel->info.uses_instanceid;
+ shader->info.uses_instanceid = sel->info.uses_instanceid;
bld_base = &ctx.radeon_bld.soa.bld_base;
ctx.radeon_bld.load_system_value = declare_system_value;
preload_streamout_buffers(&ctx);
preload_ring_buffers(&ctx);
+ if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
+ shader->key.ps.prolog.poly_stipple) {
+ LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+ si_llvm_emit_polygon_stipple(&ctx, views,
+ SI_PARAM_POS_FIXED_PT);
+ }
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
}
}
- if (!lp_build_tgsi_llvm(bld_base, tokens)) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
goto out;
}
/* Calculate the number of fragment input VGPRs. */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
- shader->num_input_vgprs = 0;
- shader->face_vgpr_index = -1;
+ shader->info.num_input_vgprs = 0;
+ shader->info.face_vgpr_index = -1;
if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 3;
+ shader->info.num_input_vgprs += 3;
if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 2;
+ shader->info.num_input_vgprs += 2;
if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
- shader->face_vgpr_index = shader->num_input_vgprs;
- shader->num_input_vgprs += 1;
+ shader->info.face_vgpr_index = shader->info.num_input_vgprs;
+ shader->info.num_input_vgprs += 1;
}
if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
- shader->num_input_vgprs += 1;
+ shader->info.num_input_vgprs += 1;
}
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
out:
for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
FREE(ctx.constants[i]);
- if (poly_stipple)
- tgsi_free_tokens(tokens);
return r;
}
int last_sgpr, num_params, num_returns, i;
bool status = true;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
ctx.type = TGSI_PROCESSOR_VERTEX;
ctx.param_vertex_id = key->vs_prolog.num_input_sgprs;
ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3;
int num_params, i;
bool status = true;
- si_init_shader_ctx(&ctx, sscreen, NULL, tm, NULL);
+ si_init_shader_ctx(&ctx, sscreen, NULL, tm);
ctx.type = TGSI_PROCESSOR_VERTEX;
/* Declare input VGPRs. */
/* Set up the PrimitiveID output. */
if (shader->key.vs.epilog.export_prim_id) {
unsigned index = shader->selector->info.num_outputs;
- unsigned offset = shader->nr_param_exports++;
+ unsigned offset = shader->info.nr_param_exports++;
epilog_key.vs_epilog.prim_id_param_offset = offset;
- shader->vs_output_param_offset[index] = offset;
+ assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
+ shader->info.vs_output_param_offset[index] = offset;
}
shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
/* Get the prolog. */
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.vs_prolog.states = shader->key.vs.prolog;
- prolog_key.vs_prolog.num_input_sgprs = shader->num_input_sgprs;
+ prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
/* The prolog is a no-op if there are no inputs. */
/* Set the instanceID flag. */
for (i = 0; i < info->num_inputs; i++)
if (prolog_key.vs_prolog.states.instance_divisors[i])
- shader->uses_instanceid = true;
+ shader->info.uses_instanceid = true;
return true;
}
int last_array_pointer, last_sgpr, num_params;
bool status = true;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
ctx.type = TGSI_PROCESSOR_TESS_CTRL;
shader.key.tcs.epilog = key->tcs_epilog.states;
int last_sgpr, num_params, num_returns, i, num_color_channels;
bool status = true;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
ctx.type = TGSI_PROCESSOR_FRAGMENT;
shader.key.ps.prolog = key->ps_prolog.states;
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
+ /* Polygon stippling. */
+ if (key->ps_prolog.states.poly_stipple) {
+ /* POS_FIXED_PT is always last. */
+ unsigned pos = key->ps_prolog.num_input_sgprs +
+ key->ps_prolog.num_input_vgprs - 1;
+ LLVMValueRef ptr[2], views;
+
+ /* Get the pointer to sampler views. */
+ ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS);
+ ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1);
+ views = lp_build_gather_values(gallivm, ptr, 2);
+ views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, "");
+ views = LLVMBuildIntToPtr(gallivm->builder, views,
+ const_array(ctx.v8i32, SI_NUM_SAMPLERS), "");
+
+ si_llvm_emit_polygon_stipple(&ctx, views, pos);
+ }
+
/* Interpolate colors. */
for (i = 0; i < 2; i++) {
unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
linear_sample[i], base + 10 + i, "");
}
- /* TODO: polygon stippling */
-
/* Compile. */
LLVMBuildRet(gallivm->builder, ret);
radeon_llvm_finalize_module(&ctx.radeon_bld);
int last_array_pointer, last_sgpr, num_params, i;
bool status = true;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+ si_init_shader_ctx(&ctx, sscreen, &shader, tm);
ctx.type = TGSI_PROCESSOR_FRAGMENT;
shader.key.ps.epilog = key->ps_epilog.states;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.ps_prolog.states = shader->key.ps.prolog;
prolog_key.ps_prolog.colors_read = info->colors_read;
- prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs;
- prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs;
+ prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
if (info->colors_read) {
unsigned *color = shader->selector->color_attr_index;
if (shader->key.ps.prolog.color_two_side) {
/* BCOLORs are stored after the last input. */
prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
- prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index;
+ prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
}
if (!shader->epilog)
return false;
+ /* Enable POS_FIXED_PT if polygon stippling is enabled. */
+ if (shader->key.ps.prolog.poly_stipple) {
+ shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
+ assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
+ }
+
/* Set up the enable bits for per-sample shading if needed. */
if (shader->key.ps.prolog.force_persample_interp) {
if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
+ struct si_shader *mainp = shader->selector->main_shader_part;
int r;
- /* Compile TGSI. */
- r = si_compile_tgsi_shader(sscreen, tm, shader,
- sscreen->use_monolithic_shaders, debug);
- if (r)
- return r;
+ /* LS and ES are always compiled on demand. */
+ if (!mainp ||
+ (shader->selector->type == PIPE_SHADER_VERTEX &&
+ (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+ (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
+ shader->key.tes.as_es)) {
+ /* Monolithic shader (compiled as a whole, has many variants,
+ * may take a long time to compile).
+ */
+ r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+ if (r)
+ return r;
+ } else {
+ /* The shader consists of 2-3 parts:
+ *
+ * - the middle part is the user shader, it has 1 variant only
+ * and it was compiled during the creation of the shader
+ * selector
+ * - the prolog part is inserted at the beginning
+ * - the epilog part is inserted at the end
+ *
+ * The prolog and epilog have many (but simple) variants.
+ */
- if (!sscreen->use_monolithic_shaders) {
+ /* Copy the compiled TGSI shader data over. */
+ shader->is_binary_shared = true;
+ shader->binary = mainp->binary;
+ shader->config = mainp->config;
+ shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
+ shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
+ shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
+ memcpy(shader->info.vs_output_param_offset,
+ mainp->info.vs_output_param_offset,
+ sizeof(mainp->info.vs_output_param_offset));
+ shader->info.uses_instanceid = mainp->info.uses_instanceid;
+ shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
+ shader->info.nr_param_exports = mainp->info.nr_param_exports;
+
+ /* Select prologs and/or epilogs. */
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
* are allocated inputs.
*/
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
- shader->num_input_vgprs);
+ shader->info.num_input_vgprs);
break;
}
}
}
- si_shader_dump(sscreen, shader, debug, shader->selector->info.processor);
+ si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
+ stderr);
/* Upload. */
r = si_shader_binary_upload(sscreen, shader);
r600_resource_reference(&shader->bo, NULL);
- radeon_shader_binary_clean(&shader->binary);
+ if (!shader->is_binary_shared)
+ radeon_shader_binary_clean(&shader->binary);
}