From: Glenn Kennard Date: Wed, 10 Sep 2014 09:54:40 +0000 (+0200) Subject: r600g: Implement GL_ARB_sample_shading X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a327fa3a068af49bb7ae00a4b03abcb91906e0d2;p=mesa.git r600g: Implement GL_ARB_sample_shading Also fixes two sided lighting which was broken at least on pre-evergreen by commit b1eb00. Signed-off-by: Glenn Kennard Signed-off-by: Marek Olšák --- diff --git a/docs/GL3.txt b/docs/GL3.txt index 5adc7598c0c..07d1d2c493f 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -110,7 +110,7 @@ GL 4.0, GLSL 4.00: - Interpolation functions DONE () - New overload resolution rules DONE GL_ARB_gpu_shader_fp64 started (Dave) - GL_ARB_sample_shading DONE (i965, nv50, nvc0, radeonsi) + GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_shader_subroutine not started GL_ARB_tessellation_shader started (Chris, Ilia) GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html index e6813d3d100..64cbfaefa6f 100644 --- a/docs/relnotes/10.4.html +++ b/docs/relnotes/10.4.html @@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.

    +
  • GL_ARB_sample_shading on r600
  • GL_ARB_texture_view on nv50, nvc0
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 27a9ad9e396..78c1b680acc 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1400,7 +1400,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* MSAA. */ if (rctx->b.chip_class == EVERGREEN) - rctx->framebuffer.atom.num_dw += 14; /* Evergreen */ + rctx->framebuffer.atom.num_dw += 17; /* Evergreen */ else rctx->framebuffer.atom.num_dw += 28; /* Cayman */ @@ -1420,8 +1420,22 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } rctx->framebuffer.atom.dirty = true; + + r600_set_sample_locations_constant_buffer(rctx); } +static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_samples) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + if (rctx->ps_iter_samples == min_samples) + return; + + rctx->ps_iter_samples = min_samples; + if (rctx->framebuffer.nr_samples > 1) { + rctx->framebuffer.atom.dirty = true; + } +} /* 8xMSAA */ static uint32_t sample_locs_8x[] = { @@ -1475,7 +1489,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx, } } -static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples) +static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; @@ -1508,10 +1522,12 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples) S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */ radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) | S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */ + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1)); } else { r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2); radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */ radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */ + r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0); } } @@ -1672,10 +1688,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r radeon_emit(cs, br); /* R_028208_PA_SC_WINDOW_SCISSOR_BR */ if (rctx->b.chip_class == EVERGREEN) { - evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples); + evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); } else { cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples); - cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, 1); + cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); } } @@ -2432,8 +2448,6 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb, r600_store_value(cb, tmp); /* R_008C0C_SQ_GPR_RESOURCE_MGMT_3 */ } - r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0); - /* The cs checker requires this register to be set. */ r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); @@ -2786,11 +2800,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader struct r600_command_buffer *cb = &shader->command_buffer; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control = 0; - int pos_index = -1, face_index = -1; + int pos_index = -1, face_index = -1, fixed_pt_position_index = -1; int ninterp = 0; - boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; - unsigned spi_baryc_cntl, sid, tmp, num = 0; - unsigned z_export = 0, stencil_export = 0; + boolean have_perspective = FALSE, have_linear = FALSE; + static const unsigned spi_baryc_enable_bit[6] = { + S_0286E0_PERSP_SAMPLE_ENA(1), + S_0286E0_PERSP_CENTER_ENA(1), + S_0286E0_PERSP_CENTROID_ENA(1), + S_0286E0_LINEAR_SAMPLE_ENA(1), + S_0286E0_LINEAR_CENTER_ENA(1), + S_0286E0_LINEAR_CENTROID_ENA(1) + }; + unsigned spi_baryc_cntl = 0, sid, tmp, num = 0; + unsigned z_export = 0, stencil_export = 0, mask_export = 0; unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; uint32_t spi_ps_input_cntl[32]; @@ -2813,14 +2835,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader if (face_index == -1) face_index = i; /* lives in same register, same enable bit */ } + else if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) { + fixed_pt_position_index = i; + } else { ninterp++; - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - have_linear = TRUE; - if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) - have_perspective = TRUE; - if (rshader->input[i].centroid) - have_centroid = TRUE; + int k = eg_get_interpolator_index( + rshader->input[i].interpolate, + rshader->input[i].interpolate_location); + if (k >= 0) { + spi_baryc_cntl |= spi_baryc_enable_bit[k]; + have_perspective |= k < 3; + have_linear |= !(k < 3); + } } sid = rshader->input[i].spi_sid; @@ -2852,17 +2879,22 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader z_export = 1; if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) stencil_export = 1; + if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && + rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) + mask_export = 1; } if (rshader->uses_kill) db_shader_control |= S_02880C_KILL_ENABLE(1); db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export); + db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); exports_ps = 0; for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || - rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + rshader->output[i].name == TGSI_SEMANTIC_STENCIL || + rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) exports_ps |= 1; } @@ -2878,6 +2910,8 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader ninterp = 1; have_perspective = TRUE; } + if (!spi_baryc_cntl) + spi_baryc_cntl |= spi_baryc_enable_bit[0]; if (!have_perspective && !have_linear) have_perspective = TRUE; @@ -2888,7 +2922,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader spi_input_z = 0; if (pos_index != -1) { spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) | S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); } @@ -2898,14 +2932,10 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } - - spi_baryc_cntl = 0; - if (have_perspective) - spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | - S_0286E0_PERSP_CENTROID_ENA(have_centroid); - if (have_linear) - spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + if (fixed_pt_position_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) | + S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr); + } r600_store_context_reg_seq(cb, R_0286CC_SPI_PS_IN_CONTROL_0, 2); r600_store_value(cb, spi_ps_in_control_0); /* R_0286CC_SPI_PS_IN_CONTROL_0 */ @@ -2924,7 +2954,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ shader->db_shader_control = db_shader_control; - shader->ps_depth_export = z_export | stencil_export; + shader->ps_depth_export = z_export | stencil_export | mask_export; shader->sprite_coord_enable = sprite_coord_enable; if (rctx->rasterizer) @@ -3446,6 +3476,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->b.b.create_sampler_view = evergreen_create_sampler_view; rctx->b.b.set_framebuffer_state = evergreen_set_framebuffer_state; rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple; + rctx->b.b.set_min_samples = evergreen_set_min_samples; rctx->b.b.set_scissor_states = evergreen_set_scissor_states; if (rctx->b.chip_class == EVERGREEN) diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 784d495a40f..49899960367 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -803,6 +803,9 @@ #define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6) #define G_02880C_KILL_ENABLE(x) (((x) >> 6) & 0x1) #define C_02880C_KILL_ENABLE 0xFFFFFFBF +#define S_02880C_MASK_EXPORT_ENABLE(x) (((x) & 0x1) << 8) +#define G_02880C_MASK_EXPORT_ENABLE(x) (((x) >> 8) & 0x1) +#define C_02880C_MASK_EXPORT_ENABLE 0XFFFFFEFF #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9) #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1) #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c6459d81209..3962fee4a43 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -265,6 +265,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_SAMPLE_SHADING: return 1; case PIPE_CAP_COMPUTE: @@ -319,7 +320,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 2df168f01b5..fa9d34b0d71 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -52,6 +52,14 @@ #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3) +/* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has 16 buffers, but the driver is + * trying to use 17. Avoid accidentally aliasing with user UBOs for SAMPLE_POSITIONS by using an id<16. + * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. + * + * Fixing this properly would require the driver to combine its buffers into a single hardware buffer, + * which would also allow supporting the d3d 11 mandated minimum of 15 user const buffers. + */ +#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4])) @@ -452,6 +460,7 @@ struct r600_context { bool force_blend_disable; boolean dual_src_blend; unsigned zwritemask; + int ps_iter_samples; /* Index buffer. */ struct pipe_index_buffer index_buffer; @@ -639,6 +648,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, void r600_sampler_states_dirty(struct r600_context *rctx, struct r600_sampler_states *state); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); +void r600_set_sample_locations_constant_buffer(struct r600_context *rctx); uint32_t r600_translate_stencil_op(int s_op); uint32_t r600_translate_fill(uint32_t func); unsigned r600_tex_wrap(unsigned wrap); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9f10c20c4c3..9e9a557e867 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -64,6 +64,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, struct r600_shader_key key); + static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, int size, unsigned comp_mask) { @@ -267,6 +268,11 @@ struct r600_shader_src { uint32_t value[4]; }; +struct eg_interp { + boolean enabled; + unsigned ij_index; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -283,13 +289,11 @@ struct r600_shader_ctx { uint32_t max_driver_temp_used; boolean use_llvm; /* needed for evergreen interpolation */ - boolean input_centroid; - boolean input_linear; - boolean input_perspective; - int num_interp_gpr; + struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid /* evergreen/cayman also store sample mask in face register */ int face_gpr; - boolean has_samplemask; + /* sample id is .w component stored in fixed point position register */ + int fixed_pt_position_gpr; int colors_used; boolean clip_vertex_write; unsigned cv_output; @@ -320,6 +324,12 @@ static int tgsi_endif(struct r600_shader_ctx *ctx); static int tgsi_bgnloop(struct r600_shader_ctx *ctx); static int tgsi_endloop(struct r600_shader_ctx *ctx); static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, + unsigned int cb_idx, unsigned int offset, unsigned ar_chan, + unsigned int dst_reg); +static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan); static int tgsi_is_supported(struct r600_shader_ctx *ctx) { @@ -364,27 +374,41 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, - int input) +int eg_get_interpolator_index(unsigned interpolate, unsigned location) { - int ij_index = 0; + if (interpolate == TGSI_INTERPOLATE_COLOR || + interpolate == TGSI_INTERPOLATE_LINEAR || + interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + { + int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; + int loc; - if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { - if (ctx->shader->input[input].centroid) - ij_index++; - } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { - /* if we have perspective add one */ - if (ctx->input_perspective) { - ij_index++; - /* if we have perspective centroid */ - if (ctx->input_centroid) - ij_index++; + switch(location) { + case TGSI_INTERPOLATE_LOC_CENTER: + loc = 1; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + loc = 2; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + default: + loc = 0; break; } - if (ctx->shader->input[input].centroid) - ij_index++; + + return is_linear * 3 + loc; } - ctx->shader->input[input].ij_index = ij_index; + return -1; +} + +static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, + int input) +{ + int i = eg_get_interpolator_index( + ctx->shader->input[input].interpolate, + ctx->shader->input[input].interpolate_location); + assert(i >= 0); + ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; } static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) @@ -582,13 +606,15 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Interp.Interpolate; - ctx->shader->input[i].centroid = d->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID; + ctx->shader->input[i].interpolate_location = d->Interp.Location; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); switch (ctx->shader->input[i].name) { case TGSI_SEMANTIC_FACE: - if (ctx->face_gpr == -1) + if (ctx->face_gpr != -1) + ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ + else ctx->face_gpr = ctx->shader->input[i].gpr; break; case TGSI_SEMANTIC_COLOR: @@ -679,14 +705,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) break; case TGSI_FILE_SYSTEM_VALUE: - if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) { - ctx->has_samplemask = true; - /* lives in Front Face GPR */ - if (ctx->face_gpr == -1) - ctx->face_gpr = ctx->file_offset[TGSI_FILE_SYSTEM_VALUE] + d->Range.First; - break; - } - else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || + d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || + d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { + break; /* Already handled from allocate_system_value_inputs */ + } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { if (!ctx->native_integers) { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -720,12 +743,69 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) +{ + struct tgsi_parse_context parse; + struct { + boolean enabled; + int *reg; + unsigned name, alternate_name; + } inputs[2] = { + { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ + + { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ + }; + int i, k, num_regs = 0; + + if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { + return 0; + } + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { + struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; + if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + for (k = 0; k < Elements(inputs); k++) { + if (d->Semantic.Name == inputs[k].name || + d->Semantic.Name == inputs[k].alternate_name) { + inputs[k].enabled = true; + } + } + } + } + } + + tgsi_parse_free(&parse); + + for (i = 0; i < Elements(inputs); i++) { + boolean enabled = inputs[i].enabled; + int *reg = inputs[i].reg; + unsigned name = inputs[i].name; + + if (enabled) { + int gpr = gpr_offset + num_regs++; + + // add to inputs, allocate a gpr + k = ctx->shader->ninput ++; + ctx->shader->input[k].name = name; + ctx->shader->input[k].sid = 0; + ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; + ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; + *reg = ctx->shader->input[k].gpr = gpr; + } + } + + return gpr_offset + num_regs; +} + /* * for evergreen we need to scan the shader to find the number of GPRs we need to - * reserve for interpolation. + * reserve for interpolation and system values * * we need to know if we are going to emit - * any centroid inputs + * any sample or centroid inputs * if perspective and linear are required */ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) @@ -733,39 +813,92 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) int i; int num_baryc; - ctx->input_linear = FALSE; - ctx->input_perspective = FALSE; - ctx->input_centroid = FALSE; - ctx->num_interp_gpr = 1; + memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); - /* any centroid inputs */ for (i = 0; i < ctx->info.num_inputs; i++) { - /* skip position/face */ + int k; + /* skip position/face/mask/sampleid */ if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || - ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK) + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) continue; - if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) - ctx->input_linear = TRUE; - if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) - ctx->input_perspective = TRUE; - if (ctx->info.input_interpolate_loc[i] == TGSI_INTERPOLATE_LOC_CENTROID) - ctx->input_centroid = TRUE; + + k = eg_get_interpolator_index( + ctx->info.input_interpolate[i], + ctx->info.input_interpolate_loc[i]); + if (k >= 0) + ctx->eg_interpolators[k].enabled = TRUE; } + /* assign gpr to each interpolator according to priority */ num_baryc = 0; - /* ignoring sample for now */ - if (ctx->input_perspective) - num_baryc++; - if (ctx->input_linear) - num_baryc++; - if (ctx->input_centroid) - num_baryc *= 2; - - ctx->num_interp_gpr += (num_baryc + 1) >> 1; - - /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */ - return ctx->num_interp_gpr; + for (i = 0; i < Elements(ctx->eg_interpolators); i++) { + if (ctx->eg_interpolators[i].enabled) { + ctx->eg_interpolators[i].ij_index = num_baryc; + num_baryc ++; + } + } + + /* XXX PULL MODEL and LINE STIPPLE */ + + num_baryc = (num_baryc + 1) >> 1; + return allocate_system_value_inputs(ctx, num_baryc); +} + +/* sample_id_sel == NULL means fetch for current sample */ +static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) +{ + struct r600_bytecode_vtx vtx; + int r, t1; + + assert(ctx->fixed_pt_position_gpr != -1); + + t1 = r600_get_temp(ctx); + + memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); + vtx.op = FETCH_OP_VFETCH; + vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER; + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + if (sample_id == NULL) { + vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; + vtx.src_sel_x = 3; + } + else { + struct r600_bytecode_alu alu; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + r600_bytecode_src(&alu.src[0], sample_id, chan_sel); + alu.dst.sel = t1; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + vtx.src_gpr = t1; + vtx.src_sel_x = 0; + } + vtx.mega_fetch_count = 16; + vtx.dst_gpr = t1; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 7; + vtx.dst_sel_w = 7; + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; + vtx.format_comp_all = 1; + vtx.use_const_fields = 0; + vtx.offset = 1; // first element is size of buffer + vtx.endian = r600_endian_swap(32); + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + r = r600_bytecode_add_vtx(ctx->bc, &vtx); + if (r) + return r; + + return t1; } static void tgsi_src(struct r600_shader_ctx *ctx, @@ -797,10 +930,22 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { r600_src->swizzle[0] = 2; // Z value - r600_src->swizzle[0] = 2; - r600_src->swizzle[0] = 2; - r600_src->swizzle[0] = 2; + r600_src->swizzle[1] = 2; + r600_src->swizzle[2] = 2; + r600_src->swizzle[3] = 2; r600_src->sel = ctx->face_gpr; + } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { + r600_src->swizzle[0] = 3; // W value + r600_src->swizzle[1] = 3; + r600_src->swizzle[2] = 3; + r600_src->swizzle[3] = 3; + r600_src->sel = ctx->fixed_pt_position_gpr; + } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { + r600_src->swizzle[0] = 0; + r600_src->swizzle[1] = 1; + r600_src->swizzle[2] = 4; + r600_src->swizzle[3] = 4; + r600_src->sel = load_sample_position(ctx, NULL, -1); } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { r600_src->swizzle[0] = 3; r600_src->swizzle[1] = 3; @@ -1612,7 +1757,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.gs_next_vertex = 0; ctx.face_gpr = -1; - ctx.has_samplemask = false; + ctx.fixed_pt_position_gpr = -1; ctx.fragcoord_input = -1; ctx.colors_used = 0; ctx.clip_vertex_write = 0; @@ -1661,8 +1806,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { - ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { + if (ctx.bc->chip_class >= EVERGREEN) + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); + else + ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); } if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { /* FIXME 1 would be enough in some cases (3 or less input vertices) */ @@ -1775,14 +1923,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, shader->ring_item_size = ctx.next_ring_offset; - /* Need to tell setup to program FACE register */ - if (ctx.has_samplemask && ctx.face_gpr != -1) { - i = ctx.shader->ninput++; - ctx.shader->input[i].name = TGSI_SEMANTIC_SAMPLEMASK; - ctx.shader->input[i].spi_sid = 0; - ctx.shader->input[i].gpr = ctx.face_gpr; - } - /* Process two side if needed */ if (shader->two_side && ctx.colors_used) { int i, count = ctx.shader->ninput; @@ -1795,6 +1935,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, int gpr = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_max[TGSI_FILE_INPUT] + 1; + /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ if (ctx.face_gpr == -1) { i = ctx.shader->ninput++; ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; @@ -2162,6 +2303,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].swizzle_y = 1; output[j].swizzle_z = output[j].swizzle_w = 7; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { + output[j].array_base = 61; + output[j].swizzle_x = 7; + output[j].swizzle_y = 7; + output[j].swizzle_z = 0; + output[j].swizzle_w = 7; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 4b27ede96dd..20829fd9fdb 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -33,7 +33,7 @@ struct r600_shader_io { int spi_sid; unsigned interpolate; unsigned ij_index; - boolean centroid; + unsigned interpolate_location; // TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE unsigned lds_pos; /* for evergreen */ unsigned back_color_input; unsigned write_mask; @@ -115,4 +115,8 @@ struct r600_pipe_shader { unsigned ps_depth_export; }; +/* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and + TGSI_INTERPOLATE_LOC_CENTER/SAMPLE/COUNT. Other input values return -1. */ +int eg_get_interpolator_index(unsigned interpolate, unsigned location); + #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 9ca61718aba..1f933efca9e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -486,7 +486,12 @@ static void *r600_create_rs_state(struct pipe_context *ctx, sc_mode_cntl = S_028A4C_MSAA_ENABLE(state->multisample) | S_028A4C_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | - S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1); + S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | + S_028A4C_PS_ITER_SAMPLE(state->multisample && rctx->ps_iter_samples > 1); + if (rctx->b.family == CHIP_RV770) { + /* workaround possible rendering corruption on RV770 with hyperz together with sample shading */ + sc_mode_cntl |= S_028A4C_TILE_COVER_DISABLE(state->multisample && rctx->ps_iter_samples > 1); + } if (rctx->b.chip_class >= R700) { sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) | S_028A4C_R700_ZMM_LINE_OFFSET(1) | @@ -1245,6 +1250,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } rctx->framebuffer.atom.dirty = true; + + r600_set_sample_locations_constant_buffer(rctx); } static uint32_t sample_locs_2x[] = { @@ -1524,6 +1531,21 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a r600_emit_msaa_state(rctx, rctx->framebuffer.nr_samples); } +static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + if (rctx->ps_iter_samples == min_samples) + return; + + rctx->ps_iter_samples = min_samples; + if (rctx->framebuffer.nr_samples > 1) { + rctx->rasterizer_state.atom.dirty = true; + if (rctx->b.chip_class == R600) + rctx->db_misc_state.atom.dirty = true; + } +} + static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; @@ -1603,6 +1625,10 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom } else { db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); } + if (rctx->b.chip_class == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) { + /* sample shading and hyperz causes lockups on R6xx chips */ + db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); + } if (a->flush_depthstencil_through_cb) { assert(a->copy_depth || a->copy_stencil); @@ -2418,10 +2444,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha struct r600_command_buffer *cb = &shader->command_buffer; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; - int pos_index = -1, face_index = -1; + int pos_index = -1, face_index = -1, fixed_pt_position_index = -1; unsigned tmp, sid, ufi = 0; int need_linear = 0; - unsigned z_export = 0, stencil_export = 0; + unsigned z_export = 0, stencil_export = 0, mask_export = 0; unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; if (!cb->buf) { @@ -2434,8 +2460,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha for (i = 0; i < rshader->ninput; i++) { if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1) face_index = i; + if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) + fixed_pt_position_index = i; sid = rshader->input[i].spi_sid; @@ -2452,9 +2480,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha tmp |= S_028644_PT_SPRITE_TEX(1); } - if (rshader->input[i].centroid) + if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE) + tmp |= S_028644_SEL_SAMPLE(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) { need_linear = 1; tmp |= S_028644_SEL_LINEAR(1); @@ -2469,16 +2500,21 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha z_export = 1; if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) stencil_export = 1; + if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && + rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) + mask_export = 1; } db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export); + db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export); if (rshader->uses_kill) db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || - rshader->output[i].name == TGSI_SEMANTIC_STENCIL) { + rshader->output[i].name == TGSI_SEMANTIC_STENCIL || + rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { exports_ps |= 1; } } @@ -2497,9 +2533,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha spi_input_z = 0; if (pos_index != -1) { spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | - S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_CENTROID) | S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | - S_0286CC_BARYC_SAMPLE_CNTL(1)); + S_0286CC_BARYC_SAMPLE_CNTL(1)) | + S_0286CC_POSITION_SAMPLE(rshader->input[pos_index].interpolate_location == TGSI_INTERPOLATE_LOC_SAMPLE); spi_input_z |= S_0286D8_PROVIDE_Z_TO_SPI(1); } @@ -2508,6 +2545,10 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } + if (fixed_pt_position_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FIXED_PT_POSITION_ENA(1) | + S_0286D0_FIXED_PT_POSITION_ADDR(rshader->input[fixed_pt_position_index].gpr); + } /* HW bug in original R600 */ if (rctx->b.family == CHIP_R600) @@ -2531,7 +2572,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha /* only set some bits here, the other bits are set in the dsa state */ shader->db_shader_control = db_shader_control; - shader->ps_depth_export = z_export | stencil_export; + shader->ps_depth_export = z_export | stencil_export | mask_export; shader->sprite_coord_enable = sprite_coord_enable; if (rctx->rasterizer) @@ -3046,6 +3087,7 @@ void r600_init_state_functions(struct r600_context *rctx) rctx->b.b.create_sampler_view = r600_create_sampler_view; rctx->b.b.set_framebuffer_state = r600_set_framebuffer_state; rctx->b.b.set_polygon_stipple = r600_set_polygon_stipple; + rctx->b.b.set_min_samples = r600_set_min_samples; rctx->b.b.set_scissor_states = r600_set_scissor_states; rctx->b.b.get_sample_position = r600_get_sample_position; rctx->b.dma_copy = r600_dma_copy; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d9174a59230..68365f9d9af 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1085,6 +1085,26 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s pipe_resource_reference(&cb.buffer, NULL); } +/* set sample xy locations as array of fragment shader constants */ +void r600_set_sample_locations_constant_buffer(struct r600_context *rctx) +{ + struct pipe_constant_buffer constbuf = {0}; + float values[4*16] = {0.0f}; + int i; + struct pipe_context *ctx = &rctx->b.b; + + assert(rctx->framebuffer.nr_samples <= Elements(values)/4); + for (i = 0; i < rctx->framebuffer.nr_samples; i++) { + ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]); + } + + constbuf.user_buffer = values; + constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4; + ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, + R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf); + pipe_resource_reference(&constbuf.buffer, NULL); +} + static void update_shader_atom(struct pipe_context *ctx, struct r600_shader_state *state, struct r600_pipe_shader *shader) diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 3cf7b8800f4..6a5b9640ee0 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -841,6 +841,9 @@ #define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6) #define G_02880C_KILL_ENABLE(x) (((x) >> 6) & 0x1) #define C_02880C_KILL_ENABLE 0xFFFFFFBF +#define S_02880C_MASK_EXPORT_ENABLE(x) (((x) & 0x1) << 8) +#define G_02880C_MASK_EXPORT_ENABLE(x) (((x) >> 8) & 0x1) +#define C_02880C_MASK_EXPORT_ENABLE 0xFFFFFEFF #define S_02880C_DUAL_EXPORT_ENABLE(x) (((x) & 0x1) << 9) #define G_02880C_DUAL_EXPORT_ENABLE(x) (((x) >> 9) & 0x1) #define C_02880C_DUAL_EXPORT_ENABLE 0xFFFFFDFF diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 346ccc9dbc5..d787e5b1238 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -147,25 +147,28 @@ int bc_parser::parse_decls() { bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN && sh->target == TARGET_PS; - unsigned linear = 0, persp = 0, centroid = 1; + bool ij_interpolators[6]; + memset(ij_interpolators, 0, sizeof(ij_interpolators)); for (unsigned i = 0; i < pshader->ninput; ++i) { r600_shader_io & in = pshader->input[i]; bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); if (ps_interp && in.spi_sid) { - if (in.interpolate == TGSI_INTERPOLATE_LINEAR || - in.interpolate == TGSI_INTERPOLATE_COLOR) - linear = 1; - else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE) - persp = 1; - if (in.centroid) - centroid = 2; + int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); + if (k >= 0) + ij_interpolators[k] |= true; } } if (ps_interp) { - unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1; + /* add the egcm ij interpolators to live inputs */ + unsigned num_ij = 0; + for (unsigned i = 0; i < Elements(ij_interpolators); i++) { + num_ij += ij_interpolators[i]; + } + + unsigned mask = (1 << (2 * num_ij)) - 1; unsigned gpr = 0; while (mask) {