<ul>
<li>GL_AMD_pinned_memory on r600, radeonsi</li>
+<li>GL_ARB_draw_instanced on freedreno</li>
<li>GL_ARB_pipeline_statistics_query on i965, nvc0, r600, radeonsi, softpipe</li>
</ul>
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
{
- uint32_t i, j, last = 0;
+ int32_t i, j, last = -1;
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
- unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
+ unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+
+ for (i = 0; i < vp->inputs_count; i++) {
+ uint8_t semantic = sem2name(vp->inputs[i].semantic);
+ if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
+ vertex_regid = vp->inputs[i].regid;
+ else if (semantic == TGSI_SEMANTIC_INSTANCEID)
+ instance_regid = vp->inputs[i].regid;
+ else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+ last = i;
+ }
/* hw doesn't like to be configured for zero vbo's, it seems: */
- if (vtx->vtx->num_elements == 0)
+ if (vtx->vtx->num_elements == 0 &&
+ vertex_regid == regid(63, 0) &&
+ instance_regid == regid(63, 0))
return;
- for (i = 0; i < n; i++)
- if (vp->inputs[i].compmask)
- last = i;
-
for (i = 0, j = 0; i <= last; i++) {
+ uint8_t semantic = sem2name(vp->inputs[i].semantic);
+ assert(semantic == 0);
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
struct fd_resource *rsc = fd_resource(vb->buffer);
enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
- bool switchnext = (i != last);
+ bool switchnext = (i != last) ||
+ vertex_regid != regid(63, 0) ||
+ instance_regid != regid(63, 0);
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
- A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
- A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+ A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
}
void
}
}
+ /* emit driver params every time */
+ if (emit->info && emit->prog == &ctx->prog) {
+ uint32_t vertex_params[4] = {
+ emit->info->indexed ? emit->info->index_bias : emit->info->start,
+ 0,
+ 0,
+ 0
+ };
+ if (vp->constlen > vp->first_driver_param) {
+ fd3_emit_constant(ring, SB_VERT_SHADER, vp->first_driver_param * 4,
+ 0, 4, vertex_params, NULL);
+ }
+ }
+
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_VERTEXID_NOBASE:
return 1;
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
case PIPE_CAP_PRIMITIVE_RESTART:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_TGSI_INSTANCEID:
+ return is_a3xx(screen) && glsl130;
+
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
return 0;
*/
struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
+ /* For vertex shaders, keep track of the system values sources */
+ struct ir3_instruction *vertex_id, *basevertex, *instance_id;
+
struct tgsi_parse_context parser;
unsigned type;
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[8];
+ /* for looking up which system value is which */
+ unsigned sysval_semantics[8];
+
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
ctx->atomic = false;
ctx->frag_pos = NULL;
ctx->frag_face = NULL;
+ ctx->vertex_id = NULL;
+ ctx->instance_id = NULL;
ctx->tmp_src = NULL;
ctx->using_tmp_dst = false;
* the assembler what the max addr reg value can be:
*/
if (info->indirect_files & FM(CONSTANT))
- so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);
+ so->constlen = ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
i = 0;
i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
/* any others? we don't track arrays for const..*/
/* Immediates go after constants: */
- so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
+ if (so->type == SHADER_VERTEX) {
+ so->first_driver_param = info->file_max[TGSI_FILE_CONSTANT] + 1;
+ so->first_immediate = so->first_driver_param + 1;
+ } else {
+ so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
+ }
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
ntmp += 8 * 4;
nout = SCALAR_REGS(OUTPUT);
- nin = SCALAR_REGS(INPUT);
+ nin = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE);
/* for outermost block, 'inputs' are the actual shader INPUT
* register file. Reads from INPUT registers always go back to
block->temporaries[n] = instr;
}
break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ switch (ctx->sysval_semantics[n >> 2]) {
+ case TGSI_SEMANTIC_VERTEXID_NOBASE:
+ instr = ctx->vertex_id;
+ break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ instr = ctx->basevertex;
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ instr = ctx->instance_id;
+ break;
+ }
+ break;
}
return instr;
*/
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_SYSTEM_VALUE:
/* uses SSA */
break;
default:
}
}
+static void /* handle one TGSI SYSTEM_VALUE declaration: record its semantic, claim a slot in so->inputs, and create the instruction that supplies its value */
+decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+ struct ir3_shader_variant *so = ctx->so;
+ unsigned r = regid(so->inputs_count, 0); /* register id built from the index of the slot about to be claimed */
+ unsigned n = so->inputs_count++; /* index of the so->inputs entry owned by this declaration */
+
+ DBG("decl sv -> r%d", n);
+ /* both so->inputs and ctx->sysval_semantics are fixed-size arrays; check the indices used below */
+ compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
+ compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics));
+ /* remember which semantic lives at this system-value index, then describe the new input slot */
+ ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name;
+ so->inputs[n].semantic = decl_semantic(&decl->Semantic);
+ so->inputs[n].compmask = 1; /* only bit 0 of the component mask is set */
+ so->inputs[n].regid = r;
+ so->inputs[n].inloc = ctx->next_inloc;
+ so->inputs[n].interpolate = false;
+
+ struct ir3_instruction *instr = NULL; /* stays NULL if the semantic is not handled below */
+ /* pick the source instruction for the value and cache it in ctx for later lookups */
+ switch (decl->Semantic.Name) {
+ case TGSI_SEMANTIC_VERTEXID_NOBASE:
+ ctx->vertex_id = instr = create_input(ctx->block, NULL, r);
+ break;
+ case TGSI_SEMANTIC_BASEVERTEX:
+ ctx->basevertex = instr = instr_create(ctx, 1, 0); /* not a shader input: built as an instruction with a const-file source */
+ instr->cat1.src_type = get_stype(ctx);
+ instr->cat1.dst_type = get_stype(ctx);
+ ir3_reg_create(instr, 0, 0); /* presumably the destination register -- TODO confirm against ir3_reg_create */
+ ir3_reg_create(instr, regid(so->first_driver_param, 0), IR3_REG_CONST); /* source: const register at first_driver_param */
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ ctx->instance_id = instr = create_input(ctx->block, NULL, r);
+ break;
+ default:
+ compile_error(ctx, "Unknown semantic: %s\n",
+ tgsi_semantic_names[decl->Semantic.Name]);
+ }
+ /* NOTE(review): if compile_error() returns, instr is still NULL when stored here -- confirm callers tolerate that */
+ ctx->block->inputs[r] = instr;
+ ctx->next_inloc++; /* the slot, inloc and total_in are consumed for every semantic, BASEVERTEX included */
+ so->total_in++;
+}
+
static void
decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
decl_out(ctx, decl);
} else if (file == TGSI_FILE_INPUT) {
decl_in(ctx, decl);
+ } else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+ decl_sv(ctx, decl);
}
if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
* (not regid, because TGSI thinks in terms of vec4 registers,
* not scalar registers)
*/
+ unsigned first_driver_param;
unsigned first_immediate;
unsigned immediates_count;
struct {