X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa4xx%2Ffd4_emit.c;h=81ed16ce8acc1a2e56d8440bac531bd1d0a3c6e5;hb=899bd63acefd49a668e11c42d2ad92fa55aa157d;hp=cf5dd7b0f17ed8b3d4e3305c3e8ac8cae27f5abb;hpb=b00e3f221b3f6dd0e87697c53331fd033b6e8676;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index cf5dd7b0f17..81ed16ce8ac 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "fd4_emit.h" #include "fd4_blend.h" @@ -133,7 +134,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, void *ptr; u_upload_alloc(fd4_ctx->border_color_uploader, - 0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off, + 0, BORDER_COLOR_UPLOAD_SIZE, + BORDER_COLOR_UPLOAD_SIZE, &off, &fd4_ctx->border_color_buf, &ptr); @@ -185,7 +187,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct fd4_pipe_sampler_view *view = tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i]) : &dummy_view; - unsigned start = fd_sampler_first_level(&view->base); OUT_RING(ring, view->texconst0); OUT_RING(ring, view->texconst1); @@ -193,8 +194,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); - uint32_t offset = fd_resource_offset(rsc, start, 0); - OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0); + OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x00000000); } @@ -286,7 +286,8 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format))); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); OUT_RING(ring, 0x00000000); @@ -327,12 +328,15 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) int32_t i, j, last = -1; uint32_t total_in = 0; const struct fd_vertex_state *vtx = emit->vtx; - struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); + const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); unsigned vertex_regid = regid(63, 0); unsigned instance_regid = regid(63, 0); unsigned vtxcnt_regid = regid(63, 0); + /* Note that sysvals come *after* normal inputs: */ for (i = 0; i < vp->inputs_count; i++) { + if (!vp->inputs[i].compmask) + continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { case SYSTEM_VALUE_BASE_VERTEX: @@ -351,19 +355,11 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) unreachable("invalid system value"); break; } - } else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) { + } else if (i < vtx->vtx->num_elements) { last = i; } } - - /* hw doesn't like to be configured for zero vbo's, it seems: */ - if ((vtx->vtx->num_elements == 0) && - (vertex_regid == regid(63, 0)) && - (instance_regid == regid(63, 0)) && - (vtxcnt_regid == regid(63, 0))) - return; - for (i = 0, j = 0; i <= last; i++) { assert(!vp->inputs[i].sysval); if (vp->inputs[i].compmask) { @@ -408,6 +404,38 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) } } + /* hw doesn't like to be configured for zero vbo's, it seems: */ + if (last < 0) { + /* just recycle the shader bo, we just need to point to *something* + * valid: + */ + struct fd_bo *dummy_vbo = vp->bo; + bool switchnext = (vertex_regid != regid(63, 0)) || + (instance_regid != regid(63, 0)) || + (vtxcnt_regid != regid(63, 0)); + + OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) | + COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); + OUT_RELOC(ring, dummy_vbo, 0, 0, 0); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1)); + OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1)); + + OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1); + OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL | + A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) | + A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) | + A4XX_VFD_DECODE_INSTR_SWAP(XYZW) | + A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) | + A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) | + A4XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); + + total_in = 1; + j = 1; + } + OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5); OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) | 0xa0000 | /* XXX */ @@ -432,8 +460,8 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { - struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); - struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); + const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); + const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); uint32_t dirty = emit->dirty; emit_marker(ring, 5); @@ -457,24 +485,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); } - if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) { - uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control; - - /* I suppose if we needed to (which I don't *think* we need - * to), we could emit this for binning pass too. But we - * would need to keep a different patch-list for binning - * vs render pass. - */ - - OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); - OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches); - } - - if (dirty & FD_DIRTY_ZSA) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST; OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); - OUT_RING(ring, zsa->rb_alpha_control); + OUT_RING(ring, rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); @@ -498,14 +518,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, zsa->rb_depth_control | - COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE)); + COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) | + COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); /* maybe this register/bitfield needs a better name.. this * appears to be just disabling early-z */ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->gras_alpha_control | - COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE)); + COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) | + COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS)); } if (dirty & FD_DIRTY_RASTERIZER) { @@ -535,8 +557,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, */ if (emit->info) { const struct pipe_draw_info *info = emit->info; - uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) - ->pc_prim_vtx_cntl; + struct fd4_rasterizer_stateobj *rast = + fd4_rasterizer_stateobj(ctx->rasterizer); + uint32_t val = rast->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; @@ -552,7 +575,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); - OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ + OUT_RING(ring, rast->pc_prim_vtx_cntl2); } if (dirty & FD_DIRTY_SCISSOR) { @@ -581,15 +604,19 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) { + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); + unsigned n = pfb->nr_cbufs; + /* if we have depth/stencil, we need at least on MRT: */ + if (pfb->zsbuf) + n = MAX2(1, n); + fd4_program_emit(ring, emit, n, pfb->cbufs); } if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ - ir3_emit_consts(vp, ring, emit->info, dirty); + ir3_emit_consts(vp, ring, ctx, emit->info, dirty); if (!emit->key.binning_pass) - ir3_emit_consts(fp, ring, emit->info, dirty); + ir3_emit_consts(fp, ring, ctx, emit->info, dirty); /* mark clean after emitting consts: */ ctx->prog.dirty = 0; } @@ -599,11 +626,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t i; for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format( + ctx->framebuffer.cbufs[i]); + bool is_int = util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A4XX_RB_MRT_CONTROL_BLEND2; + } + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].control); + OUT_RING(ring, control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend->rb_mrt[i].blend_control); + OUT_RING(ring, blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); @@ -611,17 +657,50 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } - if (dirty & FD_DIRTY_BLEND_COLOR) { + if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) { struct pipe_blend_color *bcolor = &ctx->blend_color; - OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4); - OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) | + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + float factor = 65535.0; + int i; + + for (i = 0; i < pfb->nr_cbufs; i++) { + enum pipe_format format = pipe_surface_format(pfb->cbufs[i]); + const struct util_format_description *desc = + util_format_description(format); + int j; + + if (desc->is_mixed) + continue; + + j = util_format_get_first_non_void_channel(format); + if (j == -1) + continue; + + if (desc->channel[j].size > 8 || !desc->channel[j].normalized || + desc->channel[j].pure_integer) + continue; + + /* Just use the first unorm8/snorm8 render buffer. Can't keep + * everyone happy. + */ + if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED) + factor = 32767.0; + break; + } + + OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8); + OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); - OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) | + OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0])); + OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); - OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) | + OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1])); + OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); - OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) | + OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2])); + OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); + OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } if (dirty & FD_DIRTY_VERTTEX) { @@ -699,15 +778,6 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1); OUT_RING(ring, 0x00000000); - OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1); - OUT_RING(ring, 0x00000000); - - OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1); - OUT_RING(ring, 0x00000000); - - OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1); - OUT_RING(ring, 0x00000000); - OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4); OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) | A4XX_RB_BLEND_RED_FLOAT(0.0)); @@ -718,9 +788,6 @@ fd4_emit_restore(struct fd_context *ctx) OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) | A4XX_RB_BLEND_ALPHA_FLOAT(1.0)); - OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1); - OUT_RING(ring, 0x3f800000); - OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1); OUT_RING(ring, 0x00000000); @@ -807,13 +874,23 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x0); + fd_hw_query_enable(ctx, ring); + ctx->needs_rb_fbd = true; } +static void +fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringmarker *start, + struct fd_ringmarker *end) +{ + __OUT_IB(ring, true, start, end); +} + void fd4_emit_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); ctx->emit_const = fd4_emit_const; ctx->emit_const_bo = fd4_emit_const_bo; + ctx->emit_ib = fd4_emit_ib; }