X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa4xx%2Ffd4_emit.c;h=e6848a1aecc1e882378a73cb8d0d85282921ddcd;hb=00f9d4b1fdbfd9bf00c4eb2160ae85057369e8a1;hp=9ce93f6e33f93728b81b8a9663f5cdfee96baf4c;hpb=7f8fd02dc7cad1ddcfb610db10ffbb41e3e34e7d;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 9ce93f6e33f..e6848a1aecc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -1,5 +1,3 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - /* * Copyright (C) 2014 Rob Clark * @@ -30,7 +28,8 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_helpers.h" -#include "util/u_format.h" +#include "util/format/u_format.h" +#include "util/u_viewport.h" #include "freedreno_resource.h" #include "freedreno_query_hw.h" @@ -44,46 +43,43 @@ #include "fd4_format.h" #include "fd4_zsa.h" -static const enum adreno_state_block sb[] = { - [SHADER_VERTEX] = SB_VERT_SHADER, - [SHADER_FRAGMENT] = SB_FRAG_SHADER, -}; +#include "ir3_const.h" /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ -void -fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +static void +fd4_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; - enum adreno_state_src src; + enum a4xx_state_src src; debug_assert((regid % 4) == 0); debug_assert((sizedwords % 4) == 0); if (prsc) { sz = 0; - src = 0x2; // TODO ?? + src = SS4_INDIRECT; } else { sz = sizedwords; - src = SS_DIRECT; + src = SS4_DIRECT; } - OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | - CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(src) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } else { - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; } for (i = 0; i < sz; i++) { @@ -92,43 +88,70 @@ fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, } static void -fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd4_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { + uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); - debug_assert((num % 4) == 0); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(num/4)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + anum); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(type)) | + CP_LOAD_STATE4_0_NUM_UNIT(anum/4)); + OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); for (i = 0; i < num; i++) { if (prscs[i]) { - if (write) { - OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); - } else { - OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); - } + OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); } else { OUT_RING(ring, 0xbad00000 | (i << 16)); } } + + for (; i < anum; i++) + OUT_RING(ring, 0xffffffff); +} + +static bool +is_stateobj(struct fd_ringbuffer *ring) +{ + return false; +} + +void +emit_const(struct fd_ringbuffer *ring, + const struct ir3_shader_variant *v, uint32_t dst_offset, + uint32_t offset, uint32_t size, const void *user_buffer, + struct pipe_resource *buffer) +{ + /* TODO inline this */ + assert(dst_offset + size <= v->constlen * 4); + fd4_emit_const(ring, v->type, dst_offset, + offset, size, user_buffer, buffer); +} + +static void +emit_const_bo(struct fd_ringbuffer *ring, + const struct ir3_shader_variant *v, uint32_t dst_offset, + uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) +{ + /* TODO inline this */ + assert(dst_offset + num <= v->constlen * 4); + fd4_emit_const_bo(ring, v->type, dst_offset, num, prscs, offsets); } static void emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, - enum adreno_state_block sb, struct fd_texture_stateobj *tex, + enum a4xx_state_block sb, struct fd_texture_stateobj *tex, const struct ir3_shader_variant *v) { static const uint32_t bcolor_reg[] = { - [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, - [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, + [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, }; struct fd4_context *fd4_ctx = fd4_context(ctx); bool needs_border = false; @@ -144,13 +167,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, num_samplers = align(tex->num_samplers, 2); /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(num_samplers)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(num_samplers)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { static const struct fd4_sampler_stateobj dummy_sampler = {}; const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ? @@ -172,13 +195,13 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, unsigned num_textures = tex->num_textures + v->astc_srgb.count; /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(num_textures)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(sb) | + CP_LOAD_STATE4_0_NUM_UNIT(num_textures)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_textures; i++) { static const struct fd4_pipe_sampler_view dummy_view = {}; const struct fd4_pipe_sampler_view *view = tex->textures[i] ? @@ -191,6 +214,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); + if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT) + rsc = rsc->stencil; OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x00000000); @@ -263,13 +288,13 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, } /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) | + CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < nr_bufs; i++) { OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) | A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) | @@ -280,29 +305,29 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, } /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(nr_bufs)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (8 * nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | + CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | + CP_LOAD_STATE4_0_STATE_BLOCK(SB4_FS_TEX) | + CP_LOAD_STATE4_0_NUM_UNIT(nr_bufs)); + OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | + CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); for (i = 0; i < nr_bufs; i++) { if (bufs[i]) { struct fd_resource *rsc = fd_resource(bufs[i]->texture); - enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format); + enum pipe_format format = fd_gmem_restore_format(bufs[i]->format); /* The restore blit_zs shader expects stencil in sampler 0, * and depth in sampler 1 */ if (rsc->stencil && (i == 0)) { rsc = rsc->stencil; - format = fd4_gmem_restore_format(rsc->base.b.format); + format = fd_gmem_restore_format(rsc->base.format); } /* note: PIPE_BUFFER disallowed for surfaces */ unsigned lvl = bufs[i]->u.tex.level; - struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); + struct fdl_slice *slice = fd_resource_slice(rsc, lvl); unsigned offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer); /* z32 restore is accomplished using depth write. If there is @@ -324,7 +349,7 @@ fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) | A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height)); - OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch) | A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format))); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); @@ -377,9 +402,6 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) continue; if (vp->inputs[i].sysval) { switch(vp->inputs[i].slot) { - case SYSTEM_VALUE_BASE_VERTEX: - /* handled elsewhere */ - break; case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: vertex_regid = vp->inputs[i].regid; break; @@ -404,7 +426,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[elem->vertex_buffer_index]; - struct fd_resource *rsc = fd_resource(vb->buffer); + struct fd_resource *rsc = fd_resource(vb->buffer.resource); enum pipe_format pfmt = elem->src_format; enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt); bool switchnext = (i != last) || @@ -415,7 +437,14 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) uint32_t fs = util_format_get_blocksize(pfmt); uint32_t off = vb->buffer_offset + elem->src_offset; uint32_t size = fd_bo_size(rsc->bo) - off; - debug_assert(fmt != ~0); + debug_assert(fmt != VFMT4_NONE); + +#ifdef DEBUG + /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10 + */ + if (off > fd_bo_size(rsc->bo)) + continue; +#endif OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4); OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | @@ -437,7 +466,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) COND(isint, A4XX_VFD_DECODE_INSTR_INT) | COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); - total_in += vp->inputs[i].ncomp; + total_in += util_bitcount(vp->inputs[i].compmask); j++; } } @@ -500,11 +529,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, { const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); - uint32_t dirty = emit->dirty; + const enum fd_dirty_3d_state dirty = emit->dirty; emit_marker(ring, 5); - if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { + if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->binning_pass) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0}; @@ -550,14 +579,17 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); - bool fragz = fp->has_kill | fp->writes_pos; + bool fragz = fp->no_earlyz | fp->has_kill | fp->writes_pos; + bool clamp = !ctx->rasterizer->depth_clip_near; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, zsa->rb_depth_control | + COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) | COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) | - COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); + COND(fragz && fp->fragcoord_compmask != 0, + A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); /* maybe this register/bitfield needs a better name.. this * appears to be just disabling early-z @@ -565,7 +597,8 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->gras_alpha_control | COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) | - COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS)); + COND(fragz && fp->fragcoord_compmask != 0, + A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS)); } if (dirty & FD_DIRTY_RASTERIZER) { @@ -599,7 +632,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd4_rasterizer_stateobj(ctx->rasterizer); uint32_t val = rast->pc_prim_vtx_cntl; - if (info->indexed && info->primitive_restart) + if (info->index_size && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE); @@ -616,7 +649,8 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, rast->pc_prim_vtx_cntl2); } - if (dirty & FD_DIRTY_SCISSOR) { + /* NOTE: scissor enabled bit is part of rasterizer state: */ + if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2); @@ -632,7 +666,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & FD_DIRTY_VIEWPORT) { - fd_wfi(ctx, ring); + fd_wfi(ctx->batch, ring); OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); @@ -642,6 +676,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } + if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { + float zmin, zmax; + int depth = 24; + if (ctx->batch->framebuffer.zsbuf) { + depth = util_format_get_component_bits( + pipe_surface_format(ctx->batch->framebuffer.zsbuf), + UTIL_FORMAT_COLORSPACE_ZS, 0); + } + util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz, + &zmin, &zmax); + + OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2); + if (depth == 32) { + OUT_RING(ring, fui(zmin)); + OUT_RING(ring, fui(zmax)); + } else if (depth == 16) { + OUT_RING(ring, (uint32_t)(zmin * 0xffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffff)); + } else { + OUT_RING(ring, (uint32_t)(zmin * 0xffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffff)); + } + } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; unsigned n = pfb->nr_cbufs; @@ -652,9 +710,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ - ir3_emit_consts(vp, ring, ctx, emit->info, dirty); - if (!emit->key.binning_pass) - ir3_emit_consts(fp, ring, ctx, emit->info, dirty); + ir3_emit_vs_consts(vp, ring, ctx, emit->info); + if (!emit->binning_pass) + ir3_emit_fs_consts(fp, ring, ctx); } if ((dirty & FD_DIRTY_BLEND)) { @@ -667,17 +725,13 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, bool is_int = util_format_is_pure_integer(format); bool has_alpha = util_format_has_alpha(format); uint32_t control = blend->rb_mrt[i].control; - uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; if (is_int) { control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); } - if (has_alpha) { - blend_control |= blend->rb_mrt[i].blend_control_rgb; - } else { - blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + if (!has_alpha) { control &= ~A4XX_RB_MRT_CONTROL_BLEND2; } @@ -685,7 +739,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend_control); + OUT_RING(ring, blend->rb_mrt[i].blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); @@ -715,21 +769,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } - if (dirty & FD_DIRTY_VERTTEX) { - if (vp->has_samp) - emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp); - else - dirty &= ~FD_DIRTY_VERTTEX; - } - - if (dirty & FD_DIRTY_FRAGTEX) { - if (fp->has_samp) - emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp); - else - dirty &= ~FD_DIRTY_FRAGTEX; - } + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB4_VS_TEX, &ctx->tex[PIPE_SHADER_VERTEX], vp); - ctx->dirty &= ~dirty; + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) + emit_textures(ctx, ring, SB4_FS_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT], fp); } /* emit setup at begin of new cmdstream buffer (don't rely on previous @@ -847,10 +891,10 @@ fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) /* we don't use this yet.. probably best to disable.. */ OUT_PKT3(ring, CP_SET_DRAW_STATE, 2); - OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) | - CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE_0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2); OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */ @@ -880,28 +924,42 @@ fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); - OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); - OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); - OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x0); fd_hw_query_enable(batch, ring); - - ctx->needs_rb_fbd = true; } static void -fd4_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +fd4_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst, + unsigned dst_off, struct pipe_resource *src, unsigned src_off, + unsigned sizedwords) { - __OUT_IB(ring, true, target); + struct fd_bo *src_bo = fd_resource(src)->bo; + struct fd_bo *dst_bo = fd_resource(dst)->bo; + unsigned i; + + for (i = 0; i < sizedwords; i++) { + OUT_PKT3(ring, CP_MEM_TO_MEM, 3); + OUT_RING(ring, 0x00000000); + OUT_RELOC(ring, dst_bo, dst_off, 0, 0); + OUT_RELOC(ring, src_bo, src_off, 0, 0); + + dst_off += 4; + src_off += 4; + } +} + +void +fd4_emit_init_screen(struct pipe_screen *pscreen) +{ + struct fd_screen *screen = fd_screen(pscreen); + + screen->emit_ib = fd4_emit_ib; + screen->mem_to_mem = fd4_mem_to_mem; } void fd4_emit_init(struct pipe_context *pctx) { - struct fd_context *ctx = fd_context(pctx); - ctx->emit_const = fd4_emit_const; - ctx->emit_const_bo = fd4_emit_const_bo; - ctx->emit_ib = fd4_emit_ib; }