X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa6xx%2Ffd6_emit.c;h=b732372f055afa5d954ae62cd1a4b8a4a11ff478;hb=f335a6663d0dd93f5240cefd3783af50c4dcbb47;hp=d17be0949a65f7593901297d1a36e2d66da058cb;hpb=d200d58e65d5e1836be1bd0dbea3c4c6b8edae48;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index d17be0949a6..b732372f055 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -45,24 +45,6 @@ #include "fd6_format.h" #include "fd6_zsa.h" -static uint32_t -shader_t_to_opcode(gl_shader_stage type) -{ - switch (type) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - return CP_LOAD_STATE6_GEOM; - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - case MESA_SHADER_KERNEL: - return CP_LOAD_STATE6_FRAG; - default: - unreachable("bad shader type"); - } -} - /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer @@ -87,7 +69,7 @@ fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, align_sz = align(sz, 4); - OUT_PKT7(ring, shader_t_to_opcode(type), 3 + align_sz); + OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | CP_LOAD_STATE6_0_STATE_SRC(src) | @@ -121,7 +103,7 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean writ debug_assert((regid % 4) == 0); - OUT_PKT7(ring, shader_t_to_opcode(type), 3 + (2 * anum)); + OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum)); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)| CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | @@ -227,15 +209,16 @@ setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entrie /* * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the * stencil border color value in bc->ui[0] but according - * to desc->swizzle and desc->channel, the .x component + * to desc->swizzle and desc->channel, the .x/.w component * is NONE and the stencil value is in the y component. - * Meanwhile the hardware wants this in the .x componetn. + * Meanwhile the hardware wants this in the .w component + * for x24s8 and the .x component for x32_s8x24. */ if ((format == PIPE_FORMAT_X24S8_UINT) || (format == PIPE_FORMAT_X32_S8X24_UINT)) { if (j == 0) { c = 1; - cd = 0; + cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3; } else { continue; } @@ -397,6 +380,27 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO; tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT; break; + case PIPE_SHADER_TESS_CTRL: + sb = SB6_HS_TEX; + opcode = CP_LOAD_STATE6_GEOM; + tex_samp_reg = REG_A6XX_SP_HS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_HS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_HS_TEX_COUNT; + break; + case PIPE_SHADER_TESS_EVAL: + sb = SB6_DS_TEX; + opcode = CP_LOAD_STATE6_GEOM; + tex_samp_reg = REG_A6XX_SP_DS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_DS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_DS_TEX_COUNT; + break; + case PIPE_SHADER_GEOMETRY: + sb = SB6_GS_TEX; + opcode = CP_LOAD_STATE6_GEOM; + tex_samp_reg = REG_A6XX_SP_GS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_GS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_GS_TEX_COUNT; + break; case PIPE_SHADER_FRAGMENT: sb = SB6_FS_TEX; opcode = CP_LOAD_STATE6_FRAG; @@ -571,6 +575,9 @@ fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit, unsigned enable_mask; } s[PIPE_SHADER_TYPES] = { [PIPE_SHADER_VERTEX] = { FD6_GROUP_VS_TEX, 0x7 }, + [PIPE_SHADER_TESS_CTRL] = { FD6_GROUP_HS_TEX, 0x7 }, + [PIPE_SHADER_TESS_EVAL] = { FD6_GROUP_DS_TEX, 0x7 }, + [PIPE_SHADER_GEOMETRY] = { FD6_GROUP_GS_TEX, 0x7 }, [PIPE_SHADER_FRAGMENT] = { FD6_GROUP_FS_TEX, 0x6 }, }; @@ -619,10 +626,8 @@ fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit, needs_border |= fd6_emit_textures(ctx->pipe, stateobj, type, tex, bcolor_offset, v, ctx); - fd6_emit_add_group(emit, stateobj, s[type].state_id, + fd6_emit_take_group(emit, stateobj, s[type].state_id, s[type].enable_mask); - - fd_ringbuffer_del(stateobj); } } @@ -731,21 +736,27 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3 if (!target) continue; - unsigned offset = (so->offsets[i] * info->stride[i] * 4) + - target->buffer_offset; - OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3); /* VPC_SO[i].BUFFER_BASE_LO: */ - OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0); - OUT_RING(ring, target->buffer_size + offset); + OUT_RELOCW(ring, fd_resource(target->buffer)->bo, target->buffer_offset, 0, 0); + OUT_RING(ring, target->buffer_size - target->buffer_offset); - OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3); - OUT_RING(ring, offset); - /* VPC_SO[i].FLUSH_BASE_LO/HI: */ - // TODO just give hw a dummy addr for now.. we should - // be using this an then CP_MEM_TO_REG to set the - // VPC_SO[i].BUFFER_OFFSET for the next draw.. - OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0x100, 0, 0); + if (so->reset & (1 << i)) { + unsigned offset = (so->offsets[i] * info->stride[i] * 4); + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1); + OUT_RING(ring, offset); + } else { + OUT_PKT7(ring, CP_MEM_TO_REG, 3); + OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) | + CP_MEM_TO_REG_0_64B | CP_MEM_TO_REG_0_ACCUMULATE | + CP_MEM_TO_REG_0_CNT(1 - 1)); + OUT_RELOC(ring, control_ptr(fd6_context(ctx), flush_base[i].offset)); + } + + OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE_LO(i), 2); + OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base[i])); + + so->reset &= ~(1 << i); emit->streamout_mask |= (1 << i); } @@ -770,20 +781,31 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3 OUT_RING(ring, REG_A6XX_VPC_SO_PROG); OUT_RING(ring, tf->prog[i]); } - - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); - OUT_RING(ring, 0x0); } else { OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 4); OUT_RING(ring, REG_A6XX_VPC_SO_CNTL); OUT_RING(ring, 0); OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL); OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); - OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); } +} + +static void +fd6_emit_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v, + enum pipe_shader_type type, enum fd6_state_id id, unsigned enable_mask) +{ + struct fd_context *ctx = emit->ctx; + + if (v && ctx->dirty_shader[type] & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { + struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer( + ctx->batch->submit, v->shader->ubo_state.cmdstream_size, + FD_RINGBUFFER_STREAMING); + + ir3_emit_user_consts(ctx->screen, v, constobj, &ctx->constbuf[type]); + ir3_emit_ubos(ctx->screen, v, constobj, &ctx->constbuf[type]); + fd6_emit_take_group(emit, constobj, id, enable_mask); + } } void @@ -792,8 +814,11 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) struct fd_context *ctx = emit->ctx; struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; const struct fd6_program_state *prog = fd6_emit_get_prog(emit); - const struct ir3_shader_variant *vp = emit->vs; - const struct ir3_shader_variant *fp = emit->fs; + const struct ir3_shader_variant *vs = emit->vs; + const struct ir3_shader_variant *hs = emit->hs; + const struct ir3_shader_variant *ds = emit->ds; + const struct ir3_shader_variant *gs = emit->gs; + const struct ir3_shader_variant *fs = emit->fs; const enum fd_dirty_3d_state dirty = emit->dirty; bool needs_border = false; @@ -803,19 +828,14 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) * we might at some point decide to do sysmem in some cases when * blend is enabled: */ - if (fp->fb_read) + if (fs->fb_read) ctx->batch->gmem_reason |= FD_GMEM_FB_READ; if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) { struct fd_ringbuffer *state; state = build_vbo_state(emit, emit->vs); - fd6_emit_add_group(emit, state, FD6_GROUP_VBO, 0x6); - fd_ringbuffer_del(state); - - state = build_vbo_state(emit, emit->bs); - fd6_emit_add_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1); - fd_ringbuffer_del(state); + fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x7); } if (dirty & FD_DIRTY_ZSA) { @@ -831,12 +851,10 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) struct fd_ringbuffer *state; state = build_lrz(emit, false); - fd6_emit_add_group(emit, state, FD6_GROUP_LRZ, 0x6); - fd_ringbuffer_del(state); + fd6_emit_take_group(emit, state, FD6_GROUP_LRZ, 0x6); state = build_lrz(emit, true); - fd6_emit_add_group(emit, state, FD6_GROUP_LRZ_BINNING, 0x1); - fd_ringbuffer_del(state); + fd6_emit_take_group(emit, state, FD6_GROUP_LRZ_BINNING, 0x1); } if (dirty & FD_DIRTY_STENCIL_REF) { @@ -889,6 +907,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) } if (dirty & FD_DIRTY_PROG) { + fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG, 0x7); fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, 0x6); fd6_emit_add_group(emit, prog->binning_stateobj, FD6_GROUP_PROG_BINNING, 0x1); @@ -934,8 +953,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) nr = 0; OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); - OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) | - COND(fp->writes_smask && pfb->samples > 1, + OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) | + COND(fs->writes_smask && pfb->samples > 1, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK)); OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr)); @@ -943,30 +962,23 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr)); } -#define DIRTY_CONST (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST | \ - FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE) - - if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) { - struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer( - ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); - - OUT_WFI5(vsconstobj); - ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info); - fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7); - fd_ringbuffer_del(vsconstobj); - } - - if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) { - struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer( - ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); - - OUT_WFI5(fsconstobj); - ir3_emit_fs_consts(fp, fsconstobj, ctx); - fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6); - fd_ringbuffer_del(fsconstobj); + fd6_emit_consts(emit, vs, PIPE_SHADER_VERTEX, FD6_GROUP_VS_CONST, 0x7); + fd6_emit_consts(emit, hs, PIPE_SHADER_TESS_CTRL, FD6_GROUP_HS_CONST, 0x7); + fd6_emit_consts(emit, ds, PIPE_SHADER_TESS_EVAL, FD6_GROUP_DS_CONST, 0x7); + fd6_emit_consts(emit, gs, PIPE_SHADER_GEOMETRY, FD6_GROUP_GS_CONST, 0x7); + fd6_emit_consts(emit, fs, PIPE_SHADER_FRAGMENT, FD6_GROUP_FS_CONST, 0x6); + + /* if driver-params are needed, emit each time: */ + if (ir3_needs_vs_driver_params(vs)) { + struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer( + ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING); + ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info); + fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, 0x7); + } else { + fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, 0x7); } - struct ir3_stream_output_info *info = &vp->shader->stream_output; + struct ir3_stream_output_info *info = &vs->shader->stream_output; if (info->num_outputs) fd6_emit_streamout(ring, emit, info); @@ -1000,6 +1012,9 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_RING(ring, blend_control); } + OUT_PKT4(ring, REG_A6XX_RB_DITHER_CNTL, 1); + OUT_RING(ring, blend->rb_dither_cntl); + OUT_PKT4(ring, REG_A6XX_SP_BLEND_CNTL, 1); OUT_RING(ring, blend->sp_blend_cntl); } @@ -1022,19 +1037,35 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_RING(ring, A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); } - needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vp); - needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fp); + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs); + if (hs) { + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_CTRL, hs); + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_TESS_EVAL, ds); + } + if (gs) { + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs); + } + needs_border |= fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs); if (needs_border) emit_border_color(ctx, ring); - if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & - (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) { + if (hs) { + debug_assert(hs->image_mapping.num_ibo == 0); + debug_assert(ds->image_mapping.num_ibo == 0); + } + if (gs) { + debug_assert(gs->image_mapping.num_ibo == 0); + } + +#define DIRTY_IBO (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE | \ + FD_DIRTY_SHADER_PROG) + if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_IBO) { struct fd_ringbuffer *state = - fd6_build_ibo_state(ctx, fp, PIPE_SHADER_FRAGMENT); + fd6_build_ibo_state(ctx, fs, PIPE_SHADER_FRAGMENT); struct fd_ringbuffer *obj = fd_submit_new_ringbuffer( - ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING); - const struct ir3_ibo_mapping *mapping = &fp->image_mapping; + ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING); + const struct ir3_ibo_mapping *mapping = &fs->image_mapping; OUT_PKT7(obj, CP_LOAD_STATE6, 3); OUT_RING(obj, CP_LOAD_STATE6_0_DST_OFF(0) | @@ -1047,11 +1078,18 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_PKT4(obj, REG_A6XX_SP_IBO_LO, 2); OUT_RB(obj, state); + /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could + * de-duplicate this from program->config_stateobj + */ OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1); OUT_RING(obj, mapping->num_ibo); - fd6_emit_add_group(emit, obj, FD6_GROUP_IBO, 0x6); - fd_ringbuffer_del(obj); + ir3_emit_ssbo_sizes(ctx->screen, fs, obj, + &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]); + ir3_emit_image_dims(ctx->screen, fs, obj, + &ctx->shaderimg[PIPE_SHADER_FRAGMENT]); + + fd6_emit_take_group(emit, obj, FD6_GROUP_IBO, 0x6); fd_ringbuffer_del(state); } @@ -1059,7 +1097,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups); for (unsigned i = 0; i < emit->num_groups; i++) { struct fd6_state_group *g = &emit->groups[i]; - unsigned n = fd_ringbuffer_size(g->stateobj) / 4; + unsigned n = g->stateobj ? + fd_ringbuffer_size(g->stateobj) / 4 : 0; if (n == 0) { OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | @@ -1075,7 +1114,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) OUT_RB(ring, g->stateobj); } - fd_ringbuffer_del(g->stateobj); + if (g->stateobj) + fd_ringbuffer_del(g->stateobj); } emit->num_groups = 0; } @@ -1151,15 +1191,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0xfffff); -/* -t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) -0000000500024048: 70d08003 00000000 001c5000 00000005 -t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) -0000000500024058: 70d08003 00000010 001c7000 00000005 - -t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) -0000000500024068: 70268000 -*/ + OUT_WFI5(ring); WRITE(REG_A6XX_RB_CCU_CNTL, 0x7c400004); WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x00100000); @@ -1173,14 +1205,14 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880); - WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0); - WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x00000410); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000); + WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430); WRITE(REG_A6XX_SP_IBO_COUNT, 0); WRITE(REG_A6XX_SP_UNKNOWN_B182, 0); WRITE(REG_A6XX_HLSQ_UNKNOWN_BB11, 0); WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000); WRITE(REG_A6XX_UCHE_CLIENT_PF, 4); - WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x0); + WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1); WRITE(REG_A6XX_SP_UNKNOWN_AB00, 0x5); WRITE(REG_A6XX_VFD_UNKNOWN_A009, 0x00000001); WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010); @@ -1191,12 +1223,8 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0); WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2); - WRITE(REG_A6XX_RB_RENDER_CONTROL0, 0x401); - WRITE(REG_A6XX_RB_RENDER_CONTROL1, 0); - WRITE(REG_A6XX_RB_FS_OUTPUT_CNTL0, 0); - WRITE(REG_A6XX_RB_SAMPLE_CNTL, 0); WRITE(REG_A6XX_RB_UNKNOWN_8818, 0); WRITE(REG_A6XX_RB_UNKNOWN_8819, 0); WRITE(REG_A6XX_RB_UNKNOWN_881A, 0); @@ -1219,8 +1247,8 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_PC_UNKNOWN_9806, 0); WRITE(REG_A6XX_PC_UNKNOWN_9980, 0); - WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0); - WRITE(REG_A6XX_PC_UNKNOWN_9B06, 0); + WRITE(REG_A6XX_PC_PRIMITIVE_CNTL_6, 0); + WRITE(REG_A6XX_PC_UNKNOWN_9B07, 0); WRITE(REG_A6XX_SP_UNKNOWN_A81B, 0); @@ -1237,7 +1265,10 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3); WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0); - WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); + /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309 + * but this seems to kill texture gather offsets. + */ + WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2); WRITE(REG_A6XX_RB_UNKNOWN_8804, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0); @@ -1269,12 +1300,6 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) OUT_PKT4(ring, REG_A6XX_VPC_SO_BUF_CNTL, 1); OUT_RING(ring, 0x00000000); /* VPC_SO_BUF_CNTL */ - OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1); - OUT_RING(ring, 0x00000000); - - OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1); - OUT_RING(ring, 0x00000000); - OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1); OUT_RING(ring, 0x00000000); @@ -1319,7 +1344,7 @@ fd6_framebuffer_barrier(struct fd_context *ctx) OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, 0x00000013); - OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0); + OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); OUT_RING(ring, seqno); OUT_RING(ring, 0xffffffff); OUT_RING(ring, 0x00000010); @@ -1333,17 +1358,23 @@ fd6_framebuffer_barrier(struct fd_context *ctx) OUT_PKT7(ring, CP_UNK_A6XX_14, 4); OUT_RING(ring, 0x00000000); - OUT_RELOC(ring, fd6_ctx->blit_mem, 0, 0, 0); + OUT_RELOC(ring, control_ptr(fd6_ctx, seqno)); OUT_RING(ring, seqno); } +void +fd6_emit_init_screen(struct pipe_screen *pscreen) +{ + struct fd_screen *screen = fd_screen(pscreen); + screen->emit_const = fd6_emit_const; + screen->emit_const_bo = fd6_emit_const_bo; + screen->emit_ib = fd6_emit_ib; + screen->mem_to_mem = fd6_mem_to_mem; +} + void fd6_emit_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); - ctx->emit_const = fd6_emit_const; - ctx->emit_const_bo = fd6_emit_const_bo; - ctx->emit_ib = fd6_emit_ib; - ctx->mem_to_mem = fd6_mem_to_mem; ctx->framebuffer_barrier = fd6_framebuffer_barrier; }