From: Ilia Mirkin Date: Mon, 21 Apr 2014 04:28:13 +0000 (-0400) Subject: nvc0: add support for PIPE_CAP_SAMPLE_SHADING X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=af38ef907;p=mesa.git nvc0: add support for PIPE_CAP_SAMPLE_SHADING Signed-off-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 96071be0e89..56b01158a4a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -136,6 +136,7 @@ enum operation OP_DFDY, OP_RDSV, // read system value OP_WRSV, // write system value + OP_PIXLD, // get info about raster object or surfaces OP_QUADOP, OP_QUADON, OP_QUADPOP, @@ -214,6 +215,12 @@ enum operation #define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0)) #define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0)) #define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0)) +#define NV50_IR_SUBOP_PIXLD_COUNT 0 +#define NV50_IR_SUBOP_PIXLD_COVMASK 1 +#define NV50_IR_SUBOP_PIXLD_COVERED 2 +#define NV50_IR_SUBOP_PIXLD_OFFSET 3 +#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4 +#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5 #define NV50_IR_SUBOP_MADSP_SD 0xffff // Yes, we could represent those with DataType. // Or put the type into operation and have a couple 1000 values in that enum. diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index a4b50ee2082..c258b6b4fef 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -113,6 +113,8 @@ private: void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); + void emitPIXLD(const Instruction *); + void emitFlow(const Instruction *); inline void defId(const ValueDef&, const int pos); @@ -1129,6 +1131,14 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask emitPredicate(i); } +void +CodeEmitterGK110::emitPIXLD(const Instruction *i) +{ + emitForm_L(i, 0x7f4, 2, Modifier(0)); + code[1] |= i->subOp << 2; + code[1] |= 0x00070000; +} + void CodeEmitterGK110::emitFlow(const Instruction *i) { @@ -1684,6 +1694,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_TEXBAR: emitTEXBAR(insn); break; + case OP_PIXLD: + emitPIXLD(insn); + break; case OP_BRA: case OP_CALL: case OP_PRERET: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index d486c8d39e2..cef92cfcf55 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -135,6 +135,8 @@ private: void emitVSHL(const Instruction *); void emitVectorSubOp(const Instruction *); + void emitPIXLD(const Instruction *); + inline void defId(const ValueDef&, const int pos); inline void defId(const Instruction *, int d, const int pos); inline void srcId(const ValueRef&, const int pos); @@ -2141,6 +2143,15 @@ CodeEmitterNVC0::emitVSHL(const Instruction *i) code[1] |= 1 << 16; } +void +CodeEmitterNVC0::emitPIXLD(const Instruction *i) +{ + assert(i->encSize == 8); + emitForm_A(i, HEX64(10000000, 00000006)); + code[0] |= i->subOp << 5; + code[1] |= 0x00e00000; +} + bool CodeEmitterNVC0::emitInstruction(Instruction *insn) { @@ -2390,6 +2401,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_VSHL: emitVSHL(insn); break; + case OP_PIXLD: + emitPIXLD(insn); + break; case OP_PHI: case OP_UNION: case OP_CONSTRAINT: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 44b5ecdcb13..ebdeee4050d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1426,6 +1426,27 @@ NVC0LoweringPass::handleRDSV(Instruction *i) bld.mkLoad(TYPE_U32, i->getDef(0), bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); break; + case SV_SAMPLE_INDEX: + // TODO: Properly pass source as an address in the PIX address space + // (which can be of the form [r0+offset]). But this is currently + // unnecessary. + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); + ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; + break; + case SV_SAMPLE_POS: { + Value *off = new_LValue(func, FILE_GPR); + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); + ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; + bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase + + 4 * sym->reg.data.sv.index), + off); + break; + } default: if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 1415eb5d209..e74b25f59c2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -166,6 +166,7 @@ const char *operationStr[OP_LAST + 1] = "dfdy", "rdsv", "wrsv", + "pixld", "quadop", "quadon", "quadpop", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 53c3c3e7cd0..f479cf4d35b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -49,7 +49,7 @@ const uint8_t Target::operationSrcNr[] = 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP 0, // TEXBAR 1, 1, // DFDX, DFDY - 1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP + 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, @@ -112,9 +112,9 @@ const OpClass Target::operationClass[] = OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, // TEXBAR OPCLASS_OTHER, - // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP + // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, - OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, // POPCNT, INSBF, EXTBF, PERMT OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, // ATOM, BAR diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index 10a5fe2e062..95ed849561c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -282,6 +282,8 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; case SV_GRIDID: return kepler ? 0x18 : ~0; + case SV_SAMPLE_INDEX: return 0; + case SV_SAMPLE_POS: return 0; default: return 0xffffffff; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index b6b5bebbad8..76416a0e3b2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -56,6 +56,7 @@ #define NVC0_NEW_TFB_TARGETS (1 << 21) #define NVC0_NEW_IDXBUF (1 << 22) #define NVC0_NEW_SURFACES (1 << 23) +#define NVC0_NEW_MIN_SAMPLES (1 << 24) #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) @@ -182,6 +183,7 @@ struct nvc0_context { struct pipe_clip_state clip; unsigned sample_mask; + unsigned min_samples; boolean vbo_push_hint; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 1e7f7d21213..1df84f3cc5d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -134,12 +134,17 @@ static int nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) { unsigned count = info->prop.fp.numColourResults * 4; - unsigned i, c; + unsigned i, c, ci; - for (i = 0; i < info->numOutputs; ++i) - if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + for (i = 0, ci = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) { for (c = 0; c < 4; ++c) - info->out[i].slot[c] = info->out[i].si * 4 + c; + info->out[i].slot[c] = ci * 4 + c; + ci++; + } + } + + assert(ci == info->prop.fp.numColourResults); if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) info->out[info->io.sampleMask].slot[0] = count++; @@ -539,6 +544,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.genUserClip = prog->vp.num_ucps; info->io.ucpBase = 256; info->io.ucpCBSlot = 15; + info->io.sampleInterp = prog->fp.sample_interp; if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { @@ -551,10 +557,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; } else { if (chipset >= NVISA_GK104_CHIPSET) { - info->io.resInfoCBSlot = 15; info->io.texBindBase = 0x20; info->io.suInfoBase = 0; /* TODO */ } + info->io.resInfoCBSlot = 15; + info->io.sampleInfoBase = 256 + 128; info->io.msInfoCBSlot = 15; info->io.msInfoBase = 0; /* TODO */ } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index 9c184d1f1d5..750d8c10459 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -45,6 +45,7 @@ struct nvc0_program { struct { uint8_t early_z; uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; + uint8_t sample_interp; } fp; struct { uint32_t tess_mode; /* ~0 if defined by the other stage */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 2b8c6b2bb92..045b491c1b6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -181,11 +181,11 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_SAMPLE_SHADING: return 0; case PIPE_CAP_MAX_VIEWPORTS: return 1; case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_SAMPLE_SHADING: return 1; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return 4; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index a52fed02626..bdc3ab5821e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -107,6 +107,8 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; + fp->fp.sample_interp = nvc0->min_samples > 1; + if (!nvc0_program_validate(nvc0, fp)) return; nvc0_program_update_context_state(nvc0, fp, 4); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 05027a151b8..74f8a7630b4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -858,6 +858,16 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; } +static void +nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + if (nvc0->min_samples != min_samples) { + nvc0->min_samples = min_samples; + nvc0->dirty |= NVC0_NEW_MIN_SAMPLES; + } +} static void nvc0_set_framebuffer_state(struct pipe_context *pipe, @@ -1215,6 +1225,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_stencil_ref = nvc0_set_stencil_ref; pipe->set_clip_state = nvc0_set_clip_state; pipe->set_sample_mask = nvc0_set_sample_mask; + pipe->set_min_samples = nvc0_set_min_samples; pipe->set_constant_buffer = nvc0_set_constant_buffer; pipe->set_framebuffer_state = nvc0_set_framebuffer_state; pipe->set_polygon_stipple = nvc0_set_polygon_stipple; @@ -1237,4 +1248,5 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_shader_resources = nvc0_set_shader_resources; nvc0->sample_mask = ~0; + nvc0->min_samples = 1; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index dd71c6587a6..40016fcadcc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -72,7 +72,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - unsigned i; + unsigned i, ms; unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; boolean serialize = FALSE; @@ -180,6 +180,20 @@ nvc0_validate_fb(struct nvc0_context *nvc0) IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); + ms = 1 << ms_mode; + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); + PUSH_DATA (push, 256 + 128); + for (i = 0; i < ms; i++) { + float xy[2]; + nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); + PUSH_DATAf(push, xy[0]); + PUSH_DATAf(push, xy[1]); + } + if (serialize) IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); @@ -452,8 +466,19 @@ nvc0_validate_sample_mask(struct nvc0_context *nvc0) PUSH_DATA (push, mask[1]); PUSH_DATA (push, mask[2]); PUSH_DATA (push, mask[3]); - BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1); - PUSH_DATA (push, 0x01); +} + +static void +nvc0_validate_min_samples(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + int samples; + + samples = util_next_power_of_two(nvc0->min_samples); + if (samples > 1) + samples |= NVC0_3D_SAMPLE_SHADING_ENABLE; + + IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples); } void @@ -560,7 +585,8 @@ static struct state_validate { { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, - { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } + { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }, + { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES }, }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index ac37e0ef29a..4a550b0bb40 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -516,6 +516,7 @@ struct nvc0_blitctx unsigned num_samplers[5]; struct pipe_sampler_view *texture[2]; struct nv50_tsc_entry *sampler[2]; + unsigned min_samples; uint32_t dirty; } saved; struct nvc0_rasterizer_stateobj rast; @@ -746,6 +747,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) ctx->saved.gp = nvc0->gmtyprog; ctx->saved.fp = nvc0->fragprog; + ctx->saved.min_samples = nvc0->min_samples; + nvc0->rast = &ctx->rast; nvc0->vertprog = &blitter->vp; @@ -772,6 +775,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) nvc0->num_samplers[s] = 0; nvc0->num_samplers[4] = 2; + nvc0->min_samples = 1; + ctx->saved.dirty = nvc0->dirty; nvc0->textures_dirty[4] |= 3; @@ -781,7 +786,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); - nvc0->dirty = NVC0_NEW_FRAMEBUFFER | + nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES | NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS; @@ -809,6 +814,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) nvc0->gmtyprog = blit->saved.gp; nvc0->fragprog = blit->saved.fp; + nvc0->min_samples = blit->saved.min_samples; + pipe_sampler_view_reference(&nvc0->textures[4][0], NULL); pipe_sampler_view_reference(&nvc0->textures[4][1], NULL); @@ -841,6 +848,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | NVC0_NEW_TFB_TARGETS); + + nvc0->base.pipe.set_min_samples(&nvc0->base.pipe, blit->saved.min_samples); } static void