From e44089b2f79aa2dcaacf348911433d1e21235c0c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 14 Apr 2012 23:56:56 +0200 Subject: [PATCH] nvc0: add initial support for nve4+ (Kepler) chipsets Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, like better placement of texture barriers and adding scheduling data to the shader instructions (without them, a thread group will be masked for 32 cycles after each single instruction issue). --- src/gallium/drivers/nouveau/nouveau_screen.h | 2 + src/gallium/drivers/nouveau/nv_object.xml.h | 3 + src/gallium/drivers/nv50/codegen/nv50_ir.h | 2 + .../drivers/nv50/codegen/nv50_ir_driver.h | 2 + .../nv50/codegen/nv50_ir_from_tgsi.cpp | 6 +- .../drivers/nv50/codegen/nv50_ir_inlines.h | 1 + .../drivers/nv50/codegen/nv50_ir_print.cpp | 1 + .../drivers/nv50/codegen/nv50_ir_target.cpp | 3 +- src/gallium/drivers/nv50/nv50_screen.c | 1 + src/gallium/drivers/nv50/nv50_state.c | 7 + src/gallium/drivers/nv50/nv50_texture.xml.h | 12 +- .../nvc0/codegen/nv50_ir_emit_nvc0.cpp | 12 ++ .../nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 74 +++++++- .../nvc0/codegen/nv50_ir_target_nvc0.cpp | 49 ++--- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 13 ++ src/gallium/drivers/nvc0/nvc0_context.c | 8 +- src/gallium/drivers/nvc0/nvc0_context.h | 25 ++- src/gallium/drivers/nvc0/nvc0_program.c | 61 ++++-- src/gallium/drivers/nvc0/nvc0_screen.c | 174 ++++++++++++------ src/gallium/drivers/nvc0/nvc0_screen.h | 8 +- src/gallium/drivers/nvc0/nvc0_shader_state.c | 3 +- .../drivers/nvc0/nvc0_state_validate.c | 38 ++-- src/gallium/drivers/nvc0/nvc0_surface.c | 2 +- src/gallium/drivers/nvc0/nvc0_tex.c | 165 ++++++++++++++++- src/gallium/drivers/nvc0/nvc0_transfer.c | 159 +++++++++++++++- src/gallium/drivers/nvc0/nvc0_winsys.h | 19 +- src/gallium/drivers/nvc0/nve4_p2mf.xml.h | 107 +++++++++++ .../winsys/nouveau/drm/nouveau_drm_winsys.c | 1 + 28 files changed, 799 insertions(+), 159 deletions(-) create 
mode 100644 src/gallium/drivers/nvc0/nve4_p2mf.xml.h diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index a2784773143..4ca286bfe8d 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -19,6 +19,8 @@ struct nouveau_screen { unsigned sysmem_bindings; + uint16_t class_3d; + struct { struct nouveau_fence *head; struct nouveau_fence *tail; diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index d87d7139bf3..66ba61b4622 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -188,15 +188,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLASS 0x00009097 #define NVC1_3D_CLASS 0x00009197 #define NVC8_3D_CLASS 0x00009297 +#define NVE4_3D_CLASS 0x0000a097 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 #define NVA3_COMPUTE_CLASS 0x000085c0 #define NVC0_COMPUTE_CLASS 0x000090c0 #define NVC8_COMPUTE_CLASS 0x000092c0 +#define NVE4_COMPUTE_CLASS 0x0000a0c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 +#define NVE4_P2MF_CLASS 0x0000a040 #define NV31_MPEG_CLASS 0x00003174 #define NV84_MPEG_CLASS 0x00008274 diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index 6ec4fc95441..c299cab3f52 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -131,6 +131,7 @@ enum operation OP_POPCNT, // bitcount(src0 & src1) OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] OP_EXTBF, + OP_TEXBAR, OP_LAST }; @@ -141,6 +142,7 @@ enum operation #define NV50_IR_SUBOP_LDC_ISL 3 #define NV50_IR_SUBOP_SHIFT_WRAP 1 #define NV50_IR_SUBOP_EMU_PRERET 1 +#define NV50_IR_SUBOP_TEXBAR(n) n enum DataType { 
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index e734c5b03bd..9632986fe40 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -163,6 +163,8 @@ struct nv50_ir_prog_info uint8_t clipDistanceMask; /* mask of clip distances defined */ uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ int8_t genUserClip; /* request user clip planes for ClipVertex */ + uint16_t ucpBase; /* base address for UCPs */ + uint8_t ucpBinding; /* constant buffer index of UCP data */ uint8_t pointSize; /* output index for PointSize */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 4530dc23715..8bd784fa47d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes() for (c = 0; c < 4; ++c) { for (i = 0; i < info->io.genUserClip; ++i) { - Value *ucp; - ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32, - i * 16 + c * 4), NULL); + Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding, + TYPE_F32, info->io.ucpBase + i * 16 + c * 4); + Value *ucp = mkLoad(TYPE_F32, sym, NULL); if (c == 0) res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); else diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index 4ce9deb131f..93e502ea609 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f) return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL); } +// contrary to asTex(), this will never include SULD/SUST 
static inline bool isTextureOp(operation op) { return (op >= OP_TEX && op <= OP_TEXCSAA); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp index 45e61c5e58a..4652bb95f69 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp @@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] = "popcnt", "insbf", "extbf", + "texbar", "(invalid)" }; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp index 27b9610ed52..e3eae69554c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp @@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] = 1, 2, // SULD, SUST 1, 1, // DFDX, DFDY 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP - 2, 3, 2, // POPCNT, INSBF, EXTBF + 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR 0 }; @@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset) switch (chipset & 0xf0) { case 0xc0: case 0xd0: + case 0xe0: return getTargetNVC0(chipset); case 0x50: case 0x80: diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 4bcd2049099..e8118d70ca7 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -594,6 +594,7 @@ nv50_screen_create(struct nouveau_device *dev) FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset); break; } + screen->base.class_3d = tesla_class; ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, NULL, 0, &screen->tesla); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index bf554427ca0..5b783da7ad7 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -465,6 +465,13 @@ nv50_sampler_state_create(struct pipe_context *pipe, (nv50_tsc_wrap_mode(cso->wrap_t) << 
3) | (nv50_tsc_wrap_mode(cso->wrap_r) << 6)); + if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { + if (cso->seamless_cube_map) + so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; + if (!cso->normalized_coords) + so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; + } + switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR; diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h index 08f6efdd7bf..2b140be8d80 100644 --- a/src/gallium/drivers/nv50/nv50_texture.xml.h +++ b/src/gallium/drivers/nv50/nv50_texture.xml.h @@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58) -- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58) -- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58) -- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58) +- rnndb/nv50_texture.xml ( 8111 bytes, from 2012-03-31 16:47:45) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) -Copyright (C) 2006-2011 by the following authors: +Copyright (C) 2006-2012 by the following authors: - Artur Huillet (ahuillet) - Ben Skeggs (darktama, darktama_) - B. R. (koala_br) @@ -265,8 +265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NV50_TSC_1_MIPF_NONE 0x00000040 #define NV50_TSC_1_MIPF_NEAREST 0x00000080 #define NV50_TSC_1_MIPF_LINEAR 0x000000c0 +#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 #define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 #define NV50_TSC_1_LOD_BIAS__SHIFT 12 +#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 #define NV50_TSC_2 0x00000008 #define NV50_TSC_2_MIN_LOD__MASK 0x00000fff diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index d4fd4da07e7..912540d0c40 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -102,6 +102,7 @@ private: void emitSLCT(const CmpInstruction *); void emitSELP(const Instruction *); + void emitTEXBAR(const Instruction *); void emitTEX(const TexInstruction *); void emitTEXCSAA(const TexInstruction *); void emitTXQ(const TexInstruction *); @@ -938,6 +939,14 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i) code[1] |= 1 << 20; } +void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) +{ + code[0] = 0x00000006 | (i->subOp << 26); + code[1] = 0xf0000000; + emitPredicate(i); + emitCondCode(i->predSrc >= 0 ? 
i->cc : CC_ALWAYS, 5); +} + void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) { code[0] = 0x00000086; @@ -1630,6 +1639,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_TXQ: emitTXQ(insn->asTex()); break; + case OP_TEXBAR: + emitTEXBAR(insn); + break; case OP_BRA: case OP_CALL: case OP_PRERET: diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index bd33fbfac5c..318d345efdb 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -117,6 +117,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) class NVC0LegalizePostRA : public Pass { +public: + NVC0LegalizePostRA(const Program *); + private: virtual bool visit(Function *); virtual bool visit(BasicBlock *); @@ -127,8 +130,15 @@ private: void propagateJoin(BasicBlock *); LValue *r63; + + const bool needTexBar; }; +NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) + : needTexBar(prog->getTarget()->getChipset() >= 0xe0) +{ +} + bool NVC0LegalizePostRA::visit(Function *fn) { @@ -225,6 +235,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) } else if (i->isNop()) { bb->remove(i); + } else + if (needTexBar && isTextureOp(i->op)) { + Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); + bar->fixed = 1; + bar->subOp = 0; + bb->insertAfter(i, bar); } else { if (i->op != OP_MOV && i->op != OP_PFETCH) replaceZero(i); @@ -310,7 +326,61 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) const int dim = i->tex.target.getDim() + i->tex.target.isCube(); const int arg = i->tex.target.getArgCount(); - // generate and move the tsc/tic/array source to the front + if (prog->getTarget()->getChipset() >= 0xe0) { + if (i->tex.r == i->tex.s) { + i->tex.r += 8; // NOTE: offset should probably be a driver option + i->tex.s = 0; // only a single cX[] value possible here + } else { + // TODO: extract handles and use register to select TIC/TSC entries 
+ } + if (i->tex.target.isArray()) { + LValue *layer = new_LValue(func, FILE_GPR); + Value *src = i->getSrc(arg - 1); + const int sat = (i->op == OP_TXF) ? 1 : 0; + DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; + bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; + for (int s = dim; s >= 1; --s) + i->setSrc(s, i->getSrc(s - 1)); + i->setSrc(0, layer); + } + if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { + Value *tmp[2]; + Symbol *bind; + Value *rRel = i->getIndirectR(); + Value *sRel = i->getIndirectS(); + Value *shCnt = bld.loadImm(NULL, 2); + + if (rRel) { + tmp[0] = bld.getScratch(); + bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4); + bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt); + tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); + bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], + bld.loadImm(tmp[0], 0x00ffffffu)); + rRel = tmp[0]; + i->setSrc(i->tex.rIndirectSrc, NULL); + } + if (sRel) { + tmp[0] = bld.getScratch(); + bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4); + bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt); + tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); + bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], + bld.loadImm(tmp[0], 0xff000000u)); + sRel = tmp[0]; + i->setSrc(i->tex.sIndirectSrc, NULL); + } + bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel); + + int min = i->tex.rIndirectSrc; + if (min < 0 || min > i->tex.sIndirectSrc) + min = i->tex.sIndirectSrc; + for (int s = min; s >= 1; --s) + i->setSrc(s, i->getSrc(s - 1)); + i->setSrc(0, rRel); + } + } else + // (nvc0) generate and move the tsc/tic/array source to the front if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa @@ -717,7 +787,7 @@ TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const return pass.run(prog, false, true); } else if (stage == CG_STAGE_POST_RA) { - NVC0LegalizePostRA pass; + NVC0LegalizePostRA pass(prog); return pass.run(prog, 
false, true); } else if (stage == CG_STAGE_SSA) { diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 04425623bdb..2aa20053c14 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -42,6 +42,7 @@ TargetNVC0::TargetNVC0(unsigned int card) // Will probably make this nicer once we support subroutines properly, // i.e. when we have an input IR that provides function declarations. +// TODO: separate version for nve4+ which doesn't like the 4-byte insn formats static const uint32_t nvc0_builtin_code[] = { // DIV U32: slow unsigned integer division @@ -57,11 +58,11 @@ static const uint32_t nvc0_builtin_code[] = // #if 1 0x04009c03, 0x78000000, - 0x7c209cdd, - 0x0010dd18, + 0x7c209c82, 0x38000000, // 0x7c209cdd, + 0x0400dde2, 0x18000000, // 0x0010dd18, 0x08309c03, 0x60000000, - 0x05605c18, - 0x0810dc2a, + 0x05205d04, 0x1c000000, // 0x05605c18, + 0x0810dc03, 0x50000000, // 0x0810dc2a, 0x0c209c43, 0x20040000, 0x0810dc03, 0x50000000, 0x0c209c43, 0x20040000, @@ -73,15 +74,15 @@ static const uint32_t nvc0_builtin_code[] = 0x0c209c43, 0x20040000, 0x0000dde4, 0x28000000, 0x08001c43, 0x50000000, - 0x05609c18, - 0x0010430d, + 0x05209d04, 0x1c000000, // 0x05609c18, + 0x00105c03, 0x20060000, // 0x0010430d, 0x0811dc03, 0x1b0e0000, 0x08104103, 0x48000000, 0x04000002, 0x08000000, 0x0811c003, 0x1b0e0000, 0x08104103, 0x48000000, - 0x040000ac, - 0x90001dff, + 0x04000002, 0x08000000, // 0x040000ac, + 0x00001de7, 0x90000000, // 0x90001dff, #else 0x0401dc03, 0x1b0e0000, 0x00008003, 0x78000000, @@ -111,27 +112,27 @@ static const uint32_t nvc0_builtin_code[] = // 0xfc05dc23, 0x188e0000, 0xfc17dc23, 0x18c40000, - 0x03301e18, - 0x07305e18, + 0x01201ec4, 0x1c000000, // 0x03301e18, + 0x05205ec4, 0x1c000000, // 0x07305e18, 0x0401dc03, 0x1b0e0000, 0x00008003, 0x78000000, 0x0400c003, 0x78000000, 0x0c20c103, 0x48000000, 0x0c108003, 
0x60000000, - 0x00005c28, - 0x00001d18, + 0x00005de4, 0x28000000, // 0x00005c28, + 0x00001de2, 0x18000000, // 0x00001d18, 0x0031c023, 0x1b0ec000, - 0xb000a1e7, 0x40000000, + 0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000, 0x04000003, 0x6000c000, 0x0813dc03, 0x1b000000, - 0x0420446c, - 0x040004bd, + 0x04204603, 0x48000000, // 0x0420446c, + 0x04000442, 0x38000000, // 0x040004bd, 0x04208003, 0x5800c000, 0x0430c103, 0x4800c000, - 0x0ffc5dff, - 0x01700e18, - 0x05704a18, - 0x90001dff, + 0xe0001de7, 0x4003fffe, // 0x0ffc5dff, + 0x01200f84, 0x1c000000, // 0x01700e18, + 0x05204b84, 0x1c000000, // 0x05704a18, + 0x00001de7, 0x90000000, // 0x90001dff, // RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) // @@ -180,9 +181,9 @@ static const uint32_t nvc0_builtin_code[] = static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] = { 0, - 8 * (22), - 8 * (22 + 18), - 8 * (22 + 18 + 9) + 8 * (26), + 8 * (26 + 23), + 8 * (26 + 23 + 9) }; void @@ -270,7 +271,7 @@ void TargetNVC0::initOpInfo() OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, - OP_QUADON, OP_QUADPOP + OP_QUADON, OP_QUADPOP, OP_TEXBAR }; joinAnterior = false; @@ -445,6 +446,8 @@ TargetNVC0::isAccessSupported(DataFile file, DataType ty) const { if (ty == TYPE_NONE) return false; + if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ? + return typeSizeof(ty) <= 4; if (ty == TYPE_B96) return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT); return true; diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index 71fa1516e16..1cf1f96569f 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -94,6 +94,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100 #define NVC0_3D_MEM_BARRIER_UNK12 0x00001000 +#define NVC0_3D_CACHE_SPLIT 0x00000308 +#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002 +#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + #define NVC0_3D_TESS_MODE 0x00000320 #define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f #define NVC0_3D_TESS_MODE_PRIM__SHIFT 0 @@ -289,6 +294,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000 #define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16 +#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64 + #define NVC0_3D_COUNTER_ENABLE 0x00000d68 #define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001 #define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002 @@ -727,6 +734,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_POINT_SIZE 0x00001518 +#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c + #define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520 #define NVC0_3D_COUNTER_RESET 0x00001530 @@ -1303,6 +1312,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600 +#define NVE4_3D_TEX_CB_INDEX 0x00002608 +#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000 +#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010 + #define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1)) #define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004 #define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020 diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 461ceb14c45..8abac09ffd5 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -133,10 +133,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) goto out_err; nvc0->screen = screen; - nvc0->base.screen = &screen->base; - nvc0->base.copy_data = nvc0_m2mf_copy_linear; - nvc0->base.push_data = nvc0_m2mf_push_linear; - nvc0->base.push_cb = nvc0_cb_push; + nvc0->base.screen = &screen->base; pipe->screen = pscreen; pipe->priv = priv; @@ -158,6 +155,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); nvc0_init_state_functions(nvc0); + nvc0_init_transfer_functions(nvc0); nvc0_init_resource_functions(pipe); nvc0->draw = draw_create(pipe); @@ -174,7 +172,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniforms); + BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 7072b5918fa..140ce1ac7ef 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -27,7 +27,9 @@ #include "nvc0_3d.xml.h" #include "nvc0_2d.xml.h" #include "nvc0_m2mf.xml.h" +#include "nve4_p2mf.xml.h" +/* NOTE: must 
keep NVC0_NEW_...PROG in consecutive bits in this order */ #define NVC0_NEW_BLEND (1 << 0) #define NVC0_NEW_RASTERIZER (1 << 1) #define NVC0_NEW_ZSA (1 << 2) @@ -75,6 +77,11 @@ struct nvc0_context { struct nvc0_screen *screen; + void (*m2mf_copy_rect)(struct nvc0_context *, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy); + uint32_t dirty; struct { @@ -130,6 +137,8 @@ struct nvc0_context { unsigned num_samplers[5]; uint16_t samplers_dirty[5]; + uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */ + struct pipe_framebuffer_state framebuffer; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; @@ -165,7 +174,7 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *); extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ -boolean nvc0_program_translate(struct nvc0_program *); +boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset); boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); void nvc0_program_library_upload(struct nvc0_context *); @@ -206,6 +215,7 @@ extern void nvc0_init_surface_functions(struct nvc0_context *); /* nvc0_tex.c */ void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); +void nve4_set_tex_handles(struct nvc0_context *); struct pipe_sampler_view * nvc0_create_sampler_view(struct pipe_context *, @@ -214,19 +224,16 @@ nvc0_create_sampler_view(struct pipe_context *, /* nvc0_transfer.c */ void -nvc0_m2mf_transfer_rect(struct nvc0_context *, - const struct nv50_m2mf_rect *dst, - const struct nv50_m2mf_rect *src, - uint32_t nblocksx, uint32_t nblocksy); +nvc0_init_transfer_functions(struct nvc0_context *); + void nvc0_m2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, const void *data); void 
-nvc0_m2mf_copy_linear(struct nouveau_context *nv, - struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, - struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, - unsigned size); +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data); void nvc0_cb_push(struct nouveau_context *, struct nouveau_bo *bo, unsigned domain, diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 50a853abed9..f228d07bf6b 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) static int nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) { - unsigned last = info->prop.fp.numColourResults * 4; + unsigned count = info->prop.fp.numColourResults * 4; unsigned i, c; for (i = 0; i < info->numOutputs; ++i) @@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) info->out[i].slot[c] = info->out[i].si * 4 + c; if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) - info->out[info->io.sampleMask].slot[0] = last++; + info->out[info->io.sampleMask].slot[0] = count++; + else + if (info->target >= 0xe0) + count++; /* on Kepler, depth is always last colour reg + 2 */ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) - info->out[info->io.fragDepth].slot[2] = last; + info->out[info->io.fragDepth].slot[2] = count; return 0; } @@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) vp->vp.clip_mode |= 1 << (i * 4); if (info->io.genUserClip < 0) - vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */ + vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ return 0; } @@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) { unsigned i, c, a, m; + /* just 00062 on Kepler */ fp->hdr[0] = 0x20062 | (5 << 10); 
fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ @@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog) #endif boolean -nvc0_program_translate(struct nvc0_program *prog) +nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) { struct nv50_ir_prog_info *info; int ret; @@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog) return FALSE; info->type = prog->type; - info->target = 0xc0; + info->target = chipset; info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; info->bin.source = (void *)prog->pipe.tokens; info->io.genUserClip = prog->vp.num_ucps; + info->io.ucpBase = 256; + info->io.ucpBinding = 15; info->assignSlots = nvc0_program_assign_varying_slots; @@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) size = align(size, 0x40); size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ } - size = align(size, 0x40); /* required by SP_START_ID */ + /* On Fermi, SP_START_ID must be aligned to 0x40. + * On Kepler, the first instruction must be aligned to 0x80 because + * latency information is expected only at certain positions. 
+ */ + if (screen->base.class_3d >= NVE4_3D_CLASS) + size = size + 0x70; + size = align(size, 0x40); ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); if (ret) { @@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= prog->mem->start + prog->mem->size)); + if (screen->base.class_3d >= NVE4_3D_CLASS) { + switch (prog->mem->start & 0xff) { + case 0x40: prog->code_base += 0x70; break; + case 0x80: prog->code_base += 0x30; break; + case 0xc0: prog->code_base += 0x70; break; + default: + prog->code_base += 0x30; + assert((prog->mem->start & 0xff) == 0x00); + break; + } + } code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; if (prog->relocs) @@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) nvc0_program_dump(prog); #endif - nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, - NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); - nvc0_m2mf_push_linear(&nvc0->base, screen->text, - prog->code_base + NVC0_SHADER_HEADER_SIZE, - NOUVEAU_BO_VRAM, prog->code_size, prog->code); + nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, + NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0->base.push_data(&nvc0->base, screen->text, + prog->code_base + NVC0_SHADER_HEADER_SIZE, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); if (prog->immd_size) - nvc0_m2mf_push_linear(&nvc0->base, - screen->text, prog->immd_base, NOUVEAU_BO_VRAM, - prog->immd_size, prog->immd_data); + nvc0->base.push_data(&nvc0->base, + screen->text, prog->immd_base, NOUVEAU_BO_VRAM, + prog->immd_size, prog->immd_data); BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); - PUSH_DATA (nvc0->base.pushbuf, 0x1111); + PUSH_DATA (nvc0->base.pushbuf, 0x1011); return TRUE; } @@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0) if (ret) return; - 
nvc0_m2mf_push_linear(&nvc0->base, - screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, - size, code); + nvc0->base.push_data(&nvc0->base, + screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, + size, code); /* no need for a memory barrier, will be emitted with first program */ } diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index bad06c3f009..eb8a9c5a0e0 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -30,7 +30,6 @@ #include "nvc0_context.h" #include "nvc0_screen.h" -#include "nouveau/nv_object.xml.h" #include "nvc0_graph_macros.h" static boolean @@ -67,6 +66,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + switch (param) { case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */ @@ -89,7 +90,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - return 0; + return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; case PIPE_CAP_TWO_SIDED_STENCIL: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: @@ -247,10 +248,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) FREE(screen->blitctx); nouveau_bo_ref(NULL, &screen->text); + nouveau_bo_ref(NULL, &screen->uniform_bo); nouveau_bo_ref(NULL, &screen->tls); nouveau_bo_ref(NULL, &screen->txc); nouveau_bo_ref(NULL, &screen->fence.bo); - nouveau_bo_ref(NULL, &screen->vfetch_cache); + nouveau_bo_ref(NULL, &screen->poly_cache); nouveau_heap_destroy(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); @@ -260,7 +262,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_mm_destroy(screen->mm_VRAM_fe0); - nouveau_object_del(&screen->fermi); + nouveau_object_del(&screen->eng3d); nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); @@ -288,16 +290,16 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, } static void -nvc0_magic_3d_init(struct nouveau_pushbuf *push) +nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) { BEGIN_NVC0(push, SUBC_3D(0x10cc), 1); PUSH_DATA (push, 0xff); BEGIN_NVC0(push, SUBC_3D(0x10e0), 2); - PUSH_DATA(push, 0xff); - PUSH_DATA(push, 0xff); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); - PUSH_DATA(push, 0xff); - PUSH_DATA(push, 0xff); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); BEGIN_NVC0(push, SUBC_3D(0x074c), 1); PUSH_DATA (push, 0x3f); @@ -308,11 +310,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) BEGIN_NVC0(push, SUBC_3D(0x0de8), 1); PUSH_DATA (push, 1); -#if 0 /* software method */ - BEGIN_NVC0(push, SUBC_3D(0x1528), 1); /* MP poke */ - PUSH_DATA (push, 0); -#endif - BEGIN_NVC0(push, SUBC_3D(0x12ac), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_3D(0x0218), 1); @@ -324,8 +321,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) BEGIN_NVC0(push, SUBC_3D(0x12d8), 2); PUSH_DATA (push, 0x10); PUSH_DATA (push, 0x10); - BEGIN_NVC0(push, 
SUBC_3D(0x06d4), 1); - PUSH_DATA (push, 8); BEGIN_NVC0(push, SUBC_3D(0x1140), 1); PUSH_DATA (push, 0x10); BEGIN_NVC0(push, SUBC_3D(0x1610), 1); @@ -333,24 +328,27 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push) BEGIN_NVC0(push, SUBC_3D(0x164c), 1); PUSH_DATA (push, 1 << 12); - BEGIN_NVC0(push, SUBC_3D(0x151c), 1); - PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_3D(0x030c), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_3D(0x0300), 1); PUSH_DATA (push, 3); -#if 0 /* software method */ - BEGIN_NVC0(push, SUBC_3D(0x1280), 1); /* PGRAPH poke */ - PUSH_DATA (push, 0); -#endif + BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); - PUSH_DATA (push, 0x1f40); + PUSH_DATA (push, 0x3fffff); BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_3D(0x075c), 1); PUSH_DATA (push, 3); + + if (obj_class >= NVE4_3D_CLASS) { + BEGIN_NVC0(push, SUBC_3D(0x07fc), 1); + PUSH_DATA (push, 1); + } + + /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc + * are supposed to do */ } static void @@ -391,10 +389,20 @@ nvc0_screen_create(struct nouveau_device *dev) struct pipe_screen *pscreen; struct nouveau_object *chan; struct nouveau_pushbuf *push; + uint32_t obj_class; int ret; unsigned i; union nouveau_bo_config mm_config; + switch (dev->chipset & ~0xf) { + case 0xc0: + case 0xd0: + case 0xe0: + break; + default: + return NULL; + } + screen = CALLOC_STRUCT(nvc0_screen); if (!screen) return NULL; @@ -431,17 +439,25 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.fence.emit = nvc0_screen_fence_emit; screen->base.fence.update = nvc0_screen_fence_update; - ret = nouveau_object_new(chan, 0xbeef9039, NVC0_M2MF_CLASS, NULL, 0, + switch (dev->chipset & 0xf0) { + case 0xe0: + obj_class = NVE4_P2MF_CLASS; + break; + default: + obj_class = NVC0_M2MF_CLASS; + break; + } + ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0, &screen->m2mf); if (ret) FAIL_SCREEN_INIT("Error allocating 
PGRAPH context for M2MF: %d\n", ret); BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->m2mf->oclass); - BEGIN_NVC0(push, NVC0_M2MF(NOTIFY_ADDRESS_HIGH), 3); - PUSH_DATAh(push, screen->fence.bo->offset + 16); - PUSH_DATA (push, screen->fence.bo->offset + 16); - PUSH_DATA (push, 0); + if (screen->m2mf->oclass == NVE4_P2MF_CLASS) { + BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, 0xa0b5); + } ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0, &screen->eng2d); @@ -461,17 +477,39 @@ nvc0_screen_create(struct nouveau_device *dev) BEGIN_NVC0(push, SUBC_2D(0x0888), 1); PUSH_DATA (push, 1); - ret = nouveau_object_new(chan, 0xbeef9097, NVC0_3D_CLASS, NULL, 0, - &screen->fermi); + BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->fence.bo->offset + 16); + PUSH_DATA (push, screen->fence.bo->offset + 16); + + switch (dev->chipset & 0xf0) { + case 0xe0: + obj_class = NVE4_3D_CLASS; + break; + case 0xd0: + case 0xc0: + default: + switch (dev->chipset) { + case 0xd9: + case 0xc8: + obj_class = NVC8_3D_CLASS; + break; + case 0xc1: + obj_class = NVC1_3D_CLASS; + break; + default: + obj_class = NVC0_3D_CLASS; + break; + } + break; + } + ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0, + &screen->eng3d); if (ret) FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret); + screen->base.class_3d = obj_class; BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); - PUSH_DATA (push, screen->fermi->oclass); - BEGIN_NVC0(push, NVC0_3D(NOTIFY_ADDRESS_HIGH), 3); - PUSH_DATAh(push, screen->fence.bo->offset + 32); - PUSH_DATA (push, screen->fence.bo->offset + 32); - PUSH_DATA (push, 0); + PUSH_DATA (push, screen->eng3d->oclass); BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1); PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS); @@ -501,10 +539,23 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 1); BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1); PUSH_DATA 
(push, 0); - BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); - PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + if (screen->eng3d->oclass < NVE4_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); + PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + } else { + BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); + PUSH_DATA (push, 15); + } + BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1); + PUSH_DATA (push, 8); /* 128 */ + BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1); + PUSH_DATA (push, 1); + if (screen->eng3d->oclass >= NVC1_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1); + PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1); + } - nvc0_magic_3d_init(push); + nvc0_magic_3d_init(push, screen->eng3d->oclass); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, &screen->text); @@ -517,21 +568,41 @@ nvc0_screen_create(struct nouveau_device *dev) nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL, - &screen->uniforms); + &screen->uniform_bo); if (ret) goto fail; - /* auxiliary constants (6 user clip planes, base instance id) */ - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 256); - PUSH_DATAh(push, screen->uniforms->offset + (5 << 16)); - PUSH_DATA (push, screen->uniforms->offset + (5 << 16)); for (i = 0; i < 5; ++i) { + /* TIC and TSC entries for each unit (nve4+ only) */ + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); + PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9)); BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); + if (screen->eng3d->oclass >= NVE4_3D_CLASS) { + unsigned j; + BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); + PUSH_DATA (push, 0); + for (j = 0; j < 8; ++j) + PUSH_DATA(push, j); + } else { + BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); +
PUSH_DATA (push, 0x54); + } } + BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); + PUSH_DATA (push, 0); + + /* max MPs * max warps per MP (TODO: ask kernel) */ + if (screen->eng3d->oclass >= NVE4_3D_CLASS) + screen->tls_size = 8 * 64; + else + screen->tls_size = 16 * 48; + screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16; + screen->tls_size = align(screen->tls_size, 1 << 17); - screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, NULL, &screen->tls); if (ret) @@ -550,21 +621,14 @@ nvc0_screen_create(struct nouveau_device *dev) BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1); PUSH_DATA (push, 0); - for (i = 0; i < 5; ++i) { - BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1); - PUSH_DATA (push, 0x54); - } - BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1); - PUSH_DATA (push, 0); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL, - &screen->vfetch_cache); + &screen->poly_cache); if (ret) goto fail; BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); - PUSH_DATAh(push, screen->vfetch_cache->offset); - PUSH_DATA (push, screen->vfetch_cache->offset); + PUSH_DATAh(push, screen->poly_cache->offset); + PUSH_DATA (push, screen->poly_cache->offset); PUSH_DATA (push, 3); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL, diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index e0f5e5ec246..8bcc1470593 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -6,6 +6,8 @@ #include "nouveau/nouveau_fence.h" #include "nouveau/nouveau_heap.h" +#include "nouveau/nv_object.xml.h" + #include "nvc0_winsys.h" #include "nvc0_stateobj.h" @@ -24,10 +26,10 @@ struct nvc0_screen { int num_occlusion_queries_active; struct nouveau_bo *text; - struct nouveau_bo *uniforms; + struct nouveau_bo *uniform_bo; struct nouveau_bo *tls; struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ - struct nouveau_bo 
*vfetch_cache; + struct nouveau_bo *poly_cache; uint64_t tls_size; @@ -55,7 +57,7 @@ struct nvc0_screen { struct nouveau_mman *mm_VRAM_fe0; - struct nouveau_object *fermi; + struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ struct nouveau_object *eng2d; struct nouveau_object *m2mf; struct nouveau_object *dijkstra; diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 54dfd8d1a1a..786889f8b57 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -70,7 +70,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) return TRUE; if (!prog->translated) { - prog->translated = nvc0_program_translate(prog); + prog->translated = nvc0_program_translate( + prog, nvc0->screen->base.device->chipset); if (!prog->translated) return FALSE; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 3533a5e1ba4..5d34f2b0bcc 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -250,17 +250,17 @@ nvc0_validate_viewport(struct nvc0_context *nvc0) } static INLINE void -nvc0_upload_uclip_planes(struct nvc0_context *nvc0) +nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nouveau_bo *bo = nvc0->screen->uniforms; + struct nouveau_bo *bo = nvc0->screen->uniform_bo; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 256); - PUSH_DATAh(push, bo->offset + (5 << 16)); - PUSH_DATA (push, bo->offset + (5 << 16)); + PUSH_DATA (push, 512); + PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9)); + PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9)); BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); - PUSH_DATA (push, 0); + PUSH_DATA (push, 256); PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); } @@ -289,21 
+289,28 @@ nvc0_validate_clip(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *vp; + unsigned stage; uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable; - if (nvc0->dirty & NVC0_NEW_CLIP) - nvc0_upload_uclip_planes(nvc0); - - vp = nvc0->gmtyprog; - if (!vp) { + if (nvc0->gmtyprog) { + stage = 3; + vp = nvc0->gmtyprog; + } else + if (nvc0->tevlprog) { + stage = 2; vp = nvc0->tevlprog; - if (!vp) - vp = nvc0->vertprog; + } else { + stage = 0; + vp = nvc0->vertprog; } if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) nvc0_check_program_ucps(nvc0, vp, clip_enable); + if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) + if (vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) + nvc0_upload_uclip_planes(nvc0, stage); + clip_enable &= vp->vp.clip_enable; if (nvc0->state.clip_enable != clip_enable) { @@ -375,7 +382,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) if (!nouveau_resource_mapped_by_gpu(&res->base)) { if (i == 0 && (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) { base = s << 16; - bo = nvc0->screen->uniforms; + bo = nvc0->screen->uniform_bo; if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0) rebind = FALSE; @@ -396,7 +403,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) nvc0->state.uniform_buffer_bound[s] = 0; } - if (bo != nvc0->screen->uniforms) + if (bo != nvc0->screen->uniform_bo) BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); if (rebind) { @@ -517,6 +524,7 @@ static struct state_validate { { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, + { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c 
index 3378b513936..2b47c04056e 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -233,7 +233,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe, src_box->x, src_box->y, src_box->z); for (i = 0; i < src_box->depth; ++i) { - nvc0_m2mf_transfer_rect(nvc0, &drect, &srect, nx, ny); + nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny); if (nv50_miptree(dst)->layout_3d) drect.z++; diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c index f6c4ab39bd9..8dd7185bcdf 100644 --- a/src/gallium/drivers/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nvc0/nvc0_tex.c @@ -26,6 +26,9 @@ #include "util/u_format.h" +#define NVE4_TIC_ENTRY_INVALID 0x000fffff +#define NVE4_TSC_ENTRY_INVALID 0xfff00000 + #define NV50_TIC_0_SWIZZLE__MASK \ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) @@ -271,13 +274,76 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) return need_flush; } +static boolean +nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_textures[s]; ++i) { + struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); + struct nv04_resource *res; + const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); + + if (!tic) { + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + continue; + } + res = nv04_resource(tic->pipe.texture); + + if (tic->id < 0) { + tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + (tic->id * 32)); + PUSH_DATA (push, txc->offset + (tic->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + 
PUSH_DATAp(push, &tic->tic[0], 8); + + need_flush = TRUE; + } else + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, (tic->id << 4) | 1); + } + nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; + + nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tic->id; + if (dirty) + BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + } + for (; i < nvc0->state.num_textures[s]; ++i) + nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; + + nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + return need_flush; +} + void nvc0_validate_textures(struct nvc0_context *nvc0) { boolean need_flush; - need_flush = nvc0_validate_tic(nvc0, 0); - need_flush |= nvc0_validate_tic(nvc0, 3); - need_flush |= nvc0_validate_tic(nvc0, 4); + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + need_flush = nve4_validate_tic(nvc0, 0); + need_flush |= nve4_validate_tic(nvc0, 3); + need_flush |= nve4_validate_tic(nvc0, 4); + } else { + need_flush = nvc0_validate_tic(nvc0, 0); + need_flush |= nvc0_validate_tic(nvc0, 3); + need_flush |= nvc0_validate_tic(nvc0, 4); + } if (need_flush) { BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1); @@ -329,16 +395,103 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) return need_flush; } +static boolean +nve4_validate_tsc(struct nvc0_context *nvc0, int s) +{ + struct nouveau_bo *txc = nvc0->screen->txc; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + unsigned i; + boolean need_flush = FALSE; + + for (i = 0; i < nvc0->num_samplers[s]; ++i) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); + + if (!tsc) { + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + continue; + } + if (tsc->id < 0) { + tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); + + PUSH_SPACE(push, 16); + BEGIN_NVC0(push, 
NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32)); + PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32)); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, 32); + PUSH_DATA (push, 1); + BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, &tsc->tsc[0], 8); + + need_flush = TRUE; + } + nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID; + nvc0->tex_handles[s][i] |= tsc->id << 20; + } + for (; i < nvc0->state.num_samplers[s]; ++i) + nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; + + nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + + return need_flush; +} + void nvc0_validate_samplers(struct nvc0_context *nvc0) { boolean need_flush; - need_flush = nvc0_validate_tsc(nvc0, 0); - need_flush |= nvc0_validate_tsc(nvc0, 3); - need_flush |= nvc0_validate_tsc(nvc0, 4); + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + need_flush = nve4_validate_tsc(nvc0, 0); + need_flush |= nve4_validate_tsc(nvc0, 3); + need_flush |= nve4_validate_tsc(nvc0, 4); + } else { + need_flush = nvc0_validate_tsc(nvc0, 0); + need_flush |= nvc0_validate_tsc(nvc0, 3); + need_flush |= nvc0_validate_tsc(nvc0, 4); + } if (need_flush) { BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } } + +/* Upload the "diagonal" entries for the possible texture sources ($t == $s). + * At some point we might want to get a list of the combinations used by a + * shader and fill in those entries instead of having it extract the handles. 
+ */ +void +nve4_set_tex_handles(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + uint64_t address; + unsigned s; + + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) + return; + address = nvc0->screen->uniform_bo->offset + (5 << 16); + + for (s = 0; s < 5; ++s, address += (1 << 9)) { + uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; + if (!dirty) + continue; + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 512); + PUSH_DATAh(push, address); + PUSH_DATA (push, address); + do { + int i = ffs(dirty) - 1; + dirty &= ~(1 << i); + + BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); + PUSH_DATA (push, (8 + i) * 4); + PUSH_DATA (push, nvc0->tex_handles[s][i]); + } while (dirty); + + nvc0->textures_dirty[s] = 0; + nvc0->samplers_dirty[s] = 0; + } +} diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c index 774793d8d02..fb44190574e 100644 --- a/src/gallium/drivers/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nvc0/nvc0_transfer.c @@ -13,7 +13,7 @@ struct nvc0_transfer { uint16_t nlayers; }; -void +static void nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, const struct nv50_m2mf_rect *dst, const struct nv50_m2mf_rect *src, @@ -108,6 +108,71 @@ nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, nouveau_bufctx_reset(bctx, 0); } +static void +nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, + const struct nv50_m2mf_rect *dst, + const struct nv50_m2mf_rect *src, + uint32_t nblocksx, uint32_t nblocksy) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nouveau_bufctx *bctx = nvc0->bufctx; + uint32_t exec; + uint32_t src_base = src->base; + uint32_t dst_base = dst->base; + const int cpp = dst->cpp; + + assert(dst->cpp == src->cpp); + + nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); + nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + exec = 0x200 /* 
2D_ENABLE */ | 0x6 /* UNK */; + + if (!nouveau_bo_memtype(dst->bo)) { + assert(!dst->z); + dst_base += dst->y * dst->pitch + dst->x * cpp; + exec |= 0x100; /* DST_MODE_2D_LINEAR */ + } + if (!nouveau_bo_memtype(src->bo)) { + assert(!src->z); + src_base += src->y * src->pitch + src->x * cpp; + exec |= 0x080; /* SRC_MODE_2D_LINEAR */ + } + + BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); + PUSH_DATA (push, 0x1000 | dst->tile_mode); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, dst->height); + PUSH_DATA (push, dst->depth); + PUSH_DATA (push, dst->z); + PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); + PUSH_DATA (push, 0x1000 | src->tile_mode); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, src->height); + PUSH_DATA (push, src->depth); + PUSH_DATA (push, src->z); + PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); + PUSH_DATAh(push, src->bo->offset + src_base); + PUSH_DATA (push, src->bo->offset + src_base); + PUSH_DATAh(push, dst->bo->offset + dst_base); + PUSH_DATA (push, dst->bo->offset + dst_base); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, nblocksx * cpp); + PUSH_DATA (push, nblocksy); + + BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); + PUSH_DATA (push, exec); + + nouveau_bufctx_reset(bctx, 0); +} + void nvc0_m2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, @@ -154,6 +219,49 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv, } void +nve4_p2mf_push_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned offset, unsigned domain, + unsigned size, const void *data) +{ + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nouveau_pushbuf *push = nv->pushbuf; + uint32_t *src = (uint32_t *)data; + unsigned count = (size + 3) / 4; + + nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + 
nouveau_pushbuf_validate(push); + + while (count) { + unsigned nr; + + if (!PUSH_SPACE(push, 16)) + break; + nr = PUSH_AVAIL(push); + assert(nr >= 16); + nr = MIN2(count, nr - 8); + nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); + + BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, dst->offset + offset); + PUSH_DATA (push, dst->offset + offset); + BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, nr * 4); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + PUSH_DATAp(push, src, nr); + + count -= nr; + src += nr; + offset += nr * 4; + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void nvc0_m2mf_copy_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, @@ -191,6 +299,32 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv, nouveau_bufctx_reset(bctx, 0); } +static void +nve4_m2mf_copy_linear(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom, + struct nouveau_bo *src, unsigned srcoff, unsigned srcdom, + unsigned size) +{ + struct nouveau_pushbuf *push = nv->pushbuf; + struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; + + nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD); + nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, bctx); + nouveau_pushbuf_validate(push); + + BEGIN_NVC0(push, SUBC_COPY(0x0400), 4); + PUSH_DATAh(push, src->offset + srcoff); + PUSH_DATA (push, src->offset + srcoff); + PUSH_DATAh(push, dst->offset + dstoff); + PUSH_DATA (push, dst->offset + dstoff); + BEGIN_NVC0(push, SUBC_COPY(0x0418), 1); + PUSH_DATA (push, size); + IMMED_NVC0(push, SUBC_COPY(0x0300), 0x6); + + nouveau_bufctx_reset(bctx, 0); +} + struct pipe_transfer * nvc0_miptree_transfer_new(struct pipe_context *pctx, struct 
pipe_resource *res, @@ -253,8 +387,8 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx, unsigned z = tx->rect[0].z; unsigned i; for (i = 0; i < tx->nlayers; ++i) { - nvc0_m2mf_transfer_rect(nvc0, &tx->rect[1], &tx->rect[0], - tx->nblocksx, tx->nblocksy); + nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0], + tx->nblocksx, tx->nblocksy); if (mt->layout_3d) tx->rect[0].z++; else @@ -280,8 +414,8 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx, if (tx->base.usage & PIPE_TRANSFER_WRITE) { for (i = 0; i < tx->nlayers; ++i) { - nvc0_m2mf_transfer_rect(nvc0, &tx->rect[0], &tx->rect[1], - tx->nblocksx, tx->nblocksy); + nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1], + tx->nblocksx, tx->nblocksy); if (mt->layout_3d) tx->rect[0].z++; else @@ -362,3 +496,18 @@ nvc0_cb_push(struct nouveau_context *nv, nouveau_bufctx_reset(bctx, 0); } + +void +nvc0_init_transfer_functions(struct nvc0_context *nvc0) +{ + if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { + nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect; + nvc0->base.copy_data = nve4_m2mf_copy_linear; + nvc0->base.push_data = nve4_p2mf_push_linear; + } else { + nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect; + nvc0->base.copy_data = nvc0_m2mf_copy_linear; + nvc0->base.push_data = nvc0_m2mf_push_linear; + } + nvc0->base.push_cb = nvc0_cb_push; +} diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h index c3ab1c93644..c13ebd5fb58 100644 --- a/src/gallium/drivers/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nvc0/nvc0_winsys.h @@ -46,17 +46,24 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) } -#define SUBC_3D(m) 1, (m) +#define SUBC_3D(m) 0, (m) #define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) +#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) -#define SUBC_2D(m) 2, (m) -#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) +#define SUBC_COMPUTE(m) 1, (m) +#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) +#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) 
-#define SUBC_M2MF(m) 3, (m) +#define SUBC_M2MF(m) 2, (m) +#define SUBC_P2MF(m) 2, (m) #define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n) +#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n) -#define SUBC_COMPUTE(m) 4, (m) -#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) +#define SUBC_2D(m) 3, (m) +#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n) + +#define SUBC_COPY(m) 4, (m) +#define NVE4_COPY(n) SUBC_COPY(NVE4_COPY_##n) static INLINE uint32_t NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) diff --git a/src/gallium/drivers/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h new file mode 100644 index 00000000000..68a742fadfe --- /dev/null +++ b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h @@ -0,0 +1,107 @@ +#ifndef RNNDB_NVE4_P2MF_XML +#define RNNDB_NVE4_P2MF_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11) +- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24) +- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59) +- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12) +- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12) + +Copyright (C) 2006-2012 by the following authors: +- Artur Huillet (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R.
(koala_br) +- Carlos Martin (carlosmn) +- Christoph Bumiller (calim, chrisbmr) +- Dawid Gajownik (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov (lumag) +- EdB (edb_) +- Erik Waling (erikwaling) +- Francisco Jerez (curro) +- imirkin (imirkin) +- jb17bsome (jb17bsome) +- Jeremy Kolb (kjeremy) +- Laurent Carlier (lordheavy) +- Luca Barbieri (lb, lb1) +- Maarten Maathuis (stillunknown) +- Marcin Kościelnicki (mwk, koriakin) +- Mark Carey (careym) +- Matthieu Castet (mat-c) +- nvidiaman (nvidiaman) +- Patrice Mandin (pmandin, pmdata) +- Pekka Paalanen (pq, ppaalanen) +- Peter Popov (ironpeter) +- Richard Hughes (hughsient) +- Rudi Cilibrasi (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet (leroutier) +- Stephane Marchesin (marcheu) +- sturmflut (sturmflut) +- Sylvain Munaut +- Victor Stinner (haypo) +- Wladmir van der Laan (miathan6) +- Younes Manton (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180 + +#define NVE4_P2MF_LINE_COUNT 0x00000184 + +#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188 + +#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c + +#define NVE4_P2MF_DST_TILE_MODE 0x00000194 + +#define NVE4_P2MF_DST_PITCH 0x00000198 + +#define NVE4_P2MF_DST_HEIGHT 0x0000019c + +#define NVE4_P2MF_DST_DEPTH 0x000001a0 + +#define NVE4_P2MF_DST_Z 0x000001a4 + +#define NVE4_P2MF_DST_X 0x000001a8 + +#define NVE4_P2MF_DST_Y 0x000001ac + +#define NVE4_P2MF_EXEC 0x000001b0 +#define NVE4_P2MF_EXEC_LINEAR 0x00000001 +#define NVE4_P2MF_EXEC_UNK12 0x00001000 + +#define NVE4_P2MF_DATA 0x000001b4 + + +#endif /* RNNDB_NVE4_P2MF_XML */ diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index 91d51c08ed5..bf990147764 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -34,6 +34,7 @@ nouveau_drm_screen_create(int fd) break; case 0xc0: case 0xd0: + case 0xe0: init = nvc0_screen_create; break; default: -- 2.30.2