uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
+ void *interpData;
struct nv50_ir_prog_symbol *syms;
uint16_t numSyms;
} bin;
uint32_t libPos,
uint32_t dataPos);
+/* Patch the interpolation instructions of an already generated program.
+ *
+ * interpData is the table handed out through bin.interpData, code is the
+ * program binary to modify in place. The two flags select per-sample
+ * interpolation and flat shading; the parameter names match the definition.
+ */
+extern void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+                      bool force_persample_interp, bool flatshade);
+
/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);
code[1] |= (i->ipa & 0xc) << (19 - 2);
}
+/* Re-encode one recorded interpolation instruction.
+ *
+ * entry supplies the interpolation mode, the perspective-division register
+ * and the instruction location (an index into code, in 32-bit words).
+ * flatshade turns SC-mode (color) inputs into flat ones; otherwise
+ * force_persample_interp adds the centroid bit to non-flat inputs that use
+ * the default sample mode.
+ */
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+            bool force_persample_interp, bool flatshade)
+{
+   uint32_t *insn = &code[entry->loc];
+   int mode = entry->ipa;
+   int divReg = entry->reg;
+
+   const bool isColor =
+      (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC;
+
+   if (flatshade && isColor) {
+      /* 0xff is the same register value emitINTERP encodes when the op is
+       * not OP_PINTERP.
+       */
+      mode = NV50_IR_INTERP_FLAT;
+      divReg = 0xff;
+   } else if (force_persample_interp) {
+      const bool defaultSample =
+         (mode & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT;
+      const bool isFlat =
+         (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_FLAT;
+
+      if (defaultSample && !isFlat)
+         mode |= NV50_IR_INTERP_CENTROID;
+   }
+
+   /* Second word: 4-bit interpolation field starting at bit 19. */
+   insn[1] = (insn[1] & ~(0xf << 19)) |
+             ((mode & 0x3) << 21) |
+             ((mode & 0xc) << (19 - 2));
+   /* First word: 8-bit register field starting at bit 23. */
+   insn[0] = (insn[0] & ~(0xff << 23)) | (divReg << 23);
+}
+
void
CodeEmitterGK110::emitINTERP(const Instruction *i)
{
if (i->saturate)
code[1] |= 1 << 18;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 23);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0xff << 23;
+ addInterp(i->ipa, 0xff, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
emitGPR (0x00, insn->def(0));
}
+/* Re-encode one recorded interpolation instruction.
+ *
+ * entry supplies the interpolation mode, the perspective-division register
+ * and the instruction location (an index into code, in 32-bit words).
+ * flatshade turns SC-mode (color) inputs into flat ones; otherwise
+ * force_persample_interp adds the centroid bit to non-flat inputs that use
+ * the default sample mode.
+ */
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+            bool force_persample_interp, bool flatshade)
+{
+   uint32_t *insn = &code[entry->loc];
+   int mode = entry->ipa;
+   int divReg = entry->reg;
+
+   const bool isColor =
+      (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC;
+
+   if (flatshade && isColor) {
+      /* 0xff is also the register value emitIPA passes to addInterp() in
+       * its register-less branch.
+       */
+      mode = NV50_IR_INTERP_FLAT;
+      divReg = 0xff;
+   } else if (force_persample_interp) {
+      const bool defaultSample =
+         (mode & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT;
+      const bool isFlat =
+         (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_FLAT;
+
+      if (defaultSample && !isFlat)
+         mode |= NV50_IR_INTERP_CENTROID;
+   }
+
+   /* Second word: 4-bit interpolation field starting at bit 0x14. */
+   insn[1] = (insn[1] & ~(0xf << 0x14)) |
+             ((mode & 0x3) << 0x16) |
+             ((mode & 0xc) << (0x14 - 2));
+   /* First word: 8-bit register field starting at bit 0x14. */
+   insn[0] = (insn[0] & ~(0xff << 0x14)) | (divReg << 0x14);
+}
+
void
CodeEmitterGM107::emitIPA()
{
emitGPR(0x14, insn->src(1));
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(2));
+ addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
} else {
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(1));
emitGPR(0x14);
+ addInterp(insn->ipa, 0xff, interpApply);
}
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
}
}
+/* Re-encode one recorded interpolation instruction.
+ *
+ * entry supplies the interpolation mode, the perspective-division register
+ * and the instruction location (an index into code, in 32-bit words).
+ * flatshade turns SC-mode (color) inputs into flat ones; otherwise
+ * force_persample_interp adds the centroid bit to non-flat inputs that use
+ * the default sample mode. Only the first instruction word is modified.
+ */
+static void
+interpApply(const InterpEntry *entry, uint32_t *code,
+            bool force_persample_interp, bool flatshade)
+{
+   uint32_t *insn = &code[entry->loc];
+   int mode = entry->ipa;
+   int divReg = entry->reg;
+
+   const bool isColor =
+      (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC;
+
+   if (flatshade && isColor) {
+      /* 0x3f is the same register value emitINTERP encodes when the op is
+       * not OP_PINTERP.
+       */
+      mode = NV50_IR_INTERP_FLAT;
+      divReg = 0x3f;
+   } else if (force_persample_interp) {
+      const bool defaultSample =
+         (mode & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT;
+      const bool isFlat =
+         (mode & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_FLAT;
+
+      if (defaultSample && !isFlat)
+         mode |= NV50_IR_INTERP_CENTROID;
+   }
+
+   /* 4-bit interpolation field starting at bit 6. */
+   insn[0] = (insn[0] & ~(0xf << 6)) | (mode << 6);
+   /* 6-bit register field starting at bit 26. */
+   insn[0] = (insn[0] & ~(0x3f << 26)) | (divReg << 26);
+}
+
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
if (i->saturate)
code[0] |= 1 << 5;
- if (i->op == OP_PINTERP)
+ if (i->op == OP_PINTERP) {
srcId(i->src(1), 26);
- else
+ addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
+ } else {
code[0] |= 0x3f << 26;
+ addInterp(i->ipa, 0x3f, interpApply);
+ }
srcId(i->src(0).getIndirect(0), 20);
} else {
default:
break;
}
- if (decl->Interp.Location || info->io.sampleInterp)
+ if (decl->Interp.Location)
info->in[i].centroid = 1;
}
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_SAMPLEID:
+ case TGSI_SEMANTIC_SAMPLEPOS:
+ info->io.sampleInterp = 1;
+ break;
default:
break;
}
void handleINTERP(Value *dst0[4]);
+ uint8_t translateInterpMode(const struct nv50_ir_varying *var,
+ operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
return sym;
}
-static inline uint8_t
-translateInterpMode(const struct nv50_ir_varying *var, operation& op)
+uint8_t
+Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
- if (var->centroid)
+ if (var->centroid || info->io.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
delete targ;
}
-CodeEmitter::CodeEmitter(const Target *target) : targ(target)
+CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
{
}
}
}
info->bin.relocData = emit->getRelocInfo();
+ info->bin.interpData = emit->getInterpInfo();
emitSymbolTable(info);
return true;
}
+/* Record an interpolation instruction so that it can be re-encoded later.
+ *
+ * ipa and reg are stored together with codeSize >> 2 as the instruction
+ * location; apply is remembered as the callback that patches the recorded
+ * instructions.
+ *
+ * Returns false if the table could not be grown. The entries recorded so
+ * far are kept in that case, but the new one is not added.
+ */
+bool
+CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
+{
+   unsigned int n = interpInfo ? interpInfo->count : 0;
+
+   /* The table grows in steps of RELOC_ALLOC_INCREMENT entries. */
+   if (!(n % RELOC_ALLOC_INCREMENT)) {
+      size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
+      /* Grow through a temporary: assuming REALLOC follows realloc()
+       * semantics, a failed call leaves the old table allocated, so
+       * overwriting interpInfo with NULL would leak it and drop every
+       * entry recorded so far.
+       */
+      InterpInfo *grown = reinterpret_cast<InterpInfo *>(
+         REALLOC(interpInfo, n ? size : 0,
+                 size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
+      if (!grown)
+         return false;
+      /* A fresh table starts with count == 0 and no callback. */
+      if (n == 0)
+         memset(grown, 0, sizeof(InterpInfo));
+      interpInfo = grown;
+   }
+   ++interpInfo->count;
+
+   interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
+   interpInfo->apply = apply;
+
+   return true;
+}
+
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
info->entry[i].apply(code, info);
}
+/* Apply the recorded interpolation fixups to a program binary.
+ *
+ * interpData is the table produced by CodeEmitter::addInterp(), or NULL
+ * when no interpolation instruction was recorded, in which case nothing is
+ * done. code is the binary to patch in place.
+ *
+ * force_persample_interp: all non-flat -> per-sample
+ * flatshade: all color -> flat
+ */
+void
+nv50_ir_change_interp(void *interpData, uint32_t *code,
+                      bool force_persample_interp, bool flatshade)
+{
+   nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>(
+      interpData);
+
+   /* The emitter starts with a NULL table and only allocates one on the
+    * first addInterp() call, so NULL is a legitimate input.
+    */
+   if (!info)
+      return;
+
+   for (unsigned i = 0; i < info->count; ++i)
+      info->apply(&info->entry[i], code, force_persample_interp, flatshade);
+}
+
void
nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size)
RelocEntry entry[0];
};
+/* One recorded interpolation instruction. The three bit-fields add up to
+ * 32 bits.
+ */
+struct InterpEntry
+{
+   uint32_t ipa:4;  // Interpolation mode; SC mode is used to identify colors
+   uint32_t reg:8;  // The register used for perspective division
+   uint32_t loc:20; // Instruction location; 20 bits allow for 1M locations
+
+   // Values wider than their field are truncated to the field width.
+   InterpEntry(int ipaMode, int divReg, int location)
+   {
+      ipa = ipaMode;
+      reg = divReg;
+      loc = location;
+   }
+};
+
+/* Callback that re-encodes one recorded instruction inside a code buffer;
+ * the two flags are the ones passed to nv50_ir_change_interp().
+ */
+typedef void (*InterpApply)(const InterpEntry *entry, uint32_t *code,
+                            bool force_persample_interp, bool flatshade);
+
+/* Header of the interpolation fixup table; the entries are stored directly
+ * behind it in the same allocation.
+ */
+struct InterpInfo
+{
+   uint32_t count;        // Number of valid elements in entry[]
+   InterpApply apply;     // Callback used to patch every entry
+   InterpEntry entry[0];  // Trailing array, sized by the allocation
+};
+
class CodeEmitter
{
public:
inline void *getRelocInfo() const { return relocInfo; }
+ bool addInterp(int ipa, int reg, InterpApply apply);
+ inline void *getInterpInfo() const { return interpInfo; }
+
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
uint32_t codeSizeLimit;
RelocInfo *relocInfo;
+ InterpInfo *interpInfo;
};
for (i = 0; i < info->numInputs; ++i) {
m = nvc0_hdr_interp_mode(&info->in[i]);
+ if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
+ fp->fp.colors |= 1 << info->in[i].si;
+ if (info->in[i].sc)
+ fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
+ }
for (c = 0; c < 4; ++c) {
if (!(info->in[i].mask & (1 << c)))
continue;
info->io.genUserClip = prog->vp.num_ucps;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
- info->io.sampleInterp = prog->fp.sample_interp;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
+ prog->interps = info->bin.interpData;
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->num_barriers = info->numBarriers;
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
+ if (prog->interps) {
+ nv50_ir_change_interp(prog->interps, prog->code,
+ prog->fp.force_persample_interp,
+ prog->fp.flatshade);
+ for (int i = 0; i < 2; i++) {
+ unsigned mask = prog->fp.color_interp[i] >> 4;
+ unsigned interp = prog->fp.color_interp[i] & 3;
+ if (!mask)
+ continue;
+ prog->hdr[14] &= ~(0xff << (8 * i));
+ if (prog->fp.flatshade)
+ interp = NVC0_INTERP_FLAT;
+ for (int c = 0; c < 4; c++)
+ if (mask & (1 << c))
+ prog->hdr[14] |= interp << (2 * (4 * i + c));
+ }
+ }
#ifdef DEBUG
if (debug_get_bool_option("NV50_PROG_DEBUG", false))
FREE(prog->code); /* may be 0 for hardcoded shaders */
FREE(prog->immd_data);
FREE(prog->relocs);
+ FREE(prog->interps);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
FREE(prog->cp.syms);
if (prog->tfb) {
} vp;
struct {
uint8_t early_z;
- uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
- uint8_t sample_interp;
+ uint8_t colors;
+ uint8_t color_interp[2];
+ bool force_persample_interp;
+ bool flatshade;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */
uint8_t num_barriers;
void *relocs;
+ void *interps;
struct nvc0_transform_feedback_state *tfb;
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ bool flatshade;
uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *fp = nvc0->fragprog;
+ struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
- fp->fp.sample_interp = nvc0->min_samples > 1;
+ if (fp->fp.force_persample_interp != rast->force_persample_interp) {
+ /* Force the program to be reuploaded, which will trigger interp fixups
+ * to get applied
+ */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.force_persample_interp = rast->force_persample_interp;
+ }
+
+ /* Shade model works well enough when both colors follow it. However if one
+ * (or both) is explicitly set, then we have to go the patching route.
+ */
+ bool has_explicit_color = fp->fp.colors &&
+ (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
+ ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
+ bool hwflatshade = false;
+ if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
+ /* Force re-upload */
+ if (fp->mem)
+ nouveau_heap_free(&fp->mem);
+
+ fp->fp.flatshade = rast->flatshade;
+
+ /* Always smooth-shade in this mode, the shader will decide on its own
+ * when to flat-shade.
+ */
+ } else if (!has_explicit_color) {
+ hwflatshade = rast->flatshade;
+
+ /* No need to binary-patch the shader each time, make sure that it's set
+ * up for the default behaviour.
+ */
+ fp->fp.flatshade = 0;
+ }
+
+ if (hwflatshade != nvc0->state.flatshade) {
+ nvc0->state.flatshade = hwflatshade;
+ BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
+ PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ }
+
+ if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
+ return;
+ }
if (!nvc0_program_validate(nvc0, fp))
return;
* always emit 16 commands, one for each scissor rectangle, here.
*/
- SB_BEGIN_3D(so, SHADE_MODEL, 1);
- SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
- NVC0_3D_SHADE_MODEL_SMOOTH);
SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
- { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
struct nvc0_rasterizer_stateobj {
struct pipe_rasterizer_state pipe;
int size;
- uint32_t state[44];
+ uint32_t state[42];
};
struct nvc0_zsa_stateobj {