X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fnvc0%2Fnvc0_program.c;h=32487248c7aee7549b19d3ee3b37e264195f3b41;hb=23dfff0669ef351372379b517b455cee2f9bb9c7;hp=ccf3ecc3c5f37916cab7f1bc205e4c0db8622870;hpb=cd0dec0d9dfab642c51774c3f5788cbdf00b8c9b;p=mesa.git diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index ccf3ecc3c5f..32487248c7a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -22,6 +22,9 @@ #include "pipe/p_defines.h" +#include "compiler/nir/nir.h" +#include "tgsi/tgsi_ureg.h" + #include "nvc0/nvc0_context.h" #include "codegen/nv50_ir_driver.h" @@ -31,28 +34,28 @@ * 124 scalar varying values. */ static uint32_t -nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) +nvc0_shader_input_address(unsigned sn, unsigned si) { switch (sn) { - case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; + case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; case TGSI_SEMANTIC_PRIMID: return 0x060; case TGSI_SEMANTIC_LAYER: return 0x064; case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; case TGSI_SEMANTIC_PSIZE: return 0x06c; case TGSI_SEMANTIC_POSITION: return 0x070; - case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; case TGSI_SEMANTIC_FOG: return 0x2e8; case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; - case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; case TGSI_SEMANTIC_PCOORD: return 0x2e0; - case NV50_SEMANTIC_TESSCOORD: return 0x2f0; + case TGSI_SEMANTIC_TESSCOORD: return 0x2f0; case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; case TGSI_SEMANTIC_VERTEXID: return 0x2fc; case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; - case TGSI_SEMANTIC_FACE: return 0x3fc; default: assert(!"invalid TGSI input semantic"); return ~0; @@ -60,23 +63,25 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) } static uint32_t -nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) +nvc0_shader_output_address(unsigned sn, unsigned si) { switch (sn) { - case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4; + case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4; + case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10; case TGSI_SEMANTIC_PRIMID: return 0x060; case TGSI_SEMANTIC_LAYER: return 0x064; case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; case TGSI_SEMANTIC_PSIZE: return 0x06c; case TGSI_SEMANTIC_POSITION: return 0x070; - case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10; case TGSI_SEMANTIC_FOG: return 0x2e8; case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; - case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; + /* case TGSI_SEMANTIC_VIEWPORT_MASK: return 0x3a0; */ case TGSI_SEMANTIC_EDGEFLAG: return ~0; default: assert(!"invalid TGSI output semantic"); @@ -95,7 +100,7 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: info->in[i].mask = 0x1; info->in[i].slot[0] = - nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4; + nvc0_shader_input_address(info->in[i].sn, 0) / 4; continue; default: break; @@ -111,18 +116,11 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) static int nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) { - unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); unsigned offset; unsigned i, c; for (i = 0; i < info->numInputs; ++i) { - offset = nvc0_shader_input_address(info->in[i].sn, - info->in[i].si, ubase); - if (info->in[i].patch && offset >= 0x20) - offset = 0x20 + info->in[i].si * 0x10; - - if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD) - info->in[i].mask &= 3; + offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si); for (c = 0; c < 4; ++c) info->in[i].slot[c] = (offset + c * 0x4) / 4; @@ -137,10 +135,20 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) unsigned count = info->prop.fp.numColourResults * 4; unsigned i, c; + /* Compute the relative position of each color output, since skipped MRT + * positions will not have registers allocated to them. + */ + unsigned colors[8] = {0}; + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + colors[info->out[i].si] = 1; + for (i = 0, c = 0; i < 8; i++) + if (colors[i]) + colors[i] = c++; for (i = 0; i < info->numOutputs; ++i) if (info->out[i].sn == TGSI_SEMANTIC_COLOR) for (c = 0; c < 4; ++c) - info->out[i].slot[c] = info->out[i].si * 4 + c; + info->out[i].slot[c] = colors[info->out[i].si] * 4 + c; if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) info->out[info->io.sampleMask].slot[0] = count++; @@ -157,15 +165,11 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) static int nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) { - unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); unsigned offset; unsigned i, c; for (i = 0; i < info->numOutputs; ++i) { - offset = nvc0_shader_output_address(info->out[i].sn, - info->out[i].si, ubase); - if (info->out[i].patch && offset >= 0x20) - offset = 0x20 + info->out[i].si * 0x10; + offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si); for (c = 0; c < 4; ++c) info->out[i].slot[c] = (offset + c * 0x4) / 4; @@ -193,7 +197,7 @@ nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) return ret; } -static INLINE void +static inline void nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) { uint8_t min = (vp->hdr[4] >> 12) & 0xff; @@ -216,12 +220,8 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) continue; for (c = 0; c < 4; ++c) { a = info->in[i].slot[c]; - if (info->in[i].mask & (1 << c)) { - if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD) - vp->hdr[5 + a / 32] |= 1 << (a % 32); - else - nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]); - } + if (info->in[i].mask & (1 << c)) + vp->hdr[5 + a / 32] |= 1 << (a % 32); } } @@ -250,15 +250,24 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_VERTEXID: vp->hdr[10] |= 1 << 31; break; + case TGSI_SEMANTIC_TESSCOORD: + /* We don't have the mask, nor the slots populated. While this could + * be achieved, the vast majority of the time if either of the coords + * are read, then both will be read. + */ + nvc0_vtgp_hdr_update_oread(vp, 0x2f0 / 4); + nvc0_vtgp_hdr_update_oread(vp, 0x2f4 / 4); + break; default: break; } } - vp->vp.clip_enable = info->io.clipDistanceMask; - for (i = 0; i < 8; ++i) - if (info->io.cullDistanceMask & (1 << i)) - vp->vp.clip_mode |= 1 << (i * 4); + vp->vp.clip_enable = (1 << info->io.clipDistances) - 1; + vp->vp.cull_enable = + ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; + for (i = 0; i < info->io.cullDistances; ++i) + vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); if (info->io.genUserClip < 0) vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ @@ -272,12 +281,9 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) vp->hdr[0] = 0x20061 | (1 << 10); vp->hdr[4] = 0xff000; - vp->hdr[18] = info->io.clipDistanceMask; - return nvc0_vtgp_gen_header(vp, info); } -#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN) static void nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) { @@ -291,8 +297,6 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) break; case PIPE_PRIM_TRIANGLES: tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES; - if (info->prop.tp.winding > 0) - tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; break; case PIPE_PRIM_QUADS: tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS; @@ -301,18 +305,31 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) tp->tp.tess_mode = ~0; return; } - if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) - tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; + + /* It seems like lines want the "CW" bit to indicate they're connected, and + * spit out errors in dmesg when the "CONNECTED" bit is set. + */ + if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) { + if (info->prop.tp.domain == PIPE_PRIM_LINES) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; + else + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; + } + + /* Winding only matters for triangles/quads, not lines. */ + if (info->prop.tp.domain != PIPE_PRIM_LINES && + info->prop.tp.outputPrim != PIPE_PRIM_POINTS && + info->prop.tp.winding > 0) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; switch (info->prop.tp.partitioning) { - case PIPE_TESS_PART_INTEGER: - case PIPE_TESS_PART_POW2: + case PIPE_TESS_SPACING_EQUAL: tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL; break; - case PIPE_TESS_PART_FRACT_ODD: + case PIPE_TESS_SPACING_FRACTIONAL_ODD: tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD; break; - case PIPE_TESS_PART_FRACT_EVEN: + case PIPE_TESS_SPACING_FRACTIONAL_EVEN: tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN; break; default: @@ -320,16 +337,12 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) break; } } -#endif -#ifdef PIPE_SHADER_HULL static int nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) { unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ - tcp->tp.input_patch_size = info->prop.tp.inputPatchSize; - if (info->numPatchConstants) opcs = 8 + info->numPatchConstants * 4; @@ -342,18 +355,23 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) nvc0_vtgp_gen_header(tcp, info); + if (info->target >= NVISA_GM107_CHIPSET) { + /* On GM107+, the number of output patch components has moved in the TCP + * header, but it seems like blob still also uses the old position. + * Also, the high 8-bits are located inbetween the min/max parallel + * field and has to be set after updating the outputs. */ + tcp->hdr[3] = (opcs & 0x0f) << 28; + tcp->hdr[4] |= (opcs & 0xf0) << 16; + } + nvc0_tp_get_tess_mode(tcp, info); return 0; } -#endif -#ifdef PIPE_SHADER_DOMAIN static int nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) { - tep->tp.input_patch_size = ~0; - tep->hdr[0] = 0x20061 | (3 << 10); tep->hdr[4] = 0xff000; @@ -365,7 +383,6 @@ nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) return 0; } -#endif static int nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) @@ -392,7 +409,7 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) break; } - gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024); + gp->hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024); return nvc0_vtgp_gen_header(gp, info); } @@ -434,6 +451,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) for (i = 0; i < info->numInputs; ++i) { m = nvc0_hdr_interp_mode(&info->in[i]); + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) { + fp->fp.colors |= 1 << info->in[i].si; + if (info->in[i].sc) + fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); + } for (c = 0; c < 4; ++c) { if (!(info->in[i].mask & (1 << c))) continue; @@ -456,13 +478,30 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } } } + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) + fp->hdr[5] |= 0x30000000; for (i = 0; i < info->numOutputs; ++i) { if (info->out[i].sn == TGSI_SEMANTIC_COLOR) - fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; + fp->hdr[18] |= 0xf << (4 * info->out[i].si); } + /* There are no "regular" attachments, but the shader still needs to be + * executed. It seems like it wants to think that it has some color + * outputs in order to actually run. + */ + if (info->prop.fp.numColourResults == 0 && !info->prop.fp.writesDepth) + fp->hdr[18] |= 0xf; + fp->fp.early_z = info->prop.fp.earlyFragTests; + fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn; + fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer; + fp->fp.post_depth_coverage = info->prop.fp.postDepthCoverage; + + /* Mark position xy and layer as read */ + if (fp->fp.reads_framebuffer) + fp->hdr[5] |= 0x32000000; return 0; } @@ -486,11 +525,14 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, for (i = 0; i < pso->num_outputs; ++i) { unsigned s = pso->output[i].start_component; unsigned p = pso->output[i].dst_offset; + const unsigned r = pso->output[i].register_index; b = pso->output[i].output_buffer; + if (r >= info->numOutputs) + continue; + for (c = 0; c < pso->output[i].num_components; ++c) - tfb->varying_index[b][p++] = - info->out[pso->output[i].register_index].slot[s + c]; + tfb->varying_index[b][p++] = info->out[r].slot[s + c]; tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); tfb->stream[b] = pso->output[i].stream; @@ -502,14 +544,15 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, return tfb; } -#ifdef DEBUG +#ifndef NDEBUG static void nvc0_program_dump(struct nvc0_program *prog) { unsigned pos; if (prog->type != PIPE_SHADER_COMPUTE) { - for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos) + debug_printf("dumping HDR for type %i\n", prog->type); + for (pos = 0; pos < ARRAY_SIZE(prog->hdr); ++pos) debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n", pos * sizeof(prog->hdr[0]), prog->hdr[pos]); } @@ -524,7 +567,8 @@ nvc0_program_dump(struct nvc0_program *prog) #endif bool -nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) +nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, + struct pipe_debug_callback *debug) { struct nv50_ir_prog_info *info; int ret; @@ -535,43 +579,58 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->type = prog->type; info->target = chipset; - info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; - info->bin.source = (void *)prog->pipe.tokens; + info->bin.sourceRep = prog->pipe.type; + switch (prog->pipe.type) { + case PIPE_SHADER_IR_TGSI: + info->bin.source = (void *)prog->pipe.tokens; + break; + case PIPE_SHADER_IR_NIR: + info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir); + break; + default: + assert(!"unsupported IR!"); + free(info); + return false; + } + +#ifndef NDEBUG + info->target = debug_get_num_option("NV50_PROG_CHIPSET", chipset); + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); + info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0); +#else + info->optLevel = 3; +#endif + + info->bin.smemSize = prog->cp.smem_size; info->io.genUserClip = prog->vp.num_ucps; - info->io.ucpBase = 256; - info->io.ucpCBSlot = 15; - info->io.sampleInterp = prog->fp.sample_interp; + info->io.auxCBSlot = 15; + info->io.msInfoCBSlot = 15; + info->io.ucpBase = NVC0_CB_AUX_UCP_INFO; + info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO; + info->io.msInfoBase = NVC0_CB_AUX_MS_INFO; + info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0); + info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0); + if (info->target >= NVISA_GK104_CHIPSET) { + info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); + info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; + info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0); + } if (prog->type == PIPE_SHADER_COMPUTE) { - if (chipset >= NVISA_GK104_CHIPSET) { - info->io.resInfoCBSlot = 0; - info->io.texBindBase = NVE4_CP_INPUT_TEX(0); - info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); - info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + if (info->target >= NVISA_GK104_CHIPSET) { + info->io.auxCBSlot = 7; + info->io.msInfoCBSlot = 7; + info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0); } - info->io.msInfoCBSlot = 0; - info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; + info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO(0); } else { - if (chipset >= NVISA_GK104_CHIPSET) { - info->io.texBindBase = 0x20; - info->io.suInfoBase = 0; /* TODO */ - } - info->io.resInfoCBSlot = 15; - info->io.sampleInfoBase = 256 + 128; - info->io.msInfoCBSlot = 15; - info->io.msInfoBase = 0; /* TODO */ + info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO; } info->assignSlots = nvc0_program_assign_varying_slots; -#ifdef DEBUG - info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); - info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); -#else - info->optLevel = 3; -#endif - ret = nv50_ir_generate_code(info); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); @@ -582,13 +641,14 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) prog->code = info->bin.code; prog->code_size = info->bin.codeSize; - prog->immd_data = info->immd.buf; - prog->immd_size = info->immd.bufSize; prog->relocs = info->bin.relocData; + prog->fixups = info->bin.fixupData; prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->cp.smem_size = info->bin.smemSize; prog->num_barriers = info->numBarriers; prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + prog->vp.need_draw_parameters = info->prop.vp.usesDrawParameters; if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ @@ -598,16 +658,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) case PIPE_SHADER_VERTEX: ret = nvc0_vp_gen_header(prog, info); break; -#ifdef PIPE_SHADER_HULL - case PIPE_SHADER_HULL: + case PIPE_SHADER_TESS_CTRL: ret = nvc0_tcp_gen_header(prog, info); break; -#endif -#ifdef PIPE_SHADER_DOMAIN - case PIPE_SHADER_DOMAIN: + case PIPE_SHADER_TESS_EVAL: ret = nvc0_tep_gen_header(prog, info); break; -#endif case PIPE_SHADER_GEOMETRY: ret = nvc0_gp_gen_header(prog, info); break; @@ -642,6 +698,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) } */ if (info->io.globalAccess) + prog->hdr[0] |= 1 << 26; + if (info->io.globalAccess & 0x2) prog->hdr[0] |= 1 << 16; if (info->io.fp64) prog->hdr[0] |= 1 << 27; @@ -650,28 +708,32 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) prog->tfb = nvc0_program_create_tfb_state(info, &prog->pipe.stream_output); + pipe_debug_message(debug, SHADER_INFO, + "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d", + prog->type, info->bin.tlsSpace, info->bin.smemSize, + prog->num_gprs, info->bin.instructions, + info->bin.codeSize); + +#ifndef NDEBUG + if (debug_get_option("NV50_PROG_CHIPSET", NULL) && info->dbgFlags) + nvc0_program_dump(prog); +#endif + out: + if (info->bin.sourceRep == PIPE_SHADER_IR_NIR) + ralloc_free((void *)info->bin.source); FREE(info); return !ret; } -bool -nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) +static inline int +nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) { struct nvc0_screen *screen = nvc0->screen; const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; int ret; uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); - uint32_t lib_pos = screen->lib_code->start; - uint32_t code_pos; - /* c[] bindings need to be aligned to 0x100, but we could use relocations - * to save space. */ - if (prog->immd_size) { - prog->immd_base = size; - size = align(size, 0x40); - size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ - } /* On Fermi, SP_START_ID must be aligned to 0x40. * On Kepler, the first instruction must be aligned to 0x80 because * latency information is expected only at certain positions. @@ -681,27 +743,9 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) size = align(size, 0x40); ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); - if (ret) { - struct nouveau_heap *heap = screen->text_heap; - /* Note that the code library, which is allocated before anything else, - * does not have a priv pointer. We can stop once we hit it. - */ - while (heap->next && heap->next->priv) { - struct nvc0_program *evict = heap->next->priv; - nouveau_heap_free(&evict->mem); - } - debug_printf("WARNING: out of code space, evicting all shaders.\n"); - ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); - if (ret) { - NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); - return false; - } - IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); - } + if (ret) + return ret; prog->code_base = prog->mem->start; - prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100); - assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= - prog->mem->start + prog->mem->size)); if (!is_cp) { if (screen->base.class_3d >= NVE4_3D_CLASS) { @@ -715,33 +759,131 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) break; } } - code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; } else { if (screen->base.class_3d >= NVE4_3D_CLASS) { if (prog->mem->start & 0x40) prog->code_base += 0x40; assert((prog->code_base & 0x7f) == 0x00); } - code_pos = prog->code_base; } - if (prog->relocs) - nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + return 0; +} -#ifdef DEBUG - if (debug_get_bool_option("NV50_PROG_DEBUG", false)) - nvc0_program_dump(prog); -#endif +static inline void +nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + struct nvc0_screen *screen = nvc0->screen; + const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; + uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + + if (prog->relocs) + nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, + screen->lib_code->start, 0); + if (prog->fixups) { + nv50_ir_apply_fixups(prog->fixups, prog->code, + prog->fp.force_persample_interp, + prog->fp.flatshade, + 0 /* alphatest */); + for (int i = 0; i < 2; i++) { + unsigned mask = prog->fp.color_interp[i] >> 4; + unsigned interp = prog->fp.color_interp[i] & 3; + if (!mask) + continue; + prog->hdr[14] &= ~(0xff << (8 * i)); + if (prog->fp.flatshade) + interp = NVC0_INTERP_FLAT; + for (int c = 0; c < 4; c++) + if (mask & (1 << c)) + prog->hdr[14] |= interp << (2 * (4 * i + c)); + } + } if (!is_cp) nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, - NV_VRAM_DOMAIN(&screen->base), NVC0_SHADER_HEADER_SIZE, prog->hdr); + NV_VRAM_DOMAIN(&screen->base), + NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0->base.push_data(&nvc0->base, screen->text, code_pos, - NV_VRAM_DOMAIN(&screen->base), prog->code_size, prog->code); - if (prog->immd_size) - nvc0->base.push_data(&nvc0->base, - screen->text, prog->immd_base, NV_VRAM_DOMAIN(&screen->base), - prog->immd_size, prog->immd_data); + NV_VRAM_DOMAIN(&screen->base), prog->code_size, + prog->code); +} + +bool +nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + struct nvc0_screen *screen = nvc0->screen; + const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; + int ret; + uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); + + ret = nvc0_program_alloc_code(nvc0, prog); + if (ret) { + struct nouveau_heap *heap = screen->text_heap; + struct nvc0_program *progs[] = { /* Sorted accordingly to SP_START_ID */ + nvc0->compprog, nvc0->vertprog, nvc0->tctlprog, + nvc0->tevlprog, nvc0->gmtyprog, nvc0->fragprog + }; + + /* Note that the code library, which is allocated before anything else, + * does not have a priv pointer. We can stop once we hit it. + */ + while (heap->next && heap->next->priv) { + struct nvc0_program *evict = heap->next->priv; + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); + + /* Make sure to synchronize before deleting the code segment. */ + IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); + + if ((screen->text->size << 1) <= (1 << 23)) { + ret = nvc0_screen_resize_text_area(screen, screen->text->size << 1); + if (ret) { + NOUVEAU_ERR("Error allocating TEXT area: %d\n", ret); + return false; + } + + /* Re-upload the builtin function into the new code segment. */ + nvc0_program_library_upload(nvc0); + } + + ret = nvc0_program_alloc_code(nvc0, prog); + if (ret) { + NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); + return false; + } + + /* All currently bound shaders have to be reuploaded. */ + for (int i = 0; i < ARRAY_SIZE(progs); i++) { + if (!progs[i] || progs[i] == prog) + continue; + + ret = nvc0_program_alloc_code(nvc0, progs[i]); + if (ret) { + NOUVEAU_ERR("failed to re-upload a shader after code eviction.\n"); + return false; + } + nvc0_program_upload_code(nvc0, progs[i]); + + if (progs[i]->type == PIPE_SHADER_COMPUTE) { + /* Caches have to be invalidated but the CP_START_ID will be + * updated in the launch_grid functions. */ + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE); + } else { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1); + PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base); + } + } + } + + nvc0_program_upload_code(nvc0, prog); + +#ifndef NDEBUG + if (debug_get_bool_option("NV50_PROG_DEBUG", false)) + nvc0_program_dump(prog); +#endif BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); PUSH_DATA (nvc0->base.pushbuf, 0x1011); @@ -785,8 +927,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->mem) nouveau_heap_free(&prog->mem); FREE(prog->code); /* may be 0 for hardcoded shaders */ - FREE(prog->immd_data); FREE(prog->relocs); + FREE(prog->fixups); if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) FREE(prog->cp.syms); if (prog->tfb) { @@ -815,3 +957,18 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) return prog->code_base + base + syms[i].offset; return prog->code_base; /* no symbols or symbol not found */ } + +void +nvc0_program_init_tcp_empty(struct nvc0_context *nvc0) +{ + struct ureg_program *ureg; + + ureg = ureg_create(PIPE_SHADER_TESS_CTRL); + if (!ureg) + return; + + ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1); + ureg_END(ureg); + + nvc0->tcp_empty = ureg_create_shader_and_destroy(ureg, &nvc0->base.pipe); +}