X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fnv50%2Fnv50_program.c;h=76d06aeddfef14caf546075f25a6d6b63e93263e;hb=75b47dda0c8895afe77858cbb67efa38e17e1838;hp=73df71c61e2f818f1ba26dfc405cc90ced5a761d;hpb=5eb7ff1175a644ffe3b0f1a75cb235400355f9fb;p=mesa.git diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 73df71c61e2..76d06aeddfe 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -25,7 +25,7 @@ #include "codegen/nv50_ir_driver.h" -static INLINE unsigned +static inline unsigned bitcount4(const uint32_t val) { static const uint8_t cnt[16] @@ -52,6 +52,9 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) for (c = 0; c < 4; ++c) if (info->in[i].mask & (1 << c)) info->in[i].slot[c] = n++; + + if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; } prog->in_nr = info->numInputs; @@ -62,7 +65,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) continue; case TGSI_SEMANTIC_VERTEXID: prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; - prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START; continue; default: break; @@ -100,6 +103,14 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) case TGSI_SEMANTIC_BCOLOR: prog->vp.bfc[info->out[i].si] = i; break; + case TGSI_SEMANTIC_LAYER: + prog->gp.has_layer = true; + prog->gp.layerid = n; + break; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + prog->gp.has_viewport = true; + prog->gp.viewportid = n; + break; default: break; } @@ -115,6 +126,8 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) } prog->out_nr = info->numOutputs; prog->max_out = n; + if (!prog->max_out) + prog->max_out = 1; if (prog->vp.psiz < info->numOutputs) prog->vp.psiz = prog->out[prog->vp.psiz].hw; @@ -135,7 +148,6 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) for (m = 0, i = 0; i < info->numInputs; ++i) { switch (info->in[i].sn) { case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_FACE: continue; default: m += info->in[i].flat ? 0 : 1; @@ -153,14 +165,13 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) for (c = 0; c < 4; ++c) if (info->in[i].mask & (1 << c)) info->in[i].slot[c] = nintp++; - } else - if (info->in[i].sn == TGSI_SEMANTIC_FACE) { - info->in[i].slot[0] = 255; } else { unsigned j = info->in[i].flat ? m++ : n++; if (info->in[i].sn == TGSI_SEMANTIC_COLOR) prog->vp.bfc[info->in[i].si] = j; + else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID) + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID; prog->in[j].id = i; prog->in[j].mask = info->in[i].mask; @@ -219,8 +230,10 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4); } - if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) { info->out[info->io.sampleMask].slot[0] = prog->max_out++; + prog->fp.has_samplemask = 1; + } if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) info->out[info->io.fragDepth].slot[2] = prog->max_out++; @@ -241,6 +254,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) return nv50_vertprog_assign_slots(info); case PIPE_SHADER_FRAGMENT: return nv50_fragprog_assign_slots(info); + case PIPE_SHADER_COMPUTE: + return 0; default: return -1; } @@ -292,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, const unsigned r = pso->output[i].register_index; b = pso->output[i].output_buffer; + if (r >= info->numOutputs) + continue; + for (c = 0; c < pso->output[i].num_components; ++c) so->map[base[b] + p + c] = info->out[r].slot[s + c]; } @@ -299,25 +317,33 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, return so; } -boolean -nv50_program_translate(struct nv50_program *prog, uint16_t chipset) +bool +nv50_program_translate(struct nv50_program *prog, uint16_t chipset, + struct pipe_debug_callback *debug) { struct nv50_ir_prog_info *info; - int ret; + int i, ret; const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) - return FALSE; + return false; info->type = prog->type; info->target = chipset; info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; info->bin.source = (void *)prog->pipe.tokens; - info->io.ucpCBSlot = 15; - info->io.ucpBase = 0; + info->io.auxCBSlot = 15; + info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; + if (prog->fp.alphatest) + info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET; + + info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; + info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; + info->io.msInfoCBSlot = 15; + info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET; info->assignSlots = nv50_program_assign_varying_slots; @@ -327,7 +353,11 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) prog->vp.clpd[0] = map_undef; prog->vp.clpd[1] = map_undef; prog->vp.psiz = map_undef; - prog->gp.primid = 0x80; + prog->gp.has_layer = 0; + prog->gp.has_viewport = 0; + + if (prog->type == PIPE_SHADER_COMPUTE) + info->prop.cp.inputOffset = 0x10; info->driverPriv = prog; @@ -343,13 +373,22 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } - FREE(info->bin.syms); prog->code = info->bin.code; prog->code_size = info->bin.codeSize; prog->fixups = info->bin.relocData; + prog->interps = info->bin.fixupData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->mul_zero_wins = info->io.mul_zero_wins; + prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; + + prog->vp.clip_enable = (1 << info->io.clipDistances) - 1; + prog->vp.cull_enable = + ((1 << info->io.cullDistances) - 1) << info->io.clipDistances; + prog->vp.clip_mode = 0; + for (i = 0; i < info->io.cullDistances; ++i) + prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4); if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { @@ -358,31 +397,61 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) } if (info->prop.fp.usesDiscard) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; + } else + if (prog->type == PIPE_SHADER_GEOMETRY) { + switch (info->prop.gp.outputPrim) { + case PIPE_PRIM_LINE_STRIP: + prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + break; + case PIPE_PRIM_POINTS: + default: + assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS); + prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + break; + } + prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024); + } + + if (prog->type == PIPE_SHADER_COMPUTE) { + prog->cp.syms = info->bin.syms; + prog->cp.num_syms = info->bin.numSyms; + } else { + FREE(info->bin.syms); } if (prog->pipe.stream_output.num_outputs) prog->so = nv50_program_create_strmout_state(info, &prog->pipe.stream_output); + pipe_debug_message(debug, SHADER_INFO, + "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d", + prog->type, info->bin.tlsSpace, prog->max_gpr, + info->bin.instructions, info->bin.codeSize); + out: FREE(info); return !ret; } -boolean +bool nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) { struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); + uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; - case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; - case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); - return FALSE; + return false; } ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); @@ -399,28 +468,42 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); if (ret) { NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); - return FALSE; + return false; } } - prog->code_base = prog->mem->start; + + if (prog->type == PIPE_SHADER_COMPUTE) { + /* CP code must be uploaded in FP code segment. */ + prog_type = 1; + } else { + prog->code_base = prog->mem->start; + prog_type = prog->type; + } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); - if (ret < 0) - return FALSE; + if (ret < 0) { + nouveau_heap_free(&prog->mem); + return false; + } if (ret > 0) - nv50->state.new_tls_space = TRUE; + nv50->state.new_tls_space = true; if (prog->fixups) nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + if (prog->interps) + nv50_ir_apply_fixups(prog->interps, prog->code, + prog->fp.force_persample_interp, + false /* flatshade */, + prog->fp.alphatest - 1); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); PUSH_DATA (nv50->base.pushbuf, 0); - return TRUE; + return true; } void @@ -435,9 +518,12 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) FREE(p->code); FREE(p->fixups); - + FREE(p->interps); FREE(p->so); + if (type == PIPE_SHADER_COMPUTE) + FREE(p->cp.syms); + memset(p, 0, sizeof(*p)); p->pipe = pipe;