gallium/util: replace pipe_mutex_init() with mtx_init()
[mesa.git] / src / gallium / drivers / nouveau / nv50 / nv50_program.c
index 73df71c61e2f818f1ba26dfc405cc90ced5a761d..76d06aeddfef14caf546075f25a6d6b63e93263e 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "codegen/nv50_ir_driver.h"
 
-static INLINE unsigned
+static inline unsigned
 bitcount4(const uint32_t val)
 {
    static const uint8_t cnt[16]
@@ -52,6 +52,9 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
       for (c = 0; c < 4; ++c)
          if (info->in[i].mask & (1 << c))
             info->in[i].slot[c] = n++;
+
+      if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
    }
    prog->in_nr = info->numInputs;
 
@@ -62,7 +65,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
          continue;
       case TGSI_SEMANTIC_VERTEXID:
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
-         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
+         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
          continue;
       default:
          break;
@@ -100,6 +103,14 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
       case TGSI_SEMANTIC_BCOLOR:
          prog->vp.bfc[info->out[i].si] = i;
          break;
+      case TGSI_SEMANTIC_LAYER:
+         prog->gp.has_layer = true;
+         prog->gp.layerid = n;
+         break;
+      case TGSI_SEMANTIC_VIEWPORT_INDEX:
+         prog->gp.has_viewport = true;
+         prog->gp.viewportid = n;
+         break;
       default:
          break;
       }
@@ -115,6 +126,8 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
    }
    prog->out_nr = info->numOutputs;
    prog->max_out = n;
+   if (!prog->max_out)
+      prog->max_out = 1;
 
    if (prog->vp.psiz < info->numOutputs)
       prog->vp.psiz = prog->out[prog->vp.psiz].hw;
@@ -135,7 +148,6 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
    for (m = 0, i = 0; i < info->numInputs; ++i) {
       switch (info->in[i].sn) {
       case TGSI_SEMANTIC_POSITION:
-      case TGSI_SEMANTIC_FACE:
          continue;
       default:
          m += info->in[i].flat ? 0 : 1;
@@ -153,14 +165,13 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
          for (c = 0; c < 4; ++c)
             if (info->in[i].mask & (1 << c))
                info->in[i].slot[c] = nintp++;
-      } else
-      if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
-         info->in[i].slot[0] = 255;
       } else {
          unsigned j = info->in[i].flat ? m++ : n++;
 
          if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
             prog->vp.bfc[info->in[i].si] = j;
+         else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
+            prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
 
          prog->in[j].id = i;
          prog->in[j].mask = info->in[i].mask;
@@ -219,8 +230,10 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
       prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
    }
 
-   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
       info->out[info->io.sampleMask].slot[0] = prog->max_out++;
+      prog->fp.has_samplemask = 1;
+   }
 
    if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
       info->out[info->io.fragDepth].slot[2] = prog->max_out++;
@@ -241,6 +254,8 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
       return nv50_vertprog_assign_slots(info);
    case PIPE_SHADER_FRAGMENT:
       return nv50_fragprog_assign_slots(info);
+   case PIPE_SHADER_COMPUTE:
+      return 0;
    default:
       return -1;
    }
@@ -292,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
       const unsigned r = pso->output[i].register_index;
       b = pso->output[i].output_buffer;
 
+      if (r >= info->numOutputs)
+         continue;
+
       for (c = 0; c < pso->output[i].num_components; ++c)
          so->map[base[b] + p + c] = info->out[r].slot[s + c];
    }
@@ -299,25 +317,33 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
    return so;
 }
 
-boolean
-nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+bool
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+                       struct pipe_debug_callback *debug)
 {
    struct nv50_ir_prog_info *info;
-   int ret;
+   int i, ret;
    const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
 
    info = CALLOC_STRUCT(nv50_ir_prog_info);
    if (!info)
-      return FALSE;
+      return false;
 
    info->type = prog->type;
    info->target = chipset;
    info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
    info->bin.source = (void *)prog->pipe.tokens;
 
-   info->io.ucpCBSlot = 15;
-   info->io.ucpBase = 0;
+   info->io.auxCBSlot = 15;
+   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
    info->io.genUserClip = prog->vp.clpd_nr;
+   if (prog->fp.alphatest)
+      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;
+
+   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
+   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
+   info->io.msInfoCBSlot = 15;
+   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
 
    info->assignSlots = nv50_program_assign_varying_slots;
 
@@ -327,7 +353,11 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
    prog->vp.clpd[0] = map_undef;
    prog->vp.clpd[1] = map_undef;
    prog->vp.psiz = map_undef;
-   prog->gp.primid = 0x80;
+   prog->gp.has_layer = 0;
+   prog->gp.has_viewport = 0;
+
+   if (prog->type == PIPE_SHADER_COMPUTE)
+      info->prop.cp.inputOffset = 0x10;
 
    info->driverPriv = prog;
 
@@ -343,13 +373,22 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
       NOUVEAU_ERR("shader translation failed: %i\n", ret);
       goto out;
    }
-   FREE(info->bin.syms);
 
    prog->code = info->bin.code;
    prog->code_size = info->bin.codeSize;
    prog->fixups = info->bin.relocData;
+   prog->interps = info->bin.fixupData;
    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
    prog->tls_space = info->bin.tlsSpace;
+   prog->mul_zero_wins = info->io.mul_zero_wins;
+   prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
+   prog->vp.clip_enable = (1 << info->io.clipDistances) - 1;
+   prog->vp.cull_enable =
+      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;
+   prog->vp.clip_mode = 0;
+   for (i = 0; i < info->io.cullDistances; ++i)
+      prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
 
    if (prog->type == PIPE_SHADER_FRAGMENT) {
       if (info->prop.fp.writesDepth) {
@@ -358,31 +397,61 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
       }
       if (info->prop.fp.usesDiscard)
          prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
+   } else
+   if (prog->type == PIPE_SHADER_GEOMETRY) {
+      switch (info->prop.gp.outputPrim) {
+      case PIPE_PRIM_LINE_STRIP:
+         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
+         break;
+      case PIPE_PRIM_TRIANGLE_STRIP:
+         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+         break;
+      case PIPE_PRIM_POINTS:
+      default:
+         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
+         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
+         break;
+      }
+      prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024);
+   }
+
+   if (prog->type == PIPE_SHADER_COMPUTE) {
+      prog->cp.syms = info->bin.syms;
+      prog->cp.num_syms = info->bin.numSyms;
+   } else {
+      FREE(info->bin.syms);
    }
 
    if (prog->pipe.stream_output.num_outputs)
       prog->so = nv50_program_create_strmout_state(info,
                                                    &prog->pipe.stream_output);
 
+   pipe_debug_message(debug, SHADER_INFO,
+                      "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+                      prog->type, info->bin.tlsSpace, prog->max_gpr,
+                      info->bin.instructions, info->bin.codeSize);
+
 out:
    FREE(info);
    return !ret;
 }
 
-boolean
+bool
 nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
 {
    struct nouveau_heap *heap;
    int ret;
    uint32_t size = align(prog->code_size, 0x40);
+   uint8_t prog_type;
 
    switch (prog->type) {
    case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
-   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
-   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
+   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
+   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
+   case PIPE_SHADER_COMPUTE:  heap = nv50->screen->fp_code_heap; break;
    default:
       assert(!"invalid program type");
-      return FALSE;
+      return false;
    }
 
    ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
@@ -399,28 +468,42 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
       ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
       if (ret) {
          NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
-         return FALSE;
+         return false;
       }
    }
-   prog->code_base = prog->mem->start;
+
+   if (prog->type == PIPE_SHADER_COMPUTE) {
+      /* CP code must be uploaded in FP code segment. */
+      prog_type = 1;
+   } else {
+      prog->code_base = prog->mem->start;
+      prog_type = prog->type;
+   }
 
    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
-   if (ret < 0)
-      return FALSE;
+   if (ret < 0) {
+      nouveau_heap_free(&prog->mem);
+      return false;
+   }
    if (ret > 0)
-      nv50->state.new_tls_space = TRUE;
+      nv50->state.new_tls_space = true;
 
    if (prog->fixups)
       nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+   if (prog->interps)
+      nv50_ir_apply_fixups(prog->interps, prog->code,
+                           prog->fp.force_persample_interp,
+                           false /* flatshade */,
+                           prog->fp.alphatest - 1);
 
    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
-                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+                       (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
 
    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
    PUSH_DATA (nv50->base.pushbuf, 0);
 
-   return TRUE;
+   return true;
 }
 
 void
@@ -435,9 +518,12 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
    FREE(p->code);
 
    FREE(p->fixups);
-
+   FREE(p->interps);
    FREE(p->so);
 
+   if (type == PIPE_SHADER_COMPUTE)
+      FREE(p->cp.syms);
+
    memset(p, 0, sizeof(*p));
 
    p->pipe = pipe;