nvc0: bind images on compute shaders for Kepler
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 7 Apr 2016 22:56:54 +0000 (00:56 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 26 Apr 2016 17:47:49 +0000 (19:47 +0200)
Old surfaces validation code will be removed once images are completely
done for Fermi/Kepler, that explains why I only disable it for now.

This also introduces nvc0_get_surface_dims() which computes correct
dimensions regarding the given target.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_context.h
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c

index 17733f51317686cdd1726cfa399958d8eeb1ca4b..e6bad3d61bc028fcf4db751accda302058007ec1 100644 (file)
 /* 32 user buffers, at 4 32-bits integers each */
 #define NVC0_CB_AUX_BUF_INFO(i)     0x200 + (i) * 4 * 4
 #define NVC0_CB_AUX_BUF_SIZE        (NVC0_MAX_BUFFERS * 4 * 4)
+/* 8 surfaces, at 16 32-bits integers each */
+#define NVC0_CB_AUX_SU_INFO(i)      0x400 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_SIZE         (NVC0_MAX_IMAGES * 16 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
 
@@ -324,7 +327,7 @@ void nvc0_validate_textures(struct nvc0_context *);
 void nvc0_validate_samplers(struct nvc0_context *);
 void nve4_set_tex_handles(struct nvc0_context *);
 void nvc0_validate_surfaces(struct nvc0_context *);
-void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
+void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_image_view *,
                            struct nvc0_screen *);
 
 struct pipe_sampler_view *
index 0f66e2a0bbd4f7254557be93d0c7377b89c15307..8e0285b1bcd6f321939181dd2582d75b1be21bed 100644 (file)
@@ -551,11 +551,13 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
          info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
          info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
          info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
+         info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
+      } else {
+         info->io.suInfoBase = 0; /* TODO */
       }
       info->io.msInfoCBSlot = 0;
       info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
       info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
-      info->io.suInfoBase = 0; /* TODO */
    } else {
       if (chipset >= NVISA_GK104_CHIPSET) {
          info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
index 99e63825edaff1881f5c1784cd49f570fa587a8f..300078f6a6c33148850c00f2246bdc45ff93ae29 100644 (file)
@@ -743,23 +743,59 @@ static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
 
+static void
+nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
+                      int *depth)
+{
+   struct nv04_resource *res = nv04_resource(view->resource);
+   int level;
+
+   *width = *height = *depth = 1;
+   if (res->base.target == PIPE_BUFFER) {
+      *width = view->u.buf.last_element - view->u.buf.first_element + 1;
+      return;
+   }
+
+   level = view->u.tex.level;
+   *width = u_minify(view->resource->width0, level);
+   *height = u_minify(view->resource->height0, level);
+   *depth = u_minify(view->resource->depth0, level);
+
+   switch (res->base.target) {
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+      break;
+   default:
+      assert(!"unexpected texture target");
+      break;
+   }
+}
+
 void
 nve4_set_surface_info(struct nouveau_pushbuf *push,
-                      struct pipe_surface *psf,
+                      struct pipe_image_view *view,
                       struct nvc0_screen *screen)
 {
-   struct nv50_surface *sf = nv50_surface(psf);
    struct nv04_resource *res;
    uint64_t address;
    uint32_t *const info = push->cur;
+   int width, height, depth;
    uint8_t log2cpp;
 
-   if (psf && !nve4_su_format_map[psf->format])
+   if (view && !nve4_su_format_map[view->format])
       NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
 
    push->cur += 16;
 
-   if (!psf || !nve4_su_format_map[psf->format]) {
+   if (!view || !nve4_su_format_map[view->format]) {
       memset(info, 0, 16 * sizeof(*info));
 
       info[0] = 0xbadf0000;
@@ -768,13 +804,16 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
          screen->lib_code->start;
       return;
    }
-   res = nv04_resource(sf->base.texture);
+   res = nv04_resource(view->resource);
+
+   address = res->address;
 
-   address = res->address + sf->offset;
+   /* get surface dimensions based on the target. */
+   nvc0_get_surface_dims(view, &width, &height, &depth);
 
-   info[8] = sf->width;
-   info[9] = sf->height;
-   info[10] = sf->depth;
+   info[8] = width;
+   info[9] = height;
+   info[10] = depth;
    switch (res->base.target) {
    case PIPE_TEXTURE_1D_ARRAY:
       info[11] = 1;
@@ -795,17 +834,17 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
       info[11] = 0;
       break;
    }
-   log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
+   log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
 
-   info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+   info[12] = nve4_suldp_lib_offset[view->format] + screen->lib_code->start;
 
    /* limit in bytes for raw access */
-   info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+   info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
 
-   info[1] = nve4_su_format_map[sf->base.format];
+   info[1] = nve4_su_format_map[view->format];
 
 #if 0
-   switch (util_format_get_blocksizebits(sf->base.format)) {
+   switch (util_format_get_blocksizebits(view->format)) {
    case  16: info[1] |= 1 << 16; break;
    case  32: info[1] |= 2 << 16; break;
    case  64: info[1] |= 3 << 16; break;
@@ -816,13 +855,13 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
 #else
    info[1] |= log2cpp << 16;
    info[1] |=  0x4000;
-   info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+   info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
 #endif
 
    if (res->base.target == PIPE_BUFFER) {
       info[0]  = address >> 8;
-      info[2]  = sf->width - 1;
-      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+      info[2]  = width - 1;
+      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
       info[3]  = 0;
       info[4]  = 0;
       info[5]  = 0;
@@ -832,28 +871,29 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
       info[15] = 0;
    } else {
       struct nv50_miptree *mt = nv50_miptree(&res->base);
-      struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
-      const unsigned z = sf->base.u.tex.first_layer;
+      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
+      const unsigned z = view->u.tex.first_layer;
 
       if (z) {
          if (mt->layout_3d) {
-            address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+            address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
             /* doesn't work if z passes z-tile boundary */
-            assert(sf->depth == 1);
+            assert(depth == 1);
          } else {
             address += mt->layer_stride * z;
          }
       }
+
       info[0]  = address >> 8;
-      info[2]  = sf->width - 1;
+      info[2]  = width - 1;
       /* NOTE: this is really important: */
-      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
       info[3]  = (0x88 << 24) | (lvl->pitch / 64);
-      info[4]  = sf->height - 1;
+      info[4]  = height - 1;
       info[4] |= (lvl->tile_mode & 0x0f0) << 25;
       info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
       info[5]  = mt->layer_stride >> 8;
-      info[6]  = sf->depth - 1;
+      info[6]  = depth - 1;
       info[6] |= (lvl->tile_mode & 0xf00) << 21;
       info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
       info[7]  = 0;
index 441166ac4182955f2c583f842026d1ed98bf6a60..05a49b1c9ad08fd90a7b391e27bf006168d6c8ca 100644 (file)
@@ -192,7 +192,44 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    return 0;
 }
 
+static void
+nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint64_t address;
+   const int s = 5;
+   int i, j;
+
+   if (!nvc0->images_dirty[s])
+      return;
+
+   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
+
+   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, address + NVC0_CB_AUX_SU_INFO(0));
+   PUSH_DATA (push, address + NVC0_CB_AUX_SU_INFO(0));
+   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+   PUSH_DATA (push, 16 * NVC0_MAX_IMAGES * 4);
+   PUSH_DATA (push, 0x1);
+   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 16 * NVC0_MAX_IMAGES);
+   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
+      struct pipe_image_view *view = &nvc0->images[s][i];
+      if (view->resource) {
+         struct nv04_resource *res = nv04_resource(view->resource);
+
+         nve4_set_surface_info(push, view, screen);
+         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+      } else {
+         for (j = 0; j < 16; j++)
+            PUSH_DATA(push, 0);
+      }
+   }
+}
 
+/* Will be removed once images are completely done. */
+#if 0
 static void
 nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
 {
@@ -259,7 +296,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
 
    nvc0->surfaces_dirty[t] = 0;
 }
-
+#endif
 
 /* Thankfully, textures with samplers follow the normal rules. */
 static void