radeonsi/gfx10: simplify a streamout loop in gfx10_emit_ngg_epilogue
[mesa.git] / src / gallium / drivers / svga / svga_resource_texture.c
index 39e11d3a6165a1540d656a66b3f1950d9a22f7c5..bfedbee7a488af813af190ed1d50903119f5f820 100644 (file)
@@ -116,19 +116,19 @@ svga_transfer_dma(struct svga_context *svga,
    /* Ensure any pending operations on host surfaces are queued on the command
     * buffer first.
     */
-   svga_surfaces_flush( svga );
+   svga_surfaces_flush(svga);
 
    if (!st->swbuf) {
       /* Do the DMA transfer in a single go */
       svga_transfer_dma_band(svga, st, transfer,
-                             st->base.box.x, st->base.box.y, st->base.box.z,
-                             st->base.box.width, st->base.box.height, st->base.box.depth,
+                             st->box.x, st->box.y, st->box.z,
+                             st->box.w, st->box.h, st->box.d,
                              0, 0, 0,
                              flags);
 
       if (transfer == SVGA3D_READ_HOST_VRAM) {
          svga_context_flush(svga, &fence);
-         sws->fence_finish(sws, fence, 0);
+         sws->fence_finish(sws, fence, PIPE_TIMEOUT_INFINITE, 0);
          sws->fence_reference(sws, &fence, NULL);
       }
    }
@@ -140,12 +140,12 @@ svga_transfer_dma(struct svga_context *svga,
       h = st->hw_nblocksy * blockheight;
       srcy = 0;
 
-      for (y = 0; y < st->base.box.height; y += h) {
+      for (y = 0; y < st->box.h; y += h) {
          unsigned offset, length;
          void *hw, *sw;
 
-         if (y + h > st->base.box.height)
-            h = st->base.box.height - y;
+         if (y + h > st->box.h)
+            h = st->box.h - y;
 
          /* Transfer band must be aligned to pixel block boundaries */
          assert(y % blockheight == 0);
@@ -175,8 +175,8 @@ svga_transfer_dma(struct svga_context *svga,
          }
 
          svga_transfer_dma_band(svga, st, transfer,
-                                st->base.box.x, y, st->base.box.z,
-                                st->base.box.width, h, st->base.box.depth,
+                                st->box.x, y, st->box.z,
+                                st->box.w, h, st->box.d,
                                 0, srcy, 0, flags);
 
          /*
@@ -187,7 +187,7 @@ svga_transfer_dma(struct svga_context *svga,
 
          if (transfer == SVGA3D_READ_HOST_VRAM) {
             svga_context_flush(svga, &fence);
-            sws->fence_finish(sws, fence, 0);
+            sws->fence_finish(sws, fence, PIPE_TIMEOUT_INFINITE, 0);
 
             hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ);
             assert(hw);
@@ -223,7 +223,7 @@ svga_texture_get_handle(struct pipe_screen *screen,
 
 static void
 svga_texture_destroy(struct pipe_screen *screen,
-                    struct pipe_resource *pt)
+                     struct pipe_resource *pt)
 {
    struct svga_screen *ss = svga_screen(screen);
    struct svga_texture *tex = svga_texture(pt);
@@ -241,7 +241,7 @@ svga_texture_destroy(struct pipe_screen *screen,
    /* Destroy the backed surface handle if exists */
    if (tex->backed_handle)
       svga_screen_surface_destroy(ss, &tex->backed_key, &tex->backed_handle);
-      
+
    ss->hud.total_resource_bytes -= tex->size;
 
    FREE(tex->defined);
@@ -255,40 +255,19 @@ svga_texture_destroy(struct pipe_screen *screen,
 }
 
 
-/**
- * Determine if the resource was rendered to
- */
-static inline boolean
-was_tex_rendered_to(struct pipe_resource *resource,
-                    const struct pipe_transfer *transfer)
-{
-   unsigned face;
-
-   if (resource->target == PIPE_TEXTURE_CUBE) {
-      assert(transfer->box.depth == 1);
-      face = transfer->box.z;
-   }
-   else {
-      face = 0;
-   }
-
-   return svga_was_texture_rendered_to(svga_texture(resource),
-                                       face, transfer->level);
-}
-
-
 /**
  * Determine if we need to read back a texture image before mapping it.
  */
 static inline boolean
-need_tex_readback(struct pipe_transfer *transfer)
+need_tex_readback(struct svga_transfer *st)
 {
-   if (transfer->usage & PIPE_TRANSFER_READ)
+   if (st->base.usage & PIPE_TRANSFER_READ)
       return TRUE;
 
-   if ((transfer->usage & PIPE_TRANSFER_WRITE) &&
-       ((transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) == 0)) {
-      return was_tex_rendered_to(transfer->resource, transfer);
+   if ((st->base.usage & PIPE_TRANSFER_WRITE) &&
+       ((st->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) == 0)) {
+      return svga_was_texture_rendered_to(svga_texture(st->base.resource),
+                                          st->slice, st->base.level);
    }
 
    return FALSE;
@@ -346,9 +325,9 @@ svga_texture_transfer_map_dma(struct svga_context *svga,
    unsigned usage = st->base.usage;
 
    /* we'll put the data into a tightly packed buffer */
-   nblocksx = util_format_get_nblocksx(texture->format, st->base.box.width);
-   nblocksy = util_format_get_nblocksy(texture->format, st->base.box.height);
-   d = st->base.box.depth;
+   nblocksx = util_format_get_nblocksx(texture->format, st->box.w);
+   nblocksy = util_format_get_nblocksy(texture->format, st->box.h);
+   d = st->box.d;
 
    st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
    st->base.layer_stride = st->base.stride * nblocksy;
@@ -414,28 +393,31 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
    struct svga_texture *tex = svga_texture(texture);
    struct svga_winsys_surface *surf = tex->handle;
    unsigned level = st->base.level;
-   unsigned w, h, nblocksx, nblocksy;
+   unsigned w, h, nblocksx, nblocksy, i;
    unsigned usage = st->base.usage;
 
-   if (need_tex_readback(transfer)) {
+   if (need_tex_readback(st)) {
       enum pipe_error ret;
 
       svga_surfaces_flush(svga);
 
-      if (svga_have_vgpu10(svga)) {
-         ret = readback_image_vgpu10(svga, surf, st->slice, level,
-                                     tex->b.b.last_level + 1);
-      } else {
-         ret = readback_image_vgpu9(svga, surf, st->slice, level);
-      }
-
-      svga->hud.num_readbacks++;
-      SVGA_STATS_COUNT_INC(sws, SVGA_STATS_COUNT_TEXREADBACK);
+      if (!svga->swc->force_coherent || tex->imported) {
+         for (i = 0; i < st->box.d; i++) {
+            if (svga_have_vgpu10(svga)) {
+               ret = readback_image_vgpu10(svga, surf, st->slice + i, level,
+                                           tex->b.b.last_level + 1);
+            } else {
+               ret = readback_image_vgpu9(svga, surf, st->slice + i, level);
+            }
+         }
+         svga->hud.num_readbacks++;
+         SVGA_STATS_COUNT_INC(sws, SVGA_STATS_COUNT_TEXREADBACK);
 
-      assert(ret == PIPE_OK);
-      (void) ret;
+         assert(ret == PIPE_OK);
+         (void) ret;
 
-      svga_context_flush(svga, NULL);
+         svga_context_flush(svga, NULL);
+      }
       /*
        * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
        * we could potentially clear the flag for all faces/layers/mips.
@@ -504,7 +486,8 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
       baseLevelSize.depth = tex->b.b.depth0;
 
       if ((tex->b.b.target == PIPE_TEXTURE_1D_ARRAY) ||
-          (tex->b.b.target == PIPE_TEXTURE_2D_ARRAY)) {
+          (tex->b.b.target == PIPE_TEXTURE_2D_ARRAY) ||
+          (tex->b.b.target == PIPE_TEXTURE_CUBE_ARRAY)) {
          st->base.layer_stride =
             svga3dsurface_get_image_offset(tex->key.format, baseLevelSize,
                                            tex->b.b.last_level + 1, 1, 0);
@@ -522,9 +505,10 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
 
       offset += svga3dsurface_get_pixel_offset(tex->key.format,
                                                mip_width, mip_height,
-                                               st->base.box.x,
-                                               st->base.box.y,
-                                               st->base.box.z);
+                                               st->box.x,
+                                               st->box.y,
+                                               st->box.z);
+
       return (void *) (map + offset);
    }
 }
@@ -572,15 +556,26 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    st->base.usage = usage;
    st->base.box = *box;
 
+   /* The modified transfer map box with the array index removed from z.
+    * The array index is specified in slice.
+    */
+   st->box.x = box->x;
+   st->box.y = box->y;
+   st->box.z = box->z;
+   st->box.w = box->width;
+   st->box.h = box->height;
+   st->box.d = box->depth;
+
    switch (tex->b.b.target) {
    case PIPE_TEXTURE_CUBE:
       st->slice = st->base.box.z;
-      st->base.box.z = 0;   /* so we don't apply double offsets below */
+      st->box.z = 0;   /* so we don't apply double offsets below */
       break;
-   case PIPE_TEXTURE_2D_ARRAY:
    case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
       st->slice = st->base.box.z;
-      st->base.box.z = 0;   /* so we don't apply double offsets below */
+      st->box.z = 0;   /* so we don't apply double offsets below */
 
       /* Force direct map for transfering multiple slices */
       if (st->base.box.depth > 1)
@@ -592,6 +587,13 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       break;
    }
 
+   /* Force direct map for multisample surface */
+   if (texture->nr_samples > 1) {
+      assert(svga_have_gb_objects(svga));
+      assert(sws->have_sm4_1);
+      use_direct_map = TRUE;
+   }
+
    st->use_direct_map = use_direct_map;
    pipe_resource_reference(&st->base.resource, texture);
 
@@ -609,7 +611,9 @@ svga_texture_transfer_map(struct pipe_context *pipe,
    else {
       boolean can_use_upload = tex->can_use_upload &&
                                !(st->base.usage & PIPE_TRANSFER_READ);
-      boolean was_rendered_to = was_tex_rendered_to(texture, &st->base);
+      boolean was_rendered_to =
+         svga_was_texture_rendered_to(svga_texture(texture),
+                                      st->slice, st->base.level);
 
       /* If the texture was already rendered to and upload buffer
        * is supported, then we will use upload buffer to
@@ -654,7 +658,7 @@ svga_texture_transfer_map(struct pipe_context *pipe,
       if (usage & PIPE_TRANSFER_WRITE) {
          /* record texture upload for HUD */
          svga->hud.num_bytes_uploaded +=
-            st->base.layer_stride * st->base.box.depth;
+            st->base.layer_stride * st->box.d;
 
          /* mark this texture level as dirty */
          svga_set_texture_dirty(tex, st->slice, level);
@@ -692,6 +696,15 @@ svga_texture_surface_unmap(struct svga_context *svga,
          ret = SVGA3D_BindGBSurface(swc, surf);
          assert(ret == PIPE_OK);
       }
+      if (swc->force_coherent) {
+         ret = SVGA3D_UpdateGBSurface(swc, surf);
+         if (ret != PIPE_OK) {
+            /* flush and retry */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_UpdateGBSurface(swc, surf);
+            assert(ret == PIPE_OK);
+         }
+      }
    }
 }
 
@@ -726,6 +739,7 @@ update_image_vgpu10(struct svga_context *svga,
    unsigned subResource;
 
    subResource = slice * numMipLevels + level;
+
    ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource);
    if (ret != PIPE_OK) {
       svga_context_flush(svga, NULL);
@@ -740,16 +754,19 @@ update_image_vgpu10(struct svga_context *svga,
  */
 static void
 svga_texture_transfer_unmap_dma(struct svga_context *svga,
-                               struct svga_transfer *st)
+                                struct svga_transfer *st)
 {
    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 
-   if (st->hwbuf) 
+   if (st->hwbuf)
       sws->buffer_unmap(sws, st->hwbuf);
 
    if (st->base.usage & PIPE_TRANSFER_WRITE) {
       /* Use DMA to transfer texture data */
       SVGA3dSurfaceDMAFlags flags;
+      struct pipe_resource *texture = st->base.resource;
+      struct svga_texture *tex = svga_texture(texture);
+
 
       memset(&flags, 0, sizeof flags);
       if (st->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
@@ -760,6 +777,7 @@ svga_texture_transfer_unmap_dma(struct svga_context *svga,
       }
 
       svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags);
+      svga_set_texture_rendered_to(tex, st->slice, st->base.level);
    }
 
    FREE(st->swbuf);
@@ -772,7 +790,7 @@ svga_texture_transfer_unmap_dma(struct svga_context *svga,
  */
 static void
 svga_texture_transfer_unmap_direct(struct svga_context *svga,
-                                  struct svga_transfer *st)
+                                   struct svga_transfer *st)
 {
    struct pipe_transfer *transfer = &st->base;
    struct svga_texture *tex = svga_texture(transfer->resource);
@@ -782,64 +800,58 @@ svga_texture_transfer_unmap_direct(struct svga_context *svga,
    /* Now send an update command to update the content in the backend. */
    if (st->base.usage & PIPE_TRANSFER_WRITE) {
       struct svga_winsys_surface *surf = tex->handle;
-      SVGA3dBox box;
       enum pipe_error ret;
-      unsigned nlayers = 1;
 
       assert(svga_have_gb_objects(svga));
 
       /* update the effected region */
-      box.x = transfer->box.x;
-      box.y = transfer->box.y;
-      box.w = transfer->box.width;
-      box.h = transfer->box.height;
-      box.d = transfer->box.depth;
+      SVGA3dBox box = st->box;
+      unsigned nlayers;
 
       switch (tex->b.b.target) {
-      case PIPE_TEXTURE_CUBE:
-         box.z = 0;
-         break;
       case PIPE_TEXTURE_2D_ARRAY:
-         nlayers = box.d;
-         box.z = 0;
-         box.d = 1;
-         break;
+      case PIPE_TEXTURE_CUBE_ARRAY:
       case PIPE_TEXTURE_1D_ARRAY:
          nlayers = box.d;
-         box.y = box.z = 0;
          box.d = 1;
          break;
       default:
-         box.z = transfer->box.z;
+         nlayers = 1;
          break;
       }
 
+
       if (0)
          debug_printf("%s %d, %d, %d  %d x %d x %d\n",
                       __FUNCTION__,
                       box.x, box.y, box.z,
                       box.w, box.h, box.d);
 
-      if (svga_have_vgpu10(svga)) {
-         unsigned i;
-         for (i = 0; i < nlayers; i++) {
-            ret = update_image_vgpu10(svga, surf, &box,
-                                      st->slice + i, transfer->level,
-                                      tex->b.b.last_level + 1);
+      if (!svga->swc->force_coherent || tex->imported) {
+         if (svga_have_vgpu10(svga)) {
+            unsigned i;
+
+            for (i = 0; i < nlayers; i++) {
+               ret = update_image_vgpu10(svga, surf, &box,
+                                         st->slice + i, transfer->level,
+                                         tex->b.b.last_level + 1);
+               assert(ret == PIPE_OK);
+            }
+         } else {
+            assert(nlayers == 1);
+            ret = update_image_vgpu9(svga, surf, &box, st->slice,
+                                     transfer->level);
             assert(ret == PIPE_OK);
          }
-      } else {
-         assert(nlayers == 1);
-         ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
-         assert(ret == PIPE_OK);
       }
       (void) ret;
    }
 }
 
+
 static void
 svga_texture_transfer_unmap(struct pipe_context *pipe,
-                           struct pipe_transfer *transfer)
+                            struct pipe_transfer *transfer)
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_screen *ss = svga_screen(pipe->screen);
@@ -915,6 +927,39 @@ svga_texture_create(struct pipe_screen *screen,
       goto fail_notex;
    }
 
+   /* Verify the number of mipmap levels isn't impossibly large.  For example,
+    * if the base 2D image is 16x16, we can't have 8 mipmap levels.
+    * The state tracker should never ask us to create a resource with invalid
+    * parameters.
+    */
+   {
+      unsigned max_dim = template->width0;
+
+      switch (template->target) {
+      case PIPE_TEXTURE_1D:
+      case PIPE_TEXTURE_1D_ARRAY:
+         // nothing
+         break;
+      case PIPE_TEXTURE_2D:
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_CUBE_ARRAY:
+      case PIPE_TEXTURE_2D_ARRAY:
+         max_dim = MAX2(max_dim, template->height0);
+         break;
+      case PIPE_TEXTURE_3D:
+         max_dim = MAX3(max_dim, template->height0, template->depth0);
+         break;
+      case PIPE_TEXTURE_RECT:
+      case PIPE_BUFFER:
+         assert(template->last_level == 0);
+         /* the assertion below should always pass */
+         break;
+      default:
+         debug_printf("Unexpected texture target type\n");
+      }
+      assert(1 << template->last_level <= max_dim);
+   }
+
    tex = CALLOC_STRUCT(svga_texture);
    if (!tex) {
       goto fail_notex;
@@ -951,12 +996,16 @@ svga_texture_create(struct pipe_screen *screen,
    tex->key.arraySize = 1;
    tex->key.numFaces = 1;
 
-   /* single sample texture can be treated as non-multisamples texture */
-   tex->key.sampleCount = template->nr_samples > 1 ? template->nr_samples : 0;
-
-   if (template->nr_samples > 1) {
-      tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
+   /* nr_samples=1 must be treated as a non-multisample texture */
+   if (tex->b.b.nr_samples == 1) {
+      tex->b.b.nr_samples = 0;
    }
+   else if (tex->b.b.nr_samples > 1) {
+      assert(svgascreen->sws->have_sm4_1);
+      tex->key.flags |= SVGA3D_SURFACE_MULTISAMPLE;
+   }
+
+   tex->key.sampleCount = tex->b.b.nr_samples;
 
    if (svgascreen->sws->have_vgpu10) {
       switch (template->target) {
@@ -977,6 +1026,12 @@ svga_texture_create(struct pipe_screen *screen,
          tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
          tex->key.numFaces = 6;
          break;
+      case PIPE_TEXTURE_CUBE_ARRAY:
+         assert(svgascreen->sws->have_sm4_1);
+         tex->key.flags |= (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_ARRAY);
+         tex->key.numFaces = 1;  // arraySize already includes the 6 faces
+         tex->key.arraySize = template->array_size;
+         break;
       default:
          break;
       }
@@ -1003,6 +1058,7 @@ svga_texture_create(struct pipe_screen *screen,
       if (screen->is_format_supported(screen, template->format,
                                       template->target,
                                       template->nr_samples,
+                                      template->nr_storage_samples,
                                       PIPE_BIND_SAMPLER_VIEW)) {
          bindings |= PIPE_BIND_SAMPLER_VIEW;
       }
@@ -1017,6 +1073,7 @@ svga_texture_create(struct pipe_screen *screen,
          if (screen->is_format_supported(screen, template->format,
                                          template->target,
                                          template->nr_samples,
+                                         template->nr_storage_samples,
                                          PIPE_BIND_RENDER_TARGET)) {
             bindings |= PIPE_BIND_RENDER_TARGET;
          }
@@ -1027,6 +1084,7 @@ svga_texture_create(struct pipe_screen *screen,
          if (screen->is_format_supported(screen, template->format,
                                          template->target,
                                          template->nr_samples,
+                                         template->nr_storage_samples,
                                          PIPE_BIND_DEPTH_STENCIL)) {
             bindings |= PIPE_BIND_DEPTH_STENCIL;
          }
@@ -1295,7 +1353,10 @@ boolean
 svga_texture_transfer_map_upload_create(struct svga_context *svga)
 {
    svga->tex_upload = u_upload_create(&svga->pipe, TEX_UPLOAD_DEFAULT_SIZE,
-                                      0, PIPE_USAGE_STAGING);
+                                      PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, 0);
+   if (svga->tex_upload)
+      u_upload_disable_persistent(svga->tex_upload);
+
    return svga->tex_upload != NULL;
 }
 
@@ -1368,6 +1429,7 @@ svga_texture_transfer_map_upload(struct svga_context *svga,
       st->upload.box.z = 0;
       break;
    case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_CUBE_ARRAY:
       st->upload.nlayers = st->base.box.depth;
       st->upload.box.z = 0;
       st->upload.box.d = 1;
@@ -1440,7 +1502,7 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga,
    struct svga_winsys_surface *dstsurf;
    struct pipe_resource *texture = st->base.resource;
    struct svga_texture *tex = svga_texture(texture);
-   enum pipe_error ret; 
+   enum pipe_error ret;
    unsigned subResource;
    unsigned numMipLevels;
    unsigned i, layer;
@@ -1448,11 +1510,11 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga,
 
    assert(svga->tex_upload);
    assert(st->upload.buf);
-   
+
    /* unmap the texture upload buffer */
    u_upload_unmap(svga->tex_upload);
 
-   srcsurf = svga_buffer_handle(svga, st->upload.buf);
+   srcsurf = svga_buffer_handle(svga, st->upload.buf, 0);
    dstsurf = svga_texture(texture)->handle;
    assert(dstsurf);
 
@@ -1505,6 +1567,9 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga,
 boolean
 svga_texture_device_format_has_alpha(struct pipe_resource *texture)
 {
+   /* the svga_texture() call below is invalid for PIPE_BUFFER resources */
+   assert(texture->target != PIPE_BUFFER);
+
    enum svga3d_block_desc block_desc =
       svga3dsurface_get_desc(svga_texture(texture)->key.format)->block_desc;