ilo: add support for indirect access of CONST in FS
[mesa.git] / src / gallium / drivers / nv50 / nv50_screen.c
index 873946f520d0c23b549e87d7a7bade777730d033..b6da3034ad0e50ccc648a574f1a808b8be6588e1 100644 (file)
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
- * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "util/u_format.h"
 #include "nv50_screen.h"
 
 #include "nouveau/nv_object.xml.h"
+#include <errno.h>
 
 #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
 # define NOUVEAU_GETPARAM_GRAPH_UNITS 13
 #endif
 
+/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
+#define LOCAL_WARPS_ALLOC 32
+/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
+#define STACK_WARPS_ALLOC 32
+
+#define THREADS_IN_WARP 32
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
 static boolean
 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                 enum pipe_format format,
@@ -40,6 +50,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                 unsigned sample_count,
                                 unsigned bindings)
 {
+   if (sample_count > 8)
+      return FALSE;
    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
       return FALSE;
    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
@@ -53,11 +65,6 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
          return FALSE;
       break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
-      /* HACK: GL requires equal formats for MS resolve and window is BGRA */
-      if (bindings & PIPE_BIND_RENDER_TARGET)
-         return FALSE;
    default:
       break;
    }
@@ -96,20 +103,27 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_NPOT_TEXTURES:
    case PIPE_CAP_ANISOTROPIC_FILTER:
    case PIPE_CAP_SCALED_RESOLVE:
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
       return 1;
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return 65536;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return 0;
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return 0;
+      /*
+      return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
+      */
    case PIPE_CAP_TWO_SIDED_STENCIL:
    case PIPE_CAP_DEPTH_CLIP_DISABLE:
-   case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
    case PIPE_CAP_POINT_SPRITE:
       return 1;
    case PIPE_CAP_SM3:
       return 1;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 130;
+      return 140;
    case PIPE_CAP_MAX_RENDER_TARGETS:
       return 8;
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -118,7 +132,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return 1;
-   case PIPE_CAP_TIMER_QUERY:
+   case PIPE_CAP_QUERY_TIMESTAMP:
+   case PIPE_CAP_QUERY_TIME_ELAPSED:
    case PIPE_CAP_OCCLUSION_QUERY:
       return 1;
    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
@@ -159,10 +174,22 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
       return 256;
+   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+      return 1; /* 256 for binding as RT, but that's not possible in GL */
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_TGSI_TEXCOORD:
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
       return 0;
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 1;
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+      return 0;
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
    default:
       NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
       return 0;
@@ -209,9 +236,11 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_MAX_PREDS:
       return 0;
    case PIPE_SHADER_CAP_MAX_TEMPS:
-      return NV50_CAP_MAX_PROGRAM_TEMPS;
+      return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
       return 1;
+   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+      return 0;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
    case PIPE_SHADER_CAP_INTEGERS:
@@ -256,8 +285,8 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
    if (screen->base.pushbuf)
       screen->base.pushbuf->user_priv = NULL;
 
-   if (screen->blitctx)
-      FREE(screen->blitctx);
+   if (screen->blitter)
+      nv50_blitter_destroy(screen);
 
    nouveau_bo_ref(NULL, &screen->code);
    nouveau_bo_ref(NULL, &screen->tls_bo);
@@ -270,8 +299,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
    nouveau_heap_destroy(&screen->gp_code_heap);
    nouveau_heap_destroy(&screen->fp_code_heap);
 
-   if (screen->tic.entries)
-      FREE(screen->tic.entries);
+   FREE(screen->tic.entries);
 
    nouveau_object_del(&screen->tesla);
    nouveau_object_del(&screen->eng2d);
@@ -311,7 +339,7 @@ nv50_screen_fence_update(struct pipe_screen *pscreen)
 }
 
 static void
-nv50_screen_init_hwctx(struct nv50_screen *screen, unsigned tls_space)
+nv50_screen_init_hwctx(struct nv50_screen *screen)
 {
    struct nouveau_pushbuf *push = screen->base.pushbuf;
    struct nv04_fifo *fifo;
@@ -411,7 +439,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen, unsigned tls_space)
    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
    PUSH_DATAh(push, screen->tls_bo->offset);
    PUSH_DATA (push, screen->tls_bo->offset);
-   PUSH_DATA (push, util_logbase2(tls_space / 8));
+   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
 
    BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
    PUSH_DATAh(push, screen->stack_bo->offset);
@@ -443,6 +471,18 @@ nv50_screen_init_hwctx(struct nv50_screen *screen, unsigned tls_space)
    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
 
+   /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
+   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+   PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
+   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
+   PUSH_DATAf(push, 0.0f);
+   PUSH_DATAf(push, 0.0f);
+   PUSH_DATAf(push, 0.0f);
+   PUSH_DATAf(push, 0.0f);
+   BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+   PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+
    /* max TIC (bits 4:8) & TSC bindings, per program type */
    for (i = 0; i < 3; ++i) {
       BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
@@ -508,6 +548,60 @@ nv50_screen_init_hwctx(struct nv50_screen *screen, unsigned tls_space)
    PUSH_KICK (push);
 }
 
+static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
+      uint64_t *tls_size)
+{
+   struct nouveau_device *dev = screen->base.device;
+   int ret;
+
+   screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
+         ONE_TEMP_SIZE;
+   if (nouveau_mesa_debug)
+      debug_printf("allocating space for %u temps\n",
+            util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
+   *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
+         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+                        *tls_size, NULL, &screen->tls_bo);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
+      return ret;
+   }
+
+   return 0;
+}
+
+int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
+{
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   int ret;
+   uint64_t tls_size;
+
+   if (tls_space < screen->cur_tls_space)
+      return 0;
+   if (tls_space > screen->max_tls_space) {
+      /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
+       * LOCAL_WARPS_NO_CLAMP) */
+      NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
+            (unsigned)(tls_space / ONE_TEMP_SIZE),
+            (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
+      return -ENOMEM;
+   }
+
+   nouveau_bo_ref(NULL, &screen->tls_bo);
+   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+   if (ret)
+      return ret;
+
+   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
+   PUSH_DATAh(push, screen->tls_bo->offset);
+   PUSH_DATA (push, screen->tls_bo->offset);
+   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
+
+   return 1;
+}
+
 struct pipe_screen *
 nv50_screen_create(struct nouveau_device *dev)
 {
@@ -516,7 +610,7 @@ nv50_screen_create(struct nouveau_device *dev)
    struct nouveau_object *chan;
    uint64_t value;
    uint32_t tesla_class;
-   unsigned stack_size, max_warps, tls_space;
+   unsigned stack_size;
    int ret;
 
    screen = CALLOC_STRUCT(nv50_screen);
@@ -637,10 +731,11 @@ nv50_screen_create(struct nouveau_device *dev)
 
    nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
 
-   max_warps  = util_bitcount(value & 0xffff);
-   max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
+   screen->TPs = util_bitcount(value & 0xffff);
+   screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
 
-   stack_size = max_warps * 64 * 8;
+   stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
+         STACK_WARPS_ALLOC * 64 * 8;
 
    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
                         &screen->stack_bo);
@@ -649,20 +744,24 @@ nv50_screen_create(struct nouveau_device *dev)
       goto fail;
    }
 
-   tls_space = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
-
-   screen->tls_size = tls_space * max_warps * 32;
+   uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
+         screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
+         ONE_TEMP_SIZE;
+   screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
+   screen->max_tls_space /= 2; /* half of vram */
 
-   if (nouveau_mesa_debug)
-      debug_printf("max_warps = %i, tls_size = %"PRIu64" KiB\n",
-                     max_warps, screen->tls_size >> 10);
+   /* hw can address max 64 KiB */
+   screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, screen->tls_size, NULL,
-                        &screen->tls_bo);
-   if (ret) {
-      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
+   uint64_t tls_size;
+   unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
+   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+   if (ret)
       goto fail;
-   }
+
+   if (nouveau_mesa_debug)
+      debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
+            screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
 
    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
                         &screen->uniforms);
@@ -681,10 +780,10 @@ nv50_screen_create(struct nouveau_device *dev)
    screen->tic.entries = CALLOC(4096, sizeof(void *));
    screen->tsc.entries = screen->tic.entries + 2048;
 
-   if (!nv50_blitctx_create(screen))
+   if (!nv50_blitter_create(screen))
       goto fail;
 
-   nv50_screen_init_hwctx(screen, tls_space);
+   nv50_screen_init_hwctx(screen);
 
    nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);