gallium: add PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_screen.c
index 8c06061c620b7f605713743d6a0d1d491ef70d48..25315913490b6bd307a1f5d1975bc469c903b7bb 100644 (file)
@@ -84,12 +84,17 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
        nouveau_screen(pscreen)->class_3d != NVEA_3D_CLASS)
       return false;
 
-   /* transfers & shared are always supported */
-   bindings &= ~(PIPE_BIND_TRANSFER_READ |
-                 PIPE_BIND_TRANSFER_WRITE |
-                 PIPE_BIND_LINEAR |
+   /* shared is always supported */
+   bindings &= ~(PIPE_BIND_LINEAR |
                  PIPE_BIND_SHARED);
 
+   if (bindings & PIPE_BIND_SHADER_IMAGE && sample_count > 1 &&
+       nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
+      /* MS images are currently unsupported on Maxwell because they have to
+       * be handled explicitly. */
+      return false;
+   }
+
    return (( nvc0_format_table[format].usage |
             nvc0_vertex_format[format].usage) & bindings) == bindings;
 }
@@ -121,7 +126,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       if (class_3d <= NVF0_3D_CLASS)
-         return 420;
+         return 430;
       return 410;
    case PIPE_CAP_MAX_RENDER_TARGETS:
       return 8;
@@ -157,6 +162,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return PIPE_ENDIAN_LITTLE;
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
       return 30;
+   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+      return NVC0_MAX_WINDOW_RECTANGLES;
 
    /* supported caps */
    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -164,6 +171,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_SHADOW_MAP:
    case PIPE_CAP_NPOT_TEXTURES:
    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
    case PIPE_CAP_ANISOTROPIC_FILTER:
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
    case PIPE_CAP_CUBE_MAP_ARRAY:
@@ -181,6 +189,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_QUERY_TIME_ELAPSED:
    case PIPE_CAP_OCCLUSION_QUERY:
    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
    case PIPE_CAP_INDEP_BLEND_ENABLE:
@@ -229,11 +238,15 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
    case PIPE_CAP_CULL_DISTANCE:
    case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+   case PIPE_CAP_TGSI_VOTE:
+   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
       return 1;
+   case PIPE_CAP_COMPUTE:
+      return (class_3d < GP100_3D_CLASS);
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
-   case PIPE_CAP_COMPUTE:
-      return 1;
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
       return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
 
@@ -260,7 +273,10 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_PCI_BUS:
    case PIPE_CAP_PCI_DEVICE:
    case PIPE_CAP_PCI_FUNCTION:
-   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
+   case PIPE_CAP_NATIVE_FENCE_FD:
+   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -296,11 +312,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_FRAGMENT:
    case PIPE_SHADER_COMPUTE:
-      break;
    case PIPE_SHADER_TESS_CTRL:
    case PIPE_SHADER_TESS_EVAL:
-      if (class_3d >= GM107_3D_CLASS)
-         return 0;
       break;
    default:
       return 0;
@@ -341,7 +354,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
       return shader != PIPE_SHADER_FRAGMENT;
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-      return shader != PIPE_SHADER_FRAGMENT || class_3d < GM107_3D_CLASS;
    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
       return 1;
@@ -365,13 +377,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+   case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
       return 0;
    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
       return NVC0_MAX_BUFFERS;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
-      return 16; /* would be 32 in linked (OpenGL-style) mode */
+      return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-      return 16; /* XXX not sure if more are really safe */
+      return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
    case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
       return 32;
    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
@@ -441,6 +454,12 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
       RET(((uint64_t []) { 1024, 1024, 64 }));
    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
       RET((uint64_t []) { 1024 });
+   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+      if (obj_class >= NVE4_COMPUTE_CLASS) {
+         RET((uint64_t []) { 1024 });
+      } else {
+         RET((uint64_t []) { 512 });
+      }
    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
       RET((uint64_t []) { 1ULL << 40 });
    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
@@ -469,6 +488,8 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
       RET((uint32_t []) { screen->mp_count_compute });
    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
       RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+      RET((uint32_t []) { 64 });
    default:
       return 0;
    }
@@ -654,12 +675,14 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    case 0x110:
    case 0x120:
       return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+   case 0x130:
+      return 0;
    default:
       return -1;
    }
 }
 
-bool
+static int
 nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
                             uint32_t lpos, uint32_t lneg, uint32_t cstack)
 {
@@ -669,7 +692,7 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
 
    if (size >= (1 << 20)) {
       NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
-      return false;
+      return -1;
    }
 
    size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
@@ -680,13 +703,47 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
 
    ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size,
                         NULL, &bo);
-   if (ret) {
-      NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
-      return false;
-   }
+   if (ret)
+      return ret;
    nouveau_bo_ref(NULL, &screen->tls);
    screen->tls = bo;
-   return true;
+   return 0;
+}
+
+int
+nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
+{
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+   struct nouveau_bo *bo;
+   int ret;
+
+   ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base),
+                        1 << 17, size, NULL, &bo);
+   if (ret)
+      return ret;
+
+   nouveau_bo_ref(NULL, &screen->text);
+   screen->text = bo;
+
+   nouveau_heap_destroy(&screen->lib_code);
+   nouveau_heap_destroy(&screen->text_heap);
+
+   /* XXX: getting a page fault at the end of the code buffer every few
+    *  launches, don't use the last 256 bytes to work around them - prefetch ?
+    */
+   nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
+
+   /* update the code segment setup */
+   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->text->offset);
+   PUSH_DATA (push, screen->text->offset);
+   if (screen->compute) {
+      BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, screen->text->offset);
+      PUSH_DATA (push, screen->text->offset);
+   }
+
+   return 0;
 }
 
 #define FAIL_SCREEN_INIT(str, err)                    \
@@ -716,6 +773,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    case 0x100:
    case 0x110:
    case 0x120:
+   case 0x130:
       break;
    default:
       return NULL;
@@ -728,10 +786,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    pscreen->destroy = nvc0_screen_destroy;
 
    ret = nouveau_screen_init(&screen->base, dev);
-   if (ret) {
-      nvc0_screen_destroy(pscreen);
-      return NULL;
-   }
+   if (ret)
+      FAIL_SCREEN_INIT("Base screen init failed: %d\n", ret);
    chan = screen->base.channel;
    push = screen->base.pushbuf;
    push->user_priv = screen;
@@ -768,7 +824,7 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
    if (ret)
-      goto fail;
+      FAIL_SCREEN_INIT("Error allocating fence BO: %d\n", ret);
    nouveau_bo_map(screen->fence.bo, 0, NULL);
    screen->fence.map = screen->fence.bo->map;
    screen->base.fence.emit = nvc0_screen_fence_emit;
@@ -784,6 +840,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATA (push, screen->nvsw->handle);
 
    switch (dev->chipset & ~0xf) {
+   case 0x130:
    case 0x120:
    case 0x110:
    case 0x100:
@@ -836,6 +893,9 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATA (push, screen->fence.bo->offset + 16);
 
    switch (dev->chipset & ~0xf) {
+   case 0x130:
+      obj_class = GP100_3D_CLASS;
+      break;
    case 0x120:
       obj_class = GM200_3D_CLASS;
       break;
@@ -896,7 +956,7 @@ nvc0_screen_create(struct nouveau_device *dev)
                     screen->base.drm->version >= 0x01000101);
    BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
    for (i = 0; i < 8; ++i)
-           PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
+      PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
 
    BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
    PUSH_DATA (push, 1);
@@ -936,46 +996,17 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    nvc0_magic_3d_init(push, screen->eng3d->oclass);
 
-   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
-                        &screen->text);
+   ret = nvc0_screen_resize_text_area(screen, 1 << 19);
    if (ret)
-      goto fail;
-
-   /* XXX: getting a page fault at the end of the code buffer every few
-    *  launches, don't use the last 256 bytes to work around them - prefetch ?
-    */
-   nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
+      FAIL_SCREEN_INIT("Error allocating TEXT area: %d\n", ret);
 
    ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL,
                         &screen->uniform_bo);
    if (ret)
-      goto fail;
+      FAIL_SCREEN_INIT("Error allocating uniform BO: %d\n", ret);
 
    PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR);
 
-   for (i = 0; i < 5; ++i) {
-      /* TIC and TSC entries for each unit (nve4+ only) */
-      /* auxiliary constants (6 user clip planes, base instance id) */
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, 2048);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
-      if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
-         unsigned j;
-         BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
-         PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
-         for (j = 0; j < 8; ++j)
-            PUSH_DATA(push, j);
-      } else {
-         BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
-         PUSH_DATA (push, 0x54);
-      }
-   }
-   BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
-   PUSH_DATA (push, 0);
-
    /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    PUSH_DATA (push, 256);
@@ -993,10 +1024,8 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    if (screen->base.drm->version >= 0x01000101) {
       ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
-      if (ret) {
-         NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
-         goto fail;
-      }
+      if (ret)
+         FAIL_SCREEN_INIT("NOUVEAU_GETPARAM_GRAPH_UNITS failed: %d\n", ret);
    } else {
       if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
          value = (8 << 8) | 4;
@@ -1007,11 +1036,10 @@ nvc0_screen_create(struct nouveau_device *dev)
    screen->mp_count = value >> 8;
    screen->mp_count_compute = screen->mp_count;
 
-   nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
+   ret = nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
+   if (ret)
+      FAIL_SCREEN_INIT("Error allocating TLS area: %d\n", ret);
 
-   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, screen->text->offset);
-   PUSH_DATA (push, screen->text->offset);
    BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
    PUSH_DATAh(push, screen->tls->offset);
    PUSH_DATA (push, screen->tls->offset);
@@ -1030,7 +1058,7 @@ nvc0_screen_create(struct nouveau_device *dev)
       ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
                            &screen->poly_cache);
       if (ret)
-         goto fail;
+         FAIL_SCREEN_INIT("Error allocating poly cache BO: %d\n", ret);
 
       BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
       PUSH_DATAh(push, screen->poly_cache->offset);
@@ -1041,7 +1069,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL,
                         &screen->txc);
    if (ret)
-      goto fail;
+      FAIL_SCREEN_INIT("Error allocating txc BO: %d\n", ret);
 
    BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3);
    PUSH_DATAh(push, screen->txc->offset);
@@ -1148,6 +1176,50 @@ nvc0_screen_create(struct nouveau_device *dev)
    if (nvc0_screen_init_compute(screen))
       goto fail;
 
+   /* XXX: Compute and 3D are somehow aliased on Fermi. */
+   for (i = 0; i < 5; ++i) {
+      /* TIC and TSC entries for each unit (nve4+ only) */
+      /* auxiliary constants (6 user clip planes, base instance id) */
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+      PUSH_DATA (push, (15 << 4) | 1);
+      if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
+         unsigned j;
+         BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+         PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
+         for (j = 0; j < 8; ++j)
+            PUSH_DATA(push, j);
+      } else {
+         BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
+         PUSH_DATA (push, 0x54);
+      }
+
+      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8);
+      PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
+      PUSH_DATA (push, 0); /* 0 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 1); /* 1 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0); /* 2 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 1); /* 3 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 2); /* 4 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 3); /* 5 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 2); /* 6 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 3); /* 7 */
+      PUSH_DATA (push, 1);
+   }
+   BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
+   PUSH_DATA (push, 0);
+
    PUSH_KICK (push);
 
    screen->tic.entries = CALLOC(4096, sizeof(void *));