gallium: Add PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT
[mesa.git] / src / gallium / drivers / nouveau / nv50 / nv50_screen.c
index 375f0dca993826e9702718df043f424578e83ea7..20fb61b51f42cc7679e55e321a14cc75ff60a2bd 100644 (file)
@@ -20,6 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <errno.h>
+#include <xf86drm.h>
+#include <nouveau_drm.h>
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "pipe/p_screen.h"
 #include "nouveau_vp3_video.h"
 
 #include "nv_object.xml.h"
-#include <errno.h>
-
-#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
-# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
-#endif
 
 /* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
 #define LOCAL_WARPS_ALLOC 32
@@ -43,8 +41,6 @@
 
 #define THREADS_IN_WARP 32
 
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
 static boolean
 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                 enum pipe_format format,
@@ -53,36 +49,47 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                 unsigned bindings)
 {
    if (sample_count > 8)
-      return FALSE;
+      return false;
    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
-      return FALSE;
+      return false;
    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
-      return FALSE;
+      return false;
 
    if (!util_format_is_supported(format, bindings))
-      return FALSE;
+      return false;
 
    switch (format) {
    case PIPE_FORMAT_Z16_UNORM:
       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
-         return FALSE;
+         return false;
       break;
    default:
       break;
    }
 
+   if (bindings & PIPE_BIND_LINEAR)
+      if (util_format_is_depth_or_stencil(format) ||
+          (target != PIPE_TEXTURE_1D &&
+           target != PIPE_TEXTURE_2D &&
+           target != PIPE_TEXTURE_RECT) ||
+          sample_count > 1)
+         return false;
+
    /* transfers & shared are always supported */
    bindings &= ~(PIPE_BIND_TRANSFER_READ |
                  PIPE_BIND_TRANSFER_WRITE |
+                 PIPE_BIND_LINEAR |
                  PIPE_BIND_SHARED);
 
-   return (nv50_format_table[format].usage & bindings) == bindings;
+   return (( nv50_format_table[format].usage |
+            nv50_vertex_format[format].usage) & bindings) == bindings;
 }
 
 static int
 nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 {
    const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
 
    switch (param) {
    /* non-boolean caps */
@@ -101,7 +108,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXEL_OFFSET:
       return 7;
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
       return 330;
    case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -118,10 +125,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
       return 1024;
    case PIPE_CAP_MAX_VERTEX_STREAMS:
       return 1;
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return 2048;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
       return 256;
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-      return 1; /* 256 for binding as RT, but that's not possible in GL */
+      return 16; /* 256 for binding as RT, but that's not possible in GL */
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
       return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
    case PIPE_CAP_MAX_VIEWPORTS:
@@ -169,6 +178,21 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_USER_VERTEX_BUFFERS:
    case PIPE_CAP_TEXTURE_MULTISAMPLE:
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_CLIP_HALFZ:
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
+   case PIPE_CAP_COMPUTE:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -180,6 +204,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_INDEP_BLEND_FUNC:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
       return class_3d >= NVA3_3D_CLASS;
 
    /* unsupported caps */
@@ -192,14 +217,52 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_TGSI_TEXCOORD:
-   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-   case PIPE_CAP_TGSI_VS_LAYER:
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_TEXTURE_GATHER_SM5:
    case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
-   case PIPE_CAP_COMPUTE:
    case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+   case PIPE_CAP_VERTEXID_NOBASE:
+   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
+   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_DRAW_PARAMETERS:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_STRING_MARKER:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_QUERY_MEMORY_INFO:
+   case PIPE_CAP_PCI_GROUP:
+   case PIPE_CAP_PCI_BUS:
+   case PIPE_CAP_PCI_DEVICE:
+   case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+      return 0;
+
+   case PIPE_CAP_VENDOR_ID:
+      return 0x10de;
+   case PIPE_CAP_DEVICE_ID: {
+      uint64_t device_id;
+      if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) {
+         NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n");
+         return -1;
+      }
+      return device_id;
+   }
+   case PIPE_CAP_ACCELERATED:
+      return 1;
+   case PIPE_CAP_VIDEO_MEMORY:
+      return dev->vram_size >> 20;
+   case PIPE_CAP_UMA:
       return 0;
    }
 
@@ -216,6 +279,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_FRAGMENT:
       break;
+   case PIPE_SHADER_COMPUTE:
    default:
       return 0;
    }
@@ -232,12 +296,12 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       if (shader == PIPE_SHADER_VERTEX)
          return 32;
       return 15;
-   case PIPE_SHADER_CAP_MAX_CONSTS:
-      return 65536 / 16;
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
+      return 16;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return 65536;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
       return NV50_MAX_PIPE_CONSTBUFS;
-   case PIPE_SHADER_CAP_MAX_ADDRS:
-      return 1;
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
       return shader != PIPE_SHADER_FRAGMENT;
@@ -251,7 +315,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-      return 0;
+      return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
    case PIPE_SHADER_CAP_INTEGERS:
@@ -259,7 +323,18 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       /* The chip could handle more sampler views than samplers */
    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-      return MIN2(32, PIPE_MAX_SAMPLERS);
+      return MIN2(16, PIPE_MAX_SAMPLERS);
+   case PIPE_SHADER_CAP_DOUBLES:
+   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+   case PIPE_SHADER_CAP_SUPPORTED_IRS:
+   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
@@ -292,6 +367,53 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
    return 0.0f;
 }
 
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+                              enum pipe_shader_ir ir_type,
+                              enum pipe_compute_cap param, void *data)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+
+#define RET(x) do {                  \
+   if (data)                         \
+      memcpy(data, x, sizeof(x));    \
+   return sizeof(x);                 \
+} while (0)
+
+   switch (param) {
+   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+      RET((uint64_t []) { 2 });
+   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+      RET(((uint64_t []) { 65535, 65535 }));
+   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+      RET(((uint64_t []) { 512, 512, 64 }));
+   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+      RET((uint64_t []) { 512 });
+   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+      RET((uint64_t []) { 1ULL << 32 });
+   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+      RET((uint64_t []) { 16 << 10 });
+   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+      RET((uint64_t []) { 16 << 10 });
+   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+      RET((uint64_t []) { 4096 });
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      RET((uint32_t []) { 32 });
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+      RET((uint64_t []) { 1ULL << 40 });
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      RET((uint32_t []) { 0 });
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+      RET((uint32_t []) { screen->mp_count });
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+   default:
+      return 0;
+   }
+
+#undef RET
+}
+
 static void
 nv50_screen_destroy(struct pipe_screen *pscreen)
 {
@@ -307,7 +429,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
-      nouveau_fence_wait(current);
+      nouveau_fence_wait(current, NULL);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
@@ -316,6 +438,11 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 
    if (screen->blitter)
       nv50_blitter_destroy(screen);
+   if (screen->pm.prog) {
+      screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+      nv50_program_destroy(NULL, screen->pm.prog);
+      FREE(screen->pm.prog);
+   }
 
    nouveau_bo_ref(NULL, &screen->code);
    nouveau_bo_ref(NULL, &screen->tls_bo);
@@ -333,6 +460,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
    nouveau_object_del(&screen->tesla);
    nouveau_object_del(&screen->eng2d);
    nouveau_object_del(&screen->m2mf);
+   nouveau_object_del(&screen->compute);
    nouveau_object_del(&screen->sync);
 
    nouveau_screen_fini(&screen->base);
@@ -349,6 +477,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    /* we need to do it after possible flush in MARK_RING */
    *sequence = ++screen->base.fence.sequence;
 
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
    PUSH_DATAh(push, screen->fence.bo->offset);
    PUSH_DATA (push, screen->fence.bo->offset);
@@ -421,11 +550,18 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
    BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
    PUSH_DATA (push, 0xf);
 
-   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
       BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
       PUSH_DATA (push, 0x18);
    }
 
+   BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
+   PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
+
+   BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
+   for (i = 0; i < 8; ++i)
+      PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
+
    BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
    PUSH_DATA (push, 1);
 
@@ -437,14 +573,14 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
    PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
    PUSH_DATA (push, 0);
-   BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
-   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
+   PUSH_DATA (push, 1);
    BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
    PUSH_DATA (push, 1);
 
    if (screen->tesla->oclass >= NVA0_3D_CLASS) {
       BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
-      PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+      PUSH_DATA (push, 0);
    }
 
    BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
@@ -585,6 +721,13 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
    BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
    PUSH_DATA (push, 1);
 
+   BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+   PUSH_DATA (push, 0);
+   if (screen->base.class_3d >= NV84_3D_CLASS) {
+      BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
+      PUSH_DATA (push, 0);
+   }
+
    PUSH_KICK (push);
 }
 
@@ -642,7 +785,7 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
    return 1;
 }
 
-struct pipe_screen *
+struct nouveau_screen *
 nv50_screen_create(struct nouveau_device *dev)
 {
    struct nv50_screen *screen;
@@ -657,6 +800,7 @@ nv50_screen_create(struct nouveau_device *dev)
    if (!screen)
       return NULL;
    pscreen = &screen->base.base;
+   pscreen->destroy = nv50_screen_destroy;
 
    ret = nouveau_screen_init(&screen->base, dev);
    if (ret) {
@@ -677,17 +821,19 @@ nv50_screen_create(struct nouveau_device *dev)
 
    chan = screen->base.channel;
 
-   pscreen->destroy = nv50_screen_destroy;
    pscreen->context_create = nv50_create;
    pscreen->is_format_supported = nv50_screen_is_format_supported;
    pscreen->get_param = nv50_screen_get_param;
    pscreen->get_shader_param = nv50_screen_get_shader_param;
    pscreen->get_paramf = nv50_screen_get_paramf;
+   pscreen->get_compute_param = nv50_screen_get_compute_param;
+   pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+   pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
 
    nv50_screen_init_resource_functions(pscreen);
 
    if (screen->base.device->chipset < 0x84 ||
-       debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
       /* PMPEG */
       nouveau_screen_init_vdec(&screen->base);
    } else if (screen->base.device->chipset < 0x98 ||
@@ -792,6 +938,8 @@ nv50_screen_create(struct nouveau_device *dev)
    screen->TPs = util_bitcount(value & 0xffff);
    screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
 
+   screen->mp_count = screen->TPs * screen->MPsInTP;
+
    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
          STACK_WARPS_ALLOC * 64 * 8;
 
@@ -843,13 +991,19 @@ nv50_screen_create(struct nouveau_device *dev)
 
    nv50_screen_init_hwctx(screen);
 
-   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+   ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+   if (ret) {
+      NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+      goto fail;
+   }
+
+   nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
 
-   return pscreen;
+   return &screen->base;
 
 fail:
-   nv50_screen_destroy(pscreen);
-   return NULL;
+   screen->base.base.context_create = NULL;
+   return &screen->base;
 }
 
 int