#define THREADS_IN_WARP 32
-#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
-
static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
unsigned bindings)
{
if (sample_count > 8)
- return FALSE;
+ return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
- return FALSE;
+ return false;
if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings))
- return FALSE;
+ return false;
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
- return FALSE;
+ return false;
break;
default:
break;
}
+ if (bindings & PIPE_BIND_LINEAR)
+ if (util_format_is_depth_or_stencil(format) ||
+ (target != PIPE_TEXTURE_1D &&
+ target != PIPE_TEXTURE_2D &&
+ target != PIPE_TEXTURE_RECT) ||
+ sample_count > 1)
+ return false;
+
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_LINEAR |
PIPE_BIND_SHARED);
- return (nv50_format_table[format].usage & bindings) == bindings;
+ return (( nv50_format_table[format].usage |
+ nv50_vertex_format[format].usage) & bindings) == bindings;
}
static int
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 7;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return 65536;
+ return 128 * 1024 * 1024;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 330;
case PIPE_CAP_MAX_RENDER_TARGETS:
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- return 1; /* 256 for binding as RT, but that's not possible in GL */
+ return 16; /* 256 for binding as RT, but that's not possible in GL */
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return class_3d >= NVA3_3D_CLASS;
/* unsupported caps */
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_TGSI_TEXCOORD:
- case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_DRAW_INDIRECT:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_VERTEXID_NOBASE:
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
return 0;
case PIPE_CAP_VENDOR_ID:
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
+ case PIPE_SHADER_COMPUTE:
default:
return 0;
}
if (shader == PIPE_SHADER_VERTEX)
return 32;
return 15;
+ case PIPE_SHADER_CAP_MAX_OUTPUTS:
+ return 16;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return 65536;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
- return 0;
+ return 1;
case PIPE_SHADER_CAP_SUBROUTINES:
return 0; /* please inline, or provide function declarations */
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
/* The chip could handle more sampler views than samplers */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return MIN2(32, PIPE_MAX_SAMPLERS);
+ return MIN2(16, PIPE_MAX_SAMPLERS);
+ case PIPE_SHADER_CAP_DOUBLES:
+ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
return 0.0f;
}
+static int
+nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param, void *data)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+#define RET(x) do { \
+ if (data) \
+ memcpy(data, x, sizeof(x)); \
+ return sizeof(x); \
+} while (0)
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ RET((uint64_t []) { 2 });
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ RET(((uint64_t []) { 65535, 65535 }));
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ RET(((uint64_t []) { 512, 512, 64 }));
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ RET((uint64_t []) { 512 });
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
+ RET((uint64_t []) { 1ULL << 32 });
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ RET((uint64_t []) { 16 << 10 });
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ RET((uint64_t []) { 4096 });
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ RET((uint32_t []) { 32 });
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ RET((uint64_t []) { 1ULL << 40 });
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ RET((uint32_t []) { 0 });
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ RET((uint32_t []) { screen->mp_count });
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
+ default:
+ return 0;
+ }
+
+#undef RET
+}
+
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
* _current_ one, and remove both.
*/
nouveau_fence_ref(screen->base.fence.current, ¤t);
- nouveau_fence_wait(current);
+ nouveau_fence_wait(current, NULL);
nouveau_fence_ref(NULL, ¤t);
nouveau_fence_ref(NULL, &screen->base.fence.current);
}
if (screen->blitter)
nv50_blitter_destroy(screen);
+ if (screen->pm.prog) {
+ screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+ nv50_program_destroy(NULL, screen->pm.prog);
+ FREE(screen->pm.prog);
+ }
nouveau_bo_ref(NULL, &screen->code);
nouveau_bo_ref(NULL, &screen->tls_bo);
nouveau_object_del(&screen->tesla);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_object_del(&screen->sync);
nouveau_screen_fini(&screen->base);
/* we need to do it after possible flush in MARK_RING */
*sequence = ++screen->base.fence.sequence;
+ assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
PUSH_DATAh(push, screen->fence.bo->offset);
PUSH_DATA (push, screen->fence.bo->offset);
BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
PUSH_DATA (push, 0xf);
- if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
PUSH_DATA (push, 0x18);
}
+ BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
+ PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
+
+ BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
+ for (i = 0; i < 8; ++i)
+ PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
+
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
- PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
PUSH_DATA (push, 1);
if (screen->tesla->oclass >= NVA0_3D_CLASS) {
BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
- PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ PUSH_DATA (push, 0);
}
BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, 0);
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
+ PUSH_DATA (push, 0);
+ }
+
PUSH_KICK (push);
}
return 1;
}
-struct pipe_screen *
+struct nouveau_screen *
nv50_screen_create(struct nouveau_device *dev)
{
struct nv50_screen *screen;
if (!screen)
return NULL;
pscreen = &screen->base.base;
+ pscreen->destroy = nv50_screen_destroy;
ret = nouveau_screen_init(&screen->base, dev);
if (ret) {
chan = screen->base.channel;
- pscreen->destroy = nv50_screen_destroy;
pscreen->context_create = nv50_create;
pscreen->is_format_supported = nv50_screen_is_format_supported;
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+ pscreen->get_compute_param = nv50_screen_get_compute_param;
+ pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+ pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
nv50_screen_init_resource_functions(pscreen);
if (screen->base.device->chipset < 0x84 ||
- debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ debug_get_bool_option("NOUVEAU_PMPEG", false)) {
/* PMPEG */
nouveau_screen_init_vdec(&screen->base);
} else if (screen->base.device->chipset < 0x98 ||
screen->TPs = util_bitcount(value & 0xffff);
screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+ screen->mp_count = screen->TPs * screen->MPsInTP;
+
stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
STACK_WARPS_ALLOC * 64 * 8;
nv50_screen_init_hwctx(screen);
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
+ goto fail;
+ }
+
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
- return pscreen;
+ return &screen->base;
fail:
- nv50_screen_destroy(pscreen);
- return NULL;
+ screen->base.base.context_create = NULL;
+ return &screen->base;
}
int