* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
- * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
#include "util/u_format.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
+#include "nouveau/nouveau_vp3_video.h"
+
#include "nvc0_context.h"
#include "nvc0_screen.h"
#include "nvc0_graph_macros.h"
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
unsigned sample_count,
unsigned bindings)
{
+ if (sample_count > 8)
+ return FALSE;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
return FALSE;
if (!util_format_is_supported(format, bindings))
return FALSE;
- switch (format) {
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- case PIPE_FORMAT_R8G8B8X8_UNORM:
- /* HACK: GL requires equal formats for MS resolve and window is BGRA */
- if (bindings & PIPE_BIND_RENDER_TARGET)
+ if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
+ if (util_format_get_blocksizebits(format) == 3 * 32)
return FALSE;
- default:
- break;
- }
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
+ return 16 * 5;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 15;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 12;
+ return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return 2048;
case PIPE_CAP_MIN_TEXEL_OFFSET:
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_TGSI_TEXCOORD:
return 1;
case PIPE_CAP_SM3:
return 1;
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
return 1;
- case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
return 1;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return 4;
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_START_INSTANCE:
return 1;
- case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_USER_INDEX_BUFFERS:
return 0; /* state trackers will know better */
case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
return 0;
+ case PIPE_CAP_COMPUTE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 1;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
default:
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
return 0;
nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
enum pipe_shader_cap param)
{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
switch (shader) {
case PIPE_SHADER_VERTEX:
/*
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
+ case PIPE_SHADER_COMPUTE:
+ if (class_3d < NVE4_3D_CLASS)
+ return 0;
+ break;
default:
return 0;
}
switch (param) {
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_INPUTS:
if (shader == PIPE_SHADER_VERTEX)
return 32;
+ /* NOTE: These only count our slots for GENERIC varyings.
+ * The address space may be larger, but the actual hard limit seems to be
+ * less than what the address space layout permits, so don't add TEXCOORD,
+ * COLOR, etc. here.
+ */
if (shader == PIPE_SHADER_FRAGMENT)
- return (0x200 + 0x20 + 0x80) / 16; /* generic + colors + TexCoords */
- return (0x200 + 0x40 + 0x80) / 16; /* without 0x60 for per-patch inputs */
+ return 0x1f0 / 16;
+ /* Actually this counts CLIPVERTEX, which occupies the last generic slot,
+ * and excludes 0x60 per-patch inputs.
+ */
+ return 0x200 / 16;
case PIPE_SHADER_CAP_MAX_CONSTS:
return 65536 / 16;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 14;
+ if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
+ return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
+ return NVC0_MAX_PIPE_CONSTBUFS;
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
return NVC0_CAP_MAX_PROGRAM_TEMPS;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
case PIPE_SHADER_CAP_SUBROUTINES:
- return 1; /* but inlining everything, we need function declarations */
+ return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
}
}
+static int
+nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ uint64_t *data64 = (uint64_t *)data;
+ const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ data64[0] = 3;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
+ data64[1] = 65535;
+ data64[2] = 65535;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ data64[0] = 1024;
+ data64[1] = 1024;
+ data64[2] = 64;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ data64[0] = 1024;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
+ data64[0] = (uint64_t)1 << 40;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ data64[0] = 48 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ data64[0] = 512 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ data64[0] = 4096;
+ return 8;
+ default:
+ return 0;
+ }
+}
+
static void
nvc0_screen_destroy(struct pipe_screen *pscreen)
{
if (screen->base.pushbuf)
screen->base.pushbuf->user_priv = NULL;
- if (screen->blitctx)
- FREE(screen->blitctx);
+ if (screen->blitter)
+ nvc0_blitter_destroy(screen);
+ if (screen->pm.prog) {
+ screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+ nvc0_program_destroy(NULL, screen->pm.prog);
+ }
nouveau_bo_ref(NULL, &screen->text);
nouveau_bo_ref(NULL, &screen->uniform_bo);
nouveau_bo_ref(NULL, &screen->txc);
nouveau_bo_ref(NULL, &screen->fence.bo);
nouveau_bo_ref(NULL, &screen->poly_cache);
+ nouveau_bo_ref(NULL, &screen->parm);
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
- if (screen->tic.entries)
- FREE(screen->tic.entries);
+ FREE(screen->tic.entries);
nouveau_mm_destroy(screen->mm_VRAM_fe0);
nouveau_object_del(&screen->eng3d);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
nouveau_screen_fini(&screen->base);
return screen->fence.map[0];
}
+static int
+nvc0_screen_init_compute(struct nvc0_screen *screen)
+{
+ screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
+
+ switch (screen->base.device->chipset & 0xf0) {
+ case 0xc0:
+ case 0xd0:
+ /* Using COMPUTE has weird effects on 3D state, we need to
+ * investigate this further before enabling it by default.
+ */
+ if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
+ return 0;
+ case 0xe0:
+ case 0xf0:
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ default:
+ return -1;
+ }
+}
+
+boolean
+nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
+ uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+ struct nouveau_bo *bo = NULL;
+ int ret;
+ uint64_t size = (lpos + lneg) * 32 + cstack;
+
+ if (size >= (1 << 20)) {
+ NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+
+ size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
+ size = align(size, 0x8000);
+ size *= screen->mp_count;
+
+ size = align(size, 1 << 17);
+
+ ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+ NULL, &bo);
+ if (ret) {
+ NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+ nouveau_bo_ref(NULL, &screen->tls);
+ screen->tls = bo;
+ return TRUE;
+}
+
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
struct pipe_screen *pscreen;
struct nouveau_object *chan;
struct nouveau_pushbuf *push;
+ uint64_t value;
uint32_t obj_class;
int ret;
unsigned i;
case 0xc0:
case 0xd0:
case 0xe0:
+ case 0xf0:
break;
default:
return NULL;
return NULL;
pscreen = &screen->base.base;
- screen->base.sysmem_bindings = PIPE_BIND_CONSTANT_BUFFER;
-
ret = nouveau_screen_init(&screen->base, dev);
if (ret) {
nvc0_screen_destroy(pscreen);
chan = screen->base.channel;
push = screen->base.pushbuf;
push->user_priv = screen;
+ push->rsvd_kick = 5;
+
+ screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+ screen->base.sysmem_bindings |=
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
pscreen->destroy = nvc0_screen_destroy;
pscreen->context_create = nvc0_create;
pscreen->get_param = nvc0_screen_get_param;
pscreen->get_shader_param = nvc0_screen_get_shader_param;
pscreen->get_paramf = nvc0_screen_get_paramf;
+ pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
nvc0_screen_init_resource_functions(pscreen);
- nouveau_screen_init_vdec(&screen->base);
+ screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
+ screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
&screen->fence.bo);
screen->base.fence.update = nvc0_screen_fence_update;
switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_P2MF_CLASS;
+ break;
case 0xe0:
obj_class = NVE4_P2MF_CLASS;
break;
BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->eng2d->oclass);
+ BEGIN_NVC0(push, NVC0_2D(SINGLE_GPC), 1);
+ PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_2D(OPERATION), 1);
PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY);
BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1);
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_3D_CLASS;
+ break;
case 0xe0:
obj_class = NVE4_3D_CLASS;
break;
PUSH_DATA (push, 0x17);
}
+ IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101);
+ BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
+ for (i = 0; i < 8; ++i)
+ PUSH_DATA(push, dev->drm_version >= 0x01000101);
+
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
for (i = 0; i < 5; ++i) {
/* TIC and TSC entries for each unit (nve4+ only) */
- /* auxiliary constants (6 user clip planes, base instance id */
+ /* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
PUSH_DATA (push, 0);
- /* max MPs * max warps per MP (TODO: ask kernel) */
- if (screen->eng3d->oclass >= NVE4_3D_CLASS)
- screen->tls_size = 8 * 64;
- else
- screen->tls_size = 16 * 48;
- screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
- screen->tls_size = align(screen->tls_size, 1 << 17);
+ /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 256);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
+ PUSH_DATA (push, 0);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+
+ if (dev->drm_version >= 0x01000101) {
+ ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+ if (ret) {
+ NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
+ goto fail;
+ }
+ } else {
+ if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
+ value = (8 << 8) | 4;
+ else
+ value = (16 << 8) | 4;
+ }
+ screen->mp_count = value >> 8;
+ screen->mp_count_compute = screen->mp_count;
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
- screen->tls_size, NULL, &screen->tls);
- if (ret)
- goto fail;
+ nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
PUSH_DATAh(push, screen->tls->offset);
PUSH_DATA (push, screen->tls->offset);
- PUSH_DATA (push, screen->tls_size >> 32);
- PUSH_DATA (push, screen->tls_size);
+ PUSH_DATA (push, screen->tls->size >> 32);
+ PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATA (push, 0xab);
- PUSH_DATA (push, 0x00000000);
+ if (nvc0_screen_init_compute(screen))
+ goto fail;
PUSH_KICK (push);
mm_config.nvc0.memtype = 0xfe0;
screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
- if (!nvc0_blitctx_create(screen))
+ if (!nvc0_blitter_create(screen))
goto fail;
nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);