From 268dc60d3a091bc563e319c38e74cc10e544aa8a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 7 Jun 2020 09:52:46 +1000 Subject: [PATCH] nvc0: initial support for gv100 v2: - remove unnecessary MAX2() - add proper method definitions Signed-off-by: Ben Skeggs Acked-by: Karol Herbst Part-of: --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/meson.build | 2 + src/gallium/drivers/nouveau/nouveau_screen.c | 6 +- src/gallium/drivers/nouveau/nv_object.xml.h | 2 + src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h | 245 ++++++++++++++++++ .../drivers/nouveau/nvc0/nvc0_3d.xml.h | 3 + .../drivers/nouveau/nvc0/nvc0_program.c | 5 +- .../drivers/nouveau/nvc0/nvc0_screen.c | 57 ++-- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 10 +- .../drivers/nouveau/nvc0/nve4_compute.c | 111 +++++++- src/gallium/drivers/nouveau/nvc0/qmd.h | 1 + src/gallium/drivers/nouveau/nvc0/qmdc3c0.c | 168 ++++++++++++ .../winsys/nouveau/drm/nouveau_drm_winsys.c | 1 + 13 files changed, 578 insertions(+), 35 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h create mode 100644 src/gallium/drivers/nouveau/nvc0/qmdc3c0.c diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 313f41c79bd..9de8168fbd9 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -153,10 +153,12 @@ NVC0_CODEGEN_SOURCES := \ NVC0_C_SOURCES := \ nvc0/cla0c0qmd.h \ nvc0/clc0c0qmd.h \ + nvc0/clc3c0qmd.h \ nvc0/drf.h \ nvc0/qmd.h \ nvc0/qmda0c0.c \ nvc0/qmdc0c0.c \ + nvc0/qmdc3c0.c \ nvc0/gm107_texture.xml.h \ nvc0/nvc0_3d.xml.h \ nvc0/nvc0_compute.c \ diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build index 9d93a5ef42c..d0cfbe76478 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -169,10 +169,12 @@ files_libnouveau = files( 'codegen/nv50_ir_target_nvc0.h', 
'nvc0/cla0c0qmd.h', 'nvc0/clc0c0qmd.h', + 'nvc0/clc3c0qmd.h', 'nvc0/drf.h', 'nvc0/qmd.h', 'nvc0/qmda0c0.c', 'nvc0/qmdc0c0.c', + 'nvc0/qmdc3c0.c', 'nvc0/gm107_texture.xml.h', 'nvc0/nvc0_3d.xml.h', 'nvc0/nvc0_compute.c', diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 7f90784b8ed..702d88b6c93 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); - screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); + if (dev->chipset < 0x140) + screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); + else + screen->prefer_nir = true; + screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false); if (screen->force_enable_cl) glsl_type_singleton_init_or_ref(); diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index 664bfae9f64..0c1337028f3 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -195,6 +195,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define GM200_3D_CLASS 0x0000b197 #define GP100_3D_CLASS 0x0000c097 #define GP102_3D_CLASS 0x0000c197 +#define GV100_3D_CLASS 0x0000c397 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 @@ -207,6 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define GM200_COMPUTE_CLASS 0x0000b1c0 #define GP100_COMPUTE_CLASS 0x0000c0c0 #define GP104_COMPUTE_CLASS 0x0000c1c0 +#define GV100_COMPUTE_CLASS 0x0000c3c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h new file mode 100644 index 00000000000..588cc639d32 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h @@ -0,0 +1,245 @@ +/******************************************************************************* + Copyright (c) 2001-2010 NVIDIA Corporation + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ +*******************************************************************************/ + +/* AUTO GENERATED FILE -- DO NOT EDIT */ + +#ifndef __CLC3C0QMD_H__ +#define __CLC3C0QMD_H__ + +/* +** Queue Meta Data, Version 02_02 + */ + +// The below C preprocessor definitions describe "multi-word" structures, where +// fields may have bit numbers beyond 32. For example, MW(127:96) means +// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" +// syntax is to distinguish from similar "X:Y" single-word definitions: the +// macros historically used for single-word definitions would fail with +// multi-word definitions. +// +// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel +// interface layer of nvidia.ko for an example of how to manipulate +// these MW(X:Y) definitions. + +#define NVC3C0_QMDV02_02_OUTER_PUT MW(30:0) +#define NVC3C0_QMDV02_02_OUTER_OVERFLOW MW(31:31) +#define NVC3C0_QMDV02_02_OUTER_GET MW(62:32) +#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW MW(63:63) +#define NVC3C0_QMDV02_02_INNER_GET MW(94:64) +#define NVC3C0_QMDV02_02_INNER_OVERFLOW MW(95:95) +#define NVC3C0_QMDV02_02_INNER_PUT MW(126:96) +#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW MW(127:127) +#define NVC3C0_QMDV02_02_QMD_GROUP_ID MW(133:128) +#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE MW(134:134) +#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135) +#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_IS_QUEUE MW(136:136) +#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137) +#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0 MW(138:138) +#define 
NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1 MW(139:139) +#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS MW(140:140) +#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141) +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE MW(142:142) +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID 0x00000001 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY MW(143:143) +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_QMD_RESERVED_B MW(159:144) +#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE MW(184:160) +#define NVC3C0_QMDV02_02_QMD_RESERVED_C MW(185:185) +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186) +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187) +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188) +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE MW(189:189) 
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE MW(190:190) +#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191) +#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME MW(223:192) +#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME MW(239:224) +#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME MW(255:240) +#define NVC3C0_QMDV02_02_PROGRAM_OFFSET MW(287:256) +#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) +#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) +#define NVC3C0_QMDV02_02_QMD_RESERVED_D MW(335:328) +#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID MW(357:352) +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) +#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE MW(366:366) +#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE MW(369:368) +#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 +#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 +#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 +#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS MW(370:370) +#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 +#define 
NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT MW(378:378) +#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32 0x00000000 +#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 +#define NVC3C0_QMDV02_02_SAMPLER_INDEX MW(382:382) +#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 +#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 +#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH MW(415:384) +#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT MW(431:416) +#define NVC3C0_QMDV02_02_QMD_RESERVED13A MW(447:432) +#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH MW(463:448) +#define NVC3C0_QMDV02_02_QMD_RESERVED14A MW(479:464) +#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER MW(511:480) +#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) +#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD MW(529:522) +#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE MW(561:544) +#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE MW(568:562) +#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE MW(575:569) +#define NVC3C0_QMDV02_02_QMD_VERSION MW(579:576) +#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION MW(583:580) +#define NVC3C0_QMDV02_02_QMD_RESERVED_H MW(591:584) +#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0 MW(607:592) +#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1 MW(623:608) +#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2 MW(639:624) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_REGISTER_COUNT_V MW(656:648) +#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE MW(663:657) +#define 
NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM MW(671:664) +#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER MW(703:672) +#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER MW(735:704) +#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER MW(767:736) +#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER MW(775:768) +#define NVC3C0_QMDV02_02_QMD_RESERVED_J MW(783:776) +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP MW(790:788) +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 +#define NVC3C0_QMDV02_02_QMD_RESERVED_K MW(791:791) +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT MW(793:792) +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE MW(794:794) +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE MW(799:799) +#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD MW(831:800) +#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER MW(863:832) +#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER MW(871:864) +#define NVC3C0_QMDV02_02_QMD_RESERVED_L MW(879:872) +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP MW(886:884) +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 +#define 
NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 +#define NVC3C0_QMDV02_02_QMD_RESERVED_M MW(887:887) +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT MW(889:888) +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE MW(890:890) +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE MW(895:895) +#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 +#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 +#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD MW(927:896) +#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) +#define NVC3C0_QMDV02_02_QMD_RESERVED_N MW(954:952) +#define NVC3C0_QMDV02_02_BARRIER_COUNT MW(959:955) +#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) +#define NVC3C0_QMDV02_02_REGISTER_COUNT MW(991:984) +#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) +#define NVC3C0_QMDV02_02_SASS_VERSION MW(1023:1016) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) +#define 
NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) +#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER MW(1567:1536) +#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER MW(1584:1568) +#define NVC3C0_QMDV02_02_QMD_RESERVED_S MW(1599:1585) +#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET MW(1630:1600) +#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631) +#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT MW(1662:1632) +#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE MW(1663:1663) +#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664) +#define NVC3C0_QMDV02_02_QMD_RESERVED_Q MW(1694:1694) +#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695) +#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 +#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 +#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696) +#define NVC3C0_QMDV02_02_QMD_SPARE_G MW(1759:1728) +#define NVC3C0_QMDV02_02_QMD_SPARE_H MW(1791:1760) +#define NVC3C0_QMDV02_02_QMD_SPARE_I MW(1823:1792) +#define NVC3C0_QMDV02_02_QMD_SPARE_J MW(1855:1824) +#define NVC3C0_QMDV02_02_QMD_SPARE_K MW(1887:1856) +#define NVC3C0_QMDV02_02_QMD_SPARE_L MW(1919:1888) +#define NVC3C0_QMDV02_02_QMD_SPARE_M MW(1951:1920) +#define NVC3C0_QMDV02_02_QMD_SPARE_N MW(1983:1952) +#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER MW(2015:1984) +#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER MW(2047:2016) + + + +#endif // #ifndef __CLC3C0QMD_H__ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h index 221bab3105b..1c5a8dc0b1f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h @@ -1787,6 +1787,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define NVC0_3D_SP_UNK14__ESIZE 0x00000004 #define NVC0_3D_SP_UNK14__LEN 0x00000004 +#define GV100_3D_SP_ADDRESS_HIGH(i0) (0x00002014 + 0x40*(i0)) +#define GV100_3D_SP_ADDRESS_LOW(i0) (0x00002018 + 0x40*(i0)) + #define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) #define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 #define NVC0_3D_TEX_LIMITS__LEN 0x00000005 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 4f3d1cd5b3b..b9fff341f28 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, prog->code_size = info->bin.codeSize; prog->relocs = info->bin.relocData; prog->fixups = info->bin.fixupData; - prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + if (info->target >= NVISA_GV100_CHIPSET) + prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why? + else + prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); prog->cp.smem_size = info->bin.smemSize; prog->num_barriers = info->numBarriers; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 64a22104a1f..2807b59a4fd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -444,8 +444,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_PREFERRED_IR: return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: { - uint32_t irs = 1 << PIPE_SHADER_IR_TGSI | - 1 << PIPE_SHADER_IR_NIR; + uint32_t irs = 1 << PIPE_SHADER_IR_NIR | + ((class_3d >= GV100_3D_CLASS) ? 
0 : 1 << PIPE_SHADER_IR_TGSI); if (screen->force_enable_cl) irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED; return irs; @@ -468,6 +468,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: return shader != PIPE_SHADER_FRAGMENT; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + /* HW doesn't support indirect addressing of fragment program inputs + * on Volta. The binary driver generates a function to handle every + * possible indirection, and indirectly calls the function to handle + * this instead. + */ + if (class_3d >= GV100_3D_CLASS) + return shader != PIPE_SHADER_FRAGMENT; + return 1; case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: return 1; @@ -731,8 +739,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); PUSH_DATA (push, 0xff); PUSH_DATA (push, 0xff); - BEGIN_NVC0(push, SUBC_3D(0x074c), 1); - PUSH_DATA (push, 0x3f); + if (obj_class < GV100_3D_CLASS) { + BEGIN_NVC0(push, SUBC_3D(0x074c), 1); + PUSH_DATA (push, 0x3f); + } BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); PUSH_DATA (push, (3 << 16) | 3); @@ -764,8 +774,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) BEGIN_NVC0(push, SUBC_3D(0x0300), 1); PUSH_DATA (push, 3); - BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); - PUSH_DATA (push, 0x3fffff); + if (obj_class < GV100_3D_CLASS) { + BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); + PUSH_DATA (push, 0x3fffff); + } BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); @@ -825,6 +837,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0x110: case 0x120: case 0x130: + case 0x140: return nve4_screen_compute_setup(screen, screen->base.pushbuf); default: return -1; @@ -896,13 +909,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) nouveau_heap_init(&screen->text_heap, 0, size - 0x100); /* update the code segment setup */ - BEGIN_NVC0(push, 
NVC0_3D(CODE_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->text->offset); - PUSH_DATA (push, screen->text->offset); - if (screen->compute) { - BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); + if (screen->eng3d->oclass < GV100_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); + if (screen->compute) { + BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + } } return 0; @@ -981,6 +996,7 @@ nvc0_screen_create(struct nouveau_device *dev) case 0x110: case 0x120: case 0x130: + case 0x140: break; default: return NULL; @@ -1047,16 +1063,18 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.fence.emit = nvc0_screen_fence_emit; screen->base.fence.update = nvc0_screen_fence_update; + if (dev->chipset < 0x140) { + ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, + NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); + if (ret) + FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); - ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 
0x1f906e : 0x906e, - NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); - if (ret) - FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); - - BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); - PUSH_DATA (push, screen->nvsw->handle); + BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->nvsw->handle); + } switch (dev->chipset & ~0xf) { + case 0x140: case 0x130: case 0x120: case 0x110: @@ -1110,6 +1128,9 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->fence.bo->offset + 16); switch (dev->chipset & ~0xf) { + case 0x140: + obj_class = GV100_3D_CLASS; + break; case 0x130: switch (dev->chipset) { case 0x130: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 02f1c6fba8f..490026b2c00 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -70,8 +70,14 @@ nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage, { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1); - PUSH_DATA (push, prog->code_base); + if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1); + PUSH_DATA (push, prog->code_base); + } else { + BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2); + PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base); + PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base); + } } void diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 58f29213dad..3a3f0a926de 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -31,11 +31,14 @@ #include "qmd.h" #include "cla0c0qmd.h" #include "clc0c0qmd.h" +#include "clc3c0qmd.h" #define NVA0C0_QMDV00_06_VAL_SET(p,a...) 
NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a) #define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a) #define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a) #define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a) +#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a) +#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a) int nve4_screen_compute_setup(struct nvc0_screen *screen, @@ -49,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, uint64_t address; switch (dev->chipset & ~0xf) { + case 0x140: + obj_class = GV100_COMPUTE_CLASS; + break; case 0x100: case 0xf0: obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ @@ -92,24 +98,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); - BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); - PUSH_DATAh(push, screen->tls->size / screen->mp_count); - PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); - PUSH_DATA (push, 0xff); + if (obj_class < GV100_COMPUTE_CLASS) { + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); + PUSH_DATAh(push, screen->tls->size / screen->mp_count); + PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); + PUSH_DATA (push, 0xff); + } /* Unified address space ? Who needs that ? Certainly not OpenCL. * * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be * accessible. We cannot prevent that at the moment, so expect failure. 
*/ - BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); - PUSH_DATA (push, 0xff << 24); - BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); - PUSH_DATA (push, 0xfe << 24); - - BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->text->offset); - PUSH_DATA (push, screen->text->offset); + if (obj_class < GV100_COMPUTE_CLASS) { + BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); + PUSH_DATA (push, 0xff << 24); + BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); + PUSH_DATA (push, 0xfe << 24); + + BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); + } else { + BEGIN_NVC0(push, SUBC_CP(0x2a0), 2); + PUSH_DATAh(push, 0xfeULL << 24); + PUSH_DATA (push, 0xfeULL << 24); + BEGIN_NVC0(push, SUBC_CP(0x7b0), 2); + PUSH_DATAh(push, 0xffULL << 24); + PUSH_DATA (push, 0xffULL << 24); + } BEGIN_NVC0(push, SUBC_CP(0x0310), 1); PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300); @@ -717,6 +734,69 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, nve4_compute_setup_buf_cb(nvc0, true, qmd); } +static int +gv100_sm_config_smem_size(u32 size) +{ + if (size > 64 * 1024) size = 96 * 1024; + else if (size > 32 * 1024) size = 64 * 1024; + else if (size > 16 * 1024) size = 32 * 1024; + else if (size > 8 * 1024) size = 16 * 1024; + else size = 8 * 1024; + return (size / 4096) + 1; +} + +static void +gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, + const struct pipe_grid_info *info) +{ + struct nvc0_program *cp = nvc0->compprog; + struct nvc0_screen *screen = nvc0->screen; + uint64_t entry = + screen->text->offset + nvc0_program_symbol_offset(cp, info->pc); + + NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); + NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); + NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX); + NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, + align(cp->cp.smem_size, 0x100)); + 
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, + gv100_sm_config_smem_size(8 * 1024)); + NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, + gv100_sm_config_smem_size(96 * 1024)); + NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2); + NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2); + NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, + gv100_sm_config_smem_size(cp->cp.smem_size)); + + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); + NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); + NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs); + NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + + // Only bind user uniforms and the driver constant buffer through the + // launch descriptor because UBOs are stuck to the driver cb to avoid the + // limitation of 8 CBs. + if (nvc0->constbuf[5][0].user || cp->parm_size) { + gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, + NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer.
+ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); + } + gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, + NVC0_CB_AUX_INFO(5), 1 << 11); + + nve4_compute_setup_buf_cb(nvc0, true, qmd); + + NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff); + NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32); +} + static inline void * nve4_compute_alloc_launch_desc(struct nouveau_context *nv, struct nouveau_bo **pbo, uint64_t *pgpuaddr) @@ -787,6 +867,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) if (ret) goto out; + if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) + gv100_compute_setup_launch_desc(nvc0, desc, info); + else if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) gp100_compute_setup_launch_desc(nvc0, desc, info); else @@ -797,6 +880,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) #ifndef NDEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) { debug_printf("Queue Meta Data:\n"); + if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) + NVC3C0QmdDump_V02_02(desc); + else if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) NVC0C0QmdDump_V02_01(desc); else @@ -931,7 +1017,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } - #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER static void nve4_compute_trap_info(struct nvc0_context *nvc0) diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h index 50db3cb5cf9..86c290fe836 100644 --- a/src/gallium/drivers/nouveau/nvc0/qmd.h +++ b/src/gallium/drivers/nouveau/nvc0/qmd.h @@ -64,4 +64,5 @@ void NVA0C0QmdDump_V00_06(uint32_t *); void NVC0C0QmdDump_V02_01(uint32_t *); +void NVC3C0QmdDump_V02_02(uint32_t *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c new file mode 100644 index 00000000000..c9bd8966114 --- /dev/null +++ 
b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c @@ -0,0 +1,168 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "qmd.h" +#include "clc3c0qmd.h" +/* + * Local shorthands: each one forwards its arguments to the matching generic + * NVQMD_VAL / NVQMD_DEF / NVQMD_IDX helper with the NVC3C0 and QMDV02_02 + * tokens prepended, so the dump below only has to name the field. + * NOTE(review): the NVQMD_* helpers are not defined in this file; presumably + * they come from qmd.h - confirm there. + */ +#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a) +#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a) +#define NVC3C0_QMDV02_02_IDX(a...)
NVQMD_IDX(NVC3C0, QMDV02_02, ##a) +/* + * Debug dump of a QMDV02_02 descriptor: walks the fields in the fixed order + * listed below, one helper-macro invocation per field. + * + * qmd: pointer to the 32-bit words of the descriptor; it is handed unchanged + * to every helper macro and is not written to by any statement visible here. + * + * Invocation shapes used below: + * VAL(qmd, FIELD, fmt) - field plus a printf-style format + * VAL(qmd, FIELD, i, fmt) - same, for an indexed field + * DEF(qmd, FIELD, names...) - field plus a list of enumerant names + * IDX(qmd, FIELD, i, names...) - appears to be the indexed form of DEF + * NOTE(review): how the helpers decode and print a field is defined outside + * this file (presumably qmd.h); verify there before relying on the format. + */ +void +NVC3C0QmdDump_V02_02(uint32_t *qmd) +{ + NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID); + NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); + NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd,
CTA_RASTER_DEPTH_RESUME, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); + NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); + NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); + NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); + NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); + for (int i = 0; i <
8; i++) /* CONSTANT_BUFFER_VALID is dumped once per index 0..7 */ + NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); + NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, + RED_MIN, + RED_MAX, + RED_INC, + RED_DEC, + RED_AND, + RED_OR, + RED_XOR); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, + RED_MIN, + RED_MAX, + RED_INC, + RED_DEC, + RED_AND, + RED_OR, + RED_XOR); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); + NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); + NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd,
SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x"); + for (int i = 0; i < 8; i++) { /* indexed CONSTANT_BUFFER_* fields, dumped together for each i in 0..7 */ + NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); + NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); + NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x"); + } + NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x"); + NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE); + NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); + NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); +} diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index 5c43518afcb..2dbe7be0211 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -104,6 +104,7 @@ nouveau_drm_screen_create(int fd) case
0x110: case 0x120: case 0x130: + case 0x140: init = nvc0_screen_create; break; default: -- 2.30.2