nvc0: initial support for gv100
author Ben Skeggs <bskeggs@redhat.com>
Sat, 6 Jun 2020 23:52:46 +0000 (09:52 +1000)
committer Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 22:52:42 +0000 (22:52 +0000)
v2:
- remove unnecessary MAX2()
- add proper method definitions

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>

13 files changed:
src/gallium/drivers/nouveau/Makefile.sources
src/gallium/drivers/nouveau/meson.build
src/gallium/drivers/nouveau/nouveau_screen.c
src/gallium/drivers/nouveau/nv_object.xml.h
src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h [new file with mode: 0644]
src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c
src/gallium/drivers/nouveau/nvc0/qmd.h
src/gallium/drivers/nouveau/nvc0/qmdc3c0.c [new file with mode: 0644]
src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c

index 313f41c79bd8feee7cbe1d322e37a8b7717f80a9..9de8168fbd974b0ed8f90a82217fad8d32e12ce2 100644 (file)
@@ -153,10 +153,12 @@ NVC0_CODEGEN_SOURCES := \
 NVC0_C_SOURCES := \
        nvc0/cla0c0qmd.h \
        nvc0/clc0c0qmd.h \
+       nvc0/clc3c0qmd.h \
        nvc0/drf.h \
        nvc0/qmd.h \
        nvc0/qmda0c0.c \
        nvc0/qmdc0c0.c \
+       nvc0/qmdc3c0.c \
        nvc0/gm107_texture.xml.h \
        nvc0/nvc0_3d.xml.h \
        nvc0/nvc0_compute.c \
index 9d93a5ef42ca8f7c280ee8aec1bea45925d2ea50..d0cfbe76478ee0a94ab11bc448b035facc38b7d7 100644 (file)
@@ -169,10 +169,12 @@ files_libnouveau = files(
   'codegen/nv50_ir_target_nvc0.h',
   'nvc0/cla0c0qmd.h',
   'nvc0/clc0c0qmd.h',
+  'nvc0/clc3c0qmd.h',
   'nvc0/drf.h',
   'nvc0/qmd.h',
   'nvc0/qmda0c0.c',
   'nvc0/qmdc0c0.c',
+  'nvc0/qmdc3c0.c',
   'nvc0/gm107_texture.xml.h',
   'nvc0/nvc0_3d.xml.h',
   'nvc0/nvc0_compute.c',
index 7f90784b8eda96dd933a92b6fb80a1e1cf75a3eb..702d88b6c9340dbad9814b0b7d47c2897c3bd1eb 100644 (file)
@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
    if (nv_dbg)
       nouveau_mesa_debug = atoi(nv_dbg);
 
-   screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+   if (dev->chipset < 0x140)
+      screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+   else
+      screen->prefer_nir = true;
+
    screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
    if (screen->force_enable_cl)
       glsl_type_singleton_init_or_ref();
index 664bfae9f64bdc8dc3ae833e87742af7fd984879..0c1337028f321325d6083c88ff84511fa8d30d37 100644 (file)
@@ -195,6 +195,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GM200_3D_CLASS                                         0x0000b197
 #define GP100_3D_CLASS                                         0x0000c097
 #define GP102_3D_CLASS                                         0x0000c197
+#define GV100_3D_CLASS                                         0x0000c397
 #define NV50_2D_CLASS                                          0x0000502d
 #define NVC0_2D_CLASS                                          0x0000902d
 #define NV50_COMPUTE_CLASS                                     0x000050c0
@@ -207,6 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define GM200_COMPUTE_CLASS                                    0x0000b1c0
 #define GP100_COMPUTE_CLASS                                    0x0000c0c0
 #define GP104_COMPUTE_CLASS                                    0x0000c1c0
+#define GV100_COMPUTE_CLASS                                    0x0000c3c0
 #define NV84_CRYPT_CLASS                                       0x000074c1
 #define BLOB_NVC0_PCOPY1_CLASS                                 0x000090b8
 #define BLOB_NVC0_PCOPY0_CLASS                                 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
new file mode 100644 (file)
index 0000000..588cc63
--- /dev/null
@@ -0,0 +1,245 @@
+/*******************************************************************************
+    Copyright (c) 2001-2010 NVIDIA Corporation
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to
+    deal in the Software without restriction, including without limitation the
+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+    sell copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+/* AUTO GENERATED FILE -- DO NOT EDIT */
+
+#ifndef __CLC3C0QMD_H__
+#define __CLC3C0QMD_H__
+
+/*
+** Queue Meta Data, Version 02_02
+ */
+
+// The below C preprocessor definitions describe "multi-word" structures, where
+// fields may have bit numbers beyond 32.  For example, MW(127:96) means
+// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
+// syntax is to distinguish from similar "X:Y" single-word definitions: the
+// macros historically used for single-word definitions would fail with
+// multi-word definitions.
+//
+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
+// interface layer of nvidia.ko for an example of how to manipulate
+// these MW(X:Y) definitions.
+
+#define NVC3C0_QMDV02_02_OUTER_PUT                                 MW(30:0)
+#define NVC3C0_QMDV02_02_OUTER_OVERFLOW                            MW(31:31)
+#define NVC3C0_QMDV02_02_OUTER_GET                                 MW(62:32)
+#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW                     MW(63:63)
+#define NVC3C0_QMDV02_02_INNER_GET                                 MW(94:64)
+#define NVC3C0_QMDV02_02_INNER_OVERFLOW                            MW(95:95)
+#define NVC3C0_QMDV02_02_INNER_PUT                                 MW(126:96)
+#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW                     MW(127:127)
+#define NVC3C0_QMDV02_02_QMD_GROUP_ID                              MW(133:128)
+#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE                  MW(134:134)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION               MW(135:135)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
+#define NVC3C0_QMDV02_02_IS_QUEUE                                  MW(136:136)
+#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE                            0x00000000
+#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE                             0x00000001
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(137:137)
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0                 MW(138:138)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1                 MW(139:139)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS                   MW(140:140)
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(141:141)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE                        MW(142:142)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID                   0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY                  MW(143:143)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
+#define NVC3C0_QMDV02_02_QMD_RESERVED_B                            MW(159:144)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE                       MW(184:160)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_C                            MW(185:185)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE           MW(186:186)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(187:187)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE             MW(188:188)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE              MW(189:189)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE              MW(190:190)
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE          MW(191:191)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME                   MW(223:192)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME                  MW(239:224)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME                   MW(255:240)
+#define NVC3C0_QMDV02_02_PROGRAM_OFFSET                            MW(287:256)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_D                            MW(335:328)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID                    MW(357:352)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE                       MW(366:366)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE                           MW(369:368)
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT                    MW(378:378)
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32                0x00000000
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX                             MW(382:382)
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH                          MW(415:384)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT                         MW(431:416)
+#define NVC3C0_QMDV02_02_QMD_RESERVED13A                           MW(447:432)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH                          MW(463:448)
+#define NVC3C0_QMDV02_02_QMD_RESERVED14A                           MW(479:464)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER                     MW(511:480)
+#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
+#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD                   MW(529:522)
+#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE                        MW(561:544)
+#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE             MW(568:562)
+#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE             MW(575:569)
+#define NVC3C0_QMDV02_02_QMD_VERSION                               MW(579:576)
+#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION                         MW(583:580)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_H                            MW(591:584)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0                     MW(607:592)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1                     MW(623:608)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2                     MW(639:624)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE               0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE                0x00000001
+#define NVC3C0_QMDV02_02_REGISTER_COUNT_V                          MW(656:648)
+#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE          MW(663:657)
+#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM                   MW(671:664)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER                     MW(703:672)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER                     MW(735:704)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER                    MW(767:736)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER                    MW(775:768)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_J                            MW(783:776)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP                     MW(790:788)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_K                            MW(791:791)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD                          MW(831:800)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER                    MW(863:832)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER                    MW(871:864)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_L                            MW(879:872)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP                     MW(886:884)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_M                            MW(887:887)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD                          MW(927:896)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_N                            MW(954:952)
+#define NVC3C0_QMDV02_02_BARRIER_COUNT                             MW(959:955)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
+#define NVC3C0_QMDV02_02_REGISTER_COUNT                            MW(991:984)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
+#define NVC3C0_QMDV02_02_SASS_VERSION                              MW(1023:1016)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER                     MW(1567:1536)
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER                     MW(1584:1568)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_S                            MW(1599:1585)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET                         MW(1630:1600)
+#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1631:1631)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT                         MW(1662:1632)
+#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE                          MW(1663:1663)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1693:1664)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_Q                            MW(1694:1694)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1695:1695)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
+#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1727:1696)
+#define NVC3C0_QMDV02_02_QMD_SPARE_G                               MW(1759:1728)
+#define NVC3C0_QMDV02_02_QMD_SPARE_H                               MW(1791:1760)
+#define NVC3C0_QMDV02_02_QMD_SPARE_I                               MW(1823:1792)
+#define NVC3C0_QMDV02_02_QMD_SPARE_J                               MW(1855:1824)
+#define NVC3C0_QMDV02_02_QMD_SPARE_K                               MW(1887:1856)
+#define NVC3C0_QMDV02_02_QMD_SPARE_L                               MW(1919:1888)
+#define NVC3C0_QMDV02_02_QMD_SPARE_M                               MW(1951:1920)
+#define NVC3C0_QMDV02_02_QMD_SPARE_N                               MW(1983:1952)
+#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER                            MW(2015:1984)
+#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER                            MW(2047:2016)
+
+
+
+#endif // #ifndef __CLC3C0QMD_H__
index 221bab3105b4020530c2c49ad042aa65cc071d3f..1c5a8dc0b1f4839cf0ada71a09a9829fd9260bd0 100644 (file)
@@ -1787,6 +1787,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVC0_3D_SP_UNK14__ESIZE                                0x00000004
 #define NVC0_3D_SP_UNK14__LEN                                  0x00000004
 
+#define GV100_3D_SP_ADDRESS_HIGH(i0)                          (0x00002014 + 0x40*(i0))
+#define GV100_3D_SP_ADDRESS_LOW(i0)                           (0x00002018 + 0x40*(i0))
+
 #define NVC0_3D_TEX_LIMITS(i0)                                (0x00002200 + 0x10*(i0))
 #define NVC0_3D_TEX_LIMITS__ESIZE                              0x00000010
 #define NVC0_3D_TEX_LIMITS__LEN                                0x00000005
index 4f3d1cd5b3b5c35f00b03d10e471d9708fb59dfb..b9fff341f28edadfa525c5f692f018da2f4d6549 100644 (file)
@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    prog->code_size = info->bin.codeSize;
    prog->relocs = info->bin.relocData;
    prog->fixups = info->bin.fixupData;
-   prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+   if (info->target >= NVISA_GV100_CHIPSET)
+      prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why?
+   else
+      prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
    prog->cp.smem_size = info->bin.smemSize;
    prog->num_barriers = info->numBarriers;
 
index 64a22104a1f7c8de44ee19c907e7091fdd481191..2807b59a4fdec9f5af4c5eb5b0a531c8e205ec8b 100644 (file)
@@ -444,8 +444,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
    case PIPE_SHADER_CAP_SUPPORTED_IRS: {
-      uint32_t irs = 1 << PIPE_SHADER_IR_TGSI |
-                     1 << PIPE_SHADER_IR_NIR;
+      uint32_t irs = 1 << PIPE_SHADER_IR_NIR |
+         ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI);
       if (screen->force_enable_cl)
          irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
       return irs;
@@ -468,6 +468,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
       return shader != PIPE_SHADER_FRAGMENT;
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+      /* HW doesn't support indirect addressing of fragment program inputs
+       * on Volta.  The binary driver generates a function to handle every
+       * possible indirection, and indirectly calls the function to handle
+       * this instead.
+       */
+      if (class_3d >= GV100_3D_CLASS)
+         return shader != PIPE_SHADER_FRAGMENT;
+      return 1;
    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
       return 1;
@@ -731,8 +739,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
    BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
    PUSH_DATA (push, 0xff);
    PUSH_DATA (push, 0xff);
-   BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
-   PUSH_DATA (push, 0x3f);
+   if (obj_class < GV100_3D_CLASS) {
+      BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+      PUSH_DATA (push, 0x3f);
+   }
 
    BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
    PUSH_DATA (push, (3 << 16) | 3);
@@ -764,8 +774,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
    BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
    PUSH_DATA (push, 3);
 
-   BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
-   PUSH_DATA (push, 0x3fffff);
+   if (obj_class < GV100_3D_CLASS) {
+      BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+      PUSH_DATA (push, 0x3fffff);
+   }
    BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
    PUSH_DATA (push, 1);
    BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
@@ -825,6 +837,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    case 0x110:
    case 0x120:
    case 0x130:
+   case 0x140:
       return nve4_screen_compute_setup(screen, screen->base.pushbuf);
    default:
       return -1;
@@ -896,13 +909,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
    nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
 
    /* update the code segment setup */
-   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, screen->text->offset);
-   PUSH_DATA (push, screen->text->offset);
-   if (screen->compute) {
-      BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+   if (screen->eng3d->oclass < GV100_3D_CLASS) {
+      BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
       PUSH_DATAh(push, screen->text->offset);
       PUSH_DATA (push, screen->text->offset);
+      if (screen->compute) {
+         BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+         PUSH_DATAh(push, screen->text->offset);
+         PUSH_DATA (push, screen->text->offset);
+      }
    }
 
    return 0;
@@ -981,6 +996,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    case 0x110:
    case 0x120:
    case 0x130:
+   case 0x140:
       break;
    default:
       return NULL;
@@ -1047,16 +1063,18 @@ nvc0_screen_create(struct nouveau_device *dev)
    screen->base.fence.emit = nvc0_screen_fence_emit;
    screen->base.fence.update = nvc0_screen_fence_update;
 
+   if (dev->chipset < 0x140) {
+      ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
+                               NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
+      if (ret)
+         FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
 
-   ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
-                            NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
-   if (ret)
-      FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
-
-   BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
-   PUSH_DATA (push, screen->nvsw->handle);
+      BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+      PUSH_DATA (push, screen->nvsw->handle);
+   }
 
    switch (dev->chipset & ~0xf) {
+   case 0x140:
    case 0x130:
    case 0x120:
    case 0x110:
@@ -1110,6 +1128,9 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATA (push, screen->fence.bo->offset + 16);
 
    switch (dev->chipset & ~0xf) {
+   case 0x140:
+      obj_class = GV100_3D_CLASS;
+      break;
    case 0x130:
       switch (dev->chipset) {
       case 0x130:
index 02f1c6fba8fdcc277ffd01bdda46b8d365de2271..490026b2c003e8587615c412b095b186bfbca5c9 100644 (file)
@@ -70,8 +70,14 @@ nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
 
-   BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
-   PUSH_DATA (push, prog->code_base);
+   if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
+      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
+      PUSH_DATA (push, prog->code_base);
+   } else {
+      BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
+      PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
+      PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
+   }
 }
 
 void
index 58f29213dadcb8835965db65b0f0de7e92dc9576..3a3f0a926de2d4afadbdcc7e1e101ebbef287362 100644 (file)
 #include "qmd.h"
 #include "cla0c0qmd.h"
 #include "clc0c0qmd.h"
+#include "clc3c0qmd.h"
 
 #define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
 #define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
 #define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
 #define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
 
 int
 nve4_screen_compute_setup(struct nvc0_screen *screen,
@@ -49,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    uint64_t address;
 
    switch (dev->chipset & ~0xf) {
+   case 0x140:
+      obj_class = GV100_COMPUTE_CLASS;
+      break;
    case 0x100:
    case 0xf0:
       obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
@@ -92,24 +98,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    PUSH_DATAh(push, screen->tls->size / screen->mp_count);
    PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
    PUSH_DATA (push, 0xff);
-   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
-   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
-   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
-   PUSH_DATA (push, 0xff);
+   if (obj_class < GV100_COMPUTE_CLASS) {
+      BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
+      PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+      PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+      PUSH_DATA (push, 0xff);
+   }
 
    /* Unified address space ? Who needs that ? Certainly not OpenCL.
     *
     * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
     *  accessible. We cannot prevent that at the moment, so expect failure.
     */
-   BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
-   PUSH_DATA (push, 0xff << 24);
-   BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
-   PUSH_DATA (push, 0xfe << 24);
-
-   BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, screen->text->offset);
-   PUSH_DATA (push, screen->text->offset);
+   if (obj_class < GV100_COMPUTE_CLASS) {
+      BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
+      PUSH_DATA (push, 0xff << 24);
+      BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
+      PUSH_DATA (push, 0xfe << 24);
+
+      BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, screen->text->offset);
+      PUSH_DATA (push, screen->text->offset);
+   } else {
+      BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
+      PUSH_DATAh(push, 0xfeULL << 24);
+      PUSH_DATA (push, 0xfeULL << 24);
+      BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
+      PUSH_DATAh(push, 0xffULL << 24);
+      PUSH_DATA (push, 0xffULL << 24);
+   }
 
    BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
    PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
@@ -717,6 +734,69 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
    nve4_compute_setup_buf_cb(nvc0, true, qmd);
 }
 
+/*
+ * Encode a shared memory size for the *_SM_CONFIG_SHARED_MEM_SIZE fields of
+ * the launch descriptor.
+ *
+ * The requested size is rounded up to one of 8, 16, 32, 64 or 96 KiB (any
+ * request above 64 KiB maps to 96 KiB), and the result is returned as the
+ * number of 4 KiB units plus one.
+ */
+static int
+gv100_sm_config_smem_size(u32 size)
+{
+   u32 rounded;
+
+   if (size <= 8 * 1024)
+      rounded = 8 * 1024;
+   else if (size <= 16 * 1024)
+      rounded = 16 * 1024;
+   else if (size <= 32 * 1024)
+      rounded = 32 * 1024;
+   else if (size <= 64 * 1024)
+      rounded = 64 * 1024;
+   else
+      rounded = 96 * 1024;
+
+   return (rounded / 4096) + 1;
+}
+
+/*
+ * Fill in the launch descriptor (QMD, version 02_02) for a compute grid
+ * launch. nve4_launch_grid() selects this variant when the compute class is
+ * GV100_COMPUTE_CLASS or newer.
+ *
+ * nvc0: context supplying the bound compute program and constant buffers
+ * qmd:  descriptor words to fill in
+ * info: grid/block dimensions and the kernel entry point (info->pc)
+ */
+static void
+gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
+                                const struct pipe_grid_info *info)
+{
+   struct nvc0_program *cp = nvc0->compprog;
+   struct nvc0_screen *screen = nvc0->screen;
+   // screen->text->offset plus the program's symbol offset for info->pc.
+   uint64_t entry =
+      screen->text->offset + nvc0_program_symbol_offset(cp, info->pc);
+
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+   NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+   NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX);
+   // Shared memory: the size is the program's requirement aligned to 0x100.
+   // MIN/MAX are set to the encodings for 8 KiB and 96 KiB, and TARGET is
+   // derived from the program's own shared memory size.
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+                                  align(cp->cp.smem_size, 0x100));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+                                  gv100_sm_config_smem_size(8 * 1024));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+                                  gv100_sm_config_smem_size(96 * 1024));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+                                  gv100_sm_config_smem_size(cp->cp.smem_size));
+
+   // Grid (CTA raster) and block (CTA thread) dimensions come straight from
+   // the launch info; register and barrier counts come from the program.
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
+
+   // Only bind user uniforms and the driver constant buffer through the
+   // launch descriptor because UBOs are stuck to the driver cb to avoid the
+   // limitation of 8 CBs.
+   if (nvc0->constbuf[5][0].user || cp->parm_size) {
+      // NOTE(review): the last argument (1 << 16) is presumably the buffer
+      // size; confirm against gp100_cp_launch_desc_set_cb().
+      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
+                                  NVC0_CB_USR_INFO(5), 1 << 16);
+
+      // Later logic will attempt to bind a real buffer at position 0. That
+      // should not happen if we've bound a user buffer.
+      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
+   }
+   // Slot 7 is always bound, at NVC0_CB_AUX_INFO(5) within the uniform BO.
+   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
+                               NVC0_CB_AUX_INFO(5), 1 << 11);
+
+   nve4_compute_setup_buf_cb(nvc0, true, qmd);
+
+   // The entry value is written as its low 32 bits and the remaining upper
+   // bits in two separate fields.
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
+}
+
 static inline void *
 nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                                struct nouveau_bo **pbo, uint64_t *pgpuaddr)
@@ -787,6 +867,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    if (ret)
       goto out;
 
+   if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+      gv100_compute_setup_launch_desc(nvc0, desc, info);
+   else
    if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
       gp100_compute_setup_launch_desc(nvc0, desc, info);
    else
@@ -797,6 +880,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 #ifndef NDEBUG
    if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
       debug_printf("Queue Meta Data:\n");
+      if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+         NVC3C0QmdDump_V02_02(desc);
+      else
       if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
          NVC0C0QmdDump_V02_01(desc);
       else
@@ -931,7 +1017,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
    nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
 }
 
-
 #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
 static void
 nve4_compute_trap_info(struct nvc0_context *nvc0)
index 50db3cb5cf946c734beb36cdf9aab1fecdc1e3ba..86c290fe8362d93af557acac3a6bd691c33c244c 100644 (file)
@@ -64,4 +64,5 @@
 
 void NVA0C0QmdDump_V00_06(uint32_t *);
 void NVC0C0QmdDump_V02_01(uint32_t *);
+void NVC3C0QmdDump_V02_02(uint32_t *);
 #endif
diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
new file mode 100644 (file)
index 0000000..c9bd896
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "qmd.h"
+#include "clc3c0qmd.h"
+
+/*
+ * Shorthands that forward to the generic NVQMD_VAL/NVQMD_DEF/NVQMD_IDX
+ * helpers with the class (NVC3C0) and QMD version (QMDV02_02) filled in.
+ * "a..." is the GNU named-variadic form, and "##a" drops the preceding
+ * comma when no extra arguments are passed.
+ * NOTE(review): the NVQMD_* helpers are not defined in this file
+ * (presumably they come from qmd.h); confirm their argument conventions
+ * there.
+ */
+#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a)
+
+/*
+ * Dump the fields of a version 02_02 QMD, one NVQMD_* helper invocation per
+ * field. nve4_launch_grid() calls this for GV100_COMPUTE_CLASS and newer
+ * when the NV50_PROG_DEBUG option is set in builds without NDEBUG.
+ *
+ * qmd: descriptor words to decode
+ *
+ * NOTE(review): the output format is determined by the NVQMD_VAL/DEF/IDX
+ * helpers, which are defined outside this file; VAL appears to take a
+ * printf-style format and DEF/IDX the list of enumerant names to decode.
+ */
+void
+NVC3C0QmdDump_V02_02(uint32_t *qmd)
+{
+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
+   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
+   NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
+   NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
+   /* Indexed field: entries 0..7 are dumped. */
+   for (int i = 0; i < 8; i++)
+      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
+   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
+                                                    RED_MIN,
+                                                    RED_MAX,
+                                                    RED_INC,
+                                                    RED_DEC,
+                                                    RED_AND,
+                                                    RED_OR,
+                                                    RED_XOR);
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
+                                                    RED_MIN,
+                                                    RED_MAX,
+                                                    RED_INC,
+                                                    RED_DEC,
+                                                    RED_AND,
+                                                    RED_OR,
+                                                    RED_XOR);
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x");
+   /* Per-index constant buffer fields, again for entries 0..7. */
+   for (int i = 0; i < 8; i++) {
+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
+      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
+   }
+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x");
+   NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
+   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
+}
index 5c43518afcbbf21a12bd913961bd25e5e2b41179..2dbe7be021190290b29d7fda7c5a963d0e8f8cf5 100644 (file)
@@ -104,6 +104,7 @@ nouveau_drm_screen_create(int fd)
        case 0x110:
        case 0x120:
        case 0x130:
+       case 0x140:
                init = nvc0_screen_create;
                break;
        default: