From f014ae3c7cce504afe5d3c3de154f9cf9aea0821 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 10 Dec 2017 15:06:45 +0100 Subject: [PATCH] nouveau: add support for nir not all those nir options are actually required, it just made the work a little easier. v2: fix asserts parse compute shaders don't lower bitfield_insert v3: fix memory leak v4: don't lower fmod32 v5: set lower_all_io_to_temps to false fix memory leak because we take over ownership of the nir shader merge: use the lowering helper v6: include TGSI debug header for proper assert call add nv50 support v7: fix Automake build v8: free shader only for the set shader type v9: check for IR type inside get_compiler_options squash "nouveau: add env var to make nir default" fix memory leak when creating compute shaders use debug_get_bool_option as it is available in non debug builds return failure if unsupported IR is encountered don't lower fpow in nir lower int 64 divmod inside nir to prevent crashes Signed-off-by: Karol Herbst Reviewed-by: Pierre Moreau --- src/gallium/drivers/nouveau/Automake.inc | 3 + src/gallium/drivers/nouveau/Makefile.am | 5 + src/gallium/drivers/nouveau/Makefile.sources | 1 + .../drivers/nouveau/codegen/nv50_ir.cpp | 3 + src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../nouveau/codegen/nv50_ir_from_nir.cpp | 76 ++++++++++++++ src/gallium/drivers/nouveau/meson.build | 9 +- src/gallium/drivers/nouveau/nouveau_screen.c | 2 + src/gallium/drivers/nouveau/nouveau_screen.h | 2 + .../drivers/nouveau/nv50/nv50_program.c | 19 +++- .../drivers/nouveau/nv50/nv50_screen.c | 46 ++++++++- src/gallium/drivers/nouveau/nv50/nv50_state.c | 35 ++++++- .../drivers/nouveau/nvc0/nvc0_program.c | 18 +++- .../drivers/nouveau/nvc0/nvc0_screen.c | 99 +++++++++++++++++-- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 31 +++++- 15 files changed, 327 insertions(+), 23 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp diff --git 
a/src/gallium/drivers/nouveau/Automake.inc b/src/gallium/drivers/nouveau/Automake.inc index 1d383fcb7b1..657790494dc 100644 --- a/src/gallium/drivers/nouveau/Automake.inc +++ b/src/gallium/drivers/nouveau/Automake.inc @@ -8,4 +8,7 @@ TARGET_LIB_DEPS += \ $(NOUVEAU_LIBS) \ $(LIBDRM_LIBS) +TARGET_COMPILER_LIB_DEPS = \ + $(top_builddir)/src/compiler/nir/libnir.la + endif diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am index 48c0fdf512d..ee7191675cc 100644 --- a/src/gallium/drivers/nouveau/Makefile.am +++ b/src/gallium/drivers/nouveau/Makefile.am @@ -25,6 +25,10 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CPPFLAGS = \ -I$(top_srcdir)/include \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ $(GALLIUM_DRIVER_CFLAGS) \ $(LIBDRM_CFLAGS) \ $(NOUVEAU_CFLAGS) @@ -47,6 +51,7 @@ nouveau_compiler_SOURCES = \ nouveau_compiler_LDADD = \ libnouveau.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index ec344c63169..c6a1aff7110 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -117,6 +117,7 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_emit_nv50.cpp \ codegen/nv50_ir_from_common.cpp \ codegen/nv50_ir_from_common.h \ + codegen/nv50_ir_from_nir.cpp \ codegen/nv50_ir_from_tgsi.cpp \ codegen/nv50_ir_graph.cpp \ codegen/nv50_ir_graph.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 993d01c1e44..a181a13a3b1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1241,6 +1241,9 @@ nv50_ir_generate_code(struct nv50_ir_prog_info 
*info) prog->optLevel = info->optLevel; switch (info->bin.sourceRep) { + case PIPE_SHADER_IR_NIR: + ret = prog->makeFromNIR(info) ? 0 : -2; + break; case PIPE_SHADER_IR_TGSI: ret = prog->makeFromTGSI(info) ? 0 : -2; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 8d32a25ec23..b19751ab372 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1284,6 +1284,7 @@ public: inline void del(Function *fn, int& id) { allFuncs.remove(id); } inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } + bool makeFromNIR(struct nv50_ir_prog_info *); bool makeFromTGSI(struct nv50_ir_prog_info *); bool convertToSSA(); bool optimizeSSA(int level); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp new file mode 100644 index 00000000000..b22c62fd434 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Karol Herbst + */ + +#include "compiler/nir/nir.h" + +#include "util/u_debug.h" + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_from_common.h" +#include "codegen/nv50_ir_lowering_helper.h" +#include "codegen/nv50_ir_util.h" + +namespace { + +using namespace nv50_ir; + +class Converter : public ConverterCommon +{ +public: + Converter(Program *, nir_shader *, nv50_ir_prog_info *); + + bool run(); +private: + nir_shader *nir; +}; + +Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) + : ConverterCommon(prog, info), + nir(nir) {} + +bool +Converter::run() +{ + return false; +} + +} // unnamed namespace + +namespace nv50_ir { + +bool +Program::makeFromNIR(struct nv50_ir_prog_info *info) +{ + nir_shader *nir = (nir_shader*)info->bin.source; + Converter converter(this, nir, info); + bool result = converter.run(); + if (!result) + return result; + LoweringHelper lowering; + lowering.run(this); + tlsSize = info->bin.tlsSpace; + return result; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build index aff1b62436c..64138212b5b 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -131,6 +131,7 @@ files_libnouveau = files( 'codegen/nv50_ir_emit_nv50.cpp', 'codegen/nv50_ir_from_common.cpp', 'codegen/nv50_ir_from_common.h', + 'codegen/nv50_ir_from_nir.cpp', 'codegen/nv50_ir_from_tgsi.cpp', 'codegen/nv50_ir_graph.cpp', 'codegen/nv50_ir_graph.h', @@ -210,9 +211,9 @@ files_libnouveau = files( libnouveau = static_library( 'nouveau', - [files_libnouveau], + [files_libnouveau, nir_opcodes_h], include_directories : [ - inc_src, inc_include, 
inc_gallium, inc_gallium_aux, + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_common, ], c_args : [c_vis_args], cpp_args : [cpp_vis_args], @@ -224,12 +225,12 @@ nouveau_compiler = executable( 'nouveau_compiler.c', include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux], dependencies : [dep_libdrm, dep_libdrm_nouveau], - link_with : [libnouveau, libgallium, libmesa_util], + link_with : [libnouveau, libgallium, libmesa_util, libnir], build_by_default : with_tools.contains('nouveau'), install : with_tools.contains('nouveau'), ) driver_nouveau = declare_dependency( compile_args : '-DGALLIUM_NOUVEAU', - link_with : [libnouveauwinsys, libnouveau], + link_with : [libnouveauwinsys, libnouveau, libnir], ) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e9fe04bddf7..98b44b7df0b 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -180,6 +180,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); + screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); + /* These must be set before any failure is possible, as the cleanup * paths assume they're responsible for deleting them. 
*/ diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index b714f0a2aa9..4598d6a60e3 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -65,6 +65,8 @@ struct nouveau_screen { struct disk_cache *disk_shader_cache; + bool prefer_nir; + #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS union { uint64_t v[29]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index b117790d6ec..940fb9ce25c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -22,6 +22,8 @@ #include "pipe/p_defines.h" +#include "compiler/nir/nir.h" + #include "nv50/nv50_program.h" #include "nv50/nv50_context.h" @@ -333,8 +335,20 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->type = prog->type; info->target = chipset; - info->bin.sourceRep = PIPE_SHADER_IR_TGSI; - info->bin.source = (void *)prog->pipe.tokens; + + info->bin.sourceRep = prog->pipe.type; + switch (prog->pipe.type) { + case PIPE_SHADER_IR_TGSI: + info->bin.source = (void *)prog->pipe.tokens; + break; + case PIPE_SHADER_IR_NIR: + info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir); + break; + default: + assert(!"unsupported IR!"); + FREE(info); /* this early exit bypasses the FREE(info) at out: */ + return false; + } info->bin.smemSize = prog->cp.smem_size; info->io.auxCBSlot = 15; @@ -438,6 +452,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->bin.codeSize); out: + if (info->bin.sourceRep == PIPE_SHADER_IR_NIR) + ralloc_free((void *)info->bin.source); FREE(info); return !ret; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 724457199fe..b9bfce21364 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -27,6 +27,7 @@ #include "util/u_format_s3tc.h" #include "util/u_screen.h" 
#include "pipe/p_screen.h" +#include "compiler/nir/nir.h" #include "nv50/nv50_context.h" #include "nv50/nv50_screen.h" @@ -346,6 +347,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { + const struct nouveau_screen *screen = nouveau_screen(pscreen); + switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: @@ -399,7 +402,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: @@ -873,6 +876,44 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) return 1; } +static const nir_shader_compiler_options nir_options = { + .fuse_ffma = false, /* nir doesn't track mad vs fma */ + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_fpow = false, + .lower_fmod64 = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + .lower_ffract = true, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = false, + .native_integers = true, + .lower_cs_local_index_from_id = true, + .use_interpolated_input_intrinsics = true, + .max_unroll_iterations = 32, +}; + +static const void * +nv50_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + if (ir == PIPE_SHADER_IR_NIR) + return &nir_options; + return NULL; +} + struct 
nouveau_screen * nv50_screen_create(struct nouveau_device *dev) { @@ -918,6 +959,9 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; + /* nir stuff */ + pscreen->get_compiler_options = nv50_screen_get_compiler_options; + nv50_screen_init_resource_functions(pscreen); if (screen->base.device->chipset < 0x84 || diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index fd4b8b64455..55167a27c09 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -28,6 +28,7 @@ #include "util/format_srgb.h" #include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" #include "nv50/nv50_stateobj.h" #include "nv50/nv50_context.h" @@ -756,7 +757,20 @@ nv50_sp_state_create(struct pipe_context *pipe, return NULL; prog->type = type; - prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + prog->pipe.type = cso->type; + + switch (cso->type) { + case PIPE_SHADER_IR_TGSI: + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + break; + case PIPE_SHADER_IR_NIR: + prog->pipe.ir.nir = cso->ir.nir; + break; + default: + assert(!"unsupported IR!"); + FREE(prog); /* prog is not handed to the caller on this path, so release it */ + return NULL; + } if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; @@ -775,7 +789,10 @@ nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso) nv50_program_destroy(nv50_context(pipe), prog); - FREE((void *)prog->pipe.tokens); + if (prog->pipe.type == PIPE_SHADER_IR_TGSI) + FREE((void *)prog->pipe.tokens); + else if (prog->pipe.type == PIPE_SHADER_IR_NIR) + ralloc_free(prog->pipe.ir.nir); FREE(prog); } @@ -837,13 +854,25 @@ nv50_cp_state_create(struct pipe_context *pipe, if (!prog) return NULL; prog->type = PIPE_SHADER_COMPUTE; + prog->pipe.type = cso->ir_type; + + switch(cso->ir_type) { + case PIPE_SHADER_IR_TGSI: + prog->pipe.tokens = tgsi_dup_tokens((const 
struct tgsi_token *)cso->prog); + break; + case PIPE_SHADER_IR_NIR: + prog->pipe.ir.nir = (nir_shader *)cso->prog; + break; + default: + assert(!"unsupported IR!"); + FREE(prog); /* prog is not handed to the caller on this path, so release it */ + return NULL; + } prog->cp.smem_size = cso->req_local_mem; prog->cp.lmem_size = cso->req_private_mem; prog->parm_size = cso->req_input_mem; - prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); - return (void *)prog; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 008b660b8c0..c81d8952c98 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -22,6 +22,7 @@ #include "pipe/p_defines.h" +#include "compiler/nir/nir.h" #include "tgsi/tgsi_ureg.h" #include "nvc0/nvc0_context.h" @@ -582,8 +583,20 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->type = prog->type; info->target = chipset; - info->bin.sourceRep = PIPE_SHADER_IR_TGSI; - info->bin.source = (void *)prog->pipe.tokens; + + info->bin.sourceRep = prog->pipe.type; + switch (prog->pipe.type) { + case PIPE_SHADER_IR_TGSI: + info->bin.source = (void *)prog->pipe.tokens; + break; + case PIPE_SHADER_IR_NIR: + info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir); + break; + default: + assert(!"unsupported IR!"); + FREE(info); /* this early exit bypasses the FREE(info) at out: */ + return false; + } #ifdef DEBUG info->target = debug_get_num_option("NV50_PROG_CHIPSET", chipset); @@ -711,6 +724,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, #endif out: + if (info->bin.sourceRep == PIPE_SHADER_IR_NIR) + ralloc_free((void *)info->bin.source); FREE(info); return !ret; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index d08d15cb9db..9d10e169d80 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -27,6 +27,7 @@ #include "util/u_format_s3tc.h" #include "util/u_screen.h" #include 
"pipe/p_screen.h" +#include "compiler/nir/nir.h" #include "nouveau_vp3_video.h" @@ -106,7 +107,8 @@ static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nouveau_device *dev = screen->device; switch (param) { /* non-boolean caps */ @@ -233,7 +235,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: @@ -281,8 +282,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_BALLOT: - case PIPE_CAP_BINDLESS_TEXTURE: return class_3d >= NVE4_3D_CLASS; + case PIPE_CAP_BINDLESS_TEXTURE: + return class_3d >= NVE4_3D_CLASS && !screen->prefer_nir; case PIPE_CAP_TGSI_ATOMFADD: return class_3d < GM107_3D_CLASS; /* needs additional lowering */ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: @@ -297,6 +299,13 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: return class_3d >= GP100_3D_CLASS; + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + /* TODO: nir doesn't support tg4 with multiple offsets */ + return screen->prefer_nir ? 0 : 1; + /* caps has to be turned on with nir */ + case PIPE_CAP_INT64_DIVMOD: + return screen->prefer_nir ? 
1 : 0; + /* unsupported caps */ case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -323,7 +332,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: case PIPE_CAP_MEMOBJ: @@ -375,7 +383,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { - const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + const uint16_t class_3d = screen->class_3d; switch (shader) { case PIPE_SHADER_VERTEX: @@ -391,9 +400,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 1 << PIPE_SHADER_IR_TGSI; + return 1 << PIPE_SHADER_IR_TGSI | + 1 << PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: @@ -886,6 +896,79 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); } +static const nir_shader_compiler_options nir_options = { + .lower_fdiv = false, + .lower_ffma = false, + .fuse_ffma = false, /* nir doesn't track mad vs fma */ + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_fpow = false, + .lower_fsat = false, + .lower_fsqrt = false, // TODO: only before gm200 + .lower_fmod32 = true, + .lower_fmod64 = true, + .lower_bitfield_extract = false, + .lower_bitfield_extract_to_shifts = false, + .lower_bitfield_insert = false, + .lower_bitfield_insert_to_shifts = false, + 
.lower_bitfield_reverse = false, + .lower_bit_count = false, + .lower_bfm = false, + .lower_ifind_msb = false, + .lower_find_lsb = false, + .lower_uadd_carry = true, // TODO + .lower_usub_borrow = true, // TODO + .lower_mul_high = false, + .lower_negate = false, + .lower_sub = false, // TODO + .lower_scmp = true, // TODO: not implemented yet + .lower_idiv = true, + .lower_isign = false, // TODO + .fdot_replicates = false, // TODO + .lower_ffloor = false, // TODO + .lower_ffract = true, + .lower_fceil = false, // TODO + .lower_ldexp = true, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = false, + .native_integers = true, + .vertex_id_zero_based = false, + .lower_base_vertex = false, + .lower_helper_invocation = false, + .lower_cs_local_index_from_id = true, + .lower_cs_local_id_from_index = false, + .lower_device_index_to_zero = false, // TODO + .lower_wpos_pntc = false, // TODO + .lower_hadd = true, // TODO + .lower_add_sat = true, // TODO + .use_interpolated_input_intrinsics = true, + .lower_mul_2x32_64 = true, // TODO + .max_unroll_iterations = 32, + .lower_int64_options = nir_lower_divmod64, // TODO + .lower_doubles_options = 0, // TODO +}; + +static const void * +nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + if (ir == PIPE_SHADER_IR_NIR) + return &nir_options; + return NULL; +} + #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ @@ -960,6 +1043,8 @@ nvc0_screen_create(struct nouveau_device *dev) pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid; pscreen->get_driver_query_info = 
nvc0_screen_get_driver_query_info; pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info; + /* nir stuff */ + pscreen->get_compiler_options = nvc0_screen_get_compiler_options; nvc0_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 53ad47c6ed4..ed4cb869ba6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -27,6 +27,7 @@ #include "util/u_transfer.h" #include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" #include "nvc0/nvc0_stateobj.h" #include "nvc0/nvc0_context.h" @@ -595,9 +596,20 @@ nvc0_sp_state_create(struct pipe_context *pipe, return NULL; prog->type = type; + prog->pipe.type = cso->type; - if (cso->tokens) + switch(cso->type) { + case PIPE_SHADER_IR_TGSI: prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + break; + case PIPE_SHADER_IR_NIR: + prog->pipe.ir.nir = cso->ir.nir; + break; + default: + assert(!"unsupported IR!"); + FREE(prog); /* prog is not handed to the caller on this path, so release it */ + return NULL; + } if (cso->stream_output.num_outputs) prog->pipe.stream_output = cso->stream_output; @@ -616,7 +628,10 @@ nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso) nvc0_program_destroy(nvc0_context(pipe), prog); - FREE((void *)prog->pipe.tokens); + if (prog->pipe.type == PIPE_SHADER_IR_TGSI) + FREE((void *)prog->pipe.tokens); + else if (prog->pipe.type == PIPE_SHADER_IR_NIR) + ralloc_free(prog->pipe.ir.nir); FREE(prog); } @@ -710,12 +725,24 @@ nvc0_cp_state_create(struct pipe_context *pipe, if (!prog) return NULL; prog->type = PIPE_SHADER_COMPUTE; + prog->pipe.type = cso->ir_type; prog->cp.smem_size = cso->req_local_mem; prog->cp.lmem_size = cso->req_private_mem; prog->parm_size = cso->req_input_mem; - prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + switch(cso->ir_type) { + case PIPE_SHADER_IR_TGSI: + prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog); + break; + case 
PIPE_SHADER_IR_NIR: + prog->pipe.ir.nir = (nir_shader *)cso->prog; + break; + default: + assert(!"unsupported IR!"); + FREE(prog); /* prog is not handed to the caller on this path, so release it */ + return NULL; + } prog->translated = nvc0_program_translate( prog, nvc0_context(pipe)->screen->base.device->chipset, -- 2.30.2