X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fnvc0%2Fnvc0_screen.c;h=fe80c7e91037eb178a7d3cb8bcaa0f8757a3209f;hb=4e110eca42ac2c56c5763a1f502e7c80db67e064;hp=37e10dcd07f9835619e9234339606b36927a2b90;hpb=c2ae9b4052701f8fde47947acb9ee47a878e1409;p=mesa.git diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 37e10dcd07f..fe80c7e9103 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -25,7 +25,9 @@ #include #include "util/u_format.h" #include "util/u_format_s3tc.h" +#include "util/u_screen.h" #include "pipe/p_screen.h" +#include "compiler/nir/nir.h" #include "nouveau_vp3_video.h" @@ -42,6 +44,7 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, + unsigned storage_sample_count, unsigned bindings) { const struct util_format_description *desc = util_format_description(format); @@ -51,6 +54,9 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ return false; + if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) + return false; + /* Short-circuit the rest of the logic -- this is used by the state tracker * to determine valid MS levels in a no-attachments scenario. */ @@ -101,7 +107,8 @@ static int nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; - struct nouveau_device *dev = nouveau_screen(pscreen)->device; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + struct nouveau_device *dev = screen->device; switch (param) { /* non-boolean caps */ @@ -125,11 +132,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: return 430; case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: - return 140; + return 430; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: return 1; + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: + case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS: + return 8; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return 4; case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: @@ -140,8 +150,14 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1024; case PIPE_CAP_MAX_VERTEX_STREAMS: return 4; + case PIPE_CAP_MAX_GS_INVOCATIONS: + return 32; + case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: + return 1 << 27; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 2047; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -166,9 +182,19 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return NVC0_MAX_WINDOW_RECTANGLES; case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: return class_3d >= GM200_3D_CLASS ? 8 : 0; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 64 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ + return 0x1f0 / 16; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: @@ -249,11 +275,19 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; case PIPE_CAP_TGSI_FS_FBFETCH: return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */ + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_BALLOT: + return class_3d >= NVE4_3D_CLASS; + case PIPE_CAP_BINDLESS_TEXTURE: + return class_3d >= NVE4_3D_CLASS; + case PIPE_CAP_TGSI_ATOMFADD: + return class_3d < GM107_3D_CLASS; /* needs additional lowering */ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: @@ -265,12 +299,13 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return class_3d >= GM200_3D_CLASS; case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: return class_3d >= GP100_3D_CLASS; - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - case PIPE_CAP_TGSI_BALLOT: - case PIPE_CAP_BINDLESS_TEXTURE: - return class_3d >= NVE4_3D_CLASS; + + /* caps has to be turned on with nir */ + case PIPE_CAP_INT64_DIVMOD: + return screen->prefer_nir ? 1 : 0; /* unsupported caps */ + case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_SHADER_STENCIL_EXPORT: @@ -292,11 +327,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: - case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: case PIPE_CAP_MEMOBJ: @@ -304,12 +337,22 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: case PIPE_CAP_TILE_RASTER_ORDER: case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: + case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_CONTEXT_PRIORITY_MASK: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: + case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: + case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: + case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: + case PIPE_CAP_NIR_COMPACT_ARRAYS: + case PIPE_CAP_IMAGE_LOAD_FORMATTED: return 0; case PIPE_CAP_VENDOR_ID: @@ -328,10 +371,10 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return dev->vram_size >> 20; case PIPE_CAP_UMA: return 0; + default: + debug_printf("%s: unhandled cap %d\n", __func__, param); + return u_pipe_screen_get_param_defaults(pscreen, param); } - - NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); - return 0; } static int @@ -339,7 +382,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, enum pipe_shader_cap param) { - const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + const struct nouveau_screen *screen = nouveau_screen(pscreen); + const uint16_t class_3d = screen->class_3d; switch (shader) { case PIPE_SHADER_VERTEX: @@ -355,9 +399,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 1 << PIPE_SHADER_IR_TGSI; + return 1 << PIPE_SHADER_IR_TGSI | + 1 << PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: @@ -366,23 +411,11 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 16; case PIPE_SHADER_CAP_MAX_INPUTS: - if (shader == PIPE_SHADER_VERTEX) - return 32; - /* NOTE: These only count our slots for GENERIC varyings. - * The address space may be larger, but the actual hard limit seems to be - * less than what the address space layout permits, so don't add TEXCOORD, - * COLOR, etc. here. - */ - if (shader == PIPE_SHADER_FRAGMENT) - return 0x1f0 / 16; - /* Actually this counts CLIPVERTEX, which occupies the last generic slot, - * and excludes 0x60 per-patch inputs. - */ return 0x200 / 16; case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return 65536; + return NVC0_MAX_CONSTBUF_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return NVC0_MAX_PIPE_CONSTBUFS; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: @@ -607,7 +640,6 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_heap_destroy(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); - FREE(screen->default_tsc); FREE(screen->tic.entries); nouveau_object_del(&screen->eng3d); @@ -829,6 +861,112 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) return 0; } +void +nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, + int stage, int index, int size, uint64_t addr) +{ + assert(stage != 5); + + struct nouveau_pushbuf *push = screen->base.pushbuf; + + if (screen->base.class_3d >= GM107_3D_CLASS) { + struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index]; + + // TODO: Better figure out the conditions in which this is needed + bool serialize = binding->addr == addr && binding->size != size; + if (can_serialize) + serialize = serialize && *can_serialize; + if (serialize) { + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + if (can_serialize) + *can_serialize = false; + } + + binding->addr = addr; + binding->size = size; + } + + if (size >= 0) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, size); + PUSH_DATAh(push, addr); + PUSH_DATA (push, addr); + } + IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); +} + +static const nir_shader_compiler_options nir_options = { + .lower_fdiv = false, + .lower_ffma = false, + .fuse_ffma = false, /* nir doesn't track mad vs fma */ + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_fpow = false, + .lower_fsat = false, + .lower_fsqrt = false, // TODO: only before gm200 + .lower_fmod32 = true, + .lower_fmod64 = true, + .lower_bitfield_extract = false, + .lower_bitfield_extract_to_shifts = false, + .lower_bitfield_insert = false, + .lower_bitfield_insert_to_shifts = false, + .lower_bitfield_reverse = false, + .lower_bit_count = false, + .lower_bfm = false, + .lower_ifind_msb = false, + .lower_find_lsb = false, + .lower_uadd_carry = true, // TODO + .lower_usub_borrow = true, // TODO + .lower_mul_high = false, + .lower_negate = false, + .lower_sub = false, // TODO + .lower_scmp = true, // TODO: not implemented yet + .lower_idiv = true, + .lower_isign = false, // TODO + .fdot_replicates = false, // TODO + .lower_ffloor = false, // TODO + .lower_ffract = true, + .lower_fceil = false, // TODO + .lower_ldexp = true, + .lower_pack_half_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = false, + .vertex_id_zero_based = false, + .lower_base_vertex = false, + .lower_helper_invocation = false, + .lower_cs_local_index_from_id = true, + .lower_cs_local_id_from_index = false, + .lower_device_index_to_zero = false, // TODO + .lower_wpos_pntc = false, // TODO + .lower_hadd = true, // TODO + .lower_add_sat = true, // TODO + .use_interpolated_input_intrinsics = true, + .lower_mul_2x32_64 = true, // TODO + .max_unroll_iterations = 32, + .lower_int64_options = nir_lower_divmod64, // TODO + .lower_doubles_options = 0, // TODO +}; + +static const void * +nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + if (ir == PIPE_SHADER_IR_NIR) + return &nir_options; + return NULL; +} + #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ @@ -876,6 +1014,13 @@ nvc0_screen_create(struct nouveau_device *dev) push->user_priv = screen; push->rsvd_kick = 5; + /* TODO: could this be higher on Kepler+? how does reclocking vs no + * reclocking affect performance? + * TODO: could this be higher on Fermi? + */ + if (dev->chipset >= 0xe0) + screen->base.transfer_pushbuf_threshold = 1024; + screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_SHADER_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | @@ -896,6 +1041,8 @@ nvc0_screen_create(struct nouveau_device *dev) pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid; pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info; + /* nir stuff */ + pscreen->get_compiler_options = nvc0_screen_get_compiler_options; nvc0_screen_init_resource_functions(pscreen); @@ -1220,8 +1367,8 @@ nvc0_screen_create(struct nouveau_device *dev) for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3); PUSH_DATA (push, 1); - PUSH_DATA (push, 8192 << 16); - PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 16384 << 16); + PUSH_DATA (push, 16384 << 16); } #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); @@ -1240,6 +1387,8 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); + MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); @@ -1272,14 +1421,14 @@ nvc0_screen_create(struct nouveau_device *dev) /* XXX: Compute and 3D are somehow aliased on Fermi. */ for (i = 0; i < 5; ++i) { + unsigned j = 0; + for (j = 0; j < 16; j++) + screen->cb_bindings[i][j].size = -1; + /* TIC and TSC entries for each unit (nve4+ only) */ /* auxiliary constants (6 user clip planes, base instance id) */ - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); - PUSH_DATA (push, (15 << 4) | 1); + nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, + screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { unsigned j; BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); @@ -1325,9 +1474,6 @@ nvc0_screen_create(struct nouveau_device *dev) if (!nvc0_blitter_create(screen)) goto fail; - screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry); - screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION; - nouveau_fence_new(&screen->base, &screen->base.fence.current); return &screen->base;