X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=571618a9cab86bc8ec9df65276e6de79441e2822;hb=9406b3a5c16fd034d2f2db828f2f833c336683f9;hp=f765cff76b98b118a2bf05876997f9fbfb140b91;hpb=7e5c81235ff774ab463596222eac0e8141a90d1a;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index f765cff76b9..571618a9cab 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -36,6 +36,7 @@ #include "main/context.h" #include "main/fbobject.h" #include "main/extensions.h" +#include "main/glthread.h" #include "main/imports.h" #include "main/macros.h" #include "main/points.h" @@ -45,8 +46,9 @@ #include "main/framebuffer.h" #include "main/stencil.h" #include "main/state.h" +#include "main/spirv_extensions.h" -#include "vbo/vbo_context.h" +#include "vbo/vbo.h" #include "drivers/common/driverfuncs.h" #include "drivers/common/meta.h" @@ -73,8 +75,12 @@ #include "tnl/t_pipeline.h" #include "util/ralloc.h" #include "util/debug.h" +#include "util/disk_cache.h" #include "isl/isl.h" +#include "common/gen_defines.h" + +#include "compiler/spirv/nir_spirv.h" /*************************************** * Mesa's Driver Functions ***************************************/ @@ -143,6 +149,24 @@ intel_get_string(struct gl_context * ctx, GLenum name) } } +static void +brw_set_background_context(struct gl_context *ctx, + struct util_queue_monitoring *queue_info) +{ + struct brw_context *brw = brw_context(ctx); + __DRIcontext *driContext = brw->driContext; + __DRIscreen *driScreen = driContext->driScreenPriv; + const __DRIbackgroundCallableExtension *backgroundCallable = + driScreen->dri2.backgroundCallable; + + /* Note: Mesa will only call this function if we've called + * _mesa_enable_multithreading(). We only do that if the loader exposed + * the __DRI_BACKGROUND_CALLABLE extension. So we know that + * backgroundCallable is not NULL. + */ + backgroundCallable->setBackgroundContext(driContext->loaderPrivate); +} + static void intel_viewport(struct gl_context *ctx) { @@ -235,6 +259,35 @@ intel_flush_front(struct gl_context *ctx) } } +static void +brw_display_shared_buffer(struct brw_context *brw) +{ + __DRIcontext *dri_context = brw->driContext; + __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; + __DRIscreen *dri_screen = brw->screen->driScrnPriv; + int fence_fd = -1; + + if (!brw->is_shared_buffer_bound) + return; + + if (!brw->is_shared_buffer_dirty) + return; + + if (brw->screen->has_exec_fence) { + /* This function is always called during a flush operation, so there is + * no need to flush again here. But we want to provide a fence_fd to the + * loader, and a redundant flush is the easiest way to acquire one. 
+ */ + if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd)) + return; + } + + dri_screen->mutableRenderBuffer.loader + ->displaySharedBuffer(dri_drawable, fence_fd, + dri_drawable->loaderPrivate); + brw->is_shared_buffer_dirty = false; +} + static void intel_glFlush(struct gl_context *ctx) { @@ -242,7 +295,7 @@ intel_glFlush(struct gl_context *ctx) intel_batchbuffer_flush(brw); intel_flush_front(ctx); - + brw_display_shared_buffer(brw); brw->need_flush_throttle = true; } @@ -280,6 +333,7 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetString = intel_get_string; functions->UpdateState = intel_update_state; + brw_init_draw_functions(functions); intelInitTextureFuncs(functions); intelInitTextureImageFuncs(functions); intelInitTextureCopyImageFuncs(functions); @@ -302,6 +356,8 @@ brw_init_driver_functions(struct brw_context *brw, brw_init_compute_functions(functions); brw_init_conditional_render_functions(functions); + functions->GenerateMipmap = brw_generate_mipmap; + functions->QueryInternalFormat = brw_query_internal_format; functions->NewTransformFeedback = brw_new_transform_feedback; @@ -333,9 +389,38 @@ brw_init_driver_functions(struct brw_context *brw, /* GL_ARB_get_program_binary */ brw_program_binary_init(brw->screen->deviceID); functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1; - functions->ProgramBinarySerializeDriverBlob = brw_program_serialize_nir; + functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary; functions->ProgramBinaryDeserializeDriverBlob = brw_deserialize_program_binary; + + if (brw->screen->disk_cache) { + functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir; + } + + functions->SetBackgroundContext = brw_set_background_context; +} + +static void +brw_initialize_spirv_supported_capabilities(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + struct gl_context *ctx = &brw->ctx; + + /* The following SPIR-V capabilities are only supported on gen7+. In theory + * you should enable the extension only on gen7+, but just in case let's + * assert it. 
+ */ + assert(devinfo->gen >= 7); + + ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.draw_parameters = true; + ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.image_write_without_format = true; + ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.tessellation = true; + ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.variable_pointers = true; } static void @@ -393,11 +478,11 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; if (devinfo->gen >= 7) { ctx->Const.MaxRenderbufferSize = 16384; - ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS); + ctx->Const.MaxTextureSize = 16384; ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ } else { ctx->Const.MaxRenderbufferSize = 8192; - ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); + ctx->Const.MaxTextureSize = 8192; ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ } ctx->Const.Max3DTextureLevels = 12; /* 2048 */ @@ -536,6 +621,8 @@ brw_initialize_context_constants(struct brw_context *brw) if (devinfo->gen >= 5 || devinfo->is_g4x) ctx->Const.MaxClipPlanes = 8; + ctx->Const.GLSLFragCoordIsSysVal = true; + ctx->Const.GLSLFrontFacingIsSysVal = true; ctx->Const.GLSLTessLevelsAsInputs = true; ctx->Const.PrimitiveRestartForPatches = true; @@ -590,7 +677,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.QuadsFollowProvokingVertexConvention = false; ctx->Const.NativeIntegers = true; - ctx->Const.VertexID_is_zero_based = true; /* Regarding the CMP instruction, the Ivybridge PRM says: * @@ -662,7 +748,7 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_viewport_array, OES_viewport_array */ if (devinfo->gen >= 6) { ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; - ctx->Const.ViewportSubpixelBits = 0; + ctx->Const.ViewportSubpixelBits = 8; /* Cast to float before negating because MaxViewportWidth is unsigned. 
*/ @@ -757,7 +843,8 @@ brw_process_driconf_options(struct brw_context *brw) driOptionCache *options = &brw->optionCache; driParseConfigFiles(options, &brw->screen->optionCache, - brw->driContext->driScreenPriv->myNum, "i965"); + brw->driContext->driScreenPriv->myNum, + "i965", NULL); int bo_reuse_mode = driQueryOptioni(options, "bo_reuse"); switch (bo_reuse_mode) { @@ -862,7 +949,7 @@ brwCreateContext(gl_api api, if (ctx_config->attribute_mask & ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY | - __DRIVER_CONTEXT_ATTRIB_RELEASE_BEHAVIOR)) { + __DRIVER_CONTEXT_ATTRIB_PRIORITY)) { *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; return false; } @@ -871,26 +958,13 @@ brwCreateContext(gl_api api, ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) && ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION); - GLenum release_behavior = GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH; - if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RELEASE_BEHAVIOR) { - switch (ctx_config->release_behavior) { - case __DRI_CTX_RELEASE_BEHAVIOR_NONE: - release_behavior = GL_NONE; - break; - case __DRI_CTX_RELEASE_BEHAVIOR_FLUSH: - break; - default: - *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; - return false; - } - } - struct brw_context *brw = rzalloc(NULL, struct brw_context); if (!brw) { fprintf(stderr, "%s: failed to alloc context\n", __func__); *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; return false; } + brw->perf_ctx = gen_perf_new_context(brw); driContextPriv->driverPrivate = brw; brw->driContext = driContextPriv; @@ -910,21 +984,19 @@ brwCreateContext(gl_api api, brw->gs.base.stage = MESA_SHADER_GEOMETRY; brw->wm.base.stage = MESA_SHADER_FRAGMENT; brw->cs.base.stage = MESA_SHADER_COMPUTE; - if (devinfo->gen >= 8) { - brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz; - } else if (devinfo->gen >= 7) { - brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz; - } else if (devinfo->gen >= 6) { - brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz; - } else { - brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz; - } brw_init_driver_functions(brw, &functions); if (notify_reset) functions.GetGraphicsResetStatus = brw_get_graphics_reset_status; + brw_process_driconf_options(brw); + + if (api == API_OPENGL_CORE && + driQueryOptionb(&screen->optionCache, "force_compat_profile")) { + api = API_OPENGL_COMPAT; + } + struct gl_context *ctx = &brw->ctx; if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { @@ -959,8 +1031,6 @@ brwCreateContext(gl_api api, _mesa_meta_init(ctx); - brw_process_driconf_options(brw); - if (INTEL_DEBUG & DEBUG_PERF) brw->perf_debug = true; @@ -977,30 +1047,29 @@ brwCreateContext(gl_api api, intel_batchbuffer_init(brw); - if (devinfo->gen >= 6) { - /* Create a new hardware context. Using a hardware context means that - * our GPU state will be saved/restored on context switch, allowing us - * to assume that the GPU is in the same state we left it in. - * - * This is required for transform feedback buffer offsets, query objects, - * and also allows us to reduce how much state we have to emit. - */ - brw->hw_ctx = brw_create_hw_context(brw->bufmgr); - - if (!brw->hw_ctx) { - fprintf(stderr, "Failed to create hardware context.\n"); - intelDestroyContext(driContextPriv); - return false; - } + /* Create a new hardware context. Using a hardware context means that + * our GPU state will be saved/restored on context switch, allowing us + * to assume that the GPU is in the same state we left it in. 
+ * + * This is required for transform feedback buffer offsets, query objects, + * and also allows us to reduce how much state we have to emit. + */ + brw->hw_ctx = brw_create_hw_context(brw->bufmgr); + if (!brw->hw_ctx && devinfo->gen >= 6) { + fprintf(stderr, "Failed to create hardware context.\n"); + intelDestroyContext(driContextPriv); + return false; + } - int hw_priority = BRW_CONTEXT_MEDIUM_PRIORITY; + if (brw->hw_ctx) { + int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY; if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) { switch (ctx_config->priority) { case __DRI_CTX_PRIORITY_LOW: - hw_priority = BRW_CONTEXT_LOW_PRIORITY; + hw_priority = GEN_CONTEXT_LOW_PRIORITY; break; case __DRI_CTX_PRIORITY_HIGH: - hw_priority = BRW_CONTEXT_HIGH_PRIORITY; + hw_priority = GEN_CONTEXT_HIGH_PRIORITY; break; } } @@ -1020,12 +1089,7 @@ brwCreateContext(gl_api api, return false; } - if (devinfo->gen == 10) { - fprintf(stderr, - "WARNING: i965 does not fully support Gen10 yet.\n" - "Instability or lower performance might occur.\n"); - - } + brw_upload_init(&brw->upload, brw->bufmgr, 65536); brw_init_state(brw); @@ -1064,14 +1128,24 @@ brwCreateContext(gl_api api, ctx->Const.RobustAccess = GL_TRUE; } - ctx->Const.ContextReleaseBehavior = release_behavior; - if (INTEL_DEBUG & DEBUG_SHADER_TIME) brw_init_shader_time(brw); _mesa_override_extensions(ctx); _mesa_compute_version(ctx); + /* GL_ARB_gl_spirv */ + if (ctx->Extensions.ARB_gl_spirv) { + brw_initialize_spirv_supported_capabilities(brw); + + if (ctx->Extensions.ARB_spirv_extensions) { + /* GL_ARB_spirv_extensions */ + ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions); + _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions, + &ctx->Const.SpirVCapabilities); + } + } + _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); @@ -1081,7 +1155,13 @@ brwCreateContext(gl_api api, vbo_use_buffer_objects(ctx); vbo_always_unmap_buffers(ctx); - brw_disk_cache_init(brw); + brw->ctx.Cache = brw->screen->disk_cache; + + if (driContextPriv->driScreenPriv->dri2.backgroundCallable && + driQueryOptionb(&screen->optionCache, "mesa_glthread")) { + /* Loader supports multithreading, and so do we. */ + _mesa_glthread_init(ctx); + } return true; } @@ -1092,7 +1172,18 @@ intelDestroyContext(__DRIcontext * driContextPriv) struct brw_context *brw = (struct brw_context *) driContextPriv->driverPrivate; struct gl_context *ctx = &brw->ctx; - const struct gen_device_info *devinfo = &brw->screen->devinfo; + + GET_CURRENT_CONTEXT(curctx); + + if (curctx == NULL) { + /* No current context, but we need one to release + * renderbuffer surface when we release framebuffer. + * So temporarily bind the context. 
+ */ + _mesa_make_current(ctx, NULL, NULL); + } + + _mesa_glthread_destroy(&brw->ctx); _mesa_meta_free(&brw->ctx); @@ -1104,8 +1195,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) brw_destroy_shader_time(brw); } - if (devinfo->gen >= 6) - blorp_finish(&brw->blorp); + blorp_finish(&brw->blorp); brw_destroy_state(brw); brw_draw_destroy(brw); @@ -1146,7 +1236,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) driDestroyOptionCache(&brw->optionCache); /* free the Mesa context */ - _mesa_free_context_data(&brw->ctx); + _mesa_free_context_data(&brw->ctx, true); ralloc_free(brw); driContextPriv->driverPrivate = NULL; @@ -1155,6 +1245,9 @@ intelDestroyContext(__DRIcontext * driContextPriv) GLboolean intelUnbindContext(__DRIcontext * driContextPriv) { + GET_CURRENT_CONTEXT(ctx); + _mesa_glthread_finish(ctx); + /* Unset current context and dispath table */ _mesa_make_current(NULL, NULL, NULL); @@ -1258,6 +1351,8 @@ intelMakeCurrent(__DRIcontext * driContextPriv, _mesa_make_current(ctx, fb, readFb); } else { + GET_CURRENT_CONTEXT(ctx); + _mesa_glthread_finish(ctx); _mesa_make_current(NULL, NULL, NULL); } @@ -1298,6 +1393,21 @@ intel_resolve_for_dri2_flush(struct brw_context *brw, intel_miptree_prepare_external(brw, rb->mt); } else { intel_renderbuffer_downsample(brw, rb); + + /* Call prepare_external on the single-sample miptree to do any + * needed resolves prior to handing it off to the window system. + * This is needed in the case that rb->singlesample_mt is Y-tiled + * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In + * this case, the MSAA resolve above will write compressed data into + * rb->singlesample_mt. + * + * TODO: Some day, if we decide to care about the tiny performance + * hit we're taking by doing the MSAA resolve and then a CCS resolve, + * we could detect this case and just allocate the single-sampled + * miptree without aux. However, that would be a lot of plumbing and + * this is a rather exotic case so it's not really worth it. + */ + intel_miptree_prepare_external(brw, rb->singlesample_mt); } } } @@ -1433,6 +1543,11 @@ intel_prepare_render(struct brw_context *brw) */ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) brw->front_buffer_dirty = true; + + if (brw->is_shared_buffer_bound) { + /* Subsequent rendering will probably dirty the shared buffer. */ + brw->is_shared_buffer_dirty = true; + } } /** @@ -1582,6 +1697,9 @@ intel_process_dri2_buffer(struct brw_context *brw, return; } + uint32_t tiling, swizzle; + brw_bo_get_tiling(bo, &tiling, &swizzle); + struct intel_mipmap_tree *mt = intel_miptree_create_for_bo(brw, bo, @@ -1591,6 +1709,7 @@ intel_process_dri2_buffer(struct brw_context *brw, drawable->h, 1, buffer->pitch, + isl_tiling_from_i915_tiling(tiling), MIPTREE_CREATE_DEFAULT); if (!mt) { brw_bo_unreference(bo); @@ -1662,12 +1781,25 @@ intel_update_image_buffer(struct brw_context *intel, else last_mt = rb->singlesample_mt; - if (last_mt && last_mt->bo == buffer->bo) + if (last_mt && last_mt->bo == buffer->bo) { + if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) { + intel_miptree_make_shareable(intel, last_mt); + } return; + } + + /* Only allow internal compression if samples == 0. For multisampled + * window system buffers, the only thing the single-sampled buffer is used + * for is as a resolve target. If we do any compression beyond what is + * supported by the window system, we will just have to resolve so it's + * probably better to just not bother. 
+    */
+   const bool allow_internal_aux = (num_samples == 0);
 
    struct intel_mipmap_tree *mt =
       intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
-                                         intel_rb_format(rb), true);
+                                         intel_rb_format(rb),
+                                         allow_internal_aux);
 
    if (!mt)
       return;
@@ -1683,6 +1815,35 @@ intel_update_image_buffer(struct brw_context *intel,
        rb->Base.Base.NumSamples > 1) {
       intel_renderbuffer_upsample(intel, rb);
    }
+
+   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
+      /* The compositor and the application may access this image
+       * concurrently. The display hardware may even scanout the image while
+       * the GPU is rendering to it. Aux surfaces cause difficulty with
+       * concurrent access, so permanently disable aux for this miptree.
+       *
+       * Perhaps we could improve overall application performance by
+       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
+       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
+       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
+       * approach to be highly dependent on the application's GL usage.
+       *
+       * I [chadv] expect clever disabling/reenabling to be counterproductive
+       * in the use cases I care about: applications that render nearly
+       * realtime handwriting to the surface while possibly undergoing
+       * simultaneous scanout as a display plane. The app requires low
+       * render latency. Even though the app spends most of its time in
+       * shared-buffer mode, it also frequently transitions between
+       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
+       * mode. Visual stutter during the transitions should be avoided.
+       *
+       * In this case, I [chadv] believe reducing the GPU workload at
+       * shared-buffer/double-buffer transitions would offer a smoother app
+       * experience than any savings due to aux compression. But I've
+       * collected no data to prove my theory.
+       */
+      intel_miptree_make_shareable(intel, mt);
+   }
 }
 
 static void
@@ -1743,4 +1904,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
                                 images.back,
                                 __DRI_IMAGE_BUFFER_BACK);
    }
+
+   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
+      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
+      drawable->w = images.back->width;
+      drawable->h = images.back->height;
+      intel_update_image_buffer(brw,
+                                drawable,
+                                back_rb,
+                                images.back,
+                                __DRI_IMAGE_BUFFER_SHARED);
+      brw->is_shared_buffer_bound = true;
+   } else {
+      brw->is_shared_buffer_bound = false;
+      brw->is_shared_buffer_dirty = false;
+   }
 }
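For readers following the patch, the heart of the new shared-buffer (EGL mutable render buffer) support is a pair of flags on the context: is_shared_buffer_bound is set when intel_update_image_buffers() attaches a __DRI_IMAGE_BUFFER_SHARED image, and is_shared_buffer_dirty is set by intel_prepare_render() and cleared once brw_display_shared_buffer(), reached from intel_glFlush(), hands the buffer to the loader through mutableRenderBuffer.loader->displaySharedBuffer(). The standalone C sketch below only illustrates that ordering; the struct, the loader stub, and main() are hypothetical stand-ins, not driver code, and the fence-fd plumbing from the patch is omitted.

/* Minimal, self-contained sketch of the shared-buffer bookkeeping added by
 * this patch. Not driver code; names below are illustrative stand-ins. */
#include <stdbool.h>
#include <stdio.h>

struct fake_brw {
   bool is_shared_buffer_bound;  /* a __DRI_IMAGE_BUFFER_SHARED image is attached */
   bool is_shared_buffer_dirty;  /* something was rendered since the last display */
};

/* Stand-in for the loader's mutableRenderBuffer.displaySharedBuffer() hook. */
static void loader_display_shared_buffer(int fence_fd)
{
   printf("displaySharedBuffer(fence_fd=%d)\n", fence_fd);
}

/* Mirrors intel_update_image_buffers(): bind or unbind the shared buffer. */
static void update_image_buffers(struct fake_brw *brw, bool shared)
{
   brw->is_shared_buffer_bound = shared;
   if (!shared)
      brw->is_shared_buffer_dirty = false;
}

/* Mirrors intel_prepare_render(): any draw call may dirty the shared buffer. */
static void prepare_render(struct fake_brw *brw)
{
   if (brw->is_shared_buffer_bound)
      brw->is_shared_buffer_dirty = true;
}

/* Mirrors brw_display_shared_buffer(), reached from intel_glFlush(): hand the
 * buffer to the loader once per batch of rendering, then clear the flag. */
static void glflush(struct fake_brw *brw)
{
   if (!brw->is_shared_buffer_bound || !brw->is_shared_buffer_dirty)
      return;
   loader_display_shared_buffer(-1 /* fence-fd handling omitted in this sketch */);
   brw->is_shared_buffer_dirty = false;
}

int main(void)
{
   struct fake_brw brw = {0};

   update_image_buffers(&brw, true);   /* EGL_RENDER_BUFFER == EGL_SINGLE_BUFFER */
   prepare_render(&brw);               /* draw something */
   glflush(&brw);                      /* displays the shared buffer once */
   glflush(&brw);                      /* no-op: nothing new was rendered */

   update_image_buffers(&brw, false);  /* back to double buffering */
   glflush(&brw);                      /* no-op: no shared buffer bound */
   return 0;
}

Because brw_display_shared_buffer() returns early when either flag is unset, the extra call added to intel_glFlush() is effectively a no-op for ordinary double-buffered drawables, and repeated flushes with no intervening rendering collapse into a single displaySharedBuffer() call.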