X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=3783d1532e9b1ea6961bc0b3af203e147311e6b4;hb=5a8496007202f065efb3734e5925717268efc226;hp=c1a429bfcbee5fa85105cfa82264932fb8a4add2;hpb=34e1ccbfbe851ecf4ebbfc86d70da384d176d994;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index c1a429bfcbe..3783d1532e9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -36,6 +36,7 @@ #include "main/context.h" #include "main/fbobject.h" #include "main/extensions.h" +#include "main/glthread.h" #include "main/imports.h" #include "main/macros.h" #include "main/points.h" @@ -45,8 +46,9 @@ #include "main/framebuffer.h" #include "main/stencil.h" #include "main/state.h" +#include "main/spirv_extensions.h" -#include "vbo/vbo_context.h" +#include "vbo/vbo.h" #include "drivers/common/driverfuncs.h" #include "drivers/common/meta.h" @@ -73,8 +75,12 @@ #include "tnl/t_pipeline.h" #include "util/ralloc.h" #include "util/debug.h" +#include "util/disk_cache.h" #include "isl/isl.h" +#include "common/gen_defines.h" + +#include "compiler/spirv/nir_spirv.h" /*************************************** * Mesa's Driver Functions ***************************************/ @@ -143,6 +149,24 @@ intel_get_string(struct gl_context * ctx, GLenum name) } } +static void +brw_set_background_context(struct gl_context *ctx, + struct util_queue_monitoring *queue_info) +{ + struct brw_context *brw = brw_context(ctx); + __DRIcontext *driContext = brw->driContext; + __DRIscreen *driScreen = driContext->driScreenPriv; + const __DRIbackgroundCallableExtension *backgroundCallable = + driScreen->dri2.backgroundCallable; + + /* Note: Mesa will only call this function if we've called + * _mesa_enable_multithreading(). We only do that if the loader exposed + * the __DRI_BACKGROUND_CALLABLE extension. So we know that + * backgroundCallable is not NULL. + */ + backgroundCallable->setBackgroundContext(driContext->loaderPrivate); +} + static void intel_viewport(struct gl_context *ctx) { @@ -194,8 +218,6 @@ intel_update_state(struct gl_context * ctx) if (new_state & _NEW_POLYGON) brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx); - intel_prepare_render(brw); - if (new_state & _NEW_BUFFERS) { intel_update_framebuffer(ctx, ctx->DrawBuffer); if (ctx->DrawBuffer != ctx->ReadBuffer) @@ -237,6 +259,35 @@ intel_flush_front(struct gl_context *ctx) } } +static void +brw_display_shared_buffer(struct brw_context *brw) +{ + __DRIcontext *dri_context = brw->driContext; + __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; + __DRIscreen *dri_screen = brw->screen->driScrnPriv; + int fence_fd = -1; + + if (!brw->is_shared_buffer_bound) + return; + + if (!brw->is_shared_buffer_dirty) + return; + + if (brw->screen->has_exec_fence) { + /* This function is always called during a flush operation, so there is + * no need to flush again here. But we want to provide a fence_fd to the + * loader, and a redundant flush is the easiest way to acquire one. 
+ */ + if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd)) + return; + } + + dri_screen->mutableRenderBuffer.loader + ->displaySharedBuffer(dri_drawable, fence_fd, + dri_drawable->loaderPrivate); + brw->is_shared_buffer_dirty = false; +} + static void intel_glFlush(struct gl_context *ctx) { @@ -244,7 +295,7 @@ intel_glFlush(struct gl_context *ctx) intel_batchbuffer_flush(brw); intel_flush_front(ctx); - + brw_display_shared_buffer(brw); brw->need_flush_throttle = true; } @@ -263,6 +314,8 @@ static void brw_init_driver_functions(struct brw_context *brw, struct dd_function_table *functions) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; + _mesa_init_driver_functions(functions); /* GLX uses DRI2 invalidate events to handle window resizing. @@ -280,9 +333,9 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetString = intel_get_string; functions->UpdateState = intel_update_state; + brw_init_draw_functions(functions); intelInitTextureFuncs(functions); intelInitTextureImageFuncs(functions); - intelInitTextureSubImageFuncs(functions); intelInitTextureCopyImageFuncs(functions); intelInitCopyImageFuncs(functions); intelInitClearFuncs(functions); @@ -294,15 +347,17 @@ brw_init_driver_functions(struct brw_context *brw, brwInitFragProgFuncs( functions ); brw_init_common_queryobj_functions(functions); - if (brw->gen >= 8 || brw->is_haswell) + if (devinfo->gen >= 8 || devinfo->is_haswell) hsw_init_queryobj_functions(functions); - else if (brw->gen >= 6) + else if (devinfo->gen >= 6) gen6_init_queryobj_functions(functions); else gen4_init_queryobj_functions(functions); brw_init_compute_functions(functions); brw_init_conditional_render_functions(functions); + functions->GenerateMipmap = brw_generate_mipmap; + functions->QueryInternalFormat = brw_query_internal_format; functions->NewTransformFeedback = brw_new_transform_feedback; @@ -312,7 +367,7 @@ brw_init_driver_functions(struct brw_context *brw, functions->EndTransformFeedback = hsw_end_transform_feedback; functions->PauseTransformFeedback = hsw_pause_transform_feedback; functions->ResumeTransformFeedback = hsw_resume_transform_feedback; - } else if (brw->gen >= 7) { + } else if (devinfo->gen >= 7) { functions->BeginTransformFeedback = gen7_begin_transform_feedback; functions->EndTransformFeedback = gen7_end_transform_feedback; functions->PauseTransformFeedback = gen7_pause_transform_feedback; @@ -328,28 +383,64 @@ brw_init_driver_functions(struct brw_context *brw, brw_get_transform_feedback_vertex_count; } - if (brw->gen >= 6) + if (devinfo->gen >= 6) functions->GetSamplePosition = gen6_get_sample_position; + + /* GL_ARB_get_program_binary */ + brw_program_binary_init(brw->screen->deviceID); + functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1; + functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary; + functions->ProgramBinaryDeserializeDriverBlob = + brw_deserialize_program_binary; + + if (brw->screen->disk_cache) { + functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir; + } + + functions->SetBackgroundContext = brw_set_background_context; +} + +static void +brw_initialize_spirv_supported_capabilities(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + struct gl_context *ctx = &brw->ctx; + + /* The following SPIR-V capabilities are only supported on gen7+. In theory + * you should enable the extension only on gen7+, but just in case let's + * assert it. 
+ */ + assert(devinfo->gen >= 7); + + ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.draw_parameters = true; + ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.image_write_without_format = true; + ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.tessellation = true; + ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.variable_pointers = true; } static void brw_initialize_context_constants(struct brw_context *brw) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; struct gl_context *ctx = &brw->ctx; const struct brw_compiler *compiler = brw->screen->compiler; const bool stage_exists[MESA_SHADER_STAGES] = { [MESA_SHADER_VERTEX] = true, - [MESA_SHADER_TESS_CTRL] = brw->gen >= 7, - [MESA_SHADER_TESS_EVAL] = brw->gen >= 7, - [MESA_SHADER_GEOMETRY] = brw->gen >= 6, + [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7, + [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7, + [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6, [MESA_SHADER_FRAGMENT] = true, [MESA_SHADER_COMPUTE] = - ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) && + (_mesa_is_desktop_gl(ctx) && ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || (ctx->API == API_OPENGLES2 && - ctx->Const.MaxComputeWorkGroupSize[0] >= 128) || - _mesa_extension_override_enables.ARB_compute_shader, + ctx->Const.MaxComputeWorkGroupSize[0] >= 128), }; unsigned num_stages = 0; @@ -359,7 +450,7 @@ brw_initialize_context_constants(struct brw_context *brw) } unsigned max_samplers = - brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16; + devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16; ctx->Const.MaxDualSourceDrawBuffers = 1; ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; @@ -385,27 +476,27 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { ctx->Const.MaxRenderbufferSize = 16384; - ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS); + ctx->Const.MaxTextureSize = 16384; ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ } else { ctx->Const.MaxRenderbufferSize = 8192; - ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); + ctx->Const.MaxTextureSize = 8192; ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ } ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512; + ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512; ctx->Const.MaxTextureMbytes = 1536; - ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192; + ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 
16384 : 8192; ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 15.0; ctx->Const.StripTextureBorder = true; - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { ctx->Const.MaxProgramTextureGatherComponents = 4; ctx->Const.MinProgramTextureGatherOffset = -32; ctx->Const.MaxProgramTextureGatherOffset = 31; - } else if (brw->gen == 6) { + } else if (devinfo->gen == 6) { ctx->Const.MaxProgramTextureGatherComponents = 1; ctx->Const.MinProgramTextureGatherOffset = -8; ctx->Const.MaxProgramTextureGatherOffset = 7; @@ -504,7 +595,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { ctx->Const.MaxLineWidth = 7.375; ctx->Const.MaxLineWidthAA = 7.375; ctx->Const.LineWidthGranularity = 0.125; @@ -527,12 +618,10 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxPointSizeAA = 255.0; ctx->Const.PointSizeGranularity = 1.0; - if (brw->gen >= 5 || brw->is_g4x) + if (devinfo->gen >= 5 || devinfo->is_g4x) ctx->Const.MaxClipPlanes = 8; ctx->Const.GLSLTessLevelsAsInputs = true; - ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8; - ctx->Const.LowerTESPatchVerticesIn = true; ctx->Const.PrimitiveRestartForPatches = true; ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; @@ -582,11 +671,10 @@ brw_initialize_context_constants(struct brw_context *brw) * that affect provoking vertex decision. Always use last vertex * convention for quad primitive which works as expected for now. */ - if (brw->gen >= 6) + if (devinfo->gen >= 6) ctx->Const.QuadsFollowProvokingVertexConvention = false; ctx->Const.NativeIntegers = true; - ctx->Const.VertexID_is_zero_based = true; /* Regarding the CMP instruction, the Ivybridge PRM says: * @@ -614,8 +702,11 @@ brw_initialize_context_constants(struct brw_context *brw) * the element in the buffer." * * However, unaligned accesses are slower, so enforce buffer alignment. + * + * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional + * restriction: the start of the buffer needs to be 32B aligned. */ - ctx->Const.UniformBufferOffsetAlignment = 16; + ctx->Const.UniformBufferOffsetAlignment = 32; /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so * that we can safely have the CPU and GPU writing the same SSBO on @@ -628,10 +719,11 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.TextureBufferOffsetAlignment = 16; ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { ctx->Const.MaxVarying = 32; ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = + compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 
128 : 64;
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
       ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
@@ -646,15 +738,15 @@ brw_initialize_context_constants(struct brw_context *brw)
          brw->screen->compiler->glsl_compiler_options[i];
    }

-   if (brw->gen >= 7) {
+   if (devinfo->gen >= 7) {
       ctx->Const.MaxViewportWidth = 32768;
       ctx->Const.MaxViewportHeight = 32768;
    }

    /* ARB_viewport_array, OES_viewport_array */
-   if (brw->gen >= 6) {
+   if (devinfo->gen >= 6) {
       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
-      ctx->Const.ViewportSubpixelBits = 0;
+      ctx->Const.ViewportSubpixelBits = 8;

       /* Cast to float before negating because MaxViewportWidth is unsigned.
        */
@@ -663,7 +755,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    }

    /* ARB_gpu_shader5 */
-   if (brw->gen >= 7)
+   if (devinfo->gen >= 7)
       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

    /* ARB_framebuffer_no_attachments */
@@ -674,6 +766,28 @@ brw_initialize_context_constants(struct brw_context *brw)

    /* OES_primitive_bounding_box */
    ctx->Const.NoPrimitiveBoundingBoxOutput = true;
+
+   /* TODO: We should be able to use STD430 packing by default on all hardware
+    * but some piglit tests [1] currently fail on SNB when this is enabled.
+    * The problem is that the message we use for uniform pulls in the vec4
+    * back-end on SNB is the OWORD block load instruction, which takes its
+    * offset in units of OWORDs (16 bytes).  On IVB+, we use the sampler,
+    * which doesn't have these restrictions.
+    *
+    * In the scalar back-end, we use the sampler for dynamic uniform loads and
+    * pull an entire cache line at a time for constant offset loads, both of
+    * which support almost any alignment.
+    *
+    * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
+    */
+   if (devinfo->gen >= 7)
+      ctx->Const.UseSTD430AsDefaultPacking = true;
+
+   if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
+      ctx->Const.AllowMappedBuffersDuringExecution = true;
+
+   /* GL_ARB_get_program_binary */
+   ctx->Const.NumProgramBinaryFormats = 1;
 }

 static void
@@ -684,7 +798,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
    struct gen_device_info *devinfo = &brw->screen->devinfo;

    /* FINISHME: Do this for all platforms that the kernel supports */
-   if (brw->is_cherryview &&
+   if (devinfo->is_cherryview &&
        screen->subslice_total > 0 && screen->eu_total > 0) {
       /* Logical CS threads = EUs per subslice * 7 threads per EU */
       uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
@@ -722,11 +836,13 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
 static void
 brw_process_driconf_options(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct gl_context *ctx = &brw->ctx;

    driOptionCache *options = &brw->optionCache;
    driParseConfigFiles(options, &brw->screen->optionCache,
-                       brw->driContext->driScreenPriv->myNum, "i965");
+                       brw->driContext->driScreenPriv->myNum,
+                       "i965", NULL);

    int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
    switch (bo_reuse_mode) {
@@ -740,10 +856,13 @@ brw_process_driconf_options(struct brw_context *brw)

    if (INTEL_DEBUG & DEBUG_NO_HIZ) {
       brw->has_hiz = false;
       /* On gen6, you can only do separate stencil with HIZ.
*/ - if (brw->gen == 6) + if (devinfo->gen == 6) brw->has_separate_stencil = false; } + if (driQueryOptionb(options, "mesa_no_error")) + ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; + if (driQueryOptionb(options, "always_flush_batch")) { fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); brw->always_flush_batch = true; @@ -789,18 +908,22 @@ brw_process_driconf_options(struct brw_context *brw) brw->dual_color_blend_by_location = driQueryOptionb(options, "dual_color_blend_by_location"); + + ctx->Const.AllowGLSLCrossStageInterpolationMismatch = + driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch"); + + ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); + driComputeOptionsSha1(&brw->screen->optionCache, + ctx->Const.dri_config_options_sha1); } GLboolean brwCreateContext(gl_api api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - unsigned major_version, - unsigned minor_version, - uint32_t flags, - bool notify_reset, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + const struct __DriverContextConfig *ctx_config, unsigned *dri_ctx_error, - void *sharedContextPrivate) + void *sharedContextPrivate) { struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate; @@ -810,17 +933,29 @@ brwCreateContext(gl_api api, /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel * provides us with context reset notifications. */ - uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG - | __DRI_CTX_FLAG_FORWARD_COMPATIBLE; + uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG | + __DRI_CTX_FLAG_FORWARD_COMPATIBLE | + __DRI_CTX_FLAG_NO_ERROR; if (screen->has_context_reset_notification) allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; - if (flags & ~allowed_flags) { + if (ctx_config->flags & ~allowed_flags) { *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG; return false; } + if (ctx_config->attribute_mask & + ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY | + __DRIVER_CONTEXT_ATTRIB_PRIORITY)) { + *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; + return false; + } + + bool notify_reset = + ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) && + ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION); + struct brw_context *brw = rzalloc(NULL, struct brw_context); if (!brw) { fprintf(stderr, "%s: failed to alloc context\n", __func__); @@ -833,46 +968,19 @@ brwCreateContext(gl_api api, brw->screen = screen; brw->bufmgr = screen->bufmgr; - brw->gen = devinfo->gen; - brw->gt = devinfo->gt; - brw->is_g4x = devinfo->is_g4x; - brw->is_baytrail = devinfo->is_baytrail; - brw->is_haswell = devinfo->is_haswell; - brw->is_cherryview = devinfo->is_cherryview; - brw->is_broxton = devinfo->is_broxton || devinfo->is_geminilake; - brw->has_llc = devinfo->has_llc; brw->has_hiz = devinfo->has_hiz_and_separate_stencil; brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; - brw->has_pln = devinfo->has_pln; - brw->has_compr4 = devinfo->has_compr4; - brw->has_surface_tile_offset = devinfo->has_surface_tile_offset; - brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug; - brw->needs_unlit_centroid_workaround = - devinfo->needs_unlit_centroid_workaround; - - brw->must_use_separate_stencil = devinfo->must_use_separate_stencil; + brw->has_swizzling = screen->hw_has_swizzling; - isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling); + brw->isl_dev = screen->isl_dev; 
brw->vs.base.stage = MESA_SHADER_VERTEX; brw->tcs.base.stage = MESA_SHADER_TESS_CTRL; brw->tes.base.stage = MESA_SHADER_TESS_EVAL; brw->gs.base.stage = MESA_SHADER_GEOMETRY; brw->wm.base.stage = MESA_SHADER_FRAGMENT; - if (brw->gen >= 8) { - gen8_init_vtable_surface_functions(brw); - brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz; - } else if (brw->gen >= 7) { - gen7_init_vtable_surface_functions(brw); - brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz; - } else if (brw->gen >= 6) { - gen6_init_vtable_surface_functions(brw); - brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz; - } else { - gen4_init_vtable_surface_functions(brw); - brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz; - } + brw->cs.base.stage = MESA_SHADER_COMPUTE; brw_init_driver_functions(brw, &functions); @@ -888,7 +996,7 @@ brwCreateContext(gl_api api, return false; } - driContextSetFlags(ctx, flags); + driContextSetFlags(ctx, ctx_config->flags); /* Initialize the software rasterizer and helper modules. * @@ -929,20 +1037,39 @@ brwCreateContext(gl_api api, intel_fbo_init(brw); - intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc); + intel_batchbuffer_init(brw); - if (brw->gen >= 6) { - /* Create a new hardware context. Using a hardware context means that - * our GPU state will be saved/restored on context switch, allowing us - * to assume that the GPU is in the same state we left it in. - * - * This is required for transform feedback buffer offsets, query objects, - * and also allows us to reduce how much state we have to emit. - */ - brw->hw_ctx = brw_create_hw_context(brw->bufmgr); + /* Create a new hardware context. Using a hardware context means that + * our GPU state will be saved/restored on context switch, allowing us + * to assume that the GPU is in the same state we left it in. + * + * This is required for transform feedback buffer offsets, query objects, + * and also allows us to reduce how much state we have to emit. + */ + brw->hw_ctx = brw_create_hw_context(brw->bufmgr); + if (!brw->hw_ctx && devinfo->gen >= 6) { + fprintf(stderr, "Failed to create hardware context.\n"); + intelDestroyContext(driContextPriv); + return false; + } - if (!brw->hw_ctx) { - fprintf(stderr, "Failed to create hardware context.\n"); + if (brw->hw_ctx) { + int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY; + if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) { + switch (ctx_config->priority) { + case __DRI_CTX_PRIORITY_LOW: + hw_priority = GEN_CONTEXT_LOW_PRIORITY; + break; + case __DRI_CTX_PRIORITY_HIGH: + hw_priority = GEN_CONTEXT_HIGH_PRIORITY; + break; + } + } + if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY && + brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) { + fprintf(stderr, + "Failed to set priority [%d:%d] for hardware context.\n", + ctx_config->priority, hw_priority); intelDestroyContext(driContextPriv); return false; } @@ -954,6 +1081,8 @@ brwCreateContext(gl_api api, return false; } + brw_upload_init(&brw->upload, brw->bufmgr, 65536); + brw_init_state(brw); intelInitExtensions(ctx); @@ -964,7 +1093,7 @@ brwCreateContext(gl_api api, brw->urb.size = devinfo->urb.size; - if (brw->gen == 6) + if (devinfo->gen == 6) brw->urb.gs_present = false; brw->prim_restart.in_progress = false; @@ -981,12 +1110,12 @@ brwCreateContext(gl_api api, brw_draw_init( brw ); - if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) { + if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) { /* Turn on some extra GL_ARB_debug_output generation. 
*/
       brw->perf_debug = true;
    }

-   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
+   if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
       ctx->Const.RobustAccess = GL_TRUE;
    }
@@ -994,8 +1123,21 @@
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       brw_init_shader_time(brw);

+   _mesa_override_extensions(ctx);
    _mesa_compute_version(ctx);

+   /* GL_ARB_gl_spirv */
+   if (ctx->Extensions.ARB_gl_spirv) {
+      brw_initialize_spirv_supported_capabilities(brw);
+
+      if (ctx->Extensions.ARB_spirv_extensions) {
+         /* GL_ARB_spirv_extensions */
+         ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions);
+         _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions,
+                                               &ctx->Const.SpirVCapabilities);
+      }
+   }
+
    _mesa_initialize_dispatch_tables(ctx);
    _mesa_initialize_vbo_vtxfmt(ctx);

@@ -1005,6 +1147,14 @@
    vbo_use_buffer_objects(ctx);
    vbo_always_unmap_buffers(ctx);

+   brw->ctx.Cache = brw->screen->disk_cache;
+
+   if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
+       driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
+      /* Loader supports multithreading, and so do we. */
+      _mesa_glthread_init(ctx);
+   }
+
    return true;
 }

@@ -1015,6 +1165,18 @@ intelDestroyContext(__DRIcontext * driContextPriv)
       (struct brw_context *) driContextPriv->driverPrivate;
    struct gl_context *ctx = &brw->ctx;

+   GET_CURRENT_CONTEXT(curctx);
+
+   if (curctx == NULL) {
+      /* No current context, but we need one to release the renderbuffer
+       * surface when we release the framebuffer.  So temporarily bind the
+       * context.
+       */
+      _mesa_make_current(ctx, NULL, NULL);
+   }
+
+   _mesa_glthread_destroy(&brw->ctx);
+
    _mesa_meta_free(&brw->ctx);

    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
@@ -1025,23 +1187,24 @@ intelDestroyContext(__DRIcontext * driContextPriv)
       brw_destroy_shader_time(brw);
    }

-   if (brw->gen >= 6)
-      blorp_finish(&brw->blorp);
+   blorp_finish(&brw->blorp);

    brw_destroy_state(brw);
    brw_draw_destroy(brw);

    brw_bo_unreference(brw->curbe.curbe_bo);
-   if (brw->vs.base.scratch_bo)
-      brw_bo_unreference(brw->vs.base.scratch_bo);
-   if (brw->tcs.base.scratch_bo)
-      brw_bo_unreference(brw->tcs.base.scratch_bo);
-   if (brw->tes.base.scratch_bo)
-      brw_bo_unreference(brw->tes.base.scratch_bo);
-   if (brw->gs.base.scratch_bo)
-      brw_bo_unreference(brw->gs.base.scratch_bo);
-   if (brw->wm.base.scratch_bo)
-      brw_bo_unreference(brw->wm.base.scratch_bo);
+
+   brw_bo_unreference(brw->vs.base.scratch_bo);
+   brw_bo_unreference(brw->tcs.base.scratch_bo);
+   brw_bo_unreference(brw->tes.base.scratch_bo);
+   brw_bo_unreference(brw->gs.base.scratch_bo);
+   brw_bo_unreference(brw->wm.base.scratch_bo);
+
+   brw_bo_unreference(brw->vs.base.push_const_bo);
+   brw_bo_unreference(brw->tcs.base.push_const_bo);
+   brw_bo_unreference(brw->tes.base.push_const_bo);
+   brw_bo_unreference(brw->gs.base.push_const_bo);
+   brw_bo_unreference(brw->wm.base.push_const_bo);

    brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

@@ -1065,7 +1228,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    driDestroyOptionCache(&brw->optionCache);

    /* free the Mesa context */
-   _mesa_free_context_data(&brw->ctx);
+   _mesa_free_context_data(&brw->ctx, true);

    ralloc_free(brw);
    driContextPriv->driverPrivate = NULL;
@@ -1074,6 +1237,9 @@
 GLboolean
 intelUnbindContext(__DRIcontext * driContextPriv)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_glthread_finish(ctx);
+
    /* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL); @@ -1105,8 +1271,8 @@ intelUnbindContext(__DRIcontext * driContextPriv) * * Unfortunately, renderbuffer setup happens before a context is created. So * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3 - * context (without an sRGB visual, though we don't have sRGB visuals exposed - * yet), we go turn that back off before anyone finds out. + * context (without an sRGB visual), we go turn that back off before anyone + * finds out. */ static void intel_gles3_srgb_workaround(struct brw_context *brw, @@ -1117,15 +1283,19 @@ intel_gles3_srgb_workaround(struct brw_context *brw, if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) return; - /* Some day when we support the sRGB capable bit on visuals available for - * GLES, we'll need to respect that and not disable things here. - */ - fb->Visual.sRGBCapable = false; for (int i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; + + /* Check if sRGB was specifically asked for. */ + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i); + if (irb && irb->need_srgb) + return; + if (rb) rb->Format = _mesa_get_srgb_format_linear(rb->Format); } + /* Disable sRGB from framebuffers that are not compatible. */ + fb->Visual.sRGBCapable = false; } GLboolean @@ -1134,21 +1304,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driReadPriv) { struct brw_context *brw; - GET_CURRENT_CONTEXT(curCtx); if (driContextPriv) brw = (struct brw_context *) driContextPriv->driverPrivate; else brw = NULL; - /* According to the glXMakeCurrent() man page: "Pending commands to - * the previous context, if any, are flushed before it is released." - * But only flush if we're actually changing contexts. - */ - if (brw_context(curCtx) && brw_context(curCtx) != brw) { - _mesa_flush(curCtx); - } - if (driContextPriv) { struct gl_context *ctx = &brw->ctx; struct gl_framebuffer *fb, *readFb; @@ -1182,6 +1343,8 @@ intelMakeCurrent(__DRIcontext * driContextPriv, _mesa_make_current(ctx, fb, readFb); } else { + GET_CURRENT_CONTEXT(ctx); + _mesa_glthread_finish(ctx); _mesa_make_current(NULL, NULL, NULL); } @@ -1192,7 +1355,9 @@ void intel_resolve_for_dri2_flush(struct brw_context *brw, __DRIdrawable *drawable) { - if (brw->gen < 6) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + if (devinfo->gen < 6) { /* MSAA and fast color clear are not supported, so don't waste time * checking whether a resolve is needed. */ @@ -1214,12 +1379,27 @@ intel_resolve_for_dri2_flush(struct brw_context *brw, rb = intel_get_renderbuffer(fb, buffers[i]); if (rb == NULL || rb->mt == NULL) continue; - if (rb->mt->num_samples <= 1) { + if (rb->mt->surf.samples == 1) { assert(rb->mt_layer == 0 && rb->mt_level == 0 && rb->layer_count == 1); - intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false); + intel_miptree_prepare_external(brw, rb->mt); } else { intel_renderbuffer_downsample(brw, rb); + + /* Call prepare_external on the single-sample miptree to do any + * needed resolves prior to handing it off to the window system. + * This is needed in the case that rb->singlesample_mt is Y-tiled + * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In + * this case, the MSAA resolve above will write compressed data into + * rb->singlesample_mt. 
+          *
+          * TODO: Some day, if we decide to care about the tiny performance
+          * hit we're taking by doing the MSAA resolve and then a CCS resolve,
+          * we could detect this case and just allocate the single-sampled
+          * miptree without aux. However, that would be a lot of plumbing and
+          * this is a rather exotic case so it's not really worth it.
+          */
+         intel_miptree_prepare_external(brw, rb->singlesample_mt);
       }
    }
 }
@@ -1355,6 +1535,11 @@ intel_prepare_render(struct brw_context *brw)
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
+
+   if (brw->is_shared_buffer_bound) {
+      /* Subsequent rendering will probably dirty the shared buffer. */
+      brw->is_shared_buffer_dirty = true;
+   }
 }

 /**
@@ -1504,6 +1689,9 @@ intel_process_dri2_buffer(struct brw_context *brw,
       return;
    }

+   uint32_t tiling, swizzle;
+   brw_bo_get_tiling(bo, &tiling, &swizzle);
+
    struct intel_mipmap_tree *mt =
       intel_miptree_create_for_bo(brw,
                                   bo,
@@ -1513,12 +1701,19 @@ intel_process_dri2_buffer(struct brw_context *brw,
                                   drawable->h,
                                   1,
                                   buffer->pitch,
-                                  MIPTREE_LAYOUT_FOR_SCANOUT);
+                                  isl_tiling_from_i915_tiling(tiling),
+                                  MIPTREE_CREATE_DEFAULT);
    if (!mt) {
       brw_bo_unreference(bo);
       return;
    }

+   /* We got this BO from X11.  We can't assume that we have coherent texture
+    * access because X may suddenly decide to use it for scan-out, which would
+    * destroy coherency.
+    */
+   bo->cache_coherent = false;
+
    if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
                                                  drawable->w, drawable->h,
                                                  buffer->pitch)) {
@@ -1578,19 +1773,25 @@ intel_update_image_buffer(struct brw_context *intel,
    else
       last_mt = rb->singlesample_mt;

-   if (last_mt && last_mt->bo == buffer->bo)
+   if (last_mt && last_mt->bo == buffer->bo) {
+      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
+         intel_miptree_make_shareable(intel, last_mt);
+      }
       return;
+   }
+
+   /* Only allow internal compression if samples == 0.  For multisampled
+    * window system buffers, the only thing the single-sampled buffer is used
+    * for is as a resolve target.  If we do any compression beyond what is
+    * supported by the window system, we will just have to resolve, so it's
+    * probably better to just not bother.
+    */
+   const bool allow_internal_aux = (num_samples == 0);

    struct intel_mipmap_tree *mt =
-      intel_miptree_create_for_bo(intel,
-                                  buffer->bo,
-                                  intel_rb_format(rb),
-                                  0,
-                                  buffer->width,
-                                  buffer->height,
-                                  1,
-                                  buffer->pitch,
-                                  MIPTREE_LAYOUT_FOR_SCANOUT);
+      intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
+                                         intel_rb_format(rb),
+                                         allow_internal_aux);
    if (!mt)
       return;

@@ -1606,6 +1807,35 @@ intel_update_image_buffer(struct brw_context *intel,
        rb->Base.Base.NumSamples > 1) {
       intel_renderbuffer_upsample(intel, rb);
    }
+
+   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
+      /* The compositor and the application may access this image
+       * concurrently. The display hardware may even scan out the image while
+       * the GPU is rendering to it. Aux surfaces cause difficulty with
+       * concurrent access, so permanently disable aux for this miptree.
+       *
+       * Perhaps we could improve overall application performance by
+       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
+       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
+       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
+       * approach to be highly dependent on the application's GL usage.
+       *
+       * I [chadv] expect clever disabling/reenabling to be counterproductive
+       * in the use cases I care about: applications that render nearly
+       * realtime handwriting to the surface while possibly undergoing
+       * simultaneous scanout as a display plane. The app requires low
+       * render latency. Even though the app spends most of its time in
+       * shared-buffer mode, it also frequently transitions between
+       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
+       * mode. Visual stutter during the transitions should be avoided.
+       *
+       * In this case, I [chadv] believe reducing the GPU workload at
+       * shared-buffer/double-buffer transitions would offer a smoother app
+       * experience than any savings due to aux compression. But I've
+       * collected no data to prove my theory.
+       */
+      intel_miptree_make_shareable(intel, mt);
+   }
 }

 static void
@@ -1666,4 +1896,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
                                 images.back,
                                 __DRI_IMAGE_BUFFER_BACK);
    }
+
+   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
+      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
+      drawable->w = images.back->width;
+      drawable->h = images.back->height;
+      intel_update_image_buffer(brw,
+                                drawable,
+                                back_rb,
+                                images.back,
+                                __DRI_IMAGE_BUFFER_SHARED);
+      brw->is_shared_buffer_bound = true;
+   } else {
+      brw->is_shared_buffer_bound = false;
+      brw->is_shared_buffer_dirty = false;
+   }
 }
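
For context, the __DRI_IMAGE_BUFFER_SHARED and displaySharedBuffer plumbing above is the driver half of EGL_KHR_mutable_render_buffer. The following is a minimal client-side sketch of how an application would reach this path; it is not part of the patch, and every identifier in it comes from standard EGL headers rather than from i965.

/* Hypothetical client-side sketch, not part of the patch: all tokens come
 * from EGL 1.4 plus EGL_KHR_mutable_render_buffer.  Error handling is
 * deliberately minimal.
 */
#include <EGL/egl.h>
#include <EGL/eglext.h>

static EGLSurface
create_shared_buffer_surface(EGLDisplay dpy, EGLNativeWindowType win)
{
   static const EGLint cfg_attribs[] = {
      /* Only configs with this bit may switch to shared-buffer mode. */
      EGL_SURFACE_TYPE, EGL_WINDOW_BIT | EGL_MUTABLE_RENDER_BUFFER_BIT_KHR,
      EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
      EGL_NONE,
   };
   EGLConfig cfg;
   EGLint num_cfg = 0;

   if (!eglChooseConfig(dpy, cfg_attribs, &cfg, 1, &num_cfg) || num_cfg < 1)
      return EGL_NO_SURFACE;

   EGLSurface surf = eglCreateWindowSurface(dpy, cfg, win, NULL);
   if (surf == EGL_NO_SURFACE)
      return EGL_NO_SURFACE;

   /* Request shared-buffer (single-buffered) rendering.  Per the extension,
    * the switch takes effect on the next eglSwapBuffers(); from then on a
    * plain glFlush() publishes the frame, which is the path that reaches
    * brw_display_shared_buffer() in the diff above.
    */
   eglSurfaceAttrib(dpy, surf, EGL_RENDER_BUFFER, EGL_SINGLE_BUFFER);
   return surf;
}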
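
The GEN_CONTEXT_{LOW,MEDIUM,HIGH}_PRIORITY mapping added to brwCreateContext() is normally reached through EGL_IMG_context_priority. A hypothetical sketch of the client side, using only standard EGL tokens (nothing here is from this patch):

/* Hypothetical sketch, not part of the patch.  The EGL attribute travels
 * through __DRIVER_CONTEXT_ATTRIB_PRIORITY and ends up in the
 * brw_hw_context_set_priority() call added above.
 */
#include <EGL/egl.h>
#include <EGL/eglext.h>

static EGLContext
create_high_priority_context(EGLDisplay dpy, EGLConfig cfg)
{
   static const EGLint ctx_attribs[] = {
      EGL_CONTEXT_CLIENT_VERSION, 2,
      EGL_CONTEXT_PRIORITY_LEVEL_IMG, EGL_CONTEXT_PRIORITY_HIGH_IMG,
      EGL_NONE,
   };

   /* Note that the patch treats a refused priority as fatal: if the kernel
    * rejects the requested level (raising priority typically requires
    * CAP_SYS_NICE), context creation fails rather than silently falling
    * back, so a caller should be prepared to retry without the attribute.
    */
   return eglCreateContext(dpy, cfg, EGL_NO_CONTEXT, ctx_attribs);
}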
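
Finally, the GetProgramBinaryDriverSHA1/ProgramBinarySerializeDriverBlob hooks and NumProgramBinaryFormats = 1 implement the driver side of GL_ARB_get_program_binary. A sketch of the application-side round trip these hooks serve; it assumes a context exposing GL 4.1 or the extension, a GL loader for the entry points (libepoxy is assumed purely for the include), and the helper names are made up for illustration:

/* Hypothetical helpers, not part of the patch. */
#include <epoxy/gl.h>
#include <stdbool.h>
#include <stdlib.h>

static void *
save_program_binary(GLuint prog, GLenum *format, GLint *len)
{
   glGetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, len);
   void *blob = malloc(*len);
   if (blob)
      glGetProgramBinary(prog, *len, NULL, format, blob);
   return blob;
}

static bool
load_program_binary(GLuint prog, GLenum format, const void *blob, GLint len)
{
   GLint ok = GL_FALSE;
   glProgramBinary(prog, format, blob, len);
   glGetProgramiv(prog, GL_LINK_STATUS, &ok);
   /* The driver SHA1 hooks above make stale blobs fail to "link" after a
    * driver change, so the caller must keep the GLSL sources around and
    * fall back to a normal compile-and-link path when this returns false.
    */
   return ok == GL_TRUE;
}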