X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=e20da0b8ef88bba9f58b9ef02117ce39038ca316;hb=639696aa05df0b7f4bfb9e2e255863cd72effba3;hp=de58d515ddbfb284c281efdf2143036b4baf9e21;hpb=551d459af421a2eb937e9e16301bb64da4624f89;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index de58d515ddb..e20da0b8ef8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -34,11 +34,13 @@ #include "main/api_exec.h" #include "main/context.h" #include "main/fbobject.h" +#include "main/extensions.h" #include "main/imports.h" #include "main/macros.h" #include "main/points.h" #include "main/version.h" #include "main/vtxfmt.h" +#include "main/texobj.h" #include "vbo/vbo_context.h" @@ -57,14 +59,14 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_pixel.h" -#include "intel_regions.h" +#include "intel_image.h" #include "intel_tex.h" #include "intel_tex_obj.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" #include "tnl/t_pipeline.h" -#include "glsl/ralloc.h" +#include "util/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -79,6 +81,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target, (void) target; switch (brw->gen) { + case 9: case 8: samples[0] = 8; samples[1] = 4; @@ -95,6 +98,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target, return 1; default: + assert(brw->gen < 6); samples[0] = 1; return 1; } @@ -122,7 +126,7 @@ brw_get_renderer_string(unsigned deviceID) } static const GLubyte * -intelGetString(struct gl_context * ctx, GLenum name) +intel_get_string(struct gl_context * ctx, GLenum name) { const struct brw_context *const brw = brw_context(ctx); @@ -152,15 +156,39 @@ intel_viewport(struct gl_context *ctx) } static void -intelInvalidateState(struct gl_context * ctx, GLuint new_state) +intel_update_state(struct 
gl_context * ctx, GLuint new_state) { struct brw_context *brw = brw_context(ctx); + struct intel_texture_object *tex_obj; + struct intel_renderbuffer *depth_irb; if (ctx->swrast_context) _swrast_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); brw->NewGLState |= new_state; + + _mesa_unlock_context_textures(ctx); + + /* Resolve the depth buffer's HiZ buffer. */ + depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); + if (depth_irb) + intel_renderbuffer_resolve_hiz(brw, depth_irb); + + /* Resolve depth buffer and render cache of each enabled texture. */ + int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!ctx->Texture.Unit[i]._Current) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); + intel_miptree_resolve_color(brw, tex_obj->mt); + brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + } + + _mesa_lock_context_textures(ctx); } #define flushFront(screen) ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) @@ -208,8 +236,8 @@ intel_glFlush(struct gl_context *ctx) brw->need_throttle = true; } -void -intelFinish(struct gl_context * ctx) +static void +intel_finish(struct gl_context * ctx) { struct brw_context *brw = brw_context(ctx); @@ -236,14 +264,15 @@ brw_init_driver_functions(struct brw_context *brw, functions->Viewport = intel_viewport; functions->Flush = intel_glFlush; - functions->Finish = intelFinish; - functions->GetString = intelGetString; - functions->UpdateState = intelInvalidateState; + functions->Finish = intel_finish; + functions->GetString = intel_get_string; + functions->UpdateState = intel_update_state; intelInitTextureFuncs(functions); intelInitTextureImageFuncs(functions); intelInitTextureSubImageFuncs(functions); intelInitTextureCopyImageFuncs(functions); + intelInitCopyImageFuncs(functions); intelInitClearFuncs(functions); intelInitBufferFuncs(functions); intelInitPixelFuncs(functions); @@ -298,11 +327,11 @@ brw_initialize_context_constants(struct brw_context *brw) MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers; - if (brw->gen >= 7) + if (brw->gen >= 6) ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers; else ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; - if (getenv("INTEL_COMPUTE_SHADER")) { + if (_mesa_extension_override_enables.ARB_compute_shader) { ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxUniformBufferBindings += 12; } else { @@ -379,6 +408,14 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxDepthTextureSamples = max_samples; ctx->Const.MaxIntegerSamples = max_samples; + /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used + * to map indices of rectangular grid to sample numbers within a 
pixel. + * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled + * extension implementation. For more details see the comment above + * gen6_set_sample_maps() definition. + */ + gen6_set_sample_maps(ctx); + if (brw->gen >= 7) ctx->Const.MaxProgramTextureGatherComponents = 4; else if (brw->gen == 6) @@ -386,9 +423,19 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 5.0; - ctx->Const.MaxLineWidthAA = 5.0; - ctx->Const.LineWidthGranularity = 0.5; + if (brw->gen >= 9 || brw->is_cherryview) { + ctx->Const.MaxLineWidth = 40.0; + ctx->Const.MaxLineWidthAA = 40.0; + ctx->Const.LineWidthGranularity = 0.125; + } else if (brw->gen >= 6) { + ctx->Const.MaxLineWidth = 7.875; + ctx->Const.MaxLineWidthAA = 7.875; + ctx->Const.LineWidthGranularity = 0.125; + } else { + ctx->Const.MaxLineWidth = 7.0; + ctx->Const.MaxLineWidthAA = 7.0; + ctx->Const.LineWidthGranularity = 0.5; + } ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; @@ -435,6 +482,12 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; + ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; + if (brw->gen >= 7) { ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; @@ -456,7 +509,25 @@ brw_initialize_context_constants(struct brw_context *brw) 
ctx->Const.QuadsFollowProvokingVertexConvention = false; ctx->Const.NativeIntegers = true; - ctx->Const.UniformBooleanTrue = 1; + ctx->Const.VertexID_is_zero_based = true; + + /* Regarding the CMP instruction, the Ivybridge PRM says: + * + * "For each enabled channel 0b or 1b is assigned to the appropriate flag + * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord + * 0xFFFFFFFF) is assigned to dst." + * + * but PRMs for earlier generations say + * + * "In dword format, one GRF may store up to 8 results. When the register + * is used later as a vector of Booleans, as only LSB at each channel + * contains meaning [sic] data, software should make sure all higher bits + * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." + * + * We select the representation of a true boolean uniform to be ~0, and fix + * the results of Gen <= 5 CMP instructions with -(result & 1). + */ + ctx->Const.UniformBooleanTrue = ~0; /* From the gen4 PRM, volume 4 page 127: * @@ -480,22 +551,30 @@ brw_initialize_context_constants(struct brw_context *brw) /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX; - ctx->ShaderCompilerOptions[i].EmitCondCodes = true; - ctx->ShaderCompilerOptions[i].EmitNoNoise = true; - ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true; - ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true; - ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true; - - ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = + ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 
16 : UINT_MAX; + ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true; + ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true; + ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true; + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true; + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput = (i == MESA_SHADER_FRAGMENT); - ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - ctx->ShaderCompilerOptions[i].LowerClipDistance = true; + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; + ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; } - ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; - ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; + + if (brw->scalar_vs) { + /* If we're using the scalar backend for vertex shaders, we need to + * configure these accordingly. 
+ */ + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; + } /* ARB_viewport_array */ if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) { @@ -507,6 +586,10 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; } + + /* ARB_gpu_shader5 */ + if (brw->gen >= 7) + ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); } /** @@ -556,9 +639,6 @@ brw_process_driconf_options(struct brw_context *brw) brw->disable_throttling = true; } - brw->disable_derivative_optimization = - driQueryOptionb(&brw->optionCache, "disable_derivative_optimization"); - brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); ctx->Const.ForceGLSLExtensionsWarn = @@ -566,6 +646,9 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.DisableGLSLLineContinuations = driQueryOptionb(options, "disable_glsl_line_continuations"); + + ctx->Const.AllowGLSLExtensionDirectiveMidShader = + driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); } GLboolean @@ -616,6 +699,7 @@ brwCreateContext(gl_api api, brw->is_g4x = devinfo->is_g4x; brw->is_baytrail = devinfo->is_baytrail; brw->is_haswell = devinfo->is_haswell; + brw->is_cherryview = devinfo->is_cherryview; brw->has_llc = devinfo->has_llc; brw->has_hiz = devinfo->has_hiz_and_separate_stencil; brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; @@ -629,17 +713,20 @@ brwCreateContext(gl_api api, brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; brw->has_swizzling = screen->hw_has_swizzling; + brw->vs.base.stage = MESA_SHADER_VERTEX; + brw->gs.base.stage = MESA_SHADER_GEOMETRY; + brw->wm.base.stage = MESA_SHADER_FRAGMENT; if (brw->gen >= 8) { 
gen8_init_vtable_surface_functions(brw); - gen7_init_vtable_sampler_functions(brw); brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz; } else if (brw->gen >= 7) { gen7_init_vtable_surface_functions(brw); - gen7_init_vtable_sampler_functions(brw); brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz; + } else if (brw->gen >= 6) { + gen6_init_vtable_surface_functions(brw); + brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz; } else { gen4_init_vtable_surface_functions(brw); - gen4_init_vtable_sampler_functions(brw); brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz; } @@ -684,6 +771,10 @@ brwCreateContext(gl_api api, brw_process_driconf_options(brw); brw_process_intel_debug_variable(brw); + + if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) + brw->scalar_vs = true; + brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset @@ -719,14 +810,6 @@ brwCreateContext(gl_api api, brw_init_surface_formats(brw); - if (brw->is_g4x || brw->gen >= 5) { - brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS; - brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; - } else { - brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS; - brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; - } - brw->max_vs_threads = devinfo->max_vs_threads; brw->max_gs_threads = devinfo->max_gs_threads; brw->max_wm_threads = devinfo->max_wm_threads; @@ -751,16 +834,12 @@ brwCreateContext(gl_api api, brw->max_gtt_map_object_size = gtt_size / 4; if (brw->gen == 6) - brw->urb.gen6_gs_previously_active = false; + brw->urb.gs_present = false; brw->prim_restart.in_progress = false; brw->prim_restart.enable_cut_index = false; brw->gs.enabled = false; - - if (brw->gen < 6) { - brw->curbe.last_buf = calloc(1, 4096); - brw->curbe.next_buf = calloc(1, 4096); - } + brw->sf.viewport_transform_enable = true; ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; @@ -775,9 +854,6 @@ brwCreateContext(gl_api 
api, if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; - brw_fs_alloc_reg_sets(brw); - brw_vec4_alloc_reg_set(brw); - if (INTEL_DEBUG & DEBUG_SHADER_TIME) brw_init_shader_time(brw); @@ -790,6 +866,9 @@ brwCreateContext(gl_api api, brw_init_performance_monitors(brw); } + vbo_use_buffer_objects(ctx); + vbo_always_unmap_buffers(ctx); + return true; } @@ -811,6 +890,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) } _mesa_meta_free(&brw->ctx); + brw_meta_fast_clear_free(brw); if (INTEL_DEBUG & DEBUG_SHADER_TIME) { /* Force a report. */ @@ -824,11 +904,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) brw_draw_destroy(brw); drm_intel_bo_unreference(brw->curbe.curbe_bo); - drm_intel_bo_unreference(brw->vs.base.const_bo); - drm_intel_bo_unreference(brw->wm.base.const_bo); - - free(brw->curbe.last_buf); - free(brw->curbe.next_buf); drm_intel_gem_context_destroy(brw->hw_ctx); @@ -938,13 +1013,17 @@ intelMakeCurrent(__DRIcontext * driContextPriv, struct gl_context *ctx = &brw->ctx; struct gl_framebuffer *fb, *readFb; - if (driDrawPriv == NULL && driReadPriv == NULL) { + if (driDrawPriv == NULL) { fb = _mesa_get_incomplete_framebuffer(); - readFb = _mesa_get_incomplete_framebuffer(); } else { fb = driDrawPriv->driverPrivate; - readFb = driReadPriv->driverPrivate; driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; + } + + if (driReadPriv == NULL) { + readFb = _mesa_get_incomplete_framebuffer(); + } else { + readFb = driReadPriv->driverPrivate; driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; } @@ -1243,10 +1322,9 @@ intel_query_dri2_buffers(struct brw_context *brw, * DRI2BufferDepthStencil are handled as special cases. * * \param buffer_name is a human readable name, such as "dri2 front buffer", - * that is passed to intel_region_alloc_for_handle(). + * that is passed to drm_intel_bo_gem_create_from_name(). 
* * \see intel_update_renderbuffers() - * \see intel_region_alloc_for_handle() */ static void intel_process_dri2_buffer(struct brw_context *brw, @@ -1255,8 +1333,8 @@ intel_process_dri2_buffer(struct brw_context *brw, struct intel_renderbuffer *rb, const char *buffer_name) { - struct intel_region *region = NULL; struct gl_framebuffer *fb = drawable->driverPrivate; + drm_intel_bo *bo; if (!rb) return; @@ -1267,18 +1345,25 @@ intel_process_dri2_buffer(struct brw_context *brw, * use of a mapping of the buffer involves a bunch of page faulting which is * moderately expensive. */ - if (num_samples == 0) { - if (rb->mt && - rb->mt->region && - rb->mt->region->name == buffer->name) - return; - } else { - if (rb->singlesample_mt && - rb->singlesample_mt->region && - rb->singlesample_mt->region->name == buffer->name) - return; + struct intel_mipmap_tree *last_mt; + if (num_samples == 0) + last_mt = rb->mt; + else + last_mt = rb->singlesample_mt; + + uint32_t old_name = 0; + if (last_mt) { + /* The bo already has a name because the miptree was created by a + * previous call to intel_process_dri2_buffer(). If a bo already has a + * name, then drm_intel_bo_flink() is a low-cost getter. It does not + * create a new name. 
+ */ + drm_intel_bo_flink(last_mt->bo, &old_name); } + if (old_name == buffer->name) + return; + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { fprintf(stderr, "attaching buffer %d, at %d, cpp %d, pitch %d\n", @@ -1287,24 +1372,21 @@ intel_process_dri2_buffer(struct brw_context *brw, } intel_miptree_release(&rb->mt); - region = intel_region_alloc_for_handle(brw->intelScreen, - buffer->cpp, - drawable->w, - drawable->h, - buffer->pitch, - buffer->name, - buffer_name); - if (!region) { + bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name, + buffer->name); + if (!bo) { fprintf(stderr, - "Failed to make region for returned DRI2 buffer " - "(%dx%d, named %d).\n" + "Failed to open BO for returned DRI2 buffer " + "(%dx%d, %s, named %d).\n" "This is likely a bug in the X Server that will lead to a " "crash soon.\n", - drawable->w, drawable->h, buffer->name); + drawable->w, drawable->h, buffer_name, buffer->name); return; } - intel_update_winsys_renderbuffer_miptree(brw, rb, region); + intel_update_winsys_renderbuffer_miptree(brw, rb, bo, + drawable->w, drawable->h, + buffer->pitch); if (brw_is_front_buffer_drawing(fb) && (buffer->attachment == __DRI_BUFFER_FRONT_LEFT || @@ -1315,7 +1397,7 @@ intel_process_dri2_buffer(struct brw_context *brw, assert(rb->mt); - intel_region_release(&region); + drm_intel_bo_unreference(bo); } /** @@ -1341,10 +1423,9 @@ intel_update_image_buffer(struct brw_context *intel, __DRIimage *buffer, enum __DRIimageBufferMask buffer_type) { - struct intel_region *region = buffer->region; struct gl_framebuffer *fb = drawable->driverPrivate; - if (!rb || !region) + if (!rb || !buffer->bo) return; unsigned num_samples = rb->Base.Base.NumSamples; @@ -1352,19 +1433,18 @@ intel_update_image_buffer(struct brw_context *intel, /* Check and see if we're already bound to the right * buffer object */ - if (num_samples == 0) { - if (rb->mt && - rb->mt->region && - rb->mt->region->bo == region->bo) - return; - } else { - if (rb->singlesample_mt && - 
rb->singlesample_mt->region && - rb->singlesample_mt->region->bo == region->bo) - return; - } + struct intel_mipmap_tree *last_mt; + if (num_samples == 0) + last_mt = rb->mt; + else + last_mt = rb->singlesample_mt; + + if (last_mt && last_mt->bo == buffer->bo) + return; - intel_update_winsys_renderbuffer_miptree(intel, rb, region); + intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo, + buffer->width, buffer->height, + buffer->pitch); if (brw_is_front_buffer_drawing(fb) && buffer_type == __DRI_IMAGE_BUFFER_FRONT &&