X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=f428f58c69aa2a35165670c4a609ec9adb9b91fb;hb=d3e23f1ff915c01541f8df375b50b93b3da565a8;hp=ca389f80d7b5053f08990974cebb62636f960d8b;hpb=808b8e59c09ac5977ab020bd1771225a4e0a4cc4;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ca389f80d7b..f428f58c69a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -50,6 +50,7 @@ #include "brw_context.h" #include "brw_defines.h" +#include "brw_shader.h" #include "brw_draw.h" #include "brw_state.h" @@ -81,6 +82,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target, (void) target; switch (brw->gen) { + case 9: case 8: samples[0] = 8; samples[1] = 4; @@ -97,6 +99,7 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target, return 1; default: + assert(brw->gen < 6); samples[0] = 1; return 1; } @@ -230,8 +233,8 @@ intel_glFlush(struct gl_context *ctx) intel_batchbuffer_flush(brw); intel_flush_front(ctx); - if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) - brw->need_throttle = true; + + brw->need_flush_throttle = true; } static void @@ -284,6 +287,9 @@ brw_init_driver_functions(struct brw_context *brw, gen6_init_queryobj_functions(functions); else gen4_init_queryobj_functions(functions); + brw_init_compute_functions(functions); + if (brw->gen >= 7) + brw_init_conditional_render_functions(functions); functions->QuerySamplesForFormat = brw_query_samples_for_format; @@ -406,6 +412,14 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxDepthTextureSamples = max_samples; ctx->Const.MaxIntegerSamples = max_samples; + /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used + * to map indices of rectangular grid to sample numbers within a pixel. + * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled + * extension implementation. For more details see the comment above + * gen6_set_sample_maps() definition. + */ + gen6_set_sample_maps(ctx); + if (brw->gen >= 7) ctx->Const.MaxProgramTextureGatherComponents = 4; else if (brw->gen == 6) @@ -413,9 +427,22 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 5.0; - ctx->Const.MaxLineWidthAA = 5.0; - ctx->Const.LineWidthGranularity = 0.5; + if (brw->gen >= 6) { + ctx->Const.MaxLineWidth = 7.375; + ctx->Const.MaxLineWidthAA = 7.375; + ctx->Const.LineWidthGranularity = 0.125; + } else { + ctx->Const.MaxLineWidth = 7.0; + ctx->Const.MaxLineWidthAA = 7.0; + ctx->Const.LineWidthGranularity = 0.5; + } + + /* For non-antialiased lines, we have to round the line width to the + * nearest whole number. Make sure that we don't advertise a line + * width that, when rounded, will be beyond the actual hardware + * maximum. + */ + assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; @@ -462,6 +489,12 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; + ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; + ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; + if (brw->gen >= 7) { ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; @@ -472,6 +505,18 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO; ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO; ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms = + BRW_MAX_IMAGES; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms = + (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0); + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = + BRW_MAX_IMAGES; + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; + ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs = + MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; + ctx->Const.MaxImageSamples = 0; + ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; } /* Gen6 converts quads to polygon in beginning of 3D pipeline, @@ -498,18 +543,10 @@ brw_initialize_context_constants(struct brw_context *brw) * contains meaning [sic] data, software should make sure all higher bits * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." * - * We select the representation of a true boolean uniform to match what the - * CMP instruction returns. - * - * The Sandybridge BSpec's description of the CMP instruction matches that - * of the Ivybridge PRM. (The description in the Sandybridge PRM is seems - * to have not been updated from Ironlake). Its CMP instruction behaves like - * Ivybridge and newer. + * We select the representation of a true boolean uniform to be ~0, and fix + * the results of Gen <= 5 CMP instruction's with -(result & 1). */ - if (brw->gen >= 6) - ctx->Const.UniformBooleanTrue = ~0; - else - ctx->Const.UniformBooleanTrue = 1; + ctx->Const.UniformBooleanTrue = ~0; /* From the gen4 PRM, volume 4 page 127: * @@ -522,6 +559,7 @@ brw_initialize_context_constants(struct brw_context *brw) */ ctx->Const.UniformBufferOffsetAlignment = 16; ctx->Const.TextureBufferOffsetAlignment = 16; + ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; @@ -533,28 +571,16 @@ brw_initialize_context_constants(struct brw_context *brw) /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX; - ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true; - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp = - (i == MESA_SHADER_FRAGMENT); - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; - ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; + ctx->Const.ShaderCompilerOptions[i] = + brw->intelScreen->compiler->glsl_compiler_options[i]; } - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; - ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; - /* ARB_viewport_array */ - if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) { - ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS; + if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) { + ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; ctx->Const.ViewportSubpixelBits = 0; - /* Cast to float before negating becuase MaxViewportWidth is unsigned. + /* Cast to float before negating because MaxViewportWidth is unsigned. */ ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; @@ -563,6 +589,34 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_gpu_shader5 */ if (brw->gen >= 7) ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); + + /* ARB_framebuffer_no_attachments */ + ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth; + ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight; + ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; + ctx->Const.MaxFramebufferSamples = max_samples; +} + +static void +brw_adjust_cs_context_constants(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + + /* For ES, we set these constants based on SIMD8. + * + * TODO: Once we can always generate SIMD16, we should update this. + * + * For GL, we assume we can generate a SIMD16 program, but this currently + * is not always true. This allows us to run more test cases, and will be + * required based on desktop GL compute shader requirements. + */ + const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8; + + const uint32_t max_invocations = simd_size * brw->max_cs_threads; + ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; + ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; + ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; + ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; } /** @@ -612,9 +666,6 @@ brw_process_driconf_options(struct brw_context *brw) brw->disable_throttling = true; } - brw->disable_derivative_optimization = - driQueryOptionb(&brw->optionCache, "disable_derivative_optimization"); - brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); ctx->Const.ForceGLSLExtensionsWarn = @@ -660,7 +711,7 @@ brwCreateContext(gl_api api, struct brw_context *brw = rzalloc(NULL, struct brw_context); if (!brw) { - fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__); + fprintf(stderr, "%s: failed to alloc context\n", __func__); *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; return false; } @@ -676,6 +727,7 @@ brwCreateContext(gl_api api, brw->is_baytrail = devinfo->is_baytrail; brw->is_haswell = devinfo->is_haswell; brw->is_cherryview = devinfo->is_cherryview; + brw->is_broxton = devinfo->is_broxton; brw->has_llc = devinfo->has_llc; brw->has_hiz = devinfo->has_hiz_and_separate_stencil; brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; @@ -715,7 +767,7 @@ brwCreateContext(gl_api api, if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__); + fprintf(stderr, "%s: failed to init mesa context\n", __func__); intelDestroyContext(driContextPriv); return false; } @@ -746,7 +798,10 @@ brwCreateContext(gl_api api, _mesa_meta_init(ctx); brw_process_driconf_options(brw); - brw_process_intel_debug_variable(brw); + + if (INTEL_DEBUG & DEBUG_PERF) + brw->perf_debug = true; + brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset @@ -776,6 +831,12 @@ brwCreateContext(gl_api api, } } + if (brw_init_pipe_control(brw, devinfo)) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + intelDestroyContext(driContextPriv); + return false; + } + brw_init_state(brw); intelInitExtensions(ctx); @@ -783,13 +844,20 @@ brwCreateContext(gl_api api, brw_init_surface_formats(brw); brw->max_vs_threads = devinfo->max_vs_threads; + brw->max_hs_threads = devinfo->max_hs_threads; + brw->max_ds_threads = devinfo->max_ds_threads; brw->max_gs_threads = devinfo->max_gs_threads; brw->max_wm_threads = devinfo->max_wm_threads; + brw->max_cs_threads = devinfo->max_cs_threads; brw->urb.size = devinfo->urb.size; brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + brw_adjust_cs_context_constants(brw); + /* Estimate the size of the mappable aperture into the GTT. There's an * ioctl to get the whole GTT size, but not one to get the mappable subset. * It turns out it's basically always 256MB, though some ancient hardware @@ -806,13 +874,19 @@ brwCreateContext(gl_api api, brw->max_gtt_map_object_size = gtt_size / 4; if (brw->gen == 6) - brw->urb.gen6_gs_previously_active = false; + brw->urb.gs_present = false; brw->prim_restart.in_progress = false; brw->prim_restart.enable_cut_index = false; brw->gs.enabled = false; brw->sf.viewport_transform_enable = true; + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + + brw->use_resource_streamer = screen->has_resource_streamer && + (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) || + brw_env_var_as_boolean("INTEL_USE_GATHER", false)); + ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; @@ -851,10 +925,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) (struct brw_context *) driContextPriv->driverPrivate; struct gl_context *ctx = &brw->ctx; - assert(brw); /* should never be null */ - if (!brw) - return; - /* Dump a final BMP in case the application doesn't call SwapBuffers */ if (INTEL_DEBUG & DEBUG_AUB) { intel_batchbuffer_flush(brw); @@ -876,6 +946,16 @@ intelDestroyContext(__DRIcontext * driContextPriv) brw_draw_destroy(brw); drm_intel_bo_unreference(brw->curbe.curbe_bo); + if (brw->vs.base.scratch_bo) + drm_intel_bo_unreference(brw->vs.base.scratch_bo); + if (brw->gs.base.scratch_bo) + drm_intel_bo_unreference(brw->gs.base.scratch_bo); + if (brw->wm.base.scratch_bo) + drm_intel_bo_unreference(brw->wm.base.scratch_bo); + + gen7_reset_hw_bt_pool_offsets(brw); + drm_intel_bo_unreference(brw->hw_bt_pool.bo); + brw->hw_bt_pool.bo = NULL; drm_intel_gem_context_destroy(brw->hw_ctx); @@ -888,10 +968,13 @@ intelDestroyContext(__DRIcontext * driContextPriv) if (ctx->swrast_context) _swrast_DestroyContext(&brw->ctx); + brw_fini_pipe_control(brw); intel_batchbuffer_free(brw); - drm_intel_bo_unreference(brw->first_post_swapbuffers_batch); - brw->first_post_swapbuffers_batch = NULL; + drm_intel_bo_unreference(brw->throttle_batch[1]); + drm_intel_bo_unreference(brw->throttle_batch[0]); + brw->throttle_batch[1] = NULL; + brw->throttle_batch[0] = NULL; driDestroyOptionCache(&brw->optionCache); @@ -920,7 +1003,7 @@ intelUnbindContext(__DRIcontext * driContextPriv) * sRGB encode if the renderbuffer can handle it. You can ask specifically * for a visual where you're guaranteed to be capable, but it turns out that * everyone just makes all their ARGB8888 visuals capable and doesn't offer - * incapable ones, becuase there's no difference between the two in resources + * incapable ones, because there's no difference between the two in resources * used. Applications thus get built that accidentally rely on the default * visual choice being sRGB, so we make ours sRGB capable. Everything sounds * great... @@ -1184,29 +1267,6 @@ intel_prepare_render(struct brw_context *brw) */ if (brw_is_front_buffer_drawing(ctx->DrawBuffer)) brw->front_buffer_dirty = true; - - /* Wait for the swapbuffers before the one we just emitted, so we - * don't get too many swaps outstanding for apps that are GPU-heavy - * but not CPU-heavy. - * - * We're using intelDRI2Flush (called from the loader before - * swapbuffer) and glFlush (for front buffer rendering) as the - * indicator that a frame is done and then throttle when we get - * here as we prepare to render the next frame. At this point for - * round trips for swap/copy and getting new buffers are done and - * we'll spend less time waiting on the GPU. - * - * Unfortunately, we don't have a handle to the batch containing - * the swap, and getting our hands on that doesn't seem worth it, - * so we just us the first batch we emitted after the last swap. - */ - if (brw->need_throttle && brw->first_post_swapbuffers_batch) { - if (!brw->disable_throttling) - drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch); - drm_intel_bo_unreference(brw->first_post_swapbuffers_batch); - brw->first_post_swapbuffers_batch = NULL; - brw->need_throttle = false; - } } /**