X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.c;h=3af45551e991ec567bef565a26a51fd2a905ae4b;hb=69fdf13c215c2970feaca76f178a5c2c11ba8fec;hp=3b125448e144e341b6f8f2187e2ca35b222112ed;hpb=958fc04dc51a2561c8598f42df59e3d9139e56a7;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3b125448e14..3af45551e99 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -51,6 +51,7 @@ #include "brw_context.h" #include "brw_defines.h" +#include "brw_blorp.h" #include "brw_compiler.h" #include "brw_draw.h" #include "brw_state.h" @@ -69,52 +70,36 @@ #include "tnl/tnl.h" #include "tnl/t_pipeline.h" #include "util/ralloc.h" +#include "util/debug.h" +#include "isl/isl.h" /*************************************** * Mesa's Driver Functions ***************************************/ -static size_t -brw_query_samples_for_format(struct gl_context *ctx, GLenum target, - GLenum internalFormat, int samples[16]) -{ - struct brw_context *brw = brw_context(ctx); - - (void) target; - - switch (brw->gen) { - case 9: - case 8: - samples[0] = 8; - samples[1] = 4; - samples[2] = 2; - return 3; - - case 7: - samples[0] = 8; - samples[1] = 4; - return 2; - - case 6: - samples[0] = 4; - return 1; +const char *const brw_vendor_string = "Intel Open Source Technology Center"; +static const char * +get_bsw_model(const struct intel_screen *intelScreen) +{ + switch (intelScreen->eu_total) { + case 16: + return "405"; + case 12: + return "400"; default: - assert(brw->gen < 6); - samples[0] = 1; - return 1; + return " "; } } -const char *const brw_vendor_string = "Intel Open Source Technology Center"; - const char * -brw_get_renderer_string(unsigned deviceID) +brw_get_renderer_string(const struct intel_screen *intelScreen) { const char *chipset; static char buffer[128]; + char *bsw = NULL; - switch (deviceID) { + switch (intelScreen->deviceID) { #undef CHIPSET #define CHIPSET(id, symbol, str) case id: chipset = str; break; #include "pci_ids/i965_pci_ids.h" @@ -123,7 +108,18 @@ brw_get_renderer_string(unsigned deviceID) break; } + /* Braswell branding is funny, so we have to fix it up here */ + if (intelScreen->deviceID == 0x22B1) { + bsw = strdup(chipset); + char *needle = strstr(bsw, "XXX"); + if (needle) { + memcpy(needle, get_bsw_model(intelScreen), 3); + chipset = bsw; + } + } + (void) driGetRendererString(buffer, chipset, 0); + free(bsw); return buffer; } @@ -138,7 +134,7 @@ intel_get_string(struct gl_context * ctx, GLenum name) case GL_RENDERER: return - (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID); + (GLubyte *) brw_get_renderer_string(brw->intelScreen); default: return NULL; @@ -152,9 +148,78 @@ intel_viewport(struct gl_context *ctx) __DRIcontext *driContext = brw->driContext; if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { - dri2InvalidateDrawable(driContext->driDrawablePriv); - dri2InvalidateDrawable(driContext->driReadablePriv); + if (driContext->driDrawablePriv) + dri2InvalidateDrawable(driContext->driDrawablePriv); + if (driContext->driReadablePriv) + dri2InvalidateDrawable(driContext->driReadablePriv); + } +} + +static void +intel_update_framebuffer(struct gl_context *ctx, + struct gl_framebuffer *fb) +{ + struct brw_context *brw = brw_context(ctx); + + /* Quantize the derived default number of samples + */ + fb->DefaultGeometry._NumSamples = + intel_quantize_num_samples(brw->intelScreen, + fb->DefaultGeometry.NumSamples); +} + +static bool +intel_disable_rb_aux_buffer(struct brw_context *brw, const drm_intel_bo *bo) +{ + const struct gl_framebuffer *fb = brw->ctx.DrawBuffer; + bool found = false; + + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { + const struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb && irb->mt->bo == bo) { + found = brw->draw_aux_buffer_disabled[i] = true; + } } + + return found; +} + +/* On Gen9 color buffers may be compressed by the hardware (lossless + * compression). There are, however, format restrictions and care needs to be + * taken that the sampler engine is capable for re-interpreting a buffer with + * format different the buffer was originally written with. + * + * For example, SRGB formats are not compressible and the sampler engine isn't + * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying + * color buffer needs to be resolved so that the sampling surface can be + * sampled as non-compressed (i.e., without the auxiliary MCS buffer being + * set). + */ +static bool +intel_texture_view_requires_resolve(struct brw_context *brw, + struct intel_texture_object *intel_tex) +{ + if (brw->gen < 9 || + !intel_miptree_is_lossless_compressed(brw, intel_tex->mt)) + return false; + + const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format); + + if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo, + brw_format)) + return false; + + perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", + _mesa_get_format_name(intel_tex->_Format), + _mesa_get_format_name(intel_tex->mt->format)); + + if (intel_disable_rb_aux_buffer(brw, intel_tex->mt->bo)) + perf_debug("Sampling renderbuffer with non-compressible format - " + "turning off compression"); + + return true; } static void @@ -177,6 +242,9 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) if (depth_irb) intel_renderbuffer_resolve_hiz(brw, depth_irb); + memset(brw->draw_aux_buffer_disabled, 0, + sizeof(brw->draw_aux_buffer_disabled)); + /* Resolve depth buffer and render cache of each enabled texture. */ int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; for (int i = 0; i <= maxEnabledUnit; i++) { @@ -186,11 +254,108 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) if (!tex_obj || !tex_obj->mt) continue; intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); + /* Sampling engine understands lossless compression and resolving + * those surfaces should be skipped for performance reasons. + */ + const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ? + 0 : INTEL_MIPTREE_IGNORE_CCS_E; + intel_miptree_resolve_color(brw, tex_obj->mt, flags); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + + if (tex_obj->base.StencilSampling || + tex_obj->mt->format == MESA_FORMAT_S_UINT8) { + intel_update_r8stencil(brw, tex_obj->mt); + } + } + + /* Resolve color for each active shader image. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + const struct gl_linked_shader *shader = + ctx->_Shader->CurrentProgram[i] ? + ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL; + + if (unlikely(shader && shader->NumImages)) { + for (unsigned j = 0; j < shader->NumImages; j++) { + struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]]; + tex_obj = intel_texture_object(u->TexObj); + + if (tex_obj && tex_obj->mt) { + /* Access to images is implemented using indirect messages + * against data port. Normal render target write understands + * lossless compression but unfortunately the typed/untyped + * read/write interface doesn't. Therefore even lossless + * compressed surfaces need to be resolved prior to accessing + * them. Hence skip setting INTEL_MIPTREE_IGNORE_CCS_E. + */ + intel_miptree_resolve_color(brw, tex_obj->mt, 0); + + if (intel_miptree_is_lossless_compressed(brw, tex_obj->mt) && + intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) { + perf_debug("Using renderbuffer as shader image - turning " + "off lossless compression"); + } + + brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + } + } + } + } + + /* Resolve color buffers for non-coherent framebuffer fetch. */ + if (!ctx->Extensions.MESA_shader_framebuffer_fetch && + ctx->FragmentProgram._Current && + ctx->FragmentProgram._Current->Base.OutputsRead) { + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { + const struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb && + intel_miptree_resolve_color(brw, irb->mt, + INTEL_MIPTREE_IGNORE_CCS_E)) + brw_render_cache_set_check_flush(brw, irb->mt->bo); + } + } + + /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the + * single-sampled color renderbuffers because the CCS buffer isn't + * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is + * enabled because otherwise the surface state will be programmed with the + * linear equivalent format anyway. + */ + if (brw->gen >= 9 && ctx->Color.sRGBEnabled) { + struct gl_framebuffer *fb = ctx->DrawBuffer; + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; + + if (rb == NULL) + continue; + + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_mipmap_tree *mt = irb->mt; + + if (mt == NULL || + mt->num_samples > 1 || + _mesa_get_srgb_format_linear(mt->format) == mt->format) + continue; + + /* Lossless compression is not supported for SRGB formats, it + * should be impossible to get here with such surfaces. + */ + assert(!intel_miptree_is_lossless_compressed(brw, mt)); + intel_miptree_resolve_color(brw, mt, 0); + brw_render_cache_set_check_flush(brw, mt->bo); + } } _mesa_lock_context_textures(ctx); + + if (new_state & _NEW_BUFFERS) { + intel_update_framebuffer(ctx, ctx->DrawBuffer); + if (ctx->DrawBuffer != ctx->ReadBuffer) + intel_update_framebuffer(ctx, ctx->ReadBuffer); + } } #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) @@ -284,7 +449,9 @@ brw_init_driver_functions(struct brw_context *brw, brwInitFragProgFuncs( functions ); brw_init_common_queryobj_functions(functions); - if (brw->gen >= 6) + if (brw->gen >= 8 || brw->is_haswell) + hsw_init_queryobj_functions(functions); + else if (brw->gen >= 6) gen6_init_queryobj_functions(functions); else gen4_init_queryobj_functions(functions); @@ -292,17 +459,22 @@ brw_init_driver_functions(struct brw_context *brw, if (brw->gen >= 7) brw_init_conditional_render_functions(functions); - functions->QuerySamplesForFormat = brw_query_samples_for_format; + functions->QueryInternalFormat = brw_query_internal_format; functions->NewTransformFeedback = brw_new_transform_feedback; functions->DeleteTransformFeedback = brw_delete_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - if (brw->gen >= 7) { + if (brw->intelScreen->has_mi_math_and_lrr) { + functions->BeginTransformFeedback = hsw_begin_transform_feedback; + functions->EndTransformFeedback = hsw_end_transform_feedback; + functions->PauseTransformFeedback = hsw_pause_transform_feedback; + functions->ResumeTransformFeedback = hsw_resume_transform_feedback; + } else if (brw->gen >= 7) { functions->BeginTransformFeedback = gen7_begin_transform_feedback; functions->EndTransformFeedback = gen7_end_transform_feedback; functions->PauseTransformFeedback = gen7_pause_transform_feedback; functions->ResumeTransformFeedback = gen7_resume_transform_feedback; + functions->GetTransformFeedbackVertexCount = + brw_get_transform_feedback_vertex_count; } else { functions->BeginTransformFeedback = brw_begin_transform_feedback; functions->EndTransformFeedback = brw_end_transform_feedback; @@ -316,64 +488,87 @@ static void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct brw_compiler *compiler = brw->intelScreen->compiler; + + const bool stage_exists[MESA_SHADER_STAGES] = { + [MESA_SHADER_VERTEX] = true, + [MESA_SHADER_TESS_CTRL] = brw->gen >= 7, + [MESA_SHADER_TESS_EVAL] = brw->gen >= 7, + [MESA_SHADER_GEOMETRY] = brw->gen >= 6, + [MESA_SHADER_FRAGMENT] = true, + [MESA_SHADER_COMPUTE] = + (ctx->API == API_OPENGL_CORE && + ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || + (ctx->API == API_OPENGLES2 && + ctx->Const.MaxComputeWorkGroupSize[0] >= 128) || + _mesa_extension_override_enables.ARB_compute_shader, + }; + + unsigned num_stages = 0; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + if (stage_exists[i]) + num_stages++; + } unsigned max_samplers = brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16; + ctx->Const.MaxDualSourceDrawBuffers = 1; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; + ctx->Const.MaxCombinedShaderOutputResources = + MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; + ctx->Const.QueryCounterBits.Timestamp = 36; + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; + ctx->Const.MaxRenderbufferSize = 8192; + ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); + ctx->Const.Max3DTextureLevels = 12; /* 2048 */ + ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ + ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512; + ctx->Const.MaxTextureMbytes = 1536; + ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.StripTextureBorder = true; + if (brw->gen >= 7) + ctx->Const.MaxProgramTextureGatherComponents = 4; + else if (brw->gen == 6) + ctx->Const.MaxProgramTextureGatherComponents = 1; ctx->Const.MaxUniformBlockSize = 65536; + for (int i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_program_constants *prog = &ctx->Const.Program[i]; + + if (!stage_exists[i]) + continue; + + prog->MaxTextureImageUnits = max_samplers; + prog->MaxUniformBlocks = BRW_MAX_UBO; prog->MaxCombinedUniformComponents = prog->MaxUniformComponents + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; + + prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; + prog->MaxAtomicBuffers = BRW_MAX_ABO; + prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; + prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; } - ctx->Const.MaxDualSourceDrawBuffers = 1; - ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers; - if (brw->gen >= 6) - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers; - else - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0; - if (_mesa_extension_override_enables.ARB_compute_shader) { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO; - } else { - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0; - } - ctx->Const.MaxCombinedTextureImageUnits = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - - ctx->Const.MaxTextureLevels = 14; /* 8192 */ - if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS) - ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; - ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ - ctx->Const.MaxTextureMbytes = 1536; - if (brw->gen >= 7) - ctx->Const.MaxArrayTextureLayers = 2048; - else - ctx->Const.MaxArrayTextureLayers = 512; + ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; + ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; + ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; + ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; + ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; + ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; - ctx->Const.MaxTextureRectSize = 1 << 12; - - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - ctx->Const.MaxRenderbufferSize = 8192; /* Hardware only supports a limited number of transform feedback buffers. * So we need to override the Mesa default (which is based only on software @@ -395,7 +590,8 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxTransformFeedbackSeparateComponents = BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; - ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true; + ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = + !brw->intelScreen->has_mi_math_and_lrr; int max_samples; const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); @@ -421,6 +617,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxColorTextureSamples = max_samples; ctx->Const.MaxDepthTextureSamples = max_samples; ctx->Const.MaxIntegerSamples = max_samples; + ctx->Const.MaxImageSamples = 0; /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used * to map indices of rectangular grid to sample numbers within a pixel. @@ -430,11 +627,6 @@ brw_initialize_context_constants(struct brw_context *brw) */ gen6_set_sample_maps(ctx); - if (brw->gen >= 7) - ctx->Const.MaxProgramTextureGatherComponents = 4; - else if (brw->gen == 6) - ctx->Const.MaxProgramTextureGatherComponents = 1; - ctx->Const.MinLineWidth = 1.0; ctx->Const.MinLineWidthAA = 1.0; if (brw->gen >= 6) { @@ -463,6 +655,11 @@ brw_initialize_context_constants(struct brw_context *brw) if (brw->gen >= 5 || brw->is_g4x) ctx->Const.MaxClipPlanes = 8; + ctx->Const.LowerTessLevel = true; + ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8; + ctx->Const.LowerTESPatchVerticesIn = true; + ctx->Const.PrimitiveRestartForPatches = true; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0; ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0; @@ -505,30 +702,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - if (brw->gen >= 7) { - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO; - ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms = - (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0); - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = - BRW_MAX_IMAGES; - ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - ctx->Const.MaxCombinedShaderOutputResources = - MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; - ctx->Const.MaxImageSamples = 0; - ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; - } - /* Gen6 converts quads to polygon in beginning of 3D pipeline, * but we're not sure how it's actually done for vertex order, * that affect provoking vertex decision. Always use last vertex @@ -580,27 +753,16 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.TextureBufferOffsetAlignment = 16; ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - /* FIXME: Tessellation stages are not yet supported in i965, so - * MaxCombinedShaderStorageBlocks doesn't take them into account. - */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO; - ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3; - ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3; - - if (_mesa_extension_override_enables.ARB_compute_shader) - ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO; - if (brw->gen >= 6) { ctx->Const.MaxVarying = 32; ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64; ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128; + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128; + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128; + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128; } /* We want the GLSL compiler to emit code that uses condition codes */ @@ -609,6 +771,11 @@ brw_initialize_context_constants(struct brw_context *brw) brw->intelScreen->compiler->glsl_compiler_options[i]; } + if (brw->gen >= 7) { + ctx->Const.MaxViewportWidth = 32768; + ctx->Const.MaxViewportHeight = 32768; + } + /* ARB_viewport_array */ if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) { ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; @@ -625,32 +792,51 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); /* ARB_framebuffer_no_attachments */ - ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth; - ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight; + ctx->Const.MaxFramebufferWidth = 16384; + ctx->Const.MaxFramebufferHeight = 16384; ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; ctx->Const.MaxFramebufferSamples = max_samples; + + /* OES_primitive_bounding_box */ + ctx->Const.NoPrimitiveBoundingBoxOutput = true; } static void -brw_adjust_cs_context_constants(struct brw_context *brw) +brw_initialize_cs_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + const struct intel_screen *screen = brw->intelScreen; + const struct gen_device_info *devinfo = screen->devinfo; + + /* FINISHME: Do this for all platforms that the kernel supports */ + if (brw->is_cherryview && + screen->subslice_total > 0 && screen->eu_total > 0) { + /* Logical CS threads = EUs per subslice * 7 threads per EU */ + brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7; + + /* Fuse configurations may give more threads than expected, never less. */ + if (brw->max_cs_threads < devinfo->max_cs_threads) + brw->max_cs_threads = devinfo->max_cs_threads; + } else { + brw->max_cs_threads = devinfo->max_cs_threads; + } - /* For ES, we set these constants based on SIMD8. - * - * TODO: Once we can always generate SIMD16, we should update this. + /* Maximum number of scalar compute shader invocations that can be run in + * parallel in the same subslice assuming SIMD32 dispatch. * - * For GL, we assume we can generate a SIMD16 program, but this currently - * is not always true. This allows us to run more test cases, and will be - * required based on desktop GL compute shader requirements. + * We don't advertise more than 64 threads, because we are limited to 64 by + * our usage of thread_width_max in the gpgpu walker command. This only + * currently impacts Haswell, which otherwise might be able to advertise 70 + * threads. With SIMD32 and 64 threads, Haswell still provides twice the + * required the number of invocation needed for ARB_compute_shader. */ - const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8; - - const uint32_t max_invocations = simd_size * brw->max_cs_threads; + const unsigned max_threads = MIN2(64, brw->max_cs_threads); + const uint32_t max_invocations = 32 * max_threads; ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; + ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; } /** @@ -702,6 +888,9 @@ brw_process_driconf_options(struct brw_context *brw) brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); + if (driQueryOptionb(&brw->optionCache, "precise_trig")) + brw->intelScreen->compiler->precise_trig = true; + ctx->Const.ForceGLSLExtensionsWarn = driQueryOptionb(options, "force_glsl_extensions_warn"); @@ -710,6 +899,11 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.AllowGLSLExtensionDirectiveMidShader = driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); + + ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init"); + + brw->dual_color_blend_by_location = + driQueryOptionb(options, "dual_color_blend_by_location"); } GLboolean @@ -726,7 +920,7 @@ brwCreateContext(gl_api api, __DRIscreen *sPriv = driContextPriv->driScreenPriv; struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; struct intel_screen *screen = sPriv->driverPrivate; - const struct brw_device_info *devinfo = screen->devinfo; + const struct gen_device_info *devinfo = screen->devinfo; struct dd_function_table functions; /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel @@ -772,10 +966,14 @@ brwCreateContext(gl_api api, brw->needs_unlit_centroid_workaround = devinfo->needs_unlit_centroid_workaround; - brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; + brw->must_use_separate_stencil = devinfo->must_use_separate_stencil; brw->has_swizzling = screen->hw_has_swizzling; + isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling); + brw->vs.base.stage = MESA_SHADER_VERTEX; + brw->tcs.base.stage = MESA_SHADER_TESS_CTRL; + brw->tes.base.stage = MESA_SHADER_TESS_EVAL; brw->gs.base.stage = MESA_SHADER_GEOMETRY; brw->wm.base.stage = MESA_SHADER_FRAGMENT; if (brw->gen >= 8) { @@ -836,6 +1034,7 @@ brwCreateContext(gl_api api, if (INTEL_DEBUG & DEBUG_PERF) brw->perf_debug = true; + brw_initialize_cs_context_constants(brw); brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset @@ -877,12 +1076,14 @@ brwCreateContext(gl_api api, brw_init_surface_formats(brw); + if (brw->gen >= 6) + brw_blorp_init(brw); + brw->max_vs_threads = devinfo->max_vs_threads; brw->max_hs_threads = devinfo->max_hs_threads; brw->max_ds_threads = devinfo->max_ds_threads; brw->max_gs_threads = devinfo->max_gs_threads; brw->max_wm_threads = devinfo->max_wm_threads; - brw->max_cs_threads = devinfo->max_cs_threads; brw->urb.size = devinfo->urb.size; brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; @@ -890,23 +1091,6 @@ brwCreateContext(gl_api api, brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; - brw_adjust_cs_context_constants(brw); - - /* Estimate the size of the mappable aperture into the GTT. There's an - * ioctl to get the whole GTT size, but not one to get the mappable subset. - * It turns out it's basically always 256MB, though some ancient hardware - * was smaller. - */ - uint32_t gtt_size = 256 * 1024 * 1024; - - /* We don't want to map two objects such that a memcpy between them would - * just fault one mapping in and then the other over and over forever. So - * we would need to divide the GTT size by 2. Additionally, some GTT is - * taken up by things like the framebuffer and the ringbuffer and such, so - * be more conservative. - */ - brw->max_gtt_map_object_size = gtt_size / 4; - if (brw->gen == 6) brw->urb.gs_present = false; @@ -917,9 +1101,11 @@ brwCreateContext(gl_api api, brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + brw->max_gtt_map_object_size = screen->max_gtt_map_object_size; + brw->use_resource_streamer = screen->has_resource_streamer && - (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) || - brw_env_var_as_boolean("INTEL_USE_GATHER", false)); + (env_var_as_boolean("INTEL_USE_HW_BT", false) || + env_var_as_boolean("INTEL_USE_GATHER", false)); ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; @@ -966,7 +1152,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) } _mesa_meta_free(&brw->ctx); - brw_meta_fast_clear_free(brw); if (INTEL_DEBUG & DEBUG_SHADER_TIME) { /* Force a report. */ @@ -976,12 +1161,19 @@ intelDestroyContext(__DRIcontext * driContextPriv) brw_destroy_shader_time(brw); } + if (brw->gen >= 6) + blorp_finish(&brw->blorp); + brw_destroy_state(brw); brw_draw_destroy(brw); drm_intel_bo_unreference(brw->curbe.curbe_bo); if (brw->vs.base.scratch_bo) drm_intel_bo_unreference(brw->vs.base.scratch_bo); + if (brw->tcs.base.scratch_bo) + drm_intel_bo_unreference(brw->tcs.base.scratch_bo); + if (brw->tes.base.scratch_bo) + drm_intel_bo_unreference(brw->tes.base.scratch_bo); if (brw->gs.base.scratch_bo) drm_intel_bo_unreference(brw->gs.base.scratch_bo); if (brw->wm.base.scratch_bo) @@ -1070,10 +1262,9 @@ intel_gles3_srgb_workaround(struct brw_context *brw, */ fb->Visual.sRGBCapable = false; for (int i = 0; i < BUFFER_COUNT; i++) { - if (fb->Attachment[i].Renderbuffer && - fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) { - fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM; - } + struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; + if (rb) + rb->Format = _mesa_get_srgb_format_linear(rb->Format); } } @@ -1164,7 +1355,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw, if (rb == NULL || rb->mt == NULL) continue; if (rb->mt->num_samples <= 1) - intel_miptree_resolve_color(brw, rb->mt); + intel_miptree_resolve_color(brw, rb->mt, 0); else intel_renderbuffer_downsample(brw, rb); } @@ -1528,6 +1719,7 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) struct __DRIimageList images; unsigned int format; uint32_t buffer_mask = 0; + int ret; front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); @@ -1547,12 +1739,14 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) if (back_rb) buffer_mask |= __DRI_IMAGE_BUFFER_BACK; - (*screen->image.loader->getBuffers) (drawable, - driGLFormatToImageFormat(format), - &drawable->dri2.stamp, - drawable->loaderPrivate, - buffer_mask, - &images); + ret = screen->image.loader->getBuffers(drawable, + driGLFormatToImageFormat(format), + &drawable->dri2.stamp, + drawable->loaderPrivate, + buffer_mask, + &images); + if (!ret) + return; if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { drawable->w = images.front->width;