*/
+#include "compiler/nir/nir.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
+#include "main/framebuffer.h"
#include "vbo/vbo_context.h"
#include "brw_context.h"
#include "brw_defines.h"
-#include "brw_shader.h"
+#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_state.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
+#include "util/debug.h"
+#include "isl/isl.h"
/***************************************
* Mesa's Driver Functions
***************************************/
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, int samples[16])
-{
- struct brw_context *brw = brw_context(ctx);
-
- (void) target;
-
- switch (brw->gen) {
- case 9:
- case 8:
- samples[0] = 8;
- samples[1] = 4;
- samples[2] = 2;
- return 3;
-
- case 7:
- samples[0] = 8;
- samples[1] = 4;
- return 2;
-
- case 6:
- samples[0] = 4;
- return 1;
+const char *const brw_vendor_string = "Intel Open Source Technology Center";
+/* Map a Braswell (BSW) screen to its marketing model number.  A single
+ * PCI ID (0x22B1) covers several Braswell SKUs, so the fused-off EU
+ * total is used to tell them apart; unknown fuse configs get a blank.
+ */
+static const char *
+get_bsw_model(const struct intel_screen *screen)
+{
+   switch (screen->eu_total) {
+   case 16:
+      return "405";
+   case 12:
+      return "400";
    default:
-      assert(brw->gen < 6);
-      samples[0] = 1;
-      return 1;
+      return " ";
    }
 }
-const char *const brw_vendor_string = "Intel Open Source Technology Center";
-
const char *
-brw_get_renderer_string(unsigned deviceID)
+brw_get_renderer_string(const struct intel_screen *screen)
{
const char *chipset;
static char buffer[128];
+ char *bsw = NULL;
- switch (deviceID) {
+ switch (screen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
break;
}
+ /* Braswell branding is funny, so we have to fix it up here */
+ if (screen->deviceID == 0x22B1) {
+ bsw = strdup(chipset);
+ char *needle = strstr(bsw, "XXX");
+ if (needle) {
+ memcpy(needle, get_bsw_model(screen), 3);
+ chipset = bsw;
+ }
+ }
+
(void) driGetRendererString(buffer, chipset, 0);
+ free(bsw);
return buffer;
}
case GL_RENDERER:
return
- (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);
+ (GLubyte *) brw_get_renderer_string(brw->screen);
default:
return NULL;
__DRIcontext *driContext = brw->driContext;
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
- dri2InvalidateDrawable(driContext->driDrawablePriv);
- dri2InvalidateDrawable(driContext->driReadablePriv);
+ if (driContext->driDrawablePriv)
+ dri2InvalidateDrawable(driContext->driDrawablePriv);
+ if (driContext->driReadablePriv)
+ dri2InvalidateDrawable(driContext->driReadablePriv);
}
}
+/* Refresh derived per-framebuffer state.  Currently this only quantizes
+ * the default-geometry sample count (presumably for
+ * ARB_framebuffer_no_attachments framebuffers — confirm against callers)
+ * to a sample count the hardware actually supports.
+ */
+static void
+intel_update_framebuffer(struct gl_context *ctx,
+                         struct gl_framebuffer *fb)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   /* Quantize the derived default number of samples
+    */
+   fb->DefaultGeometry._NumSamples =
+      intel_quantize_num_samples(brw->screen,
+                                 fb->DefaultGeometry.NumSamples);
+}
+
+/* Flag the auxiliary (compression) buffer as disabled for every currently
+ * bound color draw buffer whose miptree is backed by @bo.  Returns true if
+ * at least one draw buffer was flagged.
+ */
+static bool
+intel_disable_rb_aux_buffer(struct brw_context *brw, const drm_intel_bo *bo)
+{
+   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
+   bool found = false;
+
+   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+      const struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+      /* NOTE(review): assumes irb->mt is non-NULL whenever irb is —
+       * TODO confirm every bound color draw buffer has a miptree here.
+       */
+      if (irb && irb->mt->bo == bo) {
+         found = brw->draw_aux_buffer_disabled[i] = true;
+      }
+   }
+
+   return found;
+}
+
+/* On Gen9 color buffers may be compressed by the hardware (lossless
+ * compression). There are, however, format restrictions and care needs to be
+ * taken that the sampler engine is capable of re-interpreting a buffer with
+ * a format different from the one the buffer was originally written with.
+ *
+ * For example, SRGB formats are not compressible and the sampler engine isn't
+ * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
+ * color buffer needs to be resolved so that the sampling surface can be
+ * sampled as non-compressed (i.e., without the auxiliary MCS buffer being
+ * set).
+ */
+static bool
+intel_texture_view_requires_resolve(struct brw_context *brw,
+                                    struct intel_texture_object *intel_tex)
+{
+   /* Only Gen9+ lossless-compressed (CCS_E) surfaces are affected. */
+   if (brw->gen < 9 ||
+       !intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
+      return false;
+
+   const uint32_t brw_format = brw_isl_format_for_mesa_format(intel_tex->_Format);
+
+   /* If the sampler can read this view's format with CCS_E enabled, no
+    * resolve is needed.
+    */
+   if (isl_format_supports_ccs_e(&brw->screen->devinfo, brw_format))
+      return false;
+
+   perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
+              _mesa_get_format_name(intel_tex->_Format),
+              _mesa_get_format_name(intel_tex->mt->format));
+
+   /* If the same buffer is also bound for drawing, stop compressing it as
+    * a render target too, so future writes stay sampleable.
+    */
+   if (intel_disable_rb_aux_buffer(brw, intel_tex->mt->bo))
+      perf_debug("Sampling renderbuffer with non-compressible format - "
+                 "turning off compression");
+
+   return true;
+}
+
static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
if (depth_irb)
intel_renderbuffer_resolve_hiz(brw, depth_irb);
+ memset(brw->draw_aux_buffer_disabled, 0,
+ sizeof(brw->draw_aux_buffer_disabled));
+
/* Resolve depth buffer and render cache of each enabled texture. */
int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
for (int i = 0; i <= maxEnabledUnit; i++) {
tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
if (!tex_obj || !tex_obj->mt)
continue;
- intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
- intel_miptree_resolve_color(brw, tex_obj->mt);
+ if (intel_miptree_sample_with_hiz(brw, tex_obj->mt))
+ intel_miptree_all_slices_resolve_hiz(brw, tex_obj->mt);
+ else
+ intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+ /* Sampling engine understands lossless compression and resolving
+ * those surfaces should be skipped for performance reasons.
+ */
+ const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
+ 0 : INTEL_MIPTREE_IGNORE_CCS_E;
+ intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+
+ if (tex_obj->base.StencilSampling ||
+ tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
+ intel_update_r8stencil(brw, tex_obj->mt);
+ }
+ }
+
+ /* Resolve color for each active shader image. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
+
+ if (unlikely(prog && prog->info.num_images)) {
+ for (unsigned j = 0; j < prog->info.num_images; j++) {
+ struct gl_image_unit *u =
+ &ctx->ImageUnits[prog->sh.ImageUnits[j]];
+ tex_obj = intel_texture_object(u->TexObj);
+
+ if (tex_obj && tex_obj->mt) {
+ /* Access to images is implemented using indirect messages
+ * against data port. Normal render target write understands
+ * lossless compression but unfortunately the typed/untyped
+ * read/write interface doesn't. Therefore even lossless
+ * compressed surfaces need to be resolved prior to accessing
+ * them. Hence skip setting INTEL_MIPTREE_IGNORE_CCS_E.
+ */
+ intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, 0);
+
+ if (intel_miptree_is_lossless_compressed(brw, tex_obj->mt) &&
+ intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
+ perf_debug("Using renderbuffer as shader image - turning "
+ "off lossless compression");
+ }
+
+ brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+ }
+ }
+ }
+ }
+
+ /* Resolve color buffers for non-coherent framebuffer fetch. */
+ if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
+ ctx->FragmentProgram._Current &&
+ ctx->FragmentProgram._Current->info.outputs_read) {
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+ for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ const struct intel_renderbuffer *irb =
+ intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+ if (irb &&
+ intel_miptree_resolve_color(
+ brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count,
+ INTEL_MIPTREE_IGNORE_CCS_E))
+ brw_render_cache_set_check_flush(brw, irb->mt->bo);
+ }
+ }
+
+ /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
+ * single-sampled color renderbuffers because the CCS buffer isn't
+ * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
+ * enabled because otherwise the surface state will be programmed with the
+ * linear equivalent format anyway.
+ */
+ if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
+
+ if (rb == NULL)
+ continue;
+
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_mipmap_tree *mt = irb->mt;
+
+ if (mt == NULL ||
+ mt->num_samples > 1 ||
+ _mesa_get_srgb_format_linear(mt->format) == mt->format)
+ continue;
+
+ /* Lossless compression is not supported for SRGB formats, it
+ * should be impossible to get here with such surfaces.
+ */
+ assert(!intel_miptree_is_lossless_compressed(brw, mt));
+ intel_miptree_all_slices_resolve_color(brw, mt, 0);
+ brw_render_cache_set_check_flush(brw, mt->bo);
+ }
}
_mesa_lock_context_textures(ctx);
+
+ if (new_state & _NEW_BUFFERS) {
+ intel_update_framebuffer(ctx, ctx->DrawBuffer);
+ if (ctx->DrawBuffer != ctx->ReadBuffer)
+ intel_update_framebuffer(ctx, ctx->ReadBuffer);
+ }
}
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
struct brw_context *brw = brw_context(ctx);
__DRIcontext *driContext = brw->driContext;
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
- __DRIscreen *const screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
- if (flushFront(screen) && driDrawable &&
+ if (flushFront(dri_screen) && driDrawable &&
driDrawable->loaderPrivate) {
/* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
intel_resolve_for_dri2_flush(brw, driDrawable);
intel_batchbuffer_flush(brw);
- flushFront(screen)(driDrawable, driDrawable->loaderPrivate);
+ flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
/* We set the dirty bit in intel_prepare_render() if we're
* front buffer rendering once we get there.
intelInitBufferFuncs(functions);
intelInitPixelFuncs(functions);
intelInitBufferObjectFuncs(functions);
- intel_init_syncobj_functions(functions);
+ brw_init_syncobj_functions(functions);
brw_init_object_purgeable_functions(functions);
brwInitFragProgFuncs( functions );
brw_init_common_queryobj_functions(functions);
- if (brw->gen >= 6)
+ if (brw->gen >= 8 || brw->is_haswell)
+ hsw_init_queryobj_functions(functions);
+ else if (brw->gen >= 6)
gen6_init_queryobj_functions(functions);
else
gen4_init_queryobj_functions(functions);
if (brw->gen >= 7)
brw_init_conditional_render_functions(functions);
- functions->QuerySamplesForFormat = brw_query_samples_for_format;
+ functions->QueryInternalFormat = brw_query_internal_format;
functions->NewTransformFeedback = brw_new_transform_feedback;
functions->DeleteTransformFeedback = brw_delete_transform_feedback;
- functions->GetTransformFeedbackVertexCount =
- brw_get_transform_feedback_vertex_count;
- if (brw->gen >= 7) {
+ if (can_do_mi_math_and_lrr(brw->screen)) {
+ functions->BeginTransformFeedback = hsw_begin_transform_feedback;
+ functions->EndTransformFeedback = hsw_end_transform_feedback;
+ functions->PauseTransformFeedback = hsw_pause_transform_feedback;
+ functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
+ } else if (brw->gen >= 7) {
functions->BeginTransformFeedback = gen7_begin_transform_feedback;
functions->EndTransformFeedback = gen7_end_transform_feedback;
functions->PauseTransformFeedback = gen7_pause_transform_feedback;
functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
+ functions->GetTransformFeedbackVertexCount =
+ brw_get_transform_feedback_vertex_count;
} else {
functions->BeginTransformFeedback = brw_begin_transform_feedback;
functions->EndTransformFeedback = brw_end_transform_feedback;
+ functions->PauseTransformFeedback = brw_pause_transform_feedback;
+ functions->ResumeTransformFeedback = brw_resume_transform_feedback;
+ functions->GetTransformFeedbackVertexCount =
+ brw_get_transform_feedback_vertex_count;
}
if (brw->gen >= 6)
brw_initialize_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct brw_compiler *compiler = brw->screen->compiler;
+
+ const bool stage_exists[MESA_SHADER_STAGES] = {
+ [MESA_SHADER_VERTEX] = true,
+ [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
+ [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
+ [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
+ [MESA_SHADER_FRAGMENT] = true,
+ [MESA_SHADER_COMPUTE] =
+ ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
+ ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
+ (ctx->API == API_OPENGLES2 &&
+ ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
+ _mesa_extension_override_enables.ARB_compute_shader,
+ };
+
+ unsigned num_stages = 0;
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (stage_exists[i])
+ num_stages++;
+ }
unsigned max_samplers =
brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
+ ctx->Const.MaxDualSourceDrawBuffers = 1;
+ ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
+ ctx->Const.MaxCombinedShaderOutputResources =
+ MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+
ctx->Const.QueryCounterBits.Timestamp = 36;
+ ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+ ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+ if (brw->gen >= 7) {
+ ctx->Const.MaxRenderbufferSize = 16384;
+ ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
+ ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
+ } else {
+ ctx->Const.MaxRenderbufferSize = 8192;
+ ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
+ ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+ }
+ ctx->Const.Max3DTextureLevels = 12; /* 2048 */
+ ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
+ ctx->Const.MaxTextureMbytes = 1536;
+ ctx->Const.MaxTextureRectSize = 1 << 12;
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 15.0;
ctx->Const.StripTextureBorder = true;
+ if (brw->gen >= 7) {
+ ctx->Const.MaxProgramTextureGatherComponents = 4;
+ ctx->Const.MinProgramTextureGatherOffset = -32;
+ ctx->Const.MaxProgramTextureGatherOffset = 31;
+ } else if (brw->gen == 6) {
+ ctx->Const.MaxProgramTextureGatherComponents = 1;
+ ctx->Const.MinProgramTextureGatherOffset = -8;
+ ctx->Const.MaxProgramTextureGatherOffset = 7;
+ }
ctx->Const.MaxUniformBlockSize = 65536;
+
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_program_constants *prog = &ctx->Const.Program[i];
- prog->MaxUniformBlocks = 12;
+
+ if (!stage_exists[i])
+ continue;
+
+ prog->MaxTextureImageUnits = max_samplers;
+
+ prog->MaxUniformBlocks = BRW_MAX_UBO;
prog->MaxCombinedUniformComponents =
prog->MaxUniformComponents +
ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+
+ prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ prog->MaxAtomicBuffers = BRW_MAX_ABO;
+ prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
+ prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
}
- ctx->Const.MaxDualSourceDrawBuffers = 1;
- ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
- ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits =
MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
- if (brw->gen >= 6)
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
- else
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
- if (_mesa_extension_override_enables.ARB_compute_shader) {
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
- ctx->Const.MaxUniformBufferBindings += 12;
- } else {
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
- }
- ctx->Const.MaxCombinedTextureImageUnits =
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
-
- ctx->Const.MaxTextureLevels = 14; /* 8192 */
- if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
- ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
- ctx->Const.Max3DTextureLevels = 12; /* 2048 */
- ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
- ctx->Const.MaxTextureMbytes = 1536;
-
- if (brw->gen >= 7)
- ctx->Const.MaxArrayTextureLayers = 2048;
- else
- ctx->Const.MaxArrayTextureLayers = 512;
- ctx->Const.MaxTextureRectSize = 1 << 12;
+ ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
+ ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
+ ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
+ ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
+ ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
+ ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
+ ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
- ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-
- ctx->Const.MaxRenderbufferSize = 8192;
/* Hardware only supports a limited number of transform feedback buffers.
* So we need to override the Mesa default (which is based only on software
ctx->Const.MaxTransformFeedbackSeparateComponents =
BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
- ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;
+ ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
+ !can_do_mi_math_and_lrr(brw->screen);
int max_samples;
- const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
+ const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
const int clamp_max_samples =
driQueryOptioni(&brw->optionCache, "clamp_max_samples");
ctx->Const.MaxColorTextureSamples = max_samples;
ctx->Const.MaxDepthTextureSamples = max_samples;
ctx->Const.MaxIntegerSamples = max_samples;
+ ctx->Const.MaxImageSamples = 0;
/* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
* to map indices of rectangular grid to sample numbers within a pixel.
*/
gen6_set_sample_maps(ctx);
- if (brw->gen >= 7)
- ctx->Const.MaxProgramTextureGatherComponents = 4;
- else if (brw->gen == 6)
- ctx->Const.MaxProgramTextureGatherComponents = 1;
-
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
if (brw->gen >= 6) {
if (brw->gen >= 5 || brw->is_g4x)
ctx->Const.MaxClipPlanes = 8;
+ ctx->Const.GLSLTessLevelsAsInputs = true;
+ ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
+ ctx->Const.LowerTESPatchVerticesIn = true;
+ ctx->Const.PrimitiveRestartForPatches = true;
+
ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
- if (brw->gen >= 7) {
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
- ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
-
- ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
- BRW_MAX_IMAGES;
- ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
- (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
- ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
- BRW_MAX_IMAGES;
- ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
- ctx->Const.MaxCombinedShaderOutputResources =
- MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
- ctx->Const.MaxImageSamples = 0;
- ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
- }
-
/* Gen6 converts quads to polygon in beginning of 3D pipeline,
* but we're not sure how it's actually done for vertex order,
* that affect provoking vertex decision. Always use last vertex
* However, unaligned accesses are slower, so enforce buffer alignment.
*/
ctx->Const.UniformBufferOffsetAlignment = 16;
+
+ /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
+ * that we can safely have the CPU and GPU writing the same SSBO on
+ * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
+ * writes, so there's no problem. For an SSBO, the GPU and the CPU can
+ * be updating disjoint regions of the buffer simultaneously and that will
+ * break if the regions overlap the same cacheline.
+ */
+ ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
ctx->Const.TextureBufferOffsetAlignment = 16;
ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
}
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
ctx->Const.ShaderCompilerOptions[i] =
- brw->intelScreen->compiler->glsl_compiler_options[i];
+ brw->screen->compiler->glsl_compiler_options[i];
}
- /* ARB_viewport_array */
- if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
+ if (brw->gen >= 7) {
+ ctx->Const.MaxViewportWidth = 32768;
+ ctx->Const.MaxViewportHeight = 32768;
+ }
+
+ /* ARB_viewport_array, OES_viewport_array */
+ if (brw->gen >= 6) {
ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
ctx->Const.ViewportSubpixelBits = 0;
ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
/* ARB_framebuffer_no_attachments */
- ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
- ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
+ ctx->Const.MaxFramebufferWidth = 16384;
+ ctx->Const.MaxFramebufferHeight = 16384;
ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
ctx->Const.MaxFramebufferSamples = max_samples;
+
+ /* OES_primitive_bounding_box */
+ ctx->Const.NoPrimitiveBoundingBoxOutput = true;
}
 static void
-brw_adjust_cs_context_constants(struct brw_context *brw)
+brw_initialize_cs_context_constants(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct intel_screen *screen = brw->screen;
+   struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   /* FINISHME: Do this for all platforms that the kernel supports */
+   if (brw->is_cherryview &&
+       screen->subslice_total > 0 && screen->eu_total > 0) {
+      /* Logical CS threads = EUs per subslice * 7 threads per EU */
+      uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+
+      /* Fuse configurations may give more threads than expected, never less. */
+      if (max_cs_threads > devinfo->max_cs_threads)
+         devinfo->max_cs_threads = max_cs_threads;
+   }
-   /* For ES, we set these constants based on SIMD8.
-    *
-    * TODO: Once we can always generate SIMD16, we should update this.
+   /* Maximum number of scalar compute shader invocations that can be run in
+    * parallel in the same subslice assuming SIMD32 dispatch.
     *
-    * For GL, we assume we can generate a SIMD16 program, but this currently
-    * is not always true. This allows us to run more test cases, and will be
-    * required based on desktop GL compute shader requirements.
+    * We don't advertise more than 64 threads, because we are limited to 64 by
+    * our usage of thread_width_max in the gpgpu walker command. This only
+    * currently impacts Haswell, which otherwise might be able to advertise 70
+    * threads. With SIMD32 and 64 threads, Haswell still provides twice the
+    * number of invocations required by ARB_compute_shader.
     */
-   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
-
-   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
+   const uint32_t max_invocations = 32 * max_threads;
    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
+   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 }
/**
struct gl_context *ctx = &brw->ctx;
driOptionCache *options = &brw->optionCache;
- driParseConfigFiles(options, &brw->intelScreen->optionCache,
+ driParseConfigFiles(options, &brw->screen->optionCache,
brw->driContext->driScreenPriv->myNum, "i965");
int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
+ if (driQueryOptionb(&brw->optionCache, "precise_trig"))
+ brw->screen->compiler->precise_trig = true;
+
ctx->Const.ForceGLSLExtensionsWarn =
driQueryOptionb(options, "force_glsl_extensions_warn");
+ ctx->Const.ForceGLSLVersion =
+ driQueryOptioni(options, "force_glsl_version");
+
ctx->Const.DisableGLSLLineContinuations =
driQueryOptionb(options, "disable_glsl_line_continuations");
ctx->Const.AllowGLSLExtensionDirectiveMidShader =
driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
+
+ ctx->Const.AllowHigherCompatVersion =
+ driQueryOptionb(options, "allow_higher_compat_version");
+
+ ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
+
+ brw->dual_color_blend_by_location =
+ driQueryOptionb(options, "dual_color_blend_by_location");
}
GLboolean
unsigned *dri_ctx_error,
void *sharedContextPrivate)
{
- __DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
- struct intel_screen *screen = sPriv->driverPrivate;
- const struct brw_device_info *devinfo = screen->devinfo;
+ struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
+ const struct gen_device_info *devinfo = &screen->devinfo;
struct dd_function_table functions;
/* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
driContextPriv->driverPrivate = brw;
brw->driContext = driContextPriv;
- brw->intelScreen = screen;
+ brw->screen = screen;
brw->bufmgr = screen->bufmgr;
brw->gen = devinfo->gen;
brw->needs_unlit_centroid_workaround =
devinfo->needs_unlit_centroid_workaround;
- brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
+ brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;
+ isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);
+
brw->vs.base.stage = MESA_SHADER_VERTEX;
+ brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
+ brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
brw->gs.base.stage = MESA_SHADER_GEOMETRY;
brw->wm.base.stage = MESA_SHADER_FRAGMENT;
if (brw->gen >= 8) {
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
+ brw_initialize_cs_context_constants(brw);
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
intel_fbo_init(brw);
- intel_batchbuffer_init(brw);
+ intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);
if (brw->gen >= 6) {
/* Create a new hardware context. Using a hardware context means that
brw_init_surface_formats(brw);
- brw->max_vs_threads = devinfo->max_vs_threads;
- brw->max_hs_threads = devinfo->max_hs_threads;
- brw->max_ds_threads = devinfo->max_ds_threads;
- brw->max_gs_threads = devinfo->max_gs_threads;
- brw->max_wm_threads = devinfo->max_wm_threads;
- brw->max_cs_threads = devinfo->max_cs_threads;
- brw->urb.size = devinfo->urb.size;
- brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
- brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
- brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
- brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
- brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
-
- brw_adjust_cs_context_constants(brw);
-
- /* Estimate the size of the mappable aperture into the GTT. There's an
- * ioctl to get the whole GTT size, but not one to get the mappable subset.
- * It turns out it's basically always 256MB, though some ancient hardware
- * was smaller.
- */
- uint32_t gtt_size = 256 * 1024 * 1024;
+ if (brw->gen >= 6)
+ brw_blorp_init(brw);
- /* We don't want to map two objects such that a memcpy between them would
- * just fault one mapping in and then the other over and over forever. So
- * we would need to divide the GTT size by 2. Additionally, some GTT is
- * taken up by things like the framebuffer and the ringbuffer and such, so
- * be more conservative.
- */
- brw->max_gtt_map_object_size = gtt_size / 4;
+ brw->urb.size = devinfo->urb.size;
if (brw->gen == 6)
brw->urb.gs_present = false;
brw->prim_restart.enable_cut_index = false;
brw->gs.enabled = false;
brw->sf.viewport_transform_enable = true;
+ brw->clip.viewport_count = 1;
brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
- brw->use_resource_streamer = screen->has_resource_streamer &&
- (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
- brw_env_var_as_boolean("INTEL_USE_GATHER", false));
+ brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
brw->perf_debug = true;
}
- if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
+ if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
+ ctx->Const.RobustAccess = GL_TRUE;
+ }
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
brw_init_shader_time(brw);
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
- if (ctx->Extensions.AMD_performance_monitor) {
- brw_init_performance_monitors(brw);
- }
+ if (ctx->Extensions.INTEL_performance_query)
+ brw_init_performance_queries(brw);
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
(struct brw_context *) driContextPriv->driverPrivate;
struct gl_context *ctx = &brw->ctx;
- /* Dump a final BMP in case the application doesn't call SwapBuffers */
- if (INTEL_DEBUG & DEBUG_AUB) {
- intel_batchbuffer_flush(brw);
- aub_dump_bmp(&brw->ctx);
- }
-
_mesa_meta_free(&brw->ctx);
- brw_meta_fast_clear_free(brw);
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
/* Force a report. */
brw_destroy_shader_time(brw);
}
+ if (brw->gen >= 6)
+ blorp_finish(&brw->blorp);
+
brw_destroy_state(brw);
brw_draw_destroy(brw);
drm_intel_bo_unreference(brw->curbe.curbe_bo);
if (brw->vs.base.scratch_bo)
drm_intel_bo_unreference(brw->vs.base.scratch_bo);
+ if (brw->tcs.base.scratch_bo)
+ drm_intel_bo_unreference(brw->tcs.base.scratch_bo);
+ if (brw->tes.base.scratch_bo)
+ drm_intel_bo_unreference(brw->tes.base.scratch_bo);
if (brw->gs.base.scratch_bo)
drm_intel_bo_unreference(brw->gs.base.scratch_bo);
if (brw->wm.base.scratch_bo)
drm_intel_bo_unreference(brw->wm.base.scratch_bo);
- gen7_reset_hw_bt_pool_offsets(brw);
- drm_intel_bo_unreference(brw->hw_bt_pool.bo);
- brw->hw_bt_pool.bo = NULL;
-
drm_intel_gem_context_destroy(brw->hw_ctx);
if (ctx->swrast_context) {
_swrast_DestroyContext(&brw->ctx);
brw_fini_pipe_control(brw);
- intel_batchbuffer_free(brw);
+ intel_batchbuffer_free(&brw->batch);
drm_intel_bo_unreference(brw->throttle_batch[1]);
drm_intel_bo_unreference(brw->throttle_batch[0]);
*/
fb->Visual.sRGBCapable = false;
for (int i = 0; i < BUFFER_COUNT; i++) {
- if (fb->Attachment[i].Renderbuffer &&
- fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
- fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
- }
+ struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
+ if (rb)
+ rb->Format = _mesa_get_srgb_format_linear(rb->Format);
}
}
rb = intel_get_renderbuffer(fb, buffers[i]);
if (rb == NULL || rb->mt == NULL)
continue;
- if (rb->mt->num_samples <= 1)
- intel_miptree_resolve_color(brw, rb->mt);
- else
+ if (rb->mt->num_samples <= 1) {
+ assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
+ rb->layer_count == 1);
+ intel_miptree_resolve_color(brw, rb->mt, 0, 0, 1, 0);
+ } else {
intel_renderbuffer_downsample(brw, rb);
+ }
}
}
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
struct brw_context *brw = context->driverPrivate;
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
if (unlikely(INTEL_DEBUG & DEBUG_DRI))
fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
- if (screen->image.loader)
+ if (dri_screen->image.loader)
intel_update_image_buffers(brw, drawable);
else
intel_update_dri2_buffers(brw, drawable);
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
- if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
+ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
brw->front_buffer_dirty = true;
}
__DRIbuffer **buffers,
int *buffer_count)
{
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct gl_framebuffer *fb = drawable->driverPrivate;
int i = 0;
unsigned attachments[8];
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
memset(attachments, 0, sizeof(attachments));
- if ((brw_is_front_buffer_drawing(fb) ||
- brw_is_front_buffer_reading(fb) ||
+ if ((_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) ||
!back_rb) && front_rb) {
/* If a fake front buffer is in use, then querying for
* __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
assert(i <= ARRAY_SIZE(attachments));
- *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
- &drawable->w,
- &drawable->h,
- attachments, i / 2,
- buffer_count,
- drawable->loaderPrivate);
+ *buffers =
+ dri_screen->dri2.loader->getBuffersWithFormat(drawable,
+ &drawable->w,
+ &drawable->h,
+ attachments, i / 2,
+ buffer_count,
+ drawable->loaderPrivate);
}
/**
buffer->cpp, buffer->pitch);
}
- intel_miptree_release(&rb->mt);
bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
buffer->name);
if (!bo) {
drawable->w, drawable->h,
buffer->pitch);
- if (brw_is_front_buffer_drawing(fb) &&
+ if (_mesa_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
rb->Base.Base.NumSamples > 1) {
buffer->width, buffer->height,
buffer->pitch);
- if (brw_is_front_buffer_drawing(fb) &&
+ if (_mesa_is_front_buffer_drawing(fb) &&
buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
rb->Base.Base.NumSamples > 1) {
intel_renderbuffer_upsample(intel, rb);
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
struct gl_framebuffer *fb = drawable->driverPrivate;
- __DRIscreen *screen = brw->intelScreen->driScrnPriv;
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct intel_renderbuffer *front_rb;
struct intel_renderbuffer *back_rb;
struct __DRIimageList images;
unsigned int format;
uint32_t buffer_mask = 0;
+ int ret;
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
else
return;
- if (front_rb && (brw_is_front_buffer_drawing(fb) ||
- brw_is_front_buffer_reading(fb) || !back_rb)) {
+ if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
+ _mesa_is_front_buffer_reading(fb) || !back_rb)) {
buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
}
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
- (*screen->image.loader->getBuffers) (drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
+ ret = dri_screen->image.loader->getBuffers(drawable,
+ driGLFormatToImageFormat(format),
+ &drawable->dri2.stamp,
+ drawable->loaderPrivate,
+ buffer_mask,
+ &images);
+ if (!ret)
+ return;
if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
drawable->w = images.front->width;