#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
+#include "main/texobj.h"
#include "vbo/vbo_context.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
+#include "glsl/nir/nir.h"
+
/***************************************
* Mesa's Driver Functions
***************************************/
(void) target;
switch (brw->gen) {
+ case 9:
case 8:
samples[0] = 8;
samples[1] = 4;
return 1;
default:
+ assert(brw->gen < 6);
samples[0] = 1;
return 1;
}
}
static const GLubyte *
-intelGetString(struct gl_context * ctx, GLenum name)
+intel_get_string(struct gl_context * ctx, GLenum name)
{
const struct brw_context *const brw = brw_context(ctx);
}
static void
-intelInvalidateState(struct gl_context * ctx, GLuint new_state)
+intel_update_state(struct gl_context * ctx, GLuint new_state)
{
struct brw_context *brw = brw_context(ctx);
+ struct intel_texture_object *tex_obj;
+ struct intel_renderbuffer *depth_irb;
if (ctx->swrast_context)
_swrast_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
brw->NewGLState |= new_state;
+ /* Beyond forwarding new_state to swrast (when present) and vbo and
+ * accumulating it in brw->NewGLState, this hook resolves the draw
+ * framebuffer's depth renderbuffer and every texture currently bound to
+ * a texture unit. The resolves run between
+ * _mesa_unlock_context_textures() and the matching
+ * _mesa_lock_context_textures() at the end of the function.
+ * NOTE(review): presumably the resolve paths need the texture lock
+ * released -- confirm against the callers of UpdateState.
+ */
+ _mesa_unlock_context_textures(ctx);
+
+ /* Resolve the depth buffer's HiZ buffer. */
+ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+ if (depth_irb)
+ intel_renderbuffer_resolve_hiz(brw, depth_irb);
+
+ /* Resolve depth buffer and render cache of each enabled texture: walk
+ * units 0 through _MaxEnabledTexImageUnit inclusive, skipping units that
+ * have no current texture object and textures that have no miptree (mt).
+ */
+ int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+ for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!ctx->Texture.Unit[i]._Current)
+ continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+ continue;
+ intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+ intel_miptree_resolve_color(brw, tex_obj->mt);
+ brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+ }
+ /* Pairs with the _mesa_unlock_context_textures() call above. */
+ _mesa_lock_context_textures(ctx);
}
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
intel_batchbuffer_flush(brw);
intel_flush_front(ctx);
- if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
- brw->need_throttle = true;
+
+ brw->need_flush_throttle = true;
}
-void
-intelFinish(struct gl_context * ctx)
+static void
+intel_finish(struct gl_context * ctx)
{
struct brw_context *brw = brw_context(ctx);
functions->Viewport = intel_viewport;
functions->Flush = intel_glFlush;
- functions->Finish = intelFinish;
- functions->GetString = intelGetString;
- functions->UpdateState = intelInvalidateState;
+ functions->Finish = intel_finish;
+ functions->GetString = intel_get_string;
+ functions->UpdateState = intel_update_state;
intelInitTextureFuncs(functions);
intelInitTextureImageFuncs(functions);
MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
- if (brw->gen >= 7)
+ if (brw->gen >= 6)
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
else
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
ctx->Const.MaxDepthTextureSamples = max_samples;
ctx->Const.MaxIntegerSamples = max_samples;
+ /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
+ * to map indices of rectangular grid to sample numbers within a pixel.
+ * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
+ * extension implementation. For more details see the comment above
+ * gen6_set_sample_maps() definition.
+ */
+ gen6_set_sample_maps(ctx);
+
if (brw->gen >= 7)
ctx->Const.MaxProgramTextureGatherComponents = 4;
else if (brw->gen == 6)
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
- ctx->Const.MaxLineWidth = 5.0;
- ctx->Const.MaxLineWidthAA = 5.0;
- ctx->Const.LineWidthGranularity = 0.5;
+ if (brw->gen >= 9 || brw->is_cherryview) {
+ ctx->Const.MaxLineWidth = 40.0;
+ ctx->Const.MaxLineWidthAA = 40.0;
+ ctx->Const.LineWidthGranularity = 0.125;
+ } else if (brw->gen >= 6) {
+ ctx->Const.MaxLineWidth = 7.375;
+ ctx->Const.MaxLineWidthAA = 7.375;
+ ctx->Const.LineWidthGranularity = 0.125;
+ } else {
+ ctx->Const.MaxLineWidth = 7.0;
+ ctx->Const.MaxLineWidthAA = 7.0;
+ ctx->Const.LineWidthGranularity = 0.5;
+ }
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
+ ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
+ ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
+ ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
+ ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
+
if (brw->gen >= 7) {
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
ctx->Const.QuadsFollowProvokingVertexConvention = false;
ctx->Const.NativeIntegers = true;
- ctx->Const.UniformBooleanTrue = 1;
+ ctx->Const.VertexID_is_zero_based = true;
+
+ /* Regarding the CMP instruction, the Ivybridge PRM says:
+ *
+ * "For each enabled channel 0b or 1b is assigned to the appropriate flag
+ * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
+ * 0xFFFFFFFF) is assigned to dst."
+ *
+ * but PRMs for earlier generations say
+ *
+ * "In dword format, one GRF may store up to 8 results. When the register
+ * is used later as a vector of Booleans, as only LSB at each channel
+ * contains meaning [sic] data, software should make sure all higher bits
+ * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
+ *
+ * We select the representation of a true boolean uniform to be ~0, and fix
+ * the results of Gen <= 5 CMP instructions with -(result & 1).
+ */
+ ctx->Const.UniformBooleanTrue = ~0;
/* From the gen4 PRM, volume 4 page 127:
*
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
}
+ static const nir_shader_compiler_options gen4_nir_options = {
+ .native_integers = true,
+ .lower_ffma = true,
+ };
+
+ static const nir_shader_compiler_options gen6_nir_options = {
+ .native_integers = true,
+ };
+
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
- ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
- ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
- ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
- ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
- ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput =
+ ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
+ ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
+ ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
+ ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
(i == MESA_SHADER_FRAGMENT);
- ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
- ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
- ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
+ ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
+ if (brw->gen >= 6)
+ ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen6_nir_options;
+ else
+ ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen4_nir_options;
}
- ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
- ctx->ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
+
+ if (brw->scalar_vs) {
+ /* If we're using the scalar backend for vertex shaders, we need to
+ * configure these accordingly.
+ */
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+ }
/* ARB_viewport_array */
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
brw->disable_throttling = true;
}
- brw->disable_derivative_optimization =
- driQueryOptionb(&brw->optionCache, "disable_derivative_optimization");
-
brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
ctx->Const.ForceGLSLExtensionsWarn =
driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}
+/* drop when libdrm 2.4.61 is released */
+#ifndef I915_PARAM_REVISION
+#define I915_PARAM_REVISION 32
+#endif
+/**
+ * Query the I915_PARAM_REVISION parameter by issuing DRM_I915_GETPARAM
+ * through drmCommandWriteRead() on the given file descriptor.
+ *
+ * The request structure is zeroed first, then pointed at the local
+ * `revision` so the result is stored there.
+ *
+ * \param fd file descriptor handed to drmCommandWriteRead().
+ * \return the value left in `revision` after the call, or -1 when
+ * drmCommandWriteRead() returns non-zero.
+ *
+ * NOTE(review): `revision` has no initializer; this assumes a zero return
+ * from drmCommandWriteRead() always means the value was written -- confirm.
+ */
+static int
+brw_get_revision(int fd)
+{
+ struct drm_i915_getparam gp;
+ int revision;
+ int ret;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = I915_PARAM_REVISION;
+ gp.value = &revision;
+
+ ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+ if (ret)
+ revision = -1;
+
+ return revision;
+}
+
GLboolean
brwCreateContext(gl_api api,
const struct gl_config *mesaVis,
brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
brw->needs_unlit_centroid_workaround =
devinfo->needs_unlit_centroid_workaround;
+ brw->revision = brw_get_revision(sPriv->fd);
brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;
} else if (brw->gen >= 7) {
gen7_init_vtable_surface_functions(brw);
brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
+ } else if (brw->gen >= 6) {
+ gen6_init_vtable_surface_functions(brw);
+ brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
} else {
gen4_init_vtable_surface_functions(brw);
brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
brw_process_driconf_options(brw);
brw_process_intel_debug_variable(brw);
+
+ if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
+ brw->scalar_vs = true;
+
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
brw_init_surface_formats(brw);
brw->max_vs_threads = devinfo->max_vs_threads;
+ brw->max_hs_threads = devinfo->max_hs_threads;
+ brw->max_ds_threads = devinfo->max_ds_threads;
brw->max_gs_threads = devinfo->max_gs_threads;
brw->max_wm_threads = devinfo->max_wm_threads;
brw->urb.size = devinfo->urb.size;
brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
+ brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
+ brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
/* Estimate the size of the mappable aperture into the GTT. There's an
brw->max_gtt_map_object_size = gtt_size / 4;
if (brw->gen == 6)
- brw->urb.gen6_gs_previously_active = false;
+ brw->urb.gs_present = false;
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
brw->gs.enabled = false;
+ brw->sf.viewport_transform_enable = true;
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
}
_mesa_meta_free(&brw->ctx);
+ brw_meta_fast_clear_free(brw);
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
/* Force a report. */
brw_draw_destroy(brw);
drm_intel_bo_unreference(brw->curbe.curbe_bo);
+ if (brw->vs.base.scratch_bo)
+ drm_intel_bo_unreference(brw->vs.base.scratch_bo);
+ if (brw->gs.base.scratch_bo)
+ drm_intel_bo_unreference(brw->gs.base.scratch_bo);
+ if (brw->wm.base.scratch_bo)
+ drm_intel_bo_unreference(brw->wm.base.scratch_bo);
drm_intel_gem_context_destroy(brw->hw_ctx);
intel_batchbuffer_free(brw);
- drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
- brw->first_post_swapbuffers_batch = NULL;
+ drm_intel_bo_unreference(brw->throttle_batch[1]);
+ drm_intel_bo_unreference(brw->throttle_batch[0]);
+ brw->throttle_batch[1] = NULL;
+ brw->throttle_batch[0] = NULL;
driDestroyOptionCache(&brw->optionCache);
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb, *readFb;
- if (driDrawPriv == NULL && driReadPriv == NULL) {
+ if (driDrawPriv == NULL) {
fb = _mesa_get_incomplete_framebuffer();
- readFb = _mesa_get_incomplete_framebuffer();
} else {
fb = driDrawPriv->driverPrivate;
- readFb = driReadPriv->driverPrivate;
driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+ }
+
+ if (driReadPriv == NULL) {
+ readFb = _mesa_get_incomplete_framebuffer();
+ } else {
+ readFb = driReadPriv->driverPrivate;
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
}
*/
if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
brw->front_buffer_dirty = true;
-
- /* Wait for the swapbuffers before the one we just emitted, so we
- * don't get too many swaps outstanding for apps that are GPU-heavy
- * but not CPU-heavy.
- *
- * We're using intelDRI2Flush (called from the loader before
- * swapbuffer) and glFlush (for front buffer rendering) as the
- * indicator that a frame is done and then throttle when we get
- * here as we prepare to render the next frame. At this point for
- * round trips for swap/copy and getting new buffers are done and
- * we'll spend less time waiting on the GPU.
- *
- * Unfortunately, we don't have a handle to the batch containing
- * the swap, and getting our hands on that doesn't seem worth it,
- * so we just us the first batch we emitted after the last swap.
- */
- if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
- if (!brw->disable_throttling)
- drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
- drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
- brw->first_post_swapbuffers_batch = NULL;
- brw->need_throttle = false;
- }
}
/**