#include "brw_context.h"
#include "brw_defines.h"
+#include "brw_blorp.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"
fb->DefaultGeometry.NumSamples);
}
+/**
+ * Walk the color draw buffers of the context's current draw framebuffer and,
+ * for every renderbuffer whose miptree is backed by \p bo, set the matching
+ * entry of brw->draw_aux_buffer_disabled.
+ *
+ * \param brw  context whose draw framebuffer is inspected and whose
+ *             draw_aux_buffer_disabled[] array is updated
+ * \param bo   buffer object to look for among the color draw buffers
+ *
+ * \return true if at least one color draw buffer uses \p bo, false otherwise.
+ */
+static bool
+intel_disable_rb_aux_buffer(struct brw_context *brw, const drm_intel_bo *bo)
+{
+   const struct gl_framebuffer *draw_fb = brw->ctx.DrawBuffer;
+   bool any_match = false;
+
+   for (unsigned idx = 0; idx < draw_fb->_NumColorDrawBuffers; idx++) {
+      const struct intel_renderbuffer *rb =
+         intel_renderbuffer(draw_fb->_ColorDrawBuffers[idx]);
+
+      /* Skip slots where intel_renderbuffer() yields NULL and renderbuffers
+       * backed by a different buffer object.
+       */
+      if (!rb || rb->mt->bo != bo)
+         continue;
+
+      brw->draw_aux_buffer_disabled[idx] = true;
+      any_match = true;
+   }
+
+   return any_match;
+}
+
/* On Gen9 color buffers may be compressed by the hardware (lossless
* compression). There are, however, format restrictions and care needs to be
* taken that the sampler engine is capable of re-interpreting a buffer with
_mesa_get_format_name(intel_tex->_Format),
_mesa_get_format_name(intel_tex->mt->format));
+ if (intel_disable_rb_aux_buffer(brw, intel_tex->mt->bo))
+ perf_debug("Sampling renderbuffer with non-compressible format - "
+ "turning off compression");
+
return true;
}
if (depth_irb)
intel_renderbuffer_resolve_hiz(brw, depth_irb);
+ memset(brw->draw_aux_buffer_disabled, 0,
+ sizeof(brw->draw_aux_buffer_disabled));
+
/* Resolve depth buffer and render cache of each enabled texture. */
int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
for (int i = 0; i <= maxEnabledUnit; i++) {
0 : INTEL_MIPTREE_IGNORE_CCS_E;
intel_miptree_resolve_color(brw, tex_obj->mt, flags);
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+
+ if (tex_obj->base.StencilSampling ||
+ tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
+ intel_update_r8stencil(brw, tex_obj->mt);
+ }
}
/* Resolve color for each active shader image. */
/* Access to images is implemented using indirect messages
* against data port. Normal render target write understands
* lossless compression but unfortunately the typed/untyped
- * read/write interface doesn't. Therefore the compressed
- * surfaces need to be resolved prior to accessing them.
+ * read/write interface doesn't. Therefore even lossless
+ * compressed surfaces need to be resolved prior to accessing
+ * them. Hence skip setting INTEL_MIPTREE_IGNORE_CCS_E.
*/
intel_miptree_resolve_color(brw, tex_obj->mt, 0);
+
+ if (intel_miptree_is_lossless_compressed(brw, tex_obj->mt) &&
+ intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
+ perf_debug("Using renderbuffer as shader image - turning "
+ "off lossless compression");
+ }
+
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
}
}
}
- /* Resolve color buffers for non-coherent framebufer fetch. */
+ /* Resolve color buffers for non-coherent framebuffer fetch. */
if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
ctx->FragmentProgram._Current &&
ctx->FragmentProgram._Current->Base.OutputsRead) {
ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
ctx->Const.MaxRenderbufferSize = 8192;
ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
-
- /* On Sandy Bridge and prior, the "Render Target View Extent" field of
- * RENDER_SURFACE_STATE is only 9 bits so the largest 3-D texture we can do
- * a layered render into has a depth of 512. On Iron Lake and earlier, we
- * don't support layered rendering and we use manual offsetting to render
- * into the different layers so this doesn't matter. On Sandy Bridge,
- * however, we do support layered rendering so this is a problem.
- */
- ctx->Const.Max3DTextureLevels = brw->gen == 6 ? 10 /* 512 */ : 12; /* 2048 */
-
+ ctx->Const.Max3DTextureLevels = 12; /* 2048 */
ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
ctx->Const.MaxTextureMbytes = 1536;
ctx->Const.MaxFramebufferHeight = 16384;
ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
ctx->Const.MaxFramebufferSamples = max_samples;
+
+ /* OES_primitive_bounding_box */
+ ctx->Const.NoPrimitiveBoundingBoxOutput = true;
}
static void
-brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
+brw_initialize_cs_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ const struct intel_screen *screen = brw->intelScreen;
+ const struct gen_device_info *devinfo = screen->devinfo;
+
+ /* FINISHME: Do this for all platforms that the kernel supports */
+ if (brw->is_cherryview &&
+ screen->subslice_total > 0 && screen->eu_total > 0) {
+ /* Logical CS threads = EUs per subslice * 7 threads per EU */
+ brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
+
+ /* Fuse configurations may give more threads than expected, never less. */
+ if (brw->max_cs_threads < devinfo->max_cs_threads)
+ brw->max_cs_threads = devinfo->max_cs_threads;
+ } else {
+ brw->max_cs_threads = devinfo->max_cs_threads;
+ }
+
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
+ *
+ * We don't advertise more than 64 threads, because we are limited to 64 by
+ * our usage of thread_width_max in the gpgpu walker command. This only
+ * currently impacts Haswell, which otherwise might be able to advertise 70
+ * threads. With SIMD32 and 64 threads, Haswell still provides twice the
+ * number of invocations required for ARB_compute_shader.
*/
+ const unsigned max_threads = MIN2(64, brw->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
struct intel_screen *screen = sPriv->driverPrivate;
- const struct brw_device_info *devinfo = screen->devinfo;
+ const struct gen_device_info *devinfo = screen->devinfo;
struct dd_function_table functions;
/* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
- brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
+ brw_initialize_cs_context_constants(brw);
brw_initialize_context_constants(brw);
ctx->Const.ResetStrategy = notify_reset
brw_init_surface_formats(brw);
+ if (brw->gen >= 6)
+ brw_blorp_init(brw);
+
brw->max_vs_threads = devinfo->max_vs_threads;
brw->max_hs_threads = devinfo->max_hs_threads;
brw->max_ds_threads = devinfo->max_ds_threads;
brw->max_gs_threads = devinfo->max_gs_threads;
brw->max_wm_threads = devinfo->max_wm_threads;
- /* FINISHME: Do this for all platforms that the kernel supports */
- if (brw->is_cherryview &&
- screen->subslice_total > 0 && screen->eu_total > 0) {
- /* Logical CS threads = EUs per subslice * 7 threads per EU */
- brw->max_cs_threads = screen->eu_total / screen->subslice_total * 7;
-
- /* Fuse configurations may give more threads than expected, never less. */
- if (brw->max_cs_threads < devinfo->max_cs_threads)
- brw->max_cs_threads = devinfo->max_cs_threads;
- } else {
- brw->max_cs_threads = devinfo->max_cs_threads;
- }
brw->urb.size = devinfo->urb.size;
brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
brw_destroy_shader_time(brw);
}
+ if (brw->gen >= 6)
+ blorp_finish(&brw->blorp);
+
brw_destroy_state(brw);
brw_draw_destroy(brw);