i965/fs: Don't propagate saturate modifiers into partial writes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
index fd0ea547dbd094c4fef1405efa42cb3bec19f94c..e11365484cd4f549f145e1a7edfdfed77d798a92 100644 (file)
@@ -79,6 +79,12 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
    (void) target;
 
    switch (brw->gen) {
+   case 8:
+      samples[0] = 8;
+      samples[1] = 4;
+      samples[2] = 2;
+      return 3;
+
    case 7:
       samples[0] = 8;
       samples[1] = 4;
@@ -198,7 +204,7 @@ intel_glFlush(struct gl_context *ctx)
 
    intel_batchbuffer_flush(brw);
    intel_flush_front(ctx);
-   if (brw->is_front_buffer_rendering)
+   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->need_throttle = true;
 }
 
@@ -277,32 +283,43 @@ brw_initialize_context_constants(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
 
+   unsigned max_samplers =
+      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
+
    ctx->Const.QueryCounterBits.Timestamp = 36;
 
    ctx->Const.StripTextureBorder = true;
 
    ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
-   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
    ctx->Const.MaxTextureUnits =
       MIN2(ctx->Const.MaxTextureCoordUnits,
            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
-   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
    if (brw->gen >= 7)
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
    else
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
+   if (getenv("INTEL_COMPUTE_SHADER")) {
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+      ctx->Const.MaxUniformBufferBindings += 12;
+   } else {
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
+   }
    ctx->Const.MaxCombinedTextureImageUnits =
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
-      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits;
+      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
 
    ctx->Const.MaxTextureLevels = 14; /* 8192 */
    if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
       ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
-   ctx->Const.Max3DTextureLevels = 9;
-   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
+   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+   ctx->Const.MaxTextureMbytes = 1536;
 
    if (brw->gen >= 7)
       ctx->Const.MaxArrayTextureLayers = 2048;
@@ -364,6 +381,8 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    if (brw->gen >= 7)
       ctx->Const.MaxProgramTextureGatherComponents = 4;
+   else if (brw->gen == 6)
+      ctx->Const.MaxProgramTextureGatherComponents = 1;
 
    ctx->Const.MinLineWidth = 1.0;
    ctx->Const.MinLineWidthAA = 1.0;
@@ -420,9 +439,11 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
    }
 
@@ -563,7 +584,6 @@ brwCreateContext(gl_api api,
    struct intel_screen *screen = sPriv->driverPrivate;
    const struct brw_device_info *devinfo = screen->devinfo;
    struct dd_function_table functions;
-   struct gl_config visual;
 
    /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
     * provides us with context reset notifications.
@@ -581,7 +601,7 @@ brwCreateContext(gl_api api,
 
    struct brw_context *brw = rzalloc(NULL, struct brw_context);
    if (!brw) {
-      printf("%s: failed to alloc context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
       return false;
    }
@@ -597,7 +617,7 @@ brwCreateContext(gl_api api,
    brw->is_baytrail = devinfo->is_baytrail;
    brw->is_haswell = devinfo->is_haswell;
    brw->has_llc = devinfo->has_llc;
-   brw->has_hiz = devinfo->has_hiz_and_separate_stencil && brw->gen < 8;
+   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
    brw->has_pln = devinfo->has_pln;
    brw->has_compr4 = devinfo->has_compr4;
@@ -609,7 +629,11 @@ brwCreateContext(gl_api api,
    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
 
-   if (brw->gen >= 7) {
+   if (brw->gen >= 8) {
+      gen8_init_vtable_surface_functions(brw);
+      gen7_init_vtable_sampler_functions(brw);
+      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
+   } else if (brw->gen >= 7) {
       gen7_init_vtable_surface_functions(brw);
       gen7_init_vtable_sampler_functions(brw);
       brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
@@ -626,14 +650,9 @@ brwCreateContext(gl_api api,
 
    struct gl_context *ctx = &brw->ctx;
 
-   if (mesaVis == NULL) {
-      memset(&visual, 0, sizeof visual);
-      mesaVis = &visual;
-   }
-
    if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
-      printf("%s: failed to init mesa context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
       intelDestroyContext(driContextPriv);
       return false;
    }
@@ -673,14 +692,10 @@ brwCreateContext(gl_api api,
    /* Reinitialize the context point state.  It depends on ctx->Const values. */
    _mesa_init_point(ctx);
 
-   intel_batchbuffer_init(brw);
-
-   brw_init_state(brw);
-
-   intelInitExtensions(ctx);
-
    intel_fbo_init(brw);
 
+   intel_batchbuffer_init(brw);
+
    if (brw->gen >= 6) {
       /* Create a new hardware context.  Using a hardware context means that
        * our GPU state will be saved/restored on context switch, allowing us
@@ -698,6 +713,10 @@ brwCreateContext(gl_api api,
       }
    }
 
+   brw_init_state(brw);
+
+   intelInitExtensions(ctx);
+
    brw_init_surface_formats(brw);
 
    if (brw->is_g4x || brw->gen >= 5) {
@@ -756,9 +775,6 @@ brwCreateContext(gl_api api,
    if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
 
-   brw_fs_alloc_reg_sets(brw);
-   brw_vec4_alloc_reg_set(brw);
-
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       brw_init_shader_time(brw);
 
@@ -805,8 +821,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    brw_draw_destroy(brw);
 
    drm_intel_bo_unreference(brw->curbe.curbe_bo);
-   drm_intel_bo_unreference(brw->vs.base.const_bo);
-   drm_intel_bo_unreference(brw->wm.base.const_bo);
 
    free(brw->curbe.last_buf);
    free(brw->curbe.next_buf);
@@ -888,8 +902,8 @@ intel_gles3_srgb_workaround(struct brw_context *brw,
    fb->Visual.sRGBCapable = false;
    for (int i = 0; i < BUFFER_COUNT; i++) {
       if (fb->Attachment[i].Renderbuffer &&
-          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
-         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
+          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
+         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
       }
    }
 }
@@ -918,7 +932,6 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
    if (driContextPriv) {
       struct gl_context *ctx = &brw->ctx;
       struct gl_framebuffer *fb, *readFb;
-      struct intel_renderbuffer *rb = NULL;
 
       if (driDrawPriv == NULL && driReadPriv == NULL) {
          fb = _mesa_get_incomplete_framebuffer();
@@ -926,7 +939,6 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
       } else {
          fb = driDrawPriv->driverPrivate;
          readFb = driReadPriv->driverPrivate;
-         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
          driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
          driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
       }
@@ -938,11 +950,11 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
       intel_gles3_srgb_workaround(brw, fb);
       intel_gles3_srgb_workaround(brw, readFb);
 
-      if (rb && !rb->mt) {
-         /* If we don't have buffers for the drawable yet, force a call to
-          * getbuffers here so we can have a default drawable size. */
+      /* If the context viewport hasn't been initialized, force a call out to
+       * the loader to get buffers so we have a drawable size for the initial
+       * viewport. */
+      if (!brw->ctx.ViewportInitialized)
          intel_prepare_render(brw);
-      }
 
       _mesa_make_current(ctx, fb, readFb);
    } else {
@@ -981,7 +993,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
       if (rb->mt->num_samples <= 1)
          intel_miptree_resolve_color(brw, rb->mt);
       else
-         intel_miptree_downsample(brw, rb->mt);
+         intel_renderbuffer_downsample(brw, rb);
    }
 }
 
@@ -1092,6 +1104,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 void
 intel_prepare_render(struct brw_context *brw)
 {
+   struct gl_context *ctx = &brw->ctx;
    __DRIcontext *driContext = brw->driContext;
    __DRIdrawable *drawable;
 
@@ -1113,7 +1126,7 @@ intel_prepare_render(struct brw_context *brw)
     * that will happen next will probably dirty the front buffer.  So
     * mark it as dirty here.
     */
-   if (brw->is_front_buffer_rendering)
+   if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->front_buffer_dirty = true;
 
    /* Wait for the swapbuffers before the one we just emitted, so we
@@ -1175,8 +1188,8 @@ intel_query_dri2_buffers(struct brw_context *brw,
    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 
    memset(attachments, 0, sizeof(attachments));
-   if ((brw->is_front_buffer_rendering ||
-        brw->is_front_buffer_reading ||
+   if ((brw_is_front_buffer_drawing(fb) ||
+        brw_is_front_buffer_reading(fb) ||
         !back_rb) && front_rb) {
       /* If a fake front buffer is in use, then querying for
        * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
@@ -1238,6 +1251,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
                           const char *buffer_name)
 {
    struct intel_region *region = NULL;
+   struct gl_framebuffer *fb = drawable->driverPrivate;
 
    if (!rb)
       return;
@@ -1248,18 +1262,14 @@ intel_process_dri2_buffer(struct brw_context *brw,
     * use of a mapping of the buffer involves a bunch of page faulting which is
     * moderately expensive.
     */
-   if (num_samples == 0) {
-       if (rb->mt &&
-           rb->mt->region &&
-           rb->mt->region->name == buffer->name)
-          return;
-   } else {
-       if (rb->mt &&
-           rb->mt->singlesample_mt &&
-           rb->mt->singlesample_mt->region &&
-           rb->mt->singlesample_mt->region->name == buffer->name)
-          return;
-   }
+   struct intel_mipmap_tree *last_mt;
+   if (num_samples == 0)
+      last_mt = rb->mt;
+   else
+      last_mt = rb->singlesample_mt;
+
+   if (last_mt && last_mt->region->name == buffer->name)
+      return;
 
    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
       fprintf(stderr,
@@ -1276,14 +1286,27 @@ intel_process_dri2_buffer(struct brw_context *brw,
                                           buffer->pitch,
                                           buffer->name,
                                           buffer_name);
-   if (!region)
+   if (!region) {
+      fprintf(stderr,
+              "Failed to make region for returned DRI2 buffer "
+              "(%dx%d, named %d).\n"
+              "This is likely a bug in the X Server that will lead to a "
+              "crash soon.\n",
+              drawable->w, drawable->h, buffer->name);
       return;
+   }
+
+   intel_update_winsys_renderbuffer_miptree(brw, rb, region);
+
+   if (brw_is_front_buffer_drawing(fb) &&
+       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
+        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
+       rb->Base.Base.NumSamples > 1) {
+      intel_renderbuffer_upsample(brw, rb);
+   }
+
+   assert(rb->mt);
 
-   rb->mt = intel_miptree_create_for_dri2_buffer(brw,
-                                                 buffer->attachment,
-                                                 intel_rb_format(rb),
-                                                 num_samples,
-                                                 region);
    intel_region_release(&region);
 }
 
@@ -1311,6 +1334,7 @@ intel_update_image_buffer(struct brw_context *intel,
                           enum __DRIimageBufferMask buffer_type)
 {
    struct intel_region *region = buffer->region;
+   struct gl_framebuffer *fb = drawable->driverPrivate;
 
    if (!rb || !region)
       return;
@@ -1320,25 +1344,22 @@ intel_update_image_buffer(struct brw_context *intel,
    /* Check and see if we're already bound to the right
     * buffer object
     */
-   if (num_samples == 0) {
-       if (rb->mt &&
-           rb->mt->region &&
-           rb->mt->region->bo == region->bo)
-          return;
-   } else {
-       if (rb->mt &&
-           rb->mt->singlesample_mt &&
-           rb->mt->singlesample_mt->region &&
-           rb->mt->singlesample_mt->region->bo == region->bo)
-          return;
-   }
+   struct intel_mipmap_tree *last_mt;
+   if (num_samples == 0)
+      last_mt = rb->mt;
+   else
+      last_mt = rb->singlesample_mt;
 
-   intel_miptree_release(&rb->mt);
-   rb->mt = intel_miptree_create_for_image_buffer(intel,
-                                                  buffer_type,
-                                                  intel_rb_format(rb),
-                                                  num_samples,
-                                                  region);
+   if (last_mt && last_mt->region->bo == region->bo)
+      return;
+
+   intel_update_winsys_renderbuffer_miptree(intel, rb, region);
+
+   if (brw_is_front_buffer_drawing(fb) &&
+       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
+       rb->Base.Base.NumSamples > 1) {
+      intel_renderbuffer_upsample(intel, rb);
+   }
 }
 
 static void
@@ -1362,8 +1383,10 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
    else
       return;
 
-   if ((brw->is_front_buffer_rendering || brw->is_front_buffer_reading || !back_rb) && front_rb)
+   if (front_rb && (brw_is_front_buffer_drawing(fb) ||
+                    brw_is_front_buffer_reading(fb) || !back_rb)) {
       buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
+   }
 
    if (back_rb)
       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;