intel: Add a batch flush between front-buffer downsample and X protocol.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_sampler_state.c
index 918c1d6243d5ab79532a540c4090384340973157..f2117a48e1a3ec71bca4be56ca5b6203d74ce744 100644 (file)
 
 
 
-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-GLuint
-translate_wrap_mode(GLenum wrap)
+uint32_t
+translate_wrap_mode(GLenum wrap, bool using_nearest)
 {
    switch( wrap ) {
    case GL_REPEAT: 
       return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
+   case GL_CLAMP:
+      /* GL_CLAMP is the weird mode where coordinates are clamped to
+       * [0.0, 1.0], so linear filtering of coordinates outside of
+       * [0.0, 1.0] gives you half edge texel value and half border
+       * color.  The fragment shader will clamp the coordinates, and
+       * we set clamp_border here, which gets the result desired.  We
+       * just use clamp(_to_edge) for nearest, because for nearest
+       * clamping to 1.0 gives border color instead of the desired
+       * edge texels.
+       */
+      if (using_nearest)
+        return BRW_TEXCOORDMODE_CLAMP;
+      else
+        return BRW_TEXCOORDMODE_CLAMP_BORDER;
    case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
    case GL_CLAMP_TO_BORDER: 
       return BRW_TEXCOORDMODE_CLAMP_BORDER;
    case GL_MIRRORED_REPEAT: 
@@ -71,17 +79,19 @@ translate_wrap_mode(GLenum wrap)
  * Upload SAMPLER_BORDER_COLOR_STATE.
  */
 void
-upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
-                    int unit)
+upload_default_color(struct brw_context *brw,
+                     struct gl_sampler_object *sampler,
+                     int unit,
+                     uint32_t *sdc_offset)
 {
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
+   struct gl_context *ctx = &brw->ctx;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    struct gl_texture_object *texObj = texUnit->_Current;
    struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
    float color[4];
 
-   if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+   switch (firstImage->_BaseFormat) {
+   case GL_DEPTH_COMPONENT:
       /* GL specs that border color for depth textures is taken from the
        * R channel, while the hardware uses A.  Spam R into all the
        * channels for safety.
@@ -90,17 +100,51 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
       color[1] = sampler->BorderColor.f[0];
       color[2] = sampler->BorderColor.f[0];
       color[3] = sampler->BorderColor.f[0];
-   } else {
+      break;
+   case GL_ALPHA:
+      color[0] = 0.0;
+      color[1] = 0.0;
+      color[2] = 0.0;
+      color[3] = sampler->BorderColor.f[3];
+      break;
+   case GL_INTENSITY:
+      color[0] = sampler->BorderColor.f[0];
+      color[1] = sampler->BorderColor.f[0];
+      color[2] = sampler->BorderColor.f[0];
+      color[3] = sampler->BorderColor.f[0];
+      break;
+   case GL_LUMINANCE:
+      color[0] = sampler->BorderColor.f[0];
+      color[1] = sampler->BorderColor.f[0];
+      color[2] = sampler->BorderColor.f[0];
+      color[3] = 1.0;
+      break;
+   case GL_LUMINANCE_ALPHA:
+      color[0] = sampler->BorderColor.f[0];
+      color[1] = sampler->BorderColor.f[0];
+      color[2] = sampler->BorderColor.f[0];
+      color[3] = sampler->BorderColor.f[3];
+      break;
+   default:
       color[0] = sampler->BorderColor.f[0];
       color[1] = sampler->BorderColor.f[1];
       color[2] = sampler->BorderColor.f[2];
       color[3] = sampler->BorderColor.f[3];
+      break;
    }
 
-   if (intel->gen == 5 || intel->gen == 6) {
+   /* In some cases we use an RGBA surface format for GL RGB textures,
+    * where we've initialized the A channel to 1.0.  We also have to set
+    * the border color alpha to 1.0 in that case.
+    */
+   if (firstImage->_BaseFormat == GL_RGB)
+      color[3] = 1.0;
+
+   if (brw->gen == 5 || brw->gen == 6) {
       struct gen5_sampler_default_color *sdc;
 
-      sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]);
+      sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
+                           sizeof(*sdc), 32, sdc_offset);
 
       memset(sdc, 0, sizeof(*sdc));
 
@@ -136,7 +180,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
    } else {
       struct brw_sampler_default_color *sdc;
 
-      sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]);
+      sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
+                           sizeof(*sdc), 32, sdc_offset);
 
       COPY_4V(sdc->color, color);
    }
@@ -148,18 +193,26 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
  */
 static void brw_update_sampler_state(struct brw_context *brw,
                                     int unit,
-                                    struct brw_sampler_state *sampler)
+                                     int ss_index,
+                                     struct brw_sampler_state *sampler,
+                                     uint32_t sampler_state_table_offset,
+                                     uint32_t *sdc_offset)
 {
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
+   struct gl_context *ctx = &brw->ctx;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    struct gl_texture_object *texObj = texUnit->_Current;
    struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;
+
+   /* These don't use samplers at all. */
+   if (texObj->Target == GL_TEXTURE_BUFFER)
+      return;
 
    switch (gl_sampler->MinFilter) {
    case GL_NEAREST:
       sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
       sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
       break;
    case GL_LINEAR:
       sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -200,6 +253,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
       switch (gl_sampler->MagFilter) {
       case GL_NEAREST:
         sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+        using_nearest = true;
         break;
       case GL_LINEAR:
         sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -209,18 +263,22 @@ static void brw_update_sampler_state(struct brw_context *brw,
       }  
    }
 
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+                                                 using_nearest);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+                                                 using_nearest);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+                                                 using_nearest);
 
-   if (intel->gen >= 6 &&
+   if (brw->gen >= 6 &&
        sampler->ss0.min_filter != sampler->ss0.mag_filter)
        sampler->ss0.min_mag_neq = 1;
 
    /* Cube-maps on 965 and later must use the same wrap mode for all 3
     * coordinate dimensions.  Further, only CUBE and CLAMP are valid.
     */
-   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
+       texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
       if (ctx->Texture.CubeMapSeamless &&
          (gl_sampler->MinFilter != GL_NEAREST ||
           gl_sampler->MagFilter != GL_NEAREST)) {
@@ -273,64 +331,119 @@ static void brw_update_sampler_state(struct brw_context *brw,
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
 
-   upload_default_color(brw, gl_sampler, unit);
+   /* On Gen6+, the sampler can handle non-normalized texture
+    * rectangle coordinates natively
+    */
+   if (brw->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
+   upload_default_color(brw, gl_sampler, unit, sdc_offset);
 
-   if (intel->gen >= 6) {
-      sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
+   if (brw->gen >= 6) {
+      sampler->ss2.default_color_pointer = *sdc_offset >> 5;
    } else {
       /* reloc */
-      sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
-                                           brw->wm.sdc_offset[unit]) >> 5;
+      sampler->ss2.default_color_pointer = (brw->batch.bo->offset +
+                                           *sdc_offset) >> 5;
 
-      drm_intel_bo_emit_reloc(intel->batch.bo,
-                             brw->wm.sampler_offset +
-                             unit * sizeof(struct brw_sampler_state) +
+      drm_intel_bo_emit_reloc(brw->batch.bo,
+                             sampler_state_table_offset +
+                             ss_index * sizeof(struct brw_sampler_state) +
                              offsetof(struct brw_sampler_state, ss2),
-                             intel->batch.bo, brw->wm.sdc_offset[unit],
+                             brw->batch.bo, *sdc_offset,
                              I915_GEM_DOMAIN_SAMPLER, 0);
    }
+
+   if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
+      sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+                                    BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+                                    BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
+   if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
+      sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+                                    BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+                                    BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
 }
 
 
-/* All samplers must be uploaded in a single contiguous array, which
- * complicates various things.  However, this is still too confusing -
- * FIXME: simplify all the different new texture state flags.
- */
 static void
-prepare_wm_samplers(struct brw_context *brw)
+brw_upload_sampler_state_table(struct brw_context *brw,
+                               struct gl_program *prog,
+                               uint32_t sampler_count,
+                               uint32_t *sst_offset,
+                               uint32_t *sdc_offset) /*
+ * Allocates sampler_count brw_sampler_state entries with brw_state_batch,
+ * zeroes them, and fills in the entry for every sampler index that is set
+ * in prog->SamplersUsed and whose mapped texture unit is _ReallyEnabled.
+ *
+ * prog          - program supplying SamplersUsed and SamplerUnits[].
+ * sampler_count - number of table entries; 0 makes this a no-op.
+ * sst_offset    - handed to brw_state_batch for the table allocation; the
+ *                 value it points at is read back when filling entries.
+ * sdc_offset    - array indexed by sampler index; slot s is passed down to
+ *                 brw_update_sampler_state for sampler s.
+ */
 {
-   struct gl_context *ctx = &brw->intel.ctx;
+   struct gl_context *ctx = &brw->ctx;
    struct brw_sampler_state *samplers;
-   int i;
 
-   brw->wm.sampler_count = 0;
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-        brw->wm.sampler_count = i + 1;
-   }
+   GLbitfield SamplersUsed = prog->SamplersUsed; /* tested bit-by-bit per sampler index below */
 
-   if (brw->wm.sampler_count == 0)
+   if (sampler_count == 0) /* early out: skips the allocation and the CACHE_NEW_SAMPLER flag */
       return;
 
-   samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers),
-                             32, &brw->wm.sampler_offset);
-   memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
-
-   for (i = 0; i < brw->wm.sampler_count; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-        brw_update_sampler_state(brw, i, &samplers[i]);
+   samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
+                             sampler_count * sizeof(*samplers),
+                             32, sst_offset);
+   memset(samplers, 0, sampler_count * sizeof(*samplers)); /* entries the loop skips stay all-zero */
+
+   for (unsigned s = 0; s < sampler_count; s++) {
+      if (SamplersUsed & (1 << s)) { /* NOTE(review): signed shift; undefined if s can reach 31 - confirm the bound on sampler_count */
+         const unsigned unit = prog->SamplerUnits[s]; /* sampler index -> GL texture unit */
+         if (ctx->Texture.Unit[unit]._ReallyEnabled)
+            brw_update_sampler_state(brw, unit, s, &samplers[s],
+                                     *sst_offset, &sdc_offset[s]);
+      }
    }
 
    brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
 }
 
-const struct brw_tracked_state brw_wm_samplers = {
+static void
+brw_upload_fs_samplers(struct brw_context *brw)
+{
+   /* BRW_NEW_FRAGMENT_PROGRAM
+    *
+    * Emit callback of brw_fs_samplers.  Reads the current fragment
+    * program (cast to its gl_program form) and forwards it, together with
+    * the brw->wm sampler count, sampler table offset and sdc_offset array,
+    * to whatever function is installed in
+    * brw->vtbl.upload_sampler_state_table.
+    */
+   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+   brw->vtbl.upload_sampler_state_table(brw, fs,
+                                        brw->wm.sampler_count,
+                                        &brw->wm.sampler_offset,
+                                        brw->wm.sdc_offset);
+}
+
+const struct brw_tracked_state brw_fs_samplers = { /* tracked-state entry whose emit hook is brw_upload_fs_samplers */
    .dirty = {
       .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_FRAGMENT_PROGRAM, /* the emit hook reads brw->fragment_program */
       .cache = 0
    },
-   .prepare = prepare_wm_samplers,
+   .emit = brw_upload_fs_samplers, /* forwards the brw->wm sampler fields through brw->vtbl */
 };
 
+static void
+brw_upload_vs_samplers(struct brw_context *brw)
+{
+   /* BRW_NEW_VERTEX_PROGRAM
+    *
+    * Emit callback of brw_vs_samplers.  Reads the current vertex program
+    * (cast to its gl_program form) and forwards it, together with the
+    * brw->vs sampler count, sampler table offset and sdc_offset array, to
+    * whatever function is installed in
+    * brw->vtbl.upload_sampler_state_table.
+    */
+   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+   brw->vtbl.upload_sampler_state_table(brw, vs,
+                                        brw->vs.sampler_count,
+                                        &brw->vs.sampler_offset,
+                                        brw->vs.sdc_offset);
+}
 
+
+const struct brw_tracked_state brw_vs_samplers = { /* tracked-state entry whose emit hook is brw_upload_vs_samplers */
+   .dirty = {
+      .mesa = _NEW_TEXTURE,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_VERTEX_PROGRAM, /* the emit hook reads brw->vertex_program */
+      .cache = 0
+   },
+   .emit = brw_upload_vs_samplers, /* forwards the brw->vs sampler fields through brw->vtbl */
+};
+
+
+void
+gen4_init_vtable_sampler_functions(struct brw_context *brw) /*
+ * Installs brw_upload_sampler_state_table as the
+ * brw->vtbl.upload_sampler_state_table hook, which is the entry point
+ * brw_upload_fs_samplers and brw_upload_vs_samplers call through.
+ */
+{
+   brw->vtbl.upload_sampler_state_table = brw_upload_sampler_state_table;
+}